ION System Heap 与 CMA Heap 深度解析

基于 Linux Kernel v5.4.123 | 源码：drivers/staging/android/ion/ion_system_heap.c、ion_cma_heap.c

ION 框架通过 ion_heap_ops 虚函数表将分配策略与核心框架解耦。本文聚焦两种最重要的堆实现：System Heap（Android 上最常用）和 CMA Heap（物理连续分配），深入对比它们的设计哲学、分配算法、释放机制和适用场景。

1. 两种 Heap 解决什么问题

需求	System Heap	CMA Heap
GPU 纹理、Camera 预览帧	适合（GPU 支持 scatter-gather DMA）	可用但浪费
不支持 SG-DMA 的简单设备	不适合（内存不连续）	适合（物理连续）
大 buffer（几十 MB）	适合（不要求连续，碎片化下也能成功）	可能失败（需要大块连续区域）
分配频率高（每帧分配/释放）	适合（page pool 加速）	不适合（无缓存，CMA 开销大）

核心区别：System Heap 分配的内存物理上可以散布，CMA Heap 分配的内存物理上必须连续。

2. System Heap 详解

2.1 数据结构

c
// ion_system_heap.c:42
struct ion_system_heap {
    struct ion_heap heap;                    // 嵌入基类（C 语言继承）
    struct ion_page_pool *pools[NUM_ORDERS]; // 3 个 page pool
};

static const unsigned int orders[] = {8, 4, 0};
// orders[0] = 8 → 2^8 = 256 页 = 1MB
// orders[1] = 4 → 2^4 = 16 页  = 64KB
// orders[2] = 0 → 2^0 = 1 页   = 4KB


ion_system_heap
  │
  ├── heap (ion_heap 基类)
  │     ├── ops = &system_heap_ops
  │     ├── type = ION_HEAP_TYPE_SYSTEM
  │     ├── flags = ION_HEAP_FLAG_DEFER_FREE
  │     ├── shrinker (已注册)
  │     ├── free_list (deferred free 队列)
  │     └── task (deferred free kthread)
  │
  └── pools[3]
        ├── [0] → ion_page_pool { order=8, gfp=HIGH_ORDER_FLAGS }
        ├── [1] → ion_page_pool { order=4, gfp=LOW_ORDER_FLAGS  }
        └── [2] → ion_page_pool { order=0, gfp=LOW_ORDER_FLAGS  }

2.2 GFP 标志策略

c
// 高阶（order > 4）：快速失败，不回收不等待不重试
static gfp_t high_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO |
    __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_RECLAIM;

// 低阶（order <= 4）：允许回收和等待，必须成功
static gfp_t low_order_gfp_flags = GFP_HIGHUSER | __GFP_ZERO;

标志	含义	为什么
`GFP_HIGHUSER`	可用 highmem，用户态可访问	ION buffer 主要给用户态和设备用
`__GFP_ZERO`	分配后清零	安全性，防止信息泄漏
`__GFP_NOWARN`	失败不打内核警告	高阶失败是正常的，不要刷日志
`__GFP_NORETRY`	不重试	高阶失败直接 fallback 到低阶
`~__GFP_RECLAIM`	不触发内存回收	高阶分配不值得等待回收

设计意图： 高阶大页（order 8）能拿到最好，拿不到立刻降级，不让分配路径卡在内存回收上。低阶（order ≤ 4，即 64KB 和 4KB）允许回收和等待，其中 4KB 是最终兜底，以尽量保证分配成功。

2.3 分配算法：多阶贪心

c
// ion_system_heap.c:97
static int ion_system_heap_allocate(struct ion_heap *heap,
                                    struct ion_buffer *buffer,
                                    unsigned long size, unsigned long flags)
{
    // 安全检查：不能超过系统总内存一半
    if (size / PAGE_SIZE > totalram_pages() / 2)
        return -ENOMEM;

    INIT_LIST_HEAD(&pages);
    while (size_remaining > 0) {
        // 贪心：从当前 max_order 开始，尝试最大可用的 order
        page = alloc_largest_available(sys_heap, buffer,
                                       size_remaining, max_order);
        if (!page) goto free_pages;

        list_add_tail(&page->lru, &pages);
        size_remaining -= page_size(page);
        max_order = compound_order(page);  // 降序约束
        i++;
    }

    // 构建 sg_table
    sg_alloc_table(table, i, GFP_KERNEL);
    list_for_each_entry_safe(page, tmp_page, &pages, lru) {
        sg_set_page(sg, page, page_size(page), 0);
        sg = sg_next(sg);
    }
    buffer->sg_table = table;
}

alloc_largest_available 的选择逻辑：

c
static struct page *alloc_largest_available(heap, buffer, size, max_order)
{
    for (i = 0; i < NUM_ORDERS; i++) {
        if (size < order_to_size(orders[i]))   // 剩余量不够这个 order
            continue;
        if (max_order < orders[i])              // 不能超过上一次的 order
            continue;
        page = alloc_buffer_page(heap, buffer, orders[i]);  // 先查 pool
        if (!page)
            continue;
        return page;
    }
    return NULL;
}

max_order 降序约束是一个重要细节：一旦某个 order 分配成功，后续只能分配同等或更小的 order。这避免了 [order0, order8, order0, order8, ...] 的乒乓现象，让 sg_table 的 entry 有序排列。

分配示例

分配 3.1MB (3,248,128 bytes，即 3MB + 100KB) 的过程：


size_remaining = 3.1MB, max_order = 8 (初始)

轮次1: orders 扫描
  order=8 (1MB) ← size=3.1MB ≥ 1MB, max=8 ≥ 8 → pool_alloc(order=8) ✓
  size_remaining = 2.1MB, max_order = 8

轮次2:
  order=8 (1MB) ← size=2.1MB ≥ 1MB, max=8 ≥ 8 → pool_alloc(order=8) ✓
  size_remaining = 1.1MB, max_order = 8

轮次3:
  order=8 (1MB) ← size=1.1MB ≥ 1MB, max=8 ≥ 8 → pool_alloc(order=8) ✓
  size_remaining = 0.1MB (102,400), max_order = 8

轮次4:
  order=8 (1MB) ← size=100KB < 1MB → 跳过
  order=4 (64KB) ← size=100KB ≥ 64KB, max=8 ≥ 4 → pool_alloc(order=4) ✓
  size_remaining = 36,864, max_order = 4

轮次5:
  order=8 → 跳过 (size 不够)
  order=4 (64KB) ← size=36KB < 64KB → 跳过
  order=0 (4KB) ← size=36KB ≥ 4KB, max=4 ≥ 0 → pool_alloc(order=0) ✓
  size_remaining = 32,768, max_order = 0

轮次6-13: 继续 order=0，每次 4KB
  ... 8 次 order=0 分配

结果 sg_table:
  sg[0]: order=8, 1MB    ─┐
  sg[1]: order=8, 1MB     │ 大块在前
  sg[2]: order=8, 1MB    ─┘
  sg[3]: order=4, 64KB
  sg[4-12]: order=0, 4KB × 9

共 13 个 sg entries，物理上散布在内存各处

2.4 Page Pool 交互

分配时先查 pool，释放时放回 pool：


分配路径:
  alloc_buffer_page(heap, buffer, order)
    └── ion_page_pool_alloc(pool)
          ├── pool 有缓存 → 取出 (O(1))
          └── pool 空 → alloc_pages(gfp, order)  // 走 buddy

释放路径:
  free_buffer_page(heap, buffer, page)
    ├── SHRINKER_FREE 标志？→ __free_pages()     // 直接还系统
    └── 否 → ion_page_pool_free(pool, page)       // 放回 pool

c
// ion_system_heap.c:56
static void free_buffer_page(struct ion_system_heap *heap,
                             struct ion_buffer *buffer, struct page *page)
{
    unsigned int order = compound_order(page);

    if (buffer->private_flags & ION_PRIV_FLAG_SHRINKER_FREE) {
        __free_pages(page, order);    // shrinker 触发：必须真正释放
        return;
    }

    pool = heap->pools[order_to_index(order)];
    ion_page_pool_free(pool, page);   // 正常释放：放回 pool 缓存
}

2.5 释放流程

c
// ion_system_heap.c:152
static void ion_system_heap_free(struct ion_buffer *buffer)
{
    // 1. 清零（安全性）—— shrinker 路径跳过
    if (!(buffer->private_flags & ION_PRIV_FLAG_SHRINKER_FREE))
        ion_heap_buffer_zero(buffer);

    // 2. 每个 sg entry 的 page 放回 pool 或直接释放
    for_each_sg(table->sgl, sg, table->nents, i)
        free_buffer_page(sys_heap, buffer, sg_page(sg));

    // 3. 释放 sg_table 元数据
    sg_free_table(table);
    kfree(table);
}

但用户不会直接调到这里。完整的释放链路：


close(dma_buf_fd)
  → dma_buf refcount = 0
    → ion_dma_buf_release()
      → _ion_buffer_destroy()
        │
        ├── DEFER_FREE (System Heap 默认开启)
        │     → ion_heap_freelist_add()    // 加入 freelist，立即返回
        │     → 后台 kthread 异步调用:
        │         ion_buffer_destroy()
        │           → ion_system_heap_free()  // zero + 放回 pool
        │
        └── 无 DEFER_FREE
              → ion_buffer_destroy()
                → ion_system_heap_free()     // 同步执行

2.6 Shrink 机制

c
// ion_system_heap.c:171
static int ion_system_heap_shrink(struct ion_heap *heap,
                                  gfp_t gfp_mask, int nr_to_scan)
{
    if (!nr_to_scan) only_scan = 1;   // 只统计

    for (i = 0; i < NUM_ORDERS; i++) {
        pool = sys_heap->pools[i];

        if (only_scan) {
            nr_total += ion_page_pool_shrink(pool, gfp_mask, 0);
        } else {
            nr_freed = ion_page_pool_shrink(pool, gfp_mask, nr_to_scan);
            nr_to_scan -= nr_freed;
            nr_total += nr_freed;
            if (nr_to_scan <= 0) break;  // 够了就停
        }
    }
    return nr_total;
}

按 order 从高到低回收：先回收 order=8（每释放一个就归还 256 页），效率最高。

2.7 System Contig Heap

同文件中还实现了 ION_HEAP_TYPE_SYSTEM_CONTIG，是 System Heap 的简化变体：

c
static int ion_system_contig_heap_allocate(heap, buffer, len, flags)
{
    int order = get_order(len);
    page = alloc_pages(low_order_gfp_flags | __GFP_NOWARN, order);
    split_page(page, order);

    // 释放 order 对齐多出的页面
    for (i = len >> PAGE_SHIFT; i < (1 << order); i++)
        __free_page(page + i);

    // sg_table 只有 1 个 entry
    sg_set_page(table->sgl, page, len, 0);
    buffer->sg_table = table;
}

对比	System Heap	System Contig Heap
物理连续	不保证	保证
Page Pool	有	无
Deferred Free	有	无
Shrinker	有	无
释放前清零	是	否
sg entries	多个	1 个
大 buffer	容易成功	容易失败（碎片化）

3. CMA Heap 详解

3.1 数据结构

c
// ion_cma_heap.c:19
struct ion_cma_heap {
    struct ion_heap heap;    // 嵌入基类
    struct cma *cma;         // 指向一个 CMA 预留区域
};

#define to_cma_heap(x) container_of(x, struct ion_cma_heap, heap)

CMA（Contiguous Memory Allocator）是内核的连续内存分配器，在启动时通过设备树或命令行参数预留一块物理连续区域。平时这块区域可以被 buddy allocator 用于可移动页面（movable pages），需要连续分配时再迁移走。

3.2 分配算法

c
// ion_cma_heap.c:27
static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
                            unsigned long len, unsigned long flags)
{
    struct ion_cma_heap *cma_heap = to_cma_heap(heap);
    unsigned long size = PAGE_ALIGN(len);
    unsigned long nr_pages = size >> PAGE_SHIFT;
    unsigned long align = get_order(size);

    // 对齐不超过 CMA 最大对齐限制
    if (align > CONFIG_CMA_ALIGNMENT)
        align = CONFIG_CMA_ALIGNMENT;

    // 从 CMA 区域分配连续物理页面
    pages = cma_alloc(cma_heap->cma, nr_pages, align, false);
    if (!pages) return -ENOMEM;

    // 清零（区分 highmem/lowmem）
    if (PageHighMem(pages)) {
        // highmem: 逐页 kmap_atomic → memset → kunmap_atomic
        while (nr_clear_pages > 0) {
            void *vaddr = kmap_atomic(page);
            memset(vaddr, 0, PAGE_SIZE);
            kunmap_atomic(vaddr);
            page++;
            nr_clear_pages--;
        }
    } else {
        // lowmem: 直接用 page_address 获取内核虚拟地址
        memset(page_address(pages), 0, size);
    }

    // sg_table 只有 1 个 entry（物理连续）
    sg_set_page(table->sgl, pages, size, 0);

    buffer->priv_virt = pages;     // 保存 page 指针，释放时用
    buffer->sg_table = table;
}

与 System Heap 的根本差异：


System Heap 分配 3MB:
  sg[0]: phys 0x8000_0000, 1MB  ──┐
  sg[1]: phys 0x9200_0000, 1MB    │ 物理上散布
  sg[2]: phys 0x8500_0000, 1MB  ──┘
  共 3 个 sg entries

CMA Heap 分配 3MB:
  sg[0]: phys 0xA000_0000, 3MB  ── 一整块连续
  共 1 个 sg entry

3.3 清零的 highmem/lowmem 分支

这是一个值得关注的实现细节：


32-bit 内存布局（示意；896MB 是 x86-32 的典型 lowmem 边界，32-bit ARM 上该边界取决于 vmalloc 区域配置）:
┌─────────────────┐ 高地址
│   HIGHMEM       │ ← 无固定内核虚拟地址映射
│   (> 896MB)     │    必须 kmap_atomic 临时映射后才能访问
├─────────────────┤ 896MB 边界
│   LOWMEM        │ ← 有固定的内核虚拟地址映射
│   (0 ~ 896MB)   │    page_address(page) 直接返回虚拟地址
└─────────────────┘ 低地址

lowmem 页面：page_address() 直接得到内核虚拟地址，一次 memset 搞定
highmem 页面：必须 kmap_atomic() 建立临时映射，每次只能映射一页，所以逐页循环

在 64-bit 系统上所有物理内存都有内核直接映射，理论上不会走 highmem 分支。但代码保留了兼容性。

3.4 释放

c
// ion_cma_heap.c:83
static void ion_cma_free(struct ion_buffer *buffer)
{
    struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap);
    struct page *pages = buffer->priv_virt;
    unsigned long nr_pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT;

    cma_release(cma_heap->cma, pages, nr_pages);   // 归还 CMA 区域
    sg_free_table(buffer->sg_table);
    kfree(buffer->sg_table);
}

没有 page pool，没有 deferred free，没有清零（分配时已清零）。cma_release 直接将页面归还给 CMA 管理器，buddy allocator 可以重新将其用作 movable pages。

3.5 Heap 注册：自动发现所有 CMA 区域

c
// ion_cma_heap.c:119
static int __ion_add_cma_heaps(struct cma *cma, void *data)
{
    heap = __ion_cma_heap_create(cma);
    heap->name = cma_get_name(cma);   // 使用 CMA 区域的名字
    ion_device_add_heap(heap);
    return 0;
}

static int ion_add_cma_heaps(void)
{
    cma_for_each_area(__ion_add_cma_heaps, NULL);  // 遍历所有 CMA 区域
    return 0;
}
device_initcall(ion_add_cma_heaps);

如果设备树中定义了多个 CMA 区域，ION 会为每个区域创建一个独立的 CMA Heap，各自有不同的 heap id 和 name：


设备树定义:
  reserved-memory {
      display_cma: cma@A0000000 { size = <0x4000000>; };   // 64MB
      camera_cma:  cma@B0000000 { size = <0x8000000>; };   // 128MB
  };

ION 注册结果:
  heap[2]: name="display_cma"  type=DMA  id=2
  heap[3]: name="camera_cma"   type=DMA  id=3

3.6 ops 对比

c
// System Heap ops
static struct ion_heap_ops system_heap_ops = {
    .allocate    = ion_system_heap_allocate,
    .free        = ion_system_heap_free,
    .map_kernel  = ion_heap_map_kernel,     // 通用实现
    .unmap_kernel = ion_heap_unmap_kernel,  // 通用实现
    .map_user    = ion_heap_map_user,       // 通用实现
    .shrink      = ion_system_heap_shrink,  // ← 有 shrink
};

// CMA Heap ops
static struct ion_heap_ops ion_cma_ops = {
    .allocate    = ion_cma_allocate,
    .free        = ion_cma_free,
    .map_user    = ion_heap_map_user,       // 通用实现
    .map_kernel  = ion_heap_map_kernel,     // 通用实现
    .unmap_kernel = ion_heap_unmap_kernel,  // 通用实现
    // 没有 .shrink — CMA 无缓存可回收
};

两者共用 ion_heap_map_kernel、ion_heap_unmap_kernel、ion_heap_map_user 这三个通用实现（定义在 ion_heap.c），因为这些操作只依赖 buffer->sg_table，不关心底层页面是怎么分配的。

4. 完整对比

维度	System Heap	CMA Heap
类型枚举	`ION_HEAP_TYPE_SYSTEM` (0)	`ION_HEAP_TYPE_DMA` (4)
内存来源	buddy allocator（全系统内存）	CMA 预留区
物理连续	不保证（scatter-gather）	保证
sg entries	多个（不同 order 的页面）	1 个
分配策略	多阶贪心 (order 8→4→0)	cma_alloc 一次性
GFP 策略	高阶快速失败 + 低阶兜底	不适用（CMA 自己管理）
Page Pool	有（3 个 pool，按 order）	无
Deferred Free	有（SCHED_IDLE kthread）	无
Shrinker	有（freelist + pool 可回收）	无（无缓存可回收）
释放前清零	是（ion_heap_buffer_zero）	否（分配时已清零）
分配时清零	是（GFP_ZERO）	是（手动 memset）
安全检查	size ≤ totalram/2	无（受限于 CMA 区域大小）
大 buffer 成功率	高（不要求连续）	取决于 CMA 区域碎片状态
分配速度	快（pool 命中时 O(1)）	慢（CMA 可能需要迁移页面）
适用设备	支持 SG-DMA 的设备	不支持 SG-DMA 的设备
heap 数量	固定 1 个	每个 CMA 区域 1 个
代码行数	377 行（含 Contig Heap）	138 行

5. 示例程序

以下用户态 C 程序模拟 System Heap 的多阶贪心分配算法和 Page Pool 缓存机制，帮助理解其核心设计而无需内核环境。

保存为 ion_heap_sim.c：

c
/*
 * ION System Heap 与 CMA Heap 分配策略用户态模拟
 *
 * 模拟内容：
 *   1. System Heap 的多阶贪心分配 (order 8/4/0)
 *   2. Page Pool 的缓存命中 / 未命中
 *   3. CMA Heap 的连续分配
 *   4. 两者的 sg_table 结构对比
 *
 * 编译: gcc -o ion_heap_sim ion_heap_sim.c -Wall
 * 运行: ./ion_heap_sim
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simulated page size in bytes; every order size below is a multiple of it. */
#define PAGE_SIZE       4096
#define ORDER_8_SIZE    (256 * PAGE_SIZE)   /* order 8: 256 pages = 1MB */
#define ORDER_4_SIZE    (16  * PAGE_SIZE)   /* order 4: 16 pages = 64KB */
#define ORDER_0_SIZE    (1   * PAGE_SIZE)   /* order 0: 1 page = 4KB */

/* Length of the SgEntry array that main() declares and passes to the allocators. */
#define MAX_SG_ENTRIES  256
#define POOL_CAPACITY   8  /* each pool caches at most 8 entries */

/* --- Simulated page pool --- */
/*
 * Bookkeeping for one simulated pool. No memory is held: the pool only
 * tracks how many entries it would be caching, plus hit/miss counters.
 */
typedef struct {
    int order;             /* page order this pool serves */
    int count;             /* entries currently cached */
    int capacity;          /* maximum number of entries the pool may cache */
    int alloc_from_pool;   /* stat: allocations served from the pool (hits) */
    int alloc_from_buddy;  /* stat: allocations served by the simulated buddy allocator (misses) */
} PagePool;

static PagePool pools[3]; /* order 8, 4, 0 */

/*
 * Reset a simulated pool to its initial state: remember the order it
 * serves, start with an empty cache limited to POOL_CAPACITY entries,
 * and clear both allocation counters.
 */
static void pool_init(PagePool *p, int order)
{
    /* Fields not named here (count and both counters) are zeroed. */
    *p = (PagePool){
        .order = order,
        .capacity = POOL_CAPACITY,
    };
}

/*
 * Simulate taking one entry from the pool.
 *
 * A non-empty pool gives up one cached entry and records a hit; an empty
 * pool records a fall-through to the simulated buddy allocator, which
 * never fails. Always returns 1.
 */
static int pool_alloc(PagePool *p)
{
    if (p->count <= 0) {
        /* cache miss: the buddy allocator is modelled as always succeeding */
        p->alloc_from_buddy++;
    } else {
        /* cache hit */
        p->count--;
        p->alloc_from_pool++;
    }

    return 1;
}

/*
 * Simulate returning one entry to the pool. The entry is cached while
 * the pool has room; once the pool is at capacity the entry is simply
 * dropped (standing in for __free_pages).
 */
static void pool_free(PagePool *p)
{
    if (p->count >= p->capacity)
        return;  /* pool is full: discard instead of caching */

    p->count++;
}

/* --- 模拟 sg_table entry --- */
typedef struct {
    unsigned long size;
    int order;
} SgEntry;

/* --- System Heap simulation --- */
/*
 * Simulate the System Heap's multi-order greedy allocation.
 *
 * The request is rounded up to a whole number of pages (the smallest
 * chunk the heap can hand out) and then carved into order-8, order-4 and
 * order-0 chunks, largest first. Once a chunk of some order has been
 * taken, later chunks may only be of the same or a smaller order.
 *
 * req_size: requested size in bytes.
 * sg:       output array with room for at least MAX_SG_ENTRIES entries.
 * sg_count: receives the number of entries written; untouched on failure.
 *
 * Returns 0 on success. Returns -1 when the request exceeds half of the
 * simulated RAM, when it would need more than MAX_SG_ENTRIES entries, or
 * when no order fits the remaining size.
 */
static int system_heap_allocate(unsigned long req_size,
                                SgEntry *sg, int *sg_count)
{
    static const unsigned int orders[] = {8, 4, 0};
    static const unsigned long order_sizes[] = {
        ORDER_8_SIZE, ORDER_4_SIZE, ORDER_0_SIZE
    };
    const unsigned long page_bytes = ORDER_0_SIZE;
    /*
     * Half of the simulated 4GB of RAM. Written as 2GB directly so the
     * constant does not wrap to 0 where unsigned long is 32 bits wide.
     */
    const unsigned long half_ram_sim = 2UL * 1024 * 1024 * 1024;

    unsigned long size_remaining;
    unsigned int max_order = orders[0];
    int count = 0;

    /* Sanity check: refuse anything larger than half of total RAM. */
    if (req_size > half_ram_sim) {
        printf("  [REJECTED] size %lu > totalram/2 (%lu)\n",
               req_size, half_ram_sim);
        return -1;
    }

    printf("  System Heap: allocating %lu bytes (%.2f MB)\n",
           req_size, (double)req_size / (1024 * 1024));

    /*
     * Round up to whole pages. Without this a request that is not a page
     * multiple leaves a sub-page remainder that no order can satisfy.
     * Cannot overflow: req_size is at most 2GB at this point.
     */
    size_remaining = (req_size + page_bytes - 1) / page_bytes * page_bytes;

    while (size_remaining > 0) {
        int allocated = 0;

        /* Fail loudly rather than report a truncated allocation as success. */
        if (count >= MAX_SG_ENTRIES) {
            printf("  [ERROR] sg_table full (%d entries), "
                   "%lu bytes still unallocated\n",
                   MAX_SG_ENTRIES, size_remaining);
            return -1;
        }

        for (int i = 0; i < 3; i++) {
            if (size_remaining < order_sizes[i])
                continue;   /* remainder is smaller than this order */
            if (max_order < orders[i])
                continue;   /* never go back up to a larger order */

            /* Take one chunk from the matching pool (hit or simulated buddy). */
            pool_alloc(&pools[i]);
            sg[count].size = order_sizes[i];
            sg[count].order = orders[i];
            count++;
            size_remaining -= order_sizes[i];
            max_order = orders[i];  /* descending-order constraint */
            allocated = 1;
            break;
        }

        if (!allocated) {
            printf("  [ERROR] Cannot allocate remaining %lu bytes\n",
                   size_remaining);
            return -1;
        }
    }

    *sg_count = count;
    return 0;
}

/* --- CMA Heap simulation --- */
/*
 * Simulate a CMA Heap allocation: one physically contiguous region,
 * described by a single sg entry.
 *
 * The request is rounded up to whole pages, so the reported page count
 * and the sg entry size both cover the full request even when req_size
 * is not a page multiple.
 *
 * req_size: requested size in bytes.
 * sg:       output array; only sg[0] is written.
 * sg_count: receives 1.
 *
 * Always returns 0; allocation failure is not modelled.
 */
static int cma_heap_allocate(unsigned long req_size,
                             SgEntry *sg, int *sg_count)
{
    /* Round up without forming req_size + PAGE_SIZE - 1, which could wrap. */
    unsigned long nr_pages = req_size / PAGE_SIZE;

    if (req_size % PAGE_SIZE != 0)
        nr_pages++;

    printf("  CMA Heap: allocating %lu bytes (%.2f MB) contiguous\n",
           req_size, (double)req_size / (1024 * 1024));

    /* CMA: a single allocation, physically contiguous, one sg entry. */
    sg[0].size = nr_pages * PAGE_SIZE;
    sg[0].order = -1;  /* not applicable */
    *sg_count = 1;

    /* The alignment order is not modelled; it is always printed as 0. */
    printf("  CMA: cma_alloc(%lu pages, align=%d)\n",
           nr_pages, 0);

    return 0;
}

/* --- Print an sg_table --- */
/*
 * Print a simulated sg_table: a header, up to the first 20 entries, a
 * "... more entries" line if the table is longer, and the total size.
 *
 * The total is summed over ALL entries, not just the ones displayed, so
 * it stays correct for tables longer than 20 entries.
 *
 * sg:    entries to print.
 * count: number of valid entries in sg.
 * label: heap name shown in the header line.
 */
static void print_sg_table(SgEntry *sg, int count, const char *label)
{
    const int max_rows = 20;  /* display limit; does not affect the total */
    unsigned long total = 0;

    printf("\n  %s sg_table (%d entries):\n", label, count);
    printf("  %-8s %-12s %-8s\n", "entry", "size", "order");
    printf("  %-8s %-12s %-8s\n", "-----", "----", "-----");

    for (int i = 0; i < count; i++) {
        total += sg[i].size;

        if (i >= max_rows)
            continue;  /* keep summing, stop printing */

        /* A negative order marks an entry with no page order (CMA). */
        if (sg[i].order >= 0)
            printf("  sg[%-3d] %-12lu order=%d\n",
                   i, sg[i].size, sg[i].order);
        else
            printf("  sg[%-3d] %-12lu (contiguous)\n",
                   i, sg[i].size);
    }
    if (count > max_rows)
        printf("  ... (%d more entries)\n", count - max_rows);

    printf("  Total: %lu bytes (%.2f MB)\n", total,
           (double)total / (1024 * 1024));
}

/* --- Simulated free --- */
/*
 * Simulate releasing a System Heap buffer.
 *
 * Normal path (shrinker_free == 0): report the buffer being zeroed, then
 * hand every entry back to the pool that matches its order.
 * Shrinker path (shrinker_free != 0): skip zeroing and the pools, and
 * report every entry as returned straight to the system.
 *
 * sg:    entries of the buffer being freed.
 * count: number of entries in sg.
 */
static void system_heap_free(SgEntry *sg, int count, int shrinker_free)
{
    const char *path_name = shrinker_free ? "shrinker path" : "normal path";

    printf("\n  System Heap free (%s):\n", path_name);

    if (shrinker_free) {
        for (int i = 0; i < count; i++)
            printf("    sg[%d]: __free_pages(order=%d) → 归还系统\n",
                   i, sg[i].order);
        return;
    }

    printf("    → ion_heap_buffer_zero() (清零 buffer)\n");

    for (int i = 0; i < count; i++) {
        int idx;

        /* Map the order to its pool slot; anything but 8 or 4 uses slot 2. */
        switch (sg[i].order) {
        case 8:
            idx = 0;
            break;
        case 4:
            idx = 1;
            break;
        default:
            idx = 2;
            break;
        }

        pool_free(&pools[idx]);
        printf("    sg[%d]: pool_free(order=%d) → 放回 pool (pool count=%d)\n",
               i, sg[i].order, pools[idx].count);
    }
}

/* --- Print pool statistics --- */
/*
 * Print one row per pool: the order it serves, how many entries it
 * currently caches, and how many allocations were served from the pool
 * versus the simulated buddy allocator.
 */
static void print_pool_stats(void)
{
    /* Order shown for each pool slot, in slot order. */
    static const int slot_order[3] = {8, 4, 0};

    printf("\n  Page Pool Statistics:\n");
    printf("  %-10s %-8s %-14s %-14s\n",
           "order", "cached", "from_pool", "from_buddy");
    printf("  %-10s %-8s %-14s %-14s\n",
           "-----", "------", "---------", "----------");

    for (int slot = 0; slot < 3; slot++) {
        printf("  order=%-3d  %-8d %-14d %-14d\n",
               slot_order[slot],
               pools[slot].count,
               pools[slot].alloc_from_pool,
               pools[slot].alloc_from_buddy);
    }
}

int main(void)
{
    SgEntry sg[MAX_SG_ENTRIES];
    int sg_count;

    /* 初始化 pools */
    pool_init(&pools[0], 8);
    pool_init(&pools[1], 4);
    pool_init(&pools[2], 0);

    printf("====================================\n");
    printf("ION Heap Allocation Simulator\n");
    printf("====================================\n");

    /* --- 场景 1: System Heap 分配 3.1MB --- */
    printf("\n[Scenario 1] System Heap allocate 3.1MB\n");
    if (system_heap_allocate(3 * 1024 * 1024 + 100 * 1024,
                             sg, &sg_count) == 0) {
        print_sg_table(sg, sg_count, "System Heap");

        /* 释放 (正常路径 → 放回 pool) */
        system_heap_free(sg, sg_count, 0);
    }

    print_pool_stats();

    /* --- 场景 2: 再次分配 → pool 命中 --- */
    printf("\n[Scenario 2] System Heap allocate 2MB (should hit pool)\n");
    if (system_heap_allocate(2 * 1024 * 1024, sg, &sg_count) == 0) {
        print_sg_table(sg, sg_count, "System Heap");
    }

    print_pool_stats();

    /* --- 场景 3: CMA Heap 分配同样大小 --- */
    printf("\n[Scenario 3] CMA Heap allocate 3.1MB (contiguous)\n");
    if (cma_heap_allocate(3 * 1024 * 1024 + 100 * 1024,
                          sg, &sg_count) == 0) {
        print_sg_table(sg, sg_count, "CMA Heap");
    }

    /* --- 场景 4: Shrinker 释放 --- */
    printf("\n[Scenario 4] System Heap free via shrinker\n");
    SgEntry shrink_sg[2] = {{ORDER_8_SIZE, 8}, {ORDER_4_SIZE, 4}};
    system_heap_free(shrink_sg, 2, 1);

    printf("\n====================================\n");
    printf("Simulation Complete\n");
    printf("====================================\n");

    return 0;
}

编译和运行：

bash
$ gcc -o ion_heap_sim ion_heap_sim.c -Wall
$ ./ion_heap_sim

预期输出：


====================================
ION Heap Allocation Simulator
====================================

[Scenario 1] System Heap allocate 3.1MB
  System Heap: allocating 3248128 bytes (3.10 MB)

  System Heap sg_table (13 entries):
  entry    size         order
  -----    ----         -----
  sg[0  ] 1048576      order=8
  sg[1  ] 1048576      order=8
  sg[2  ] 1048576      order=8
  sg[3  ] 65536        order=4
  sg[4  ] 4096         order=0
  sg[5  ] 4096         order=0
  sg[6  ] 4096         order=0
  sg[7  ] 4096         order=0
  sg[8  ] 4096         order=0
  sg[9  ] 4096         order=0
  sg[10 ] 4096         order=0
  sg[11 ] 4096         order=0
  sg[12 ] 4096         order=0
  Total: 3248128 bytes (3.10 MB)

  System Heap free (normal path):
    → ion_heap_buffer_zero() (清零 buffer)
    sg[0]: pool_free(order=8) → 放回 pool (pool count=1)
    sg[1]: pool_free(order=8) → 放回 pool (pool count=2)
    sg[2]: pool_free(order=8) → 放回 pool (pool count=3)
    sg[3]: pool_free(order=4) → 放回 pool (pool count=1)
    sg[4]: pool_free(order=0) → 放回 pool (pool count=1)
    sg[5]: pool_free(order=0) → 放回 pool (pool count=2)
    sg[6]: pool_free(order=0) → 放回 pool (pool count=3)
    sg[7]: pool_free(order=0) → 放回 pool (pool count=4)
    sg[8]: pool_free(order=0) → 放回 pool (pool count=5)
    sg[9]: pool_free(order=0) → 放回 pool (pool count=6)
    sg[10]: pool_free(order=0) → 放回 pool (pool count=7)
    sg[11]: pool_free(order=0) → 放回 pool (pool count=8)
    sg[12]: pool_free(order=0) → 放回 pool (pool count=8)

  Page Pool Statistics:
  order      cached   from_pool      from_buddy
  -----      ------   ---------      ----------
  order=8    3        0              3
  order=4    1        0              1
  order=0    8        0              9

[Scenario 2] System Heap allocate 2MB (should hit pool)
  System Heap: allocating 2097152 bytes (2.00 MB)

  System Heap sg_table (2 entries):
  entry    size         order
  -----    ----         -----
  sg[0  ] 1048576      order=8
  sg[1  ] 1048576      order=8
  Total: 2097152 bytes (2.00 MB)

  Page Pool Statistics:
  order      cached   from_pool      from_buddy
  -----      ------   ---------      ----------
  order=8    1        2              3
  order=4    1        0              1
  order=0    8        0              9

[Scenario 3] CMA Heap allocate 3.1MB (contiguous)
  CMA Heap: allocating 3248128 bytes (3.10 MB) contiguous
  CMA: cma_alloc(793 pages, align=0)

  CMA Heap sg_table (1 entries):
  entry    size         order
  -----    ----         -----
  sg[0  ] 3248128      (contiguous)
  Total: 3248128 bytes (3.10 MB)

[Scenario 4] System Heap free via shrinker

  System Heap free (shrinker path):
    sg[0]: __free_pages(order=8) → 归还系统
    sg[1]: __free_pages(order=4) → 归还系统

====================================
Simulation Complete
====================================

关键观察点：

场景 1→2：第一次分配 3 个 order=8 页面全部走 buddy（from_buddy=3），释放后放回 pool（cached=3）。第二次分配 2MB 时两个 order=8 直接从 pool 命中（from_pool=2），pool 降到 1
场景 3：CMA Heap 同样的 3.1MB 只产生 1 个 sg entry（连续），而 System Heap 产生了 13 个
场景 4：shrinker 路径直接 __free_pages 归还系统，不经过 pool

总结

System Heap 是 Android 上最常用的 ION 堆，使用多阶贪心算法（order 8→4→0）从 buddy allocator 分配散布的物理页面，通过 sg_table 描述，适合支持 scatter-gather DMA 的设备
CMA Heap 从预留的 CMA 区域分配物理连续内存，sg_table 只有 1 个 entry，适合不支持 SG-DMA 的简单设备，但分配速度慢且受限于预留区大小
Page Pool 是 System Heap 的性能核心，每个 order 维护独立缓存池，释放的页面放回 pool 而非还给系统，下次分配 O(1) 命中，避免反复进出 buddy allocator
GFP 标志分层策略是 System Heap 可靠性的关键：高阶分配快速失败不等待（__GFP_NORETRY & ~__GFP_RECLAIM），低阶 4KB 允许内存回收以保证兜底成功
Deferred Free 和 Shrinker 只存在于 System Heap，CMA Heap 无缓存，无需这些机制
ION_PRIV_FLAG_SHRINKER_FREE 区分两条释放路径：正常释放放回 pool 加速复用，shrinker 释放跳过 pool 直接归还系统，保证内存压力时真正回收物理页面
两者共用 map_kernel/map_user 通用实现，因为映射操作只依赖 sg_table，不关心底层页面来源，体现了 ION 分层解耦的设计
System Contig Heap 是 System Heap 的简化变体，保证物理连续但无 pool/deferred-free/shrinker，用于简单场景

参考资料

Linux Kernel v5.4.123 源码
- drivers/staging/android/ion/ion_system_heap.c (377 行)
- drivers/staging/android/ion/ion_cma_heap.c (138 行)
- drivers/staging/android/ion/ion_page_pool.c (155 行)
- drivers/staging/android/ion/ion_heap.c (315 行)
Linux CMA 文档：Documentation/admin-guide/cma.rst
Linux Buddy Allocator：mm/page_alloc.c
GFP 标志参考：include/linux/gfp.h

目录