2.6.11版本基数树源码解析

文章深入源码解析了 2.6.11 版本的基数树实现。

初始化

1
2
3
4
5
6
7
8
init/main.c

asmlinkage void __init start_kernel(void)
{
...;
radix_tree_init();
...;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
lib/radix-tree.c

/*
* 在 kernel 中 RADIX_TREE_MAP_SHIFT 的值为 6,意味着将 index 看作是每 6 位一组,
* 每一组对应每一层高的 offset,譬如当前树高为 3,要往这棵树中插入 index 65。那么可以将
* 65 写成 0,000001,000001,所以 index 65 height 1 的 offset 为 0,height 2 的
* offset 为 1,height 3 的 offset 为 1
*/
#ifdef __KERNEL__
#define RADIX_TREE_MAP_SHIFT 6
#else
#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
#endif

/*
* RADIX_TREE_TAGS 为 2 是因为当前版本中只有两个 TAG,这两个 TAG 定义在 include/linux/fs.h 中:
* #define PAGECACHE_TAG_DIRTY 0
* #define PAGECACHE_TAG_WRITEBACK 1
*/
#define RADIX_TREE_TAGS 2

/*
* RADIX_TREE_MAP_SIZE 定义了一个 radix_tree_node 有几个 slot,由于 RADIX_TREE_MAP_SHIFT 为 6,
* 也就是说一个 radix_tree_node 最多可以有 64(1UL << RADIX_TREE_MAP_SHIFT) 个slot。
*/
#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
/*
* RADIX_TREE_MAP_MASK 作用是防止 offset 溢出,在求 offset 时,只要按位与上 RADIX_TREE_MAP_MASK,所求得的
* offset 肯定是在 [0,RADIX_TREE_MAP_SIZE) 范围内的
*/
#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)

/*
* 上面讲到 RADIX_TREE_MAP_SIZE 定义了一个 radix_tree_node 有几个 slot,每个 slot 都有单独的 TAG 位,
* RADIX_TREE_TAG_LONGS 定义了一个 radix_tree_node 的 slot 位图需要几个 1ong 类型才可以表示
*/
#define RADIX_TREE_TAG_LONGS \
((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)

/*
* count:radix_tree_node 的计数,在从基数树中删除一个 index 时,会检查删除该 index 之后 count 是不是为 0,
* 如果为 0,则释放该 radix_tree_node。
* slots:一个 RADIX_TREE_MAP_SIZE 大小的数组,元素类型为 void*,其真实类型可以是 index 关联的 key 的真实类型,
* 也可以是 struct radix_tree_node*。
* tags:struct radix_tree_node 的 TAG 位图。
*/
struct radix_tree_node {
unsigned int count;
void *slots[RADIX_TREE_MAP_SIZE];
unsigned long tags[RADIX_TREE_TAGS][RADIX_TREE_TAG_LONGS];
};

/*
* struct radix_tree_path 用于 radix_tree_tag_clear 和 radix_tree_delete 函数,作用是
* 记录 index 自 root 到叶子所经过的路径,而且需要查看 radix_tree_node 中各个 slot 的 TAG 情况,所以就
* 需要 struct radix_tree_node *node; 和 int offset; 这两个成员了。offset 用于记录 slot 的位置。
*/
struct radix_tree_path {
struct radix_tree_node *node, **slot;
int offset;
};

/*
* 因为一颗基数树的 index 是以 unsigned long 类型表示的,
* 所以这里求下 unsigned long 类型占用的比特数,就是一个 unsigned 1ong 类型数据有多少位
* 用于防止 index 溢出。这里我们假设是 ILP32 数据模型,所以 RADIX_TREE_INDEX_BITS 为 32
*/
#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
/*
* RADIX_TREE_MAX_PATH 表示基数树查找一个 index 最多经过这么多条路径。按理来说只要
* RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 1 就好,这里为什么 +2 呢?
* 因为在下面的实现中基数树中有节点的话,它的树高至少为 1。树高为 0 的最大 index 是 0,相当于哨兵的作用。
* 按照前面的假设,RADIX_TREE_MAX_PATH 值为 32/6 + 2 == 7
*/
#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)

/*
* height_to_maxindex 数组存放基数树树高对应的最大 index
*/
static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH];

/*
* Radix tree node cache.
*/
static kmem_cache_t *radix_tree_node_cachep;

/*
* Per-cpu pool of preloaded nodes
*/
/*
* 使用 struct radix_tree_preload 结构体定义了一个 percpu 变量 radix_tree_preloads
* 一般在调用 radix_tree_insert 函数前会先调用 radix_tree_preload 函数填充 percpu 变量,
* 存储 RADIX_TREE_MAX_PATH 个 struct radix_tree_node。
* nr:缓存了几个 struct radix_tree_node
* nodes: 存放分配的 struct radix_tree_node
* TODO:为什么要设置这么一个 percpu 变量呢?缓和没有设置 __GFP_WAIT(不能睡眠) 分配失败?收益怎么样呢?
*/
struct radix_tree_preload {
int nr;
struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
};
/*
* 这里有个初始化,将 radix_tree_preloads 的 nr 和 nodes 都初始化为0
*/
DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

void __init radix_tree_init(void)
{
/*
* radix_tree_node_cachep 用于分配 struct radix_tree_node,
* 这里的 kmem_cache_create 带了 SLAB_PANIC 标记,表示如果 kmem_cache_create
* 返回失败,则 panic。还有一点就是 radix_tree_node_ctor 构造函数了,这个函数具体见下面
*/
radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
sizeof(struct radix_tree_node), 0,
SLAB_PANIC, radix_tree_node_ctor, NULL);

/*
* 初始化 height_to_maxindex 数组
*/
radix_tree_init_maxindex();

/*
* 注册 cpu 热插拔回调函数,这是因为基数树中用了一个 percpu 变量 radix_tree_preloads
* 来缓解内存分配失败的情况。不过这个热插拔回调不是做 radix_tree_preloads 的填充,而是
* 要在 cpu 挂掉之后释放之前缓存的内存。
* TODO:如果 cpu offline 时不释放,cpu online 之后可不可以继续使用这部分内存呢?
*/
hotcpu_notifier(radix_tree_callback, 0);
}

/*
* 从 radix_tree_node_cachep 中分配 struct radix_tree_node 自动调用的构造函数,
* 将 struct radix_tree_node 清空
*/
static void
radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags)
{
memset(node, 0, sizeof(struct radix_tree_node));
}

/*
* 初始化 height_to_maxindex 数组
*/
static __init void radix_tree_init_maxindex(void)
{
unsigned int i;

for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
height_to_maxindex[i] = __maxindex(i);
}

/*
* 参数是 height,返回值是 height 对应的最大 index
* 假设 height 为 3,那么最大的 index 是 111111,111111,111111
* 所以一个求法是 ~0UL >> (RADIX_TREE_INDEX_BITS - height * RADIX_TREE_MAP_SHIFT),
* 但直接这么做是有陷阱的,因为当 height 为0时,上面就相当于是 ~0UL >> 32了,
* 这个在 K&R Appendix-A.7.8 中写道:移位运算符...如果右操作数为负值,或者大于或等于左操作数
* 类型的位数,则结果没有定义。
* 所以 __maxindex 中使用了这样的技法:
* (~0UL >> (RADIX_TREE_INDEX_BITS - height * RADIX_TREE_MAP_SHIFT - 1)) >> 1
*/
static __init unsigned long __maxindex(unsigned int height)
{
unsigned int tmp = height * RADIX_TREE_MAP_SHIFT;
unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1;

/*
* 按照上面的解释,当 RADIX_TREE_INDEX_BITS - tmp - 1 >= 32 或者
* RADIX_TREE_INDEX_BITS - tmp - 1 < 0 时,index 的值是没有定义的。
* 也就是 tmp <= -1 或 tmp > 31。tmp <= -1 是不可能的,但是 tmp > 31 是有可能的,
* 而且 tmp > 31 时,也就是 tmp >= 32 时,index 是 ~9UL
*/
if (tmp >= RADIX_TREE_INDEX_BITS)
index = ~0UL;
return index;
}

/*
* cpu 热插拔函数
* CPU_DEAD 时将缓存的 node 释放
*/
#ifdef CONFIG_HOTPLUG_CPU
static int radix_tree_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
int cpu = (long)hcpu;
struct radix_tree_preload *rtp;

/* Free per-cpu pool of perloaded nodes */
if (action == CPU_DEAD) {
rtp = &per_cpu(radix_tree_preloads, cpu);
while (rtp->nr) {
kmem_cache_free(radix_tree_node_cachep,
rtp->nodes[rtp->nr-1]);
/*
* 释放之后将 node 置为 NULL
*/
rtp->nodes[rtp->nr-1] = NULL;
rtp->nr--;
}
}
return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */

API

基本的增删查操作:

1
2
3
插入:int radix_tree_insert(struct radix_tree_root *root, unsigned long index, void *item)
删除:void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
查找:void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)

TAG 的基本操作:

1
2
3
插入:void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, int tag)
删除:void *radix_tree_tag_clear(struct radix_tree_root *root, unsigned long index, int tag)
测试树中有没有 tagged 的:int radix_tree_tagged(struct radix_tree_root *root, int tag)

批量查找操作,为了更方便使用:

1
2
3
4
5
6
7
8
通过 index 查找:
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
unsigned long first_index, unsigned int max_items)
通过 index + tag 查找:
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
unsigned long first_index, unsigned int max_items, int tag)

radix_tree_insert

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
/**
* radix_tree_insert - insert into a radix tree
* @root: radix tree root
* @index: index key
* @item: item to insert
*
* Insert an item into the radix tree at position @index.
*/
/*
* 往基数树 root 中插入 index,值为 item。
* 返回值:成功返回 0,失败返回错误码,如 -EEXIST。
*/
int radix_tree_insert(struct radix_tree_root *root,
unsigned long index, void *item)
{
struct radix_tree_node *node = NULL, *tmp, **slot;
unsigned int height, shift;
int offset;
int error;

/* Make sure the tree is high enough. */
/*
* 这里是看当前的树高能不能存下传进来的 index,有两种情况:
* 1. index 为 0 时,为什么要对 0 特殊处理呢?因为 radix_tree_maxindex(0) 是 0,
* 这使得 index 为 0 不能和其他 index 一样处理
* 2. 其他 index
*/
if ((!index && !root->rnode) ||
index > radix_tree_maxindex(root->height)) {
/*
* 调用 radix_tree_extend 扩展基数树,会更新 root->height 和 root->rnode
*/
error = radix_tree_extend(root, index);
if (error)
return error;
}

/*
* 自上而下找到 index 对应的 slot,并将 item 插入
* 所以 slot 为 &root->rnode,height 为 root->height
* shift:height 高度对应的偏移就是 (height-1) * RADIX_TREE_MAP_SHIFT;
*/
slot = &root->rnode;
height = root->height;
shift = (height-1) * RADIX_TREE_MAP_SHIFT;

/*
* 编译器警告消除
*/
offset = 0; /* uninitialised var warning */
while (height > 0) {
/*
* 还能跑进来就意味着 height 还不为 0, slot 是当前遍历 height,index 的父/祖先,
* 如果此时为 NULL,即是这个 index 是第一个这个 slot 的孩子/孙子,所以要新分配一个 node
*/
if (*slot == NULL) {
/* Have to add a child node. */
if (!(tmp = radix_tree_node_alloc(root)))
return -ENOMEM;
/*
* 将 slot 指向新分配的 node
*/
*slot = tmp;
/*
* node 是 *slot 的父节点,如果 node 存在(只要这时的 *slot 不是根节点,node 就存在),
* 因为增加了一个子节点,所以 count 要加 1
*/
if (node)
node->count++;
}

/* Go a level down */
/*
* 求出 index 在当前 height 的 offset
*/
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
/*
* node 是当前节点的子节点,下一次循环的 *slot 的父节点
*/
node = *slot;
/*
* 更新 slot, shift, height
*/
slot = (struct radix_tree_node **)(node->slots + offset);
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}

/*
* index 对应的 slot,如果里面已经存放了东西,就表示 index 已经存在了,返回 -EEXIST
*/
if (*slot != NULL)
return -EEXIST;
/*
* 如果 index 还不存在,node 是 index 对应的 slot 所在的 node,也就是 *slot 的父节点
* 因为即将要将 item 放到 slot,后面不会再有错误,所以 node 的计数要加 1
*/
if (node) {
node->count++;
/*
* 检查 tag 是否是没有设置的状态
*/
BUG_ON(tag_get(node, 0, offset));
BUG_ON(tag_get(node, 1, offset));
}

/*
* 更新 *slot
*/
*slot = item;
return 0;
}
EXPORT_SYMBOL(radix_tree_insert);

/*
* Extend a radix tree so it can store key @index.
*/
/*
* 扩展基数树使其可以存下 index
*/
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
{
struct radix_tree_node *node;
unsigned int height;
char tags[RADIX_TREE_TAGS];
int tag;

/* Figure out what the height should be. */
/*
* 计算 height 需要多大才可以容纳 index,因为原本 root->height 容纳不了,
* 才会跑到这个函数,所以 height 至少是 root->height + 1
*/
height = root->height + 1;
while (index > radix_tree_maxindex(height))
height++;

/*
* 如果 root->rnode 为 NULL,表示这棵树没有节点,所以不需要额外处理,
* 直接将 root->height 置为 height 就可以
*/
if (root->rnode == NULL) {
root->height = height;
goto out;
}

/*
* Prepare the tag status of the top-level node for propagation
* into the newly-pushed top-level node(s)
*/
/*
* 基数树中的 tag 是具有传播性质的,从下往上传播,即是如果子节点有这个 tag,
* 那么其父节点肯定也会有这个 tag。所以在增加高度之前,判断下这个新增的 node 的 slots[0]
* 需不需要设置某个 tag,为什么是 slots[0] 呢?因为所有旧节点的父/祖先节点肯定是新增节点的
* slots[0]。
*/
/*
* 遍历所有 tag
*/
for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
int idx;

tags[tag] = 0;
/*
* 遍历 root->rnode 的所有 slots 的 tag
*/
for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
/*
* 只要其中一个设置了这个 tag,就设置 tags[tag] = 1
*/
if (root->rnode->tags[tag][idx]) {
tags[tag] = 1;
break;
}
}
}

/*
* 增加新节点,增加 root->height
*/
do {
/*
* 分配 radix_tree_node
*/
if (!(node = radix_tree_node_alloc(root)))
return -ENOMEM;

/* Increase the height. */
/*
* 旧节点是新节点的 slots[0] 的子节点
*/
node->slots[0] = root->rnode;

/* Propagate the aggregated tag info into the new root */
/*
* 根据上面的 tags[tag] 检查是否要给新节点的 slots[0] 设置 tag
*/
for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
if (tags[tag])
tag_set(node, tag, 0);
}

/*
* 因为 slots[0] 有值了,所以 count 要置为 1
*/
node->count = 1;
/*
* 新的 root->rnode
*/
root->rnode = node;
/*
* 更新 root->height
*/
root->height++;
} while (height > root->height);
out:
return 0;
}

/*
* This assumes that the caller has performed appropriate preallocation, and
* that the caller has pinned this thread of control to the current CPU.
*/
/*
* 分配 radix_tree_node,成功返回 分配的地址,失败返回 NULL
*/
static struct radix_tree_node *
radix_tree_node_alloc(struct radix_tree_root *root)
{
struct radix_tree_node *ret;

/*
* 先通过 radix_tree_node_cachep 分配
*/
ret = kmem_cache_alloc(radix_tree_node_cachep, root->gfp_mask);
/*
* kmem_cache_alloc 分配失败,__GFP_WAIT 和慢速分配有关,如果没有设置 __GFP_WAIT,
* 则不会进入同步回收分配内存,设置了 __GFP_WAIT 标志可能会睡眠,这在某些场景下是不允许的。
* 这时候会使用全局的 radix_tree_preloads 分配内存
*/
if (ret == NULL && !(root->gfp_mask & __GFP_WAIT)) {
struct radix_tree_preload *rtp;

rtp = &__get_cpu_var(radix_tree_preloads);
/*
* 查看是否有 preload 的内存
*/
if (rtp->nr) {
ret = rtp->nodes[rtp->nr - 1];
rtp->nodes[rtp->nr - 1] = NULL;
rtp->nr--;
}
}
return ret;
}

radix_tree_delete

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/**
* radix_tree_delete - delete an item from a radix tree
* @root: radix tree root
* @index: index key
*
* Remove the item at @index from the radix tree rooted at @root.
*
* Returns the address of the deleted item, or NULL if it was not present.
*/
/*
* 从基数树 root 中删除 index,如果 index 存在,返回里面的 item,否则返回 NULL
*/
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
struct radix_tree_path *orig_pathp;
unsigned int height, shift;
void *ret = NULL;
char tags[RADIX_TREE_TAGS];
int nr_cleared_tags;

/*
* index 已经超出这棵树的最大 index,返回 NULL
*/
height = root->height;
if (index > radix_tree_maxindex(height))
goto out;

/*
* shift 是 height 对应的偏移
*/
shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
/*
* slot 保存的是下一层 node 的地址或者 index 对应的 item,
* node 是当前层 node,
* 所以一开始 node 为 NULL, slot 为 &root->rnode
*/
pathp->node = NULL;
pathp->slot = &root->rnode;

/*
* 从高到低遍历
*/
while (height > 0) {
int offset;

/*
* 如果这个 slot 里面没有东西(下一层 node 的地址或 item)
*/
if (*pathp->slot == NULL)
goto out;

/*
* 这里的写法有点奇怪,用的是 pathp[1],下面 pathp++
*/
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
pathp[1].offset = offset;
pathp[1].node = *pathp[0].slot;
pathp[1].slot = (struct radix_tree_node **)
(pathp[1].node->slots + offset);
pathp++;
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}

/*
* 注意这里的 pathp 经过上面的循环之后已经是最后一层了
* 如果这个 slot 里面没东西,就表示这个 index 不存在
*/
ret = *pathp[0].slot;
if (ret == NULL)
goto out;

/*
* 保存倒序的 pathp
*/
orig_pathp = pathp;

/*
* Clear all tags associated with the just-deleted item
*/
/*
* 清除这个路径上的 tag。
* 因为删除了这个 index 之后,如果没有其他 index 也设置了这个 tag,
* 这条路径的 tag 也需要被清掉
*/
memset(tags, 0, sizeof(tags));
do {
int tag;

nr_cleared_tags = RADIX_TREE_TAGS;
for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
int idx;

/*
* 这条路径还有 slot 设置了这个 tag,所以不需要做清除操作
*/
if (tags[tag])
continue;

tag_clear(pathp[0].node, tag, pathp[0].offset);

/*
* 查看这个 node 是否还有 slot 设置了这个 tag
*/
for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
if (pathp[0].node->tags[tag][idx]) {
tags[tag] = 1;
nr_cleared_tags--;
break;
}
}
}
pathp--;
} while (pathp[0].node && nr_cleared_tags);

pathp = orig_pathp;
/*
* 清除 item
*/
*pathp[0].slot = NULL;
/*
* 将 node 计数减 1,如果计数为 0 就释放这个 node。释放了这个 node 之后
* 上一层的 node 计数可能也变为了 0,也需要释放。
*/
while (pathp[0].node && --pathp[0].node->count == 0) {
pathp--;
BUG_ON(*pathp[0].slot == NULL);
*pathp[0].slot = NULL;
radix_tree_node_free(pathp[1].node);
}
/*
* 由于基数树的插入特点:插入由于高度不够时,增加树高的方法是将子树的根节点作为新的根节点的子节点。
* 所以只要 root->rnode 没有被删除,树高就不需要改变
*/
if (root->rnode == NULL)
root->height = 0;
out:
return ret;
}
EXPORT_SYMBOL(radix_tree_delete);

radix_tree_lookup

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/**
* radix_tree_lookup - perform lookup operation on a radix tree
* @root: radix tree root
* @index: index key
*
* Lookup the item at the position @index in the radix tree @root.
*/
/*
* 查找 index,index 不存在返回 NULL,存在返回 item
* 这个函数基本就是 radix_tree_delete 的前面那部分实现
*/
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{
unsigned int height, shift;
struct radix_tree_node **slot;

/*
* index 超出树表示的范围,肯定不存在,返回 NULL
*/
height = root->height;
if (index > radix_tree_maxindex(height))
return NULL;

shift = (height-1) * RADIX_TREE_MAP_SHIFT;
slot = &root->rnode;

/*
* 和 radix_tree_delete 一样
*/
while (height > 0) {
if (*slot == NULL)
return NULL;

/*
* 这里的 (*slot) 是 radix_tree_delete 的 pathp[1].node
*/
slot = (struct radix_tree_node **)
((*slot)->slots +
((index >> shift) & RADIX_TREE_MAP_MASK));
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}

return *slot;
}
EXPORT_SYMBOL(radix_tree_lookup);

radix_tree_tag_set

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/**
* radix_tree_tag_set - set a tag on a radix tree node
* @root: radix tree root
* @index: index key
* @tag: tag index
*
* Set the search tag corresponging to @index in the radix tree. From
* the root all the way down to the leaf node.
*
* Returns the address of the tagged item. Setting a tag on a not-present
* item is a bug.
*/
/*
* 这个函数和 radix_tree_lookup 几乎一模一样,只不过多了一个 tag_set 调用
* index 存在返回 item,不存在返回 NULL
*/
void *radix_tree_tag_set(struct radix_tree_root *root,
unsigned long index, int tag)
{
unsigned int height, shift;
struct radix_tree_node **slot;

height = root->height;
if (index > radix_tree_maxindex(height))
return NULL;

shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
slot = &root->rnode;

while (height > 0) {
int offset;

offset = (index >> shift) & RADIX_TREE_MAP_MASK;
tag_set(*slot, tag, offset);
slot = (struct radix_tree_node **)((*slot)->slots + offset);
BUG_ON(*slot == NULL);
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}

return *slot;
}
EXPORT_SYMBOL(radix_tree_tag_set);

radix_tree_tag_clear

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
/**
* radix_tree_tag_clear - clear a tag on a radix tree node
* @root: radix tree root
* @index: index key
* @tag: tag index
*
* Clear the search tag corresponging to @index in the radix tree. If
* this causes the leaf node to have no tags set then clear the tag in the
* next-to-leaf node, etc.
*
* Returns the address of the tagged item on success, else NULL. ie:
* has the same return value and semantics as radix_tree_lookup().
*/
/*
* 这个函数和 readix_tree_delete 几乎一模一样
* 返回 item
*/
void *radix_tree_tag_clear(struct radix_tree_root *root,
unsigned long index, int tag)
{
struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
unsigned int height, shift;
void *ret = NULL;

height = root->height;
if (index > radix_tree_maxindex(height))
goto out;

shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
pathp->node = NULL;
pathp->slot = &root->rnode;

while (height > 0) {
int offset;

if (*pathp->slot == NULL)
goto out;

offset = (index >> shift) & RADIX_TREE_MAP_MASK;
pathp[1].offset = offset;
pathp[1].node = *pathp[0].slot;
pathp[1].slot = (struct radix_tree_node **)
(pathp[1].node->slots + offset);
pathp++;
shift -= RADIX_TREE_MAP_SHIFT;
height--;
}

ret = *pathp[0].slot;
if (ret == NULL)
goto out;

/*
* 这里和 radix_tree_delete 稍有不同是因为删除需要判断两个 tag,这里只清除一个 tag
* 所以只对一个 tag 做处理就行
*/
do {
int idx;

tag_clear(pathp[0].node, tag, pathp[0].offset);
for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
if (pathp[0].node->tags[tag][idx])
goto out;
}
pathp--;
} while (pathp[0].node);
out:
return ret;
}
EXPORT_SYMBOL(radix_tree_tag_clear);

radix_tree_tagged

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/**
* radix_tree_tagged - test whether any items in the tree are tagged
* @root: radix tree root
* @tag: tag to test
*/
/*
* 简单明了的函数
*/
int radix_tree_tagged(struct radix_tree_root *root, int tag)
{
int idx;

if (!root->rnode)
return 0;
for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
if (root->rnode->tags[tag][idx])
return 1;
}
return 0;
}
EXPORT_SYMBOL(radix_tree_tagged);

radix_tree_gang_lookup

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/**
* radix_tree_gang_lookup - perform multiple lookup on a radix tree
* @root: radix tree root
* @results: where the results of the lookup are placed
* @first_index: start the lookup from this key
* @max_items: place up to this many items at *results
*
* Performs an index-ascending scan of the tree for present items. Places
* them at *@results and returns the number of items which were placed at
* *@results.
*
* The implementation is naive.
*/
/*
* 批量查找,results 存放查找到的 item,first_index 是开始查找的 index,
* max_items 表示最多查找这么多个
* 返回值是最终找到多少个
*/
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
unsigned long first_index, unsigned int max_items)
{
const unsigned long max_index = radix_tree_maxindex(root->height);
unsigned long cur_index = first_index;
unsigned int ret = 0;

while (ret < max_items) {
unsigned int nr_found;
unsigned long next_index; /* Index of next search */

/*
* 当下一个要开始查找的 index 比树表示的 index 还要大时,表示已经查找结束
*/
if (cur_index > max_index)
break;
/*
* 详情看下面
*/
nr_found = __lookup(root, results + ret, cur_index,
max_items - ret, &next_index);
ret += nr_found;
/*
* next_index 为 0 表示已经遍历完整棵树了,不会重头开始遍历
*/
if (next_index == 0)
break;
cur_index = next_index;
}
return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);

static unsigned int
__lookup(struct radix_tree_root *root, void **results, unsigned long index,
unsigned int max_items, unsigned long *next_index)
{
unsigned int nr_found = 0;
unsigned int shift;
unsigned int height = root->height;
struct radix_tree_node *slot;

shift = (height-1) * RADIX_TREE_MAP_SHIFT;
slot = root->rnode;

while (height > 0) {
unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;

/*
* 这个 for 循环是查找的重点,当当前 index 对应的 slot 找不到元素时,
* 当前 height 的偏移要加 1,而更底层的 offset 要从 0 开始。
* 譬如 height 为 3 的树,传入的 index 为 3,4,5。假设 3 找不到 node,
* 那么就要更新 index 为 4,0,0。假设 3 找得到,4找不到,那么 index
* 就要更新为 3,5,0
*/
for ( ; i < RADIX_TREE_MAP_SIZE; i++) {
if (slot->slots[i] != NULL)
break;
index &= ~((1UL << shift) - 1);
index += 1UL << shift;
/*
* 当 index 为 0,表示基数树已经不能再继续往下走了
*/
if (index == 0)
goto out; /* 32-bit wraparound */
}
if (i == RADIX_TREE_MAP_SIZE)
goto out;
height--;
if (height == 0) { /* Bottom level: grab some items */
unsigned long j = index & RADIX_TREE_MAP_MASK;

for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
index++;
if (slot->slots[j]) {
results[nr_found++] = slot->slots[j];
if (nr_found == max_items)
goto out;
}
}
}
shift -= RADIX_TREE_MAP_SHIFT;
slot = slot->slots[i];
}
out:
*next_index = index;
return nr_found;
}

radix_tree_gang_lookup_tag

这个函数和 radix_tree_gang_lookup 几乎一模一样,就不再解析了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/**
* radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
* based on a tag
* @root: radix tree root
* @results: where the results of the lookup are placed
* @first_index: start the lookup from this key
* @max_items: place up to this many items at *results
* @tag: the tag index
*
* Performs an index-ascending scan of the tree for present items which
* have the tag indexed by @tag set. Places the items at *@results and
* returns the number of items which were placed at *@results.
*/
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
unsigned long first_index, unsigned int max_items, int tag)
{
const unsigned long max_index = radix_tree_maxindex(root->height);
unsigned long cur_index = first_index;
unsigned int ret = 0;

while (ret < max_items) {
unsigned int nr_found;
unsigned long next_index; /* Index of next search */

if (cur_index > max_index)
break;
nr_found = __lookup_tag(root, results + ret, cur_index,
max_items - ret, &next_index, tag);
ret += nr_found;
if (next_index == 0)
break;
cur_index = next_index;
}
return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);

/*
* FIXME: the two tag_get()s here should use find_next_bit() instead of
* open-coding the search.
*/
static unsigned int
__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index,
unsigned int max_items, unsigned long *next_index, int tag)
{
unsigned int nr_found = 0;
unsigned int shift;
unsigned int height = root->height;
struct radix_tree_node *slot;

shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
slot = root->rnode;

while (height > 0) {
unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;

for ( ; i < RADIX_TREE_MAP_SIZE; i++) {
if (tag_get(slot, tag, i)) {
BUG_ON(slot->slots[i] == NULL);
break;
}
index &= ~((1UL << shift) - 1);
index += 1UL << shift;
if (index == 0)
goto out; /* 32-bit wraparound */
}
if (i == RADIX_TREE_MAP_SIZE)
goto out;
height--;
if (height == 0) { /* Bottom level: grab some items */
unsigned long j = index & RADIX_TREE_MAP_MASK;

for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
index++;
if (tag_get(slot, tag, j)) {
BUG_ON(slot->slots[j] == NULL);
results[nr_found++] = slot->slots[j];
if (nr_found == max_items)
goto out;
}
}
}
shift -= RADIX_TREE_MAP_SHIFT;
slot = slot->slots[i];
}
out:
*next_index = index;
return nr_found;
}