Commit 1dcab0875b113a148b6601d87b4e0e3444440339
Committed by Greg Kroah-Hartman
1 parent c5f5c4db39
Exists in master and in 6 other branches
Staging: zcache: signedness bug in tmem_get()

"ret" needs to be signed for the error handling to work properly.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Showing 1 changed file with 1 addition and 1 deletion (inline diff)
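The fix changes the type of "ret" in tmem_get() from uint32_t to int. As a quick standalone illustration (not part of the commit), the C sketch below shows why an unsigned "ret" defeats the "ret < 0" error check: the pamops get_data()/get_data_and_free() callbacks return a negative value on failure, and a uint32_t stores that as a huge positive number, so the failure path was never taken.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t uret = -1;     /* the buggy declaration: -1 wraps to 4294967295 */
        int ret = -1;           /* the fixed declaration */

        /* mirrors the "if (ret < 0) goto out;" check in tmem_get() */
        printf("uret = %u, (uret < 0) = %d\n", uret, uret < 0);   /* prints 0 */
        printf("ret = %d, (ret < 0) = %d\n", ret, ret < 0);       /* prints 1 */
        /* many compilers even warn that "uret < 0" is always false */
        return 0;
}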
drivers/staging/zcache/tmem.c
--- a/drivers/staging/zcache/tmem.c
+++ b/drivers/staging/zcache/tmem.c
@@ -1,771 +1,771 @@
 /*
  * In-kernel transcendent memory (generic implementation)
  *
  * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
  *
  * The primary purpose of Transcedent Memory ("tmem") is to map object-oriented
  * "handles" (triples containing a pool id, and object id, and an index), to
  * pages in a page-accessible memory (PAM). Tmem references the PAM pages via
  * an abstract "pampd" (PAM page-descriptor), which can be operated on by a
  * set of functions (pamops). Each pampd contains some representation of
  * PAGE_SIZE bytes worth of data. Tmem must support potentially millions of
  * pages and must be able to insert, find, and delete these pages at a
  * potential frequency of thousands per second concurrently across many CPUs,
  * (and, if used with KVM, across many vcpus across many guests).
  * Tmem is tracked with a hierarchy of data structures, organized by
  * the elements in a handle-tuple: pool_id, object_id, and page index.
  * One or more "clients" (e.g. guests) each provide one or more tmem_pools.
  * Each pool, contains a hash table of rb_trees of tmem_objs. Each
  * tmem_obj contains a radix-tree-like tree of pointers, with intermediate
  * nodes called tmem_objnodes. Each leaf pointer in this tree points to
  * a pampd, which is accessible only through a small set of callbacks
  * registered by the PAM implementation (see tmem_register_pamops). Tmem
  * does all memory allocation via a set of callbacks registered by the tmem
  * host implementation (e.g. see tmem_register_hostops).
  */
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
 
 #include "tmem.h"
 
 /* data structure sentinels used for debugging... see tmem.h */
 #define POOL_SENTINEL 0x87658765
 #define OBJ_SENTINEL 0x12345678
 #define OBJNODE_SENTINEL 0xfedcba09
 
 /*
  * A tmem host implementation must use this function to register callbacks
  * for memory allocation.
  */
 static struct tmem_hostops tmem_hostops;
 
 static void tmem_objnode_tree_init(void);
 
 void tmem_register_hostops(struct tmem_hostops *m)
 {
         tmem_objnode_tree_init();
         tmem_hostops = *m;
 }
 
 /*
  * A tmem host implementation must use this function to register
  * callbacks for a page-accessible memory (PAM) implementation
  */
 static struct tmem_pamops tmem_pamops;
 
 void tmem_register_pamops(struct tmem_pamops *m)
 {
         tmem_pamops = *m;
 }
 
 /*
  * Oid's are potentially very sparse and tmem_objs may have an indeterminately
  * short life, being added and deleted at a relatively high frequency.
  * So an rb_tree is an ideal data structure to manage tmem_objs. But because
  * of the potentially huge number of tmem_objs, each pool manages a hashtable
  * of rb_trees to reduce search, insert, delete, and rebalancing time.
  * Each hashbucket also has a lock to manage concurrent access.
  *
  * The following routines manage tmem_objs. When any tmem_obj is accessed,
  * the hashbucket lock must be held.
  */
 
 /* searches for object==oid in pool, returns locked object if found */
 static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb,
                                         struct tmem_oid *oidp)
 {
         struct rb_node *rbnode;
         struct tmem_obj *obj;
 
         rbnode = hb->obj_rb_root.rb_node;
         while (rbnode) {
                 BUG_ON(RB_EMPTY_NODE(rbnode));
                 obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
                 switch (tmem_oid_compare(oidp, &obj->oid)) {
                 case 0: /* equal */
                         goto out;
                 case -1:
                         rbnode = rbnode->rb_left;
                         break;
                 case 1:
                         rbnode = rbnode->rb_right;
                         break;
                 }
         }
         obj = NULL;
 out:
         return obj;
 }
 
 static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *);
 
 /* free an object that has no more pampds in it */
 static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb)
 {
         struct tmem_pool *pool;
 
         BUG_ON(obj == NULL);
         ASSERT_SENTINEL(obj, OBJ);
         BUG_ON(obj->pampd_count > 0);
         pool = obj->pool;
         BUG_ON(pool == NULL);
         if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */
                 tmem_pampd_destroy_all_in_obj(obj);
         BUG_ON(obj->objnode_tree_root != NULL);
         BUG_ON((long)obj->objnode_count != 0);
         atomic_dec(&pool->obj_count);
         BUG_ON(atomic_read(&pool->obj_count) < 0);
         INVERT_SENTINEL(obj, OBJ);
         obj->pool = NULL;
         tmem_oid_set_invalid(&obj->oid);
         rb_erase(&obj->rb_tree_node, &hb->obj_rb_root);
 }
 
 /*
  * initialize, and insert an tmem_object_root (called only if find failed)
  */
 static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
                                         struct tmem_pool *pool,
                                         struct tmem_oid *oidp)
 {
         struct rb_root *root = &hb->obj_rb_root;
         struct rb_node **new = &(root->rb_node), *parent = NULL;
         struct tmem_obj *this;
 
         BUG_ON(pool == NULL);
         atomic_inc(&pool->obj_count);
         obj->objnode_tree_height = 0;
         obj->objnode_tree_root = NULL;
         obj->pool = pool;
         obj->oid = *oidp;
         obj->objnode_count = 0;
         obj->pampd_count = 0;
         (*tmem_pamops.new_obj)(obj);
         SET_SENTINEL(obj, OBJ);
         while (*new) {
                 BUG_ON(RB_EMPTY_NODE(*new));
                 this = rb_entry(*new, struct tmem_obj, rb_tree_node);
                 parent = *new;
                 switch (tmem_oid_compare(oidp, &this->oid)) {
                 case 0:
                         BUG(); /* already present; should never happen! */
                         break;
                 case -1:
                         new = &(*new)->rb_left;
                         break;
                 case 1:
                         new = &(*new)->rb_right;
                         break;
                 }
         }
         rb_link_node(&obj->rb_tree_node, parent, new);
         rb_insert_color(&obj->rb_tree_node, root);
 }
 
 /*
  * Tmem is managed as a set of tmem_pools with certain attributes, such as
  * "ephemeral" vs "persistent". These attributes apply to all tmem_objs
  * and all pampds that belong to a tmem_pool. A tmem_pool is created
  * or deleted relatively rarely (for example, when a filesystem is
  * mounted or unmounted.
  */
 
 /* flush all data from a pool and, optionally, free it */
 static void tmem_pool_flush(struct tmem_pool *pool, bool destroy)
 {
         struct rb_node *rbnode;
         struct tmem_obj *obj;
         struct tmem_hashbucket *hb = &pool->hashbucket[0];
         int i;
 
         BUG_ON(pool == NULL);
         for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
                 spin_lock(&hb->lock);
                 rbnode = rb_first(&hb->obj_rb_root);
                 while (rbnode != NULL) {
                         obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
                         rbnode = rb_next(rbnode);
                         tmem_pampd_destroy_all_in_obj(obj);
                         tmem_obj_free(obj, hb);
                         (*tmem_hostops.obj_free)(obj, pool);
                 }
                 spin_unlock(&hb->lock);
         }
         if (destroy)
                 list_del(&pool->pool_list);
 }
 
 /*
  * A tmem_obj contains a radix-tree-like tree in which the intermediate
  * nodes are called tmem_objnodes. (The kernel lib/radix-tree.c implementation
  * is very specialized and tuned for specific uses and is not particularly
  * suited for use from this code, though some code from the core algorithms has
  * been reused, thus the copyright notices below). Each tmem_objnode contains
  * a set of pointers which point to either a set of intermediate tmem_objnodes
  * or a set of of pampds.
  *
  * Portions Copyright (C) 2001 Momchil Velikov
  * Portions Copyright (C) 2001 Christoph Hellwig
  * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
  */
 
 struct tmem_objnode_tree_path {
         struct tmem_objnode *objnode;
         int offset;
 };
 
 /* objnode height_to_maxindex translation */
 static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1];
 
 static void tmem_objnode_tree_init(void)
 {
         unsigned int ht, tmp;
 
         for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) {
                 tmp = ht * OBJNODE_TREE_MAP_SHIFT;
                 if (tmp >= OBJNODE_TREE_INDEX_BITS)
                         tmem_objnode_tree_h2max[ht] = ~0UL;
                 else
                         tmem_objnode_tree_h2max[ht] =
                             (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
         }
 }
 
 static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj)
 {
         struct tmem_objnode *objnode;
 
         ASSERT_SENTINEL(obj, OBJ);
         BUG_ON(obj->pool == NULL);
         ASSERT_SENTINEL(obj->pool, POOL);
         objnode = (*tmem_hostops.objnode_alloc)(obj->pool);
         if (unlikely(objnode == NULL))
                 goto out;
         objnode->obj = obj;
         SET_SENTINEL(objnode, OBJNODE);
         memset(&objnode->slots, 0, sizeof(objnode->slots));
         objnode->slots_in_use = 0;
         obj->objnode_count++;
 out:
         return objnode;
 }
 
 static void tmem_objnode_free(struct tmem_objnode *objnode)
 {
         struct tmem_pool *pool;
         int i;
 
         BUG_ON(objnode == NULL);
         for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++)
                 BUG_ON(objnode->slots[i] != NULL);
         ASSERT_SENTINEL(objnode, OBJNODE);
         INVERT_SENTINEL(objnode, OBJNODE);
         BUG_ON(objnode->obj == NULL);
         ASSERT_SENTINEL(objnode->obj, OBJ);
         pool = objnode->obj->pool;
         BUG_ON(pool == NULL);
         ASSERT_SENTINEL(pool, POOL);
         objnode->obj->objnode_count--;
         objnode->obj = NULL;
         (*tmem_hostops.objnode_free)(objnode, pool);
 }
 
 /*
  * lookup index in object and return associated pampd (or NULL if not found)
  */
 static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
 {
         unsigned int height, shift;
         struct tmem_objnode **slot = NULL;
 
         BUG_ON(obj == NULL);
         ASSERT_SENTINEL(obj, OBJ);
         BUG_ON(obj->pool == NULL);
         ASSERT_SENTINEL(obj->pool, POOL);
 
         height = obj->objnode_tree_height;
         if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height])
                 goto out;
         if (height == 0 && obj->objnode_tree_root) {
                 slot = &obj->objnode_tree_root;
                 goto out;
         }
         shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
         slot = &obj->objnode_tree_root;
         while (height > 0) {
                 if (*slot == NULL)
                         goto out;
                 slot = (struct tmem_objnode **)
                         ((*slot)->slots +
                          ((index >> shift) & OBJNODE_TREE_MAP_MASK));
                 shift -= OBJNODE_TREE_MAP_SHIFT;
                 height--;
         }
 out:
         return slot != NULL ? (void **)slot : NULL;
 }
 
 static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
 {
         struct tmem_objnode **slot;
 
         slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
         return slot != NULL ? *slot : NULL;
 }
 
 static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
                                         void *new_pampd)
 {
         struct tmem_objnode **slot;
         void *ret = NULL;
 
         slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
         if ((slot != NULL) && (*slot != NULL)) {
                 void *old_pampd = *(void **)slot;
                 *(void **)slot = new_pampd;
                 (*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0);
                 ret = new_pampd;
         }
         return ret;
 }
 
 static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
                                         void *pampd)
 {
         int ret = 0;
         struct tmem_objnode *objnode = NULL, *newnode, *slot;
         unsigned int height, shift;
         int offset = 0;
 
         /* if necessary, extend the tree to be higher */
         if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) {
                 height = obj->objnode_tree_height + 1;
                 if (index > tmem_objnode_tree_h2max[height])
                         while (index > tmem_objnode_tree_h2max[height])
                                 height++;
                 if (obj->objnode_tree_root == NULL) {
                         obj->objnode_tree_height = height;
                         goto insert;
                 }
                 do {
                         newnode = tmem_objnode_alloc(obj);
                         if (!newnode) {
                                 ret = -ENOMEM;
                                 goto out;
                         }
                         newnode->slots[0] = obj->objnode_tree_root;
                         newnode->slots_in_use = 1;
                         obj->objnode_tree_root = newnode;
                         obj->objnode_tree_height++;
                 } while (height > obj->objnode_tree_height);
         }
 insert:
         slot = obj->objnode_tree_root;
         height = obj->objnode_tree_height;
         shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
         while (height > 0) {
                 if (slot == NULL) {
                         /* add a child objnode. */
                         slot = tmem_objnode_alloc(obj);
                         if (!slot) {
                                 ret = -ENOMEM;
                                 goto out;
                         }
                         if (objnode) {
 
                                 objnode->slots[offset] = slot;
                                 objnode->slots_in_use++;
                         } else
                                 obj->objnode_tree_root = slot;
                 }
                 /* go down a level */
                 offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
                 objnode = slot;
                 slot = objnode->slots[offset];
                 shift -= OBJNODE_TREE_MAP_SHIFT;
                 height--;
         }
         BUG_ON(slot != NULL);
         if (objnode) {
                 objnode->slots_in_use++;
                 objnode->slots[offset] = pampd;
         } else
                 obj->objnode_tree_root = pampd;
         obj->pampd_count++;
 out:
         return ret;
 }
 
 static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index)
 {
         struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1];
         struct tmem_objnode_tree_path *pathp = path;
         struct tmem_objnode *slot = NULL;
         unsigned int height, shift;
         int offset;
 
         BUG_ON(obj == NULL);
         ASSERT_SENTINEL(obj, OBJ);
         BUG_ON(obj->pool == NULL);
         ASSERT_SENTINEL(obj->pool, POOL);
         height = obj->objnode_tree_height;
         if (index > tmem_objnode_tree_h2max[height])
                 goto out;
         slot = obj->objnode_tree_root;
         if (height == 0 && obj->objnode_tree_root) {
                 obj->objnode_tree_root = NULL;
                 goto out;
         }
         shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT;
         pathp->objnode = NULL;
         do {
                 if (slot == NULL)
                         goto out;
                 pathp++;
                 offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
                 pathp->offset = offset;
                 pathp->objnode = slot;
                 slot = slot->slots[offset];
                 shift -= OBJNODE_TREE_MAP_SHIFT;
                 height--;
         } while (height > 0);
         if (slot == NULL)
                 goto out;
         while (pathp->objnode) {
                 pathp->objnode->slots[pathp->offset] = NULL;
                 pathp->objnode->slots_in_use--;
                 if (pathp->objnode->slots_in_use) {
                         if (pathp->objnode == obj->objnode_tree_root) {
                                 while (obj->objnode_tree_height > 0 &&
                                 obj->objnode_tree_root->slots_in_use == 1 &&
                                 obj->objnode_tree_root->slots[0]) {
                                         struct tmem_objnode *to_free =
                                                 obj->objnode_tree_root;
 
                                         obj->objnode_tree_root =
                                                 to_free->slots[0];
                                         obj->objnode_tree_height--;
                                         to_free->slots[0] = NULL;
                                         to_free->slots_in_use = 0;
                                         tmem_objnode_free(to_free);
                                 }
                         }
                         goto out;
                 }
                 tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */
                 pathp--;
         }
         obj->objnode_tree_height = 0;
         obj->objnode_tree_root = NULL;
 
 out:
         if (slot != NULL)
                 obj->pampd_count--;
         BUG_ON(obj->pampd_count < 0);
         return slot;
 }
 
 /* recursively walk the objnode_tree destroying pampds and objnodes */
 static void tmem_objnode_node_destroy(struct tmem_obj *obj,
                                         struct tmem_objnode *objnode,
                                         unsigned int ht)
 {
         int i;
 
         if (ht == 0)
                 return;
         for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) {
                 if (objnode->slots[i]) {
                         if (ht == 1) {
                                 obj->pampd_count--;
                                 (*tmem_pamops.free)(objnode->slots[i],
                                                 obj->pool, NULL, 0);
                                 objnode->slots[i] = NULL;
                                 continue;
                         }
                         tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1);
                         tmem_objnode_free(objnode->slots[i]);
                         objnode->slots[i] = NULL;
                 }
         }
 }
 
 static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
 {
         if (obj->objnode_tree_root == NULL)
                 return;
         if (obj->objnode_tree_height == 0) {
                 obj->pampd_count--;
                 (*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0);
         } else {
                 tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
                                         obj->objnode_tree_height);
                 tmem_objnode_free(obj->objnode_tree_root);
                 obj->objnode_tree_height = 0;
         }
         obj->objnode_tree_root = NULL;
         (*tmem_pamops.free_obj)(obj->pool, obj);
 }
 
 /*
  * Tmem is operated on by a set of well-defined actions:
  * "put", "get", "flush", "flush_object", "new pool" and "destroy pool".
  * (The tmem ABI allows for subpages and exchanges but these operations
  * are not included in this implementation.)
  *
  * These "tmem core" operations are implemented in the following functions.
  */
 
 /*
  * "Put" a page, e.g. copy a page from the kernel into newly allocated
  * PAM space (if such space is available). Tmem_put is complicated by
  * a corner case: What if a page with matching handle already exists in
  * tmem? To guarantee coherency, one of two actions is necessary: Either
  * the data for the page must be overwritten, or the page must be
  * "flushed" so that the data is not accessible to a subsequent "get".
  * Since these "duplicate puts" are relatively rare, this implementation
  * always flushes for simplicity.
  */
 int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
                 char *data, size_t size, bool raw, bool ephemeral)
 {
         struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
         void *pampd = NULL, *pampd_del = NULL;
         int ret = -ENOMEM;
         struct tmem_hashbucket *hb;
 
         hb = &pool->hashbucket[tmem_oid_hash(oidp)];
         spin_lock(&hb->lock);
         obj = objfound = tmem_obj_find(hb, oidp);
         if (obj != NULL) {
                 pampd = tmem_pampd_lookup_in_obj(objfound, index);
                 if (pampd != NULL) {
                         /* if found, is a dup put, flush the old one */
                         pampd_del = tmem_pampd_delete_from_obj(obj, index);
                         BUG_ON(pampd_del != pampd);
                         (*tmem_pamops.free)(pampd, pool, oidp, index);
                         if (obj->pampd_count == 0) {
                                 objnew = obj;
                                 objfound = NULL;
                         }
                         pampd = NULL;
                 }
         } else {
                 obj = objnew = (*tmem_hostops.obj_alloc)(pool);
                 if (unlikely(obj == NULL)) {
                         ret = -ENOMEM;
                         goto out;
                 }
                 tmem_obj_init(obj, hb, pool, oidp);
         }
         BUG_ON(obj == NULL);
         BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
         pampd = (*tmem_pamops.create)(data, size, raw, ephemeral,
                                         obj->pool, &obj->oid, index);
         if (unlikely(pampd == NULL))
                 goto free;
         ret = tmem_pampd_add_to_obj(obj, index, pampd);
         if (unlikely(ret == -ENOMEM))
                 /* may have partially built objnode tree ("stump") */
                 goto delete_and_free;
         goto out;
 
 delete_and_free:
         (void)tmem_pampd_delete_from_obj(obj, index);
 free:
         if (pampd)
                 (*tmem_pamops.free)(pampd, pool, NULL, 0);
         if (objnew) {
                 tmem_obj_free(objnew, hb);
                 (*tmem_hostops.obj_free)(objnew, pool);
         }
 out:
         spin_unlock(&hb->lock);
         return ret;
 }
 
 /*
  * "Get" a page, e.g. if one can be found, copy the tmem page with the
  * matching handle from PAM space to the kernel. By tmem definition,
  * when a "get" is successful on an ephemeral page, the page is "flushed",
  * and when a "get" is successful on a persistent page, the page is retained
  * in tmem. Note that to preserve
  * coherency, "get" can never be skipped if tmem contains the data.
  * That is, if a get is done with a certain handle and fails, any
  * subsequent "get" must also fail (unless of course there is a
  * "put" done with the same handle).
 
  */
 int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
                 char *data, size_t *size, bool raw, int get_and_free)
 {
         struct tmem_obj *obj;
         void *pampd;
         bool ephemeral = is_ephemeral(pool);
-        uint32_t ret = -1;
+        int ret = -1;
         struct tmem_hashbucket *hb;
         bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
         bool lock_held = false;
 
         hb = &pool->hashbucket[tmem_oid_hash(oidp)];
         spin_lock(&hb->lock);
         lock_held = true;
         obj = tmem_obj_find(hb, oidp);
         if (obj == NULL)
                 goto out;
         if (free)
                 pampd = tmem_pampd_delete_from_obj(obj, index);
         else
                 pampd = tmem_pampd_lookup_in_obj(obj, index);
         if (pampd == NULL)
                 goto out;
         if (free) {
                 if (obj->pampd_count == 0) {
                         tmem_obj_free(obj, hb);
                         (*tmem_hostops.obj_free)(obj, pool);
                         obj = NULL;
                 }
         }
         if (tmem_pamops.is_remote(pampd)) {
                 lock_held = false;
                 spin_unlock(&hb->lock);
         }
         if (free)
                 ret = (*tmem_pamops.get_data_and_free)(
                                 data, size, raw, pampd, pool, oidp, index);
         else
                 ret = (*tmem_pamops.get_data)(
                                 data, size, raw, pampd, pool, oidp, index);
         if (ret < 0)
                 goto out;
         ret = 0;
 out:
         if (lock_held)
                 spin_unlock(&hb->lock);
         return ret;
 }
 
 /*
  * If a page in tmem matches the handle, "flush" this page from tmem such
  * that any subsequent "get" does not succeed (unless, of course, there
  * was another "put" with the same handle).
  */
 int tmem_flush_page(struct tmem_pool *pool,
                                 struct tmem_oid *oidp, uint32_t index)
 {
         struct tmem_obj *obj;
         void *pampd;
         int ret = -1;
         struct tmem_hashbucket *hb;
 
         hb = &pool->hashbucket[tmem_oid_hash(oidp)];
         spin_lock(&hb->lock);
         obj = tmem_obj_find(hb, oidp);
         if (obj == NULL)
                 goto out;
         pampd = tmem_pampd_delete_from_obj(obj, index);
         if (pampd == NULL)
                 goto out;
         (*tmem_pamops.free)(pampd, pool, oidp, index);
         if (obj->pampd_count == 0) {
                 tmem_obj_free(obj, hb);
                 (*tmem_hostops.obj_free)(obj, pool);
         }
         ret = 0;
 
 out:
         spin_unlock(&hb->lock);
         return ret;
 }
 
 /*
  * If a page in tmem matches the handle, replace the page so that any
  * subsequent "get" gets the new page. Returns 0 if
  * there was a page to replace, else returns -1.
  */
 int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
                         uint32_t index, void *new_pampd)
 {
         struct tmem_obj *obj;
         int ret = -1;
         struct tmem_hashbucket *hb;
 
         hb = &pool->hashbucket[tmem_oid_hash(oidp)];
         spin_lock(&hb->lock);
         obj = tmem_obj_find(hb, oidp);
         if (obj == NULL)
                 goto out;
         new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd);
         ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj);
 out:
         spin_unlock(&hb->lock);
         return ret;
 }
 
 /*
  * "Flush" all pages in tmem matching this oid.
  */
 int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp)
 {
         struct tmem_obj *obj;
         struct tmem_hashbucket *hb;
         int ret = -1;
 
         hb = &pool->hashbucket[tmem_oid_hash(oidp)];
         spin_lock(&hb->lock);
         obj = tmem_obj_find(hb, oidp);
         if (obj == NULL)
                 goto out;
         tmem_pampd_destroy_all_in_obj(obj);
         tmem_obj_free(obj, hb);
         (*tmem_hostops.obj_free)(obj, pool);
         ret = 0;
 
 out:
         spin_unlock(&hb->lock);
         return ret;
 }
 
 /*
  * "Flush" all pages (and tmem_objs) from this tmem_pool and disable
  * all subsequent access to this tmem_pool.
  */
 int tmem_destroy_pool(struct tmem_pool *pool)
 {
         int ret = -1;
 
         if (pool == NULL)
                 goto out;
         tmem_pool_flush(pool, 1);
         ret = 0;
 out:
         return ret;
 }
 
 static LIST_HEAD(tmem_global_pool_list);
 
 /*
  * Create a new tmem_pool with the provided flag and return
  * a pool id provided by the tmem host implementation.
  */
 void tmem_new_pool(struct tmem_pool *pool, uint32_t flags)
 {
         int persistent = flags & TMEM_POOL_PERSIST;
         int shared = flags & TMEM_POOL_SHARED;
         struct tmem_hashbucket *hb = &pool->hashbucket[0];
         int i;
 
         for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
                 hb->obj_rb_root = RB_ROOT;
                 spin_lock_init(&hb->lock);
         }
         INIT_LIST_HEAD(&pool->pool_list);
         atomic_set(&pool->obj_count, 0);
         SET_SENTINEL(pool, POOL);
         list_add_tail(&pool->pool_list, &tmem_global_pool_list);
         pool->persistent = persistent;
         pool->shared = shared;
 }
 
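As a footnote on the surrounding code: the height-to-maxindex table built by tmem_objnode_tree_init() (and consulted by __tmem_pampd_lookup_in_obj() and tmem_pampd_add_to_obj()) can be reproduced in a small standalone sketch. The OBJNODE_TREE_* constants below are illustrative assumptions, not necessarily the values defined in tmem.h:

#include <stdio.h>

#define OBJNODE_TREE_MAP_SHIFT  6       /* assumed: 64 slots per objnode */
#define OBJNODE_TREE_INDEX_BITS (8 * sizeof(unsigned long))
#define OBJNODE_TREE_MAX_PATH \
        (OBJNODE_TREE_INDEX_BITS / OBJNODE_TREE_MAP_SHIFT + 2)

static unsigned long h2max[OBJNODE_TREE_MAX_PATH + 1];

int main(void)
{
        unsigned int ht, tmp;

        /* same loop shape as tmem_objnode_tree_init() above */
        for (ht = 0; ht < OBJNODE_TREE_MAX_PATH + 1; ht++) {
                tmp = ht * OBJNODE_TREE_MAP_SHIFT;
                if (tmp >= OBJNODE_TREE_INDEX_BITS)
                        h2max[ht] = ~0UL;
                else
                        h2max[ht] =
                            (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
                printf("height %u -> max index %#lx\n", ht, h2max[ht]);
        }
        return 0;
}

With a 6-bit map shift, a tree of height 1 covers indices 0 through 63 and height 2 covers 0 through 4095; an index beyond h2max[height] is exactly what forces tmem_pampd_add_to_obj() to grow the tree.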