Commit 1dcab0875b113a148b6601d87b4e0e3444440339

Authored by Dan Carpenter
Committed by Greg Kroah-Hartman
1 parent c5f5c4db39

Staging: zcache: signedness bug in tmem_get()

"ret" needs to be signed for the error handling to work properly.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

Showing 1 changed file with 1 addition and 1 deletion (inline diff)

drivers/staging/zcache/tmem.c
1 /* 1 /*
2 * In-kernel transcendent memory (generic implementation) 2 * In-kernel transcendent memory (generic implementation)
3 * 3 *
4 * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. 4 * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
5 * 5 *
6 * The primary purpose of Transcedent Memory ("tmem") is to map object-oriented 6 * The primary purpose of Transcedent Memory ("tmem") is to map object-oriented
7 * "handles" (triples containing a pool id, and object id, and an index), to 7 * "handles" (triples containing a pool id, and object id, and an index), to
8 * pages in a page-accessible memory (PAM). Tmem references the PAM pages via 8 * pages in a page-accessible memory (PAM). Tmem references the PAM pages via
9 * an abstract "pampd" (PAM page-descriptor), which can be operated on by a 9 * an abstract "pampd" (PAM page-descriptor), which can be operated on by a
10 * set of functions (pamops). Each pampd contains some representation of 10 * set of functions (pamops). Each pampd contains some representation of
11 * PAGE_SIZE bytes worth of data. Tmem must support potentially millions of 11 * PAGE_SIZE bytes worth of data. Tmem must support potentially millions of
12 * pages and must be able to insert, find, and delete these pages at a 12 * pages and must be able to insert, find, and delete these pages at a
13 * potential frequency of thousands per second concurrently across many CPUs, 13 * potential frequency of thousands per second concurrently across many CPUs,
14 * (and, if used with KVM, across many vcpus across many guests). 14 * (and, if used with KVM, across many vcpus across many guests).
15 * Tmem is tracked with a hierarchy of data structures, organized by 15 * Tmem is tracked with a hierarchy of data structures, organized by
16 * the elements in a handle-tuple: pool_id, object_id, and page index. 16 * the elements in a handle-tuple: pool_id, object_id, and page index.
17 * One or more "clients" (e.g. guests) each provide one or more tmem_pools. 17 * One or more "clients" (e.g. guests) each provide one or more tmem_pools.
18 * Each pool, contains a hash table of rb_trees of tmem_objs. Each 18 * Each pool, contains a hash table of rb_trees of tmem_objs. Each
19 * tmem_obj contains a radix-tree-like tree of pointers, with intermediate 19 * tmem_obj contains a radix-tree-like tree of pointers, with intermediate
20 * nodes called tmem_objnodes. Each leaf pointer in this tree points to 20 * nodes called tmem_objnodes. Each leaf pointer in this tree points to
21 * a pampd, which is accessible only through a small set of callbacks 21 * a pampd, which is accessible only through a small set of callbacks
22 * registered by the PAM implementation (see tmem_register_pamops). Tmem 22 * registered by the PAM implementation (see tmem_register_pamops). Tmem
23 * does all memory allocation via a set of callbacks registered by the tmem 23 * does all memory allocation via a set of callbacks registered by the tmem
24 * host implementation (e.g. see tmem_register_hostops). 24 * host implementation (e.g. see tmem_register_hostops).
25 */ 25 */
26 26
27 #include <linux/list.h> 27 #include <linux/list.h>
28 #include <linux/spinlock.h> 28 #include <linux/spinlock.h>
29 #include <linux/atomic.h> 29 #include <linux/atomic.h>
30 30
31 #include "tmem.h" 31 #include "tmem.h"
32 32
33 /* data structure sentinels used for debugging... see tmem.h */ 33 /* data structure sentinels used for debugging... see tmem.h */
34 #define POOL_SENTINEL 0x87658765 34 #define POOL_SENTINEL 0x87658765
35 #define OBJ_SENTINEL 0x12345678 35 #define OBJ_SENTINEL 0x12345678
36 #define OBJNODE_SENTINEL 0xfedcba09 36 #define OBJNODE_SENTINEL 0xfedcba09
37 37
38 /* 38 /*
39 * A tmem host implementation must use this function to register callbacks 39 * A tmem host implementation must use this function to register callbacks
40 * for memory allocation. 40 * for memory allocation.
41 */ 41 */
42 static struct tmem_hostops tmem_hostops; 42 static struct tmem_hostops tmem_hostops;
43 43
44 static void tmem_objnode_tree_init(void); 44 static void tmem_objnode_tree_init(void);
45 45
46 void tmem_register_hostops(struct tmem_hostops *m) 46 void tmem_register_hostops(struct tmem_hostops *m)
47 { 47 {
48 tmem_objnode_tree_init(); 48 tmem_objnode_tree_init();
49 tmem_hostops = *m; 49 tmem_hostops = *m;
50 } 50 }
51 51
52 /* 52 /*
53 * A tmem host implementation must use this function to register 53 * A tmem host implementation must use this function to register
54 * callbacks for a page-accessible memory (PAM) implementation 54 * callbacks for a page-accessible memory (PAM) implementation
55 */ 55 */
56 static struct tmem_pamops tmem_pamops; 56 static struct tmem_pamops tmem_pamops;
57 57
/*
 * Register the page-accessible-memory (PAM) callback table.  The
 * caller's struct is copied by value, so *m need not outlive this call.
 */
void tmem_register_pamops(struct tmem_pamops *m)
{
        tmem_pamops = *m;       /* struct copy of all callback pointers */
}
62 62
63 /* 63 /*
64 * Oid's are potentially very sparse and tmem_objs may have an indeterminately 64 * Oid's are potentially very sparse and tmem_objs may have an indeterminately
65 * short life, being added and deleted at a relatively high frequency. 65 * short life, being added and deleted at a relatively high frequency.
66 * So an rb_tree is an ideal data structure to manage tmem_objs. But because 66 * So an rb_tree is an ideal data structure to manage tmem_objs. But because
67 * of the potentially huge number of tmem_objs, each pool manages a hashtable 67 * of the potentially huge number of tmem_objs, each pool manages a hashtable
68 * of rb_trees to reduce search, insert, delete, and rebalancing time. 68 * of rb_trees to reduce search, insert, delete, and rebalancing time.
69 * Each hashbucket also has a lock to manage concurrent access. 69 * Each hashbucket also has a lock to manage concurrent access.
70 * 70 *
71 * The following routines manage tmem_objs. When any tmem_obj is accessed, 71 * The following routines manage tmem_objs. When any tmem_obj is accessed,
72 * the hashbucket lock must be held. 72 * the hashbucket lock must be held.
73 */ 73 */
74 74
75 /* searches for object==oid in pool, returns locked object if found */ 75 /* searches for object==oid in pool, returns locked object if found */
76 static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb, 76 static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb,
77 struct tmem_oid *oidp) 77 struct tmem_oid *oidp)
78 { 78 {
79 struct rb_node *rbnode; 79 struct rb_node *rbnode;
80 struct tmem_obj *obj; 80 struct tmem_obj *obj;
81 81
82 rbnode = hb->obj_rb_root.rb_node; 82 rbnode = hb->obj_rb_root.rb_node;
83 while (rbnode) { 83 while (rbnode) {
84 BUG_ON(RB_EMPTY_NODE(rbnode)); 84 BUG_ON(RB_EMPTY_NODE(rbnode));
85 obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node); 85 obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
86 switch (tmem_oid_compare(oidp, &obj->oid)) { 86 switch (tmem_oid_compare(oidp, &obj->oid)) {
87 case 0: /* equal */ 87 case 0: /* equal */
88 goto out; 88 goto out;
89 case -1: 89 case -1:
90 rbnode = rbnode->rb_left; 90 rbnode = rbnode->rb_left;
91 break; 91 break;
92 case 1: 92 case 1:
93 rbnode = rbnode->rb_right; 93 rbnode = rbnode->rb_right;
94 break; 94 break;
95 } 95 }
96 } 96 }
97 obj = NULL; 97 obj = NULL;
98 out: 98 out:
99 return obj; 99 return obj;
100 } 100 }
101 101
102 static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *); 102 static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *);
103 103
/*
 * Free an object that has no more pampds in it: detach @obj from its
 * pool's accounting and from hashbucket @hb's rb_tree, and poison its
 * sentinel.  The caller remains responsible for returning the obj's
 * memory via the host's obj_free callback.  Per the locking rule above,
 * the hashbucket lock must be held.
 */
static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb)
{
        struct tmem_pool *pool;

        BUG_ON(obj == NULL);
        ASSERT_SENTINEL(obj, OBJ);
        BUG_ON(obj->pampd_count > 0);   /* all pampds must already be gone */
        pool = obj->pool;
        BUG_ON(pool == NULL);
        if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */
                tmem_pampd_destroy_all_in_obj(obj);
        BUG_ON(obj->objnode_tree_root != NULL);
        BUG_ON((long)obj->objnode_count != 0);
        atomic_dec(&pool->obj_count);
        BUG_ON(atomic_read(&pool->obj_count) < 0);
        INVERT_SENTINEL(obj, OBJ);      /* poison sentinel to catch reuse */
        obj->pool = NULL;
        tmem_oid_set_invalid(&obj->oid);
        rb_erase(&obj->rb_tree_node, &hb->obj_rb_root);
}
125 125
126 /* 126 /*
127 * initialize, and insert an tmem_object_root (called only if find failed) 127 * initialize, and insert an tmem_object_root (called only if find failed)
128 */ 128 */
129 static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb, 129 static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
130 struct tmem_pool *pool, 130 struct tmem_pool *pool,
131 struct tmem_oid *oidp) 131 struct tmem_oid *oidp)
132 { 132 {
133 struct rb_root *root = &hb->obj_rb_root; 133 struct rb_root *root = &hb->obj_rb_root;
134 struct rb_node **new = &(root->rb_node), *parent = NULL; 134 struct rb_node **new = &(root->rb_node), *parent = NULL;
135 struct tmem_obj *this; 135 struct tmem_obj *this;
136 136
137 BUG_ON(pool == NULL); 137 BUG_ON(pool == NULL);
138 atomic_inc(&pool->obj_count); 138 atomic_inc(&pool->obj_count);
139 obj->objnode_tree_height = 0; 139 obj->objnode_tree_height = 0;
140 obj->objnode_tree_root = NULL; 140 obj->objnode_tree_root = NULL;
141 obj->pool = pool; 141 obj->pool = pool;
142 obj->oid = *oidp; 142 obj->oid = *oidp;
143 obj->objnode_count = 0; 143 obj->objnode_count = 0;
144 obj->pampd_count = 0; 144 obj->pampd_count = 0;
145 (*tmem_pamops.new_obj)(obj); 145 (*tmem_pamops.new_obj)(obj);
146 SET_SENTINEL(obj, OBJ); 146 SET_SENTINEL(obj, OBJ);
147 while (*new) { 147 while (*new) {
148 BUG_ON(RB_EMPTY_NODE(*new)); 148 BUG_ON(RB_EMPTY_NODE(*new));
149 this = rb_entry(*new, struct tmem_obj, rb_tree_node); 149 this = rb_entry(*new, struct tmem_obj, rb_tree_node);
150 parent = *new; 150 parent = *new;
151 switch (tmem_oid_compare(oidp, &this->oid)) { 151 switch (tmem_oid_compare(oidp, &this->oid)) {
152 case 0: 152 case 0:
153 BUG(); /* already present; should never happen! */ 153 BUG(); /* already present; should never happen! */
154 break; 154 break;
155 case -1: 155 case -1:
156 new = &(*new)->rb_left; 156 new = &(*new)->rb_left;
157 break; 157 break;
158 case 1: 158 case 1:
159 new = &(*new)->rb_right; 159 new = &(*new)->rb_right;
160 break; 160 break;
161 } 161 }
162 } 162 }
163 rb_link_node(&obj->rb_tree_node, parent, new); 163 rb_link_node(&obj->rb_tree_node, parent, new);
164 rb_insert_color(&obj->rb_tree_node, root); 164 rb_insert_color(&obj->rb_tree_node, root);
165 } 165 }
166 166
167 /* 167 /*
168 * Tmem is managed as a set of tmem_pools with certain attributes, such as 168 * Tmem is managed as a set of tmem_pools with certain attributes, such as
169 * "ephemeral" vs "persistent". These attributes apply to all tmem_objs 169 * "ephemeral" vs "persistent". These attributes apply to all tmem_objs
170 * and all pampds that belong to a tmem_pool. A tmem_pool is created 170 * and all pampds that belong to a tmem_pool. A tmem_pool is created
171 * or deleted relatively rarely (for example, when a filesystem is 171 * or deleted relatively rarely (for example, when a filesystem is
172 * mounted or unmounted. 172 * mounted or unmounted.
173 */ 173 */
174 174
175 /* flush all data from a pool and, optionally, free it */ 175 /* flush all data from a pool and, optionally, free it */
176 static void tmem_pool_flush(struct tmem_pool *pool, bool destroy) 176 static void tmem_pool_flush(struct tmem_pool *pool, bool destroy)
177 { 177 {
178 struct rb_node *rbnode; 178 struct rb_node *rbnode;
179 struct tmem_obj *obj; 179 struct tmem_obj *obj;
180 struct tmem_hashbucket *hb = &pool->hashbucket[0]; 180 struct tmem_hashbucket *hb = &pool->hashbucket[0];
181 int i; 181 int i;
182 182
183 BUG_ON(pool == NULL); 183 BUG_ON(pool == NULL);
184 for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) { 184 for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
185 spin_lock(&hb->lock); 185 spin_lock(&hb->lock);
186 rbnode = rb_first(&hb->obj_rb_root); 186 rbnode = rb_first(&hb->obj_rb_root);
187 while (rbnode != NULL) { 187 while (rbnode != NULL) {
188 obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node); 188 obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
189 rbnode = rb_next(rbnode); 189 rbnode = rb_next(rbnode);
190 tmem_pampd_destroy_all_in_obj(obj); 190 tmem_pampd_destroy_all_in_obj(obj);
191 tmem_obj_free(obj, hb); 191 tmem_obj_free(obj, hb);
192 (*tmem_hostops.obj_free)(obj, pool); 192 (*tmem_hostops.obj_free)(obj, pool);
193 } 193 }
194 spin_unlock(&hb->lock); 194 spin_unlock(&hb->lock);
195 } 195 }
196 if (destroy) 196 if (destroy)
197 list_del(&pool->pool_list); 197 list_del(&pool->pool_list);
198 } 198 }
199 199
200 /* 200 /*
201 * A tmem_obj contains a radix-tree-like tree in which the intermediate 201 * A tmem_obj contains a radix-tree-like tree in which the intermediate
202 * nodes are called tmem_objnodes. (The kernel lib/radix-tree.c implementation 202 * nodes are called tmem_objnodes. (The kernel lib/radix-tree.c implementation
203 * is very specialized and tuned for specific uses and is not particularly 203 * is very specialized and tuned for specific uses and is not particularly
204 * suited for use from this code, though some code from the core algorithms has 204 * suited for use from this code, though some code from the core algorithms has
205 * been reused, thus the copyright notices below). Each tmem_objnode contains 205 * been reused, thus the copyright notices below). Each tmem_objnode contains
206 * a set of pointers which point to either a set of intermediate tmem_objnodes 206 * a set of pointers which point to either a set of intermediate tmem_objnodes
207 * or a set of of pampds. 207 * or a set of of pampds.
208 * 208 *
209 * Portions Copyright (C) 2001 Momchil Velikov 209 * Portions Copyright (C) 2001 Momchil Velikov
210 * Portions Copyright (C) 2001 Christoph Hellwig 210 * Portions Copyright (C) 2001 Christoph Hellwig
211 * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com> 211 * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
212 */ 212 */
213 213
214 struct tmem_objnode_tree_path { 214 struct tmem_objnode_tree_path {
215 struct tmem_objnode *objnode; 215 struct tmem_objnode *objnode;
216 int offset; 216 int offset;
217 }; 217 };
218 218
219 /* objnode height_to_maxindex translation */ 219 /* objnode height_to_maxindex translation */
220 static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1]; 220 static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1];
221 221
222 static void tmem_objnode_tree_init(void) 222 static void tmem_objnode_tree_init(void)
223 { 223 {
224 unsigned int ht, tmp; 224 unsigned int ht, tmp;
225 225
226 for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) { 226 for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) {
227 tmp = ht * OBJNODE_TREE_MAP_SHIFT; 227 tmp = ht * OBJNODE_TREE_MAP_SHIFT;
228 if (tmp >= OBJNODE_TREE_INDEX_BITS) 228 if (tmp >= OBJNODE_TREE_INDEX_BITS)
229 tmem_objnode_tree_h2max[ht] = ~0UL; 229 tmem_objnode_tree_h2max[ht] = ~0UL;
230 else 230 else
231 tmem_objnode_tree_h2max[ht] = 231 tmem_objnode_tree_h2max[ht] =
232 (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1; 232 (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
233 } 233 }
234 } 234 }
235 235
236 static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj) 236 static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj)
237 { 237 {
238 struct tmem_objnode *objnode; 238 struct tmem_objnode *objnode;
239 239
240 ASSERT_SENTINEL(obj, OBJ); 240 ASSERT_SENTINEL(obj, OBJ);
241 BUG_ON(obj->pool == NULL); 241 BUG_ON(obj->pool == NULL);
242 ASSERT_SENTINEL(obj->pool, POOL); 242 ASSERT_SENTINEL(obj->pool, POOL);
243 objnode = (*tmem_hostops.objnode_alloc)(obj->pool); 243 objnode = (*tmem_hostops.objnode_alloc)(obj->pool);
244 if (unlikely(objnode == NULL)) 244 if (unlikely(objnode == NULL))
245 goto out; 245 goto out;
246 objnode->obj = obj; 246 objnode->obj = obj;
247 SET_SENTINEL(objnode, OBJNODE); 247 SET_SENTINEL(objnode, OBJNODE);
248 memset(&objnode->slots, 0, sizeof(objnode->slots)); 248 memset(&objnode->slots, 0, sizeof(objnode->slots));
249 objnode->slots_in_use = 0; 249 objnode->slots_in_use = 0;
250 obj->objnode_count++; 250 obj->objnode_count++;
251 out: 251 out:
252 return objnode; 252 return objnode;
253 } 253 }
254 254
/*
 * Return an (empty) objnode to the host.  All slots must already be
 * NULL; the node's sentinel is poisoned and its owning obj's objnode
 * count decremented before the host's objnode_free callback runs.
 */
static void tmem_objnode_free(struct tmem_objnode *objnode)
{
        struct tmem_pool *pool;
        int i;

        BUG_ON(objnode == NULL);
        for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++)
                BUG_ON(objnode->slots[i] != NULL);
        ASSERT_SENTINEL(objnode, OBJNODE);
        INVERT_SENTINEL(objnode, OBJNODE);      /* poison to catch reuse */
        BUG_ON(objnode->obj == NULL);
        ASSERT_SENTINEL(objnode->obj, OBJ);
        /* pool must be captured before obj back-pointer is cleared */
        pool = objnode->obj->pool;
        BUG_ON(pool == NULL);
        ASSERT_SENTINEL(pool, POOL);
        objnode->obj->objnode_count--;
        objnode->obj = NULL;
        (*tmem_hostops.objnode_free)(objnode, pool);
}
274 274
/*
 * Look up @index in @obj's objnode tree and return the address of the
 * slot holding the associated pampd, or NULL if the index is out of
 * range for the tree's height or no entry exists on the path.  Callers
 * may read or overwrite *slot (see tmem_pampd_replace_in_obj()).
 */
static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
{
        unsigned int height, shift;
        struct tmem_objnode **slot = NULL;

        BUG_ON(obj == NULL);
        ASSERT_SENTINEL(obj, OBJ);
        BUG_ON(obj->pool == NULL);
        ASSERT_SENTINEL(obj->pool, POOL);

        height = obj->objnode_tree_height;
        /* index beyond what a tree of this height can address */
        if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height])
                goto out;
        /* height 0: the root pointer itself is the single (pampd) slot */
        if (height == 0 && obj->objnode_tree_root) {
                slot = &obj->objnode_tree_root;
                goto out;
        }
        /* descend one level per iteration, consuming MAP_SHIFT index bits */
        shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
        slot = &obj->objnode_tree_root;
        while (height > 0) {
                if (*slot == NULL)
                        goto out;       /* path not populated */
                slot = (struct tmem_objnode **)
                        ((*slot)->slots +
                                ((index >> shift) & OBJNODE_TREE_MAP_MASK));
                shift -= OBJNODE_TREE_MAP_SHIFT;
                height--;
        }
out:
        return slot != NULL ? (void **)slot : NULL;
}
309 309
310 static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index) 310 static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
311 { 311 {
312 struct tmem_objnode **slot; 312 struct tmem_objnode **slot;
313 313
314 slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index); 314 slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
315 return slot != NULL ? *slot : NULL; 315 return slot != NULL ? *slot : NULL;
316 } 316 }
317 317
318 static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index, 318 static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
319 void *new_pampd) 319 void *new_pampd)
320 { 320 {
321 struct tmem_objnode **slot; 321 struct tmem_objnode **slot;
322 void *ret = NULL; 322 void *ret = NULL;
323 323
324 slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index); 324 slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
325 if ((slot != NULL) && (*slot != NULL)) { 325 if ((slot != NULL) && (*slot != NULL)) {
326 void *old_pampd = *(void **)slot; 326 void *old_pampd = *(void **)slot;
327 *(void **)slot = new_pampd; 327 *(void **)slot = new_pampd;
328 (*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0); 328 (*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0);
329 ret = new_pampd; 329 ret = new_pampd;
330 } 330 }
331 return ret; 331 return ret;
332 } 332 }
333 333
334 static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index, 334 static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
335 void *pampd) 335 void *pampd)
336 { 336 {
337 int ret = 0; 337 int ret = 0;
338 struct tmem_objnode *objnode = NULL, *newnode, *slot; 338 struct tmem_objnode *objnode = NULL, *newnode, *slot;
339 unsigned int height, shift; 339 unsigned int height, shift;
340 int offset = 0; 340 int offset = 0;
341 341
342 /* if necessary, extend the tree to be higher */ 342 /* if necessary, extend the tree to be higher */
343 if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) { 343 if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) {
344 height = obj->objnode_tree_height + 1; 344 height = obj->objnode_tree_height + 1;
345 if (index > tmem_objnode_tree_h2max[height]) 345 if (index > tmem_objnode_tree_h2max[height])
346 while (index > tmem_objnode_tree_h2max[height]) 346 while (index > tmem_objnode_tree_h2max[height])
347 height++; 347 height++;
348 if (obj->objnode_tree_root == NULL) { 348 if (obj->objnode_tree_root == NULL) {
349 obj->objnode_tree_height = height; 349 obj->objnode_tree_height = height;
350 goto insert; 350 goto insert;
351 } 351 }
352 do { 352 do {
353 newnode = tmem_objnode_alloc(obj); 353 newnode = tmem_objnode_alloc(obj);
354 if (!newnode) { 354 if (!newnode) {
355 ret = -ENOMEM; 355 ret = -ENOMEM;
356 goto out; 356 goto out;
357 } 357 }
358 newnode->slots[0] = obj->objnode_tree_root; 358 newnode->slots[0] = obj->objnode_tree_root;
359 newnode->slots_in_use = 1; 359 newnode->slots_in_use = 1;
360 obj->objnode_tree_root = newnode; 360 obj->objnode_tree_root = newnode;
361 obj->objnode_tree_height++; 361 obj->objnode_tree_height++;
362 } while (height > obj->objnode_tree_height); 362 } while (height > obj->objnode_tree_height);
363 } 363 }
364 insert: 364 insert:
365 slot = obj->objnode_tree_root; 365 slot = obj->objnode_tree_root;
366 height = obj->objnode_tree_height; 366 height = obj->objnode_tree_height;
367 shift = (height-1) * OBJNODE_TREE_MAP_SHIFT; 367 shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
368 while (height > 0) { 368 while (height > 0) {
369 if (slot == NULL) { 369 if (slot == NULL) {
370 /* add a child objnode. */ 370 /* add a child objnode. */
371 slot = tmem_objnode_alloc(obj); 371 slot = tmem_objnode_alloc(obj);
372 if (!slot) { 372 if (!slot) {
373 ret = -ENOMEM; 373 ret = -ENOMEM;
374 goto out; 374 goto out;
375 } 375 }
376 if (objnode) { 376 if (objnode) {
377 377
378 objnode->slots[offset] = slot; 378 objnode->slots[offset] = slot;
379 objnode->slots_in_use++; 379 objnode->slots_in_use++;
380 } else 380 } else
381 obj->objnode_tree_root = slot; 381 obj->objnode_tree_root = slot;
382 } 382 }
383 /* go down a level */ 383 /* go down a level */
384 offset = (index >> shift) & OBJNODE_TREE_MAP_MASK; 384 offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
385 objnode = slot; 385 objnode = slot;
386 slot = objnode->slots[offset]; 386 slot = objnode->slots[offset];
387 shift -= OBJNODE_TREE_MAP_SHIFT; 387 shift -= OBJNODE_TREE_MAP_SHIFT;
388 height--; 388 height--;
389 } 389 }
390 BUG_ON(slot != NULL); 390 BUG_ON(slot != NULL);
391 if (objnode) { 391 if (objnode) {
392 objnode->slots_in_use++; 392 objnode->slots_in_use++;
393 objnode->slots[offset] = pampd; 393 objnode->slots[offset] = pampd;
394 } else 394 } else
395 obj->objnode_tree_root = pampd; 395 obj->objnode_tree_root = pampd;
396 obj->pampd_count++; 396 obj->pampd_count++;
397 out: 397 out:
398 return ret; 398 return ret;
399 } 399 }
400 400
/*
 * Remove and return the pampd stored at @index in @obj (NULL if none).
 * The descent records the objnodes visited in path[]; after the leaf
 * slot is cleared, the path is walked back up, freeing any objnode
 * whose slots_in_use dropped to zero, and the tree is shrunk while the
 * root has only its slot-0 child populated.
 */
static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index)
{
        struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1];
        struct tmem_objnode_tree_path *pathp = path;
        struct tmem_objnode *slot = NULL;
        unsigned int height, shift;
        int offset;

        BUG_ON(obj == NULL);
        ASSERT_SENTINEL(obj, OBJ);
        BUG_ON(obj->pool == NULL);
        ASSERT_SENTINEL(obj->pool, POOL);
        height = obj->objnode_tree_height;
        if (index > tmem_objnode_tree_h2max[height])
                goto out;       /* index beyond tree: nothing to delete */
        slot = obj->objnode_tree_root;
        /* height 0: root pointer is the single pampd slot */
        if (height == 0 && obj->objnode_tree_root) {
                obj->objnode_tree_root = NULL;
                goto out;
        }
        shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT;
        pathp->objnode = NULL;  /* sentinel terminating the unwind loop */
        do {
                if (slot == NULL)
                        goto out;       /* path not populated */
                pathp++;
                offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
                pathp->offset = offset;
                pathp->objnode = slot;
                slot = slot->slots[offset];
                shift -= OBJNODE_TREE_MAP_SHIFT;
                height--;
        } while (height > 0);
        if (slot == NULL)
                goto out;       /* leaf slot empty */
        /* unwind: clear the leaf slot, then free now-empty objnodes */
        while (pathp->objnode) {
                pathp->objnode->slots[pathp->offset] = NULL;
                pathp->objnode->slots_in_use--;
                if (pathp->objnode->slots_in_use) {
                        if (pathp->objnode == obj->objnode_tree_root) {
                                /* shrink: collapse single-child roots */
                                while (obj->objnode_tree_height > 0 &&
                                obj->objnode_tree_root->slots_in_use == 1 &&
                                obj->objnode_tree_root->slots[0]) {
                                        struct tmem_objnode *to_free =
                                                obj->objnode_tree_root;

                                        obj->objnode_tree_root =
                                                        to_free->slots[0];
                                        obj->objnode_tree_height--;
                                        to_free->slots[0] = NULL;
                                        to_free->slots_in_use = 0;
                                        tmem_objnode_free(to_free);
                                }
                        }
                        goto out;
                }
                tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */
                pathp--;
        }
        /* every objnode on the path was freed: tree is now empty */
        obj->objnode_tree_height = 0;
        obj->objnode_tree_root = NULL;

out:
        if (slot != NULL)
                obj->pampd_count--;     /* a pampd was actually removed */
        BUG_ON(obj->pampd_count < 0);
        return slot;
}
469 469
/*
 * Recursively walk the objnode tree rooted at @objnode (which sits at
 * height @ht), destroying pampds at the leaves (ht == 1) via the PAM
 * free callback and freeing interior objnodes on the way back up.
 * @objnode itself is NOT freed; the caller does that.
 */
static void tmem_objnode_node_destroy(struct tmem_obj *obj,
                                        struct tmem_objnode *objnode,
                                        unsigned int ht)
{
        int i;

        if (ht == 0)
                return;
        for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) {
                if (objnode->slots[i]) {
                        if (ht == 1) {
                                /* slots at height 1 hold pampds, not nodes */
                                obj->pampd_count--;
                                (*tmem_pamops.free)(objnode->slots[i],
                                                obj->pool, NULL, 0);
                                objnode->slots[i] = NULL;
                                continue;
                        }
                        tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1);
                        tmem_objnode_free(objnode->slots[i]);
                        objnode->slots[i] = NULL;
                }
        }
}
494 494
/*
 * Destroy every pampd and objnode in @obj's tree, leaving the obj with
 * an empty (NULL-root, height-0) tree, then notify the PAM layer via
 * free_obj.  A height-0 tree stores its single pampd directly in the
 * root pointer, so that case is handled without recursion.
 */
static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
{
        if (obj->objnode_tree_root == NULL)
                return;
        if (obj->objnode_tree_height == 0) {
                /* root pointer IS the lone pampd */
                obj->pampd_count--;
                (*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0);
        } else {
                tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
                                                obj->objnode_tree_height);
                tmem_objnode_free(obj->objnode_tree_root);
                obj->objnode_tree_height = 0;
        }
        obj->objnode_tree_root = NULL;
        (*tmem_pamops.free_obj)(obj->pool, obj);
}
511 511
512 /* 512 /*
513 * Tmem is operated on by a set of well-defined actions: 513 * Tmem is operated on by a set of well-defined actions:
514 * "put", "get", "flush", "flush_object", "new pool" and "destroy pool". 514 * "put", "get", "flush", "flush_object", "new pool" and "destroy pool".
515 * (The tmem ABI allows for subpages and exchanges but these operations 515 * (The tmem ABI allows for subpages and exchanges but these operations
516 * are not included in this implementation.) 516 * are not included in this implementation.)
517 * 517 *
518 * These "tmem core" operations are implemented in the following functions. 518 * These "tmem core" operations are implemented in the following functions.
519 */ 519 */
520 520
521 /* 521 /*
522 * "Put" a page, e.g. copy a page from the kernel into newly allocated 522 * "Put" a page, e.g. copy a page from the kernel into newly allocated
523 * PAM space (if such space is available). Tmem_put is complicated by 523 * PAM space (if such space is available). Tmem_put is complicated by
524 * a corner case: What if a page with matching handle already exists in 524 * a corner case: What if a page with matching handle already exists in
525 * tmem? To guarantee coherency, one of two actions is necessary: Either 525 * tmem? To guarantee coherency, one of two actions is necessary: Either
526 * the data for the page must be overwritten, or the page must be 526 * the data for the page must be overwritten, or the page must be
527 * "flushed" so that the data is not accessible to a subsequent "get". 527 * "flushed" so that the data is not accessible to a subsequent "get".
528 * Since these "duplicate puts" are relatively rare, this implementation 528 * Since these "duplicate puts" are relatively rare, this implementation
529 * always flushes for simplicity. 529 * always flushes for simplicity.
530 */ 530 */
/*
 * Insert a page into tmem under (pool, oid, index).  On a duplicate put
 * the existing pampd is flushed first (see the coherency comment above).
 * Returns 0 on success, -ENOMEM if the object or pampd could not be
 * allocated or the objnode tree could not be grown.
 *
 * Exactly one of objfound/objnew is non-NULL past the lookup: objfound
 * means we are adding into a pre-existing object, objnew means the
 * object was freshly allocated (or emptied by a dup-put flush) and must
 * be torn down again on the error paths.
 */
int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
				char *data, size_t size, bool raw, bool ephemeral)
{
	struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
	void *pampd = NULL, *pampd_del = NULL;
	int ret = -ENOMEM;
	struct tmem_hashbucket *hb;

	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
	spin_lock(&hb->lock);
	obj = objfound = tmem_obj_find(hb, oidp);
	if (obj != NULL) {
		pampd = tmem_pampd_lookup_in_obj(objfound, index);
		if (pampd != NULL) {
			/* if found, is a dup put, flush the old one */
			pampd_del = tmem_pampd_delete_from_obj(obj, index);
			BUG_ON(pampd_del != pampd);
			(*tmem_pamops.free)(pampd, pool, oidp, index);
			if (obj->pampd_count == 0) {
				/* flush emptied the object: treat it as new
				 * so error paths below free it */
				objnew = obj;
				objfound = NULL;
			}
			pampd = NULL;
		}
	} else {
		obj = objnew = (*tmem_hostops.obj_alloc)(pool);
		if (unlikely(obj == NULL)) {
			ret = -ENOMEM;
			goto out;
		}
		tmem_obj_init(obj, hb, pool, oidp);
	}
	BUG_ON(obj == NULL);
	BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
	/* host copies/compresses the data into PAM space */
	pampd = (*tmem_pamops.create)(data, size, raw, ephemeral,
					obj->pool, &obj->oid, index);
	if (unlikely(pampd == NULL))
		goto free;
	ret = tmem_pampd_add_to_obj(obj, index, pampd);
	if (unlikely(ret == -ENOMEM))
		/* may have partially built objnode tree ("stump") */
		goto delete_and_free;
	goto out;

delete_and_free:
	(void)tmem_pampd_delete_from_obj(obj, index);
free:
	if (pampd)
		(*tmem_pamops.free)(pampd, pool, NULL, 0);
	if (objnew) {
		/* only a freshly-created (or emptied) object is torn down */
		tmem_obj_free(objnew, hb);
		(*tmem_hostops.obj_free)(objnew, pool);
	}
out:
	spin_unlock(&hb->lock);
	return ret;
}
588 588
589 /* 589 /*
590 * "Get" a page, e.g. if one can be found, copy the tmem page with the 590 * "Get" a page, e.g. if one can be found, copy the tmem page with the
591 * matching handle from PAM space to the kernel. By tmem definition, 591 * matching handle from PAM space to the kernel. By tmem definition,
592 * when a "get" is successful on an ephemeral page, the page is "flushed", 592 * when a "get" is successful on an ephemeral page, the page is "flushed",
593 * and when a "get" is successful on a persistent page, the page is retained 593 * and when a "get" is successful on a persistent page, the page is retained
594 * in tmem. Note that to preserve 594 * in tmem. Note that to preserve
595 * coherency, "get" can never be skipped if tmem contains the data. 595 * coherency, "get" can never be skipped if tmem contains the data.
596 * That is, if a get is done with a certain handle and fails, any 596 * That is, if a get is done with a certain handle and fails, any
597 * subsequent "get" must also fail (unless of course there is a 597 * subsequent "get" must also fail (unless of course there is a
598 * "put" done with the same handle). 598 * "put" done with the same handle).
599 599
600 */ 600 */
601 int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, 601 int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
602 char *data, size_t *size, bool raw, int get_and_free) 602 char *data, size_t *size, bool raw, int get_and_free)
603 { 603 {
604 struct tmem_obj *obj; 604 struct tmem_obj *obj;
605 void *pampd; 605 void *pampd;
606 bool ephemeral = is_ephemeral(pool); 606 bool ephemeral = is_ephemeral(pool);
607 uint32_t ret = -1; 607 int ret = -1;
608 struct tmem_hashbucket *hb; 608 struct tmem_hashbucket *hb;
609 bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral); 609 bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
610 bool lock_held = false; 610 bool lock_held = false;
611 611
612 hb = &pool->hashbucket[tmem_oid_hash(oidp)]; 612 hb = &pool->hashbucket[tmem_oid_hash(oidp)];
613 spin_lock(&hb->lock); 613 spin_lock(&hb->lock);
614 lock_held = true; 614 lock_held = true;
615 obj = tmem_obj_find(hb, oidp); 615 obj = tmem_obj_find(hb, oidp);
616 if (obj == NULL) 616 if (obj == NULL)
617 goto out; 617 goto out;
618 if (free) 618 if (free)
619 pampd = tmem_pampd_delete_from_obj(obj, index); 619 pampd = tmem_pampd_delete_from_obj(obj, index);
620 else 620 else
621 pampd = tmem_pampd_lookup_in_obj(obj, index); 621 pampd = tmem_pampd_lookup_in_obj(obj, index);
622 if (pampd == NULL) 622 if (pampd == NULL)
623 goto out; 623 goto out;
624 if (free) { 624 if (free) {
625 if (obj->pampd_count == 0) { 625 if (obj->pampd_count == 0) {
626 tmem_obj_free(obj, hb); 626 tmem_obj_free(obj, hb);
627 (*tmem_hostops.obj_free)(obj, pool); 627 (*tmem_hostops.obj_free)(obj, pool);
628 obj = NULL; 628 obj = NULL;
629 } 629 }
630 } 630 }
631 if (tmem_pamops.is_remote(pampd)) { 631 if (tmem_pamops.is_remote(pampd)) {
632 lock_held = false; 632 lock_held = false;
633 spin_unlock(&hb->lock); 633 spin_unlock(&hb->lock);
634 } 634 }
635 if (free) 635 if (free)
636 ret = (*tmem_pamops.get_data_and_free)( 636 ret = (*tmem_pamops.get_data_and_free)(
637 data, size, raw, pampd, pool, oidp, index); 637 data, size, raw, pampd, pool, oidp, index);
638 else 638 else
639 ret = (*tmem_pamops.get_data)( 639 ret = (*tmem_pamops.get_data)(
640 data, size, raw, pampd, pool, oidp, index); 640 data, size, raw, pampd, pool, oidp, index);
641 if (ret < 0) 641 if (ret < 0)
642 goto out; 642 goto out;
643 ret = 0; 643 ret = 0;
644 out: 644 out:
645 if (lock_held) 645 if (lock_held)
646 spin_unlock(&hb->lock); 646 spin_unlock(&hb->lock);
647 return ret; 647 return ret;
648 } 648 }
649 649
650 /* 650 /*
651 * If a page in tmem matches the handle, "flush" this page from tmem such 651 * If a page in tmem matches the handle, "flush" this page from tmem such
652 * that any subsequent "get" does not succeed (unless, of course, there 652 * that any subsequent "get" does not succeed (unless, of course, there
653 * was another "put" with the same handle). 653 * was another "put" with the same handle).
654 */ 654 */
/*
 * Remove the page at (pool, oid, index) so later gets on the handle
 * fail.  Returns 0 if a page was flushed, -1 if no matching page.
 */
int tmem_flush_page(struct tmem_pool *pool,
				struct tmem_oid *oidp, uint32_t index)
{
	struct tmem_obj *obj;
	void *pampd;
	int ret = -1;	/* -1 = no matching object or page */
	struct tmem_hashbucket *hb;

	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
	spin_lock(&hb->lock);
	obj = tmem_obj_find(hb, oidp);
	if (obj == NULL)
		goto out;
	pampd = tmem_pampd_delete_from_obj(obj, index);
	if (pampd == NULL)
		goto out;
	(*tmem_pamops.free)(pampd, pool, oidp, index);
	/* last page gone: the object itself can be reclaimed */
	if (obj->pampd_count == 0) {
		tmem_obj_free(obj, hb);
		(*tmem_hostops.obj_free)(obj, pool);
	}
	ret = 0;

out:
	spin_unlock(&hb->lock);
	return ret;
}
682 682
683 /* 683 /*
684 * If a page in tmem matches the handle, replace the page so that any 684 * If a page in tmem matches the handle, replace the page so that any
685 * subsequent "get" gets the new page. Returns 0 if 685 * subsequent "get" gets the new page. Returns 0 if
686 * there was a page to replace, else returns -1. 686 * there was a page to replace, else returns -1.
687 */ 687 */
/*
 * Swap in new_pampd for the page at (pool, oid, index).  Returns the
 * result of pamops.replace_in_obj on success paths, -1 if no object
 * matched the oid.
 */
int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
			uint32_t index, void *new_pampd)
{
	struct tmem_obj *obj;
	int ret = -1;	/* -1 = no object with this oid */
	struct tmem_hashbucket *hb;

	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
	spin_lock(&hb->lock);
	obj = tmem_obj_find(hb, oidp);
	if (obj == NULL)
		goto out;
	/*
	 * NOTE(review): replace_in_obj is handed the value RETURNED by
	 * tmem_pampd_replace_in_obj, not the caller's new_pampd — verify
	 * against that helper's contract (not visible in this chunk).
	 */
	new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd);
	ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj);
out:
	spin_unlock(&hb->lock);
	return ret;
}
706 706
707 /* 707 /*
708 * "Flush" all pages in tmem matching this oid. 708 * "Flush" all pages in tmem matching this oid.
709 */ 709 */
/*
 * Flush every page belonging to this oid and free the object itself.
 * Returns 0 if an object was flushed, -1 if none matched.
 */
int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp)
{
	struct tmem_obj *obj;
	struct tmem_hashbucket *hb;
	int ret = -1;	/* -1 = no object with this oid */

	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
	spin_lock(&hb->lock);
	obj = tmem_obj_find(hb, oidp);
	if (obj == NULL)
		goto out;
	/* free all pampds first, then the object's tree and host state */
	tmem_pampd_destroy_all_in_obj(obj);
	tmem_obj_free(obj, hb);
	(*tmem_hostops.obj_free)(obj, pool);
	ret = 0;

out:
	spin_unlock(&hb->lock);
	return ret;
}
730 730
731 /* 731 /*
732 * "Flush" all pages (and tmem_objs) from this tmem_pool and disable 732 * "Flush" all pages (and tmem_objs) from this tmem_pool and disable
733 * all subsequent access to this tmem_pool. 733 * all subsequent access to this tmem_pool.
734 */ 734 */
735 int tmem_destroy_pool(struct tmem_pool *pool) 735 int tmem_destroy_pool(struct tmem_pool *pool)
736 { 736 {
737 int ret = -1; 737 int ret = -1;
738 738
739 if (pool == NULL) 739 if (pool == NULL)
740 goto out; 740 goto out;
741 tmem_pool_flush(pool, 1); 741 tmem_pool_flush(pool, 1);
742 ret = 0; 742 ret = 0;
743 out: 743 out:
744 return ret; 744 return ret;
745 } 745 }
746 746
747 static LIST_HEAD(tmem_global_pool_list); 747 static LIST_HEAD(tmem_global_pool_list);
748 748
749 /* 749 /*
750 * Create a new tmem_pool with the provided flag and return 750 * Create a new tmem_pool with the provided flag and return
751 * a pool id provided by the tmem host implementation. 751 * a pool id provided by the tmem host implementation.
752 */ 752 */
/*
 * Initialize a host-allocated tmem_pool: empty per-bucket rbtrees and
 * locks, zero object count, and registration on the global pool list,
 * with the persistent/shared attributes decoded from flags.
 */
void tmem_new_pool(struct tmem_pool *pool, uint32_t flags)
{
	int persistent = flags & TMEM_POOL_PERSIST;
	int shared = flags & TMEM_POOL_SHARED;
	struct tmem_hashbucket *hb = &pool->hashbucket[0];
	int i;

	/* every hashbucket starts with an empty rbtree and its own lock */
	for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
		hb->obj_rb_root = RB_ROOT;
		spin_lock_init(&hb->lock);
	}
	INIT_LIST_HEAD(&pool->pool_list);
	atomic_set(&pool->obj_count, 0);
	SET_SENTINEL(pool, POOL);	/* debug marker checked elsewhere */
	list_add_tail(&pool->pool_list, &tmem_global_pool_list);
	pool->persistent = persistent;
	pool->shared = shared;
}
771 771