Commit 03afc0e25f7fc03537014a770f4c54ebbe63a24c
Committed by
Linus Torvalds
1 parent
bfc8c90139
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
slab: get_online_mems for kmem_cache_{create,destroy,shrink}
When we create a sl[au]b cache, we allocate kmem_cache_node structures for each online NUMA node. To handle nodes taken online/offline, we register memory hotplug notifier and allocate/free kmem_cache_node corresponding to the node that changes its state for each kmem cache. To synchronize between the two paths we hold the slab_mutex during both the cache creation/destruction path and while tuning per-node parts of kmem caches in memory hotplug handler, but that's not quite right, because it does not guarantee that a newly created cache will have all kmem_cache_nodes initialized in case it races with memory hotplug. For instance, in case of slub: CPU0 CPU1 ---- ---- kmem_cache_create: online_pages: __kmem_cache_create: slab_memory_callback: slab_mem_going_online_callback: lock slab_mutex for each slab_caches list entry allocate kmem_cache node unlock slab_mutex lock slab_mutex init_kmem_cache_nodes: for_each_node_state(node, N_NORMAL_MEMORY) allocate kmem_cache node add kmem_cache to slab_caches list unlock slab_mutex online_pages (continued): node_states_set_node As a result we'll get a kmem cache with not all kmem_cache_nodes allocated. To avoid issues like that we should hold get/put_online_mems() during the whole kmem cache creation/destruction/shrink paths, just like we deal with cpu hotplug. This patch does the trick. Note that after it's applied, there is no need to take the slab_mutex for kmem_cache_shrink any more, so it is removed from there. Signed-off-by: Vladimir Davydov <vdavydov@parallels.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: Tang Chen <tangchen@cn.fujitsu.com> Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com> Cc: Toshi Kani <toshi.kani@hp.com> Cc: Xishi Qiu <qiuxishi@huawei.com> Cc: Jiang Liu <liuj97@gmail.com> Cc: Rafael J. 
Wysocki <rafael.j.wysocki@intel.com> Cc: David Rientjes <rientjes@google.com> Cc: Wen Congyang <wency@cn.fujitsu.com> Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 5 changed files with 39 additions and 31 deletions Side-by-side Diff
mm/slab.c
... | ... | @@ -2480,8 +2480,7 @@ |
2480 | 2480 | return nr_freed; |
2481 | 2481 | } |
2482 | 2482 | |
2483 | -/* Called with slab_mutex held to protect against cpu hotplug */ | |
2484 | -static int __cache_shrink(struct kmem_cache *cachep) | |
2483 | +int __kmem_cache_shrink(struct kmem_cache *cachep) | |
2485 | 2484 | { |
2486 | 2485 | int ret = 0, i = 0; |
2487 | 2486 | struct kmem_cache_node *n; |
2488 | 2487 | |
... | ... | @@ -2502,32 +2501,11 @@ |
2502 | 2501 | return (ret ? 1 : 0); |
2503 | 2502 | } |
2504 | 2503 | |
2505 | -/** | |
2506 | - * kmem_cache_shrink - Shrink a cache. | |
2507 | - * @cachep: The cache to shrink. | |
2508 | - * | |
2509 | - * Releases as many slabs as possible for a cache. | |
2510 | - * To help debugging, a zero exit status indicates all slabs were released. | |
2511 | - */ | |
2512 | -int kmem_cache_shrink(struct kmem_cache *cachep) | |
2513 | -{ | |
2514 | - int ret; | |
2515 | - BUG_ON(!cachep || in_interrupt()); | |
2516 | - | |
2517 | - get_online_cpus(); | |
2518 | - mutex_lock(&slab_mutex); | |
2519 | - ret = __cache_shrink(cachep); | |
2520 | - mutex_unlock(&slab_mutex); | |
2521 | - put_online_cpus(); | |
2522 | - return ret; | |
2523 | -} | |
2524 | -EXPORT_SYMBOL(kmem_cache_shrink); | |
2525 | - | |
2526 | 2504 | int __kmem_cache_shutdown(struct kmem_cache *cachep) |
2527 | 2505 | { |
2528 | 2506 | int i; |
2529 | 2507 | struct kmem_cache_node *n; |
2530 | - int rc = __cache_shrink(cachep); | |
2508 | + int rc = __kmem_cache_shrink(cachep); | |
2531 | 2509 | |
2532 | 2510 | if (rc) |
2533 | 2511 | return rc; |
mm/slab.h
... | ... | @@ -91,6 +91,7 @@ |
91 | 91 | #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) |
92 | 92 | |
93 | 93 | int __kmem_cache_shutdown(struct kmem_cache *); |
94 | +int __kmem_cache_shrink(struct kmem_cache *); | |
94 | 95 | void slab_kmem_cache_release(struct kmem_cache *); |
95 | 96 | |
96 | 97 | struct seq_file; |
mm/slab_common.c
... | ... | @@ -205,6 +205,8 @@ |
205 | 205 | int err; |
206 | 206 | |
207 | 207 | get_online_cpus(); |
208 | + get_online_mems(); | |
209 | + | |
208 | 210 | mutex_lock(&slab_mutex); |
209 | 211 | |
210 | 212 | err = kmem_cache_sanity_check(name, size); |
... | ... | @@ -239,6 +241,8 @@ |
239 | 241 | |
240 | 242 | out_unlock: |
241 | 243 | mutex_unlock(&slab_mutex); |
244 | + | |
245 | + put_online_mems(); | |
242 | 246 | put_online_cpus(); |
243 | 247 | |
244 | 248 | if (err) { |
... | ... | @@ -272,6 +276,8 @@ |
272 | 276 | char *cache_name; |
273 | 277 | |
274 | 278 | get_online_cpus(); |
279 | + get_online_mems(); | |
280 | + | |
275 | 281 | mutex_lock(&slab_mutex); |
276 | 282 | |
277 | 283 | /* |
... | ... | @@ -295,6 +301,8 @@ |
295 | 301 | |
296 | 302 | out_unlock: |
297 | 303 | mutex_unlock(&slab_mutex); |
304 | + | |
305 | + put_online_mems(); | |
298 | 306 | put_online_cpus(); |
299 | 307 | } |
300 | 308 | |
... | ... | @@ -328,6 +336,8 @@ |
328 | 336 | void kmem_cache_destroy(struct kmem_cache *s) |
329 | 337 | { |
330 | 338 | get_online_cpus(); |
339 | + get_online_mems(); | |
340 | + | |
331 | 341 | mutex_lock(&slab_mutex); |
332 | 342 | |
333 | 343 | s->refcount--; |
334 | 344 | |
335 | 345 | |
... | ... | @@ -359,14 +369,35 @@ |
359 | 369 | #else |
360 | 370 | slab_kmem_cache_release(s); |
361 | 371 | #endif |
362 | - goto out_put_cpus; | |
372 | + goto out; | |
363 | 373 | |
364 | 374 | out_unlock: |
365 | 375 | mutex_unlock(&slab_mutex); |
366 | -out_put_cpus: | |
376 | +out: | |
377 | + put_online_mems(); | |
367 | 378 | put_online_cpus(); |
368 | 379 | } |
369 | 380 | EXPORT_SYMBOL(kmem_cache_destroy); |
381 | + | |
382 | +/** | |
383 | + * kmem_cache_shrink - Shrink a cache. | |
384 | + * @cachep: The cache to shrink. | |
385 | + * | |
386 | + * Releases as many slabs as possible for a cache. | |
387 | + * To help debugging, a zero exit status indicates all slabs were released. | |
388 | + */ | |
389 | +int kmem_cache_shrink(struct kmem_cache *cachep) | |
390 | +{ | |
391 | + int ret; | |
392 | + | |
393 | + get_online_cpus(); | |
394 | + get_online_mems(); | |
395 | + ret = __kmem_cache_shrink(cachep); | |
396 | + put_online_mems(); | |
397 | + put_online_cpus(); | |
398 | + return ret; | |
399 | +} | |
400 | +EXPORT_SYMBOL(kmem_cache_shrink); | |
370 | 401 | |
371 | 402 | int slab_is_available(void) |
372 | 403 | { |
mm/slob.c
... | ... | @@ -620,11 +620,10 @@ |
620 | 620 | return 0; |
621 | 621 | } |
622 | 622 | |
623 | -int kmem_cache_shrink(struct kmem_cache *d) | |
623 | +int __kmem_cache_shrink(struct kmem_cache *d) | |
624 | 624 | { |
625 | 625 | return 0; |
626 | 626 | } |
627 | -EXPORT_SYMBOL(kmem_cache_shrink); | |
628 | 627 | |
629 | 628 | struct kmem_cache kmem_cache_boot = { |
630 | 629 | .name = "kmem_cache", |
mm/slub.c
... | ... | @@ -3398,7 +3398,7 @@ |
3398 | 3398 | * being allocated from last increasing the chance that the last objects |
3399 | 3399 | * are freed in them. |
3400 | 3400 | */ |
3401 | -int kmem_cache_shrink(struct kmem_cache *s) | |
3401 | +int __kmem_cache_shrink(struct kmem_cache *s) | |
3402 | 3402 | { |
3403 | 3403 | int node; |
3404 | 3404 | int i; |
... | ... | @@ -3454,7 +3454,6 @@ |
3454 | 3454 | kfree(slabs_by_inuse); |
3455 | 3455 | return 0; |
3456 | 3456 | } |
3457 | -EXPORT_SYMBOL(kmem_cache_shrink); | |
3458 | 3457 | |
3459 | 3458 | static int slab_mem_going_offline_callback(void *arg) |
3460 | 3459 | { |
... | ... | @@ -3462,7 +3461,7 @@ |
3462 | 3461 | |
3463 | 3462 | mutex_lock(&slab_mutex); |
3464 | 3463 | list_for_each_entry(s, &slab_caches, list) |
3465 | - kmem_cache_shrink(s); | |
3464 | + __kmem_cache_shrink(s); | |
3466 | 3465 | mutex_unlock(&slab_mutex); |
3467 | 3466 | |
3468 | 3467 | return 0; |