Commit 03afc0e25f7fc03537014a770f4c54ebbe63a24c

Authored by Vladimir Davydov
Committed by Linus Torvalds
1 parent bfc8c90139

slab: get_online_mems for kmem_cache_{create,destroy,shrink}

When we create a sl[au]b cache, we allocate kmem_cache_node structures
for each online NUMA node.  To handle nodes being taken online/offline,
we register a memory hotplug notifier and, for each kmem cache,
allocate/free the kmem_cache_node corresponding to the node that changes
its state.
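
For slub, the notifier side of this (the slab_mem_going_online_callback
step in the diagram below) does roughly the following.  This is a
simplified sketch to show the shape of the path, not the exact kernel
code:

    /*
     * Simplified sketch of the slub memory hotplug notifier path: for
     * every existing cache, allocate the kmem_cache_node for the node
     * that is coming online.  (kmem_cache_node here is slub's internal
     * cache of struct kmem_cache_node objects.)
     */
    static int slab_mem_going_online_callback(void *arg)
    {
            struct memory_notify *marg = arg;
            int nid = marg->status_change_nid_normal;
            struct kmem_cache *s;
            int ret = 0;

            if (nid < 0)    /* no normal memory is being added */
                    return 0;

            mutex_lock(&slab_mutex);
            list_for_each_entry(s, &slab_caches, list) {
                    struct kmem_cache_node *n;

                    n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
                    if (!n) {
                            ret = -ENOMEM;
                            break;
                    }
                    init_kmem_cache_node(n);
                    s->node[nid] = n;
            }
            mutex_unlock(&slab_mutex);
            return ret;
    }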

To synchronize the two paths we hold the slab_mutex both on the cache
creation/destruction path and while tuning per-node parts of kmem caches
in the memory hotplug handler.  That is not quite right, though, because
it does not guarantee that a newly created cache will have all of its
kmem_cache_nodes initialized if it races with memory hotplug.  For
instance, in the case of slub:

    CPU0                            CPU1
    ----                            ----
    kmem_cache_create:              online_pages:
     __kmem_cache_create:            slab_memory_callback:
                                      slab_mem_going_online_callback:
                                       lock slab_mutex
                                       for each slab_caches list entry
                                           allocate kmem_cache node
                                       unlock slab_mutex
      lock slab_mutex
      init_kmem_cache_nodes:
       for_each_node_state(node, N_NORMAL_MEMORY)
           allocate kmem_cache node
      add kmem_cache to slab_caches list
      unlock slab_mutex
                                    online_pages (continued):
                                     node_states_set_node

As a result, we get a kmem cache whose kmem_cache_nodes are not all
allocated.

To avoid issues like that, we should hold get/put_online_mems() across
the whole kmem cache creation/destruction/shrink paths, just as we
already do for cpu hotplug.  This patch does exactly that.
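
Concretely, after this patch the affected paths follow this nesting (a
minimal sketch of the pattern visible in the diff below, not a verbatim
quote):

    get_online_cpus();
    get_online_mems();
    mutex_lock(&slab_mutex);
    /*
     * Create or destroy the cache here.  The set of nodes with normal
     * memory, and hence the per-node parts of every kmem cache, cannot
     * change until put_online_mems(), so a cache created here cannot
     * miss a kmem_cache_node for a node that comes online concurrently.
     */
    mutex_unlock(&slab_mutex);
    put_online_mems();
    put_online_cpus();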

Note that once this is applied, there is no longer any need to take the
slab_mutex in kmem_cache_shrink (it was held there to protect against
cpu hotplug), so it is removed from that path.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 5 changed files with 39 additions and 31 deletions

... ... @@ -2480,8 +2480,7 @@
2480 2480 return nr_freed;
2481 2481 }
2482 2482  
2483   -/* Called with slab_mutex held to protect against cpu hotplug */
2484   -static int __cache_shrink(struct kmem_cache *cachep)
  2483 +int __kmem_cache_shrink(struct kmem_cache *cachep)
2485 2484 {
2486 2485 int ret = 0, i = 0;
2487 2486 struct kmem_cache_node *n;
2488 2487  
... ... @@ -2502,32 +2501,11 @@
2502 2501 return (ret ? 1 : 0);
2503 2502 }
2504 2503  
2505   -/**
2506   - * kmem_cache_shrink - Shrink a cache.
2507   - * @cachep: The cache to shrink.
2508   - *
2509   - * Releases as many slabs as possible for a cache.
2510   - * To help debugging, a zero exit status indicates all slabs were released.
2511   - */
2512   -int kmem_cache_shrink(struct kmem_cache *cachep)
2513   -{
2514   - int ret;
2515   - BUG_ON(!cachep || in_interrupt());
2516   -
2517   - get_online_cpus();
2518   - mutex_lock(&slab_mutex);
2519   - ret = __cache_shrink(cachep);
2520   - mutex_unlock(&slab_mutex);
2521   - put_online_cpus();
2522   - return ret;
2523   -}
2524   -EXPORT_SYMBOL(kmem_cache_shrink);
2525   -
2526 2504 int __kmem_cache_shutdown(struct kmem_cache *cachep)
2527 2505 {
2528 2506 int i;
2529 2507 struct kmem_cache_node *n;
2530   - int rc = __cache_shrink(cachep);
  2508 + int rc = __kmem_cache_shrink(cachep);
2531 2509  
2532 2510 if (rc)
2533 2511 return rc;
... ... @@ -91,6 +91,7 @@
91 91 #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
92 92  
93 93 int __kmem_cache_shutdown(struct kmem_cache *);
  94 +int __kmem_cache_shrink(struct kmem_cache *);
94 95 void slab_kmem_cache_release(struct kmem_cache *);
95 96  
96 97 struct seq_file;
... ... @@ -205,6 +205,8 @@
205 205 int err;
206 206  
207 207 get_online_cpus();
  208 + get_online_mems();
  209 +
208 210 mutex_lock(&slab_mutex);
209 211  
210 212 err = kmem_cache_sanity_check(name, size);
... ... @@ -239,6 +241,8 @@
239 241  
240 242 out_unlock:
241 243 mutex_unlock(&slab_mutex);
  244 +
  245 + put_online_mems();
242 246 put_online_cpus();
243 247  
244 248 if (err) {
... ... @@ -272,6 +276,8 @@
272 276 char *cache_name;
273 277  
274 278 get_online_cpus();
  279 + get_online_mems();
  280 +
275 281 mutex_lock(&slab_mutex);
276 282  
277 283 /*
... ... @@ -295,6 +301,8 @@
295 301  
296 302 out_unlock:
297 303 mutex_unlock(&slab_mutex);
  304 +
  305 + put_online_mems();
298 306 put_online_cpus();
299 307 }
300 308  
... ... @@ -328,6 +336,8 @@
328 336 void kmem_cache_destroy(struct kmem_cache *s)
329 337 {
330 338 get_online_cpus();
  339 + get_online_mems();
  340 +
331 341 mutex_lock(&slab_mutex);
332 342  
333 343 s->refcount--;
334 344  
335 345  
... ... @@ -359,14 +369,35 @@
359 369 #else
360 370 slab_kmem_cache_release(s);
361 371 #endif
362   - goto out_put_cpus;
  372 + goto out;
363 373  
364 374 out_unlock:
365 375 mutex_unlock(&slab_mutex);
366   -out_put_cpus:
  376 +out:
  377 + put_online_mems();
367 378 put_online_cpus();
368 379 }
369 380 EXPORT_SYMBOL(kmem_cache_destroy);
  381 +
  382 +/**
  383 + * kmem_cache_shrink - Shrink a cache.
  384 + * @cachep: The cache to shrink.
  385 + *
  386 + * Releases as many slabs as possible for a cache.
  387 + * To help debugging, a zero exit status indicates all slabs were released.
  388 + */
  389 +int kmem_cache_shrink(struct kmem_cache *cachep)
  390 +{
  391 + int ret;
  392 +
  393 + get_online_cpus();
  394 + get_online_mems();
  395 + ret = __kmem_cache_shrink(cachep);
  396 + put_online_mems();
  397 + put_online_cpus();
  398 + return ret;
  399 +}
  400 +EXPORT_SYMBOL(kmem_cache_shrink);
370 401  
371 402 int slab_is_available(void)
372 403 {
... ... @@ -620,11 +620,10 @@
620 620 return 0;
621 621 }
622 622  
623   -int kmem_cache_shrink(struct kmem_cache *d)
  623 +int __kmem_cache_shrink(struct kmem_cache *d)
624 624 {
625 625 return 0;
626 626 }
627   -EXPORT_SYMBOL(kmem_cache_shrink);
628 627  
629 628 struct kmem_cache kmem_cache_boot = {
630 629 .name = "kmem_cache",
... ... @@ -3398,7 +3398,7 @@
3398 3398 * being allocated from last increasing the chance that the last objects
3399 3399 * are freed in them.
3400 3400 */
3401   -int kmem_cache_shrink(struct kmem_cache *s)
  3401 +int __kmem_cache_shrink(struct kmem_cache *s)
3402 3402 {
3403 3403 int node;
3404 3404 int i;
... ... @@ -3454,7 +3454,6 @@
3454 3454 kfree(slabs_by_inuse);
3455 3455 return 0;
3456 3456 }
3457   -EXPORT_SYMBOL(kmem_cache_shrink);
3458 3457  
3459 3458 static int slab_mem_going_offline_callback(void *arg)
3460 3459 {
... ... @@ -3462,7 +3461,7 @@
3462 3461  
3463 3462 mutex_lock(&slab_mutex);
3464 3463 list_for_each_entry(s, &slab_caches, list)
3465   - kmem_cache_shrink(s);
  3464 + __kmem_cache_shrink(s);
3466 3465 mutex_unlock(&slab_mutex);
3467 3466  
3468 3467 return 0;