Blame view

mm/slab.c 110 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
  /*
   * linux/mm/slab.c
   * Written by Mark Hemment, 1996/97.
   * (markhe@nextd.demon.co.uk)
   *
   * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
   *
   * Major cleanup, different bufctl logic, per-cpu arrays
   *	(c) 2000 Manfred Spraul
   *
   * Cleanup, make the head arrays unconditional, preparation for NUMA
   * 	(c) 2002 Manfred Spraul
   *
   * An implementation of the Slab Allocator as described in outline in;
   *	UNIX Internals: The New Frontiers by Uresh Vahalia
   *	Pub: Prentice Hall	ISBN 0-13-101908-2
   * or with a little more detail in;
   *	The Slab Allocator: An Object-Caching Kernel Memory Allocator
   *	Jeff Bonwick (Sun Microsystems).
   *	Presented at: USENIX Summer 1994 Technical Conference
   *
   * The memory is organized in caches, one cache for each object type.
   * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
   * Each cache consists out of many slabs (they are small (usually one
   * page long) and always contiguous), and each slab contains multiple
   * initialized objects.
   *
   * This means, that your constructor is used only for newly allocated
183ff22bb   Simon Arlott   spelling fixes: mm/
30
   * slabs and you must pass objects with the same initializations to
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
   * kmem_cache_free.
   *
   * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
   * normal). If you need a special memory type, then must create a new
   * cache for that memory type.
   *
   * In order to reduce fragmentation, the slabs are sorted in 3 groups:
   *   full slabs with 0 free objects
   *   partial slabs
   *   empty slabs with no allocated objects
   *
   * If partial slabs exist, then new allocations come from these slabs,
   * otherwise from empty slabs or new slabs are allocated.
   *
   * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
   * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
   *
   * Each cache has a short per-cpu head array, most allocs
   * and frees go into that array, and if that array overflows, then 1/2
   * of the entries in the array are given back into the global cache.
   * The head array is strictly LIFO and should improve the cache hit rates.
   * On SMP, it additionally reduces the spinlock operations.
   *
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
54
   * The c_cpuarray may not be read with enabled local interrupts -
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
55
56
57
58
   * it's changed with a smp_call_function().
   *
   * SMP synchronization:
   *  constructors and destructors are called without any locking.
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
59
   *  Several members in struct kmem_cache and struct slab never change, they
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
60
61
62
63
64
65
66
67
68
69
70
71
   *	are accessed without any locking.
   *  The per-cpu arrays are never accessed from the wrong cpu, no locking,
   *  	and local interrupts are disabled so slab code is preempt-safe.
   *  The non-constant members are protected with a per-cache irq spinlock.
   *
   * Many thanks to Mark Hemment, who wrote another per-cpu slab patch
   * in 2000 - many ideas in the current implementation are derived from
   * his patch.
   *
   * Further notes from the original documentation:
   *
   * 11 April '97.  Started multi-threading - markhe
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
72
   *	The global cache-chain is protected by the mutex 'slab_mutex'.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
73
74
75
76
77
78
   *	The sem is only needed when accessing/extending the cache-chain, which
   *	can never happen inside an interrupt (kmem_cache_create(),
   *	kmem_cache_shrink() and kmem_cache_reap()).
   *
   *	At present, each engine can be growing a cache.  This should be blocked.
   *
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
79
80
81
82
83
84
85
86
87
   * 15 March 2005. NUMA slab allocator.
   *	Shai Fultheim <shai@scalex86.org>.
   *	Shobhit Dayal <shobhit@calsoftinc.com>
   *	Alok N Kataria <alokk@calsoftinc.com>
   *	Christoph Lameter <christoph@lameter.com>
   *
   *	Modified the slab allocator to be node aware on NUMA systems.
   *	Each node has its own list of partial, free and full slabs.
   *	All object allocations for a node occur from node specific slab lists.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
88
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
89
90
  #include	<linux/slab.h>
  #include	<linux/mm.h>
c9cf55285   Randy Dunlap   [PATCH] add poiso...
91
  #include	<linux/poison.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
92
93
94
95
96
  #include	<linux/swap.h>
  #include	<linux/cache.h>
  #include	<linux/interrupt.h>
  #include	<linux/init.h>
  #include	<linux/compiler.h>
101a50019   Paul Jackson   [PATCH] cpuset me...
97
  #include	<linux/cpuset.h>
a0ec95a8e   Alexey Dobriyan   proc: move /proc/...
98
  #include	<linux/proc_fs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
99
100
101
102
103
104
105
  #include	<linux/seq_file.h>
  #include	<linux/notifier.h>
  #include	<linux/kallsyms.h>
  #include	<linux/cpu.h>
  #include	<linux/sysctl.h>
  #include	<linux/module.h>
  #include	<linux/rcupdate.h>
543537bd9   Paulo Marques   [PATCH] create a ...
106
  #include	<linux/string.h>
138ae6631   Andrew Morton   [PATCH] slab: use...
107
  #include	<linux/uaccess.h>
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
108
  #include	<linux/nodemask.h>
d5cff6352   Catalin Marinas   kmemleak: Add the...
109
  #include	<linux/kmemleak.h>
dc85da15d   Christoph Lameter   [PATCH] NUMA poli...
110
  #include	<linux/mempolicy.h>
fc0abb145   Ingo Molnar   [PATCH] sem2mutex...
111
  #include	<linux/mutex.h>
8a8b6502f   Akinobu Mita   [PATCH] fault-inj...
112
  #include	<linux/fault-inject.h>
e7eebaf6a   Ingo Molnar   [PATCH] pi-futex:...
113
  #include	<linux/rtmutex.h>
6a2d7a955   Eric Dumazet   [PATCH] SLAB: use...
114
  #include	<linux/reciprocal_div.h>
3ac7fe5a4   Thomas Gleixner   infrastructure to...
115
  #include	<linux/debugobjects.h>
8f9f8d9e8   David Rientjes   slab: add memory ...
116
  #include	<linux/memory.h>
268bb0ce3   Linus Torvalds   sanitize <linux/p...
117
  #include	<linux/prefetch.h>
3f8c24529   Ingo Molnar   sched/headers: Pr...
118
  #include	<linux/sched/task_stack.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
119

381760ead   Mel Gorman   mm: micro-optimis...
120
  #include	<net/sock.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
121
122
123
  #include	<asm/cacheflush.h>
  #include	<asm/tlbflush.h>
  #include	<asm/page.h>
4dee6b64e   Steven Rostedt   tracing/mm: Move ...
124
  #include <trace/events/kmem.h>
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
125
  #include	"internal.h"
b9ce5ef49   Glauber Costa   sl[au]b: always g...
126
  #include	"slab.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
127
  /*
50953fe9e   Christoph Lameter   slab allocators: ...
128
   * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
   *		  0 for faster, smaller code (especially in the critical paths).
   *
   * STATS	- 1 to collect stats for /proc/slabinfo.
   *		  0 for faster, smaller code (especially in the critical paths).
   *
   * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
   */
  
  #ifdef CONFIG_DEBUG_SLAB
  #define	DEBUG		1
  #define	STATS		1
  #define	FORCED_DEBUG	1
  #else
  #define	DEBUG		0
  #define	STATS		0
  #define	FORCED_DEBUG	0
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
146
147
  /* Shouldn't this be in a header file somewhere? */
  #define	BYTES_PER_WORD		sizeof(void *)
87a927c71   David Woodhouse   Fix slab redzone ...
148
  #define	REDZONE_ALIGN		max(BYTES_PER_WORD, __alignof__(unsigned long long))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
149

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
150
151
152
  #ifndef ARCH_KMALLOC_FLAGS
  #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
  #endif
f315e3fa1   Joonsoo Kim   slab: restrict th...
153
154
155
156
157
158
159
160
  #define FREELIST_BYTE_INDEX (((PAGE_SIZE >> BITS_PER_BYTE) \
  				<= SLAB_OBJ_MIN_SIZE) ? 1 : 0)
  
  #if FREELIST_BYTE_INDEX
  typedef unsigned char freelist_idx_t;
  #else
  typedef unsigned short freelist_idx_t;
  #endif
30321c7b6   David Miller   slab: Fix off by ...
161
  #define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)
f315e3fa1   Joonsoo Kim   slab: restrict th...
162

072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
163
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
164
165
   * struct array_cache
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
166
167
168
169
170
171
172
173
174
175
176
177
178
179
   * Purpose:
   * - LIFO ordering, to hand out cache-warm objects from _alloc
   * - reduce the number of linked list operations
   * - reduce spinlock operations
   *
   * The limit is stored in the per-cpu structure to reduce the data cache
   * footprint.
   *
   */
  struct array_cache {
  	unsigned int avail;
  	unsigned int limit;
  	unsigned int batchcount;
  	unsigned int touched;
bda5b655f   Robert P. J. Day   Delete gcc-2.95 c...
180
  	void *entry[];	/*
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
181
182
183
  			 * Must have this definition in here for the proper
  			 * alignment of array_cache. Also simplifies accessing
  			 * the entries.
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
184
  			 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
185
  };
c8522a3a5   Joonsoo Kim   slab: introduce a...
186
187
188
189
  struct alien_cache {
  	spinlock_t lock;
  	struct array_cache ac;
  };
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
190
  /*
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
191
192
   * Need this for bootstrapping a per node allocator.
   */
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
193
  #define NUM_INIT_LISTS (2 * MAX_NUMNODES)
ce8eb6c42   Christoph Lameter   slab: Rename list...
194
  static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
195
  #define	CACHE_CACHE 0
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
196
  #define	SIZE_NODE (MAX_NUMNODES)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
197

ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
198
  static int drain_freelist(struct kmem_cache *cache,
ce8eb6c42   Christoph Lameter   slab: Rename list...
199
  			struct kmem_cache_node *n, int tofree);
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
200
  static void free_block(struct kmem_cache *cachep, void **objpp, int len,
97654dfa2   Joonsoo Kim   slab: defer slab_...
201
202
  			int node, struct list_head *list);
  static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
83b519e8b   Pekka Enberg   slab: setup alloc...
203
  static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
65f27f384   David Howells   WorkStruct: Pass ...
204
  static void cache_reap(struct work_struct *unused);
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
205

76b342bdc   Joonsoo Kim   mm/slab: separate...
206
207
208
209
210
  static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
  						void **list);
  static inline void fixup_slab_list(struct kmem_cache *cachep,
  				struct kmem_cache_node *n, struct page *page,
  				void **list);
e0a427267   Ingo Molnar   [PATCH] mm/slab.c...
211
  static int slab_early_init = 1;
ce8eb6c42   Christoph Lameter   slab: Rename list...
212
  #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
213

ce8eb6c42   Christoph Lameter   slab: Rename list...
214
  static void kmem_cache_node_init(struct kmem_cache_node *parent)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
215
216
217
218
  {
  	INIT_LIST_HEAD(&parent->slabs_full);
  	INIT_LIST_HEAD(&parent->slabs_partial);
  	INIT_LIST_HEAD(&parent->slabs_free);
bf00bd345   David Rientjes   mm, slab: maintai...
219
  	parent->total_slabs = 0;
f728b0a5d   Greg Thelen   mm, slab: faster ...
220
  	parent->free_slabs = 0;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
221
222
  	parent->shared = NULL;
  	parent->alien = NULL;
2e1217cf9   Ravikiran G Thirumalai   [PATCH] NUMA slab...
223
  	parent->colour_next = 0;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
224
225
226
227
  	spin_lock_init(&parent->list_lock);
  	parent->free_objects = 0;
  	parent->free_touched = 0;
  }
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
228
229
230
  #define MAKE_LIST(cachep, listp, slab, nodeid)				\
  	do {								\
  		INIT_LIST_HEAD(listp);					\
18bf85411   Christoph Lameter   slab: use get_nod...
231
  		list_splice(&get_node(cachep, nodeid)->slab, listp);	\
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
232
  	} while (0)
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
233
234
  #define	MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
  	do {								\
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
235
236
237
238
  	MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
  	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
  	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
  	} while (0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
239

b03a017be   Joonsoo Kim   mm/slab: introduc...
240
  #define CFLGS_OBJFREELIST_SLAB	(0x40000000UL)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
241
  #define CFLGS_OFF_SLAB		(0x80000000UL)
b03a017be   Joonsoo Kim   mm/slab: introduc...
242
  #define	OBJFREELIST_SLAB(x)	((x)->flags & CFLGS_OBJFREELIST_SLAB)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
243
244
245
  #define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
  
  #define BATCHREFILL_LIMIT	16
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
246
247
248
  /*
   * Optimization question: fewer reaps means less probability for unnessary
   * cpucache drain/refill cycles.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
249
   *
dc6f3f276   Adrian Bunk   mm/slab.c: fix a ...
250
   * OTOH the cpuarrays can contain lots of objects,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
251
252
   * which could lock up otherwise freeable slabs.
   */
5f0985bb1   Jianyu Zhan   mm/slab.c: cleanu...
253
254
  #define REAPTIMEOUT_AC		(2*HZ)
  #define REAPTIMEOUT_NODE	(4*HZ)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
255
256
257
258
259
260
  
  #if STATS
  #define	STATS_INC_ACTIVE(x)	((x)->num_active++)
  #define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
  #define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
  #define	STATS_INC_GROWN(x)	((x)->grown++)
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
261
  #define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
262
263
264
265
266
  #define	STATS_SET_HIGH(x)						\
  	do {								\
  		if ((x)->num_active > (x)->high_mark)			\
  			(x)->high_mark = (x)->num_active;		\
  	} while (0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
267
268
  #define	STATS_INC_ERR(x)	((x)->errors++)
  #define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
269
  #define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
fb7faf331   Ravikiran G Thirumalai   [PATCH] slab: add...
270
  #define STATS_INC_ACOVERFLOW(x)   ((x)->node_overflow++)
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
271
272
273
274
275
  #define	STATS_SET_FREEABLE(x, i)					\
  	do {								\
  		if ((x)->max_freeable < i)				\
  			(x)->max_freeable = i;				\
  	} while (0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
276
277
278
279
280
281
282
283
284
  #define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
  #define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
  #define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
  #define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
  #else
  #define	STATS_INC_ACTIVE(x)	do { } while (0)
  #define	STATS_DEC_ACTIVE(x)	do { } while (0)
  #define	STATS_INC_ALLOCED(x)	do { } while (0)
  #define	STATS_INC_GROWN(x)	do { } while (0)
4e60c86bd   Andi Kleen   gcc-4.6: mm: fix ...
285
  #define	STATS_ADD_REAPED(x,y)	do { (void)(y); } while (0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
286
287
288
  #define	STATS_SET_HIGH(x)	do { } while (0)
  #define	STATS_INC_ERR(x)	do { } while (0)
  #define	STATS_INC_NODEALLOCS(x)	do { } while (0)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
289
  #define	STATS_INC_NODEFREES(x)	do { } while (0)
fb7faf331   Ravikiran G Thirumalai   [PATCH] slab: add...
290
  #define STATS_INC_ACOVERFLOW(x)   do { } while (0)
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
291
  #define	STATS_SET_FREEABLE(x, i) do { } while (0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
292
293
294
295
296
297
298
  #define STATS_INC_ALLOCHIT(x)	do { } while (0)
  #define STATS_INC_ALLOCMISS(x)	do { } while (0)
  #define STATS_INC_FREEHIT(x)	do { } while (0)
  #define STATS_INC_FREEMISS(x)	do { } while (0)
  #endif
  
  #if DEBUG
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
299

a737b3e2f   Andrew Morton   [PATCH] slab cleanup
300
301
  /*
   * memory layout of objects:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
302
   * 0		: objp
3dafccf22   Manfred Spraul   [PATCH] slab: dis...
303
   * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
304
305
   * 		the end of an object is aligned with the end of the real
   * 		allocation. Catches writes behind the end of the allocation.
3dafccf22   Manfred Spraul   [PATCH] slab: dis...
306
   * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
307
   * 		redzone word.
3dafccf22   Manfred Spraul   [PATCH] slab: dis...
308
   * cachep->obj_offset: The real object.
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
309
310
   * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
   * cachep->size - 1* BYTES_PER_WORD: last caller address
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
311
   *					[BYTES_PER_WORD long]
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
312
   */
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
313
  static int obj_offset(struct kmem_cache *cachep)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
314
  {
3dafccf22   Manfred Spraul   [PATCH] slab: dis...
315
  	return cachep->obj_offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
316
  }
b46b8f19c   David Woodhouse   Increase slab red...
317
  static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
318
319
  {
  	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
b46b8f19c   David Woodhouse   Increase slab red...
320
321
  	return (unsigned long long*) (objp + obj_offset(cachep) -
  				      sizeof(unsigned long long));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
322
  }
b46b8f19c   David Woodhouse   Increase slab red...
323
  static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
324
325
326
  {
  	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
  	if (cachep->flags & SLAB_STORE_USER)
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
327
  		return (unsigned long long *)(objp + cachep->size -
b46b8f19c   David Woodhouse   Increase slab red...
328
  					      sizeof(unsigned long long) -
87a927c71   David Woodhouse   Fix slab redzone ...
329
  					      REDZONE_ALIGN);
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
330
  	return (unsigned long long *) (objp + cachep->size -
b46b8f19c   David Woodhouse   Increase slab red...
331
  				       sizeof(unsigned long long));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
332
  }
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
333
  static void **dbg_userword(struct kmem_cache *cachep, void *objp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
334
335
  {
  	BUG_ON(!(cachep->flags & SLAB_STORE_USER));
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
336
  	return (void **)(objp + cachep->size - BYTES_PER_WORD);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
337
338
339
  }
  
  #else
3dafccf22   Manfred Spraul   [PATCH] slab: dis...
340
  #define obj_offset(x)			0
b46b8f19c   David Woodhouse   Increase slab red...
341
342
  #define dbg_redzone1(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
  #define dbg_redzone2(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
343
344
345
  #define dbg_userword(cachep, objp)	({BUG(); (void **)NULL;})
  
  #endif
037873014   Joonsoo Kim   slab: fix oops wh...
346
  #ifdef CONFIG_DEBUG_SLAB_LEAK
d31676dfd   Joonsoo Kim   mm/slab: alternat...
347
  static inline bool is_store_user_clean(struct kmem_cache *cachep)
037873014   Joonsoo Kim   slab: fix oops wh...
348
  {
d31676dfd   Joonsoo Kim   mm/slab: alternat...
349
350
  	return atomic_read(&cachep->store_user_clean) == 1;
  }
037873014   Joonsoo Kim   slab: fix oops wh...
351

d31676dfd   Joonsoo Kim   mm/slab: alternat...
352
353
354
355
  static inline void set_store_user_clean(struct kmem_cache *cachep)
  {
  	atomic_set(&cachep->store_user_clean, 1);
  }
037873014   Joonsoo Kim   slab: fix oops wh...
356

d31676dfd   Joonsoo Kim   mm/slab: alternat...
357
358
359
360
  static inline void set_store_user_dirty(struct kmem_cache *cachep)
  {
  	if (is_store_user_clean(cachep))
  		atomic_set(&cachep->store_user_clean, 0);
037873014   Joonsoo Kim   slab: fix oops wh...
361
362
363
  }
  
  #else
d31676dfd   Joonsoo Kim   mm/slab: alternat...
364
  static inline void set_store_user_dirty(struct kmem_cache *cachep) {}
037873014   Joonsoo Kim   slab: fix oops wh...
365
366
  
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
367
  /*
3df1cccdf   David Rientjes   slab: introduce s...
368
369
   * Do not go above this order unless 0 objects fit into the slab or
   * overridden on the command line.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
370
   */
543585cc5   David Rientjes   slab: rename slab...
371
372
373
  #define	SLAB_MAX_ORDER_HI	1
  #define	SLAB_MAX_ORDER_LO	0
  static int slab_max_order = SLAB_MAX_ORDER_LO;
3df1cccdf   David Rientjes   slab: introduce s...
374
  static bool slab_max_order_set __initdata;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
375

6ed5eb221   Pekka Enberg   [PATCH] slab: ext...
376
377
  static inline struct kmem_cache *virt_to_cache(const void *obj)
  {
b49af68ff   Christoph Lameter   Add virt_to_head_...
378
  	struct page *page = virt_to_head_page(obj);
350260889   Christoph Lameter   slab: Remove some...
379
  	return page->slab_cache;
6ed5eb221   Pekka Enberg   [PATCH] slab: ext...
380
  }
8456a648c   Joonsoo Kim   slab: use struct ...
381
  static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
8fea4e96a   Pekka Enberg   [PATCH] slab: obj...
382
383
  				 unsigned int idx)
  {
8456a648c   Joonsoo Kim   slab: use struct ...
384
  	return page->s_mem + cache->size * idx;
8fea4e96a   Pekka Enberg   [PATCH] slab: obj...
385
  }
6a2d7a955   Eric Dumazet   [PATCH] SLAB: use...
386
  /*
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
387
388
389
   * We want to avoid an expensive divide : (offset / cache->size)
   *   Using the fact that size is a constant for a particular cache,
   *   we can replace (offset / cache->size) by
6a2d7a955   Eric Dumazet   [PATCH] SLAB: use...
390
391
392
   *   reciprocal_divide(offset, cache->reciprocal_buffer_size)
   */
  static inline unsigned int obj_to_index(const struct kmem_cache *cache,
8456a648c   Joonsoo Kim   slab: use struct ...
393
  					const struct page *page, void *obj)
8fea4e96a   Pekka Enberg   [PATCH] slab: obj...
394
  {
8456a648c   Joonsoo Kim   slab: use struct ...
395
  	u32 offset = (obj - page->s_mem);
6a2d7a955   Eric Dumazet   [PATCH] SLAB: use...
396
  	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
8fea4e96a   Pekka Enberg   [PATCH] slab: obj...
397
  }
6fb924304   Joonsoo Kim   mm/slab: remove u...
398
  #define BOOT_CPUCACHE_ENTRIES	1
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
399
  /* internal cache of cache description objs */
9b030cb86   Christoph Lameter   mm/sl[aou]b: Use ...
400
  static struct kmem_cache kmem_cache_boot = {
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
401
402
403
  	.batchcount = 1,
  	.limit = BOOT_CPUCACHE_ENTRIES,
  	.shared = 1,
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
404
  	.size = sizeof(struct kmem_cache),
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
405
  	.name = "kmem_cache",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
406
  };
1871e52c7   Tejun Heo   percpu: make perc...
407
  static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
408

343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
409
  static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
410
  {
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
411
  	return this_cpu_ptr(cachep->cpu_cache);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
412
  }
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
413
414
415
  /*
   * Calculate the number of objects and left-over bytes for a given buffer size.
   */
70f75067b   Joonsoo Kim   mm/slab: avoid re...
416
417
  static unsigned int cache_estimate(unsigned long gfporder, size_t buffer_size,
  		unsigned long flags, size_t *left_over)
fbaccacff   Steven Rostedt   [PATCH] slab: cac...
418
  {
70f75067b   Joonsoo Kim   mm/slab: avoid re...
419
  	unsigned int num;
fbaccacff   Steven Rostedt   [PATCH] slab: cac...
420
  	size_t slab_size = PAGE_SIZE << gfporder;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
421

fbaccacff   Steven Rostedt   [PATCH] slab: cac...
422
423
424
425
426
  	/*
  	 * The slab management structure can be either off the slab or
  	 * on it. For the latter case, the memory allocated for a
  	 * slab is used for:
  	 *
fbaccacff   Steven Rostedt   [PATCH] slab: cac...
427
  	 * - @buffer_size bytes for each object
2e6b36021   Joonsoo Kim   mm/slab: put the ...
428
429
430
431
432
  	 * - One freelist_idx_t for each object
  	 *
  	 * We don't need to consider alignment of freelist because
  	 * freelist will be at the end of slab page. The objects will be
  	 * at the correct alignment.
fbaccacff   Steven Rostedt   [PATCH] slab: cac...
433
434
435
436
437
438
  	 *
  	 * If the slab management structure is off the slab, then the
  	 * alignment will already be calculated into the size. Because
  	 * the slabs are all pages aligned, the objects will be at the
  	 * correct alignment when allocated.
  	 */
b03a017be   Joonsoo Kim   mm/slab: introduc...
439
  	if (flags & (CFLGS_OBJFREELIST_SLAB | CFLGS_OFF_SLAB)) {
70f75067b   Joonsoo Kim   mm/slab: avoid re...
440
  		num = slab_size / buffer_size;
2e6b36021   Joonsoo Kim   mm/slab: put the ...
441
  		*left_over = slab_size % buffer_size;
fbaccacff   Steven Rostedt   [PATCH] slab: cac...
442
  	} else {
70f75067b   Joonsoo Kim   mm/slab: avoid re...
443
  		num = slab_size / (buffer_size + sizeof(freelist_idx_t));
2e6b36021   Joonsoo Kim   mm/slab: put the ...
444
445
  		*left_over = slab_size %
  			(buffer_size + sizeof(freelist_idx_t));
fbaccacff   Steven Rostedt   [PATCH] slab: cac...
446
  	}
70f75067b   Joonsoo Kim   mm/slab: avoid re...
447
448
  
  	return num;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
449
  }
f28510d30   Christoph Lameter   slab: Only define...
450
  #if DEBUG
d40cee245   Harvey Harrison   mm: remove remain...
451
  #define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
452

a737b3e2f   Andrew Morton   [PATCH] slab cleanup
453
454
  static void __slab_error(const char *function, struct kmem_cache *cachep,
  			char *msg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
455
  {
1170532bb   Joe Perches   mm: convert print...
456
457
  	pr_err("slab error in %s(): cache `%s': %s
  ",
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
458
  	       function, cachep->name, msg);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
459
  	dump_stack();
373d4d099   Rusty Russell   taint: add explic...
460
  	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
461
  }
f28510d30   Christoph Lameter   slab: Only define...
462
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
463

3395ee058   Paul Menage   [PATCH] mm: add n...
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
  /*
   * By default on NUMA we use alien caches to stage the freeing of
   * objects allocated from other nodes. This causes massive memory
   * inefficiencies when using fake NUMA setup to split memory into a
   * large number of small nodes, so it can be disabled on the command
   * line
    */
  
  static int use_alien_caches __read_mostly = 1;
  static int __init noaliencache_setup(char *s)
  {
  	use_alien_caches = 0;
  	return 1;
  }
  __setup("noaliencache", noaliencache_setup);
3df1cccdf   David Rientjes   slab: introduce s...
479
480
481
482
483
484
485
486
487
488
  static int __init slab_max_order_setup(char *str)
  {
  	get_option(&str, &slab_max_order);
  	slab_max_order = slab_max_order < 0 ? 0 :
  				min(slab_max_order, MAX_ORDER - 1);
  	slab_max_order_set = true;
  
  	return 1;
  }
  __setup("slab_max_order=", slab_max_order_setup);
8fce4d8e3   Christoph Lameter   [PATCH] slab: Nod...
489
490
491
492
493
494
495
  #ifdef CONFIG_NUMA
  /*
   * Special reaping functions for NUMA systems called from cache_reap().
   * These take care of doing round robin flushing of alien caches (containing
   * objects freed on different nodes from which they were allocated) and the
   * flushing of remote pcps by calling drain_node_pages.
   */
1871e52c7   Tejun Heo   percpu: make perc...
496
  static DEFINE_PER_CPU(unsigned long, slab_reap_node);
8fce4d8e3   Christoph Lameter   [PATCH] slab: Nod...
497
498
499
  
  static void init_reap_node(int cpu)
  {
0edaf86cf   Andrew Morton   include/linux/nod...
500
501
  	per_cpu(slab_reap_node, cpu) = next_node_in(cpu_to_mem(cpu),
  						    node_online_map);
8fce4d8e3   Christoph Lameter   [PATCH] slab: Nod...
502
503
504
505
  }
  
  static void next_reap_node(void)
  {
909ea9646   Christoph Lameter   core: Replace __g...
506
  	int node = __this_cpu_read(slab_reap_node);
8fce4d8e3   Christoph Lameter   [PATCH] slab: Nod...
507

0edaf86cf   Andrew Morton   include/linux/nod...
508
  	node = next_node_in(node, node_online_map);
909ea9646   Christoph Lameter   core: Replace __g...
509
  	__this_cpu_write(slab_reap_node, node);
8fce4d8e3   Christoph Lameter   [PATCH] slab: Nod...
510
511
512
513
514
515
  }
  
  #else
  #define init_reap_node(cpu) do { } while (0)
  #define next_reap_node(void) do { } while (0)
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
516
517
518
519
520
521
522
  /*
   * Initiate the reap timer running on the target CPU.  We run at around 1 to 2Hz
   * via the workqueue/eventd.
   * Add the CPU number into the expiration time to minimize the possibility of
   * the CPUs getting into lockstep and contending for the global cache chain
   * lock.
   */
0db0628d9   Paul Gortmaker   kernel: delete __...
523
  static void start_cpu_timer(int cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
524
  {
1871e52c7   Tejun Heo   percpu: make perc...
525
  	struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
526

eac0337af   Tejun Heo   slab, workqueue: ...
527
  	if (reap_work->work.func == NULL) {
8fce4d8e3   Christoph Lameter   [PATCH] slab: Nod...
528
  		init_reap_node(cpu);
203b42f73   Tejun Heo   workqueue: make d...
529
  		INIT_DEFERRABLE_WORK(reap_work, cache_reap);
2b2842146   Arjan van de Ven   [PATCH] user of t...
530
531
  		schedule_delayed_work_on(cpu, reap_work,
  					__round_jiffies_relative(HZ, cpu));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
532
533
  	}
  }
1fe00d50a   Joonsoo Kim   slab: factor out ...
534
  static void init_arraycache(struct array_cache *ac, int limit, int batch)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
535
  {
d5cff6352   Catalin Marinas   kmemleak: Add the...
536
537
  	/*
  	 * The array_cache structures contain pointers to free object.
25985edce   Lucas De Marchi   Fix common misspe...
538
  	 * However, when such objects are allocated or transferred to another
d5cff6352   Catalin Marinas   kmemleak: Add the...
539
540
541
542
  	 * cache the pointers are not cleared and they could be counted as
  	 * valid references during a kmemleak scan. Therefore, kmemleak must
  	 * not scan such objects.
  	 */
1fe00d50a   Joonsoo Kim   slab: factor out ...
543
544
545
546
547
548
  	kmemleak_no_scan(ac);
  	if (ac) {
  		ac->avail = 0;
  		ac->limit = limit;
  		ac->batchcount = batch;
  		ac->touched = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
549
  	}
1fe00d50a   Joonsoo Kim   slab: factor out ...
550
551
552
553
554
  }
  
  static struct array_cache *alloc_arraycache(int node, int entries,
  					    int batchcount, gfp_t gfp)
  {
5e8047896   Joonsoo Kim   slab: change int ...
555
  	size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
1fe00d50a   Joonsoo Kim   slab: factor out ...
556
557
558
559
560
  	struct array_cache *ac = NULL;
  
  	ac = kmalloc_node(memsize, gfp, node);
  	init_arraycache(ac, entries, batchcount);
  	return ac;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
561
  }
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
562
563
  static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
  					struct page *page, void *objp)
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
564
  {
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
565
566
567
  	struct kmem_cache_node *n;
  	int page_node;
  	LIST_HEAD(list);
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
568

f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
569
570
  	page_node = page_to_nid(page);
  	n = get_node(cachep, page_node);
381760ead   Mel Gorman   mm: micro-optimis...
571

f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
572
573
574
  	spin_lock(&n->list_lock);
  	free_block(cachep, &objp, 1, page_node, &list);
  	spin_unlock(&n->list_lock);
381760ead   Mel Gorman   mm: micro-optimis...
575

f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
576
  	slabs_destroy(cachep, &list);
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
577
  }
3ded175a4   Christoph Lameter   [PATCH] slab: add...
578
579
580
581
582
583
584
585
586
587
  /*
   * Transfer objects in one arraycache to another.
   * Locking must be handled by the caller.
   *
   * Return the number of entries transferred.
   */
  static int transfer_objects(struct array_cache *to,
  		struct array_cache *from, unsigned int max)
  {
  	/* Figure out how many entries to transfer */
732eacc05   Hagen Paul Pfeifer   replace nested ma...
588
  	int nr = min3(from->avail, max, to->limit - to->avail);
3ded175a4   Christoph Lameter   [PATCH] slab: add...
589
590
591
592
593
594
595
596
597
  
  	if (!nr)
  		return 0;
  
  	memcpy(to->entry + to->avail, from->entry + from->avail -nr,
  			sizeof(void *) *nr);
  
  	from->avail -= nr;
  	to->avail += nr;
3ded175a4   Christoph Lameter   [PATCH] slab: add...
598
599
  	return nr;
  }
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
600
601
602
  #ifndef CONFIG_NUMA
  
  #define drain_alien_cache(cachep, alien) do { } while (0)
ce8eb6c42   Christoph Lameter   slab: Rename list...
603
  #define reap_alien(cachep, n) do { } while (0)
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
604

c8522a3a5   Joonsoo Kim   slab: introduce a...
605
606
  static inline struct alien_cache **alloc_alien_cache(int node,
  						int limit, gfp_t gfp)
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
607
  {
8888177ea   Joonsoo Kim   mm/slab: remove B...
608
  	return NULL;
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
609
  }
c8522a3a5   Joonsoo Kim   slab: introduce a...
610
  static inline void free_alien_cache(struct alien_cache **ac_ptr)
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
611
612
613
614
615
616
617
618
619
620
621
622
623
  {
  }
  
  static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
  {
  	return 0;
  }
  
  static inline void *alternate_node_alloc(struct kmem_cache *cachep,
  		gfp_t flags)
  {
  	return NULL;
  }
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
624
  static inline void *____cache_alloc_node(struct kmem_cache *cachep,
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
625
626
627
628
  		 gfp_t flags, int nodeid)
  {
  	return NULL;
  }
4167e9b2c   David Rientjes   mm: remove GFP_TH...
629
630
  static inline gfp_t gfp_exact_node(gfp_t flags)
  {
444eb2a44   Mel Gorman   mm: thp: set THP ...
631
  	return flags & ~__GFP_NOFAIL;
4167e9b2c   David Rientjes   mm: remove GFP_TH...
632
  }
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
633
  #else	/* CONFIG_NUMA */
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
634
  static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
c61afb181   Paul Jackson   [PATCH] cpuset me...
635
  static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
dc85da15d   Christoph Lameter   [PATCH] NUMA poli...
636

c8522a3a5   Joonsoo Kim   slab: introduce a...
637
638
639
  static struct alien_cache *__alloc_alien_cache(int node, int entries,
  						int batch, gfp_t gfp)
  {
5e8047896   Joonsoo Kim   slab: change int ...
640
  	size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
c8522a3a5   Joonsoo Kim   slab: introduce a...
641
642
643
644
  	struct alien_cache *alc = NULL;
  
  	alc = kmalloc_node(memsize, gfp, node);
  	init_arraycache(&alc->ac, entries, batch);
49dfc304b   Joonsoo Kim   slab: use the loc...
645
  	spin_lock_init(&alc->lock);
c8522a3a5   Joonsoo Kim   slab: introduce a...
646
647
648
649
  	return alc;
  }
  
  static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
650
  {
c8522a3a5   Joonsoo Kim   slab: introduce a...
651
  	struct alien_cache **alc_ptr;
5e8047896   Joonsoo Kim   slab: change int ...
652
  	size_t memsize = sizeof(void *) * nr_node_ids;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
653
654
655
656
  	int i;
  
  	if (limit > 1)
  		limit = 12;
c8522a3a5   Joonsoo Kim   slab: introduce a...
657
658
659
660
661
662
663
664
665
666
667
668
669
  	alc_ptr = kzalloc_node(memsize, gfp, node);
  	if (!alc_ptr)
  		return NULL;
  
  	for_each_node(i) {
  		if (i == node || !node_online(i))
  			continue;
  		alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
  		if (!alc_ptr[i]) {
  			for (i--; i >= 0; i--)
  				kfree(alc_ptr[i]);
  			kfree(alc_ptr);
  			return NULL;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
670
671
  		}
  	}
c8522a3a5   Joonsoo Kim   slab: introduce a...
672
  	return alc_ptr;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
673
  }
c8522a3a5   Joonsoo Kim   slab: introduce a...
674
  static void free_alien_cache(struct alien_cache **alc_ptr)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
675
676
  {
  	int i;
c8522a3a5   Joonsoo Kim   slab: introduce a...
677
  	if (!alc_ptr)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
678
  		return;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
679
  	for_each_node(i)
c8522a3a5   Joonsoo Kim   slab: introduce a...
680
681
  	    kfree(alc_ptr[i]);
  	kfree(alc_ptr);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
682
  }
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
683
  static void __drain_alien_cache(struct kmem_cache *cachep,
833b706cc   Joonsoo Kim   slab: destroy a s...
684
685
  				struct array_cache *ac, int node,
  				struct list_head *list)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
686
  {
18bf85411   Christoph Lameter   slab: use get_nod...
687
  	struct kmem_cache_node *n = get_node(cachep, node);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
688
689
  
  	if (ac->avail) {
ce8eb6c42   Christoph Lameter   slab: Rename list...
690
  		spin_lock(&n->list_lock);
e00946fe2   Christoph Lameter   [PATCH] slab: Byp...
691
692
693
694
695
  		/*
  		 * Stuff objects into the remote nodes shared array first.
  		 * That way we could avoid the overhead of putting the objects
  		 * into the free lists and getting them back later.
  		 */
ce8eb6c42   Christoph Lameter   slab: Rename list...
696
697
  		if (n->shared)
  			transfer_objects(n->shared, ac, ac->limit);
e00946fe2   Christoph Lameter   [PATCH] slab: Byp...
698

833b706cc   Joonsoo Kim   slab: destroy a s...
699
  		free_block(cachep, ac->entry, ac->avail, node, list);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
700
  		ac->avail = 0;
ce8eb6c42   Christoph Lameter   slab: Rename list...
701
  		spin_unlock(&n->list_lock);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
702
703
  	}
  }
8fce4d8e3   Christoph Lameter   [PATCH] slab: Nod...
704
705
706
  /*
   * Called from cache_reap() to regularly drain alien caches round robin.
   */
ce8eb6c42   Christoph Lameter   slab: Rename list...
707
  static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
8fce4d8e3   Christoph Lameter   [PATCH] slab: Nod...
708
  {
909ea9646   Christoph Lameter   core: Replace __g...
709
  	int node = __this_cpu_read(slab_reap_node);
8fce4d8e3   Christoph Lameter   [PATCH] slab: Nod...
710

ce8eb6c42   Christoph Lameter   slab: Rename list...
711
  	if (n->alien) {
c8522a3a5   Joonsoo Kim   slab: introduce a...
712
713
714
715
716
  		struct alien_cache *alc = n->alien[node];
  		struct array_cache *ac;
  
  		if (alc) {
  			ac = &alc->ac;
49dfc304b   Joonsoo Kim   slab: use the loc...
717
  			if (ac->avail && spin_trylock_irq(&alc->lock)) {
833b706cc   Joonsoo Kim   slab: destroy a s...
718
719
720
  				LIST_HEAD(list);
  
  				__drain_alien_cache(cachep, ac, node, &list);
49dfc304b   Joonsoo Kim   slab: use the loc...
721
  				spin_unlock_irq(&alc->lock);
833b706cc   Joonsoo Kim   slab: destroy a s...
722
  				slabs_destroy(cachep, &list);
c8522a3a5   Joonsoo Kim   slab: introduce a...
723
  			}
8fce4d8e3   Christoph Lameter   [PATCH] slab: Nod...
724
725
726
  		}
  	}
  }
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
727
  static void drain_alien_cache(struct kmem_cache *cachep,
c8522a3a5   Joonsoo Kim   slab: introduce a...
728
  				struct alien_cache **alien)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
729
  {
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
730
  	int i = 0;
c8522a3a5   Joonsoo Kim   slab: introduce a...
731
  	struct alien_cache *alc;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
732
733
734
735
  	struct array_cache *ac;
  	unsigned long flags;
  
  	for_each_online_node(i) {
c8522a3a5   Joonsoo Kim   slab: introduce a...
736
737
  		alc = alien[i];
  		if (alc) {
833b706cc   Joonsoo Kim   slab: destroy a s...
738
  			LIST_HEAD(list);
c8522a3a5   Joonsoo Kim   slab: introduce a...
739
  			ac = &alc->ac;
49dfc304b   Joonsoo Kim   slab: use the loc...
740
  			spin_lock_irqsave(&alc->lock, flags);
833b706cc   Joonsoo Kim   slab: destroy a s...
741
  			__drain_alien_cache(cachep, ac, i, &list);
49dfc304b   Joonsoo Kim   slab: use the loc...
742
  			spin_unlock_irqrestore(&alc->lock, flags);
833b706cc   Joonsoo Kim   slab: destroy a s...
743
  			slabs_destroy(cachep, &list);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
744
745
746
  		}
  	}
  }
729bd0b74   Pekka Enberg   [PATCH] slab: ext...
747

25c4f304b   Joonsoo Kim   mm/slab: factor o...
748
749
  static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
  				int node, int page_node)
729bd0b74   Pekka Enberg   [PATCH] slab: ext...
750
  {
ce8eb6c42   Christoph Lameter   slab: Rename list...
751
  	struct kmem_cache_node *n;
c8522a3a5   Joonsoo Kim   slab: introduce a...
752
753
  	struct alien_cache *alien = NULL;
  	struct array_cache *ac;
97654dfa2   Joonsoo Kim   slab: defer slab_...
754
  	LIST_HEAD(list);
1ca4cb241   Pekka Enberg   [PATCH] slab: red...
755

18bf85411   Christoph Lameter   slab: use get_nod...
756
  	n = get_node(cachep, node);
729bd0b74   Pekka Enberg   [PATCH] slab: ext...
757
  	STATS_INC_NODEFREES(cachep);
25c4f304b   Joonsoo Kim   mm/slab: factor o...
758
759
  	if (n->alien && n->alien[page_node]) {
  		alien = n->alien[page_node];
c8522a3a5   Joonsoo Kim   slab: introduce a...
760
  		ac = &alien->ac;
49dfc304b   Joonsoo Kim   slab: use the loc...
761
  		spin_lock(&alien->lock);
c8522a3a5   Joonsoo Kim   slab: introduce a...
762
  		if (unlikely(ac->avail == ac->limit)) {
729bd0b74   Pekka Enberg   [PATCH] slab: ext...
763
  			STATS_INC_ACOVERFLOW(cachep);
25c4f304b   Joonsoo Kim   mm/slab: factor o...
764
  			__drain_alien_cache(cachep, ac, page_node, &list);
729bd0b74   Pekka Enberg   [PATCH] slab: ext...
765
  		}
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
766
  		ac->entry[ac->avail++] = objp;
49dfc304b   Joonsoo Kim   slab: use the loc...
767
  		spin_unlock(&alien->lock);
833b706cc   Joonsoo Kim   slab: destroy a s...
768
  		slabs_destroy(cachep, &list);
729bd0b74   Pekka Enberg   [PATCH] slab: ext...
769
  	} else {
25c4f304b   Joonsoo Kim   mm/slab: factor o...
770
  		n = get_node(cachep, page_node);
18bf85411   Christoph Lameter   slab: use get_nod...
771
  		spin_lock(&n->list_lock);
25c4f304b   Joonsoo Kim   mm/slab: factor o...
772
  		free_block(cachep, &objp, 1, page_node, &list);
18bf85411   Christoph Lameter   slab: use get_nod...
773
  		spin_unlock(&n->list_lock);
97654dfa2   Joonsoo Kim   slab: defer slab_...
774
  		slabs_destroy(cachep, &list);
729bd0b74   Pekka Enberg   [PATCH] slab: ext...
775
776
777
  	}
  	return 1;
  }
25c4f304b   Joonsoo Kim   mm/slab: factor o...
778
779
780
781
782
783
784
785
786
787
788
789
790
791
  
  static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
  {
  	int page_node = page_to_nid(virt_to_page(objp));
  	int node = numa_mem_id();
  	/*
  	 * Make sure we are not freeing a object from another node to the array
  	 * cache on this cpu.
  	 */
  	if (likely(node == page_node))
  		return 0;
  
  	return __cache_free_alien(cachep, objp, node, page_node);
  }
4167e9b2c   David Rientjes   mm: remove GFP_TH...
792
793
  
  /*
444eb2a44   Mel Gorman   mm: thp: set THP ...
794
795
   * Construct gfp mask to allocate from a specific node but do not reclaim or
   * warn about failures.
4167e9b2c   David Rientjes   mm: remove GFP_TH...
796
797
798
   */
  static inline gfp_t gfp_exact_node(gfp_t flags)
  {
444eb2a44   Mel Gorman   mm: thp: set THP ...
799
  	return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
4167e9b2c   David Rientjes   mm: remove GFP_TH...
800
  }
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
801
  #endif
ded0ecf61   Joonsoo Kim   mm/slab: factor o...
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
  static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
  {
  	struct kmem_cache_node *n;
  
  	/*
  	 * Set up the kmem_cache_node for cpu before we can
  	 * begin anything. Make sure some other cpu on this
  	 * node has not already allocated this
  	 */
  	n = get_node(cachep, node);
  	if (n) {
  		spin_lock_irq(&n->list_lock);
  		n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
  				cachep->num;
  		spin_unlock_irq(&n->list_lock);
  
  		return 0;
  	}
  
  	n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
  	if (!n)
  		return -ENOMEM;
  
  	kmem_cache_node_init(n);
  	n->next_reap = jiffies + REAPTIMEOUT_NODE +
  		    ((unsigned long)cachep) % REAPTIMEOUT_NODE;
  
  	n->free_limit =
  		(1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num;
  
  	/*
  	 * The kmem_cache_nodes don't come and go as CPUs
  	 * come and go.  slab_mutex is sufficient
  	 * protection here.
  	 */
  	cachep->node[node] = n;
  
  	return 0;
  }
6731d4f12   Sebastian Andrzej Siewior   slab: Convert to ...
841
  #if (defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)) || defined(CONFIG_SMP)
8f9f8d9e8   David Rientjes   slab: add memory ...
842
  /*
6a67368c3   Christoph Lameter   slab: Rename node...
843
   * Allocates and initializes node for a node on each slab cache, used for
ce8eb6c42   Christoph Lameter   slab: Rename list...
844
   * either memory or cpu hotplug.  If memory is being hot-added, the kmem_cache_node
8f9f8d9e8   David Rientjes   slab: add memory ...
845
   * will be allocated off-node since memory is not yet online for the new node.
6a67368c3   Christoph Lameter   slab: Rename node...
846
   * When hotplugging memory or a cpu, existing node are not replaced if
8f9f8d9e8   David Rientjes   slab: add memory ...
847
848
   * already in use.
   *
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
849
   * Must hold slab_mutex.
8f9f8d9e8   David Rientjes   slab: add memory ...
850
   */
6a67368c3   Christoph Lameter   slab: Rename node...
851
  static int init_cache_node_node(int node)
8f9f8d9e8   David Rientjes   slab: add memory ...
852
  {
ded0ecf61   Joonsoo Kim   mm/slab: factor o...
853
  	int ret;
8f9f8d9e8   David Rientjes   slab: add memory ...
854
  	struct kmem_cache *cachep;
8f9f8d9e8   David Rientjes   slab: add memory ...
855

18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
856
  	list_for_each_entry(cachep, &slab_caches, list) {
ded0ecf61   Joonsoo Kim   mm/slab: factor o...
857
858
859
  		ret = init_cache_node(cachep, node, GFP_KERNEL);
  		if (ret)
  			return ret;
8f9f8d9e8   David Rientjes   slab: add memory ...
860
  	}
ded0ecf61   Joonsoo Kim   mm/slab: factor o...
861

8f9f8d9e8   David Rientjes   slab: add memory ...
862
863
  	return 0;
  }
6731d4f12   Sebastian Andrzej Siewior   slab: Convert to ...
864
  #endif
8f9f8d9e8   David Rientjes   slab: add memory ...
865

c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
  static int setup_kmem_cache_node(struct kmem_cache *cachep,
  				int node, gfp_t gfp, bool force_change)
  {
  	int ret = -ENOMEM;
  	struct kmem_cache_node *n;
  	struct array_cache *old_shared = NULL;
  	struct array_cache *new_shared = NULL;
  	struct alien_cache **new_alien = NULL;
  	LIST_HEAD(list);
  
  	if (use_alien_caches) {
  		new_alien = alloc_alien_cache(node, cachep->limit, gfp);
  		if (!new_alien)
  			goto fail;
  	}
  
  	if (cachep->shared) {
  		new_shared = alloc_arraycache(node,
  			cachep->shared * cachep->batchcount, 0xbaadf00d, gfp);
  		if (!new_shared)
  			goto fail;
  	}
  
  	ret = init_cache_node(cachep, node, gfp);
  	if (ret)
  		goto fail;
  
  	n = get_node(cachep, node);
  	spin_lock_irq(&n->list_lock);
  	if (n->shared && force_change) {
  		free_block(cachep, n->shared->entry,
  				n->shared->avail, node, &list);
  		n->shared->avail = 0;
  	}
  
  	if (!n->shared || force_change) {
  		old_shared = n->shared;
  		n->shared = new_shared;
  		new_shared = NULL;
  	}
  
  	if (!n->alien) {
  		n->alien = new_alien;
  		new_alien = NULL;
  	}
  
  	spin_unlock_irq(&n->list_lock);
  	slabs_destroy(cachep, &list);
801faf0db   Joonsoo Kim   mm/slab: lockless...
914
915
916
917
918
919
  	/*
  	 * To protect lockless access to n->shared during irq disabled context.
  	 * If n->shared isn't NULL in irq disabled context, accessing to it is
  	 * guaranteed to be valid until irq is re-enabled, because it will be
  	 * freed after synchronize_sched().
  	 */
86d9f4853   Joonsoo Kim   mm/slab: fix kmem...
920
  	if (old_shared && force_change)
801faf0db   Joonsoo Kim   mm/slab: lockless...
921
  		synchronize_sched();
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
922
923
924
925
926
927
928
  fail:
  	kfree(old_shared);
  	kfree(new_shared);
  	free_alien_cache(new_alien);
  
  	return ret;
  }
6731d4f12   Sebastian Andrzej Siewior   slab: Convert to ...
929
  #ifdef CONFIG_SMP
0db0628d9   Paul Gortmaker   kernel: delete __...
930
  static void cpuup_canceled(long cpu)
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
931
932
  {
  	struct kmem_cache *cachep;
ce8eb6c42   Christoph Lameter   slab: Rename list...
933
  	struct kmem_cache_node *n = NULL;
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
934
  	int node = cpu_to_mem(cpu);
a70f73028   Rusty Russell   cpumask: replace ...
935
  	const struct cpumask *mask = cpumask_of_node(node);
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
936

18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
937
  	list_for_each_entry(cachep, &slab_caches, list) {
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
938
939
  		struct array_cache *nc;
  		struct array_cache *shared;
c8522a3a5   Joonsoo Kim   slab: introduce a...
940
  		struct alien_cache **alien;
97654dfa2   Joonsoo Kim   slab: defer slab_...
941
  		LIST_HEAD(list);
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
942

18bf85411   Christoph Lameter   slab: use get_nod...
943
  		n = get_node(cachep, node);
ce8eb6c42   Christoph Lameter   slab: Rename list...
944
  		if (!n)
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
945
  			continue;
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
946

ce8eb6c42   Christoph Lameter   slab: Rename list...
947
  		spin_lock_irq(&n->list_lock);
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
948

ce8eb6c42   Christoph Lameter   slab: Rename list...
949
950
  		/* Free limit for this kmem_cache_node */
  		n->free_limit -= cachep->batchcount;
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
951
952
953
954
  
  		/* cpu is dead; no one can alloc from it. */
  		nc = per_cpu_ptr(cachep->cpu_cache, cpu);
  		if (nc) {
97654dfa2   Joonsoo Kim   slab: defer slab_...
955
  			free_block(cachep, nc->entry, nc->avail, node, &list);
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
956
957
  			nc->avail = 0;
  		}
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
958

58463c1fe   Rusty Russell   cpumask: avoid de...
959
  		if (!cpumask_empty(mask)) {
ce8eb6c42   Christoph Lameter   slab: Rename list...
960
  			spin_unlock_irq(&n->list_lock);
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
961
  			goto free_slab;
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
962
  		}
ce8eb6c42   Christoph Lameter   slab: Rename list...
963
  		shared = n->shared;
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
964
965
  		if (shared) {
  			free_block(cachep, shared->entry,
97654dfa2   Joonsoo Kim   slab: defer slab_...
966
  				   shared->avail, node, &list);
ce8eb6c42   Christoph Lameter   slab: Rename list...
967
  			n->shared = NULL;
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
968
  		}
ce8eb6c42   Christoph Lameter   slab: Rename list...
969
970
  		alien = n->alien;
  		n->alien = NULL;
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
971

ce8eb6c42   Christoph Lameter   slab: Rename list...
972
  		spin_unlock_irq(&n->list_lock);
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
973
974
975
976
977
978
  
  		kfree(shared);
  		if (alien) {
  			drain_alien_cache(cachep, alien);
  			free_alien_cache(alien);
  		}
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
979
980
  
  free_slab:
97654dfa2   Joonsoo Kim   slab: defer slab_...
981
  		slabs_destroy(cachep, &list);
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
982
983
984
985
986
987
  	}
  	/*
  	 * In the previous loop, all the objects were freed to
  	 * the respective cache's slabs,  now we can go ahead and
  	 * shrink each nodelist to its limit.
  	 */
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
988
  	list_for_each_entry(cachep, &slab_caches, list) {
18bf85411   Christoph Lameter   slab: use get_nod...
989
  		n = get_node(cachep, node);
ce8eb6c42   Christoph Lameter   slab: Rename list...
990
  		if (!n)
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
991
  			continue;
a5aa63a5f   Joonsoo Kim   mm/slab: drain th...
992
  		drain_freelist(cachep, n, INT_MAX);
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
993
994
  	}
  }

static int cpuup_prepare(long cpu)
{
	struct kmem_cache *cachep;
	int node = cpu_to_mem(cpu);
	int err;

	/*
	 * We need to do this right in the beginning since
	 * alloc_arraycache's are going to use this list.
	 * kmalloc_node allows us to add the slab to the right
	 * kmem_cache_node and not this cpu's kmem_cache_node
	 */
	err = init_cache_node_node(node);
	if (err < 0)
		goto bad;

	/*
	 * Now we can go ahead with allocating the shared arrays and
	 * array caches
	 */
	list_for_each_entry(cachep, &slab_caches, list) {
		err = setup_kmem_cache_node(cachep, node, GFP_KERNEL, false);
		if (err)
			goto bad;
	}

	return 0;
bad:
	cpuup_canceled(cpu);
	return -ENOMEM;
}
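
/*
 * Descriptive note: cpuup_prepare() and cpuup_canceled() form a pair.
 * Preparation allocates the per-node and per-cpu structures for every cache
 * before the CPU comes up; on failure, or when the CPU later goes down,
 * cpuup_canceled() walks the same caches and releases whatever was set up,
 * so a partially-onlined CPU never leaves stale array caches behind.
 */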

int slab_prepare_cpu(unsigned int cpu)
{
	int err;

	mutex_lock(&slab_mutex);
	err = cpuup_prepare(cpu);
	mutex_unlock(&slab_mutex);
	return err;
}

/*
 * This is called for a failed online attempt and for a successful
 * offline.
 *
 * Even if all the cpus of a node are down, we don't free the
 * kmem_list3 of any cache. This is to avoid a race between cpu_down and
 * a kmalloc allocation from another cpu for memory from the node of
 * the cpu going down.  The list3 structure is usually allocated from
 * kmem_cache_create() and gets destroyed at kmem_cache_destroy().
 */
int slab_dead_cpu(unsigned int cpu)
{
	mutex_lock(&slab_mutex);
	cpuup_canceled(cpu);
	mutex_unlock(&slab_mutex);
	return 0;
}
#endif
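
/*
 * slab_prepare_cpu() and slab_dead_cpu() are invoked by the CPU hotplug
 * state machine; the registration itself lives outside this file.  As an
 * illustrative sketch only (the exact state constant is an assumption, not
 * taken from this file), a prepare-stage hook of this shape is wired up
 * roughly like:
 *
 *	cpuhp_setup_state(CPUHP_SLAB_PREPARE, "slab:prepare",
 *			  slab_prepare_cpu, slab_dead_cpu);
 *
 * The AP-level online/offline callbacks for the cache reaper are registered
 * below in cpucache_init().
 */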

static int slab_online_cpu(unsigned int cpu)
{
	start_cpu_timer(cpu);
	return 0;
}

static int slab_offline_cpu(unsigned int cpu)
{
	/*
	 * Shutdown cache reaper. Note that the slab_mutex is held so
	 * that if cache_reap() is invoked it cannot do anything
	 * expensive but will only modify reap_work and reschedule the
	 * timer.
	 */
	cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
	/* Now the cache_reaper is guaranteed to be not running. */
	per_cpu(slab_reap_work, cpu).work.func = NULL;
	return 0;
}

#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
/*
 * Drains freelist for a node on each slab cache, used for memory hot-remove.
 * Returns -EBUSY if all objects cannot be drained so that the node is not
 * removed.
 *
 * Must hold slab_mutex.
 */
static int __meminit drain_cache_node_node(int node)
{
	struct kmem_cache *cachep;
	int ret = 0;

	list_for_each_entry(cachep, &slab_caches, list) {
		struct kmem_cache_node *n;

		n = get_node(cachep, node);
		if (!n)
			continue;

		drain_freelist(cachep, n, INT_MAX);

		if (!list_empty(&n->slabs_full) ||
		    !list_empty(&n->slabs_partial)) {
			ret = -EBUSY;
			break;
		}
	}
	return ret;
}
  
static int __meminit slab_memory_callback(struct notifier_block *self,
					unsigned long action, void *arg)
{
	struct memory_notify *mnb = arg;
	int ret = 0;
	int nid;

	nid = mnb->status_change_nid;
	if (nid < 0)
		goto out;

	switch (action) {
	case MEM_GOING_ONLINE:
		mutex_lock(&slab_mutex);
		ret = init_cache_node_node(nid);
		mutex_unlock(&slab_mutex);
		break;
	case MEM_GOING_OFFLINE:
		mutex_lock(&slab_mutex);
		ret = drain_cache_node_node(nid);
		mutex_unlock(&slab_mutex);
		break;
	case MEM_ONLINE:
	case MEM_OFFLINE:
	case MEM_CANCEL_ONLINE:
	case MEM_CANCEL_OFFLINE:
		break;
	}
out:
	return notifier_from_errno(ret);
}
#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
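
/*
 * The memory hotplug notifier above is registered from kmem_cache_init_late()
 * via hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI).
 * A rough sketch of the resulting flow (descriptive summary only):
 *
 *	MEM_GOING_ONLINE  -> init_cache_node_node(nid)   allocate per-node data
 *	MEM_GOING_OFFLINE -> drain_cache_node_node(nid)  fail with -EBUSY if
 *	                                                 objects remain on the node
 */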

/*
 * swap the static kmem_cache_node with kmalloced memory
 */
static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list,
				int nodeid)
{
	struct kmem_cache_node *ptr;

	ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid);
	BUG_ON(!ptr);
	memcpy(ptr, list, sizeof(struct kmem_cache_node));

	/*
	 * Do not assume that spinlocks can be initialized via memcpy:
	 */
	spin_lock_init(&ptr->list_lock);

	MAKE_ALL_LISTS(cachep, ptr, nodeid);
	cachep->node[nodeid] = ptr;
}

/*
 * For setting up all the kmem_cache_node structures for a cache whose
 * buffer_size is the same as the size of kmem_cache_node.
 */
static void __init set_up_node(struct kmem_cache *cachep, int index)
{
	int node;

	for_each_online_node(node) {
		cachep->node[node] = &init_kmem_cache_node[index + node];
		cachep->node[node]->next_reap = jiffies +
		    REAPTIMEOUT_NODE +
		    ((unsigned long)cachep) % REAPTIMEOUT_NODE;
	}
}
  
/*
 * Initialisation.  Called after the page allocator has been initialised and
 * before smp_init().
 */
void __init kmem_cache_init(void)
{
	int i;

	BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
					sizeof(struct rcu_head));
	kmem_cache = &kmem_cache_boot;

	if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1)
		use_alien_caches = 0;

	for (i = 0; i < NUM_INIT_LISTS; i++)
		kmem_cache_node_init(&init_kmem_cache_node[i]);

	/*
	 * Fragmentation resistance on low memory - only use bigger
	 * page orders on machines with more than 32MB of memory if
	 * not overridden on the command line.
	 */
	if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
		slab_max_order = SLAB_MAX_ORDER_HI;

	/* Bootstrap is tricky, because several objects are allocated
	 * from caches that do not exist yet:
	 * 1) initialize the kmem_cache cache: it contains the struct
	 *    kmem_cache structures of all caches, except kmem_cache itself:
	 *    kmem_cache is statically allocated.
	 *    Initially an __init data area is used for the head array and the
	 *    kmem_cache_node structures, it's replaced with a kmalloc allocated
	 *    array at the end of the bootstrap.
	 * 2) Create the first kmalloc cache.
	 *    The struct kmem_cache for the new cache is allocated normally.
	 *    An __init data area is used for the head array.
	 * 3) Create the remaining kmalloc caches, with minimally sized
	 *    head arrays.
	 * 4) Replace the __init data head arrays for kmem_cache and the first
	 *    kmalloc cache with kmalloc allocated arrays.
	 * 5) Replace the __init data for kmem_cache_node for kmem_cache and
	 *    the other caches with kmalloc allocated memory.
	 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
	 */

	/* 1) create the kmem_cache */

	/*
	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
	 */
	create_boot_cache(kmem_cache, "kmem_cache",
		offsetof(struct kmem_cache, node) +
				  nr_node_ids * sizeof(struct kmem_cache_node *),
				  SLAB_HWCACHE_ALIGN);
	list_add(&kmem_cache->list, &slab_caches);
	memcg_link_cache(kmem_cache);
	slab_state = PARTIAL;

	/*
	 * Initialize the caches that provide memory for the kmem_cache_node
	 * structures first.  Without this, further allocations will bug.
	 */
	kmalloc_caches[INDEX_NODE] = create_kmalloc_cache(
				kmalloc_info[INDEX_NODE].name,
				kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
	slab_state = PARTIAL_NODE;
	setup_kmalloc_cache_index_table();

	slab_early_init = 0;

	/* 5) Replace the bootstrap kmem_cache_node */
	{
		int nid;

		for_each_online_node(nid) {
			init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);

			init_list(kmalloc_caches[INDEX_NODE],
					  &init_kmem_cache_node[SIZE_NODE + nid], nid);
		}
	}

	create_kmalloc_caches(ARCH_KMALLOC_FLAGS);
}
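
/*
 * Rough map of the bootstrap state transitions driven by kmem_cache_init()
 * above and kmem_cache_init_late() below (descriptive summary only):
 *
 *	DOWN         -> nothing usable yet
 *	PARTIAL      -> kmem_cache itself can be allocated from
 *	PARTIAL_NODE -> the kmalloc cache backing kmem_cache_node exists
 *	UP           -> the kmalloc caches are usable
 *	FULL         -> cpu caches resized, slab fully operational
 */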
  
void __init kmem_cache_init_late(void)
{
	struct kmem_cache *cachep;

	slab_state = UP;

	/* 6) resize the head arrays to their final sizes */
	mutex_lock(&slab_mutex);
	list_for_each_entry(cachep, &slab_caches, list)
		if (enable_cpucache(cachep, GFP_NOWAIT))
			BUG();
	mutex_unlock(&slab_mutex);

	/* Done! */
	slab_state = FULL;

#ifdef CONFIG_NUMA
	/*
	 * Register a memory hotplug callback that initializes and frees
	 * node.
	 */
	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif

	/*
	 * The reap timers are started later, with a module init call: That part
	 * of the kernel is not yet operational.
	 */
}
  
static int __init cpucache_init(void)
{
	int ret;

	/*
	 * Register the timers that return unneeded pages to the page allocator
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SLAB online",
				slab_online_cpu, slab_offline_cpu);
	WARN_ON(ret < 0);

	/* Done! */
	slab_state = FULL;
	return 0;
}
__initcall(cpucache_init);

static noinline void
slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
{
#if DEBUG
	struct kmem_cache_node *n;
	unsigned long flags;
	int node;
	static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs))
		return;

	pr_warn("SLAB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
		nodeid, gfpflags, &gfpflags);
	pr_warn("  cache: %s, object size: %d, order: %d\n",
		cachep->name, cachep->size, cachep->gfporder);

	for_each_kmem_cache_node(cachep, node, n) {
		unsigned long total_slabs, free_slabs, free_objs;

		spin_lock_irqsave(&n->list_lock, flags);
		total_slabs = n->total_slabs;
		free_slabs = n->free_slabs;
		free_objs = n->free_objects;
		spin_unlock_irqrestore(&n->list_lock, flags);

		pr_warn("  node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
			node, total_slabs - free_slabs, total_slabs,
			(total_slabs * cachep->num) - free_objs,
			total_slabs * cachep->num);
	}
#endif
}
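
/*
 * For reference, the rate-limited report emitted above has this general
 * shape (placeholders instead of real values, purely illustrative):
 *
 *	SLAB: Unable to allocate memory on node <nid>, gfp=<flags>
 *	  cache: <name>, object size: <size>, order: <order>
 *	  node <nid>: slabs: <used>/<total>, objs: <used>/<total>
 */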

/*
 * Interface to system's page allocator. No need to hold the
 * kmem_cache_node ->list_lock.
 *
 * If we requested dmaable memory, we will get it. Even if we
 * did not request dmaable memory, we might get it, but that
 * would be relatively rare and ignorable.
 */
static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
								int nodeid)
{
	struct page *page;
	int nr_pages;

	flags |= cachep->allocflags;
	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
		flags |= __GFP_RECLAIMABLE;

	page = __alloc_pages_node(nodeid, flags, cachep->gfporder);
	if (!page) {
		slab_out_of_memory(cachep, flags, nodeid);
		return NULL;
	}

	if (memcg_charge_slab(page, flags, cachep->gfporder, cachep)) {
		__free_pages(page, cachep->gfporder);
		return NULL;
	}

	nr_pages = (1 << cachep->gfporder);
	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
		mod_lruvec_page_state(page, NR_SLAB_RECLAIMABLE, nr_pages);
	else
		mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE, nr_pages);

	__SetPageSlab(page);
	/* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
	if (sk_memalloc_socks() && page_is_pfmemalloc(page))
		SetPageSlabPfmemalloc(page);

	return page;
}
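
/*
 * Everything kmem_getpages() sets up (memcg charge, NR_SLAB_* vmstat
 * accounting, PG_slab and the pfmemalloc marker) is undone in
 * kmem_freepages() below, so the two paths must stay symmetric when either
 * side is changed.
 */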
  
/*
 * Interface to system's page release.
 */
static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
{
	int order = cachep->gfporder;
	unsigned long nr_freed = (1 << order);

	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
		mod_lruvec_page_state(page, NR_SLAB_RECLAIMABLE, -nr_freed);
	else
		mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE, -nr_freed);

	BUG_ON(!PageSlab(page));
	__ClearPageSlabPfmemalloc(page);
	__ClearPageSlab(page);
	page_mapcount_reset(page);
	page->mapping = NULL;

	if (current->reclaim_state)
		current->reclaim_state->reclaimed_slab += nr_freed;
	memcg_uncharge_slab(page, order, cachep);
	__free_pages(page, order);
}
  
static void kmem_rcu_free(struct rcu_head *head)
{
	struct kmem_cache *cachep;
	struct page *page;

	page = container_of(head, struct page, rcu_head);
	cachep = page->slab_cache;

	kmem_freepages(cachep, page);
}
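
/*
 * kmem_rcu_free() is the RCU callback used by slab_destroy() for caches
 * created with SLAB_TYPESAFE_BY_RCU, so the backing page is only returned
 * to the page allocator after a grace period.  A minimal usage sketch (the
 * cache name and object type are hypothetical):
 *
 *	cachep = kmem_cache_create("foo", sizeof(struct foo),
 *				   0, SLAB_TYPESAFE_BY_RCU, NULL);
 *
 * Readers can then revalidate and use an object under rcu_read_lock() even
 * if it was concurrently freed, because the memory stays type-stable until
 * the grace period ends.
 */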
  
#if DEBUG
static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
{
	if (debug_pagealloc_enabled() && OFF_SLAB(cachep) &&
		(cachep->size % PAGE_SIZE) == 0)
		return true;

	return false;
}

#ifdef CONFIG_DEBUG_PAGEALLOC
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
			    unsigned long caller)
{
	int size = cachep->object_size;

	addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];

	if (size < 5 * sizeof(unsigned long))
		return;

	*addr++ = 0x12345678;
	*addr++ = caller;
	*addr++ = smp_processor_id();
	size -= 3 * sizeof(unsigned long);
	{
		unsigned long *sptr = &caller;
		unsigned long svalue;

		while (!kstack_end(sptr)) {
			svalue = *sptr++;
			if (kernel_text_address(svalue)) {
				*addr++ = svalue;
				size -= sizeof(unsigned long);
				if (size <= sizeof(unsigned long))
					break;
			}
		}

	}
	*addr++ = 0x87654321;
}

static void slab_kernel_map(struct kmem_cache *cachep, void *objp,
				int map, unsigned long caller)
{
	if (!is_debug_pagealloc_cache(cachep))
		return;

	if (caller)
		store_stackinfo(cachep, objp, caller);

	kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
}

#else
static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
				int map, unsigned long caller) {}

#endif

static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
{
	int size = cachep->object_size;
	addr = &((char *)addr)[obj_offset(cachep)];

	memset(addr, val, size);
	*(unsigned char *)(addr + size - 1) = POISON_END;
}
  
static void dump_line(char *data, int offset, int limit)
{
	int i;
	unsigned char error = 0;
	int bad_count = 0;

	pr_err("%03x: ", offset);
	for (i = 0; i < limit; i++) {
		if (data[offset + i] != POISON_FREE) {
			error = data[offset + i];
			bad_count++;
		}
	}
	print_hex_dump(KERN_CONT, "", 0, 16, 1,
			&data[offset], limit, 1);

	if (bad_count == 1) {
		error ^= POISON_FREE;
		if (!(error & (error - 1))) {
			pr_err("Single bit error detected. Probably bad RAM.\n");
#ifdef CONFIG_X86
			pr_err("Run memtest86+ or a similar memory test tool.\n");
#else
			pr_err("Run a memory test tool.\n");
#endif
		}
	}
}
#endif
  
#if DEBUG
static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
{
	int i, size;
	char *realobj;

	if (cachep->flags & SLAB_RED_ZONE) {
		pr_err("Redzone: 0x%llx/0x%llx\n",
		       *dbg_redzone1(cachep, objp),
		       *dbg_redzone2(cachep, objp));
	}

	if (cachep->flags & SLAB_STORE_USER) {
		pr_err("Last user: [<%p>](%pSR)\n",
		       *dbg_userword(cachep, objp),
		       *dbg_userword(cachep, objp));
	}
	realobj = (char *)objp + obj_offset(cachep);
	size = cachep->object_size;
	for (i = 0; i < size && lines; i += 16, lines--) {
		int limit;
		limit = 16;
		if (i + limit > size)
			limit = size - i;
		dump_line(realobj, i, limit);
	}
}

static void check_poison_obj(struct kmem_cache *cachep, void *objp)
{
	char *realobj;
	int size, i;
	int lines = 0;

	if (is_debug_pagealloc_cache(cachep))
		return;

	realobj = (char *)objp + obj_offset(cachep);
	size = cachep->object_size;

	for (i = 0; i < size; i++) {
		char exp = POISON_FREE;
		if (i == size - 1)
			exp = POISON_END;
		if (realobj[i] != exp) {
			int limit;
			/* Mismatch! */
			/* Print header */
			if (lines == 0) {
				pr_err("Slab corruption (%s): %s start=%p, len=%d\n",
				       print_tainted(), cachep->name,
				       realobj, size);
				print_objinfo(cachep, objp, 0);
			}
			/* Hexdump the affected line */
			i = (i / 16) * 16;
			limit = 16;
			if (i + limit > size)
				limit = size - i;
			dump_line(realobj, i, limit);
			i += 16;
			lines++;
			/* Limit to 5 lines */
			if (lines > 5)
				break;
		}
	}
	if (lines != 0) {
		/* Print some data about the neighboring objects, if they
		 * exist:
		 */
		struct page *page = virt_to_head_page(objp);
		unsigned int objnr;

		objnr = obj_to_index(cachep, page, objp);
		if (objnr) {
			objp = index_to_obj(cachep, page, objnr - 1);
			realobj = (char *)objp + obj_offset(cachep);
			pr_err("Prev obj: start=%p, len=%d\n", realobj, size);
			print_objinfo(cachep, objp, 2);
		}
		if (objnr + 1 < cachep->num) {
			objp = index_to_obj(cachep, page, objnr + 1);
			realobj = (char *)objp + obj_offset(cachep);
			pr_err("Next obj: start=%p, len=%d\n", realobj, size);
			print_objinfo(cachep, objp, 2);
		}
	}
}
#endif

#if DEBUG
static void slab_destroy_debugcheck(struct kmem_cache *cachep,
						struct page *page)
{
	int i;

	if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) {
		poison_obj(cachep, page->freelist - obj_offset(cachep),
			POISON_FREE);
	}

	for (i = 0; i < cachep->num; i++) {
		void *objp = index_to_obj(cachep, page, i);

		if (cachep->flags & SLAB_POISON) {
			check_poison_obj(cachep, objp);
			slab_kernel_map(cachep, objp, 1, 0);
		}
		if (cachep->flags & SLAB_RED_ZONE) {
			if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
				slab_error(cachep, "start of a freed object was overwritten");
			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
				slab_error(cachep, "end of a freed object was overwritten");
		}
	}
}
#else
static void slab_destroy_debugcheck(struct kmem_cache *cachep,
						struct page *page)
{
}
#endif

/**
 * slab_destroy - destroy and release all objects in a slab
 * @cachep: cache pointer being destroyed
 * @page: page pointer being destroyed
 *
 * Destroy all the objs in a slab page, and release the mem back to the system.
 * Before calling the slab page must have been unlinked from the cache. The
 * kmem_cache_node ->list_lock is not held/needed.
 */
static void slab_destroy(struct kmem_cache *cachep, struct page *page)
{
	void *freelist;

	freelist = page->freelist;
	slab_destroy_debugcheck(cachep, page);
	if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
		call_rcu(&page->rcu_head, kmem_rcu_free);
	else
		kmem_freepages(cachep, page);

	/*
	 * From now on, we don't use freelist
	 * although actual page can be freed in rcu context
	 */
	if (OFF_SLAB(cachep))
		kmem_cache_free(cachep->freelist_cache, freelist);
}

static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
{
	struct page *page, *n;

	list_for_each_entry_safe(page, n, list, lru) {
		list_del(&page->lru);
		slab_destroy(cachep, page);
	}
}

/**
 * calculate_slab_order - calculate size (page order) of slabs
 * @cachep: pointer to the cache that is being created
 * @size: size of objects to be created in this cache.
 * @flags: slab allocation flags
 *
 * Also calculates the number of objects per slab.
 *
 * This could be made much more intelligent.  For now, try to avoid using
 * high order pages for slabs.  When the gfp() functions are more friendly
 * towards high-order requests, this should be changed.
 */
static size_t calculate_slab_order(struct kmem_cache *cachep,
				size_t size, unsigned long flags)
{
	size_t left_over = 0;
	int gfporder;

	for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
		unsigned int num;
		size_t remainder;

		num = cache_estimate(gfporder, size, flags, &remainder);
		if (!num)
			continue;

		/* Can't handle number of objects more than SLAB_OBJ_MAX_NUM */
		if (num > SLAB_OBJ_MAX_NUM)
			break;

		if (flags & CFLGS_OFF_SLAB) {
			struct kmem_cache *freelist_cache;
			size_t freelist_size;

			freelist_size = num * sizeof(freelist_idx_t);
			freelist_cache = kmalloc_slab(freelist_size, 0u);
			if (!freelist_cache)
				continue;

			/*
			 * Needed to avoid possible looping condition
			 * in cache_grow_begin()
			 */
			if (OFF_SLAB(freelist_cache))
				continue;

			/* check if off slab has enough benefit */
			if (freelist_cache->size > cachep->size / 2)
				continue;
		}

		/* Found something acceptable - save it away */
		cachep->num = num;
		cachep->gfporder = gfporder;
		left_over = remainder;

		/*
		 * A VFS-reclaimable slab tends to have most allocations
		 * as GFP_NOFS and we really don't want to have to be allocating
		 * higher-order pages when we are unable to shrink dcache.
		 */
		if (flags & SLAB_RECLAIM_ACCOUNT)
			break;

		/*
		 * Large number of objects is good, but very large slabs are
		 * currently bad for the gfp()s.
		 */
		if (gfporder >= slab_max_order)
			break;

		/*
		 * Acceptable internal fragmentation?
		 */
		if (left_over * 8 <= (PAGE_SIZE << gfporder))
			break;
	}
	return left_over;
}
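
/*
 * Illustrative example of the order search above (the numbers assume a
 * 4 KiB page, an on-slab freelist and no debug overhead, so they are
 * approximate rather than guaranteed): for 700-byte objects, order 0 fits
 * 5 objects with roughly 596 bytes left over; since 596 * 8 > 4096, the
 * loop moves on to order 1, where 11 objects leave roughly 492 bytes and
 * 492 * 8 <= 8192, so order 1 is accepted.
 */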

static struct array_cache __percpu *alloc_kmem_cache_cpus(
		struct kmem_cache *cachep, int entries, int batchcount)
{
	int cpu;
	size_t size;
	struct array_cache __percpu *cpu_cache;

	size = sizeof(void *) * entries + sizeof(struct array_cache);
	cpu_cache = __alloc_percpu(size, sizeof(void *));

	if (!cpu_cache)
		return NULL;

	for_each_possible_cpu(cpu) {
		init_arraycache(per_cpu_ptr(cpu_cache, cpu),
				entries, batchcount);
	}

	return cpu_cache;
}

static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	if (slab_state >= FULL)
		return enable_cpucache(cachep, gfp);

	cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1);
	if (!cachep->cpu_cache)
		return 1;

	if (slab_state == DOWN) {
		/* Creation of first cache (kmem_cache). */
		set_up_node(kmem_cache, CACHE_CACHE);
	} else if (slab_state == PARTIAL) {
		/* For kmem_cache_node */
		set_up_node(cachep, SIZE_NODE);
	} else {
		int node;

		for_each_online_node(node) {
			cachep->node[node] = kmalloc_node(
				sizeof(struct kmem_cache_node), gfp, node);
			BUG_ON(!cachep->node[node]);
			kmem_cache_node_init(cachep->node[node]);
		}
	}

	cachep->node[numa_mem_id()]->next_reap =
			jiffies + REAPTIMEOUT_NODE +
			((unsigned long)cachep) % REAPTIMEOUT_NODE;

	cpu_cache_get(cachep)->avail = 0;
	cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
	cpu_cache_get(cachep)->batchcount = 1;
	cpu_cache_get(cachep)->touched = 0;
	cachep->batchcount = 1;
	cachep->limit = BOOT_CPUCACHE_ENTRIES;
	return 0;
}

unsigned long kmem_cache_flags(unsigned long object_size,
	unsigned long flags, const char *name,
	void (*ctor)(void *))
{
	return flags;
}

struct kmem_cache *
__kmem_cache_alias(const char *name, size_t size, size_t align,
		   unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *cachep;

	cachep = find_mergeable(size, align, flags, name, ctor);
	if (cachep) {
		cachep->refcount++;

		/*
		 * Adjust the object sizes so that we clear
		 * the complete object on kzalloc.
		 */
		cachep->object_size = max_t(int, cachep->object_size, size);
	}
	return cachep;
}
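
/*
 * __kmem_cache_alias() is what makes cache merging work: when a new cache
 * is requested and find_mergeable() locates an existing compatible cache
 * (same size class, alignment and flags, no constructor), that cache is
 * reused and only its refcount grows.  Two hypothetical caches of similar
 * size created like
 *
 *	kmem_cache_create("foo", 96, 0, 0, NULL);
 *	kmem_cache_create("bar", 96, 0, 0, NULL);
 *
 * may therefore end up backed by the same kmem_cache, unless merging has
 * been disabled.
 */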

static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
			size_t size, unsigned long flags)
{
	size_t left;

	cachep->num = 0;

	if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
		return false;

	left = calculate_slab_order(cachep, size,
			flags | CFLGS_OBJFREELIST_SLAB);
	if (!cachep->num)
		return false;

	if (cachep->num * sizeof(freelist_idx_t) > cachep->object_size)
		return false;

	cachep->colour = left / cachep->colour_off;

	return true;
}

static bool set_off_slab_cache(struct kmem_cache *cachep,
			size_t size, unsigned long flags)
{
	size_t left;

	cachep->num = 0;

	/*
	 * Always use on-slab management when SLAB_NOLEAKTRACE
	 * to avoid recursive calls into kmemleak.
	 */
	if (flags & SLAB_NOLEAKTRACE)
		return false;

	/*
	 * Size is large, assume best to place the slab management obj
	 * off-slab (should allow better packing of objs).
	 */
	left = calculate_slab_order(cachep, size, flags | CFLGS_OFF_SLAB);
	if (!cachep->num)
		return false;

	/*
	 * If the slab has been placed off-slab, and we have enough space then
	 * move it on-slab. This is at the expense of any extra colouring.
	 */
	if (left >= cachep->num * sizeof(freelist_idx_t))
		return false;

	cachep->colour = left / cachep->colour_off;

	return true;
}

static bool set_on_slab_cache(struct kmem_cache *cachep,
			size_t size, unsigned long flags)
{
	size_t left;

	cachep->num = 0;

	left = calculate_slab_order(cachep, size, flags);
	if (!cachep->num)
		return false;

	cachep->colour = left / cachep->colour_off;

	return true;
}
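
/*
 * __kmem_cache_create() below tries these layout helpers in order of
 * preference: set_objfreelist_slab_cache() (freelist stored in an otherwise
 * unused object), then set_off_slab_cache() (freelist kept in a separate
 * kmalloc allocation), then set_on_slab_cache() (freelist at the start of
 * the slab page).  The first one that yields a usable object count wins.
 * None of this is visible to callers; a typical cache creation (names
 * hypothetical) is simply:
 *
 *	cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
 *				   0, SLAB_HWCACHE_ALIGN, NULL);
 *	obj = kmem_cache_alloc(cachep, GFP_KERNEL);
 */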
4d268eba1   Pekka Enberg   [PATCH] slab: ext...
1892
  /**
039363f38   Christoph Lameter   mm, sl[aou]b: Ext...
1893
   * __kmem_cache_create - Create a cache.
a755b76ab   Randy Dunlap   mm: fix slab.c ke...
1894
   * @cachep: cache management descriptor
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1895
   * @flags: SLAB flags
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1896
1897
1898
   *
   * Returns a ptr to the cache on success, NULL on failure.
   * Cannot be called within a int, but can be interrupted.
20c2df83d   Paul Mundt   mm: Remove slab d...
1899
   * The @ctor is run when new pages are allocated by the cache.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1900
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1901
1902
1903
1904
1905
1906
1907
1908
   * The flags are
   *
   * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
   * to catch references to uninitialised memory.
   *
   * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
   * for buffer overruns.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1909
1910
1911
1912
   * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
   * cacheline.  This can be beneficial if you're counting cycles as closely
   * as davem.
   */
278b1bb13   Christoph Lameter   mm/sl[aou]b: Move...
1913
  int
8a13a4cc8   Christoph Lameter   mm/sl[aou]b: Shri...
1914
  __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1915
  {
d4a5fca59   David Rientjes   mm, slab: initial...
1916
  	size_t ralign = BYTES_PER_WORD;
83b519e8b   Pekka Enberg   slab: setup alloc...
1917
  	gfp_t gfp;
278b1bb13   Christoph Lameter   mm/sl[aou]b: Move...
1918
  	int err;
8a13a4cc8   Christoph Lameter   mm/sl[aou]b: Shri...
1919
  	size_t size = cachep->size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1920

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1921
  #if DEBUG
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1922
1923
1924
1925
1926
1927
1928
  #if FORCED_DEBUG
  	/*
  	 * Enable redzoning and last user accounting, except for caches with
  	 * large objects, if the increased size would increase the object size
  	 * above the next power of two: caches with object sizes just above a
  	 * power of two have a significant amount of internal fragmentation.
  	 */
87a927c71   David Woodhouse   Fix slab redzone ...
1929
1930
  	if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
  						2 * sizeof(unsigned long long)))
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
1931
  		flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
5f0d5a3ae   Paul E. McKenney   mm: Rename SLAB_D...
1932
  	if (!(flags & SLAB_TYPESAFE_BY_RCU))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1933
1934
  		flags |= SLAB_POISON;
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1935
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1936

a737b3e2f   Andrew Morton   [PATCH] slab cleanup
1937
1938
  	/*
  	 * Check that size is in terms of words.  This is needed to avoid
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1939
1940
1941
  	 * unaligned accesses for some archs when redzoning is used, and makes
  	 * sure any on-slab bufctl's are also correctly aligned.
  	 */
e07719502   Canjiang Lu   mm/slab.c: replac...
1942
  	size = ALIGN(size, BYTES_PER_WORD);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1943

87a927c71   David Woodhouse   Fix slab redzone ...
1944
1945
1946
1947
  	if (flags & SLAB_RED_ZONE) {
  		ralign = REDZONE_ALIGN;
  		/* If redzoning, ensure that the second redzone is suitably
  		 * aligned, by adjusting the object size accordingly. */
e07719502   Canjiang Lu   mm/slab.c: replac...
1948
  		size = ALIGN(size, REDZONE_ALIGN);
87a927c71   David Woodhouse   Fix slab redzone ...
1949
  	}
ca5f9703d   Pekka Enberg   [PATCH] slab: res...
1950

a44b56d35   Kevin Hilman   [PATCH] slab debu...
1951
  	/* 3) caller mandated alignment */
8a13a4cc8   Christoph Lameter   mm/sl[aou]b: Shri...
1952
1953
  	if (ralign < cachep->align) {
  		ralign = cachep->align;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1954
  	}
3ff84a7f3   Pekka Enberg   Revert "slab: Fix...
1955
1956
  	/* disable debug if necessary */
  	if (ralign > __alignof__(unsigned long long))
a44b56d35   Kevin Hilman   [PATCH] slab debu...
1957
  		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
1958
  	/*
ca5f9703d   Pekka Enberg   [PATCH] slab: res...
1959
  	 * 4) Store it.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1960
  	 */
8a13a4cc8   Christoph Lameter   mm/sl[aou]b: Shri...
1961
  	cachep->align = ralign;
158e319bb   Joonsoo Kim   mm/slab: clean up...
1962
1963
1964
1965
  	cachep->colour_off = cache_line_size();
  	/* Offset must be a multiple of the alignment. */
  	if (cachep->colour_off < cachep->align)
  		cachep->colour_off = cachep->align;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1966

83b519e8b   Pekka Enberg   slab: setup alloc...
1967
1968
1969
1970
  	if (slab_is_available())
  		gfp = GFP_KERNEL;
  	else
  		gfp = GFP_NOWAIT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1971
  #if DEBUG
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1972

ca5f9703d   Pekka Enberg   [PATCH] slab: res...
1973
1974
1975
1976
  	/*
  	 * Both debugging options require word-alignment which is calculated
  	 * into align above.
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1977
  	if (flags & SLAB_RED_ZONE) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1978
  		/* add space for red zone words */
3ff84a7f3   Pekka Enberg   Revert "slab: Fix...
1979
1980
  		cachep->obj_offset += sizeof(unsigned long long);
  		size += 2 * sizeof(unsigned long long);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1981
1982
  	}
  	if (flags & SLAB_STORE_USER) {
ca5f9703d   Pekka Enberg   [PATCH] slab: res...
1983
  		/* user store requires one word storage behind the end of
87a927c71   David Woodhouse   Fix slab redzone ...
1984
1985
  		 * the real object. But if the second red zone needs to be
  		 * aligned to 64 bits, we must allow that much space.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1986
  		 */
87a927c71   David Woodhouse   Fix slab redzone ...
1987
1988
1989
1990
  		if (flags & SLAB_RED_ZONE)
  			size += REDZONE_ALIGN;
  		else
  			size += BYTES_PER_WORD;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1991
  	}
832a15d20   Joonsoo Kim   mm/slab: align ca...
1992
  #endif
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
1993
  	kasan_cache_create(cachep, &size, &flags);
832a15d20   Joonsoo Kim   mm/slab: align ca...
1994
1995
1996
1997
1998
1999
2000
2001
2002
  	size = ALIGN(size, cachep->align);
  	/*
  	 * We should restrict the number of objects in a slab to implement
  	 * byte sized index. Refer comment on SLAB_OBJ_MIN_SIZE definition.
  	 */
  	if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
  		size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);
  
  #if DEBUG
03a2d2a3e   Joonsoo Kim   mm/slab: fix unex...
2003
2004
2005
2006
2007
2008
2009
  	/*
  	 * To activate debug pagealloc, off-slab management is a necessary
  	 * requirement. In the early phase of initialization, small sized slabs
  	 * don't get initialized, so it would not be possible. So, we need
  	 * to check size >= 256. It guarantees that all necessary small
  	 * sized slabs are initialized in the current slab initialization sequence.
  	 */
40323278b   Joonsoo Kim   mm/slab: use more...
2010
  	if (debug_pagealloc_enabled() && (flags & SLAB_POISON) &&
f3a3c320d   Joonsoo Kim   mm/slab: do not c...
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
  		size >= 256 && cachep->object_size > cache_line_size()) {
  		if (size < PAGE_SIZE || size % PAGE_SIZE == 0) {
  			size_t tmp_size = ALIGN(size, PAGE_SIZE);
  
  			if (set_off_slab_cache(cachep, tmp_size, flags)) {
  				flags |= CFLGS_OFF_SLAB;
  				cachep->obj_offset += tmp_size - size;
  				size = tmp_size;
  				goto done;
  			}
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2022
2023
  	}
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2024

b03a017be   Joonsoo Kim   mm/slab: introduc...
2025
2026
2027
2028
  	if (set_objfreelist_slab_cache(cachep, size, flags)) {
  		flags |= CFLGS_OBJFREELIST_SLAB;
  		goto done;
  	}
158e319bb   Joonsoo Kim   mm/slab: clean up...
2029
  	if (set_off_slab_cache(cachep, size, flags)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2030
  		flags |= CFLGS_OFF_SLAB;
158e319bb   Joonsoo Kim   mm/slab: clean up...
2031
  		goto done;
832a15d20   Joonsoo Kim   mm/slab: align ca...
2032
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2033

158e319bb   Joonsoo Kim   mm/slab: clean up...
2034
2035
  	if (set_on_slab_cache(cachep, size, flags))
  		goto done;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2036

158e319bb   Joonsoo Kim   mm/slab: clean up...
2037
  	return -E2BIG;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2038

158e319bb   Joonsoo Kim   mm/slab: clean up...
2039
2040
  done:
  	cachep->freelist_size = cachep->num * sizeof(freelist_idx_t);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2041
  	cachep->flags = flags;
a57a49887   Joonsoo Kim   slab: use __GFP_C...
2042
  	cachep->allocflags = __GFP_COMP;
a3187e438   Yang Shi   mm: slab: remove ...
2043
  	if (flags & SLAB_CACHE_DMA)
a618e89f1   Glauber Costa   slab: rename gfpf...
2044
  		cachep->allocflags |= GFP_DMA;
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
2045
  	cachep->size = size;
6a2d7a955   Eric Dumazet   [PATCH] SLAB: use...
2046
  	cachep->reciprocal_buffer_size = reciprocal_value(size);
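  	/*
  	 * Illustration (an assumption about how this value is consumed
  	 * elsewhere in this file, not a statement from the code above):
  	 * obj_to_index() can compute (obj - page->s_mem) / size as
  	 * reciprocal_divide(offset, cachep->reciprocal_buffer_size),
  	 * trading a per-object division for a multiplication by this
  	 * cached reciprocal.
  	 */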
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2047

40b441379   Joonsoo Kim   mm/slab: clean up...
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
  #if DEBUG
  	/*
  	 * If we're going to use the generic kernel_map_pages()
  	 * poisoning, then it's going to smash the contents of
  	 * the redzone and userword anyhow, so switch them off.
  	 */
  	if (IS_ENABLED(CONFIG_PAGE_POISONING) &&
  		(cachep->flags & SLAB_POISON) &&
  		is_debug_pagealloc_cache(cachep))
  		cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
  #endif
  
  	if (OFF_SLAB(cachep)) {
158e319bb   Joonsoo Kim   mm/slab: clean up...
2061
2062
  		cachep->freelist_cache =
  			kmalloc_slab(cachep->freelist_size, 0u);
e5ac9c5ae   Ravikiran G Thirumalai   [PATCH] Add some ...
2063
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2064

278b1bb13   Christoph Lameter   mm/sl[aou]b: Move...
2065
2066
  	err = setup_cpu_cache(cachep, gfp);
  	if (err) {
52b4b950b   Dmitry Safonov   mm: slab: free km...
2067
  		__kmem_cache_release(cachep);
278b1bb13   Christoph Lameter   mm/sl[aou]b: Move...
2068
  		return err;
2ed3a4ef9   Christoph Lameter   [PATCH] slab: do ...
2069
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2070

278b1bb13   Christoph Lameter   mm/sl[aou]b: Move...
2071
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2072
  }
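  
  /*
   * Illustrative usage sketch (not part of this file; the struct and cache
   * names below are made up): a kmem_cache_create() call from client code is
   * what ultimately exercises the sizing, alignment and debug-layout
   * decisions made above.
   *
   *	struct foo { int refcnt; char name[24]; };
   *	static struct kmem_cache *foo_cache;
   *
   *	foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
   *				      __alignof__(struct foo),
   *				      SLAB_RED_ZONE | SLAB_POISON, NULL);
   *	if (!foo_cache)
   *		return -ENOMEM;
   *
   *	obj = kmem_cache_alloc(foo_cache, GFP_KERNEL);
   *	...
   *	kmem_cache_free(foo_cache, obj);
   *	kmem_cache_destroy(foo_cache);
   */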
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
  
  #if DEBUG
  static void check_irq_off(void)
  {
  	BUG_ON(!irqs_disabled());
  }
  
  static void check_irq_on(void)
  {
  	BUG_ON(irqs_disabled());
  }
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
2084
2085
2086
2087
  static void check_mutex_acquired(void)
  {
  	BUG_ON(!mutex_is_locked(&slab_mutex));
  }
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
2088
  static void check_spinlock_acquired(struct kmem_cache *cachep)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2089
2090
2091
  {
  #ifdef CONFIG_SMP
  	check_irq_off();
18bf85411   Christoph Lameter   slab: use get_nod...
2092
  	assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2093
2094
  #endif
  }
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2095

343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
2096
  static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2097
2098
2099
  {
  #ifdef CONFIG_SMP
  	check_irq_off();
18bf85411   Christoph Lameter   slab: use get_nod...
2100
  	assert_spin_locked(&get_node(cachep, node)->list_lock);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2101
2102
  #endif
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2103
2104
2105
  #else
  #define check_irq_off()	do { } while(0)
  #define check_irq_on()	do { } while(0)
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
2106
  #define check_mutex_acquired()	do { } while(0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2107
  #define check_spinlock_acquired(x) do { } while(0)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2108
  #define check_spinlock_acquired_node(x, y) do { } while(0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2109
  #endif
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
  static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
  				int node, bool free_all, struct list_head *list)
  {
  	int tofree;
  
  	if (!ac || !ac->avail)
  		return;
  
  	tofree = free_all ? ac->avail : (ac->limit + 4) / 5;
  	if (tofree > ac->avail)
  		tofree = (ac->avail + 1) / 2;
  
  	free_block(cachep, ac->entry, tofree, node, list);
  	ac->avail -= tofree;
  	memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail);
  }
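  
  /*
   * Worked example (numbers are illustrative only): with ac->limit == 120 and
   * free_all == false, tofree starts as (120 + 4) / 5 == 24.  If only
   * ac->avail == 10 objects are currently cached, tofree is clamped to
   * (10 + 1) / 2 == 5, so roughly half of the small array is given back and
   * the remaining 5 pointers are slid to the front by the memmove() above.
   */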
aab2207cf   Christoph Lameter   [PATCH] slab: mak...
2126

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2127
2128
  static void do_drain(void *arg)
  {
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2129
  	struct kmem_cache *cachep = arg;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2130
  	struct array_cache *ac;
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
2131
  	int node = numa_mem_id();
18bf85411   Christoph Lameter   slab: use get_nod...
2132
  	struct kmem_cache_node *n;
97654dfa2   Joonsoo Kim   slab: defer slab_...
2133
  	LIST_HEAD(list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2134
2135
  
  	check_irq_off();
9a2dba4b4   Pekka Enberg   [PATCH] slab: ren...
2136
  	ac = cpu_cache_get(cachep);
18bf85411   Christoph Lameter   slab: use get_nod...
2137
2138
  	n = get_node(cachep, node);
  	spin_lock(&n->list_lock);
97654dfa2   Joonsoo Kim   slab: defer slab_...
2139
  	free_block(cachep, ac->entry, ac->avail, node, &list);
18bf85411   Christoph Lameter   slab: use get_nod...
2140
  	spin_unlock(&n->list_lock);
97654dfa2   Joonsoo Kim   slab: defer slab_...
2141
  	slabs_destroy(cachep, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2142
2143
  	ac->avail = 0;
  }
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
2144
  static void drain_cpu_caches(struct kmem_cache *cachep)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2145
  {
ce8eb6c42   Christoph Lameter   slab: Rename list...
2146
  	struct kmem_cache_node *n;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2147
  	int node;
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
2148
  	LIST_HEAD(list);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2149

15c8b6c1a   Jens Axboe   on_each_cpu(): ki...
2150
  	on_each_cpu(do_drain, cachep, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2151
  	check_irq_on();
18bf85411   Christoph Lameter   slab: use get_nod...
2152
2153
  	for_each_kmem_cache_node(cachep, node, n)
  		if (n->alien)
ce8eb6c42   Christoph Lameter   slab: Rename list...
2154
  			drain_alien_cache(cachep, n->alien);
a4523a8b3   Roland Dreier   [PATCH] slab: Fix...
2155

18726ca8b   Joonsoo Kim   mm/slab: fix the ...
2156
2157
2158
2159
2160
2161
2162
  	for_each_kmem_cache_node(cachep, node, n) {
  		spin_lock_irq(&n->list_lock);
  		drain_array_locked(cachep, n->shared, node, true, &list);
  		spin_unlock_irq(&n->list_lock);
  
  		slabs_destroy(cachep, &list);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2163
  }
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2164
2165
2166
2167
2168
2169
2170
  /*
   * Remove slabs from the list of free slabs.
   * Specify the number of slabs to drain in tofree.
   *
   * Returns the actual number of slabs released.
   */
  static int drain_freelist(struct kmem_cache *cache,
ce8eb6c42   Christoph Lameter   slab: Rename list...
2171
  			struct kmem_cache_node *n, int tofree)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2172
  {
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2173
2174
  	struct list_head *p;
  	int nr_freed;
8456a648c   Joonsoo Kim   slab: use struct ...
2175
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2176

ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2177
  	nr_freed = 0;
ce8eb6c42   Christoph Lameter   slab: Rename list...
2178
  	while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2179

ce8eb6c42   Christoph Lameter   slab: Rename list...
2180
2181
2182
2183
  		spin_lock_irq(&n->list_lock);
  		p = n->slabs_free.prev;
  		if (p == &n->slabs_free) {
  			spin_unlock_irq(&n->list_lock);
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2184
2185
  			goto out;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2186

8456a648c   Joonsoo Kim   slab: use struct ...
2187
  		page = list_entry(p, struct page, lru);
8456a648c   Joonsoo Kim   slab: use struct ...
2188
  		list_del(&page->lru);
f728b0a5d   Greg Thelen   mm, slab: faster ...
2189
  		n->free_slabs--;
bf00bd345   David Rientjes   mm, slab: maintai...
2190
  		n->total_slabs--;
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2191
2192
2193
2194
  		/*
  		 * Safe to drop the lock. The slab is no longer linked
  		 * to the cache.
  		 */
ce8eb6c42   Christoph Lameter   slab: Rename list...
2195
2196
  		n->free_objects -= cache->num;
  		spin_unlock_irq(&n->list_lock);
8456a648c   Joonsoo Kim   slab: use struct ...
2197
  		slab_destroy(cache, page);
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2198
  		nr_freed++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2199
  	}
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2200
2201
  out:
  	return nr_freed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2202
  }
c9fc58640   Tejun Heo   slab: introduce _...
2203
  int __kmem_cache_shrink(struct kmem_cache *cachep)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2204
  {
18bf85411   Christoph Lameter   slab: use get_nod...
2205
2206
  	int ret = 0;
  	int node;
ce8eb6c42   Christoph Lameter   slab: Rename list...
2207
  	struct kmem_cache_node *n;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2208
2209
2210
2211
  
  	drain_cpu_caches(cachep);
  
  	check_irq_on();
18bf85411   Christoph Lameter   slab: use get_nod...
2212
  	for_each_kmem_cache_node(cachep, node, n) {
a5aa63a5f   Joonsoo Kim   mm/slab: drain th...
2213
  		drain_freelist(cachep, n, INT_MAX);
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2214

ce8eb6c42   Christoph Lameter   slab: Rename list...
2215
2216
  		ret += !list_empty(&n->slabs_full) ||
  			!list_empty(&n->slabs_partial);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2217
2218
2219
  	}
  	return (ret ? 1 : 0);
  }
c9fc58640   Tejun Heo   slab: introduce _...
2220
2221
2222
2223
2224
2225
  #ifdef CONFIG_MEMCG
  void __kmemcg_cache_deactivate(struct kmem_cache *cachep)
  {
  	__kmem_cache_shrink(cachep);
  }
  #endif
945cf2b61   Christoph Lameter   mm/sl[aou]b: Extr...
2226
  int __kmem_cache_shutdown(struct kmem_cache *cachep)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2227
  {
c9fc58640   Tejun Heo   slab: introduce _...
2228
  	return __kmem_cache_shrink(cachep);
52b4b950b   Dmitry Safonov   mm: slab: free km...
2229
2230
2231
2232
  }
  
  void __kmem_cache_release(struct kmem_cache *cachep)
  {
12c3667fb   Christoph Lameter   mm/sl[aou]b: Get ...
2233
  	int i;
ce8eb6c42   Christoph Lameter   slab: Rename list...
2234
  	struct kmem_cache_node *n;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2235

c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2236
  	cache_random_seq_destroy(cachep);
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
2237
  	free_percpu(cachep->cpu_cache);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2238

ce8eb6c42   Christoph Lameter   slab: Rename list...
2239
  	/* NUMA: free the node structures */
18bf85411   Christoph Lameter   slab: use get_nod...
2240
2241
2242
2243
2244
  	for_each_kmem_cache_node(cachep, i, n) {
  		kfree(n->shared);
  		free_alien_cache(n->alien);
  		kfree(n);
  		cachep->node[i] = NULL;
12c3667fb   Christoph Lameter   mm/sl[aou]b: Get ...
2245
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2246
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2247

e5ac9c5ae   Ravikiran G Thirumalai   [PATCH] Add some ...
2248
2249
  /*
   * Get the memory for a slab management obj.
5f0985bb1   Jianyu Zhan   mm/slab.c: cleanu...
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
   *
   * For a slab cache when the slab descriptor is off-slab, the
   * slab descriptor can't come from the same cache which is being created.
   * If it did, we would defer the creation of
   * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point,
   * and we would eventually call down to __kmem_cache_create(), which
   * in turn looks up in the kmalloc_{dma,}_caches for the desired-size one.
   * This is a "chicken-and-egg" problem.
   *
   * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
   * which are all initialized during kmem_cache_init().
e5ac9c5ae   Ravikiran G Thirumalai   [PATCH] Add some ...
2261
   */
7e0073552   Joonsoo Kim   slab: replace non...
2262
  static void *alloc_slabmgmt(struct kmem_cache *cachep,
0c3aa83e0   Joonsoo Kim   slab: change retu...
2263
2264
  				   struct page *page, int colour_off,
  				   gfp_t local_flags, int nodeid)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2265
  {
7e0073552   Joonsoo Kim   slab: replace non...
2266
  	void *freelist;
0c3aa83e0   Joonsoo Kim   slab: change retu...
2267
  	void *addr = page_address(page);
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
2268

2e6b36021   Joonsoo Kim   mm/slab: put the ...
2269
2270
  	page->s_mem = addr + colour_off;
  	page->active = 0;
b03a017be   Joonsoo Kim   mm/slab: introduc...
2271
2272
2273
  	if (OBJFREELIST_SLAB(cachep))
  		freelist = NULL;
  	else if (OFF_SLAB(cachep)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2274
  		/* Slab management obj is off-slab. */
8456a648c   Joonsoo Kim   slab: use struct ...
2275
  		freelist = kmem_cache_alloc_node(cachep->freelist_cache,
8759ec50a   Pekka Enberg   slab: remove GFP_...
2276
  					      local_flags, nodeid);
8456a648c   Joonsoo Kim   slab: use struct ...
2277
  		if (!freelist)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2278
2279
  			return NULL;
  	} else {
2e6b36021   Joonsoo Kim   mm/slab: put the ...
2280
2281
2282
  		/* We will use the last bytes of the slab for the freelist */
  		freelist = addr + (PAGE_SIZE << cachep->gfporder) -
  				cachep->freelist_size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2283
  	}
2e6b36021   Joonsoo Kim   mm/slab: put the ...
2284

8456a648c   Joonsoo Kim   slab: use struct ...
2285
  	return freelist;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2286
  }
7cc68973c   Joonsoo Kim   slab: fix the typ...
2287
  static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2288
  {
a41adfaa2   Joonsoo Kim   slab: introduce b...
2289
  	return ((freelist_idx_t *)page->freelist)[idx];
e5c58dfdc   Joonsoo Kim   slab: introduce h...
2290
2291
2292
  }
  
  static inline void set_free_obj(struct page *page,
7cc68973c   Joonsoo Kim   slab: fix the typ...
2293
  					unsigned int idx, freelist_idx_t val)
e5c58dfdc   Joonsoo Kim   slab: introduce h...
2294
  {
a41adfaa2   Joonsoo Kim   slab: introduce b...
2295
  	((freelist_idx_t *)(page->freelist))[idx] = val;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2296
  }
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2297
  static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2298
  {
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2299
  #if DEBUG
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2300
2301
2302
  	int i;
  
  	for (i = 0; i < cachep->num; i++) {
8456a648c   Joonsoo Kim   slab: use struct ...
2303
  		void *objp = index_to_obj(cachep, page, i);
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2304

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2305
2306
2307
2308
2309
2310
2311
2312
  		if (cachep->flags & SLAB_STORE_USER)
  			*dbg_userword(cachep, objp) = NULL;
  
  		if (cachep->flags & SLAB_RED_ZONE) {
  			*dbg_redzone1(cachep, objp) = RED_INACTIVE;
  			*dbg_redzone2(cachep, objp) = RED_INACTIVE;
  		}
  		/*
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2313
2314
2315
  		 * Constructors are not allowed to allocate memory from the same
  		 * cache which they are a constructor for.  Otherwise, deadlock.
  		 * They must also be threaded.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2316
  		 */
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
2317
2318
2319
  		if (cachep->ctor && !(cachep->flags & SLAB_POISON)) {
  			kasan_unpoison_object_data(cachep,
  						   objp + obj_offset(cachep));
51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
2320
  			cachep->ctor(objp + obj_offset(cachep));
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
2321
2322
2323
  			kasan_poison_object_data(
  				cachep, objp + obj_offset(cachep));
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2324
2325
2326
  
  		if (cachep->flags & SLAB_RED_ZONE) {
  			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
756a025f0   Joe Perches   mm: coalesce spli...
2327
  				slab_error(cachep, "constructor overwrote the end of an object");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2328
  			if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
756a025f0   Joe Perches   mm: coalesce spli...
2329
  				slab_error(cachep, "constructor overwrote the start of an object");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2330
  		}
40b441379   Joonsoo Kim   mm/slab: clean up...
2331
2332
2333
2334
2335
  		/* need to poison the objs? */
  		if (cachep->flags & SLAB_POISON) {
  			poison_obj(cachep, objp, POISON_FREE);
  			slab_kernel_map(cachep, objp, 0, 0);
  		}
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2336
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2337
  #endif
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2338
  }
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2339
2340
2341
2342
2343
  #ifdef CONFIG_SLAB_FREELIST_RANDOM
  /* Hold information during a freelist initialization */
  union freelist_init_state {
  	struct {
  		unsigned int pos;
7c00fce98   Thomas Garnier   mm: reorganize SL...
2344
  		unsigned int *list;
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2345
  		unsigned int count;
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
  	};
  	struct rnd_state rnd_state;
  };
  
  /*
   * Initialize the state based on the randomization method available.
   * Return true if the pre-computed list is available, false otherwise.
   */
  static bool freelist_state_initialize(union freelist_init_state *state,
  				struct kmem_cache *cachep,
  				unsigned int count)
  {
  	bool ret;
  	unsigned int rand;
  
  	/* Use best entropy available to define a random shift */
7c00fce98   Thomas Garnier   mm: reorganize SL...
2362
  	rand = get_random_int();
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2363
2364
2365
2366
2367
2368
2369
2370
  
  	/* Use a random state if the pre-computed list is not available */
  	if (!cachep->random_seq) {
  		prandom_seed_state(&state->rnd_state, rand);
  		ret = false;
  	} else {
  		state->list = cachep->random_seq;
  		state->count = count;
c4e490cf1   John Sperbeck   mm/slab.c: fix SL...
2371
  		state->pos = rand % count;
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2372
2373
2374
2375
2376
2377
2378
2379
  		ret = true;
  	}
  	return ret;
  }
  
  /* Get the next entry on the list and randomize it using a random shift */
  static freelist_idx_t next_random_slot(union freelist_init_state *state)
  {
c4e490cf1   John Sperbeck   mm/slab.c: fix SL...
2380
2381
2382
  	if (state->pos >= state->count)
  		state->pos = 0;
  	return state->list[state->pos++];
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2383
  }
7c00fce98   Thomas Garnier   mm: reorganize SL...
2384
2385
2386
2387
2388
2389
  /* Swap two freelist entries */
  static void swap_free_obj(struct page *page, unsigned int a, unsigned int b)
  {
  	swap(((freelist_idx_t *)page->freelist)[a],
  		((freelist_idx_t *)page->freelist)[b]);
  }
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2390
2391
2392
2393
2394
2395
  /*
   * Shuffle the freelist initialization state based on pre-computed lists.
   * return true if the list was successfully shuffled, false otherwise.
   */
  static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
  {
7c00fce98   Thomas Garnier   mm: reorganize SL...
2396
  	unsigned int objfreelist = 0, i, rand, count = cachep->num;
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
  	union freelist_init_state state;
  	bool precomputed;
  
  	if (count < 2)
  		return false;
  
  	precomputed = freelist_state_initialize(&state, cachep, count);
  
  	/* Take a random entry as the objfreelist */
  	if (OBJFREELIST_SLAB(cachep)) {
  		if (!precomputed)
  			objfreelist = count - 1;
  		else
  			objfreelist = next_random_slot(&state);
  		page->freelist = index_to_obj(cachep, page, objfreelist) +
  						obj_offset(cachep);
  		count--;
  	}
  
  	/*
  	 * On early boot, generate the list dynamically.
  	 * Later use a pre-computed list for speed.
  	 */
  	if (!precomputed) {
7c00fce98   Thomas Garnier   mm: reorganize SL...
2421
2422
2423
2424
2425
2426
2427
2428
2429
  		for (i = 0; i < count; i++)
  			set_free_obj(page, i, i);
  
  		/* Fisher-Yates shuffle */
  		for (i = count - 1; i > 0; i--) {
  			rand = prandom_u32_state(&state.rnd_state);
  			rand %= (i + 1);
  			swap_free_obj(page, i, rand);
  		}
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
  	} else {
  		for (i = 0; i < count; i++)
  			set_free_obj(page, i, next_random_slot(&state));
  	}
  
  	if (OBJFREELIST_SLAB(cachep))
  		set_free_obj(page, cachep->num - 1, objfreelist);
  
  	return true;
  }
  #else
  static inline bool shuffle_freelist(struct kmem_cache *cachep,
  				struct page *page)
  {
  	return false;
  }
  #endif /* CONFIG_SLAB_FREELIST_RANDOM */
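  
  /*
   * A minimal, self-contained sketch of the in-place Fisher-Yates shuffle used
   * above when no pre-computed list is available.  rand_below() is a made-up
   * stand-in for prandom_u32_state(&state.rnd_state) % (i + 1):
   *
   *	void shuffle(unsigned int *idx, unsigned int count)
   *	{
   *		unsigned int i, j, tmp;
   *
   *		for (i = 0; i < count; i++)
   *			idx[i] = i;
   *		for (i = count - 1; i > 0; i--) {
   *			j = rand_below(i + 1);	// 0 <= j <= i
   *			tmp = idx[i]; idx[i] = idx[j]; idx[j] = tmp;
   *		}
   *	}
   */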
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2447
2448
2449
2450
  static void cache_init_objs(struct kmem_cache *cachep,
  			    struct page *page)
  {
  	int i;
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
2451
  	void *objp;
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2452
  	bool shuffled;
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2453
2454
  
  	cache_init_objs_debug(cachep, page);
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2455
2456
2457
2458
  	/* Try to randomize the freelist if enabled */
  	shuffled = shuffle_freelist(cachep, page);
  
  	if (!shuffled && OBJFREELIST_SLAB(cachep)) {
b03a017be   Joonsoo Kim   mm/slab: introduc...
2459
2460
2461
  		page->freelist = index_to_obj(cachep, page, cachep->num - 1) +
  						obj_offset(cachep);
  	}
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2462
  	for (i = 0; i < cachep->num; i++) {
b3cbd9bf7   Andrey Ryabinin   mm/kasan: get rid...
2463
2464
  		objp = index_to_obj(cachep, page, i);
  		kasan_init_slab_obj(cachep, objp);
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2465
  		/* constructor could break poison info */
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
2466
  		if (DEBUG == 0 && cachep->ctor) {
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
2467
2468
2469
2470
  			kasan_unpoison_object_data(cachep, objp);
  			cachep->ctor(objp);
  			kasan_poison_object_data(cachep, objp);
  		}
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2471

c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2472
2473
  		if (!shuffled)
  			set_free_obj(page, i, i);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2474
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2475
  }
260b61dd4   Joonsoo Kim   mm/slab: remove t...
2476
  static void *slab_get_obj(struct kmem_cache *cachep, struct page *page)
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2477
  {
b1cb0982b   Joonsoo Kim   slab: change the ...
2478
  	void *objp;
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2479

e5c58dfdc   Joonsoo Kim   slab: introduce h...
2480
  	objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
8456a648c   Joonsoo Kim   slab: use struct ...
2481
  	page->active++;
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2482

d31676dfd   Joonsoo Kim   mm/slab: alternat...
2483
2484
2485
2486
  #if DEBUG
  	if (cachep->flags & SLAB_STORE_USER)
  		set_store_user_dirty(cachep);
  #endif
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2487
2488
  	return objp;
  }
260b61dd4   Joonsoo Kim   mm/slab: remove t...
2489
2490
  static void slab_put_obj(struct kmem_cache *cachep,
  			struct page *page, void *objp)
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2491
  {
8456a648c   Joonsoo Kim   slab: use struct ...
2492
  	unsigned int objnr = obj_to_index(cachep, page, objp);
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2493
  #if DEBUG
16025177e   Joonsoo Kim   slab: remove kmem...
2494
  	unsigned int i;
b1cb0982b   Joonsoo Kim   slab: change the ...
2495

b1cb0982b   Joonsoo Kim   slab: change the ...
2496
  	/* Verify double free bug */
8456a648c   Joonsoo Kim   slab: use struct ...
2497
  	for (i = page->active; i < cachep->num; i++) {
e5c58dfdc   Joonsoo Kim   slab: introduce h...
2498
  		if (get_free_obj(page, i) == objnr) {
1170532bb   Joe Perches   mm: convert print...
2499
2500
  			pr_err("slab: double free detected in cache '%s', objp %p
  ",
756a025f0   Joe Perches   mm: coalesce spli...
2501
  			       cachep->name, objp);
b1cb0982b   Joonsoo Kim   slab: change the ...
2502
2503
  			BUG();
  		}
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2504
2505
  	}
  #endif
8456a648c   Joonsoo Kim   slab: use struct ...
2506
  	page->active--;
b03a017be   Joonsoo Kim   mm/slab: introduc...
2507
2508
  	if (!page->freelist)
  		page->freelist = objp + obj_offset(cachep);
e5c58dfdc   Joonsoo Kim   slab: introduce h...
2509
  	set_free_obj(page, page->active, objnr);
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2510
  }
4776874ff   Pekka Enberg   [PATCH] slab: pag...
2511
2512
2513
  /*
   * Map pages beginning at addr to the given cache and slab. This is required
   * for the slab allocator to be able to look up the cache and slab of a
ccd35fb9f   Nick Piggin   kernel: kmem_ptr_...
2514
   * virtual address for kfree, ksize, and slab debugging.
4776874ff   Pekka Enberg   [PATCH] slab: pag...
2515
   */
8456a648c   Joonsoo Kim   slab: use struct ...
2516
  static void slab_map_pages(struct kmem_cache *cache, struct page *page,
7e0073552   Joonsoo Kim   slab: replace non...
2517
  			   void *freelist)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2518
  {
a57a49887   Joonsoo Kim   slab: use __GFP_C...
2519
  	page->slab_cache = cache;
8456a648c   Joonsoo Kim   slab: use struct ...
2520
  	page->freelist = freelist;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2521
2522
2523
2524
2525
2526
  }
  
  /*
   * Grow (by 1) the number of slabs within a cache.  This is called by
   * kmem_cache_alloc() when there are no active objs left in a cache.
   */
76b342bdc   Joonsoo Kim   mm/slab: separate...
2527
2528
  static struct page *cache_grow_begin(struct kmem_cache *cachep,
  				gfp_t flags, int nodeid)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2529
  {
7e0073552   Joonsoo Kim   slab: replace non...
2530
  	void *freelist;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
2531
2532
  	size_t offset;
  	gfp_t local_flags;
511e3a058   Joonsoo Kim   mm/slab: make cac...
2533
  	int page_node;
ce8eb6c42   Christoph Lameter   slab: Rename list...
2534
  	struct kmem_cache_node *n;
511e3a058   Joonsoo Kim   mm/slab: make cac...
2535
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2536

a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2537
2538
2539
  	/*
  	 * Be lazy and only check for valid flags here, keeping it out of the
  	 * critical path in kmem_cache_alloc().
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2540
  	 */
c871ac4e9   Andrew Morton   slab: improve che...
2541
  	if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
bacdcb346   Michal Hocko   slab: make GFP_SL...
2542
  		gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
72baeef0c   Michal Hocko   slab: do not pani...
2543
2544
2545
2546
2547
  		flags &= ~GFP_SLAB_BUG_MASK;
  		pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!
  ",
  				invalid_mask, &invalid_mask, flags, &flags);
  		dump_stack();
c871ac4e9   Andrew Morton   slab: improve che...
2548
  	}
6cb062296   Christoph Lameter   Categorize GFP flags
2549
  	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2550

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2551
  	check_irq_off();
d0164adc8   Mel Gorman   mm, page_alloc: d...
2552
  	if (gfpflags_allow_blocking(local_flags))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2553
2554
2555
  		local_irq_enable();
  
  	/*
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2556
2557
  	 * Get mem for the objs.  Attempt to allocate a physical page from
  	 * 'nodeid'.
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2558
  	 */
511e3a058   Joonsoo Kim   mm/slab: make cac...
2559
  	page = kmem_getpages(cachep, local_flags, nodeid);
0c3aa83e0   Joonsoo Kim   slab: change retu...
2560
  	if (!page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2561
  		goto failed;
511e3a058   Joonsoo Kim   mm/slab: make cac...
2562
2563
  	page_node = page_to_nid(page);
  	n = get_node(cachep, page_node);
03d1d43a1   Joonsoo Kim   mm/slab: racy acc...
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
  
  	/* Get colour for the slab, and calculate the next value. */
  	n->colour_next++;
  	if (n->colour_next >= cachep->colour)
  		n->colour_next = 0;
  
  	offset = n->colour_next;
  	if (offset >= cachep->colour)
  		offset = 0;
  
  	offset *= cachep->colour_off;
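  	/*
  	 * Worked example (numbers are illustrative only): with
  	 * cachep->colour == 4 and cachep->colour_off == 64, successive slabs
  	 * get offsets 0, 64, 128, 192, 0, ... so that objects in different
  	 * slabs start on different cache lines.
  	 */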
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2575
  	/* Get slab management. */
8456a648c   Joonsoo Kim   slab: use struct ...
2576
  	freelist = alloc_slabmgmt(cachep, page, offset,
511e3a058   Joonsoo Kim   mm/slab: make cac...
2577
  			local_flags & ~GFP_CONSTRAINT_MASK, page_node);
b03a017be   Joonsoo Kim   mm/slab: introduc...
2578
  	if (OFF_SLAB(cachep) && !freelist)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2579
  		goto opps1;
8456a648c   Joonsoo Kim   slab: use struct ...
2580
  	slab_map_pages(cachep, page, freelist);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2581

7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
2582
  	kasan_poison_slab(page);
8456a648c   Joonsoo Kim   slab: use struct ...
2583
  	cache_init_objs(cachep, page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2584

d0164adc8   Mel Gorman   mm, page_alloc: d...
2585
  	if (gfpflags_allow_blocking(local_flags))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2586
  		local_irq_disable();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2587

76b342bdc   Joonsoo Kim   mm/slab: separate...
2588
  	return page;
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2589
  opps1:
0c3aa83e0   Joonsoo Kim   slab: change retu...
2590
  	kmem_freepages(cachep, page);
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2591
  failed:
d0164adc8   Mel Gorman   mm, page_alloc: d...
2592
  	if (gfpflags_allow_blocking(local_flags))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2593
  		local_irq_disable();
76b342bdc   Joonsoo Kim   mm/slab: separate...
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
  	return NULL;
  }
  
  static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
  {
  	struct kmem_cache_node *n;
  	void *list = NULL;
  
  	check_irq_off();
  
  	if (!page)
  		return;
  
  	INIT_LIST_HEAD(&page->lru);
  	n = get_node(cachep, page_to_nid(page));
  
  	spin_lock(&n->list_lock);
bf00bd345   David Rientjes   mm, slab: maintai...
2611
  	n->total_slabs++;
f728b0a5d   Greg Thelen   mm, slab: faster ...
2612
  	if (!page->active) {
76b342bdc   Joonsoo Kim   mm/slab: separate...
2613
  		list_add_tail(&page->lru, &(n->slabs_free));
f728b0a5d   Greg Thelen   mm, slab: faster ...
2614
  		n->free_slabs++;
bf00bd345   David Rientjes   mm, slab: maintai...
2615
  	} else
76b342bdc   Joonsoo Kim   mm/slab: separate...
2616
  		fixup_slab_list(cachep, n, page, &list);
07a63c41f   Aruna Ramakrishna   mm/slab: improve ...
2617

76b342bdc   Joonsoo Kim   mm/slab: separate...
2618
2619
2620
2621
2622
  	STATS_INC_GROWN(cachep);
  	n->free_objects += cachep->num - page->active;
  	spin_unlock(&n->list_lock);
  
  	fixup_objfreelist_debug(cachep, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2623
2624
2625
2626
2627
2628
2629
2630
  }
  
  #if DEBUG
  
  /*
   * Perform extra freeing checks:
   * - detect bad pointers.
   * - POISON/RED_ZONE checking
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2631
2632
2633
   */
  static void kfree_debugcheck(const void *objp)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2634
  	if (!virt_addr_valid(objp)) {
1170532bb   Joe Perches   mm: convert print...
2635
2636
  		pr_err("kfree_debugcheck: out of range ptr %lxh
  ",
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
2637
2638
  		       (unsigned long)objp);
  		BUG();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2639
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2640
  }
58ce1fd58   Pekka Enberg   [PATCH] slab: red...
2641
2642
  static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
  {
b46b8f19c   David Woodhouse   Increase slab red...
2643
  	unsigned long long redzone1, redzone2;
58ce1fd58   Pekka Enberg   [PATCH] slab: red...
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
  
  	redzone1 = *dbg_redzone1(cache, obj);
  	redzone2 = *dbg_redzone2(cache, obj);
  
  	/*
  	 * Redzone is ok.
  	 */
  	if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
  		return;
  
  	if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
  		slab_error(cache, "double free detected");
  	else
  		slab_error(cache, "memory outside object was overwritten");
1170532bb   Joe Perches   mm: convert print...
2658
2659
2660
  	pr_err("%p: redzone 1:0x%llx, redzone 2:0x%llx
  ",
  	       obj, redzone1, redzone2);
58ce1fd58   Pekka Enberg   [PATCH] slab: red...
2661
  }
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
2662
  static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
2663
  				   unsigned long caller)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2664
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2665
  	unsigned int objnr;
8456a648c   Joonsoo Kim   slab: use struct ...
2666
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2667

80cbd911c   Matthew Wilcox   Fix kmem_cache_fr...
2668
  	BUG_ON(virt_to_cache(objp) != cachep);
3dafccf22   Manfred Spraul   [PATCH] slab: dis...
2669
  	objp -= obj_offset(cachep);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2670
  	kfree_debugcheck(objp);
b49af68ff   Christoph Lameter   Add virt_to_head_...
2671
  	page = virt_to_head_page(objp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2672

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2673
  	if (cachep->flags & SLAB_RED_ZONE) {
58ce1fd58   Pekka Enberg   [PATCH] slab: red...
2674
  		verify_redzone_free(cachep, objp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2675
2676
2677
  		*dbg_redzone1(cachep, objp) = RED_INACTIVE;
  		*dbg_redzone2(cachep, objp) = RED_INACTIVE;
  	}
d31676dfd   Joonsoo Kim   mm/slab: alternat...
2678
2679
  	if (cachep->flags & SLAB_STORE_USER) {
  		set_store_user_dirty(cachep);
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
2680
  		*dbg_userword(cachep, objp) = (void *)caller;
d31676dfd   Joonsoo Kim   mm/slab: alternat...
2681
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2682

8456a648c   Joonsoo Kim   slab: use struct ...
2683
  	objnr = obj_to_index(cachep, page, objp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2684
2685
  
  	BUG_ON(objnr >= cachep->num);
8456a648c   Joonsoo Kim   slab: use struct ...
2686
  	BUG_ON(objp != index_to_obj(cachep, page, objnr));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2687

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2688
  	if (cachep->flags & SLAB_POISON) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2689
  		poison_obj(cachep, objp, POISON_FREE);
40b441379   Joonsoo Kim   mm/slab: clean up...
2690
  		slab_kernel_map(cachep, objp, 0, caller);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2691
2692
2693
  	}
  	return objp;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2694
2695
2696
  #else
  #define kfree_debugcheck(x) do { } while(0)
  #define cache_free_debugcheck(x,objp,z) (objp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2697
  #endif
b03a017be   Joonsoo Kim   mm/slab: introduc...
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
  static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
  						void **list)
  {
  #if DEBUG
  	void *next = *list;
  	void *objp;
  
  	while (next) {
  		objp = next - obj_offset(cachep);
  		next = *(void **)next;
  		poison_obj(cachep, objp, POISON_FREE);
  	}
  #endif
  }
d8410234d   Joonsoo Kim   mm/slab: factor o...
2712
  static inline void fixup_slab_list(struct kmem_cache *cachep,
b03a017be   Joonsoo Kim   mm/slab: introduc...
2713
2714
  				struct kmem_cache_node *n, struct page *page,
  				void **list)
d8410234d   Joonsoo Kim   mm/slab: factor o...
2715
2716
2717
  {
  	/* move slabp to correct slabp list: */
  	list_del(&page->lru);
b03a017be   Joonsoo Kim   mm/slab: introduc...
2718
  	if (page->active == cachep->num) {
d8410234d   Joonsoo Kim   mm/slab: factor o...
2719
  		list_add(&page->lru, &n->slabs_full);
b03a017be   Joonsoo Kim   mm/slab: introduc...
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
  		if (OBJFREELIST_SLAB(cachep)) {
  #if DEBUG
  			/* Poisoning will be done without holding the lock */
  			if (cachep->flags & SLAB_POISON) {
  				void **objp = page->freelist;
  
  				*objp = *list;
  				*list = objp;
  			}
  #endif
  			page->freelist = NULL;
  		}
  	} else
d8410234d   Joonsoo Kim   mm/slab: factor o...
2733
2734
  		list_add(&page->lru, &n->slabs_partial);
  }
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2735
2736
  /* Try to find non-pfmemalloc slab if needed */
  static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n,
bf00bd345   David Rientjes   mm, slab: maintai...
2737
  					struct page *page, bool pfmemalloc)
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
  {
  	if (!page)
  		return NULL;
  
  	if (pfmemalloc)
  		return page;
  
  	if (!PageSlabPfmemalloc(page))
  		return page;
  
  	/* No need to keep pfmemalloc slab if we have enough free objects */
  	if (n->free_objects > n->free_limit) {
  		ClearPageSlabPfmemalloc(page);
  		return page;
  	}
  
  	/* Move pfmemalloc slab to the end of list to speed up next search */
  	list_del(&page->lru);
bf00bd345   David Rientjes   mm, slab: maintai...
2756
  	if (!page->active) {
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2757
  		list_add_tail(&page->lru, &n->slabs_free);
bf00bd345   David Rientjes   mm, slab: maintai...
2758
  		n->free_slabs++;
f728b0a5d   Greg Thelen   mm, slab: faster ...
2759
  	} else
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2760
2761
2762
2763
2764
2765
  		list_add_tail(&page->lru, &n->slabs_partial);
  
  	list_for_each_entry(page, &n->slabs_partial, lru) {
  		if (!PageSlabPfmemalloc(page))
  			return page;
  	}
f728b0a5d   Greg Thelen   mm, slab: faster ...
2766
  	n->free_touched = 1;
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2767
  	list_for_each_entry(page, &n->slabs_free, lru) {
f728b0a5d   Greg Thelen   mm, slab: faster ...
2768
  		if (!PageSlabPfmemalloc(page)) {
bf00bd345   David Rientjes   mm, slab: maintai...
2769
  			n->free_slabs--;
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2770
  			return page;
f728b0a5d   Greg Thelen   mm, slab: faster ...
2771
  		}
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2772
2773
2774
2775
2776
2777
  	}
  
  	return NULL;
  }
  
  static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
7aa0d2278   Geliang Tang   mm/slab.c: add a ...
2778
2779
  {
  	struct page *page;
f728b0a5d   Greg Thelen   mm, slab: faster ...
2780
  	assert_spin_locked(&n->list_lock);
bf00bd345   David Rientjes   mm, slab: maintai...
2781
  	page = list_first_entry_or_null(&n->slabs_partial, struct page, lru);
7aa0d2278   Geliang Tang   mm/slab.c: add a ...
2782
2783
  	if (!page) {
  		n->free_touched = 1;
bf00bd345   David Rientjes   mm, slab: maintai...
2784
2785
  		page = list_first_entry_or_null(&n->slabs_free, struct page,
  						lru);
f728b0a5d   Greg Thelen   mm, slab: faster ...
2786
  		if (page)
bf00bd345   David Rientjes   mm, slab: maintai...
2787
  			n->free_slabs--;
7aa0d2278   Geliang Tang   mm/slab.c: add a ...
2788
  	}
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2789
  	if (sk_memalloc_socks())
bf00bd345   David Rientjes   mm, slab: maintai...
2790
  		page = get_valid_first_slab(n, page, pfmemalloc);
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2791

7aa0d2278   Geliang Tang   mm/slab.c: add a ...
2792
2793
  	return page;
  }
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
  static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
  				struct kmem_cache_node *n, gfp_t flags)
  {
  	struct page *page;
  	void *obj;
  	void *list = NULL;
  
  	if (!gfp_pfmemalloc_allowed(flags))
  		return NULL;
  
  	spin_lock(&n->list_lock);
  	page = get_first_slab(n, true);
  	if (!page) {
  		spin_unlock(&n->list_lock);
  		return NULL;
  	}
  
  	obj = slab_get_obj(cachep, page);
  	n->free_objects--;
  
  	fixup_slab_list(cachep, n, page, &list);
  
  	spin_unlock(&n->list_lock);
  	fixup_objfreelist_debug(cachep, &list);
  
  	return obj;
  }
213b46958   Joonsoo Kim   mm/slab: refill c...
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
  /*
   * Slab list should be fixed up by fixup_slab_list() for an existing slab
   * or by cache_grow_end() for a new slab.
   */
  static __always_inline int alloc_block(struct kmem_cache *cachep,
  		struct array_cache *ac, struct page *page, int batchcount)
  {
  	/*
  	 * There must be at least one object available for
  	 * allocation.
  	 */
  	BUG_ON(page->active >= cachep->num);
  
  	while (page->active < cachep->num && batchcount--) {
  		STATS_INC_ALLOCED(cachep);
  		STATS_INC_ACTIVE(cachep);
  		STATS_SET_HIGH(cachep);
  
  		ac->entry[ac->avail++] = slab_get_obj(cachep, page);
  	}
  
  	return batchcount;
  }
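  
  /*
   * Worked example (numbers are illustrative only): if ac->batchcount is 16
   * and the chosen slab has 10 objects still free, alloc_block() moves those
   * 10 into ac->entry[] and returns 6; cache_alloc_refill() then picks the
   * next slab from the partial/free lists to collect the remaining objects.
   */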
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2844
  static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2845
2846
  {
  	int batchcount;
ce8eb6c42   Christoph Lameter   slab: Rename list...
2847
  	struct kmem_cache_node *n;
801faf0db   Joonsoo Kim   mm/slab: lockless...
2848
  	struct array_cache *ac, *shared;
1ca4cb241   Pekka Enberg   [PATCH] slab: red...
2849
  	int node;
b03a017be   Joonsoo Kim   mm/slab: introduc...
2850
  	void *list = NULL;
76b342bdc   Joonsoo Kim   mm/slab: separate...
2851
  	struct page *page;
1ca4cb241   Pekka Enberg   [PATCH] slab: red...
2852

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2853
  	check_irq_off();
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
2854
  	node = numa_mem_id();
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2855

9a2dba4b4   Pekka Enberg   [PATCH] slab: ren...
2856
  	ac = cpu_cache_get(cachep);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2857
2858
  	batchcount = ac->batchcount;
  	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2859
2860
2861
2862
  		/*
  		 * If there was little recent activity on this cache, then
  		 * perform only a partial refill.  Otherwise we could generate
  		 * refill bouncing.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2863
2864
2865
  		 */
  		batchcount = BATCHREFILL_LIMIT;
  	}
18bf85411   Christoph Lameter   slab: use get_nod...
2866
  	n = get_node(cachep, node);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2867

ce8eb6c42   Christoph Lameter   slab: Rename list...
2868
  	BUG_ON(ac->avail > 0 || !n);
801faf0db   Joonsoo Kim   mm/slab: lockless...
2869
2870
2871
  	shared = READ_ONCE(n->shared);
  	if (!n->free_objects && (!shared || !shared->avail))
  		goto direct_grow;
ce8eb6c42   Christoph Lameter   slab: Rename list...
2872
  	spin_lock(&n->list_lock);
801faf0db   Joonsoo Kim   mm/slab: lockless...
2873
  	shared = READ_ONCE(n->shared);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2874

3ded175a4   Christoph Lameter   [PATCH] slab: add...
2875
  	/* See if we can refill from the shared array */
801faf0db   Joonsoo Kim   mm/slab: lockless...
2876
2877
  	if (shared && transfer_objects(ac, shared, batchcount)) {
  		shared->touched = 1;
3ded175a4   Christoph Lameter   [PATCH] slab: add...
2878
  		goto alloc_done;
44b57f1cc   Nick Piggin   slab: fix regress...
2879
  	}
3ded175a4   Christoph Lameter   [PATCH] slab: add...
2880

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2881
  	while (batchcount > 0) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2882
  		/* Get the slab the allocation is to come from. */
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2883
  		page = get_first_slab(n, false);
7aa0d2278   Geliang Tang   mm/slab.c: add a ...
2884
2885
  		if (!page)
  			goto must_grow;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2886

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2887
  		check_spinlock_acquired(cachep);
714b8171a   Pekka Enberg   slab: ensure cach...
2888

213b46958   Joonsoo Kim   mm/slab: refill c...
2889
  		batchcount = alloc_block(cachep, ac, page, batchcount);
b03a017be   Joonsoo Kim   mm/slab: introduc...
2890
  		fixup_slab_list(cachep, n, page, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2891
  	}
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2892
  must_grow:
ce8eb6c42   Christoph Lameter   slab: Rename list...
2893
  	n->free_objects -= ac->avail;
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2894
  alloc_done:
ce8eb6c42   Christoph Lameter   slab: Rename list...
2895
  	spin_unlock(&n->list_lock);
b03a017be   Joonsoo Kim   mm/slab: introduc...
2896
  	fixup_objfreelist_debug(cachep, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2897

801faf0db   Joonsoo Kim   mm/slab: lockless...
2898
  direct_grow:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2899
  	if (unlikely(!ac->avail)) {
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2900
2901
2902
2903
2904
2905
2906
  		/* Check if we can use obj in pfmemalloc slab */
  		if (sk_memalloc_socks()) {
  			void *obj = cache_alloc_pfmemalloc(cachep, n, flags);
  
  			if (obj)
  				return obj;
  		}
76b342bdc   Joonsoo Kim   mm/slab: separate...
2907
  		page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2908

76b342bdc   Joonsoo Kim   mm/slab: separate...
2909
2910
2911
2912
  		/*
  		 * cache_grow_begin() can reenable interrupts,
  		 * then ac could change.
  		 */
9a2dba4b4   Pekka Enberg   [PATCH] slab: ren...
2913
  		ac = cpu_cache_get(cachep);
213b46958   Joonsoo Kim   mm/slab: refill c...
2914
2915
2916
  		if (!ac->avail && page)
  			alloc_block(cachep, ac, page, batchcount);
  		cache_grow_end(cachep, page);
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
2917

213b46958   Joonsoo Kim   mm/slab: refill c...
2918
  		if (!ac->avail)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2919
  			return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2920
2921
  	}
  	ac->touched = 1;
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
2922

f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2923
  	return ac->entry[--ac->avail];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2924
  }
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2925
2926
  static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
  						gfp_t flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2927
  {
d0164adc8   Mel Gorman   mm, page_alloc: d...
2928
  	might_sleep_if(gfpflags_allow_blocking(flags));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2929
2930
2931
  }
  
  #if DEBUG
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2932
  static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
2933
  				gfp_t flags, void *objp, unsigned long caller)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2934
  {
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
2935
  	if (!objp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2936
  		return objp;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
2937
  	if (cachep->flags & SLAB_POISON) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2938
  		check_poison_obj(cachep, objp);
40b441379   Joonsoo Kim   mm/slab: clean up...
2939
  		slab_kernel_map(cachep, objp, 1, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2940
2941
2942
  		poison_obj(cachep, objp, POISON_INUSE);
  	}
  	if (cachep->flags & SLAB_STORE_USER)
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
2943
  		*dbg_userword(cachep, objp) = (void *)caller;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2944
2945
  
  	if (cachep->flags & SLAB_RED_ZONE) {
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2946
2947
  		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
  				*dbg_redzone2(cachep, objp) != RED_INACTIVE) {
756a025f0   Joe Perches   mm: coalesce spli...
2948
  			slab_error(cachep, "double free, or memory outside object was overwritten");
1170532bb   Joe Perches   mm: convert print...
2949
2950
2951
2952
  			pr_err("%p: redzone 1:0x%llx, redzone 2:0x%llx
  ",
  			       objp, *dbg_redzone1(cachep, objp),
  			       *dbg_redzone2(cachep, objp));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2953
2954
2955
2956
  		}
  		*dbg_redzone1(cachep, objp) = RED_ACTIVE;
  		*dbg_redzone2(cachep, objp) = RED_ACTIVE;
  	}
037873014   Joonsoo Kim   slab: fix oops wh...
2957

3dafccf22   Manfred Spraul   [PATCH] slab: dis...
2958
  	objp += obj_offset(cachep);
4f1049345   Christoph Lameter   slab allocators: ...
2959
  	if (cachep->ctor && cachep->flags & SLAB_POISON)
51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
2960
  		cachep->ctor(objp);
7ea466f22   Tetsuo Handa   slab: fix DEBUG_S...
2961
2962
  	if (ARCH_SLAB_MINALIGN &&
  	    ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
1170532bb   Joe Perches   mm: convert print...
2963
2964
  		pr_err("0x%p: not aligned to ARCH_SLAB_MINALIGN=%d
  ",
c225150b8   Hugh Dickins   slab: fix DEBUG_S...
2965
  		       objp, (int)ARCH_SLAB_MINALIGN);
a44b56d35   Kevin Hilman   [PATCH] slab debu...
2966
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2967
2968
2969
2970
2971
  	return objp;
  }
  #else
  #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
  #endif
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
2972
  static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2973
  {
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
2974
  	void *objp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2975
  	struct array_cache *ac;
5c3823008   Alok N Kataria   [PATCH] kmalloc_n...
2976
  	check_irq_off();
8a8b6502f   Akinobu Mita   [PATCH] fault-inj...
2977

9a2dba4b4   Pekka Enberg   [PATCH] slab: ren...
2978
  	ac = cpu_cache_get(cachep);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2979
  	if (likely(ac->avail)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2980
  		ac->touched = 1;
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2981
  		objp = ac->entry[--ac->avail];
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
2982

f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2983
2984
  		STATS_INC_ALLOCHIT(cachep);
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2985
  	}
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
2986
2987
  
  	STATS_INC_ALLOCMISS(cachep);
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2988
  	objp = cache_alloc_refill(cachep, flags);
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
2989
2990
2991
2992
2993
2994
2995
  	/*
  	 * the 'ac' may be updated by cache_alloc_refill(),
  	 * and kmemleak_erase() requires its correct value.
  	 */
  	ac = cpu_cache_get(cachep);
  
  out:
d5cff6352   Catalin Marinas   kmemleak: Add the...
2996
2997
2998
2999
3000
  	/*
  	 * To avoid a false negative, if an object that is in one of the
  	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
  	 * treat the array pointers as a reference to the object.
  	 */
f3d8b53a3   J. R. Okajima   slab, kmemleak: s...
3001
3002
  	if (objp)
  		kmemleak_erase(&ac->entry[ac->avail]);
5c3823008   Alok N Kataria   [PATCH] kmalloc_n...
3003
3004
  	return objp;
  }
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3005
3006
  #ifdef CONFIG_NUMA
  /*
2ad654bc5   Zefan Li   cpuset: PF_SPREAD...
3007
   * Try allocating on another node if PFA_SPREAD_SLAB or a mempolicy is set.
c61afb181   Paul Jackson   [PATCH] cpuset me...
3008
3009
3010
3011
3012
3013
3014
   *
   * If we are in_interrupt, then process context, including cpusets and
   * mempolicy, may not apply and should not be used for allocation policy.
   */
  static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
  {
  	int nid_alloc, nid_here;
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
3015
  	if (in_interrupt() || (flags & __GFP_THISNODE))
c61afb181   Paul Jackson   [PATCH] cpuset me...
3016
  		return NULL;
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3017
  	nid_alloc = nid_here = numa_mem_id();
c61afb181   Paul Jackson   [PATCH] cpuset me...
3018
  	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
6adef3ebe   Jack Steiner   cpusets: new roun...
3019
  		nid_alloc = cpuset_slab_spread_node();
c61afb181   Paul Jackson   [PATCH] cpuset me...
3020
  	else if (current->mempolicy)
2a389610a   David Rientjes   mm, mempolicy: re...
3021
  		nid_alloc = mempolicy_slab_node();
c61afb181   Paul Jackson   [PATCH] cpuset me...
3022
  	if (nid_alloc != nid_here)
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3023
  		return ____cache_alloc_node(cachep, flags, nid_alloc);
c61afb181   Paul Jackson   [PATCH] cpuset me...
3024
3025
3026
3027
  	return NULL;
  }
  
  /*
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
3028
   * Fallback function if there was no memory available and no objects on a
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3029
   * certain node and fall back is permitted. First we scan all the
6a67368c3   Christoph Lameter   slab: Rename node...
3030
   * available nodes for available objects. If that fails then we
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3031
3032
3033
   * perform an allocation without specifying a node. This allows the page
   * allocator to do its reclaim / fallback magic. We then insert the
   * slab into the proper nodelist and then allocate from it.
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
3034
   */
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3035
  static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
3036
  {
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3037
  	struct zonelist *zonelist;
dd1a239f6   Mel Gorman   mm: have zonelist...
3038
  	struct zoneref *z;
54a6eb5c4   Mel Gorman   mm: use two zonel...
3039
3040
  	struct zone *zone;
  	enum zone_type high_zoneidx = gfp_zone(flags);
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
3041
  	void *obj = NULL;
76b342bdc   Joonsoo Kim   mm/slab: separate...
3042
  	struct page *page;
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3043
  	int nid;
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
3044
  	unsigned int cpuset_mems_cookie;
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3045
3046
3047
  
  	if (flags & __GFP_THISNODE)
  		return NULL;
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
3048
  retry_cpuset:
d26914d11   Mel Gorman   mm: optimize put_...
3049
  	cpuset_mems_cookie = read_mems_allowed_begin();
2a389610a   David Rientjes   mm, mempolicy: re...
3050
  	zonelist = node_zonelist(mempolicy_slab_node(), flags);
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
3051

3c517a613   Christoph Lameter   [PATCH] slab: bet...
3052
3053
3054
3055
3056
  retry:
  	/*
  	 * Look through allowed nodes for objects available
  	 * from existing per node queues.
  	 */
54a6eb5c4   Mel Gorman   mm: use two zonel...
3057
3058
  	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
  		nid = zone_to_nid(zone);
aedb0eb10   Christoph Lameter   [PATCH] Slab: Do ...
3059

061d7074e   Vladimir Davydov   slab: fix cpuset ...
3060
  		if (cpuset_zone_allowed(zone, flags) &&
18bf85411   Christoph Lameter   slab: use get_nod...
3061
3062
  			get_node(cache, nid) &&
  			get_node(cache, nid)->free_objects) {
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3063
  				obj = ____cache_alloc_node(cache,
4167e9b2c   David Rientjes   mm: remove GFP_TH...
3064
  					gfp_exact_node(flags), nid);
481c5346d   Christoph Lameter   Slab: Fix memory ...
3065
3066
3067
  				if (obj)
  					break;
  		}
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3068
  	}
cfce66047   Christoph Lameter   Slab allocators: ...
3069
  	if (!obj) {
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3070
3071
3072
3073
3074
3075
  		/*
  		 * This allocation will be performed within the constraints
  		 * of the current cpuset / memory policy requirements.
  		 * We may trigger various forms of reclaim on the allowed
  		 * set and go into memory reserves if necessary.
  		 */
76b342bdc   Joonsoo Kim   mm/slab: separate...
3076
3077
3078
3079
  		page = cache_grow_begin(cache, flags, numa_mem_id());
  		cache_grow_end(cache, page);
  		if (page) {
  			nid = page_to_nid(page);
511e3a058   Joonsoo Kim   mm/slab: make cac...
3080
3081
  			obj = ____cache_alloc_node(cache,
  				gfp_exact_node(flags), nid);
0c3aa83e0   Joonsoo Kim   slab: change retu...
3082

3c517a613   Christoph Lameter   [PATCH] slab: bet...
3083
  			/*
511e3a058   Joonsoo Kim   mm/slab: make cac...
3084
3085
  			 * Another processor may allocate the objects in
  			 * the slab since we are not holding any locks.
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3086
  			 */
511e3a058   Joonsoo Kim   mm/slab: make cac...
3087
3088
  			if (!obj)
  				goto retry;
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3089
  		}
aedb0eb10   Christoph Lameter   [PATCH] Slab: Do ...
3090
  	}
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
3091

d26914d11   Mel Gorman   mm: optimize put_...
3092
  	if (unlikely(!obj && read_mems_allowed_retry(cpuset_mems_cookie)))
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
3093
  		goto retry_cpuset;
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
3094
3095
3096
3097
  	return obj;
  }
  
  /*
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3098
   * An interface to enable slab creation on nodeid
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3099
   */
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3100
  static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3101
  				int nodeid)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3102
  {
8456a648c   Joonsoo Kim   slab: use struct ...
3103
  	struct page *page;
ce8eb6c42   Christoph Lameter   slab: Rename list...
3104
  	struct kmem_cache_node *n;
213b46958   Joonsoo Kim   mm/slab: refill c...
3105
  	void *obj = NULL;
b03a017be   Joonsoo Kim   mm/slab: introduc...
3106
  	void *list = NULL;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3107

7c3fbbdd0   Paul Mackerras   slab: fix nodeid ...
3108
  	VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES);
18bf85411   Christoph Lameter   slab: use get_nod...
3109
  	n = get_node(cachep, nodeid);
ce8eb6c42   Christoph Lameter   slab: Rename list...
3110
  	BUG_ON(!n);
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3111

ca3b9b917   Ravikiran G Thirumalai   [PATCH] NUMA slab...
3112
  	check_irq_off();
ce8eb6c42   Christoph Lameter   slab: Rename list...
3113
  	spin_lock(&n->list_lock);
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
3114
  	page = get_first_slab(n, false);
7aa0d2278   Geliang Tang   mm/slab.c: add a ...
3115
3116
  	if (!page)
  		goto must_grow;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3117

b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3118
  	check_spinlock_acquired_node(cachep, nodeid);
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3119
3120
3121
3122
  
  	STATS_INC_NODEALLOCS(cachep);
  	STATS_INC_ACTIVE(cachep);
  	STATS_SET_HIGH(cachep);
8456a648c   Joonsoo Kim   slab: use struct ...
3123
  	BUG_ON(page->active == cachep->num);
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3124

260b61dd4   Joonsoo Kim   mm/slab: remove t...
3125
  	obj = slab_get_obj(cachep, page);
ce8eb6c42   Christoph Lameter   slab: Rename list...
3126
  	n->free_objects--;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3127

b03a017be   Joonsoo Kim   mm/slab: introduc...
3128
  	fixup_slab_list(cachep, n, page, &list);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3129

ce8eb6c42   Christoph Lameter   slab: Rename list...
3130
  	spin_unlock(&n->list_lock);
b03a017be   Joonsoo Kim   mm/slab: introduc...
3131
  	fixup_objfreelist_debug(cachep, &list);
213b46958   Joonsoo Kim   mm/slab: refill c...
3132
  	return obj;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3133

a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3134
  must_grow:
ce8eb6c42   Christoph Lameter   slab: Rename list...
3135
  	spin_unlock(&n->list_lock);
76b342bdc   Joonsoo Kim   mm/slab: separate...
3136
  	page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
213b46958   Joonsoo Kim   mm/slab: refill c...
3137
3138
3139
3140
  	if (page) {
  		/* This slab isn't counted yet so don't update free_objects */
  		obj = slab_get_obj(cachep, page);
  	}
76b342bdc   Joonsoo Kim   mm/slab: separate...
3141
  	cache_grow_end(cachep, page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3142

213b46958   Joonsoo Kim   mm/slab: refill c...
3143
  	return obj ? obj : fallback_alloc(cachep, flags);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3144
  }
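
  /*
   * Allocate an object, preferring the given node.  NUMA_NO_NODE means the
   * local node; a node whose kmem_cache_node is not bootstrapped yet is
   * handled by fallback_alloc().  Local requests try the per-cpu array
   * first and only fall back to ____cache_alloc_node() if that fails.
   */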
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3145

8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3146
  static __always_inline void *
48356303f   Ezequiel Garcia   mm, slab: Rename ...
3147
  slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3148
  		   unsigned long caller)
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3149
3150
3151
  {
  	unsigned long save_flags;
  	void *ptr;
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3152
  	int slab_node = numa_mem_id();
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3153

dcce284a2   Benjamin Herrenschmidt   mm: Extend gfp ma...
3154
  	flags &= gfp_allowed_mask;
011eceaf0   Jesper Dangaard Brouer   slab: use slab_pr...
3155
3156
  	cachep = slab_pre_alloc_hook(cachep, flags);
  	if (unlikely(!cachep))
824ebef12   Akinobu Mita   fault injection: ...
3157
  		return NULL;
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3158
3159
  	cache_alloc_debugcheck_before(cachep, flags);
  	local_irq_save(save_flags);
eacbbae38   Andrew Morton   slab: use NUMA_NO...
3160
  	if (nodeid == NUMA_NO_NODE)
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3161
  		nodeid = slab_node;
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3162

18bf85411   Christoph Lameter   slab: use get_nod...
3163
  	if (unlikely(!get_node(cachep, nodeid))) {
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3164
3165
3166
3167
  		/* Node not bootstrapped yet */
  		ptr = fallback_alloc(cachep, flags);
  		goto out;
  	}
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3168
  	if (nodeid == slab_node) {
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
  		/*
  		 * Use the locally cached objects if possible.
  		 * However ____cache_alloc does not allow fallback
  		 * to other nodes. It may fail while we still have
  		 * objects on other nodes available.
  		 */
  		ptr = ____cache_alloc(cachep, flags);
  		if (ptr)
  			goto out;
  	}
  	/* ___cache_alloc_node can fall back to other nodes */
  	ptr = ____cache_alloc_node(cachep, flags, nodeid);
    out:
  	local_irq_restore(save_flags);
  	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
d5e3ed66d   Jesper Dangaard Brouer   slab: use slab_po...
3184
3185
  	if (unlikely(flags & __GFP_ZERO) && ptr)
  		memset(ptr, 0, cachep->object_size);
d07dbea46   Christoph Lameter   Slab allocators: ...
3186

d5e3ed66d   Jesper Dangaard Brouer   slab: use slab_po...
3187
  	slab_post_alloc_hook(cachep, flags, 1, &ptr);
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3188
3189
3190
3191
3192
3193
3194
  	return ptr;
  }
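
  /*
   * NUMA-aware fast path: honour cpuset memory spreading and mempolicies
   * via alternate_node_alloc() before trying the per-cpu array.
   */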
  
  static __always_inline void *
  __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
  {
  	void *objp;
2ad654bc5   Zefan Li   cpuset: PF_SPREAD...
3195
  	if (current->mempolicy || cpuset_do_slab_mem_spread()) {
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
  		objp = alternate_node_alloc(cache, flags);
  		if (objp)
  			goto out;
  	}
  	objp = ____cache_alloc(cache, flags);
  
  	/*
  	 * We may just have run out of memory on the local node.
  	 * ____cache_alloc_node() knows how to locate memory on other nodes.
  	 */
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3206
3207
  	if (!objp)
  		objp = ____cache_alloc_node(cache, flags, numa_mem_id());
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
  
    out:
  	return objp;
  }
  #else
  
  static __always_inline void *
  __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
  {
  	return ____cache_alloc(cachep, flags);
  }
  
  #endif /* CONFIG_NUMA */
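
  /*
   * Common entry point for kmem_cache_alloc() and kmalloc(): run the
   * pre-alloc hooks, allocate with local interrupts disabled, then apply
   * the debug checks, optional __GFP_ZERO zeroing and the post-alloc hooks.
   */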
  
  static __always_inline void *
48356303f   Ezequiel Garcia   mm, slab: Rename ...
3223
  slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3224
3225
3226
  {
  	unsigned long save_flags;
  	void *objp;
dcce284a2   Benjamin Herrenschmidt   mm: Extend gfp ma...
3227
  	flags &= gfp_allowed_mask;
011eceaf0   Jesper Dangaard Brouer   slab: use slab_pr...
3228
3229
  	cachep = slab_pre_alloc_hook(cachep, flags);
  	if (unlikely(!cachep))
824ebef12   Akinobu Mita   fault injection: ...
3230
  		return NULL;
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3231
3232
3233
3234
3235
3236
  	cache_alloc_debugcheck_before(cachep, flags);
  	local_irq_save(save_flags);
  	objp = __do_cache_alloc(cachep, flags);
  	local_irq_restore(save_flags);
  	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
  	prefetchw(objp);
d5e3ed66d   Jesper Dangaard Brouer   slab: use slab_po...
3237
3238
  	if (unlikely(flags & __GFP_ZERO) && objp)
  		memset(objp, 0, cachep->object_size);
d07dbea46   Christoph Lameter   Slab allocators: ...
3239

d5e3ed66d   Jesper Dangaard Brouer   slab: use slab_po...
3240
  	slab_post_alloc_hook(cachep, flags, 1, &objp);
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3241
3242
  	return objp;
  }
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3243
3244
  
  /*
5f0985bb1   Jianyu Zhan   mm/slab.c: cleanu...
3245
   * Caller needs to acquire the correct kmem_cache_node's list_lock
97654dfa2   Joonsoo Kim   slab: defer slab_...
3246
   * @list: List of detached free slabs, to be freed by the caller
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3247
   */
97654dfa2   Joonsoo Kim   slab: defer slab_...
3248
3249
  static void free_block(struct kmem_cache *cachep, void **objpp,
  			int nr_objects, int node, struct list_head *list)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3250
3251
  {
  	int i;
25c063fbd   Joonsoo Kim   slab: move up cod...
3252
  	struct kmem_cache_node *n = get_node(cachep, node);
6052b7880   Joonsoo Kim   mm/slab: don't ke...
3253
3254
3255
  	struct page *page;
  
  	n->free_objects += nr_objects;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3256
3257
  
  	for (i = 0; i < nr_objects; i++) {
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
3258
  		void *objp;
8456a648c   Joonsoo Kim   slab: use struct ...
3259
  		struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3260

072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
3261
  		objp = objpp[i];
8456a648c   Joonsoo Kim   slab: use struct ...
3262
  		page = virt_to_head_page(objp);
8456a648c   Joonsoo Kim   slab: use struct ...
3263
  		list_del(&page->lru);
ff69416e6   Christoph Lameter   [PATCH] slab: fix...
3264
  		check_spinlock_acquired_node(cachep, node);
260b61dd4   Joonsoo Kim   mm/slab: remove t...
3265
  		slab_put_obj(cachep, page, objp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3266
  		STATS_DEC_ACTIVE(cachep);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3267
3268
  
  		/* fixup slab chains */
f728b0a5d   Greg Thelen   mm, slab: faster ...
3269
  		if (page->active == 0) {
6052b7880   Joonsoo Kim   mm/slab: don't ke...
3270
  			list_add(&page->lru, &n->slabs_free);
f728b0a5d   Greg Thelen   mm, slab: faster ...
3271
  			n->free_slabs++;
f728b0a5d   Greg Thelen   mm, slab: faster ...
3272
  		} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3273
3274
3275
3276
  			/* Unconditionally move a slab to the end of the
  			 * partial list on free - this gives the maximum time for the
  			 * other objects to be freed, too.
  			 */
8456a648c   Joonsoo Kim   slab: use struct ...
3277
  			list_add_tail(&page->lru, &n->slabs_partial);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3278
3279
  		}
  	}
6052b7880   Joonsoo Kim   mm/slab: don't ke...
3280
3281
3282
3283
3284
  
  	while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) {
  		n->free_objects -= cachep->num;
  
  		page = list_last_entry(&n->slabs_free, struct page, lru);
de24baecd   Wei Yongjun   mm/slab: use list...
3285
  		list_move(&page->lru, list);
f728b0a5d   Greg Thelen   mm, slab: faster ...
3286
  		n->free_slabs--;
bf00bd345   David Rientjes   mm, slab: maintai...
3287
  		n->total_slabs--;
6052b7880   Joonsoo Kim   mm/slab: don't ke...
3288
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3289
  }
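
  /*
   * The per-cpu array overflowed on free: move 'batchcount' objects into
   * the node's shared array if there is room, otherwise hand them back to
   * the slab lists via free_block(), then compact the remaining entries.
   */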
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
3290
  static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3291
3292
  {
  	int batchcount;
ce8eb6c42   Christoph Lameter   slab: Rename list...
3293
  	struct kmem_cache_node *n;
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3294
  	int node = numa_mem_id();
97654dfa2   Joonsoo Kim   slab: defer slab_...
3295
  	LIST_HEAD(list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3296
3297
  
  	batchcount = ac->batchcount;
260b61dd4   Joonsoo Kim   mm/slab: remove t...
3298

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3299
  	check_irq_off();
18bf85411   Christoph Lameter   slab: use get_nod...
3300
  	n = get_node(cachep, node);
ce8eb6c42   Christoph Lameter   slab: Rename list...
3301
3302
3303
  	spin_lock(&n->list_lock);
  	if (n->shared) {
  		struct array_cache *shared_array = n->shared;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3304
  		int max = shared_array->limit - shared_array->avail;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3305
3306
3307
  		if (max) {
  			if (batchcount > max)
  				batchcount = max;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3308
  			memcpy(&(shared_array->entry[shared_array->avail]),
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3309
  			       ac->entry, sizeof(void *) * batchcount);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3310
3311
3312
3313
  			shared_array->avail += batchcount;
  			goto free_done;
  		}
  	}
97654dfa2   Joonsoo Kim   slab: defer slab_...
3314
  	free_block(cachep, ac->entry, batchcount, node, &list);
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3315
  free_done:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3316
3317
3318
  #if STATS
  	{
  		int i = 0;
73c0219d8   Geliang Tang   mm/slab.c: use li...
3319
  		struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3320

73c0219d8   Geliang Tang   mm/slab.c: use li...
3321
  		list_for_each_entry(page, &n->slabs_free, lru) {
8456a648c   Joonsoo Kim   slab: use struct ...
3322
  			BUG_ON(page->active);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3323
3324
  
  			i++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3325
3326
3327
3328
  		}
  		STATS_SET_FREEABLE(cachep, i);
  	}
  #endif
ce8eb6c42   Christoph Lameter   slab: Rename list...
3329
  	spin_unlock(&n->list_lock);
97654dfa2   Joonsoo Kim   slab: defer slab_...
3330
  	slabs_destroy(cachep, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3331
  	ac->avail -= batchcount;
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3332
  	memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3333
3334
3335
  }
  
  /*
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3336
3337
   * Release an obj back to its cache. If the obj has a constructed state, it must
   * be in this state _before_ it is released.  Called with interrupts disabled.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3338
   */
a947eb95e   Suleiman Souhlal   SLAB: Record actu...
3339
  static inline void __cache_free(struct kmem_cache *cachep, void *objp,
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3340
  				unsigned long caller)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3341
  {
55834c590   Alexander Potapenko   mm: kasan: initia...
3342
3343
3344
3345
3346
3347
  	/* Put the object into the quarantine, don't touch it for now. */
  	if (kasan_slab_free(cachep, objp))
  		return;
  
  	___cache_free(cachep, objp, caller);
  }
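
  /*
   * Really free an object: run the kmemleak/debug checks, hand objects that
   * belong to another node to cache_free_alien(), and otherwise stash the
   * object in the per-cpu array, flushing it with cache_flusharray() once
   * the limit is reached.
   */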
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3348

55834c590   Alexander Potapenko   mm: kasan: initia...
3349
3350
3351
3352
  void ___cache_free(struct kmem_cache *cachep, void *objp,
  		unsigned long caller)
  {
  	struct array_cache *ac = cpu_cache_get(cachep);
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
3353

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3354
  	check_irq_off();
d5cff6352   Catalin Marinas   kmemleak: Add the...
3355
  	kmemleak_free_recursive(objp, cachep->flags);
a947eb95e   Suleiman Souhlal   SLAB: Record actu...
3356
  	objp = cache_free_debugcheck(cachep, objp, caller);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3357

1807a1aaf   Siddha, Suresh B   slab: skip callin...
3358
3359
3360
3361
3362
3363
3364
  	/*
  	 * Skip calling cache_free_alien() when the platform is not NUMA.
  	 * This will avoid cache misses that happen while accessing slabp (which
  	 * is a per-page memory reference) to get nodeid. Instead use a global
  	 * variable to skip the call, which is most likely to be present in
  	 * the cache.
  	 */
b6e68bc1b   Mel Gorman   page allocator: s...
3365
  	if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
729bd0b74   Pekka Enberg   [PATCH] slab: ext...
3366
  		return;
3d8801940   Joonsoo Kim   mm/slab: move cac...
3367
  	if (ac->avail < ac->limit) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3368
  		STATS_INC_FREEHIT(cachep);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3369
3370
3371
  	} else {
  		STATS_INC_FREEMISS(cachep);
  		cache_flusharray(cachep, ac);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3372
  	}
42c8c99cd   Zhao Jin   slab, cleanup: re...
3373

f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
  	if (sk_memalloc_socks()) {
  		struct page *page = virt_to_head_page(objp);
  
  		if (unlikely(PageSlabPfmemalloc(page))) {
  			cache_free_pfmemalloc(cachep, page, objp);
  			return;
  		}
  	}
  
  	ac->entry[ac->avail++] = objp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
  }
  
  /**
   * kmem_cache_alloc - Allocate an object
   * @cachep: The cache to allocate from.
   * @flags: See kmalloc().
   *
   * Allocate an object from this cache.  The flags are only relevant
   * if the cache has no available objects.
   */
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
3394
  void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3395
  {
48356303f   Ezequiel Garcia   mm, slab: Rename ...
3396
  	void *ret = slab_alloc(cachep, flags, _RET_IP_);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3397

505f5dcb1   Alexander Potapenko   mm, kasan: add GF...
3398
  	kasan_slab_alloc(cachep, ret, flags);
ca2b84cb3   Eduard - Gabriel Munteanu   kmemtrace: use tr...
3399
  	trace_kmem_cache_alloc(_RET_IP_, ret,
8c138bc00   Christoph Lameter   slab: Get rid of ...
3400
  			       cachep->object_size, cachep->size, flags);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3401
3402
  
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3403
3404
  }
  EXPORT_SYMBOL(kmem_cache_alloc);
7b0501dd6   Jesper Dangaard Brouer   slab: avoid runni...
3405
3406
3407
3408
3409
3410
3411
3412
3413
  static __always_inline void
  cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags,
  				  size_t size, void **p, unsigned long caller)
  {
  	size_t i;
  
  	for (i = 0; i < size; i++)
  		p[i] = cache_alloc_debugcheck_after(s, flags, p[i], caller);
  }
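
  /*
   * Allocate 'size' objects in a single interrupt-disabled section, zeroing
   * them afterwards if __GFP_ZERO was requested.  On failure, everything
   * allocated so far is freed again and 0 is returned.
   */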
865762a81   Jesper Dangaard Brouer   slab/slub: adjust...
3414
  int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
2a777eac1   Jesper Dangaard Brouer   slab: implement b...
3415
  			  void **p)
484748f0b   Christoph Lameter   slab: infrastruct...
3416
  {
2a777eac1   Jesper Dangaard Brouer   slab: implement b...
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
  	size_t i;
  
  	s = slab_pre_alloc_hook(s, flags);
  	if (!s)
  		return 0;
  
  	cache_alloc_debugcheck_before(s, flags);
  
  	local_irq_disable();
  	for (i = 0; i < size; i++) {
  		void *objp = __do_cache_alloc(s, flags);
2a777eac1   Jesper Dangaard Brouer   slab: implement b...
3428
3429
3430
3431
3432
  		if (unlikely(!objp))
  			goto error;
  		p[i] = objp;
  	}
  	local_irq_enable();
7b0501dd6   Jesper Dangaard Brouer   slab: avoid runni...
3433
  	cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_);
2a777eac1   Jesper Dangaard Brouer   slab: implement b...
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
  	/* Clear memory outside IRQ disabled section */
  	if (unlikely(flags & __GFP_ZERO))
  		for (i = 0; i < size; i++)
  			memset(p[i], 0, s->object_size);
  
  	slab_post_alloc_hook(s, flags, size, p);
  	/* FIXME: Trace call missing. Christoph would like a bulk variant */
  	return size;
  error:
  	local_irq_enable();
7b0501dd6   Jesper Dangaard Brouer   slab: avoid runni...
3444
  	cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
2a777eac1   Jesper Dangaard Brouer   slab: implement b...
3445
3446
3447
  	slab_post_alloc_hook(s, flags, i, p);
  	__kmem_cache_free_bulk(s, i, p);
  	return 0;
484748f0b   Christoph Lameter   slab: infrastruct...
3448
3449
  }
  EXPORT_SYMBOL(kmem_cache_alloc_bulk);
0f24f1287   Li Zefan   tracing, slab: De...
3450
  #ifdef CONFIG_TRACING
85beb5869   Steven Rostedt   tracing/slab: Mov...
3451
  void *
4052147c0   Ezequiel Garcia   mm, slab: Match S...
3452
  kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3453
  {
85beb5869   Steven Rostedt   tracing/slab: Mov...
3454
  	void *ret;
48356303f   Ezequiel Garcia   mm, slab: Rename ...
3455
  	ret = slab_alloc(cachep, flags, _RET_IP_);
85beb5869   Steven Rostedt   tracing/slab: Mov...
3456

505f5dcb1   Alexander Potapenko   mm, kasan: add GF...
3457
  	kasan_kmalloc(cachep, ret, size, flags);
85beb5869   Steven Rostedt   tracing/slab: Mov...
3458
  	trace_kmalloc(_RET_IP_, ret,
ff4fcd01e   Ezequiel Garcia   mm, slab: Remove ...
3459
  		      size, cachep->size, flags);
85beb5869   Steven Rostedt   tracing/slab: Mov...
3460
  	return ret;
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3461
  }
85beb5869   Steven Rostedt   tracing/slab: Mov...
3462
  EXPORT_SYMBOL(kmem_cache_alloc_trace);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3463
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3464
  #ifdef CONFIG_NUMA
d0d04b78f   Zhouping Liu   mm, slab: moved k...
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
  /**
   * kmem_cache_alloc_node - Allocate an object on the specified node
   * @cachep: The cache to allocate from.
   * @flags: See kmalloc().
   * @nodeid: node number of the target node.
   *
   * Identical to kmem_cache_alloc but it will allocate memory on the given
   * node, which can improve the performance for cpu bound structures.
   *
   * Fallback to other node is possible if __GFP_THISNODE is not set.
   */
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3476
3477
  void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
  {
48356303f   Ezequiel Garcia   mm, slab: Rename ...
3478
  	void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3479

505f5dcb1   Alexander Potapenko   mm, kasan: add GF...
3480
  	kasan_slab_alloc(cachep, ret, flags);
ca2b84cb3   Eduard - Gabriel Munteanu   kmemtrace: use tr...
3481
  	trace_kmem_cache_alloc_node(_RET_IP_, ret,
8c138bc00   Christoph Lameter   slab: Get rid of ...
3482
  				    cachep->object_size, cachep->size,
ca2b84cb3   Eduard - Gabriel Munteanu   kmemtrace: use tr...
3483
  				    flags, nodeid);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3484
3485
  
  	return ret;
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3486
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3487
  EXPORT_SYMBOL(kmem_cache_alloc_node);
0f24f1287   Li Zefan   tracing, slab: De...
3488
  #ifdef CONFIG_TRACING
4052147c0   Ezequiel Garcia   mm, slab: Match S...
3489
  void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
85beb5869   Steven Rostedt   tracing/slab: Mov...
3490
  				  gfp_t flags,
4052147c0   Ezequiel Garcia   mm, slab: Match S...
3491
3492
  				  int nodeid,
  				  size_t size)
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3493
  {
85beb5869   Steven Rostedt   tracing/slab: Mov...
3494
  	void *ret;
592f41450   Ezequiel Garcia   mm/slab: Fix typo...
3495
  	ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
505f5dcb1   Alexander Potapenko   mm, kasan: add GF...
3496
3497
  
  	kasan_kmalloc(cachep, ret, size, flags);
85beb5869   Steven Rostedt   tracing/slab: Mov...
3498
  	trace_kmalloc_node(_RET_IP_, ret,
ff4fcd01e   Ezequiel Garcia   mm, slab: Remove ...
3499
  			   size, cachep->size,
85beb5869   Steven Rostedt   tracing/slab: Mov...
3500
3501
  			   flags, nodeid);
  	return ret;
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3502
  }
85beb5869   Steven Rostedt   tracing/slab: Mov...
3503
  EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3504
  #endif
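
  /*
   * Map the requested size to a kmalloc cache and allocate from it on the
   * given node; the allocation is reported to KASAN before returning.
   */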
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3505
  static __always_inline void *
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3506
  __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
97e2bde47   Manfred Spraul   [PATCH] add kmall...
3507
  {
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
3508
  	struct kmem_cache *cachep;
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
3509
  	void *ret;
97e2bde47   Manfred Spraul   [PATCH] add kmall...
3510

2c59dd654   Christoph Lameter   slab: Common Kmal...
3511
  	cachep = kmalloc_slab(size, flags);
6cb8f9132   Christoph Lameter   Slab allocators: ...
3512
3513
  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
  		return cachep;
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
3514
  	ret = kmem_cache_alloc_node_trace(cachep, flags, node, size);
505f5dcb1   Alexander Potapenko   mm, kasan: add GF...
3515
  	kasan_kmalloc(cachep, ret, size, flags);
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
3516
3517
  
  	return ret;
97e2bde47   Manfred Spraul   [PATCH] add kmall...
3518
  }
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3519

8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3520
3521
  void *__kmalloc_node(size_t size, gfp_t flags, int node)
  {
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3522
  	return __do_kmalloc_node(size, flags, node, _RET_IP_);
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3523
  }
dbe5e69d2   Christoph Hellwig   [PATCH] slab: opt...
3524
  EXPORT_SYMBOL(__kmalloc_node);
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3525
3526
  
  void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
ce71e27c6   Eduard - Gabriel Munteanu   SLUB: Replace __b...
3527
  		int node, unsigned long caller)
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3528
  {
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3529
  	return __do_kmalloc_node(size, flags, node, caller);
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3530
3531
  }
  EXPORT_SYMBOL(__kmalloc_node_track_caller);
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3532
  #endif /* CONFIG_NUMA */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3533
3534
  
  /**
800590f52   Paul Drynoff   [PATCH] slab: kma...
3535
   * __do_kmalloc - allocate memory
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3536
   * @size: how many bytes of memory are required.
800590f52   Paul Drynoff   [PATCH] slab: kma...
3537
   * @flags: the type of memory to allocate (see kmalloc).
911851e6e   Randy Dunlap   [PATCH] slab: fix...
3538
   * @caller: function caller for debug tracking of the caller
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3539
   */
7fd6b1413   Pekka Enberg   [PATCH] slab: fix...
3540
  static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3541
  					  unsigned long caller)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3542
  {
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
3543
  	struct kmem_cache *cachep;
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3544
  	void *ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3545

2c59dd654   Christoph Lameter   slab: Common Kmal...
3546
  	cachep = kmalloc_slab(size, flags);
a5c96d8a1   Linus Torvalds   Fix up non-NUMA S...
3547
3548
  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
  		return cachep;
48356303f   Ezequiel Garcia   mm, slab: Rename ...
3549
  	ret = slab_alloc(cachep, flags, caller);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3550

505f5dcb1   Alexander Potapenko   mm, kasan: add GF...
3551
  	kasan_kmalloc(cachep, ret, size, flags);
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3552
  	trace_kmalloc(caller, ret,
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3553
  		      size, cachep->size, flags);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3554
3555
  
  	return ret;
7fd6b1413   Pekka Enberg   [PATCH] slab: fix...
3556
  }
7fd6b1413   Pekka Enberg   [PATCH] slab: fix...
3557
3558
  void *__kmalloc(size_t size, gfp_t flags)
  {
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3559
  	return __do_kmalloc(size, flags, _RET_IP_);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3560
3561
  }
  EXPORT_SYMBOL(__kmalloc);
ce71e27c6   Eduard - Gabriel Munteanu   SLUB: Replace __b...
3562
  void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
7fd6b1413   Pekka Enberg   [PATCH] slab: fix...
3563
  {
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3564
  	return __do_kmalloc(size, flags, caller);
7fd6b1413   Pekka Enberg   [PATCH] slab: fix...
3565
3566
  }
  EXPORT_SYMBOL(__kmalloc_track_caller);
1d2c8eea6   Christoph Hellwig   [PATCH] slab: cle...
3567

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3568
3569
3570
3571
3572
3573
3574
3575
  /**
   * kmem_cache_free - Deallocate an object
   * @cachep: The cache the allocation was from.
   * @objp: The previously allocated object.
   *
   * Free an object which was previously allocated from this
   * cache.
   */
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
3576
  void kmem_cache_free(struct kmem_cache *cachep, void *objp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3577
3578
  {
  	unsigned long flags;
b9ce5ef49   Glauber Costa   sl[au]b: always g...
3579
3580
3581
  	cachep = cache_from_obj(cachep, objp);
  	if (!cachep)
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3582
3583
  
  	local_irq_save(flags);
d97d476b1   Feng Tang   slab: Fix a typo ...
3584
  	debug_check_no_locks_freed(objp, cachep->object_size);
3ac7fe5a4   Thomas Gleixner   infrastructure to...
3585
  	if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
8c138bc00   Christoph Lameter   slab: Get rid of ...
3586
  		debug_check_no_obj_freed(objp, cachep->object_size);
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3587
  	__cache_free(cachep, objp, _RET_IP_);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3588
  	local_irq_restore(flags);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3589

ca2b84cb3   Eduard - Gabriel Munteanu   kmemtrace: use tr...
3590
  	trace_kmem_cache_free(_RET_IP_, objp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3591
3592
  }
  EXPORT_SYMBOL(kmem_cache_free);
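
  /*
   * Free an array of objects with one interrupt-disabled section.  When
   * called via kfree_bulk() (orig_s == NULL) the cache is looked up from
   * each object's page.
   */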
e6cdb58d1   Jesper Dangaard Brouer   slab: implement b...
3593
3594
3595
3596
3597
3598
3599
3600
  void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
  {
  	struct kmem_cache *s;
  	size_t i;
  
  	local_irq_disable();
  	for (i = 0; i < size; i++) {
  		void *objp = p[i];
ca2571955   Jesper Dangaard Brouer   mm: new API kfree...
3601
3602
3603
3604
  		if (!orig_s) /* called via kfree_bulk */
  			s = virt_to_cache(objp);
  		else
  			s = cache_from_obj(orig_s, objp);
e6cdb58d1   Jesper Dangaard Brouer   slab: implement b...
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
  
  		debug_check_no_locks_freed(objp, s->object_size);
  		if (!(s->flags & SLAB_DEBUG_OBJECTS))
  			debug_check_no_obj_freed(objp, s->object_size);
  
  		__cache_free(s, objp, _RET_IP_);
  	}
  	local_irq_enable();
  
  	/* FIXME: add tracing */
  }
  EXPORT_SYMBOL(kmem_cache_free_bulk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3617
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3618
3619
3620
   * kfree - free previously allocated memory
   * @objp: pointer returned by kmalloc.
   *
80e93effc   Pekka Enberg   [PATCH] update kf...
3621
3622
   * If @objp is NULL, no operation is performed.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3623
3624
3625
3626
3627
   * Don't free memory not originally allocated by kmalloc()
   * or you will run into trouble.
   */
  void kfree(const void *objp)
  {
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
3628
  	struct kmem_cache *c;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3629
  	unsigned long flags;
2121db74b   Pekka Enberg   kmemtrace: trace ...
3630
  	trace_kfree(_RET_IP_, objp);
6cb8f9132   Christoph Lameter   Slab allocators: ...
3631
  	if (unlikely(ZERO_OR_NULL_PTR(objp)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3632
3633
3634
  		return;
  	local_irq_save(flags);
  	kfree_debugcheck(objp);
6ed5eb221   Pekka Enberg   [PATCH] slab: ext...
3635
  	c = virt_to_cache(objp);
8c138bc00   Christoph Lameter   slab: Get rid of ...
3636
3637
3638
  	debug_check_no_locks_freed(objp, c->object_size);
  
  	debug_check_no_obj_freed(objp, c->object_size);
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3639
  	__cache_free(c, (void *)objp, _RET_IP_);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3640
3641
3642
  	local_irq_restore(flags);
  }
  EXPORT_SYMBOL(kfree);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3643
  /*
ce8eb6c42   Christoph Lameter   slab: Rename list...
3644
   * This initializes kmem_cache_node or resizes various caches for all nodes.
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3645
   */
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
3646
  static int setup_kmem_cache_nodes(struct kmem_cache *cachep, gfp_t gfp)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3647
  {
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
3648
  	int ret;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3649
  	int node;
ce8eb6c42   Christoph Lameter   slab: Rename list...
3650
  	struct kmem_cache_node *n;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3651

9c09a95cf   Mel Gorman   slab: partially r...
3652
  	for_each_online_node(node) {
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
3653
3654
  		ret = setup_kmem_cache_node(cachep, node, gfp, true);
  		if (ret)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3655
  			goto fail;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3656
  	}
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
3657

cafeb02e0   Christoph Lameter   [PATCH] alloc_kme...
3658
  	return 0;
0718dc2a8   Christoph Lameter   [PATCH] slab: fix...
3659

a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3660
  fail:
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3661
  	if (!cachep->list.next) {
0718dc2a8   Christoph Lameter   [PATCH] slab: fix...
3662
3663
3664
  		/* Cache is not active yet. Roll back what we did */
  		node--;
  		while (node >= 0) {
18bf85411   Christoph Lameter   slab: use get_nod...
3665
3666
  			n = get_node(cachep, node);
  			if (n) {
ce8eb6c42   Christoph Lameter   slab: Rename list...
3667
3668
3669
  				kfree(n->shared);
  				free_alien_cache(n->alien);
  				kfree(n);
6a67368c3   Christoph Lameter   slab: Rename node...
3670
  				cachep->node[node] = NULL;
0718dc2a8   Christoph Lameter   [PATCH] slab: fix...
3671
3672
3673
3674
  			}
  			node--;
  		}
  	}
cafeb02e0   Christoph Lameter   [PATCH] alloc_kme...
3675
  	return -ENOMEM;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3676
  }
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
3677
  /* Always called with the slab_mutex held */
943a451a8   Glauber Costa   slab: propagate t...
3678
  static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
83b519e8b   Pekka Enberg   slab: setup alloc...
3679
  				int batchcount, int shared, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3680
  {
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3681
3682
  	struct array_cache __percpu *cpu_cache, *prev;
  	int cpu;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3683

bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3684
3685
  	cpu_cache = alloc_kmem_cache_cpus(cachep, limit, batchcount);
  	if (!cpu_cache)
d2e7b7d0a   Siddha, Suresh B   [PATCH] fix poten...
3686
  		return -ENOMEM;
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3687
3688
  	prev = cachep->cpu_cache;
  	cachep->cpu_cache = cpu_cache;
a87c75fbc   Greg Thelen   slab: avoid IPIs ...
3689
3690
3691
3692
3693
3694
  	/*
  	 * Without a previous cpu_cache there's no need to synchronize remote
  	 * cpus, so skip the IPIs.
  	 */
  	if (prev)
  		kick_all_cpus_sync();
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3695

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3696
  	check_irq_on();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3697
3698
  	cachep->batchcount = batchcount;
  	cachep->limit = limit;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3699
  	cachep->shared = shared;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3700

bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3701
  	if (!prev)
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
3702
  		goto setup_node;
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3703
3704
  
  	for_each_online_cpu(cpu) {
97654dfa2   Joonsoo Kim   slab: defer slab_...
3705
  		LIST_HEAD(list);
18bf85411   Christoph Lameter   slab: use get_nod...
3706
3707
  		int node;
  		struct kmem_cache_node *n;
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3708
  		struct array_cache *ac = per_cpu_ptr(prev, cpu);
18bf85411   Christoph Lameter   slab: use get_nod...
3709

bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3710
  		node = cpu_to_mem(cpu);
18bf85411   Christoph Lameter   slab: use get_nod...
3711
3712
  		n = get_node(cachep, node);
  		spin_lock_irq(&n->list_lock);
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3713
  		free_block(cachep, ac->entry, ac->avail, node, &list);
18bf85411   Christoph Lameter   slab: use get_nod...
3714
  		spin_unlock_irq(&n->list_lock);
97654dfa2   Joonsoo Kim   slab: defer slab_...
3715
  		slabs_destroy(cachep, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3716
  	}
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3717
  	free_percpu(prev);
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
3718
3719
  setup_node:
  	return setup_kmem_cache_nodes(cachep, gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3720
  }
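
  /*
   * Tune the root cache and, once the allocator is fully up, propagate the
   * same limit/batchcount/shared settings to all of its memcg child caches.
   * The return value is determined by the root cache only.
   */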
943a451a8   Glauber Costa   slab: propagate t...
3721
3722
3723
3724
  static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
  				int batchcount, int shared, gfp_t gfp)
  {
  	int ret;
426589f57   Vladimir Davydov   slab: link memcg ...
3725
  	struct kmem_cache *c;
943a451a8   Glauber Costa   slab: propagate t...
3726
3727
3728
3729
3730
3731
3732
3733
  
  	ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
  
  	if (slab_state < FULL)
  		return ret;
  
  	if ((ret < 0) || !is_root_cache(cachep))
  		return ret;
426589f57   Vladimir Davydov   slab: link memcg ...
3734
3735
3736
3737
  	lockdep_assert_held(&slab_mutex);
  	for_each_memcg_cache(c, cachep) {
  		/* return value determined by the root cache only */
  		__do_tune_cpucache(c, limit, batchcount, shared, gfp);
943a451a8   Glauber Costa   slab: propagate t...
3738
3739
3740
3741
  	}
  
  	return ret;
  }
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
3742
  /* Called with slab_mutex held always */
83b519e8b   Pekka Enberg   slab: setup alloc...
3743
  static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3744
3745
  {
  	int err;
943a451a8   Glauber Costa   slab: propagate t...
3746
3747
3748
  	int limit = 0;
  	int shared = 0;
  	int batchcount = 0;
7c00fce98   Thomas Garnier   mm: reorganize SL...
3749
  	err = cache_random_seq_create(cachep, cachep->num, gfp);
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
3750
3751
  	if (err)
  		goto end;
943a451a8   Glauber Costa   slab: propagate t...
3752
3753
3754
3755
3756
3757
  	if (!is_root_cache(cachep)) {
  		struct kmem_cache *root = memcg_root_cache(cachep);
  		limit = root->limit;
  		shared = root->shared;
  		batchcount = root->batchcount;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3758

943a451a8   Glauber Costa   slab: propagate t...
3759
3760
  	if (limit && shared && batchcount)
  		goto skip_setup;
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3761
3762
  	/*
  	 * The head array serves three purposes:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3763
3764
  	 * - create a LIFO ordering, i.e. return objects that are cache-warm
  	 * - reduce the number of spinlock operations.
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3765
  	 * - reduce the number of linked list operations on the slab and
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3766
3767
3768
3769
  	 *   bufctl chains: array operations are cheaper.
  	 * The numbers are guessed, we should auto-tune as described by
  	 * Bonwick.
  	 */
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3770
  	if (cachep->size > 131072)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3771
  		limit = 1;
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3772
  	else if (cachep->size > PAGE_SIZE)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3773
  		limit = 8;
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3774
  	else if (cachep->size > 1024)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3775
  		limit = 24;
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3776
  	else if (cachep->size > 256)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3777
3778
3779
  		limit = 54;
  	else
  		limit = 120;
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3780
3781
  	/*
  	 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3782
3783
3784
3785
3786
3787
3788
3789
  	 * allocation behaviour: Most allocs on one cpu, most free operations
  	 * on another cpu. For these cases, efficient object passing between
  	 * cpus is necessary. This is provided by a shared array. The array
  	 * replaces Bonwick's magazine layer.
  	 * On uniprocessor, it's functionally equivalent (but less efficient)
  	 * to a larger limit. Thus disabled by default.
  	 */
  	shared = 0;
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3790
  	if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3791
  		shared = 8;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3792
3793
  
  #if DEBUG
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3794
3795
3796
  	/*
  	 * With debugging enabled, a large batchcount leads to excessively long
  	 * periods with local interrupts disabled. Limit the batchcount.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3797
3798
3799
3800
  	 */
  	if (limit > 32)
  		limit = 32;
  #endif
943a451a8   Glauber Costa   slab: propagate t...
3801
3802
3803
  	batchcount = (limit + 1) / 2;
  skip_setup:
  	err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
3804
  end:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3805
  	if (err)
1170532bb   Joe Perches   mm: convert print...
3806
3807
  		pr_err("enable_cpucache failed for %s, error %d
  ",
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3808
  		       cachep->name, -err);
2ed3a4ef9   Christoph Lameter   [PATCH] slab: do ...
3809
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3810
  }
1b55253a7   Christoph Lameter   [PATCH] slab: rem...
3811
  /*
ce8eb6c42   Christoph Lameter   slab: Rename list...
3812
3813
   * Drain an array if it contains any elements, taking the node lock only if
   * necessary. Note that the node listlock also protects the array_cache
b18e7e654   Christoph Lameter   [PATCH] slab: fix...
3814
   * if drain_array() is used on the shared array.
1b55253a7   Christoph Lameter   [PATCH] slab: rem...
3815
   */
ce8eb6c42   Christoph Lameter   slab: Rename list...
3816
  static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3817
  			 struct array_cache *ac, int node)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3818
  {
97654dfa2   Joonsoo Kim   slab: defer slab_...
3819
  	LIST_HEAD(list);
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3820
3821
3822
  
  	/* ac from n->shared can be freed if we don't hold the slab_mutex. */
  	check_mutex_acquired();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3823

1b55253a7   Christoph Lameter   [PATCH] slab: rem...
3824
3825
  	if (!ac || !ac->avail)
  		return;
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3826
3827
  
  	if (ac->touched) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3828
  		ac->touched = 0;
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3829
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3830
  	}
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3831
3832
3833
3834
3835
3836
  
  	spin_lock_irq(&n->list_lock);
  	drain_array_locked(cachep, ac, node, false, &list);
  	spin_unlock_irq(&n->list_lock);
  
  	slabs_destroy(cachep, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3837
3838
3839
3840
  }
  
  /**
   * cache_reap - Reclaim memory from caches.
05fb6bf0b   Randy Dunlap   [PATCH] kernel-do...
3841
   * @w: work descriptor
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3842
3843
3844
3845
3846
3847
   *
   * Called from workqueue/eventd every few seconds.
   * Purpose:
   * - clear the per-cpu caches for this CPU.
   * - return freeable pages to the main free memory pool.
   *
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3848
3849
   * If we cannot acquire the cache chain mutex then just give up - we'll try
   * again on the next iteration.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3850
   */
7c5cae368   Christoph Lameter   [PATCH] slab: use...
3851
  static void cache_reap(struct work_struct *w)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3852
  {
7a7c381d2   Christoph Hellwig   [PATCH] slab: sto...
3853
  	struct kmem_cache *searchp;
ce8eb6c42   Christoph Lameter   slab: Rename list...
3854
  	struct kmem_cache_node *n;
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3855
  	int node = numa_mem_id();
bf6aede71   Jean Delvare   workqueue: add to...
3856
  	struct delayed_work *work = to_delayed_work(w);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3857

18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
3858
  	if (!mutex_trylock(&slab_mutex))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3859
  		/* Give up. Set up the next iteration. */
7c5cae368   Christoph Lameter   [PATCH] slab: use...
3860
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3861

18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
3862
  	list_for_each_entry(searchp, &slab_caches, list) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3863
  		check_irq_on();
35386e3b0   Christoph Lameter   [PATCH] slab: cac...
3864
  		/*
ce8eb6c42   Christoph Lameter   slab: Rename list...
3865
  		 * We only take the node lock if absolutely necessary and we
35386e3b0   Christoph Lameter   [PATCH] slab: cac...
3866
3867
3868
  		 * have established with reasonable certainty that
  		 * we can do some work if the lock was obtained.
  		 */
18bf85411   Christoph Lameter   slab: use get_nod...
3869
  		n = get_node(searchp, node);
35386e3b0   Christoph Lameter   [PATCH] slab: cac...
3870

ce8eb6c42   Christoph Lameter   slab: Rename list...
3871
  		reap_alien(searchp, n);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3872

18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3873
  		drain_array(searchp, n, cpu_cache_get(searchp), node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3874

35386e3b0   Christoph Lameter   [PATCH] slab: cac...
3875
3876
3877
3878
  		/*
  		 * These are racy checks but it does not matter
  		 * if we skip one check or scan twice.
  		 */
ce8eb6c42   Christoph Lameter   slab: Rename list...
3879
  		if (time_after(n->next_reap, jiffies))
35386e3b0   Christoph Lameter   [PATCH] slab: cac...
3880
  			goto next;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3881

5f0985bb1   Jianyu Zhan   mm/slab.c: cleanu...
3882
  		n->next_reap = jiffies + REAPTIMEOUT_NODE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3883

18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3884
  		drain_array(searchp, n, n->shared, node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3885

ce8eb6c42   Christoph Lameter   slab: Rename list...
3886
3887
  		if (n->free_touched)
  			n->free_touched = 0;
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
3888
3889
  		else {
  			int freed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3890

ce8eb6c42   Christoph Lameter   slab: Rename list...
3891
  			freed = drain_freelist(searchp, n, (n->free_limit +
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
3892
3893
3894
  				5 * searchp->num - 1) / (5 * searchp->num));
  			STATS_ADD_REAPED(searchp, freed);
  		}
35386e3b0   Christoph Lameter   [PATCH] slab: cac...
3895
  next:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3896
3897
3898
  		cond_resched();
  	}
  	check_irq_on();
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
3899
  	mutex_unlock(&slab_mutex);
8fce4d8e3   Christoph Lameter   [PATCH] slab: Nod...
3900
  	next_reap_node();
7c5cae368   Christoph Lameter   [PATCH] slab: use...
3901
  out:
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3902
  	/* Set up the next iteration */
20eaa393f   Vlastimil Babka   mm, slab: resched...
3903
3904
  	schedule_delayed_work_on(smp_processor_id(), work,
  				round_jiffies_relative(REAPTIMEOUT_AC));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3905
  }
158a96242   Linus Torvalds   Unify /proc/slabi...
3906
  #ifdef CONFIG_SLABINFO
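
  /*
   * Gather the per-node object and slab counts, under each node's
   * list_lock, that back the /proc/slabinfo statistics.
   */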
0d7561c61   Glauber Costa   sl[au]b: Process ...
3907
  void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3908
  {
f728b0a5d   Greg Thelen   mm, slab: faster ...
3909
  	unsigned long active_objs, num_objs, active_slabs;
bf00bd345   David Rientjes   mm, slab: maintai...
3910
3911
  	unsigned long total_slabs = 0, free_objs = 0, shared_avail = 0;
  	unsigned long free_slabs = 0;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3912
  	int node;
ce8eb6c42   Christoph Lameter   slab: Rename list...
3913
  	struct kmem_cache_node *n;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3914

18bf85411   Christoph Lameter   slab: use get_nod...
3915
  	for_each_kmem_cache_node(cachep, node, n) {
ca3b9b917   Ravikiran G Thirumalai   [PATCH] NUMA slab...
3916
  		check_irq_on();
ce8eb6c42   Christoph Lameter   slab: Rename list...
3917
  		spin_lock_irq(&n->list_lock);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3918

bf00bd345   David Rientjes   mm, slab: maintai...
3919
3920
  		total_slabs += n->total_slabs;
  		free_slabs += n->free_slabs;
f728b0a5d   Greg Thelen   mm, slab: faster ...
3921
  		free_objs += n->free_objects;
07a63c41f   Aruna Ramakrishna   mm/slab: improve ...
3922

ce8eb6c42   Christoph Lameter   slab: Rename list...
3923
3924
  		if (n->shared)
  			shared_avail += n->shared->avail;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3925

ce8eb6c42   Christoph Lameter   slab: Rename list...
3926
  		spin_unlock_irq(&n->list_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3927
  	}
bf00bd345   David Rientjes   mm, slab: maintai...
3928
3929
  	num_objs = total_slabs * cachep->num;
  	active_slabs = total_slabs - free_slabs;
f728b0a5d   Greg Thelen   mm, slab: faster ...
3930
  	active_objs = num_objs - free_objs;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3931

0d7561c61   Glauber Costa   sl[au]b: Process ...
3932
3933
3934
  	sinfo->active_objs = active_objs;
  	sinfo->num_objs = num_objs;
  	sinfo->active_slabs = active_slabs;
bf00bd345   David Rientjes   mm, slab: maintai...
3935
  	sinfo->num_slabs = total_slabs;
0d7561c61   Glauber Costa   sl[au]b: Process ...
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
  	sinfo->shared_avail = shared_avail;
  	sinfo->limit = cachep->limit;
  	sinfo->batchcount = cachep->batchcount;
  	sinfo->shared = cachep->shared;
  	sinfo->objects_per_slab = cachep->num;
  	sinfo->cache_order = cachep->gfporder;
  }
  
  void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3946
  #if STATS
ce8eb6c42   Christoph Lameter   slab: Rename list...
3947
  	{			/* node stats */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3948
3949
3950
3951
3952
3953
  		unsigned long high = cachep->high_mark;
  		unsigned long allocs = cachep->num_allocations;
  		unsigned long grown = cachep->grown;
  		unsigned long reaped = cachep->reaped;
  		unsigned long errors = cachep->errors;
  		unsigned long max_freeable = cachep->max_freeable;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3954
  		unsigned long node_allocs = cachep->node_allocs;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3955
  		unsigned long node_frees = cachep->node_frees;
fb7faf331   Ravikiran G Thirumalai   [PATCH] slab: add...
3956
  		unsigned long overflows = cachep->node_overflow;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3957

756a025f0   Joe Perches   mm: coalesce spli...
3958
  		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu %4lu %4lu %4lu %4lu %4lu",
e92dd4fd1   Joe Perches   slab: Fix continu...
3959
3960
3961
  			   allocs, high, grown,
  			   reaped, errors, max_freeable, node_allocs,
  			   node_frees, overflows);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3962
3963
3964
3965
3966
3967
3968
3969
3970
  	}
  	/* cpu stats */
  	{
  		unsigned long allochit = atomic_read(&cachep->allochit);
  		unsigned long allocmiss = atomic_read(&cachep->allocmiss);
  		unsigned long freehit = atomic_read(&cachep->freehit);
  		unsigned long freemiss = atomic_read(&cachep->freemiss);
  
  		seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3971
  			   allochit, allocmiss, freehit, freemiss);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3972
3973
  	}
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3974
  }
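  /*
   * When STATS is enabled, the two seq_printf() calls above append these
   * columns to each cache's /proc/slabinfo line, in the same order as the
   * arguments passed above:
   *   " : globalstat" allocs high grown reaped errors max_freeable
   *                   node_allocs node_frees overflows
   *   " : cpustat"    allochit allocmiss freehit freemiss
   */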
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3975
3976
3977
3978
3979
3980
3981
3982
  #define MAX_SLABINFO_WRITE 128
  /**
   * slabinfo_write - Tuning for the slab allocator
   * @file: unused
   * @buffer: user buffer
   * @count: data length
   * @ppos: unused
   */
b7454ad3c   Glauber Costa   mm/sl[au]b: Move ...
3983
  ssize_t slabinfo_write(struct file *file, const char __user *buffer,
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3984
  		       size_t count, loff_t *ppos)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3985
  {
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3986
  	char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3987
  	int limit, batchcount, shared, res;
7a7c381d2   Christoph Hellwig   [PATCH] slab: sto...
3988
  	struct kmem_cache *cachep;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3989

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3990
3991
3992
3993
  	if (count > MAX_SLABINFO_WRITE)
  		return -EINVAL;
  	if (copy_from_user(&kbuf, buffer, count))
  		return -EFAULT;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3994
  	kbuf[MAX_SLABINFO_WRITE] = '\0';
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
  
  	tmp = strchr(kbuf, ' ');
  	if (!tmp)
  		return -EINVAL;
  	*tmp = '\0';
  	tmp++;
  	if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
  		return -EINVAL;
  
  	/* Find the cache in the chain of caches. */
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
4005
  	mutex_lock(&slab_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4006
  	res = -EINVAL;
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
4007
  	list_for_each_entry(cachep, &slab_caches, list) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4008
  		if (!strcmp(cachep->name, kbuf)) {
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
4009
4010
  			if (limit < 1 || batchcount < 1 ||
  					batchcount > limit || shared < 0) {
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
4011
  				res = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4012
  			} else {
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
4013
  				res = do_tune_cpucache(cachep, limit,
83b519e8b   Pekka Enberg   slab: setup alloc...
4014
4015
  						       batchcount, shared,
  						       GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4016
4017
4018
4019
  			}
  			break;
  		}
  	}
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
4020
  	mutex_unlock(&slab_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4021
4022
4023
4024
  	if (res >= 0)
  		res = count;
  	return res;
  }
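  /*
   * Illustrative use of the tuning interface above (not part of slab.c);
   * the cache name "dentry" and the numbers are arbitrary sample values.
   * A sufficiently privileged process writes "name limit batchcount shared"
   * to /proc/slabinfo, e.g.:
   *
   *	int fd = open("/proc/slabinfo", O_WRONLY);
   *	if (fd >= 0) {
   *		static const char buf[] = "dentry 128 64 8";
   *		write(fd, buf, sizeof(buf) - 1);
   *		close(fd);
   *	}
   *
   * The write lands in slabinfo_write(), which parses the cache name up to
   * the first space, expects exactly three integers, looks the cache up by
   * name and, if 1 <= batchcount <= limit and shared >= 0, calls
   * do_tune_cpucache(); out-of-range values are accepted without retuning
   * (res = 0).
   */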
871751e25   Al Viro   [PATCH] slab: imp...
4025
4026
  
  #ifdef CONFIG_DEBUG_SLAB_LEAK
871751e25   Al Viro   [PATCH] slab: imp...
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
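  /*
   * The leak-tracking buffer 'n' used below is laid out as:
   *   n[0] = capacity in (caller, count) pairs, n[1] = pairs in use,
   *   n[2..] = the pairs themselves, kept sorted by caller address.
   * add_caller() binary-searches for 'v'; on a hit it bumps that entry's
   * count, otherwise it shifts the tail up with memmove() and inserts a
   * new pair, returning 0 only when the buffer is full.
   */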
  static inline int add_caller(unsigned long *n, unsigned long v)
  {
  	unsigned long *p;
  	int l;
  	if (!v)
  		return 1;
  	l = n[1];
  	p = n + 2;
  	while (l) {
  		int i = l/2;
  		unsigned long *q = p + 2 * i;
  		if (*q == v) {
  			q[1]++;
  			return 1;
  		}
  		if (*q > v) {
  			l = i;
  		} else {
  			p = q + 2;
  			l -= i + 1;
  		}
  	}
  	if (++n[1] == n[0])
  		return 0;
  	memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n));
  	p[0] = v;
  	p[1] = 1;
  	return 1;
  }
8456a648c   Joonsoo Kim   slab: use struct ...
4056
4057
  static void handle_slab(unsigned long *n, struct kmem_cache *c,
  						struct page *page)
871751e25   Al Viro   [PATCH] slab: imp...
4058
4059
  {
  	void *p;
d31676dfd   Joonsoo Kim   mm/slab: alternat...
4060
4061
  	int i, j;
  	unsigned long v;
b1cb0982b   Joonsoo Kim   slab: change the ...
4062

871751e25   Al Viro   [PATCH] slab: imp...
4063
4064
  	if (n[0] == n[1])
  		return;
8456a648c   Joonsoo Kim   slab: use struct ...
4065
  	for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
d31676dfd   Joonsoo Kim   mm/slab: alternat...
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
  		bool active = true;
  
  		for (j = page->active; j < c->num; j++) {
  			if (get_free_obj(page, j) == i) {
  				active = false;
  				break;
  			}
  		}
  
  		if (!active)
871751e25   Al Viro   [PATCH] slab: imp...
4076
  			continue;
b1cb0982b   Joonsoo Kim   slab: change the ...
4077

d31676dfd   Joonsoo Kim   mm/slab: alternat...
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
  		/*
  		 * probe_kernel_read() is used for DEBUG_PAGEALLOC: the page
  		 * table mapping is only established when the object is
  		 * actually allocated, so a plain read could mistakenly touch
  		 * an unmapped object still sitting in the cpu cache.
  		 */
  		if (probe_kernel_read(&v, dbg_userword(c, p), sizeof(v)))
  			continue;
  
  		if (!add_caller(n, v))
871751e25   Al Viro   [PATCH] slab: imp...
4088
4089
4090
4091
4092
4093
4094
  			return;
  	}
  }
  
  static void show_symbol(struct seq_file *m, unsigned long address)
  {
  #ifdef CONFIG_KALLSYMS
871751e25   Al Viro   [PATCH] slab: imp...
4095
  	unsigned long offset, size;
9281acea6   Tejun Heo   kallsyms: make KS...
4096
  	char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN];
871751e25   Al Viro   [PATCH] slab: imp...
4097

a5c43dae7   Alexey Dobriyan   Fix race between ...
4098
  	if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
871751e25   Al Viro   [PATCH] slab: imp...
4099
  		seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
a5c43dae7   Alexey Dobriyan   Fix race between ...
4100
  		if (modname[0])
871751e25   Al Viro   [PATCH] slab: imp...
4101
4102
4103
4104
4105
4106
4107
4108
4109
  			seq_printf(m, " [%s]", modname);
  		return;
  	}
  #endif
  	seq_printf(m, "%p", (void *)address);
  }
  
  static int leaks_show(struct seq_file *m, void *p)
  {
0672aa7c2   Thierry Reding   mm, slab: Build f...
4110
  	struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
8456a648c   Joonsoo Kim   slab: use struct ...
4111
  	struct page *page;
ce8eb6c42   Christoph Lameter   slab: Rename list...
4112
  	struct kmem_cache_node *n;
871751e25   Al Viro   [PATCH] slab: imp...
4113
  	const char *name;
db8450673   Christoph Lameter   slab: Fixup CONFI...
4114
  	unsigned long *x = m->private;
871751e25   Al Viro   [PATCH] slab: imp...
4115
4116
4117
4118
4119
4120
4121
  	int node;
  	int i;
  
  	if (!(cachep->flags & SLAB_STORE_USER))
  		return 0;
  	if (!(cachep->flags & SLAB_RED_ZONE))
  		return 0;
d31676dfd   Joonsoo Kim   mm/slab: alternat...
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
  	/*
  	 * Set store_user_clean and start to grab stored user information
  	 * for all objects on this cache. If any alloc/free requests come
  	 * in while we are processing, the information would be stale, so
  	 * restart the whole scan.
  	 */
  	do {
  		set_store_user_clean(cachep);
  		drain_cpu_caches(cachep);
  
  		x[1] = 0;
871751e25   Al Viro   [PATCH] slab: imp...
4133

d31676dfd   Joonsoo Kim   mm/slab: alternat...
4134
  		for_each_kmem_cache_node(cachep, node, n) {
871751e25   Al Viro   [PATCH] slab: imp...
4135

d31676dfd   Joonsoo Kim   mm/slab: alternat...
4136
4137
  			check_irq_on();
  			spin_lock_irq(&n->list_lock);
871751e25   Al Viro   [PATCH] slab: imp...
4138

d31676dfd   Joonsoo Kim   mm/slab: alternat...
4139
4140
4141
4142
4143
4144
4145
  			list_for_each_entry(page, &n->slabs_full, lru)
  				handle_slab(x, cachep, page);
  			list_for_each_entry(page, &n->slabs_partial, lru)
  				handle_slab(x, cachep, page);
  			spin_unlock_irq(&n->list_lock);
  		}
  	} while (!is_store_user_clean(cachep));
871751e25   Al Viro   [PATCH] slab: imp...
4146

871751e25   Al Viro   [PATCH] slab: imp...
4147
  	name = cachep->name;
db8450673   Christoph Lameter   slab: Fixup CONFI...
4148
  	if (x[0] == x[1]) {
871751e25   Al Viro   [PATCH] slab: imp...
4149
  		/* Increase the buffer size */
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
4150
  		mutex_unlock(&slab_mutex);
db8450673   Christoph Lameter   slab: Fixup CONFI...
4151
  		m->private = kzalloc(x[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
871751e25   Al Viro   [PATCH] slab: imp...
4152
4153
  		if (!m->private) {
  			/* Too bad, we are really out */
db8450673   Christoph Lameter   slab: Fixup CONFI...
4154
  			m->private = x;
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
4155
  			mutex_lock(&slab_mutex);
871751e25   Al Viro   [PATCH] slab: imp...
4156
4157
  			return -ENOMEM;
  		}
db8450673   Christoph Lameter   slab: Fixup CONFI...
4158
4159
  		*(unsigned long *)m->private = x[0] * 2;
  		kfree(x);
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
4160
  		mutex_lock(&slab_mutex);
871751e25   Al Viro   [PATCH] slab: imp...
4161
4162
4163
4164
  		/* Now make sure this entry will be retried */
  		m->count = m->size;
  		return 0;
  	}
db8450673   Christoph Lameter   slab: Fixup CONFI...
4165
4166
4167
  	for (i = 0; i < x[1]; i++) {
  		seq_printf(m, "%s: %lu ", name, x[2*i+3]);
  		show_symbol(m, x[2*i+2]);
871751e25   Al Viro   [PATCH] slab: imp...
4168
4169
4170
  		seq_putc(m, '\n');
  	}
d2e7b7d0a   Siddha, Suresh B   [PATCH] fix poten...
4171

871751e25   Al Viro   [PATCH] slab: imp...
4172
4173
  	return 0;
  }
a0ec95a8e   Alexey Dobriyan   proc: move /proc/...
4174
  static const struct seq_operations slabstats_op = {
1df3b26f2   Vladimir Davydov   slab: print slabi...
4175
  	.start = slab_start,
276a2439c   Wanpeng Li   mm/slab: Give s_n...
4176
4177
  	.next = slab_next,
  	.stop = slab_stop,
871751e25   Al Viro   [PATCH] slab: imp...
4178
4179
  	.show = leaks_show,
  };
a0ec95a8e   Alexey Dobriyan   proc: move /proc/...
4180
4181
4182
  
  static int slabstats_open(struct inode *inode, struct file *file)
  {
b208ce329   Rob Jones   mm/slab.c: use __...
4183
4184
4185
4186
4187
4188
4189
4190
4191
  	unsigned long *n;
  
  	n = __seq_open_private(file, &slabstats_op, PAGE_SIZE);
  	if (!n)
  		return -ENOMEM;
  
  	*n = PAGE_SIZE / (2 * sizeof(unsigned long));
  
  	return 0;
a0ec95a8e   Alexey Dobriyan   proc: move /proc/...
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
  }
  
  static const struct file_operations proc_slabstats_operations = {
  	.open		= slabstats_open,
  	.read		= seq_read,
  	.llseek		= seq_lseek,
  	.release	= seq_release_private,
  };
  #endif
  
  static int __init slab_proc_init(void)
  {
  #ifdef CONFIG_DEBUG_SLAB_LEAK
  	proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
871751e25   Al Viro   [PATCH] slab: imp...
4206
  #endif
a0ec95a8e   Alexey Dobriyan   proc: move /proc/...
4207
4208
4209
  	return 0;
  }
  module_init(slab_proc_init);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4210
  #endif
04385fc5e   Kees Cook   mm: SLAB hardened...
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
  #ifdef CONFIG_HARDENED_USERCOPY
  /*
   * Rejects usercopy ranges that are not correctly sized for the object.
   *
   * Returns NULL if the check passes, otherwise the cache's name
   * (a const char *) to indicate an error.
   */
  const char *__check_heap_object(const void *ptr, unsigned long n,
  				struct page *page)
  {
  	struct kmem_cache *cachep;
  	unsigned int objnr;
  	unsigned long offset;
  
  	/* Find and validate object. */
  	cachep = page->slab_cache;
  	objnr = obj_to_index(cachep, page, (void *)ptr);
  	BUG_ON(objnr >= cachep->num);
  
  	/* Find offset within object. */
  	offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
  
  	/* Allow address range falling entirely within object size. */
  	if (offset <= cachep->object_size && n <= cachep->object_size - offset)
  		return NULL;
  
  	return cachep->name;
  }
  #endif /* CONFIG_HARDENED_USERCOPY */
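  /*
   * Worked example of the bounds check above (sample numbers only): for a
   * cache with object_size = 192, a usercopy of n bytes starting at ptr,
   * with offset = ptr - object_start - obj_offset(), is allowed only when
   * offset <= 192 and n <= 192 - offset.  So offset = 64, n = 128 passes,
   * while offset = 64, n = 160 overruns the object and the cache name is
   * returned so the caller can report the violation.
   */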
00e145b6d   Manfred Spraul   [PATCH] slab: rem...
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
  /**
   * ksize - get the actual amount of memory allocated for a given object
   * @objp: Pointer to the object
   *
   * kmalloc may internally round up allocations and return more memory
   * than requested. ksize() can be used to determine the actual amount of
   * memory allocated. The caller may use this additional memory, even though
   * a smaller amount of memory was initially specified with the kmalloc call.
   * The caller must guarantee that objp points to a valid object previously
   * allocated with either kmalloc() or kmem_cache_alloc(). The object
   * must not be freed during the duration of the call.
   */
fd76bab2f   Pekka Enberg   slab: introduce k...
4252
  size_t ksize(const void *objp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4253
  {
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
4254
  	size_t size;
ef8b4520b   Christoph Lameter   Slab allocators: ...
4255
4256
  	BUG_ON(!objp);
  	if (unlikely(objp == ZERO_SIZE_PTR))
00e145b6d   Manfred Spraul   [PATCH] slab: rem...
4257
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4258

7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
4259
4260
4261
4262
  	size = virt_to_cache(objp)->object_size;
  	/* We assume that ksize callers could use the whole allocated area,
  	 * so we need to unpoison this area.
  	 */
4ebb31a42   Alexander Potapenko   mm, kasan: don't ...
4263
  	kasan_unpoison_shadow(objp, size);
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
4264
4265
  
  	return size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4266
  }
b1aabecd5   Kirill A. Shutemov   mm: Export symbol...
4267
  EXPORT_SYMBOL(ksize);
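  /*
   * Illustrative use of ksize() (not part of slab.c); the rounded-up size
   * depends on the configured kmalloc size classes, so "32" below is just
   * an assumed value for a typical configuration:
   *
   *	char *buf = kmalloc(17, GFP_KERNEL);
   *	size_t usable = buf ? ksize(buf) : 0;	// e.g. 32 rather than 17
   *
   * The caller may use all 'usable' bytes, even though only 17 were
   * requested, and must still kfree(buf) as usual.
   */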