  // SPDX-License-Identifier: GPL-2.0
  /*
   * linux/mm/slab.c
   * Written by Mark Hemment, 1996/97.
   * (markhe@nextd.demon.co.uk)
   *
   * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
   *
   * Major cleanup, different bufctl logic, per-cpu arrays
   *	(c) 2000 Manfred Spraul
   *
   * Cleanup, make the head arrays unconditional, preparation for NUMA
   * 	(c) 2002 Manfred Spraul
   *
   * An implementation of the Slab Allocator as described in outline in;
   *	UNIX Internals: The New Frontiers by Uresh Vahalia
   *	Pub: Prentice Hall	ISBN 0-13-101908-2
   * or with a little more detail in;
   *	The Slab Allocator: An Object-Caching Kernel Memory Allocator
   *	Jeff Bonwick (Sun Microsystems).
   *	Presented at: USENIX Summer 1994 Technical Conference
   *
   * The memory is organized in caches, one cache for each object type.
   * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
   * Each cache consists of many slabs (they are small (usually one
   * page long) and always contiguous), and each slab contains multiple
   * initialized objects.
   *
   * This means that your constructor is used only for newly allocated
   * slabs and you must pass objects with the same initializations to
   * kmem_cache_free.
   *
   * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
   * normal). If you need a special memory type, then you must create a new
   * cache for that memory type.
   *
   * In order to reduce fragmentation, the slabs are sorted in 3 groups:
   *   full slabs with 0 free objects
   *   partial slabs
   *   empty slabs with no allocated objects
   *
   * If partial slabs exist, then new allocations come from these slabs,
   * otherwise from empty slabs or new slabs are allocated.
   *
   * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
   * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
   *
   * Each cache has a short per-cpu head array, most allocs
   * and frees go into that array, and if that array overflows, then 1/2
   * of the entries in the array are given back into the global cache.
   * The head array is strictly LIFO and should improve the cache hit rates.
   * On SMP, it additionally reduces the spinlock operations.
   *
   * The c_cpuarray may not be read with local interrupts enabled -
   * it's changed with a smp_call_function().
   *
   * SMP synchronization:
   *  constructors and destructors are called without any locking.
   *  Several members in struct kmem_cache and struct slab never change, they
   *	are accessed without any locking.
   *  The per-cpu arrays are never accessed from the wrong cpu, no locking,
   *  	and local interrupts are disabled so slab code is preempt-safe.
   *  The non-constant members are protected with a per-cache irq spinlock.
   *
   * Many thanks to Mark Hemment, who wrote another per-cpu slab patch
   * in 2000 - many ideas in the current implementation are derived from
   * his patch.
   *
   * Further notes from the original documentation:
   *
   * 11 April '97.  Started multi-threading - markhe
   *	The global cache-chain is protected by the mutex 'slab_mutex'.
   *	The sem is only needed when accessing/extending the cache-chain, which
   *	can never happen inside an interrupt (kmem_cache_create(),
   *	kmem_cache_shrink() and kmem_cache_reap()).
   *
   *	At present, each engine can be growing a cache.  This should be blocked.
   *
   * 15 March 2005. NUMA slab allocator.
   *	Shai Fultheim <shai@scalex86.org>.
   *	Shobhit Dayal <shobhit@calsoftinc.com>
   *	Alok N Kataria <alokk@calsoftinc.com>
   *	Christoph Lameter <christoph@lameter.com>
   *
   *	Modified the slab allocator to be node aware on NUMA systems.
   *	Each node has its own list of partial, free and full slabs.
   *	All object allocations for a node occur from node specific slab lists.
   */
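  /*
   * Illustrative usage of the API described above (an editor's sketch, not
   * part of this file; struct my_obj, my_obj_cache and my_obj_ctor are
   * made-up names):
   *
   *	static struct kmem_cache *my_obj_cache;
   *
   *	static void my_obj_ctor(void *p)
   *	{
   *		struct my_obj *obj = p;
   *
   *		spin_lock_init(&obj->lock);
   *	}
   *
   *	my_obj_cache = kmem_cache_create("my_obj", sizeof(struct my_obj),
   *					 0, SLAB_HWCACHE_ALIGN, my_obj_ctor);
   *	obj = kmem_cache_alloc(my_obj_cache, GFP_KERNEL);
   *	...
   *	kmem_cache_free(my_obj_cache, obj);
   *
   * As noted above, obj must be handed back to kmem_cache_free() with its
   * constructed state (here, an initialised lock) restored.
   */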
  #include	<linux/slab.h>
  #include	<linux/mm.h>
  #include	<linux/poison.h>
  #include	<linux/swap.h>
  #include	<linux/cache.h>
  #include	<linux/interrupt.h>
  #include	<linux/init.h>
  #include	<linux/compiler.h>
  #include	<linux/cpuset.h>
  #include	<linux/proc_fs.h>
  #include	<linux/seq_file.h>
  #include	<linux/notifier.h>
  #include	<linux/kallsyms.h>
  #include	<linux/cpu.h>
  #include	<linux/sysctl.h>
  #include	<linux/module.h>
  #include	<linux/rcupdate.h>
  #include	<linux/string.h>
  #include	<linux/uaccess.h>
  #include	<linux/nodemask.h>
  #include	<linux/kmemleak.h>
  #include	<linux/mempolicy.h>
  #include	<linux/mutex.h>
  #include	<linux/fault-inject.h>
  #include	<linux/rtmutex.h>
  #include	<linux/reciprocal_div.h>
  #include	<linux/debugobjects.h>
  #include	<linux/memory.h>
  #include	<linux/prefetch.h>
  #include	<linux/sched/task_stack.h>

  #include	<net/sock.h>
  #include	<asm/cacheflush.h>
  #include	<asm/tlbflush.h>
  #include	<asm/page.h>
  #include <trace/events/kmem.h>
  #include	"internal.h"
  #include	"slab.h"
  /*
   * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
   *		  0 for faster, smaller code (especially in the critical paths).
   *
   * STATS	- 1 to collect stats for /proc/slabinfo.
   *		  0 for faster, smaller code (especially in the critical paths).
   *
   * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
   */
  
  #ifdef CONFIG_DEBUG_SLAB
  #define	DEBUG		1
  #define	STATS		1
  #define	FORCED_DEBUG	1
  #else
  #define	DEBUG		0
  #define	STATS		0
  #define	FORCED_DEBUG	0
  #endif
  /* Shouldn't this be in a header file somewhere? */
  #define	BYTES_PER_WORD		sizeof(void *)
  #define	REDZONE_ALIGN		max(BYTES_PER_WORD, __alignof__(unsigned long long))

  #ifndef ARCH_KMALLOC_FLAGS
  #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
  #endif
  #define FREELIST_BYTE_INDEX (((PAGE_SIZE >> BITS_PER_BYTE) \
  				<= SLAB_OBJ_MIN_SIZE) ? 1 : 0)
  
  #if FREELIST_BYTE_INDEX
  typedef unsigned char freelist_idx_t;
  #else
  typedef unsigned short freelist_idx_t;
  #endif
  #define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)

  /*
   * struct array_cache
   *
   * Purpose:
   * - LIFO ordering, to hand out cache-warm objects from _alloc
   * - reduce the number of linked list operations
   * - reduce spinlock operations
   *
   * The limit is stored in the per-cpu structure to reduce the data cache
   * footprint.
   *
   */
  struct array_cache {
  	unsigned int avail;
  	unsigned int limit;
  	unsigned int batchcount;
  	unsigned int touched;
  	void *entry[];	/*
  			 * Must have this definition in here for the proper
  			 * alignment of array_cache. Also simplifies accessing
  			 * the entries.
  			 */
  };
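  /*
   * Illustrative only: a free pushes the object pointer onto this array
   * (ac->entry[ac->avail++] = objp) and a later allocation pops the most
   * recently freed, cache-warm object (objp = ac->entry[--ac->avail]),
   * which is the LIFO behaviour described above.
   */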
  struct alien_cache {
  	spinlock_t lock;
  	struct array_cache ac;
  };
  /*
   * Need this for bootstrapping a per node allocator.
   */
  #define NUM_INIT_LISTS (2 * MAX_NUMNODES)
  static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
  #define	CACHE_CACHE 0
  #define	SIZE_NODE (MAX_NUMNODES)

  static int drain_freelist(struct kmem_cache *cache,
  			struct kmem_cache_node *n, int tofree);
  static void free_block(struct kmem_cache *cachep, void **objpp, int len,
  			int node, struct list_head *list);
  static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
  static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
  static void cache_reap(struct work_struct *unused);

  static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
  						void **list);
  static inline void fixup_slab_list(struct kmem_cache *cachep,
  				struct kmem_cache_node *n, struct page *page,
  				void **list);
  static int slab_early_init = 1;
  #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))

  static void kmem_cache_node_init(struct kmem_cache_node *parent)
  {
  	INIT_LIST_HEAD(&parent->slabs_full);
  	INIT_LIST_HEAD(&parent->slabs_partial);
  	INIT_LIST_HEAD(&parent->slabs_free);
  	parent->total_slabs = 0;
  	parent->free_slabs = 0;
  	parent->shared = NULL;
  	parent->alien = NULL;
  	parent->colour_next = 0;
  	spin_lock_init(&parent->list_lock);
  	parent->free_objects = 0;
  	parent->free_touched = 0;
  }
  #define MAKE_LIST(cachep, listp, slab, nodeid)				\
  	do {								\
  		INIT_LIST_HEAD(listp);					\
  		list_splice(&get_node(cachep, nodeid)->slab, listp);	\
  	} while (0)
  #define	MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
  	do {								\
  	MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
  	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
  	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
  	} while (0)

  #define CFLGS_OBJFREELIST_SLAB	((slab_flags_t __force)0x40000000U)
  #define CFLGS_OFF_SLAB		((slab_flags_t __force)0x80000000U)
  #define	OBJFREELIST_SLAB(x)	((x)->flags & CFLGS_OBJFREELIST_SLAB)
  #define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
  
  #define BATCHREFILL_LIMIT	16
  /*
   * Optimization question: fewer reaps means less probability for unnecessary
   * cpucache drain/refill cycles.
   *
   * OTOH the cpuarrays can contain lots of objects,
   * which could lock up otherwise freeable slabs.
   */
  #define REAPTIMEOUT_AC		(2*HZ)
  #define REAPTIMEOUT_NODE	(4*HZ)
  
  #if STATS
  #define	STATS_INC_ACTIVE(x)	((x)->num_active++)
  #define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
  #define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
  #define	STATS_INC_GROWN(x)	((x)->grown++)
  #define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
  #define	STATS_SET_HIGH(x)						\
  	do {								\
  		if ((x)->num_active > (x)->high_mark)			\
  			(x)->high_mark = (x)->num_active;		\
  	} while (0)
  #define	STATS_INC_ERR(x)	((x)->errors++)
  #define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
  #define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
  #define STATS_INC_ACOVERFLOW(x)   ((x)->node_overflow++)
  #define	STATS_SET_FREEABLE(x, i)					\
  	do {								\
  		if ((x)->max_freeable < i)				\
  			(x)->max_freeable = i;				\
  	} while (0)
  #define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
  #define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
  #define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
  #define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
  #else
  #define	STATS_INC_ACTIVE(x)	do { } while (0)
  #define	STATS_DEC_ACTIVE(x)	do { } while (0)
  #define	STATS_INC_ALLOCED(x)	do { } while (0)
  #define	STATS_INC_GROWN(x)	do { } while (0)
  #define	STATS_ADD_REAPED(x,y)	do { (void)(y); } while (0)
  #define	STATS_SET_HIGH(x)	do { } while (0)
  #define	STATS_INC_ERR(x)	do { } while (0)
  #define	STATS_INC_NODEALLOCS(x)	do { } while (0)
  #define	STATS_INC_NODEFREES(x)	do { } while (0)
  #define STATS_INC_ACOVERFLOW(x)   do { } while (0)
  #define	STATS_SET_FREEABLE(x, i) do { } while (0)
  #define STATS_INC_ALLOCHIT(x)	do { } while (0)
  #define STATS_INC_ALLOCMISS(x)	do { } while (0)
  #define STATS_INC_FREEHIT(x)	do { } while (0)
  #define STATS_INC_FREEMISS(x)	do { } while (0)
  #endif
  
  #if DEBUG

  /*
   * memory layout of objects:
   * 0		: objp
   * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
   * 		the end of an object is aligned with the end of the real
   * 		allocation. Catches writes behind the end of the allocation.
   * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
   * 		redzone word.
   * cachep->obj_offset: The real object.
   * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
   * cachep->size - 1* BYTES_PER_WORD: last caller address
   *					[BYTES_PER_WORD long]
   */
  static int obj_offset(struct kmem_cache *cachep)
  {
  	return cachep->obj_offset;
  }
  static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
  {
  	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
  	return (unsigned long long*) (objp + obj_offset(cachep) -
  				      sizeof(unsigned long long));
  }
  static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
  {
  	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
  	if (cachep->flags & SLAB_STORE_USER)
  		return (unsigned long long *)(objp + cachep->size -
  					      sizeof(unsigned long long) -
  					      REDZONE_ALIGN);
  	return (unsigned long long *) (objp + cachep->size -
  				       sizeof(unsigned long long));
  }
  static void **dbg_userword(struct kmem_cache *cachep, void *objp)
  {
  	BUG_ON(!(cachep->flags & SLAB_STORE_USER));
  	return (void **)(objp + cachep->size - BYTES_PER_WORD);
  }
  
  #else
  #define obj_offset(x)			0
  #define dbg_redzone1(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
  #define dbg_redzone2(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
  #define dbg_userword(cachep, objp)	({BUG(); (void **)NULL;})
  
  #endif
  #ifdef CONFIG_DEBUG_SLAB_LEAK
  static inline bool is_store_user_clean(struct kmem_cache *cachep)
  {
  	return atomic_read(&cachep->store_user_clean) == 1;
  }

  static inline void set_store_user_clean(struct kmem_cache *cachep)
  {
  	atomic_set(&cachep->store_user_clean, 1);
  }

  static inline void set_store_user_dirty(struct kmem_cache *cachep)
  {
  	if (is_store_user_clean(cachep))
  		atomic_set(&cachep->store_user_clean, 0);
  }
  
  #else
  static inline void set_store_user_dirty(struct kmem_cache *cachep) {}
  
  #endif
  /*
   * Do not go above this order unless 0 objects fit into the slab or
   * overridden on the command line.
   */
  #define	SLAB_MAX_ORDER_HI	1
  #define	SLAB_MAX_ORDER_LO	0
  static int slab_max_order = SLAB_MAX_ORDER_LO;
  static bool slab_max_order_set __initdata;

  static inline struct kmem_cache *virt_to_cache(const void *obj)
  {
  	struct page *page = virt_to_head_page(obj);
  	return page->slab_cache;
  }
  static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
  				 unsigned int idx)
  {
  	return page->s_mem + cache->size * idx;
  }
  /*
   * We want to avoid an expensive divide : (offset / cache->size)
   *   Using the fact that size is a constant for a particular cache,
   *   we can replace (offset / cache->size) by
   *   reciprocal_divide(offset, cache->reciprocal_buffer_size)
   */
  static inline unsigned int obj_to_index(const struct kmem_cache *cache,
  					const struct page *page, void *obj)
  {
  	u32 offset = (obj - page->s_mem);
  	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
  }
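  /*
   * Worked example (illustrative numbers): for a cache with size == 256,
   * reciprocal_buffer_size is set up from reciprocal_value(256), so an
   * object at offset 2560 from page->s_mem gives
   * reciprocal_divide(2560, cache->reciprocal_buffer_size) == 2560 / 256
   * == 10, i.e. the object's index, using a multiply and shift instead of
   * a division.
   */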
  #define BOOT_CPUCACHE_ENTRIES	1
  /* internal cache of cache description objs */
  static struct kmem_cache kmem_cache_boot = {
  	.batchcount = 1,
  	.limit = BOOT_CPUCACHE_ENTRIES,
  	.shared = 1,
  	.size = sizeof(struct kmem_cache),
  	.name = "kmem_cache",
  };
  static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);

  static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
  {
  	return this_cpu_ptr(cachep->cpu_cache);
  }
  /*
   * Calculate the number of objects and left-over bytes for a given buffer size.
   */
  static unsigned int cache_estimate(unsigned long gfporder, size_t buffer_size,
  		slab_flags_t flags, size_t *left_over)
  {
  	unsigned int num;
  	size_t slab_size = PAGE_SIZE << gfporder;

  	/*
  	 * The slab management structure can be either off the slab or
  	 * on it. For the latter case, the memory allocated for a
  	 * slab is used for:
  	 *
  	 * - @buffer_size bytes for each object
  	 * - One freelist_idx_t for each object
  	 *
  	 * We don't need to consider alignment of freelist because
  	 * freelist will be at the end of slab page. The objects will be
  	 * at the correct alignment.
  	 *
  	 * If the slab management structure is off the slab, then the
  	 * alignment will already be calculated into the size. Because
  	 * the slabs are all pages aligned, the objects will be at the
  	 * correct alignment when allocated.
  	 */
  	if (flags & (CFLGS_OBJFREELIST_SLAB | CFLGS_OFF_SLAB)) {
  		num = slab_size / buffer_size;
  		*left_over = slab_size % buffer_size;
  	} else {
  		num = slab_size / (buffer_size + sizeof(freelist_idx_t));
  		*left_over = slab_size %
  			(buffer_size + sizeof(freelist_idx_t));
  	}
  
  	return num;
  }
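  /*
   * Worked example (illustrative, assuming PAGE_SIZE == 4096 and
   * sizeof(freelist_idx_t) == 1): for gfporder == 0 and buffer_size == 100,
   * an on-slab freelist gives num = 4096 / 101 = 40 objects with
   * *left_over = 56 bytes, while OBJFREELIST_SLAB/OFF_SLAB management gives
   * num = 4096 / 100 = 40 with *left_over = 96.
   */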
  #if DEBUG
  #define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)

  static void __slab_error(const char *function, struct kmem_cache *cachep,
  			char *msg)
  {
  	pr_err("slab error in %s(): cache `%s': %s
  ",
  	       function, cachep->name, msg);
  	dump_stack();
  	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
  }
  #endif

  /*
   * By default on NUMA we use alien caches to stage the freeing of
   * objects allocated from other nodes. This causes massive memory
   * inefficiencies when using fake NUMA setup to split memory into a
   * large number of small nodes, so it can be disabled on the command
   * line
    */
  
  static int use_alien_caches __read_mostly = 1;
  static int __init noaliencache_setup(char *s)
  {
  	use_alien_caches = 0;
  	return 1;
  }
  __setup("noaliencache", noaliencache_setup);
  static int __init slab_max_order_setup(char *str)
  {
  	get_option(&str, &slab_max_order);
  	slab_max_order = slab_max_order < 0 ? 0 :
  				min(slab_max_order, MAX_ORDER - 1);
  	slab_max_order_set = true;
  
  	return 1;
  }
  __setup("slab_max_order=", slab_max_order_setup);
  #ifdef CONFIG_NUMA
  /*
   * Special reaping functions for NUMA systems called from cache_reap().
   * These take care of doing round robin flushing of alien caches (containing
   * objects freed on different nodes from which they were allocated) and the
   * flushing of remote pcps by calling drain_node_pages.
   */
  static DEFINE_PER_CPU(unsigned long, slab_reap_node);
  
  static void init_reap_node(int cpu)
  {
  	per_cpu(slab_reap_node, cpu) = next_node_in(cpu_to_mem(cpu),
  						    node_online_map);
  }
  
  static void next_reap_node(void)
  {
  	int node = __this_cpu_read(slab_reap_node);

  	node = next_node_in(node, node_online_map);
  	__this_cpu_write(slab_reap_node, node);
  }
  
  #else
  #define init_reap_node(cpu) do { } while (0)
  #define next_reap_node(void) do { } while (0)
  #endif
  /*
   * Initiate the reap timer running on the target CPU.  We run at around 1 to 2Hz
   * via the workqueue/eventd.
   * Add the CPU number into the expiration time to minimize the possibility of
   * the CPUs getting into lockstep and contending for the global cache chain
   * lock.
   */
  static void start_cpu_timer(int cpu)
  {
  	struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);

  	if (reap_work->work.func == NULL) {
  		init_reap_node(cpu);
  		INIT_DEFERRABLE_WORK(reap_work, cache_reap);
  		schedule_delayed_work_on(cpu, reap_work,
  					__round_jiffies_relative(HZ, cpu));
  	}
  }
  static void init_arraycache(struct array_cache *ac, int limit, int batch)
  {
  	if (ac) {
  		ac->avail = 0;
  		ac->limit = limit;
  		ac->batchcount = batch;
  		ac->touched = 0;
  	}
  }
  
  static struct array_cache *alloc_arraycache(int node, int entries,
  					    int batchcount, gfp_t gfp)
  {
  	size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
  	struct array_cache *ac = NULL;
  
  	ac = kmalloc_node(memsize, gfp, node);
  	/*
  	 * The array_cache structures contain pointers to free object.
  	 * However, when such objects are allocated or transferred to another
  	 * cache the pointers are not cleared and they could be counted as
  	 * valid references during a kmemleak scan. Therefore, kmemleak must
  	 * not scan such objects.
  	 */
  	kmemleak_no_scan(ac);
  	init_arraycache(ac, entries, batchcount);
  	return ac;
  }
  static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
  					struct page *page, void *objp)
  {
  	struct kmem_cache_node *n;
  	int page_node;
  	LIST_HEAD(list);

  	page_node = page_to_nid(page);
  	n = get_node(cachep, page_node);

  	spin_lock(&n->list_lock);
  	free_block(cachep, &objp, 1, page_node, &list);
  	spin_unlock(&n->list_lock);

  	slabs_destroy(cachep, &list);
  }
  /*
   * Transfer objects in one arraycache to another.
   * Locking must be handled by the caller.
   *
   * Return the number of entries transferred.
   */
  static int transfer_objects(struct array_cache *to,
  		struct array_cache *from, unsigned int max)
  {
  	/* Figure out how many entries to transfer */
  	int nr = min3(from->avail, max, to->limit - to->avail);
  
  	if (!nr)
  		return 0;
  
  	memcpy(to->entry + to->avail, from->entry + from->avail -nr,
  			sizeof(void *) *nr);
  
  	from->avail -= nr;
  	to->avail += nr;
  	return nr;
  }
  #ifndef CONFIG_NUMA
  
  #define drain_alien_cache(cachep, alien) do { } while (0)
  #define reap_alien(cachep, n) do { } while (0)

  static inline struct alien_cache **alloc_alien_cache(int node,
  						int limit, gfp_t gfp)
  {
  	return NULL;
  }
  static inline void free_alien_cache(struct alien_cache **ac_ptr)
  {
  }
  
  static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
  {
  	return 0;
  }
  
  static inline void *alternate_node_alloc(struct kmem_cache *cachep,
  		gfp_t flags)
  {
  	return NULL;
  }
  static inline void *____cache_alloc_node(struct kmem_cache *cachep,
  		 gfp_t flags, int nodeid)
  {
  	return NULL;
  }
  static inline gfp_t gfp_exact_node(gfp_t flags)
  {
  	return flags & ~__GFP_NOFAIL;
  }
  #else	/* CONFIG_NUMA */
  static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
  static void *alternate_node_alloc(struct kmem_cache *, gfp_t);

  static struct alien_cache *__alloc_alien_cache(int node, int entries,
  						int batch, gfp_t gfp)
  {
  	size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
  	struct alien_cache *alc = NULL;
  
  	alc = kmalloc_node(memsize, gfp, node);
  	if (alc) {
  		kmemleak_no_scan(alc);
  		init_arraycache(&alc->ac, entries, batch);
  		spin_lock_init(&alc->lock);
  	}
  	return alc;
  }
  
  static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
  {
  	struct alien_cache **alc_ptr;
  	size_t memsize = sizeof(void *) * nr_node_ids;
  	int i;
  
  	if (limit > 1)
  		limit = 12;
  	alc_ptr = kzalloc_node(memsize, gfp, node);
  	if (!alc_ptr)
  		return NULL;
  
  	for_each_node(i) {
  		if (i == node || !node_online(i))
  			continue;
  		alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
  		if (!alc_ptr[i]) {
  			for (i--; i >= 0; i--)
  				kfree(alc_ptr[i]);
  			kfree(alc_ptr);
  			return NULL;
  		}
  	}
  	return alc_ptr;
  }
  static void free_alien_cache(struct alien_cache **alc_ptr)
  {
  	int i;
  	if (!alc_ptr)
  		return;
  	for_each_node(i)
  	    kfree(alc_ptr[i]);
  	kfree(alc_ptr);
  }
  static void __drain_alien_cache(struct kmem_cache *cachep,
  				struct array_cache *ac, int node,
  				struct list_head *list)
  {
  	struct kmem_cache_node *n = get_node(cachep, node);
  
  	if (ac->avail) {
  		spin_lock(&n->list_lock);
  		/*
  		 * Stuff objects into the remote nodes shared array first.
  		 * That way we could avoid the overhead of putting the objects
  		 * into the free lists and getting them back later.
  		 */
  		if (n->shared)
  			transfer_objects(n->shared, ac, ac->limit);

  		free_block(cachep, ac->entry, ac->avail, node, list);
  		ac->avail = 0;
  		spin_unlock(&n->list_lock);
  	}
  }
  /*
   * Called from cache_reap() to regularly drain alien caches round robin.
   */
  static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
  {
  	int node = __this_cpu_read(slab_reap_node);

  	if (n->alien) {
  		struct alien_cache *alc = n->alien[node];
  		struct array_cache *ac;
  
  		if (alc) {
  			ac = &alc->ac;
  			if (ac->avail && spin_trylock_irq(&alc->lock)) {
  				LIST_HEAD(list);
  
  				__drain_alien_cache(cachep, ac, node, &list);
  				spin_unlock_irq(&alc->lock);
  				slabs_destroy(cachep, &list);
  			}
  		}
  	}
  }
  static void drain_alien_cache(struct kmem_cache *cachep,
  				struct alien_cache **alien)
  {
  	int i = 0;
  	struct alien_cache *alc;
  	struct array_cache *ac;
  	unsigned long flags;
  
  	for_each_online_node(i) {
  		alc = alien[i];
  		if (alc) {
  			LIST_HEAD(list);
  			ac = &alc->ac;
  			spin_lock_irqsave(&alc->lock, flags);
  			__drain_alien_cache(cachep, ac, i, &list);
  			spin_unlock_irqrestore(&alc->lock, flags);
  			slabs_destroy(cachep, &list);
  		}
  	}
  }

  static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
  				int node, int page_node)
  {
  	struct kmem_cache_node *n;
  	struct alien_cache *alien = NULL;
  	struct array_cache *ac;
  	LIST_HEAD(list);

  	n = get_node(cachep, node);
  	STATS_INC_NODEFREES(cachep);
  	if (n->alien && n->alien[page_node]) {
  		alien = n->alien[page_node];
  		ac = &alien->ac;
  		spin_lock(&alien->lock);
  		if (unlikely(ac->avail == ac->limit)) {
  			STATS_INC_ACOVERFLOW(cachep);
  			__drain_alien_cache(cachep, ac, page_node, &list);
  		}
  		ac->entry[ac->avail++] = objp;
  		spin_unlock(&alien->lock);
  		slabs_destroy(cachep, &list);
  	} else {
  		n = get_node(cachep, page_node);
  		spin_lock(&n->list_lock);
  		free_block(cachep, &objp, 1, page_node, &list);
  		spin_unlock(&n->list_lock);
  		slabs_destroy(cachep, &list);
  	}
  	return 1;
  }
  
  static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
  {
  	int page_node = page_to_nid(virt_to_page(objp));
  	int node = numa_mem_id();
  	/*
  	 * Make sure we are not freeing an object from another node to the array
  	 * cache on this cpu.
  	 */
  	if (likely(node == page_node))
  		return 0;
  
  	return __cache_free_alien(cachep, objp, node, page_node);
  }
  
  /*
   * Construct gfp mask to allocate from a specific node but do not reclaim or
   * warn about failures.
   */
  static inline gfp_t gfp_exact_node(gfp_t flags)
  {
  	return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
  }
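  /*
   * For example (illustrative): a GFP_KERNEL allocation passed through
   * gfp_exact_node() gains __GFP_THISNODE and __GFP_NOWARN and loses
   * __GFP_RECLAIM (direct and kswapd reclaim), so it either succeeds on
   * the requested node without triggering reclaim or fails quietly.
   */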
  #endif
  static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
  {
  	struct kmem_cache_node *n;
  
  	/*
  	 * Set up the kmem_cache_node for cpu before we can
  	 * begin anything. Make sure some other cpu on this
  	 * node has not already allocated this
  	 */
  	n = get_node(cachep, node);
  	if (n) {
  		spin_lock_irq(&n->list_lock);
  		n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
  				cachep->num;
  		spin_unlock_irq(&n->list_lock);
  
  		return 0;
  	}
  
  	n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
  	if (!n)
  		return -ENOMEM;
  
  	kmem_cache_node_init(n);
  	n->next_reap = jiffies + REAPTIMEOUT_NODE +
  		    ((unsigned long)cachep) % REAPTIMEOUT_NODE;
  
  	n->free_limit =
  		(1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num;
  
  	/*
  	 * The kmem_cache_nodes don't come and go as CPUs
  	 * come and go.  slab_mutex is sufficient
  	 * protection here.
  	 */
  	cachep->node[node] = n;
  
  	return 0;
  }
  #if (defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)) || defined(CONFIG_SMP)
  /*
   * Allocates and initializes node for a node on each slab cache, used for
   * either memory or cpu hotplug.  If memory is being hot-added, the kmem_cache_node
   * will be allocated off-node since memory is not yet online for the new node.
   * When hotplugging memory or a cpu, existing nodes are not replaced if
   * already in use.
   *
   * Must hold slab_mutex.
   */
  static int init_cache_node_node(int node)
  {
  	int ret;
  	struct kmem_cache *cachep;

  	list_for_each_entry(cachep, &slab_caches, list) {
  		ret = init_cache_node(cachep, node, GFP_KERNEL);
  		if (ret)
  			return ret;
  	}

  	return 0;
  }
  #endif

  static int setup_kmem_cache_node(struct kmem_cache *cachep,
  				int node, gfp_t gfp, bool force_change)
  {
  	int ret = -ENOMEM;
  	struct kmem_cache_node *n;
  	struct array_cache *old_shared = NULL;
  	struct array_cache *new_shared = NULL;
  	struct alien_cache **new_alien = NULL;
  	LIST_HEAD(list);
  
  	if (use_alien_caches) {
  		new_alien = alloc_alien_cache(node, cachep->limit, gfp);
  		if (!new_alien)
  			goto fail;
  	}
  
  	if (cachep->shared) {
  		new_shared = alloc_arraycache(node,
  			cachep->shared * cachep->batchcount, 0xbaadf00d, gfp);
  		if (!new_shared)
  			goto fail;
  	}
  
  	ret = init_cache_node(cachep, node, gfp);
  	if (ret)
  		goto fail;
  
  	n = get_node(cachep, node);
  	spin_lock_irq(&n->list_lock);
  	if (n->shared && force_change) {
  		free_block(cachep, n->shared->entry,
  				n->shared->avail, node, &list);
  		n->shared->avail = 0;
  	}
  
  	if (!n->shared || force_change) {
  		old_shared = n->shared;
  		n->shared = new_shared;
  		new_shared = NULL;
  	}
  
  	if (!n->alien) {
  		n->alien = new_alien;
  		new_alien = NULL;
  	}
  
  	spin_unlock_irq(&n->list_lock);
  	slabs_destroy(cachep, &list);
  	/*
  	 * To protect lockless access to n->shared during irq disabled context.
  	 * If n->shared isn't NULL in irq disabled context, accessing to it is
  	 * guaranteed to be valid until irq is re-enabled, because it will be
  	 * freed after synchronize_sched().
  	 */
  	if (old_shared && force_change)
  		synchronize_sched();
  fail:
  	kfree(old_shared);
  	kfree(new_shared);
  	free_alien_cache(new_alien);
  
  	return ret;
  }
  #ifdef CONFIG_SMP
  static void cpuup_canceled(long cpu)
  {
  	struct kmem_cache *cachep;
  	struct kmem_cache_node *n = NULL;
  	int node = cpu_to_mem(cpu);
  	const struct cpumask *mask = cpumask_of_node(node);

  	list_for_each_entry(cachep, &slab_caches, list) {
  		struct array_cache *nc;
  		struct array_cache *shared;
  		struct alien_cache **alien;
  		LIST_HEAD(list);

  		n = get_node(cachep, node);
  		if (!n)
  			continue;

  		spin_lock_irq(&n->list_lock);

  		/* Free limit for this kmem_cache_node */
  		n->free_limit -= cachep->batchcount;
  
  		/* cpu is dead; no one can alloc from it. */
  		nc = per_cpu_ptr(cachep->cpu_cache, cpu);
  		if (nc) {
  			free_block(cachep, nc->entry, nc->avail, node, &list);
  			nc->avail = 0;
  		}

  		if (!cpumask_empty(mask)) {
  			spin_unlock_irq(&n->list_lock);
  			goto free_slab;
  		}
  		shared = n->shared;
  		if (shared) {
  			free_block(cachep, shared->entry,
  				   shared->avail, node, &list);
  			n->shared = NULL;
  		}
  		alien = n->alien;
  		n->alien = NULL;

  		spin_unlock_irq(&n->list_lock);
  
  		kfree(shared);
  		if (alien) {
  			drain_alien_cache(cachep, alien);
  			free_alien_cache(alien);
  		}
  
  free_slab:
  		slabs_destroy(cachep, &list);
  	}
  	/*
  	 * In the previous loop, all the objects were freed to
  	 * the respective cache's slabs,  now we can go ahead and
  	 * shrink each nodelist to its limit.
  	 */
  	list_for_each_entry(cachep, &slab_caches, list) {
  		n = get_node(cachep, node);
  		if (!n)
  			continue;
  		drain_freelist(cachep, n, INT_MAX);
  	}
  }
  static int cpuup_prepare(long cpu)
  {
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
1000
  	struct kmem_cache *cachep;
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
1001
  	int node = cpu_to_mem(cpu);
8f9f8d9e8   David Rientjes   slab: add memory ...
1002
  	int err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1003

fbf1e473b   Akinobu Mita   cpu hotplug: slab...
1004
1005
1006
1007
  	/*
  	 * We need to do this right in the beginning since
  	 * alloc_arraycache's are going to use this list.
  	 * kmalloc_node allows us to add the slab to the right
ce8eb6c42   Christoph Lameter   slab: Rename list...
1008
  	 * kmem_cache_node and not this cpu's kmem_cache_node
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
1009
  	 */
6a67368c3   Christoph Lameter   slab: Rename node...
1010
  	err = init_cache_node_node(node);
8f9f8d9e8   David Rientjes   slab: add memory ...
1011
1012
  	if (err < 0)
  		goto bad;
fbf1e473b   Akinobu Mita   cpu hotplug: slab...
1013
1014
1015
1016
1017
  
  	/*
  	 * Now we can go ahead with allocating the shared arrays and
  	 * array caches
  	 */
	list_for_each_entry(cachep, &slab_caches, list) {
		err = setup_kmem_cache_node(cachep, node, GFP_KERNEL, false);
		if (err)
			goto bad;
	}

	return 0;
bad:
	cpuup_canceled(cpu);
	return -ENOMEM;
}

  int slab_prepare_cpu(unsigned int cpu)
{
	int err;

  	mutex_lock(&slab_mutex);
  	err = cpuup_prepare(cpu);
  	mutex_unlock(&slab_mutex);
  	return err;
  }
  
/*
 * This is called for a failed online attempt and for a successful
 * offline.
 *
 * Even if all the cpus of a node are down, we don't free the
 * kmem_cache_node of any cache. This is to avoid a race between cpu_down
 * and a kmalloc allocation from another cpu for memory from the node of
 * the cpu going down.  The kmem_cache_node structure is usually allocated
 * from kmem_cache_create() and gets destroyed at kmem_cache_destroy().
 */
  int slab_dead_cpu(unsigned int cpu)
  {
  	mutex_lock(&slab_mutex);
  	cpuup_canceled(cpu);
  	mutex_unlock(&slab_mutex);
  	return 0;
  }
#endif
  
  static int slab_online_cpu(unsigned int cpu)
  {
  	start_cpu_timer(cpu);
  	return 0;
}

  static int slab_offline_cpu(unsigned int cpu)
  {
  	/*
  	 * Shutdown cache reaper. Note that the slab_mutex is held so
  	 * that if cache_reap() is invoked it cannot do anything
  	 * expensive but will only modify reap_work and reschedule the
  	 * timer.
  	 */
  	cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
  	/* Now the cache_reaper is guaranteed to be not running. */
  	per_cpu(slab_reap_work, cpu).work.func = NULL;
  	return 0;
  }
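
/*
 * Note: slab_online_cpu()/slab_offline_cpu() are wired up as a dynamic CPU
 * hotplug state in cpucache_init() below; they only start and stop the
 * per-cpu cache reaper, while the heavier setup and teardown goes through
 * slab_prepare_cpu()/slab_dead_cpu() above.
 */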

#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
/*
 * Drains freelist for a node on each slab cache, used for memory hot-remove.
 * Returns -EBUSY if all objects cannot be drained so that the node is not
 * removed.
 *
 * Must hold slab_mutex.
 */
static int __meminit drain_cache_node_node(int node)
  {
  	struct kmem_cache *cachep;
  	int ret = 0;
	list_for_each_entry(cachep, &slab_caches, list) {
		struct kmem_cache_node *n;

		n = get_node(cachep, node);
		if (!n)
			continue;
		drain_freelist(cachep, n, INT_MAX);

		if (!list_empty(&n->slabs_full) ||
		    !list_empty(&n->slabs_partial)) {
  			ret = -EBUSY;
  			break;
  		}
  	}
  	return ret;
  }
  
  static int __meminit slab_memory_callback(struct notifier_block *self,
  					unsigned long action, void *arg)
  {
  	struct memory_notify *mnb = arg;
  	int ret = 0;
  	int nid;
  
  	nid = mnb->status_change_nid;
  	if (nid < 0)
  		goto out;
  
  	switch (action) {
  	case MEM_GOING_ONLINE:
		mutex_lock(&slab_mutex);
		ret = init_cache_node_node(nid);
		mutex_unlock(&slab_mutex);
  		break;
  	case MEM_GOING_OFFLINE:
		mutex_lock(&slab_mutex);
		ret = drain_cache_node_node(nid);
		mutex_unlock(&slab_mutex);
  		break;
  	case MEM_ONLINE:
  	case MEM_OFFLINE:
  	case MEM_CANCEL_ONLINE:
  	case MEM_CANCEL_OFFLINE:
  		break;
  	}
  out:
	return notifier_from_errno(ret);
}
#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */

  /*
 * swap the static kmem_cache_node with kmalloced memory
 */
static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list,
				int nodeid)
{
	struct kmem_cache_node *ptr;

	ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid);
	BUG_ON(!ptr);
	memcpy(ptr, list, sizeof(struct kmem_cache_node));

	/*
	 * Do not assume that spinlocks can be initialized via memcpy:
	 */
	spin_lock_init(&ptr->list_lock);
	MAKE_ALL_LISTS(cachep, ptr, nodeid);
	cachep->node[nodeid] = ptr;
}

  /*
 * For setting up all the kmem_cache_node structures for caches whose
 * buffer_size is the same as the size of kmem_cache_node.
 */
  static void __init set_up_node(struct kmem_cache *cachep, int index)
{
	int node;

	for_each_online_node(node) {
		cachep->node[node] = &init_kmem_cache_node[index + node];
		cachep->node[node]->next_reap = jiffies +
		    REAPTIMEOUT_NODE +
		    ((unsigned long)cachep) % REAPTIMEOUT_NODE;
  	}
  }
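
/*
 * The "+ ((unsigned long)cachep) % REAPTIMEOUT_NODE" term above gives each
 * cache a slightly different next_reap deadline, so that the periodic
 * cache_reap() work for all caches does not fire in the same tick.
 */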
  
  /*
 * Initialisation.  Called after the page allocator has been initialised and
 * before smp_init().
   */
  void __init kmem_cache_init(void)
  {
	int i;
	kmem_cache = &kmem_cache_boot;
	if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1)
		use_alien_caches = 0;
	for (i = 0; i < NUM_INIT_LISTS; i++)
		kmem_cache_node_init(&init_kmem_cache_node[i]);

	/*
	 * Fragmentation resistance on low memory - only use bigger
	 * page orders on machines with more than 32MB of memory if
	 * not overridden on the command line.
	 */
	if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
		slab_max_order = SLAB_MAX_ORDER_HI;

	/* Bootstrap is tricky, because several objects are allocated
	 * from caches that do not exist yet:
	 * 1) initialize the kmem_cache cache: it contains the struct
	 *    kmem_cache structures of all caches, except kmem_cache itself:
	 *    kmem_cache is statically allocated.
	 *    Initially an __init data area is used for the head array and the
	 *    kmem_cache_node structures; it's replaced with a kmalloc allocated
	 *    array at the end of the bootstrap.
	 * 2) Create the first kmalloc cache.
	 *    The struct kmem_cache for the new cache is allocated normally.
	 *    An __init data area is used for the head array.
	 * 3) Create the remaining kmalloc caches, with minimally sized
	 *    head arrays.
	 * 4) Replace the __init data head arrays for kmem_cache and the first
	 *    kmalloc cache with kmalloc allocated arrays.
	 * 5) Replace the __init data for kmem_cache_node for kmem_cache and
	 *    the other caches with kmalloc allocated memory.
	 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
	 */
  	/* 1) create the kmem_cache */

	/*
	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
	 */
	create_boot_cache(kmem_cache, "kmem_cache",
		offsetof(struct kmem_cache, node) +
				  nr_node_ids * sizeof(struct kmem_cache_node *),
				  SLAB_HWCACHE_ALIGN, 0, 0);
	list_add(&kmem_cache->list, &slab_caches);
	memcg_link_cache(kmem_cache);
	slab_state = PARTIAL;

	/*
	 * Initialize the caches that provide memory for the kmem_cache_node
	 * structures first.  Without this, further allocations will bug.
	 */
	kmalloc_caches[INDEX_NODE] = create_kmalloc_cache(
				kmalloc_info[INDEX_NODE].name,
				kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS,
				0, kmalloc_size(INDEX_NODE));
	slab_state = PARTIAL_NODE;
	setup_kmalloc_cache_index_table();

	slab_early_init = 0;
	/* 5) Replace the bootstrap kmem_cache_node */
	{
		int nid;
		for_each_online_node(nid) {
			init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);

			init_list(kmalloc_caches[INDEX_NODE],
					  &init_kmem_cache_node[SIZE_NODE + nid], nid);
		}
	}

	create_kmalloc_caches(ARCH_KMALLOC_FLAGS);
  }
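
/*
 * Rough bootstrap summary, as set up above: slab_state moves from DOWN to
 * PARTIAL once the kmem_cache cache exists, to PARTIAL_NODE once the kmalloc
 * cache backing kmem_cache_node is available, and eventually to FULL in
 * kmem_cache_init_late() below, once all head arrays have been resized.
 */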
  
  void __init kmem_cache_init_late(void)
  {
  	struct kmem_cache *cachep;
	/* 6) resize the head arrays to their final sizes */
	mutex_lock(&slab_mutex);
	list_for_each_entry(cachep, &slab_caches, list)
		if (enable_cpucache(cachep, GFP_NOWAIT))
			BUG();
	mutex_unlock(&slab_mutex);

	/* Done! */
	slab_state = FULL;
#ifdef CONFIG_NUMA
	/*
	 * Register a memory hotplug callback that initializes and frees
	 * the per-node kmem_cache_node structures.
	 */
	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif
	/*
	 * The reap timers are started later, with a module init call: That part
	 * of the kernel is not yet operational.
  	 */
  }
  
  static int __init cpucache_init(void)
  {
	int ret;

	/*
	 * Register the timers that return unneeded pages to the page allocator
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SLAB online",
				slab_online_cpu, slab_offline_cpu);
	WARN_ON(ret < 0);

	return 0;
}
__initcall(cpucache_init);

  static noinline void
  slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
  {
#if DEBUG
	struct kmem_cache_node *n;
	unsigned long flags;
	int node;
	static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs))
		return;

	pr_warn("SLAB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
		nodeid, gfpflags, &gfpflags);
	pr_warn("  cache: %s, object size: %d, order: %d\n",
		cachep->name, cachep->size, cachep->gfporder);

	for_each_kmem_cache_node(cachep, node, n) {
		unsigned long total_slabs, free_slabs, free_objs;

		spin_lock_irqsave(&n->list_lock, flags);
		total_slabs = n->total_slabs;
		free_slabs = n->free_slabs;
		free_objs = n->free_objects;
		spin_unlock_irqrestore(&n->list_lock, flags);

		pr_warn("  node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
			node, total_slabs - free_slabs, total_slabs,
			(total_slabs * cachep->num) - free_objs,
			total_slabs * cachep->num);
	}
#endif
  }

/*
 * Interface to system's page allocator. No need to hold the
 * kmem_cache_node ->list_lock.
 *
 * If we requested dmaable memory, we will get it. Even if we
 * did not request dmaable memory, we might get it, but that
 * would be relatively rare and ignorable.
 */
  static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
  								int nodeid)
{
	struct page *page;
	int nr_pages;

	flags |= cachep->allocflags;

	page = __alloc_pages_node(nodeid, flags, cachep->gfporder);
	if (!page) {
		slab_out_of_memory(cachep, flags, nodeid);
		return NULL;
	}

	if (memcg_charge_slab(page, flags, cachep->gfporder, cachep)) {
		__free_pages(page, cachep->gfporder);
		return NULL;
	}
	nr_pages = (1 << cachep->gfporder);
	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
		mod_lruvec_page_state(page, NR_SLAB_RECLAIMABLE, nr_pages);
	else
		mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE, nr_pages);

	__SetPageSlab(page);
	/* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
	if (sk_memalloc_socks() && page_is_pfmemalloc(page))
		SetPageSlabPfmemalloc(page);

	return page;
  }
  
  /*
   * Interface to system's page release.
   */
static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
{
	int order = cachep->gfporder;
	unsigned long nr_freed = (1 << order);

	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
		mod_lruvec_page_state(page, NR_SLAB_RECLAIMABLE, -nr_freed);
	else
		mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE, -nr_freed);

	BUG_ON(!PageSlab(page));
	__ClearPageSlabPfmemalloc(page);
	__ClearPageSlab(page);
	page_mapcount_reset(page);
	page->mapping = NULL;

	if (current->reclaim_state)
		current->reclaim_state->reclaimed_slab += nr_freed;
	memcg_uncharge_slab(page, order, cachep);
	__free_pages(page, order);
  }
  
  static void kmem_rcu_free(struct rcu_head *head)
  {
	struct kmem_cache *cachep;
	struct page *page;

	page = container_of(head, struct page, rcu_head);
	cachep = page->slab_cache;

	kmem_freepages(cachep, page);
}

#if DEBUG
  static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
  {
  	if (debug_pagealloc_enabled() && OFF_SLAB(cachep) &&
  		(cachep->size % PAGE_SIZE) == 0)
  		return true;
  
  	return false;
  }
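
/*
 * The conditions above reflect what kernel_map_pages() based debugging can
 * cope with: objects must cover whole pages (cachep->size is a multiple of
 * PAGE_SIZE) and the freelist must live off-slab, so that it stays
 * accessible while the object pages themselves are unmapped.
 */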

#ifdef CONFIG_DEBUG_PAGEALLOC
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
			    unsigned long caller)
{
	int size = cachep->object_size;

	addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];

	if (size < 5 * sizeof(unsigned long))
		return;
	*addr++ = 0x12345678;
	*addr++ = caller;
	*addr++ = smp_processor_id();
	size -= 3 * sizeof(unsigned long);
	{
		unsigned long *sptr = &caller;
		unsigned long svalue;

		while (!kstack_end(sptr)) {
			svalue = *sptr++;
			if (kernel_text_address(svalue)) {
				*addr++ = svalue;
				size -= sizeof(unsigned long);
				if (size <= sizeof(unsigned long))
					break;
			}
		}

	}
	*addr++ = 0x87654321;
}
  
  static void slab_kernel_map(struct kmem_cache *cachep, void *objp,
  				int map, unsigned long caller)
  {
  	if (!is_debug_pagealloc_cache(cachep))
  		return;
  
  	if (caller)
  		store_stackinfo(cachep, objp, caller);
  
  	kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
  }
  
  #else
  static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
  				int map, unsigned long caller) {}
#endif

static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
{
	int size = cachep->object_size;
	addr = &((char *)addr)[obj_offset(cachep)];

	memset(addr, val, size);
	*(unsigned char *)(addr + size - 1) = POISON_END;
  }
  
  static void dump_line(char *data, int offset, int limit)
  {
  	int i;
	unsigned char error = 0;
	int bad_count = 0;

	pr_err("%03x: ", offset);
	for (i = 0; i < limit; i++) {
		if (data[offset + i] != POISON_FREE) {
			error = data[offset + i];
			bad_count++;
		}
	}
	print_hex_dump(KERN_CONT, "", 0, 16, 1,
			&data[offset], limit, 1);

	if (bad_count == 1) {
		error ^= POISON_FREE;
		if (!(error & (error - 1))) {
			pr_err("Single bit error detected. Probably bad RAM.\n");
#ifdef CONFIG_X86
			pr_err("Run memtest86+ or a similar memory test tool.\n");
#else
			pr_err("Run a memory test tool.\n");
#endif
		}
	}
  }
  #endif
  
  #if DEBUG
static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
{
	int i, size;
	char *realobj;

	if (cachep->flags & SLAB_RED_ZONE) {
		pr_err("Redzone: 0x%llx/0x%llx\n",
		       *dbg_redzone1(cachep, objp),
		       *dbg_redzone2(cachep, objp));
	}

	if (cachep->flags & SLAB_STORE_USER)
		pr_err("Last user: (%pSR)\n", *dbg_userword(cachep, objp));
	realobj = (char *)objp + obj_offset(cachep);
	size = cachep->object_size;
	for (i = 0; i < size && lines; i += 16, lines--) {
		int limit;
		limit = 16;
		if (i + limit > size)
			limit = size - i;
  		dump_line(realobj, i, limit);
  	}
  }

static void check_poison_obj(struct kmem_cache *cachep, void *objp)
{
	char *realobj;
	int size, i;
	int lines = 0;

	if (is_debug_pagealloc_cache(cachep))
		return;

	realobj = (char *)objp + obj_offset(cachep);
	size = cachep->object_size;

	for (i = 0; i < size; i++) {
		char exp = POISON_FREE;
		if (i == size - 1)
  			exp = POISON_END;
  		if (realobj[i] != exp) {
  			int limit;
  			/* Mismatch ! */
  			/* Print header */
  			if (lines == 0) {
				pr_err("Slab corruption (%s): %s start=%px, len=%d\n",
				       print_tainted(), cachep->name,
				       realobj, size);
				print_objinfo(cachep, objp, 0);
			}
			/* Hexdump the affected line */
			i = (i / 16) * 16;
			limit = 16;
			if (i + limit > size)
				limit = size - i;
  			dump_line(realobj, i, limit);
  			i += 16;
  			lines++;
  			/* Limit to 5 lines */
  			if (lines > 5)
  				break;
  		}
  	}
  	if (lines != 0) {
  		/* Print some data about the neighboring objects, if they
  		 * exist:
  		 */
		struct page *page = virt_to_head_page(objp);
		unsigned int objnr;

		objnr = obj_to_index(cachep, page, objp);
		if (objnr) {
			objp = index_to_obj(cachep, page, objnr - 1);
			realobj = (char *)objp + obj_offset(cachep);
			pr_err("Prev obj: start=%px, len=%d\n", realobj, size);
			print_objinfo(cachep, objp, 2);
		}
		if (objnr + 1 < cachep->num) {
			objp = index_to_obj(cachep, page, objnr + 1);
			realobj = (char *)objp + obj_offset(cachep);
			pr_err("Next obj: start=%px, len=%d\n", realobj, size);
  			print_objinfo(cachep, objp, 2);
  		}
  	}
  }
  #endif

#if DEBUG
static void slab_destroy_debugcheck(struct kmem_cache *cachep,
						struct page *page)
{
	int i;
  
  	if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) {
  		poison_obj(cachep, page->freelist - obj_offset(cachep),
  			POISON_FREE);
  	}
	for (i = 0; i < cachep->num; i++) {
		void *objp = index_to_obj(cachep, page, i);

		if (cachep->flags & SLAB_POISON) {
			check_poison_obj(cachep, objp);
			slab_kernel_map(cachep, objp, 1, 0);
		}
		if (cachep->flags & SLAB_RED_ZONE) {
			if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
				slab_error(cachep, "start of a freed object was overwritten");
			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
				slab_error(cachep, "end of a freed object was overwritten");
		}
	}
}
#else
static void slab_destroy_debugcheck(struct kmem_cache *cachep,
						struct page *page)
{
}
#endif

  /**
   * slab_destroy - destroy and release all objects in a slab
   * @cachep: cache pointer being destroyed
 * @page: page pointer being destroyed
 *
 * Destroy all the objs in a slab page, and release the mem back to the system.
 * Before calling, the slab page must have been unlinked from the cache. The
 * kmem_cache_node ->list_lock is not held/needed.
 */
  static void slab_destroy(struct kmem_cache *cachep, struct page *page)
{
	void *freelist;

	freelist = page->freelist;
	slab_destroy_debugcheck(cachep, page);
	if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
		call_rcu(&page->rcu_head, kmem_rcu_free);
	else
		kmem_freepages(cachep, page);

	/*
	 * From now on, we don't use freelist, although the actual page
	 * can be freed in rcu context.
	 */
	if (OFF_SLAB(cachep))
		kmem_cache_free(cachep->freelist_cache, freelist);
}

  static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
  {
  	struct page *page, *n;
  
  	list_for_each_entry_safe(page, n, list, lru) {
  		list_del(&page->lru);
  		slab_destroy(cachep, page);
  	}
  }
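
/*
 * Callers such as cpuup_canceled() above collect pages on a private list
 * while holding the node's list_lock and only call slabs_destroy() after
 * dropping it, so the actual page freeing happens outside the lock.
 */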

/**
 * calculate_slab_order - calculate size (page order) of slabs
 * @cachep: pointer to the cache that is being created
 * @size: size of objects to be created in this cache.
 * @flags: slab allocation flags
 *
 * Also calculates the number of objects per slab.
 *
 * This could be made much more intelligent.  For now, try to avoid using
 * high order pages for slabs.  When the gfp() functions are more friendly
 * towards high-order requests, this should be changed.
 */
  static size_t calculate_slab_order(struct kmem_cache *cachep,
				size_t size, slab_flags_t flags)
{
	size_t left_over = 0;
	int gfporder;

	for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
		unsigned int num;
		size_t remainder;

		num = cache_estimate(gfporder, size, flags, &remainder);
		if (!num)
			continue;

  		/* Can't handle number of objects more than SLAB_OBJ_MAX_NUM */
  		if (num > SLAB_OBJ_MAX_NUM)
  			break;
		if (flags & CFLGS_OFF_SLAB) {
			struct kmem_cache *freelist_cache;
			size_t freelist_size;

			freelist_size = num * sizeof(freelist_idx_t);
			freelist_cache = kmalloc_slab(freelist_size, 0u);
			if (!freelist_cache)
				continue;

			/*
			 * Needed to avoid possible looping condition
			 * in cache_grow_begin()
			 */
			if (OFF_SLAB(freelist_cache))
				continue;

			/* check if off slab has enough benefit */
			if (freelist_cache->size > cachep->size / 2)
				continue;
		}

  		/* Found something acceptable - save it away */
		cachep->num = num;
		cachep->gfporder = gfporder;
		left_over = remainder;

		/*
		 * A VFS-reclaimable slab tends to have most allocations
		 * as GFP_NOFS and we really don't want to have to be allocating
		 * higher-order pages when we are unable to shrink dcache.
		 */
		if (flags & SLAB_RECLAIM_ACCOUNT)
			break;

		/*
		 * Large number of objects is good, but very large slabs are
		 * currently bad for the gfp()s.
		 */
		if (gfporder >= slab_max_order)
			break;

		/*
		 * Acceptable internal fragmentation?
		 */
		if (left_over * 8 <= (PAGE_SIZE << gfporder))
  			break;
  	}
  	return left_over;
  }
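
/*
 * Illustration of the fragmentation check above: an order is accepted once
 * left_over * 8 <= (PAGE_SIZE << gfporder), i.e. at most 1/8th of the slab
 * may be left unused.  With 4096-byte pages and gfporder 0 that allows up
 * to 512 wasted bytes; at gfporder 1 (8192 bytes) up to 1024, and so on.
 */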

static struct array_cache __percpu *alloc_kmem_cache_cpus(
		struct kmem_cache *cachep, int entries, int batchcount)
{
	int cpu;
	size_t size;
	struct array_cache __percpu *cpu_cache;

	size = sizeof(void *) * entries + sizeof(struct array_cache);
  	cpu_cache = __alloc_percpu(size, sizeof(void *));
  
  	if (!cpu_cache)
  		return NULL;
  
  	for_each_possible_cpu(cpu) {
  		init_arraycache(per_cpu_ptr(cpu_cache, cpu),
  				entries, batchcount);
  	}
  
  	return cpu_cache;
  }
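
/*
 * The size computed above assumes that struct array_cache ends in a
 * flexible array of object pointers (entry[]), so a single percpu
 * allocation holds both the bookkeeping fields and 'entries' slots laid
 * out directly behind them.
 */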

static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	if (slab_state >= FULL)
		return enable_cpucache(cachep, gfp);

  	cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1);
  	if (!cachep->cpu_cache)
  		return 1;
	if (slab_state == DOWN) {
		/* Creation of first cache (kmem_cache). */
		set_up_node(kmem_cache, CACHE_CACHE);
	} else if (slab_state == PARTIAL) {
		/* For kmem_cache_node */
		set_up_node(cachep, SIZE_NODE);
	} else {
		int node;

		for_each_online_node(node) {
			cachep->node[node] = kmalloc_node(
				sizeof(struct kmem_cache_node), gfp, node);
			BUG_ON(!cachep->node[node]);
			kmem_cache_node_init(cachep->node[node]);
		}
	}

	cachep->node[numa_mem_id()]->next_reap =
			jiffies + REAPTIMEOUT_NODE +
			((unsigned long)cachep) % REAPTIMEOUT_NODE;
  
  	cpu_cache_get(cachep)->avail = 0;
  	cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
  	cpu_cache_get(cachep)->batchcount = 1;
  	cpu_cache_get(cachep)->touched = 0;
  	cachep->batchcount = 1;
  	cachep->limit = BOOT_CPUCACHE_ENTRIES;
	return 0;
}

slab_flags_t kmem_cache_flags(unsigned int object_size,
	slab_flags_t flags, const char *name,
  	void (*ctor)(void *))
  {
  	return flags;
  }
  
  struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
		   slab_flags_t flags, void (*ctor)(void *))
  {
  	struct kmem_cache *cachep;
  
  	cachep = find_mergeable(size, align, flags, name, ctor);
  	if (cachep) {
  		cachep->refcount++;
  
  		/*
  		 * Adjust the object sizes so that we clear
  		 * the complete object on kzalloc.
  		 */
  		cachep->object_size = max_t(int, cachep->object_size, size);
  	}
  	return cachep;
  }
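
/*
 * When find_mergeable() returns an existing compatible cache, that cache is
 * reused instead of creating a new one: its refcount is bumped and its
 * object_size is raised to the larger of the two sizes so that kzalloc()
 * keeps clearing the whole (merged) object.
 */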

static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
			size_t size, slab_flags_t flags)
{
	size_t left;

	cachep->num = 0;
	if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
  		return false;
  
  	left = calculate_slab_order(cachep, size,
  			flags | CFLGS_OBJFREELIST_SLAB);
  	if (!cachep->num)
  		return false;
  
  	if (cachep->num * sizeof(freelist_idx_t) > cachep->object_size)
  		return false;
  
  	cachep->colour = left / cachep->colour_off;
  
  	return true;
  }

static bool set_off_slab_cache(struct kmem_cache *cachep,
			size_t size, slab_flags_t flags)
{
	size_t left;

	cachep->num = 0;

	/*
	 * Always use on-slab management when SLAB_NOLEAKTRACE
	 * to avoid recursive calls into kmemleak.
	 */
  	if (flags & SLAB_NOLEAKTRACE)
  		return false;
  
  	/*
  	 * Size is large, assume best to place the slab management obj
  	 * off-slab (should allow better packing of objs).
  	 */
  	left = calculate_slab_order(cachep, size, flags | CFLGS_OFF_SLAB);
  	if (!cachep->num)
  		return false;
  
  	/*
  	 * If the slab has been placed off-slab, and we have enough space then
  	 * move it on-slab. This is at the expense of any extra colouring.
  	 */
  	if (left >= cachep->num * sizeof(freelist_idx_t))
  		return false;
  
  	cachep->colour = left / cachep->colour_off;
  
  	return true;
  }
  
  static bool set_on_slab_cache(struct kmem_cache *cachep,
  			size_t size, slab_flags_t flags)
  {
  	size_t left;
  
  	cachep->num = 0;
  
  	left = calculate_slab_order(cachep, size, flags);
  	if (!cachep->num)
  		return false;
  
  	cachep->colour = left / cachep->colour_off;
  
  	return true;
  }
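
/*
 * The three helpers above are tried by __kmem_cache_create() in order of
 * preference: an OBJFREELIST_SLAB layout first, then an off-slab freelist,
 * and finally plain on-slab management as the fallback.
 */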

/**
 * __kmem_cache_create - Create a cache.
 * @cachep: cache management descriptor
 * @flags: SLAB flags
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 */
  int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
{
	size_t ralign = BYTES_PER_WORD;
	gfp_t gfp;
	int err;
	unsigned int size = cachep->size;

#if DEBUG
  #if FORCED_DEBUG
  	/*
  	 * Enable redzoning and last user accounting, except for caches with
  	 * large objects, if the increased size would increase the object size
  	 * above the next power of two: caches with object sizes just above a
  	 * power of two have a significant amount of internal fragmentation.
  	 */
	if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
						2 * sizeof(unsigned long long)))
		flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
	if (!(flags & SLAB_TYPESAFE_BY_RCU))
		flags |= SLAB_POISON;
#endif
#endif

	/*
	 * Check that size is in terms of words.  This is needed to avoid
  	 * unaligned accesses for some archs when redzoning is used, and makes
  	 * sure any on-slab bufctl's are also correctly aligned.
  	 */
	size = ALIGN(size, BYTES_PER_WORD);

	if (flags & SLAB_RED_ZONE) {
		ralign = REDZONE_ALIGN;
		/* If redzoning, ensure that the second redzone is suitably
		 * aligned, by adjusting the object size accordingly. */
		size = ALIGN(size, REDZONE_ALIGN);
	}

	/* 3) caller mandated alignment */
  	if (ralign < cachep->align) {
  		ralign = cachep->align;
	}
	/* disable debug if necessary */
	if (ralign > __alignof__(unsigned long long))
		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
	/*
	 * 4) Store it.
	 */
	cachep->align = ralign;
  	cachep->colour_off = cache_line_size();
  	/* Offset must be a multiple of the alignment. */
  	if (cachep->colour_off < cachep->align)
  		cachep->colour_off = cachep->align;

	if (slab_is_available())
		gfp = GFP_KERNEL;
	else
		gfp = GFP_NOWAIT;
#if DEBUG

  	/*
  	 * Both debugging options require word-alignment which is calculated
  	 * into align above.
  	 */
	if (flags & SLAB_RED_ZONE) {
		/* add space for red zone words */
		cachep->obj_offset += sizeof(unsigned long long);
		size += 2 * sizeof(unsigned long long);
	}
	if (flags & SLAB_STORE_USER) {
		/* user store requires one word storage behind the end of
		 * the real object. But if the second red zone needs to be
		 * aligned to 64 bits, we must allow that much space.
		 */
		if (flags & SLAB_RED_ZONE)
			size += REDZONE_ALIGN;
		else
			size += BYTES_PER_WORD;
	}
  #endif
	kasan_cache_create(cachep, &size, &flags);
  	size = ALIGN(size, cachep->align);
  	/*
  	 * We should restrict the number of objects in a slab to implement
  	 * byte sized index. Refer comment on SLAB_OBJ_MIN_SIZE definition.
  	 */
  	if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
  		size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);
  
  #if DEBUG
	/*
	 * To activate debug pagealloc, off-slab management is a necessary
	 * requirement. In the early phase of initialization, small sized slabs
	 * don't get initialized, so it would not be possible. Hence we need
	 * to check size >= 256. It guarantees that all the necessary small
	 * sized slabs are initialized in the current slab initialization
	 * sequence.
	 */
	if (debug_pagealloc_enabled() && (flags & SLAB_POISON) &&
  		size >= 256 && cachep->object_size > cache_line_size()) {
  		if (size < PAGE_SIZE || size % PAGE_SIZE == 0) {
  			size_t tmp_size = ALIGN(size, PAGE_SIZE);
  
  			if (set_off_slab_cache(cachep, tmp_size, flags)) {
  				flags |= CFLGS_OFF_SLAB;
  				cachep->obj_offset += tmp_size - size;
  				size = tmp_size;
  				goto done;
  			}
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2013
2014
  	}
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2015

b03a017be   Joonsoo Kim   mm/slab: introduc...
2016
2017
2018
2019
  	if (set_objfreelist_slab_cache(cachep, size, flags)) {
  		flags |= CFLGS_OBJFREELIST_SLAB;
  		goto done;
  	}
158e319bb   Joonsoo Kim   mm/slab: clean up...
2020
  	if (set_off_slab_cache(cachep, size, flags)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2021
  		flags |= CFLGS_OFF_SLAB;
158e319bb   Joonsoo Kim   mm/slab: clean up...
2022
  		goto done;
832a15d20   Joonsoo Kim   mm/slab: align ca...
2023
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2024

158e319bb   Joonsoo Kim   mm/slab: clean up...
2025
2026
  	if (set_on_slab_cache(cachep, size, flags))
  		goto done;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2027

158e319bb   Joonsoo Kim   mm/slab: clean up...
2028
  	return -E2BIG;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2029

158e319bb   Joonsoo Kim   mm/slab: clean up...
2030
2031
  done:
  	cachep->freelist_size = cachep->num * sizeof(freelist_idx_t);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2032
  	cachep->flags = flags;
a57a49887   Joonsoo Kim   slab: use __GFP_C...
2033
  	cachep->allocflags = __GFP_COMP;
a3187e438   Yang Shi   mm: slab: remove ...
2034
  	if (flags & SLAB_CACHE_DMA)
a618e89f1   Glauber Costa   slab: rename gfpf...
2035
  		cachep->allocflags |= GFP_DMA;
62d342d67   Nicolas Boichat   mm: add support f...
2036
2037
  	if (flags & SLAB_CACHE_DMA32)
  		cachep->allocflags |= GFP_DMA32;
a3ba07444   David Rientjes   mm/slab.c: only s...
2038
2039
  	if (flags & SLAB_RECLAIM_ACCOUNT)
  		cachep->allocflags |= __GFP_RECLAIMABLE;
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
2040
  	cachep->size = size;
6a2d7a955   Eric Dumazet   [PATCH] SLAB: use...
2041
  	cachep->reciprocal_buffer_size = reciprocal_value(size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2042

40b441379   Joonsoo Kim   mm/slab: clean up...
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
  #if DEBUG
  	/*
  	 * If we're going to use the generic kernel_map_pages()
  	 * poisoning, then it's going to smash the contents of
  	 * the redzone and userword anyhow, so switch them off.
  	 */
  	if (IS_ENABLED(CONFIG_PAGE_POISONING) &&
  		(cachep->flags & SLAB_POISON) &&
  		is_debug_pagealloc_cache(cachep))
  		cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
  #endif
  
  	if (OFF_SLAB(cachep)) {
158e319bb   Joonsoo Kim   mm/slab: clean up...
2056
2057
  		cachep->freelist_cache =
  			kmalloc_slab(cachep->freelist_size, 0u);
e5ac9c5ae   Ravikiran G Thirumalai   [PATCH] Add some ...
2058
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2059

278b1bb13   Christoph Lameter   mm/sl[aou]b: Move...
2060
2061
  	err = setup_cpu_cache(cachep, gfp);
  	if (err) {
52b4b950b   Dmitry Safonov   mm: slab: free km...
2062
  		__kmem_cache_release(cachep);
278b1bb13   Christoph Lameter   mm/sl[aou]b: Move...
2063
  		return err;
2ed3a4ef9   Christoph Lameter   [PATCH] slab: do ...
2064
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2065

278b1bb13   Christoph Lameter   mm/sl[aou]b: Move...
2066
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2067
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
  
  #if DEBUG
  static void check_irq_off(void)
  {
  	BUG_ON(!irqs_disabled());
  }
  
  static void check_irq_on(void)
  {
  	BUG_ON(irqs_disabled());
  }
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
2079
2080
2081
2082
  static void check_mutex_acquired(void)
  {
  	BUG_ON(!mutex_is_locked(&slab_mutex));
  }
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
2083
  static void check_spinlock_acquired(struct kmem_cache *cachep)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2084
2085
2086
  {
  #ifdef CONFIG_SMP
  	check_irq_off();
18bf85411   Christoph Lameter   slab: use get_nod...
2087
  	assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2088
2089
  #endif
  }
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2090

343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
2091
  static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2092
2093
2094
  {
  #ifdef CONFIG_SMP
  	check_irq_off();
18bf85411   Christoph Lameter   slab: use get_nod...
2095
  	assert_spin_locked(&get_node(cachep, node)->list_lock);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2096
2097
  #endif
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2098
2099
2100
  #else
  #define check_irq_off()	do { } while(0)
  #define check_irq_on()	do { } while(0)
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
2101
  #define check_mutex_acquired()	do { } while(0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2102
  #define check_spinlock_acquired(x) do { } while(0)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2103
  #define check_spinlock_acquired_node(x, y) do { } while(0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2104
  #endif
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
  static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
  				int node, bool free_all, struct list_head *list)
  {
  	int tofree;
  
  	if (!ac || !ac->avail)
  		return;
  
  	tofree = free_all ? ac->avail : (ac->limit + 4) / 5;
  	if (tofree > ac->avail)
  		tofree = (ac->avail + 1) / 2;
  
  	free_block(cachep, ac->entry, tofree, node, list);
  	ac->avail -= tofree;
  	memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail);
  }
aab2207cf   Christoph Lameter   [PATCH] slab: mak...
2121

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2122
2123
  static void do_drain(void *arg)
  {
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2124
  	struct kmem_cache *cachep = arg;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2125
  	struct array_cache *ac;
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
2126
  	int node = numa_mem_id();
18bf85411   Christoph Lameter   slab: use get_nod...
2127
  	struct kmem_cache_node *n;
97654dfa2   Joonsoo Kim   slab: defer slab_...
2128
  	LIST_HEAD(list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2129
2130
  
  	check_irq_off();
9a2dba4b4   Pekka Enberg   [PATCH] slab: ren...
2131
  	ac = cpu_cache_get(cachep);
18bf85411   Christoph Lameter   slab: use get_nod...
2132
2133
  	n = get_node(cachep, node);
  	spin_lock(&n->list_lock);
97654dfa2   Joonsoo Kim   slab: defer slab_...
2134
  	free_block(cachep, ac->entry, ac->avail, node, &list);
18bf85411   Christoph Lameter   slab: use get_nod...
2135
  	spin_unlock(&n->list_lock);
97654dfa2   Joonsoo Kim   slab: defer slab_...
2136
  	slabs_destroy(cachep, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2137
2138
  	ac->avail = 0;
  }
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
2139
  static void drain_cpu_caches(struct kmem_cache *cachep)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2140
  {
ce8eb6c42   Christoph Lameter   slab: Rename list...
2141
  	struct kmem_cache_node *n;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2142
  	int node;
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
2143
  	LIST_HEAD(list);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2144

15c8b6c1a   Jens Axboe   on_each_cpu(): ki...
2145
  	on_each_cpu(do_drain, cachep, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2146
  	check_irq_on();
18bf85411   Christoph Lameter   slab: use get_nod...
2147
2148
  	for_each_kmem_cache_node(cachep, node, n)
  		if (n->alien)
ce8eb6c42   Christoph Lameter   slab: Rename list...
2149
  			drain_alien_cache(cachep, n->alien);
a4523a8b3   Roland Dreier   [PATCH] slab: Fix...
2150

18726ca8b   Joonsoo Kim   mm/slab: fix the ...
2151
2152
2153
2154
2155
2156
2157
  	for_each_kmem_cache_node(cachep, node, n) {
  		spin_lock_irq(&n->list_lock);
  		drain_array_locked(cachep, n->shared, node, true, &list);
  		spin_unlock_irq(&n->list_lock);
  
  		slabs_destroy(cachep, &list);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2158
  }
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2159
2160
2161
2162
2163
2164
2165
  /*
   * Remove slabs from the list of free slabs.
   * Specify the number of slabs to drain in tofree.
   *
   * Returns the actual number of slabs released.
   */
  static int drain_freelist(struct kmem_cache *cache,
ce8eb6c42   Christoph Lameter   slab: Rename list...
2166
  			struct kmem_cache_node *n, int tofree)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2167
  {
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2168
2169
  	struct list_head *p;
  	int nr_freed;
8456a648c   Joonsoo Kim   slab: use struct ...
2170
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2171

ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2172
  	nr_freed = 0;
ce8eb6c42   Christoph Lameter   slab: Rename list...
2173
  	while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2174

ce8eb6c42   Christoph Lameter   slab: Rename list...
2175
2176
2177
2178
  		spin_lock_irq(&n->list_lock);
  		p = n->slabs_free.prev;
  		if (p == &n->slabs_free) {
  			spin_unlock_irq(&n->list_lock);
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2179
2180
  			goto out;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2181

8456a648c   Joonsoo Kim   slab: use struct ...
2182
  		page = list_entry(p, struct page, lru);
8456a648c   Joonsoo Kim   slab: use struct ...
2183
  		list_del(&page->lru);
f728b0a5d   Greg Thelen   mm, slab: faster ...
2184
  		n->free_slabs--;
bf00bd345   David Rientjes   mm, slab: maintai...
2185
  		n->total_slabs--;
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2186
2187
2188
2189
  		/*
  		 * Safe to drop the lock. The slab is no longer linked
  		 * to the cache.
  		 */
ce8eb6c42   Christoph Lameter   slab: Rename list...
2190
2191
  		n->free_objects -= cache->num;
  		spin_unlock_irq(&n->list_lock);
8456a648c   Joonsoo Kim   slab: use struct ...
2192
  		slab_destroy(cache, page);
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2193
  		nr_freed++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2194
  	}
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2195
2196
  out:
  	return nr_freed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2197
  }
f9e13c0a5   Shakeel Butt   slab, slub: skip ...
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
  bool __kmem_cache_empty(struct kmem_cache *s)
  {
  	int node;
  	struct kmem_cache_node *n;
  
  	for_each_kmem_cache_node(s, node, n)
  		if (!list_empty(&n->slabs_full) ||
  		    !list_empty(&n->slabs_partial))
  			return false;
  	return true;
  }
c9fc58640   Tejun Heo   slab: introduce _...
2209
  int __kmem_cache_shrink(struct kmem_cache *cachep)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2210
  {
18bf85411   Christoph Lameter   slab: use get_nod...
2211
2212
  	int ret = 0;
  	int node;
ce8eb6c42   Christoph Lameter   slab: Rename list...
2213
  	struct kmem_cache_node *n;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2214
2215
2216
2217
  
  	drain_cpu_caches(cachep);
  
  	check_irq_on();
18bf85411   Christoph Lameter   slab: use get_nod...
2218
  	for_each_kmem_cache_node(cachep, node, n) {
a5aa63a5f   Joonsoo Kim   mm/slab: drain th...
2219
  		drain_freelist(cachep, n, INT_MAX);
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
2220

ce8eb6c42   Christoph Lameter   slab: Rename list...
2221
2222
  		ret += !list_empty(&n->slabs_full) ||
  			!list_empty(&n->slabs_partial);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2223
2224
2225
  	}
  	return (ret ? 1 : 0);
  }
c9fc58640   Tejun Heo   slab: introduce _...
2226
2227
2228
2229
2230
2231
  #ifdef CONFIG_MEMCG
  void __kmemcg_cache_deactivate(struct kmem_cache *cachep)
  {
  	__kmem_cache_shrink(cachep);
  }
  #endif
945cf2b61   Christoph Lameter   mm/sl[aou]b: Extr...
2232
  int __kmem_cache_shutdown(struct kmem_cache *cachep)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2233
  {
c9fc58640   Tejun Heo   slab: introduce _...
2234
  	return __kmem_cache_shrink(cachep);
52b4b950b   Dmitry Safonov   mm: slab: free km...
2235
2236
2237
2238
  }
  
  void __kmem_cache_release(struct kmem_cache *cachep)
  {
12c3667fb   Christoph Lameter   mm/sl[aou]b: Get ...
2239
  	int i;
ce8eb6c42   Christoph Lameter   slab: Rename list...
2240
  	struct kmem_cache_node *n;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2241

c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2242
  	cache_random_seq_destroy(cachep);
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
2243
  	free_percpu(cachep->cpu_cache);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2244

ce8eb6c42   Christoph Lameter   slab: Rename list...
2245
  	/* NUMA: free the node structures */
18bf85411   Christoph Lameter   slab: use get_nod...
2246
2247
2248
2249
2250
  	for_each_kmem_cache_node(cachep, i, n) {
  		kfree(n->shared);
  		free_alien_cache(n->alien);
  		kfree(n);
  		cachep->node[i] = NULL;
12c3667fb   Christoph Lameter   mm/sl[aou]b: Get ...
2251
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2252
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2253

e5ac9c5ae   Ravikiran G Thirumalai   [PATCH] Add some ...
2254
2255
  /*
   * Get the memory for a slab management obj.
5f0985bb1   Jianyu Zhan   mm/slab.c: cleanu...
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
   *
   * For a slab cache when the slab descriptor is off-slab, the
   * slab descriptor can't come from the same cache which is being created,
   * Because if it is the case, that means we defer the creation of
   * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point.
   * And we eventually call down to __kmem_cache_create(), which
   * in turn looks up in the kmalloc_{dma,}_caches for the disired-size one.
   * This is a "chicken-and-egg" problem.
   *
   * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
   * which are all initialized during kmem_cache_init().
e5ac9c5ae   Ravikiran G Thirumalai   [PATCH] Add some ...
2267
   */
7e0073552   Joonsoo Kim   slab: replace non...
2268
  static void *alloc_slabmgmt(struct kmem_cache *cachep,
0c3aa83e0   Joonsoo Kim   slab: change retu...
2269
2270
  				   struct page *page, int colour_off,
  				   gfp_t local_flags, int nodeid)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2271
  {
7e0073552   Joonsoo Kim   slab: replace non...
2272
  	void *freelist;
0c3aa83e0   Joonsoo Kim   slab: change retu...
2273
  	void *addr = page_address(page);
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
2274

2e6b36021   Joonsoo Kim   mm/slab: put the ...
2275
2276
  	page->s_mem = addr + colour_off;
  	page->active = 0;
b03a017be   Joonsoo Kim   mm/slab: introduc...
2277
2278
2279
  	if (OBJFREELIST_SLAB(cachep))
  		freelist = NULL;
  	else if (OFF_SLAB(cachep)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2280
  		/* Slab management obj is off-slab. */
8456a648c   Joonsoo Kim   slab: use struct ...
2281
  		freelist = kmem_cache_alloc_node(cachep->freelist_cache,
8759ec50a   Pekka Enberg   slab: remove GFP_...
2282
  					      local_flags, nodeid);
8456a648c   Joonsoo Kim   slab: use struct ...
2283
  		if (!freelist)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2284
2285
  			return NULL;
  	} else {
2e6b36021   Joonsoo Kim   mm/slab: put the ...
2286
2287
2288
  		/* We will use last bytes at the slab for freelist */
  		freelist = addr + (PAGE_SIZE << cachep->gfporder) -
  				cachep->freelist_size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2289
  	}
2e6b36021   Joonsoo Kim   mm/slab: put the ...
2290

8456a648c   Joonsoo Kim   slab: use struct ...
2291
  	return freelist;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2292
  }
7cc68973c   Joonsoo Kim   slab: fix the typ...
2293
  static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2294
  {
a41adfaa2   Joonsoo Kim   slab: introduce b...
2295
  	return ((freelist_idx_t *)page->freelist)[idx];
e5c58dfdc   Joonsoo Kim   slab: introduce h...
2296
2297
2298
  }
  
  static inline void set_free_obj(struct page *page,
7cc68973c   Joonsoo Kim   slab: fix the typ...
2299
  					unsigned int idx, freelist_idx_t val)
e5c58dfdc   Joonsoo Kim   slab: introduce h...
2300
  {
a41adfaa2   Joonsoo Kim   slab: introduce b...
2301
  	((freelist_idx_t *)(page->freelist))[idx] = val;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2302
  }
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2303
  static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2304
  {
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2305
  #if DEBUG
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2306
2307
2308
  	int i;
  
  	for (i = 0; i < cachep->num; i++) {
8456a648c   Joonsoo Kim   slab: use struct ...
2309
  		void *objp = index_to_obj(cachep, page, i);
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2310

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2311
2312
2313
2314
2315
2316
2317
2318
  		if (cachep->flags & SLAB_STORE_USER)
  			*dbg_userword(cachep, objp) = NULL;
  
  		if (cachep->flags & SLAB_RED_ZONE) {
  			*dbg_redzone1(cachep, objp) = RED_INACTIVE;
  			*dbg_redzone2(cachep, objp) = RED_INACTIVE;
  		}
  		/*
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2319
2320
2321
  		 * Constructors are not allowed to allocate memory from the same
  		 * cache which they are a constructor for.  Otherwise, deadlock.
  		 * They must also be threaded.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2322
  		 */
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
2323
2324
2325
  		if (cachep->ctor && !(cachep->flags & SLAB_POISON)) {
  			kasan_unpoison_object_data(cachep,
  						   objp + obj_offset(cachep));
51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
2326
  			cachep->ctor(objp + obj_offset(cachep));
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
2327
2328
2329
  			kasan_poison_object_data(
  				cachep, objp + obj_offset(cachep));
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2330
2331
2332
  
  		if (cachep->flags & SLAB_RED_ZONE) {
  			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
756a025f0   Joe Perches   mm: coalesce spli...
2333
  				slab_error(cachep, "constructor overwrote the end of an object");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2334
  			if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
756a025f0   Joe Perches   mm: coalesce spli...
2335
  				slab_error(cachep, "constructor overwrote the start of an object");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2336
  		}
40b441379   Joonsoo Kim   mm/slab: clean up...
2337
2338
2339
2340
2341
  		/* need to poison the objs? */
  		if (cachep->flags & SLAB_POISON) {
  			poison_obj(cachep, objp, POISON_FREE);
  			slab_kernel_map(cachep, objp, 0, 0);
  		}
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2342
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2343
  #endif
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2344
  }
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2345
2346
2347
2348
2349
  #ifdef CONFIG_SLAB_FREELIST_RANDOM
  /* Hold information during a freelist initialization */
  union freelist_init_state {
  	struct {
  		unsigned int pos;
7c00fce98   Thomas Garnier   mm: reorganize SL...
2350
  		unsigned int *list;
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2351
  		unsigned int count;
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
  	};
  	struct rnd_state rnd_state;
  };
  
  /*
   * Initialize the state based on the randomization methode available.
   * return true if the pre-computed list is available, false otherwize.
   */
  static bool freelist_state_initialize(union freelist_init_state *state,
  				struct kmem_cache *cachep,
  				unsigned int count)
  {
  	bool ret;
  	unsigned int rand;
  
  	/* Use best entropy available to define a random shift */
7c00fce98   Thomas Garnier   mm: reorganize SL...
2368
  	rand = get_random_int();
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2369
2370
2371
2372
2373
2374
2375
2376
  
  	/* Use a random state if the pre-computed list is not available */
  	if (!cachep->random_seq) {
  		prandom_seed_state(&state->rnd_state, rand);
  		ret = false;
  	} else {
  		state->list = cachep->random_seq;
  		state->count = count;
c4e490cf1   John Sperbeck   mm/slab.c: fix SL...
2377
  		state->pos = rand % count;
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2378
2379
2380
2381
2382
2383
2384
2385
  		ret = true;
  	}
  	return ret;
  }
  
  /* Get the next entry on the list and randomize it using a random shift */
  static freelist_idx_t next_random_slot(union freelist_init_state *state)
  {
c4e490cf1   John Sperbeck   mm/slab.c: fix SL...
2386
2387
2388
  	if (state->pos >= state->count)
  		state->pos = 0;
  	return state->list[state->pos++];
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2389
  }
7c00fce98   Thomas Garnier   mm: reorganize SL...
2390
2391
2392
2393
2394
2395
  /* Swap two freelist entries */
  static void swap_free_obj(struct page *page, unsigned int a, unsigned int b)
  {
  	swap(((freelist_idx_t *)page->freelist)[a],
  		((freelist_idx_t *)page->freelist)[b]);
  }
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2396
2397
2398
2399
2400
2401
  /*
   * Shuffle the freelist initialization state based on pre-computed lists.
   * return true if the list was successfully shuffled, false otherwise.
   */
  static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
  {
7c00fce98   Thomas Garnier   mm: reorganize SL...
2402
  	unsigned int objfreelist = 0, i, rand, count = cachep->num;
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
  	union freelist_init_state state;
  	bool precomputed;
  
  	if (count < 2)
  		return false;
  
  	precomputed = freelist_state_initialize(&state, cachep, count);
  
  	/* Take a random entry as the objfreelist */
  	if (OBJFREELIST_SLAB(cachep)) {
  		if (!precomputed)
  			objfreelist = count - 1;
  		else
  			objfreelist = next_random_slot(&state);
  		page->freelist = index_to_obj(cachep, page, objfreelist) +
  						obj_offset(cachep);
  		count--;
  	}
  
  	/*
  	 * On early boot, generate the list dynamically.
  	 * Later use a pre-computed list for speed.
  	 */
  	if (!precomputed) {
7c00fce98   Thomas Garnier   mm: reorganize SL...
2427
2428
2429
2430
2431
2432
2433
2434
2435
  		for (i = 0; i < count; i++)
  			set_free_obj(page, i, i);
  
  		/* Fisher-Yates shuffle */
  		for (i = count - 1; i > 0; i--) {
  			rand = prandom_u32_state(&state.rnd_state);
  			rand %= (i + 1);
  			swap_free_obj(page, i, rand);
  		}
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
  	} else {
  		for (i = 0; i < count; i++)
  			set_free_obj(page, i, next_random_slot(&state));
  	}
  
  	if (OBJFREELIST_SLAB(cachep))
  		set_free_obj(page, cachep->num - 1, objfreelist);
  
  	return true;
  }
  #else
  static inline bool shuffle_freelist(struct kmem_cache *cachep,
  				struct page *page)
  {
  	return false;
  }
  #endif /* CONFIG_SLAB_FREELIST_RANDOM */
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2453
2454
2455
2456
  static void cache_init_objs(struct kmem_cache *cachep,
  			    struct page *page)
  {
  	int i;
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
2457
  	void *objp;
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2458
  	bool shuffled;
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2459
2460
  
  	cache_init_objs_debug(cachep, page);
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2461
2462
2463
2464
  	/* Try to randomize the freelist if enabled */
  	shuffled = shuffle_freelist(cachep, page);
  
  	if (!shuffled && OBJFREELIST_SLAB(cachep)) {
b03a017be   Joonsoo Kim   mm/slab: introduc...
2465
2466
2467
  		page->freelist = index_to_obj(cachep, page, cachep->num - 1) +
  						obj_offset(cachep);
  	}
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2468
  	for (i = 0; i < cachep->num; i++) {
b3cbd9bf7   Andrey Ryabinin   mm/kasan: get rid...
2469
2470
  		objp = index_to_obj(cachep, page, i);
  		kasan_init_slab_obj(cachep, objp);
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2471
  		/* constructor could break poison info */
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
2472
  		if (DEBUG == 0 && cachep->ctor) {
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
2473
2474
2475
2476
  			kasan_unpoison_object_data(cachep, objp);
  			cachep->ctor(objp);
  			kasan_poison_object_data(cachep, objp);
  		}
10b2e9e8e   Joonsoo Kim   mm/slab: factor o...
2477

c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
2478
2479
  		if (!shuffled)
  			set_free_obj(page, i, i);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2480
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2481
  }
260b61dd4   Joonsoo Kim   mm/slab: remove t...
2482
  static void *slab_get_obj(struct kmem_cache *cachep, struct page *page)
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2483
  {
b1cb0982b   Joonsoo Kim   slab: change the ...
2484
  	void *objp;
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2485

e5c58dfdc   Joonsoo Kim   slab: introduce h...
2486
  	objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
8456a648c   Joonsoo Kim   slab: use struct ...
2487
  	page->active++;
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2488

d31676dfd   Joonsoo Kim   mm/slab: alternat...
2489
2490
2491
2492
  #if DEBUG
  	if (cachep->flags & SLAB_STORE_USER)
  		set_store_user_dirty(cachep);
  #endif
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2493
2494
  	return objp;
  }
260b61dd4   Joonsoo Kim   mm/slab: remove t...
2495
2496
  static void slab_put_obj(struct kmem_cache *cachep,
  			struct page *page, void *objp)
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2497
  {
8456a648c   Joonsoo Kim   slab: use struct ...
2498
  	unsigned int objnr = obj_to_index(cachep, page, objp);
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2499
  #if DEBUG
16025177e   Joonsoo Kim   slab: remove kmem...
2500
  	unsigned int i;
b1cb0982b   Joonsoo Kim   slab: change the ...
2501

b1cb0982b   Joonsoo Kim   slab: change the ...
2502
  	/* Verify double free bug */
8456a648c   Joonsoo Kim   slab: use struct ...
2503
  	for (i = page->active; i < cachep->num; i++) {
e5c58dfdc   Joonsoo Kim   slab: introduce h...
2504
  		if (get_free_obj(page, i) == objnr) {
85c3e4a5a   Geert Uytterhoeven   mm/slab.c: do not...
2505
2506
  			pr_err("slab: double free detected in cache '%s', objp %px
  ",
756a025f0   Joe Perches   mm: coalesce spli...
2507
  			       cachep->name, objp);
b1cb0982b   Joonsoo Kim   slab: change the ...
2508
2509
  			BUG();
  		}
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2510
2511
  	}
  #endif
8456a648c   Joonsoo Kim   slab: use struct ...
2512
  	page->active--;
b03a017be   Joonsoo Kim   mm/slab: introduc...
2513
2514
  	if (!page->freelist)
  		page->freelist = objp + obj_offset(cachep);
e5c58dfdc   Joonsoo Kim   slab: introduce h...
2515
  	set_free_obj(page, page->active, objnr);
78d382d77   Matthew Dobson   [PATCH] slab: ext...
2516
  }
4776874ff   Pekka Enberg   [PATCH] slab: pag...
2517
2518
2519
  /*
   * Map pages beginning at addr to the given cache and slab. This is required
   * for the slab allocator to be able to lookup the cache and slab of a
ccd35fb9f   Nick Piggin   kernel: kmem_ptr_...
2520
   * virtual address for kfree, ksize, and slab debugging.
4776874ff   Pekka Enberg   [PATCH] slab: pag...
2521
   */
8456a648c   Joonsoo Kim   slab: use struct ...
2522
  static void slab_map_pages(struct kmem_cache *cache, struct page *page,
7e0073552   Joonsoo Kim   slab: replace non...
2523
  			   void *freelist)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2524
  {
a57a49887   Joonsoo Kim   slab: use __GFP_C...
2525
  	page->slab_cache = cache;
8456a648c   Joonsoo Kim   slab: use struct ...
2526
  	page->freelist = freelist;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2527
2528
2529
2530
2531
2532
  }
  
  /*
   * Grow (by 1) the number of slabs within a cache.  This is called by
   * kmem_cache_alloc() when there are no active objs left in a cache.
   */
76b342bdc   Joonsoo Kim   mm/slab: separate...
2533
2534
  static struct page *cache_grow_begin(struct kmem_cache *cachep,
  				gfp_t flags, int nodeid)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2535
  {
7e0073552   Joonsoo Kim   slab: replace non...
2536
  	void *freelist;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
2537
2538
  	size_t offset;
  	gfp_t local_flags;
511e3a058   Joonsoo Kim   mm/slab: make cac...
2539
  	int page_node;
ce8eb6c42   Christoph Lameter   slab: Rename list...
2540
  	struct kmem_cache_node *n;
511e3a058   Joonsoo Kim   mm/slab: make cac...
2541
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2542

a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2543
2544
2545
  	/*
  	 * Be lazy and only check for valid flags here,  keeping it out of the
  	 * critical path in kmem_cache_alloc().
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2546
  	 */
c871ac4e9   Andrew Morton   slab: improve che...
2547
  	if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
bacdcb346   Michal Hocko   slab: make GFP_SL...
2548
  		gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
72baeef0c   Michal Hocko   slab: do not pani...
2549
2550
2551
2552
2553
  		flags &= ~GFP_SLAB_BUG_MASK;
  		pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!
  ",
  				invalid_mask, &invalid_mask, flags, &flags);
  		dump_stack();
c871ac4e9   Andrew Morton   slab: improve che...
2554
  	}
128227e7f   Matthew Wilcox   slab: __GFP_ZERO ...
2555
  	WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
6cb062296   Christoph Lameter   Categorize GFP flags
2556
  	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2557

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2558
  	check_irq_off();
d0164adc8   Mel Gorman   mm, page_alloc: d...
2559
  	if (gfpflags_allow_blocking(local_flags))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2560
2561
2562
  		local_irq_enable();
  
  	/*
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2563
2564
  	 * Get mem for the objs.  Attempt to allocate a physical page from
  	 * 'nodeid'.
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2565
  	 */
511e3a058   Joonsoo Kim   mm/slab: make cac...
2566
  	page = kmem_getpages(cachep, local_flags, nodeid);
0c3aa83e0   Joonsoo Kim   slab: change retu...
2567
  	if (!page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2568
  		goto failed;
511e3a058   Joonsoo Kim   mm/slab: make cac...
2569
2570
  	page_node = page_to_nid(page);
  	n = get_node(cachep, page_node);
03d1d43a1   Joonsoo Kim   mm/slab: racy acc...
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
  
  	/* Get colour for the slab, and cal the next value. */
  	n->colour_next++;
  	if (n->colour_next >= cachep->colour)
  		n->colour_next = 0;
  
  	offset = n->colour_next;
  	if (offset >= cachep->colour)
  		offset = 0;
  
  	offset *= cachep->colour_off;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2582
  	/* Get slab management. */
8456a648c   Joonsoo Kim   slab: use struct ...
2583
  	freelist = alloc_slabmgmt(cachep, page, offset,
511e3a058   Joonsoo Kim   mm/slab: make cac...
2584
  			local_flags & ~GFP_CONSTRAINT_MASK, page_node);
b03a017be   Joonsoo Kim   mm/slab: introduc...
2585
  	if (OFF_SLAB(cachep) && !freelist)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2586
  		goto opps1;
8456a648c   Joonsoo Kim   slab: use struct ...
2587
  	slab_map_pages(cachep, page, freelist);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2588

7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
2589
  	kasan_poison_slab(page);
8456a648c   Joonsoo Kim   slab: use struct ...
2590
  	cache_init_objs(cachep, page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2591

d0164adc8   Mel Gorman   mm, page_alloc: d...
2592
  	if (gfpflags_allow_blocking(local_flags))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2593
  		local_irq_disable();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2594

76b342bdc   Joonsoo Kim   mm/slab: separate...
2595
  	return page;
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2596
  opps1:
0c3aa83e0   Joonsoo Kim   slab: change retu...
2597
  	kmem_freepages(cachep, page);
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2598
  failed:
d0164adc8   Mel Gorman   mm, page_alloc: d...
2599
  	if (gfpflags_allow_blocking(local_flags))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2600
  		local_irq_disable();
76b342bdc   Joonsoo Kim   mm/slab: separate...
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
  	return NULL;
  }
  
  static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
  {
  	struct kmem_cache_node *n;
  	void *list = NULL;
  
  	check_irq_off();
  
  	if (!page)
  		return;
  
  	INIT_LIST_HEAD(&page->lru);
  	n = get_node(cachep, page_to_nid(page));
  
  	spin_lock(&n->list_lock);
bf00bd345   David Rientjes   mm, slab: maintai...
2618
  	n->total_slabs++;
f728b0a5d   Greg Thelen   mm, slab: faster ...
2619
  	if (!page->active) {
76b342bdc   Joonsoo Kim   mm/slab: separate...
2620
  		list_add_tail(&page->lru, &(n->slabs_free));
f728b0a5d   Greg Thelen   mm, slab: faster ...
2621
  		n->free_slabs++;
bf00bd345   David Rientjes   mm, slab: maintai...
2622
  	} else
76b342bdc   Joonsoo Kim   mm/slab: separate...
2623
  		fixup_slab_list(cachep, n, page, &list);
07a63c41f   Aruna Ramakrishna   mm/slab: improve ...
2624

76b342bdc   Joonsoo Kim   mm/slab: separate...
2625
2626
2627
2628
2629
  	STATS_INC_GROWN(cachep);
  	n->free_objects += cachep->num - page->active;
  	spin_unlock(&n->list_lock);
  
  	fixup_objfreelist_debug(cachep, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2630
2631
2632
2633
2634
2635
2636
2637
  }
  
  #if DEBUG
  
  /*
   * Perform extra freeing checks:
   * - detect bad pointers.
   * - POISON/RED_ZONE checking
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2638
2639
2640
   */
  static void kfree_debugcheck(const void *objp)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2641
  	if (!virt_addr_valid(objp)) {
1170532bb   Joe Perches   mm: convert print...
2642
2643
  		pr_err("kfree_debugcheck: out of range ptr %lxh
  ",
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
2644
2645
  		       (unsigned long)objp);
  		BUG();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2646
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2647
  }
58ce1fd58   Pekka Enberg   [PATCH] slab: red...
2648
2649
  static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
  {
b46b8f19c   David Woodhouse   Increase slab red...
2650
  	unsigned long long redzone1, redzone2;
58ce1fd58   Pekka Enberg   [PATCH] slab: red...
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
  
  	redzone1 = *dbg_redzone1(cache, obj);
  	redzone2 = *dbg_redzone2(cache, obj);
  
  	/*
  	 * Redzone is ok.
  	 */
  	if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
  		return;
  
  	if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
  		slab_error(cache, "double free detected");
  	else
  		slab_error(cache, "memory outside object was overwritten");
85c3e4a5a   Geert Uytterhoeven   mm/slab.c: do not...
2665
2666
  	pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx
  ",
1170532bb   Joe Perches   mm: convert print...
2667
  	       obj, redzone1, redzone2);
58ce1fd58   Pekka Enberg   [PATCH] slab: red...
2668
  }
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
2669
  static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
2670
  				   unsigned long caller)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2671
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2672
  	unsigned int objnr;
8456a648c   Joonsoo Kim   slab: use struct ...
2673
  	struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2674

80cbd911c   Matthew Wilcox   Fix kmem_cache_fr...
2675
  	BUG_ON(virt_to_cache(objp) != cachep);
3dafccf22   Manfred Spraul   [PATCH] slab: dis...
2676
  	objp -= obj_offset(cachep);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2677
  	kfree_debugcheck(objp);
b49af68ff   Christoph Lameter   Add virt_to_head_...
2678
  	page = virt_to_head_page(objp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2679

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2680
  	if (cachep->flags & SLAB_RED_ZONE) {
58ce1fd58   Pekka Enberg   [PATCH] slab: red...
2681
  		verify_redzone_free(cachep, objp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2682
2683
2684
  		*dbg_redzone1(cachep, objp) = RED_INACTIVE;
  		*dbg_redzone2(cachep, objp) = RED_INACTIVE;
  	}
d31676dfd   Joonsoo Kim   mm/slab: alternat...
2685
2686
  	if (cachep->flags & SLAB_STORE_USER) {
  		set_store_user_dirty(cachep);
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
2687
  		*dbg_userword(cachep, objp) = (void *)caller;
d31676dfd   Joonsoo Kim   mm/slab: alternat...
2688
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2689

8456a648c   Joonsoo Kim   slab: use struct ...
2690
  	objnr = obj_to_index(cachep, page, objp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2691
2692
  
  	BUG_ON(objnr >= cachep->num);
8456a648c   Joonsoo Kim   slab: use struct ...
2693
  	BUG_ON(objp != index_to_obj(cachep, page, objnr));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2694

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2695
  	if (cachep->flags & SLAB_POISON) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2696
  		poison_obj(cachep, objp, POISON_FREE);
40b441379   Joonsoo Kim   mm/slab: clean up...
2697
  		slab_kernel_map(cachep, objp, 0, caller);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2698
2699
2700
  	}
  	return objp;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2701
2702
2703
  #else
  #define kfree_debugcheck(x) do { } while(0)
  #define cache_free_debugcheck(x,objp,z) (objp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2704
  #endif
b03a017be   Joonsoo Kim   mm/slab: introduc...
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
  static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
  						void **list)
  {
  #if DEBUG
  	void *next = *list;
  	void *objp;
  
  	while (next) {
  		objp = next - obj_offset(cachep);
  		next = *(void **)next;
  		poison_obj(cachep, objp, POISON_FREE);
  	}
  #endif
  }
d8410234d   Joonsoo Kim   mm/slab: factor o...
2719
  static inline void fixup_slab_list(struct kmem_cache *cachep,
b03a017be   Joonsoo Kim   mm/slab: introduc...
2720
2721
  				struct kmem_cache_node *n, struct page *page,
  				void **list)
d8410234d   Joonsoo Kim   mm/slab: factor o...
2722
2723
2724
  {
  	/* move slabp to correct slabp list: */
  	list_del(&page->lru);
b03a017be   Joonsoo Kim   mm/slab: introduc...
2725
  	if (page->active == cachep->num) {
d8410234d   Joonsoo Kim   mm/slab: factor o...
2726
  		list_add(&page->lru, &n->slabs_full);
b03a017be   Joonsoo Kim   mm/slab: introduc...
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
  		if (OBJFREELIST_SLAB(cachep)) {
  #if DEBUG
  			/* Poisoning will be done without holding the lock */
  			if (cachep->flags & SLAB_POISON) {
  				void **objp = page->freelist;
  
  				*objp = *list;
  				*list = objp;
  			}
  #endif
  			page->freelist = NULL;
  		}
  	} else
d8410234d   Joonsoo Kim   mm/slab: factor o...
2740
2741
  		list_add(&page->lru, &n->slabs_partial);
  }
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2742
2743
  /* Try to find non-pfmemalloc slab if needed */
  static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n,
bf00bd345   David Rientjes   mm, slab: maintai...
2744
  					struct page *page, bool pfmemalloc)
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
  {
  	if (!page)
  		return NULL;
  
  	if (pfmemalloc)
  		return page;
  
  	if (!PageSlabPfmemalloc(page))
  		return page;
  
  	/* No need to keep pfmemalloc slab if we have enough free objects */
  	if (n->free_objects > n->free_limit) {
  		ClearPageSlabPfmemalloc(page);
  		return page;
  	}
  
  	/* Move pfmemalloc slab to the end of list to speed up next search */
  	list_del(&page->lru);
bf00bd345   David Rientjes   mm, slab: maintai...
2763
  	if (!page->active) {
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2764
  		list_add_tail(&page->lru, &n->slabs_free);
bf00bd345   David Rientjes   mm, slab: maintai...
2765
  		n->free_slabs++;
f728b0a5d   Greg Thelen   mm, slab: faster ...
2766
  	} else
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2767
2768
2769
2770
2771
2772
  		list_add_tail(&page->lru, &n->slabs_partial);
  
  	list_for_each_entry(page, &n->slabs_partial, lru) {
  		if (!PageSlabPfmemalloc(page))
  			return page;
  	}
f728b0a5d   Greg Thelen   mm, slab: faster ...
2773
  	n->free_touched = 1;
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2774
  	list_for_each_entry(page, &n->slabs_free, lru) {
f728b0a5d   Greg Thelen   mm, slab: faster ...
2775
  		if (!PageSlabPfmemalloc(page)) {
bf00bd345   David Rientjes   mm, slab: maintai...
2776
  			n->free_slabs--;
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2777
  			return page;
f728b0a5d   Greg Thelen   mm, slab: faster ...
2778
  		}
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2779
2780
2781
2782
2783
2784
  	}
  
  	return NULL;
  }
  
  static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
7aa0d2278   Geliang Tang   mm/slab.c: add a ...
2785
2786
  {
  	struct page *page;
f728b0a5d   Greg Thelen   mm, slab: faster ...
2787
  	assert_spin_locked(&n->list_lock);
bf00bd345   David Rientjes   mm, slab: maintai...
2788
  	page = list_first_entry_or_null(&n->slabs_partial, struct page, lru);
7aa0d2278   Geliang Tang   mm/slab.c: add a ...
2789
2790
  	if (!page) {
  		n->free_touched = 1;
bf00bd345   David Rientjes   mm, slab: maintai...
2791
2792
  		page = list_first_entry_or_null(&n->slabs_free, struct page,
  						lru);
f728b0a5d   Greg Thelen   mm, slab: faster ...
2793
  		if (page)
bf00bd345   David Rientjes   mm, slab: maintai...
2794
  			n->free_slabs--;
7aa0d2278   Geliang Tang   mm/slab.c: add a ...
2795
  	}
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2796
  	if (sk_memalloc_socks())
bf00bd345   David Rientjes   mm, slab: maintai...
2797
  		page = get_valid_first_slab(n, page, pfmemalloc);
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2798

7aa0d2278   Geliang Tang   mm/slab.c: add a ...
2799
2800
  	return page;
  }
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
  static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
  				struct kmem_cache_node *n, gfp_t flags)
  {
  	struct page *page;
  	void *obj;
  	void *list = NULL;
  
  	if (!gfp_pfmemalloc_allowed(flags))
  		return NULL;
  
  	spin_lock(&n->list_lock);
  	page = get_first_slab(n, true);
  	if (!page) {
  		spin_unlock(&n->list_lock);
  		return NULL;
  	}
  
  	obj = slab_get_obj(cachep, page);
  	n->free_objects--;
  
  	fixup_slab_list(cachep, n, page, &list);
  
  	spin_unlock(&n->list_lock);
  	fixup_objfreelist_debug(cachep, &list);
  
  	return obj;
  }
213b46958   Joonsoo Kim   mm/slab: refill c...
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
  /*
   * Slab list should be fixed up by fixup_slab_list() for existing slab
   * or cache_grow_end() for new slab
   */
  static __always_inline int alloc_block(struct kmem_cache *cachep,
  		struct array_cache *ac, struct page *page, int batchcount)
  {
  	/*
  	 * There must be at least one object available for
  	 * allocation.
  	 */
  	BUG_ON(page->active >= cachep->num);
  
  	while (page->active < cachep->num && batchcount--) {
  		STATS_INC_ALLOCED(cachep);
  		STATS_INC_ACTIVE(cachep);
  		STATS_SET_HIGH(cachep);
  
  		ac->entry[ac->avail++] = slab_get_obj(cachep, page);
  	}
  
  	return batchcount;
  }
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2851
  static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2852
2853
  {
  	int batchcount;
ce8eb6c42   Christoph Lameter   slab: Rename list...
2854
  	struct kmem_cache_node *n;
801faf0db   Joonsoo Kim   mm/slab: lockless...
2855
  	struct array_cache *ac, *shared;
1ca4cb241   Pekka Enberg   [PATCH] slab: red...
2856
  	int node;
b03a017be   Joonsoo Kim   mm/slab: introduc...
2857
  	void *list = NULL;
76b342bdc   Joonsoo Kim   mm/slab: separate...
2858
  	struct page *page;
1ca4cb241   Pekka Enberg   [PATCH] slab: red...
2859

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2860
  	check_irq_off();
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
2861
  	node = numa_mem_id();
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2862

9a2dba4b4   Pekka Enberg   [PATCH] slab: ren...
2863
  	ac = cpu_cache_get(cachep);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2864
2865
  	batchcount = ac->batchcount;
  	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2866
2867
2868
2869
  		/*
  		 * If there was little recent activity on this cache, then
  		 * perform only a partial refill.  Otherwise we could generate
  		 * refill bouncing.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2870
2871
2872
  		 */
  		batchcount = BATCHREFILL_LIMIT;
  	}
18bf85411   Christoph Lameter   slab: use get_nod...
2873
  	n = get_node(cachep, node);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2874

ce8eb6c42   Christoph Lameter   slab: Rename list...
2875
  	BUG_ON(ac->avail > 0 || !n);
801faf0db   Joonsoo Kim   mm/slab: lockless...
2876
2877
2878
  	shared = READ_ONCE(n->shared);
  	if (!n->free_objects && (!shared || !shared->avail))
  		goto direct_grow;
ce8eb6c42   Christoph Lameter   slab: Rename list...
2879
  	spin_lock(&n->list_lock);
801faf0db   Joonsoo Kim   mm/slab: lockless...
2880
  	shared = READ_ONCE(n->shared);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2881

3ded175a4   Christoph Lameter   [PATCH] slab: add...
2882
  	/* See if we can refill from the shared array */
801faf0db   Joonsoo Kim   mm/slab: lockless...
2883
2884
  	if (shared && transfer_objects(ac, shared, batchcount)) {
  		shared->touched = 1;
3ded175a4   Christoph Lameter   [PATCH] slab: add...
2885
  		goto alloc_done;
44b57f1cc   Nick Piggin   slab: fix regress...
2886
  	}
3ded175a4   Christoph Lameter   [PATCH] slab: add...
2887

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2888
  	while (batchcount > 0) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2889
  		/* Get slab alloc is to come from. */
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2890
  		page = get_first_slab(n, false);
7aa0d2278   Geliang Tang   mm/slab.c: add a ...
2891
2892
  		if (!page)
  			goto must_grow;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2893

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2894
  		check_spinlock_acquired(cachep);
714b8171a   Pekka Enberg   slab: ensure cach...
2895

213b46958   Joonsoo Kim   mm/slab: refill c...
2896
  		batchcount = alloc_block(cachep, ac, page, batchcount);
b03a017be   Joonsoo Kim   mm/slab: introduc...
2897
  		fixup_slab_list(cachep, n, page, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2898
  	}
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2899
  must_grow:
ce8eb6c42   Christoph Lameter   slab: Rename list...
2900
  	n->free_objects -= ac->avail;
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2901
  alloc_done:
ce8eb6c42   Christoph Lameter   slab: Rename list...
2902
  	spin_unlock(&n->list_lock);
b03a017be   Joonsoo Kim   mm/slab: introduc...
2903
  	fixup_objfreelist_debug(cachep, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2904

801faf0db   Joonsoo Kim   mm/slab: lockless...
2905
  direct_grow:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2906
  	if (unlikely(!ac->avail)) {
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2907
2908
2909
2910
2911
2912
2913
  		/* Check if we can use obj in pfmemalloc slab */
  		if (sk_memalloc_socks()) {
  			void *obj = cache_alloc_pfmemalloc(cachep, n, flags);
  
  			if (obj)
  				return obj;
  		}
76b342bdc   Joonsoo Kim   mm/slab: separate...
2914
  		page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
2915

76b342bdc   Joonsoo Kim   mm/slab: separate...
2916
2917
2918
2919
  		/*
  		 * cache_grow_begin() can reenable interrupts,
  		 * then ac could change.
  		 */
9a2dba4b4   Pekka Enberg   [PATCH] slab: ren...
2920
  		ac = cpu_cache_get(cachep);
213b46958   Joonsoo Kim   mm/slab: refill c...
2921
2922
2923
  		if (!ac->avail && page)
  			alloc_block(cachep, ac, page, batchcount);
  		cache_grow_end(cachep, page);
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
2924

213b46958   Joonsoo Kim   mm/slab: refill c...
2925
  		if (!ac->avail)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2926
  			return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2927
2928
  	}
  	ac->touched = 1;
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
2929

f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2930
  	return ac->entry[--ac->avail];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2931
  }
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2932
2933
  static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
  						gfp_t flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2934
  {
d0164adc8   Mel Gorman   mm, page_alloc: d...
2935
  	might_sleep_if(gfpflags_allow_blocking(flags));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2936
2937
2938
  }
  
  #if DEBUG
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2939
  static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
2940
  				gfp_t flags, void *objp, unsigned long caller)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2941
  {
128227e7f   Matthew Wilcox   slab: __GFP_ZERO ...
2942
  	WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
2943
  	if (!objp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2944
  		return objp;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
2945
  	if (cachep->flags & SLAB_POISON) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2946
  		check_poison_obj(cachep, objp);
40b441379   Joonsoo Kim   mm/slab: clean up...
2947
  		slab_kernel_map(cachep, objp, 1, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2948
2949
2950
  		poison_obj(cachep, objp, POISON_INUSE);
  	}
  	if (cachep->flags & SLAB_STORE_USER)
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
2951
  		*dbg_userword(cachep, objp) = (void *)caller;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2952
2953
  
  	if (cachep->flags & SLAB_RED_ZONE) {
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
2954
2955
  		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
  				*dbg_redzone2(cachep, objp) != RED_INACTIVE) {
756a025f0   Joe Perches   mm: coalesce spli...
2956
  			slab_error(cachep, "double free, or memory outside object was overwritten");
85c3e4a5a   Geert Uytterhoeven   mm/slab.c: do not...
2957
2958
  			pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx
  ",
1170532bb   Joe Perches   mm: convert print...
2959
2960
  			       objp, *dbg_redzone1(cachep, objp),
  			       *dbg_redzone2(cachep, objp));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2961
2962
2963
2964
  		}
  		*dbg_redzone1(cachep, objp) = RED_ACTIVE;
  		*dbg_redzone2(cachep, objp) = RED_ACTIVE;
  	}
037873014   Joonsoo Kim   slab: fix oops wh...
2965

3dafccf22   Manfred Spraul   [PATCH] slab: dis...
2966
  	objp += obj_offset(cachep);
4f1049345   Christoph Lameter   slab allocators: ...
2967
  	if (cachep->ctor && cachep->flags & SLAB_POISON)
51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
2968
  		cachep->ctor(objp);
7ea466f22   Tetsuo Handa   slab: fix DEBUG_S...
2969
2970
  	if (ARCH_SLAB_MINALIGN &&
  	    ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
85c3e4a5a   Geert Uytterhoeven   mm/slab.c: do not...
2971
2972
  		pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d
  ",
c225150b8   Hugh Dickins   slab: fix DEBUG_S...
2973
  		       objp, (int)ARCH_SLAB_MINALIGN);
a44b56d35   Kevin Hilman   [PATCH] slab debu...
2974
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2975
2976
2977
2978
2979
  	return objp;
  }
  #else
  #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
  #endif
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
2980
  static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2981
  {
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
2982
  	void *objp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2983
  	struct array_cache *ac;
5c3823008   Alok N Kataria   [PATCH] kmalloc_n...
2984
  	check_irq_off();
8a8b6502f   Akinobu Mita   [PATCH] fault-inj...
2985

9a2dba4b4   Pekka Enberg   [PATCH] slab: ren...
2986
  	ac = cpu_cache_get(cachep);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2987
  	if (likely(ac->avail)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2988
  		ac->touched = 1;
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2989
  		objp = ac->entry[--ac->avail];
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
2990

f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2991
2992
  		STATS_INC_ALLOCHIT(cachep);
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2993
  	}
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
2994
2995
  
  	STATS_INC_ALLOCMISS(cachep);
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
2996
  	objp = cache_alloc_refill(cachep, flags);
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
2997
2998
2999
3000
3001
3002
3003
  	/*
  	 * the 'ac' may be updated by cache_alloc_refill(),
  	 * and kmemleak_erase() requires its correct value.
  	 */
  	ac = cpu_cache_get(cachep);
  
  out:
d5cff6352   Catalin Marinas   kmemleak: Add the...
3004
3005
3006
3007
3008
  	/*
  	 * To avoid a false negative, if an object that is in one of the
  	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
  	 * treat the array pointers as a reference to the object.
  	 */
f3d8b53a3   J. R. Okajima   slab, kmemleak: s...
3009
3010
  	if (objp)
  		kmemleak_erase(&ac->entry[ac->avail]);
5c3823008   Alok N Kataria   [PATCH] kmalloc_n...
3011
3012
  	return objp;
  }
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3013
3014
  #ifdef CONFIG_NUMA
  /*
2ad654bc5   Zefan Li   cpuset: PF_SPREAD...
3015
   * Try allocating on another node if PFA_SPREAD_SLAB is a mempolicy is set.
c61afb181   Paul Jackson   [PATCH] cpuset me...
3016
3017
3018
3019
3020
3021
3022
   *
   * If we are in_interrupt, then process context, including cpusets and
   * mempolicy, may not apply and should not be used for allocation policy.
   */
  static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
  {
  	int nid_alloc, nid_here;
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
3023
  	if (in_interrupt() || (flags & __GFP_THISNODE))
c61afb181   Paul Jackson   [PATCH] cpuset me...
3024
  		return NULL;
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3025
  	nid_alloc = nid_here = numa_mem_id();
c61afb181   Paul Jackson   [PATCH] cpuset me...
3026
  	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
6adef3ebe   Jack Steiner   cpusets: new roun...
3027
  		nid_alloc = cpuset_slab_spread_node();
c61afb181   Paul Jackson   [PATCH] cpuset me...
3028
  	else if (current->mempolicy)
2a389610a   David Rientjes   mm, mempolicy: re...
3029
  		nid_alloc = mempolicy_slab_node();
c61afb181   Paul Jackson   [PATCH] cpuset me...
3030
  	if (nid_alloc != nid_here)
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3031
  		return ____cache_alloc_node(cachep, flags, nid_alloc);
c61afb181   Paul Jackson   [PATCH] cpuset me...
3032
3033
3034
3035
  	return NULL;
  }
  
  /*
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
3036
   * Fallback function if there was no memory available and no objects on a
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3037
 * certain node and falling back is permitted. First we scan all the
6a67368c3   Christoph Lameter   slab: Rename node...
3038
 * available nodes for available objects. If that fails then we
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3039
3040
3041
   * perform an allocation without specifying a node. This allows the page
   * allocator to do its reclaim / fallback magic. We then insert the
   * slab into the proper nodelist and then allocate from it.
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
3042
   */
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3043
  static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
3044
  {
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3045
  	struct zonelist *zonelist;
dd1a239f6   Mel Gorman   mm: have zonelist...
3046
  	struct zoneref *z;
54a6eb5c4   Mel Gorman   mm: use two zonel...
3047
3048
  	struct zone *zone;
  	enum zone_type high_zoneidx = gfp_zone(flags);
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
3049
  	void *obj = NULL;
76b342bdc   Joonsoo Kim   mm/slab: separate...
3050
  	struct page *page;
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3051
  	int nid;
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
3052
  	unsigned int cpuset_mems_cookie;
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3053
3054
3055
  
  	if (flags & __GFP_THISNODE)
  		return NULL;
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
3056
  retry_cpuset:
d26914d11   Mel Gorman   mm: optimize put_...
3057
  	cpuset_mems_cookie = read_mems_allowed_begin();
2a389610a   David Rientjes   mm, mempolicy: re...
3058
  	zonelist = node_zonelist(mempolicy_slab_node(), flags);
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
3059

3c517a613   Christoph Lameter   [PATCH] slab: bet...
3060
3061
3062
3063
3064
  retry:
  	/*
  	 * Look through allowed nodes for objects available
  	 * from existing per node queues.
  	 */
54a6eb5c4   Mel Gorman   mm: use two zonel...
3065
3066
  	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
  		nid = zone_to_nid(zone);
aedb0eb10   Christoph Lameter   [PATCH] Slab: Do ...
3067

061d7074e   Vladimir Davydov   slab: fix cpuset ...
3068
  		if (cpuset_zone_allowed(zone, flags) &&
18bf85411   Christoph Lameter   slab: use get_nod...
3069
3070
  			get_node(cache, nid) &&
  			get_node(cache, nid)->free_objects) {
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3071
  				obj = ____cache_alloc_node(cache,
4167e9b2c   David Rientjes   mm: remove GFP_TH...
3072
  					gfp_exact_node(flags), nid);
481c5346d   Christoph Lameter   Slab: Fix memory ...
3073
3074
3075
  				if (obj)
  					break;
  		}
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3076
  	}
cfce66047   Christoph Lameter   Slab allocators: ...
3077
  	if (!obj) {
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3078
3079
3080
3081
3082
3083
  		/*
  		 * This allocation will be performed within the constraints
  		 * of the current cpuset / memory policy requirements.
  		 * We may trigger various forms of reclaim on the allowed
  		 * set and go into memory reserves if necessary.
  		 */
76b342bdc   Joonsoo Kim   mm/slab: separate...
3084
3085
3086
3087
  		page = cache_grow_begin(cache, flags, numa_mem_id());
  		cache_grow_end(cache, page);
  		if (page) {
  			nid = page_to_nid(page);
511e3a058   Joonsoo Kim   mm/slab: make cac...
3088
3089
  			obj = ____cache_alloc_node(cache,
  				gfp_exact_node(flags), nid);
0c3aa83e0   Joonsoo Kim   slab: change retu...
3090

3c517a613   Christoph Lameter   [PATCH] slab: bet...
3091
  			/*
511e3a058   Joonsoo Kim   mm/slab: make cac...
3092
3093
  			 * Another processor may allocate the objects in
  			 * the slab since we are not holding any locks.
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3094
  			 */
511e3a058   Joonsoo Kim   mm/slab: make cac...
3095
3096
  			if (!obj)
  				goto retry;
3c517a613   Christoph Lameter   [PATCH] slab: bet...
3097
  		}
aedb0eb10   Christoph Lameter   [PATCH] Slab: Do ...
3098
  	}
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
3099

d26914d11   Mel Gorman   mm: optimize put_...
3100
  	if (unlikely(!obj && read_mems_allowed_retry(cpuset_mems_cookie)))
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
3101
  		goto retry_cpuset;
765c4507a   Christoph Lameter   [PATCH] GFP_THISN...
3102
3103
3104
3105
  	return obj;
  }
  
  /*
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3106
 * An interface to enable slab creation on nodeid
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3107
   */
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3108
  static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3109
  				int nodeid)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3110
  {
8456a648c   Joonsoo Kim   slab: use struct ...
3111
  	struct page *page;
ce8eb6c42   Christoph Lameter   slab: Rename list...
3112
  	struct kmem_cache_node *n;
213b46958   Joonsoo Kim   mm/slab: refill c...
3113
  	void *obj = NULL;
b03a017be   Joonsoo Kim   mm/slab: introduc...
3114
  	void *list = NULL;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3115

7c3fbbdd0   Paul Mackerras   slab: fix nodeid ...
3116
  	VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES);
18bf85411   Christoph Lameter   slab: use get_nod...
3117
  	n = get_node(cachep, nodeid);
ce8eb6c42   Christoph Lameter   slab: Rename list...
3118
  	BUG_ON(!n);
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3119

ca3b9b917   Ravikiran G Thirumalai   [PATCH] NUMA slab...
3120
  	check_irq_off();
ce8eb6c42   Christoph Lameter   slab: Rename list...
3121
  	spin_lock(&n->list_lock);
f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
3122
  	page = get_first_slab(n, false);
7aa0d2278   Geliang Tang   mm/slab.c: add a ...
3123
3124
  	if (!page)
  		goto must_grow;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3125

b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3126
  	check_spinlock_acquired_node(cachep, nodeid);
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3127
3128
3129
3130
  
  	STATS_INC_NODEALLOCS(cachep);
  	STATS_INC_ACTIVE(cachep);
  	STATS_SET_HIGH(cachep);
8456a648c   Joonsoo Kim   slab: use struct ...
3131
  	BUG_ON(page->active == cachep->num);
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3132

260b61dd4   Joonsoo Kim   mm/slab: remove t...
3133
  	obj = slab_get_obj(cachep, page);
ce8eb6c42   Christoph Lameter   slab: Rename list...
3134
  	n->free_objects--;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3135

b03a017be   Joonsoo Kim   mm/slab: introduc...
3136
  	fixup_slab_list(cachep, n, page, &list);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3137

ce8eb6c42   Christoph Lameter   slab: Rename list...
3138
  	spin_unlock(&n->list_lock);
b03a017be   Joonsoo Kim   mm/slab: introduc...
3139
  	fixup_objfreelist_debug(cachep, &list);
213b46958   Joonsoo Kim   mm/slab: refill c...
3140
  	return obj;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3141

a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3142
  must_grow:
ce8eb6c42   Christoph Lameter   slab: Rename list...
3143
  	spin_unlock(&n->list_lock);
76b342bdc   Joonsoo Kim   mm/slab: separate...
3144
  	page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
213b46958   Joonsoo Kim   mm/slab: refill c...
3145
3146
3147
3148
  	if (page) {
  		/* This slab isn't counted yet so don't update free_objects */
  		obj = slab_get_obj(cachep, page);
  	}
76b342bdc   Joonsoo Kim   mm/slab: separate...
3149
  	cache_grow_end(cachep, page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3150

213b46958   Joonsoo Kim   mm/slab: refill c...
3151
  	return obj ? obj : fallback_alloc(cachep, flags);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3152
  }
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3153

8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3154
  static __always_inline void *
48356303f   Ezequiel Garcia   mm, slab: Rename ...
3155
  slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3156
  		   unsigned long caller)
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3157
3158
3159
  {
  	unsigned long save_flags;
  	void *ptr;
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3160
  	int slab_node = numa_mem_id();
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3161

dcce284a2   Benjamin Herrenschmidt   mm: Extend gfp ma...
3162
  	flags &= gfp_allowed_mask;
011eceaf0   Jesper Dangaard Brouer   slab: use slab_pr...
3163
3164
  	cachep = slab_pre_alloc_hook(cachep, flags);
  	if (unlikely(!cachep))
824ebef12   Akinobu Mita   fault injection: ...
3165
  		return NULL;
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3166
3167
  	cache_alloc_debugcheck_before(cachep, flags);
  	local_irq_save(save_flags);
eacbbae38   Andrew Morton   slab: use NUMA_NO...
3168
  	if (nodeid == NUMA_NO_NODE)
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3169
  		nodeid = slab_node;
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3170

18bf85411   Christoph Lameter   slab: use get_nod...
3171
  	if (unlikely(!get_node(cachep, nodeid))) {
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3172
3173
3174
3175
  		/* Node not bootstrapped yet */
  		ptr = fallback_alloc(cachep, flags);
  		goto out;
  	}
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3176
  	if (nodeid == slab_node) {
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
  		/*
  		 * Use the locally cached objects if possible.
  		 * However ____cache_alloc does not allow fallback
  		 * to other nodes. It may fail while we still have
  		 * objects on other nodes available.
  		 */
  		ptr = ____cache_alloc(cachep, flags);
  		if (ptr)
  			goto out;
  	}
  	/* ___cache_alloc_node can fall back to other nodes */
  	ptr = ____cache_alloc_node(cachep, flags, nodeid);
    out:
  	local_irq_restore(save_flags);
  	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
d5e3ed66d   Jesper Dangaard Brouer   slab: use slab_po...
3192
3193
  	if (unlikely(flags & __GFP_ZERO) && ptr)
  		memset(ptr, 0, cachep->object_size);
d07dbea46   Christoph Lameter   Slab allocators: ...
3194

d5e3ed66d   Jesper Dangaard Brouer   slab: use slab_po...
3195
  	slab_post_alloc_hook(cachep, flags, 1, &ptr);
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3196
3197
3198
3199
3200
3201
3202
  	return ptr;
  }
  
  static __always_inline void *
  __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
  {
  	void *objp;
2ad654bc5   Zefan Li   cpuset: PF_SPREAD...
3203
  	if (current->mempolicy || cpuset_do_slab_mem_spread()) {
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
  		objp = alternate_node_alloc(cache, flags);
  		if (objp)
  			goto out;
  	}
  	objp = ____cache_alloc(cache, flags);
  
  	/*
  	 * We may just have run out of memory on the local node.
  	 * ____cache_alloc_node() knows how to locate memory on other nodes
  	 */
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3214
3215
  	if (!objp)
  		objp = ____cache_alloc_node(cache, flags, numa_mem_id());
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
  
    out:
  	return objp;
  }
  #else
  
  static __always_inline void *
  __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
  {
  	return ____cache_alloc(cachep, flags);
  }
  
  #endif /* CONFIG_NUMA */
  
  static __always_inline void *
48356303f   Ezequiel Garcia   mm, slab: Rename ...
3231
  slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3232
3233
3234
  {
  	unsigned long save_flags;
  	void *objp;
dcce284a2   Benjamin Herrenschmidt   mm: Extend gfp ma...
3235
  	flags &= gfp_allowed_mask;
011eceaf0   Jesper Dangaard Brouer   slab: use slab_pr...
3236
3237
  	cachep = slab_pre_alloc_hook(cachep, flags);
  	if (unlikely(!cachep))
824ebef12   Akinobu Mita   fault injection: ...
3238
  		return NULL;
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3239
3240
3241
3242
3243
3244
  	cache_alloc_debugcheck_before(cachep, flags);
  	local_irq_save(save_flags);
  	objp = __do_cache_alloc(cachep, flags);
  	local_irq_restore(save_flags);
  	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
  	prefetchw(objp);
d5e3ed66d   Jesper Dangaard Brouer   slab: use slab_po...
3245
3246
  	if (unlikely(flags & __GFP_ZERO) && objp)
  		memset(objp, 0, cachep->object_size);
d07dbea46   Christoph Lameter   Slab allocators: ...
3247

d5e3ed66d   Jesper Dangaard Brouer   slab: use slab_po...
3248
  	slab_post_alloc_hook(cachep, flags, 1, &objp);
8c8cc2c10   Pekka Enberg   [PATCH] slab: cac...
3249
3250
  	return objp;
  }
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3251
3252
  
  /*
5f0985bb1   Jianyu Zhan   mm/slab.c: cleanu...
3253
   * Caller needs to acquire correct kmem_cache_node's list_lock
97654dfa2   Joonsoo Kim   slab: defer slab_...
3254
   * @list: List of detached free slabs should be freed by caller
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3255
   */
97654dfa2   Joonsoo Kim   slab: defer slab_...
3256
3257
  static void free_block(struct kmem_cache *cachep, void **objpp,
  			int nr_objects, int node, struct list_head *list)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3258
3259
  {
  	int i;
25c063fbd   Joonsoo Kim   slab: move up cod...
3260
  	struct kmem_cache_node *n = get_node(cachep, node);
6052b7880   Joonsoo Kim   mm/slab: don't ke...
3261
3262
3263
  	struct page *page;
  
  	n->free_objects += nr_objects;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3264
3265
  
  	for (i = 0; i < nr_objects; i++) {
072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
3266
  		void *objp;
8456a648c   Joonsoo Kim   slab: use struct ...
3267
  		struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3268

072bb0aa5   Mel Gorman   mm: sl[au]b: add ...
3269
  		objp = objpp[i];
8456a648c   Joonsoo Kim   slab: use struct ...
3270
  		page = virt_to_head_page(objp);
8456a648c   Joonsoo Kim   slab: use struct ...
3271
  		list_del(&page->lru);
ff69416e6   Christoph Lameter   [PATCH] slab: fix...
3272
  		check_spinlock_acquired_node(cachep, node);
260b61dd4   Joonsoo Kim   mm/slab: remove t...
3273
  		slab_put_obj(cachep, page, objp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3274
  		STATS_DEC_ACTIVE(cachep);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3275
3276
  
  		/* fixup slab chains */
f728b0a5d   Greg Thelen   mm, slab: faster ...
3277
  		if (page->active == 0) {
6052b7880   Joonsoo Kim   mm/slab: don't ke...
3278
  			list_add(&page->lru, &n->slabs_free);
f728b0a5d   Greg Thelen   mm, slab: faster ...
3279
  			n->free_slabs++;
f728b0a5d   Greg Thelen   mm, slab: faster ...
3280
  		} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3281
3282
3283
3284
  			/* Unconditionally move a slab to the end of the
			 * partial list on free - this gives the other
			 * objects maximum time to be freed, too.
  			 */
8456a648c   Joonsoo Kim   slab: use struct ...
3285
  			list_add_tail(&page->lru, &n->slabs_partial);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3286
3287
  		}
  	}
6052b7880   Joonsoo Kim   mm/slab: don't ke...
3288
3289
3290
3291
3292
  
  	while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) {
  		n->free_objects -= cachep->num;
  
  		page = list_last_entry(&n->slabs_free, struct page, lru);
de24baecd   Wei Yongjun   mm/slab: use list...
3293
  		list_move(&page->lru, list);
f728b0a5d   Greg Thelen   mm, slab: faster ...
3294
  		n->free_slabs--;
bf00bd345   David Rientjes   mm, slab: maintai...
3295
  		n->total_slabs--;
6052b7880   Joonsoo Kim   mm/slab: don't ke...
3296
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3297
  }
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
3298
  static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3299
3300
  {
  	int batchcount;
ce8eb6c42   Christoph Lameter   slab: Rename list...
3301
  	struct kmem_cache_node *n;
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3302
  	int node = numa_mem_id();
97654dfa2   Joonsoo Kim   slab: defer slab_...
3303
  	LIST_HEAD(list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3304
3305
  
  	batchcount = ac->batchcount;
260b61dd4   Joonsoo Kim   mm/slab: remove t...
3306

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3307
  	check_irq_off();
18bf85411   Christoph Lameter   slab: use get_nod...
3308
  	n = get_node(cachep, node);
ce8eb6c42   Christoph Lameter   slab: Rename list...
3309
3310
3311
  	spin_lock(&n->list_lock);
  	if (n->shared) {
  		struct array_cache *shared_array = n->shared;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3312
  		int max = shared_array->limit - shared_array->avail;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3313
3314
3315
  		if (max) {
  			if (batchcount > max)
  				batchcount = max;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3316
  			memcpy(&(shared_array->entry[shared_array->avail]),
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3317
  			       ac->entry, sizeof(void *) * batchcount);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3318
3319
3320
3321
  			shared_array->avail += batchcount;
  			goto free_done;
  		}
  	}
97654dfa2   Joonsoo Kim   slab: defer slab_...
3322
  	free_block(cachep, ac->entry, batchcount, node, &list);
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3323
  free_done:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3324
3325
3326
  #if STATS
  	{
  		int i = 0;
73c0219d8   Geliang Tang   mm/slab.c: use li...
3327
  		struct page *page;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3328

73c0219d8   Geliang Tang   mm/slab.c: use li...
3329
  		list_for_each_entry(page, &n->slabs_free, lru) {
8456a648c   Joonsoo Kim   slab: use struct ...
3330
  			BUG_ON(page->active);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3331
3332
  
  			i++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3333
3334
3335
3336
  		}
  		STATS_SET_FREEABLE(cachep, i);
  	}
  #endif
ce8eb6c42   Christoph Lameter   slab: Rename list...
3337
  	spin_unlock(&n->list_lock);
97654dfa2   Joonsoo Kim   slab: defer slab_...
3338
  	slabs_destroy(cachep, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3339
  	ac->avail -= batchcount;
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3340
  	memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3341
3342
3343
  }
  
  /*
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3344
3345
   * Release an obj back to its cache. If the obj has a constructed state, it must
 * be in this state _before_ it is released. Called with interrupts disabled.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3346
   */
ee3ce779b   Dmitry Vyukov   kasan: don't use ...
3347
3348
  static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp,
  					 unsigned long caller)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3349
  {
55834c590   Alexander Potapenko   mm: kasan: initia...
3350
  	/* Put the object into the quarantine, don't touch it for now. */
ee3ce779b   Dmitry Vyukov   kasan: don't use ...
3351
  	if (kasan_slab_free(cachep, objp, _RET_IP_))
55834c590   Alexander Potapenko   mm: kasan: initia...
3352
3353
3354
3355
  		return;
  
  	___cache_free(cachep, objp, caller);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3356

55834c590   Alexander Potapenko   mm: kasan: initia...
3357
3358
3359
3360
  void ___cache_free(struct kmem_cache *cachep, void *objp,
  		unsigned long caller)
  {
  	struct array_cache *ac = cpu_cache_get(cachep);
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
3361

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3362
  	check_irq_off();
d5cff6352   Catalin Marinas   kmemleak: Add the...
3363
  	kmemleak_free_recursive(objp, cachep->flags);
a947eb95e   Suleiman Souhlal   SLAB: Record actu...
3364
  	objp = cache_free_debugcheck(cachep, objp, caller);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3365

1807a1aaf   Siddha, Suresh B   slab: skip callin...
3366
3367
3368
3369
3370
3371
3372
  	/*
	 * Skip calling cache_free_alien() when the platform is not NUMA.
	 * This avoids the cache misses that happen while accessing slabp (which
	 * is a per-page memory reference) to get the nodeid. Instead, use a
	 * global variable to skip the call, which is most likely to be present
	 * in the cache.
  	 */
b6e68bc1b   Mel Gorman   page allocator: s...
3373
  	if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
729bd0b74   Pekka Enberg   [PATCH] slab: ext...
3374
  		return;
3d8801940   Joonsoo Kim   mm/slab: move cac...
3375
  	if (ac->avail < ac->limit) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3376
  		STATS_INC_FREEHIT(cachep);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3377
3378
3379
  	} else {
  		STATS_INC_FREEMISS(cachep);
  		cache_flusharray(cachep, ac);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3380
  	}
42c8c99cd   Zhao Jin   slab, cleanup: re...
3381

f68f8dddb   Joonsoo Kim   mm/slab: re-imple...
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
  	if (sk_memalloc_socks()) {
  		struct page *page = virt_to_head_page(objp);
  
  		if (unlikely(PageSlabPfmemalloc(page))) {
  			cache_free_pfmemalloc(cachep, page, objp);
  			return;
  		}
  	}
  
  	ac->entry[ac->avail++] = objp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
  }
  
  /**
   * kmem_cache_alloc - Allocate an object
   * @cachep: The cache to allocate from.
   * @flags: See kmalloc().
   *
   * Allocate an object from this cache.  The flags are only relevant
   * if the cache has no available objects.
   */
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
3402
  void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3403
  {
48356303f   Ezequiel Garcia   mm, slab: Rename ...
3404
  	void *ret = slab_alloc(cachep, flags, _RET_IP_);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3405

505f5dcb1   Alexander Potapenko   mm, kasan: add GF...
3406
  	kasan_slab_alloc(cachep, ret, flags);
ca2b84cb3   Eduard - Gabriel Munteanu   kmemtrace: use tr...
3407
  	trace_kmem_cache_alloc(_RET_IP_, ret,
8c138bc00   Christoph Lameter   slab: Get rid of ...
3408
  			       cachep->object_size, cachep->size, flags);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3409
3410
  
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3411
3412
  }
  EXPORT_SYMBOL(kmem_cache_alloc);
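
/*
 * Editorial usage sketch (not part of mm/slab.c): how a typical caller pairs
 * kmem_cache_create() / kmem_cache_alloc() / kmem_cache_free(). The cache
 * name, "struct foo" and the helper functions are hypothetical; only the
 * slab API calls are real. Assumes <linux/slab.h>.
 */
struct foo {
	int id;
	char tag[16];
};

static struct kmem_cache *foo_cachep;

static int foo_cache_init(void)
{
	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
				       0, SLAB_HWCACHE_ALIGN, NULL);
	return foo_cachep ? 0 : -ENOMEM;
}

static struct foo *foo_new(void)
{
	/* Usually served from the per-cpu array cache (the fast path above). */
	return kmem_cache_alloc(foo_cachep, GFP_KERNEL);
}

static void foo_release(struct foo *f)
{
	kmem_cache_free(foo_cachep, f);
}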
7b0501dd6   Jesper Dangaard Brouer   slab: avoid runni...
3413
3414
3415
3416
3417
3418
3419
3420
3421
  static __always_inline void
  cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags,
  				  size_t size, void **p, unsigned long caller)
  {
  	size_t i;
  
  	for (i = 0; i < size; i++)
  		p[i] = cache_alloc_debugcheck_after(s, flags, p[i], caller);
  }
865762a81   Jesper Dangaard Brouer   slab/slub: adjust...
3422
  int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
2a777eac1   Jesper Dangaard Brouer   slab: implement b...
3423
  			  void **p)
484748f0b   Christoph Lameter   slab: infrastruct...
3424
  {
2a777eac1   Jesper Dangaard Brouer   slab: implement b...
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
  	size_t i;
  
  	s = slab_pre_alloc_hook(s, flags);
  	if (!s)
  		return 0;
  
  	cache_alloc_debugcheck_before(s, flags);
  
  	local_irq_disable();
  	for (i = 0; i < size; i++) {
  		void *objp = __do_cache_alloc(s, flags);
2a777eac1   Jesper Dangaard Brouer   slab: implement b...
3436
3437
3438
3439
3440
  		if (unlikely(!objp))
  			goto error;
  		p[i] = objp;
  	}
  	local_irq_enable();
7b0501dd6   Jesper Dangaard Brouer   slab: avoid runni...
3441
  	cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_);
2a777eac1   Jesper Dangaard Brouer   slab: implement b...
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
  	/* Clear memory outside IRQ disabled section */
  	if (unlikely(flags & __GFP_ZERO))
  		for (i = 0; i < size; i++)
  			memset(p[i], 0, s->object_size);
  
  	slab_post_alloc_hook(s, flags, size, p);
  	/* FIXME: Trace call missing. Christoph would like a bulk variant */
  	return size;
  error:
  	local_irq_enable();
7b0501dd6   Jesper Dangaard Brouer   slab: avoid runni...
3452
  	cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
2a777eac1   Jesper Dangaard Brouer   slab: implement b...
3453
3454
3455
  	slab_post_alloc_hook(s, flags, i, p);
  	__kmem_cache_free_bulk(s, i, p);
  	return 0;
484748f0b   Christoph Lameter   slab: infrastruct...
3456
3457
  }
  EXPORT_SYMBOL(kmem_cache_alloc_bulk);
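
/*
 * Editorial sketch (not part of mm/slab.c): the bulk API is all-or-nothing:
 * kmem_cache_alloc_bulk() returns the requested count on success and 0 on
 * failure (the error path above has already freed any partial allocation).
 * The foo_* helper names are hypothetical.
 */
static int foo_alloc_batch(struct kmem_cache *s, void **objs, size_t nr)
{
	if (!kmem_cache_alloc_bulk(s, GFP_KERNEL, nr, objs))
		return -ENOMEM;
	return 0;
}

static void foo_free_batch(struct kmem_cache *s, void **objs, size_t nr)
{
	/* One IRQ-disabled section for the whole batch, not one per object. */
	kmem_cache_free_bulk(s, nr, objs);
}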
0f24f1287   Li Zefan   tracing, slab: De...
3458
  #ifdef CONFIG_TRACING
85beb5869   Steven Rostedt   tracing/slab: Mov...
3459
  void *
4052147c0   Ezequiel Garcia   mm, slab: Match S...
3460
  kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3461
  {
85beb5869   Steven Rostedt   tracing/slab: Mov...
3462
  	void *ret;
48356303f   Ezequiel Garcia   mm, slab: Rename ...
3463
  	ret = slab_alloc(cachep, flags, _RET_IP_);
85beb5869   Steven Rostedt   tracing/slab: Mov...
3464

505f5dcb1   Alexander Potapenko   mm, kasan: add GF...
3465
  	kasan_kmalloc(cachep, ret, size, flags);
85beb5869   Steven Rostedt   tracing/slab: Mov...
3466
  	trace_kmalloc(_RET_IP_, ret,
ff4fcd01e   Ezequiel Garcia   mm, slab: Remove ...
3467
  		      size, cachep->size, flags);
85beb5869   Steven Rostedt   tracing/slab: Mov...
3468
  	return ret;
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3469
  }
85beb5869   Steven Rostedt   tracing/slab: Mov...
3470
  EXPORT_SYMBOL(kmem_cache_alloc_trace);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3471
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3472
  #ifdef CONFIG_NUMA
d0d04b78f   Zhouping Liu   mm, slab: moved k...
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
  /**
   * kmem_cache_alloc_node - Allocate an object on the specified node
   * @cachep: The cache to allocate from.
   * @flags: See kmalloc().
   * @nodeid: node number of the target node.
   *
   * Identical to kmem_cache_alloc but it will allocate memory on the given
   * node, which can improve the performance for cpu bound structures.
   *
 * Fallback to another node is possible if __GFP_THISNODE is not set.
   */
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3484
3485
  void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
  {
48356303f   Ezequiel Garcia   mm, slab: Rename ...
3486
  	void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3487

505f5dcb1   Alexander Potapenko   mm, kasan: add GF...
3488
  	kasan_slab_alloc(cachep, ret, flags);
ca2b84cb3   Eduard - Gabriel Munteanu   kmemtrace: use tr...
3489
  	trace_kmem_cache_alloc_node(_RET_IP_, ret,
8c138bc00   Christoph Lameter   slab: Get rid of ...
3490
  				    cachep->object_size, cachep->size,
ca2b84cb3   Eduard - Gabriel Munteanu   kmemtrace: use tr...
3491
  				    flags, nodeid);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3492
3493
  
  	return ret;
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3494
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3495
  EXPORT_SYMBOL(kmem_cache_alloc_node);
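
/*
 * Editorial sketch (not part of mm/slab.c): allocating a CPU's bookkeeping
 * structure from the node that CPU is local to. foo_alloc_for_cpu() is
 * hypothetical; cpu_to_node(), NUMA_NO_NODE and kmem_cache_alloc_node() are
 * the real interfaces. Passing NUMA_NO_NODE instead lets slab_alloc_node()
 * above fall back to the local node.
 */
static void *foo_alloc_for_cpu(struct kmem_cache *s, int cpu)
{
	return kmem_cache_alloc_node(s, GFP_KERNEL, cpu_to_node(cpu));
}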
0f24f1287   Li Zefan   tracing, slab: De...
3496
  #ifdef CONFIG_TRACING
4052147c0   Ezequiel Garcia   mm, slab: Match S...
3497
  void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
85beb5869   Steven Rostedt   tracing/slab: Mov...
3498
  				  gfp_t flags,
4052147c0   Ezequiel Garcia   mm, slab: Match S...
3499
3500
  				  int nodeid,
  				  size_t size)
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3501
  {
85beb5869   Steven Rostedt   tracing/slab: Mov...
3502
  	void *ret;
592f41450   Ezequiel Garcia   mm/slab: Fix typo...
3503
  	ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
505f5dcb1   Alexander Potapenko   mm, kasan: add GF...
3504
3505
  
  	kasan_kmalloc(cachep, ret, size, flags);
85beb5869   Steven Rostedt   tracing/slab: Mov...
3506
  	trace_kmalloc_node(_RET_IP_, ret,
ff4fcd01e   Ezequiel Garcia   mm, slab: Remove ...
3507
  			   size, cachep->size,
85beb5869   Steven Rostedt   tracing/slab: Mov...
3508
3509
  			   flags, nodeid);
  	return ret;
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3510
  }
85beb5869   Steven Rostedt   tracing/slab: Mov...
3511
  EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3512
  #endif
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3513
  static __always_inline void *
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3514
  __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
97e2bde47   Manfred Spraul   [PATCH] add kmall...
3515
  {
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
3516
  	struct kmem_cache *cachep;
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
3517
  	void *ret;
97e2bde47   Manfred Spraul   [PATCH] add kmall...
3518

3996e891e   Dmitry Vyukov   mm: don't warn ab...
3519
3520
  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
  		return NULL;
2c59dd654   Christoph Lameter   slab: Common Kmal...
3521
  	cachep = kmalloc_slab(size, flags);
6cb8f9132   Christoph Lameter   Slab allocators: ...
3522
3523
  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
  		return cachep;
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
3524
  	ret = kmem_cache_alloc_node_trace(cachep, flags, node, size);
505f5dcb1   Alexander Potapenko   mm, kasan: add GF...
3525
  	kasan_kmalloc(cachep, ret, size, flags);
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
3526
3527
  
  	return ret;
97e2bde47   Manfred Spraul   [PATCH] add kmall...
3528
  }
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3529

8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3530
3531
  void *__kmalloc_node(size_t size, gfp_t flags, int node)
  {
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3532
  	return __do_kmalloc_node(size, flags, node, _RET_IP_);
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3533
  }
dbe5e69d2   Christoph Hellwig   [PATCH] slab: opt...
3534
  EXPORT_SYMBOL(__kmalloc_node);
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3535
3536
  
  void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
ce71e27c6   Eduard - Gabriel Munteanu   SLUB: Replace __b...
3537
  		int node, unsigned long caller)
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3538
  {
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3539
  	return __do_kmalloc_node(size, flags, node, caller);
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3540
3541
  }
  EXPORT_SYMBOL(__kmalloc_node_track_caller);
8b98c1699   Christoph Hellwig   [PATCH] leak trac...
3542
  #endif /* CONFIG_NUMA */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3543
3544
  
  /**
800590f52   Paul Drynoff   [PATCH] slab: kma...
3545
   * __do_kmalloc - allocate memory
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3546
   * @size: how many bytes of memory are required.
800590f52   Paul Drynoff   [PATCH] slab: kma...
3547
   * @flags: the type of memory to allocate (see kmalloc).
911851e6e   Randy Dunlap   [PATCH] slab: fix...
3548
 * @caller: return address of the caller, used for debug tracking
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3549
   */
7fd6b1413   Pekka Enberg   [PATCH] slab: fix...
3550
  static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3551
  					  unsigned long caller)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3552
  {
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
3553
  	struct kmem_cache *cachep;
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3554
  	void *ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3555

3996e891e   Dmitry Vyukov   mm: don't warn ab...
3556
3557
  	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
  		return NULL;
2c59dd654   Christoph Lameter   slab: Common Kmal...
3558
  	cachep = kmalloc_slab(size, flags);
a5c96d8a1   Linus Torvalds   Fix up non-NUMA S...
3559
3560
  	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
  		return cachep;
48356303f   Ezequiel Garcia   mm, slab: Rename ...
3561
  	ret = slab_alloc(cachep, flags, caller);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3562

505f5dcb1   Alexander Potapenko   mm, kasan: add GF...
3563
  	kasan_kmalloc(cachep, ret, size, flags);
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3564
  	trace_kmalloc(caller, ret,
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3565
  		      size, cachep->size, flags);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3566
3567
  
  	return ret;
7fd6b1413   Pekka Enberg   [PATCH] slab: fix...
3568
  }
7fd6b1413   Pekka Enberg   [PATCH] slab: fix...
3569
3570
  void *__kmalloc(size_t size, gfp_t flags)
  {
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3571
  	return __do_kmalloc(size, flags, _RET_IP_);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3572
3573
  }
  EXPORT_SYMBOL(__kmalloc);
ce71e27c6   Eduard - Gabriel Munteanu   SLUB: Replace __b...
3574
  void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
7fd6b1413   Pekka Enberg   [PATCH] slab: fix...
3575
  {
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3576
  	return __do_kmalloc(size, flags, caller);
7fd6b1413   Pekka Enberg   [PATCH] slab: fix...
3577
3578
  }
  EXPORT_SYMBOL(__kmalloc_track_caller);
1d2c8eea6   Christoph Hellwig   [PATCH] slab: cle...
3579

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3580
3581
3582
3583
3584
3585
3586
3587
  /**
   * kmem_cache_free - Deallocate an object
   * @cachep: The cache the allocation was from.
   * @objp: The previously allocated object.
   *
   * Free an object which was previously allocated from this
   * cache.
   */
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
3588
  void kmem_cache_free(struct kmem_cache *cachep, void *objp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3589
3590
  {
  	unsigned long flags;
b9ce5ef49   Glauber Costa   sl[au]b: always g...
3591
3592
3593
  	cachep = cache_from_obj(cachep, objp);
  	if (!cachep)
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3594
3595
  
  	local_irq_save(flags);
d97d476b1   Feng Tang   slab: Fix a typo ...
3596
  	debug_check_no_locks_freed(objp, cachep->object_size);
3ac7fe5a4   Thomas Gleixner   infrastructure to...
3597
  	if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
8c138bc00   Christoph Lameter   slab: Get rid of ...
3598
  		debug_check_no_obj_freed(objp, cachep->object_size);
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3599
  	__cache_free(cachep, objp, _RET_IP_);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3600
  	local_irq_restore(flags);
36555751c   Eduard - Gabriel Munteanu   kmemtrace: SLAB h...
3601

ca2b84cb3   Eduard - Gabriel Munteanu   kmemtrace: use tr...
3602
  	trace_kmem_cache_free(_RET_IP_, objp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3603
3604
  }
  EXPORT_SYMBOL(kmem_cache_free);
e6cdb58d1   Jesper Dangaard Brouer   slab: implement b...
3605
3606
3607
3608
3609
3610
3611
3612
  void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
  {
  	struct kmem_cache *s;
  	size_t i;
  
  	local_irq_disable();
  	for (i = 0; i < size; i++) {
  		void *objp = p[i];
ca2571955   Jesper Dangaard Brouer   mm: new API kfree...
3613
3614
3615
3616
  		if (!orig_s) /* called via kfree_bulk */
  			s = virt_to_cache(objp);
  		else
  			s = cache_from_obj(orig_s, objp);
e6cdb58d1   Jesper Dangaard Brouer   slab: implement b...
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
  
  		debug_check_no_locks_freed(objp, s->object_size);
  		if (!(s->flags & SLAB_DEBUG_OBJECTS))
  			debug_check_no_obj_freed(objp, s->object_size);
  
  		__cache_free(s, objp, _RET_IP_);
  	}
  	local_irq_enable();
  
  	/* FIXME: add tracing */
  }
  EXPORT_SYMBOL(kmem_cache_free_bulk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3629
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3630
3631
3632
   * kfree - free previously allocated memory
   * @objp: pointer returned by kmalloc.
   *
80e93effc   Pekka Enberg   [PATCH] update kf...
3633
3634
   * If @objp is NULL, no operation is performed.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3635
3636
3637
3638
3639
   * Don't free memory not originally allocated by kmalloc()
   * or you will run into trouble.
   */
  void kfree(const void *objp)
  {
343e0d7a9   Pekka Enberg   [PATCH] slab: rep...
3640
  	struct kmem_cache *c;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3641
  	unsigned long flags;
2121db74b   Pekka Enberg   kmemtrace: trace ...
3642
  	trace_kfree(_RET_IP_, objp);
6cb8f9132   Christoph Lameter   Slab allocators: ...
3643
  	if (unlikely(ZERO_OR_NULL_PTR(objp)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3644
3645
3646
  		return;
  	local_irq_save(flags);
  	kfree_debugcheck(objp);
6ed5eb221   Pekka Enberg   [PATCH] slab: ext...
3647
  	c = virt_to_cache(objp);
8c138bc00   Christoph Lameter   slab: Get rid of ...
3648
3649
3650
  	debug_check_no_locks_freed(objp, c->object_size);
  
  	debug_check_no_obj_freed(objp, c->object_size);
7c0cb9c64   Ezequiel Garcia   mm, slab: Replace...
3651
  	__cache_free(c, (void *)objp, _RET_IP_);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3652
3653
3654
  	local_irq_restore(flags);
  }
  EXPORT_SYMBOL(kfree);
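
/*
 * Editorial sketch (not part of mm/slab.c): the usual kmalloc()/kfree()
 * pairing that ends up in __kmalloc() and kfree() above. The helper and its
 * buffer handling are made up for the example; memcpy() comes from
 * <linux/string.h>.
 */
static char *foo_dup_buf(const char *src, size_t len)
{
	char *buf = kmalloc(len + 1, GFP_KERNEL);

	if (!buf)
		return NULL;
	memcpy(buf, src, len);
	buf[len] = '\0';
	return buf;	/* the caller releases it with kfree(buf) */
}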
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3655
  /*
ce8eb6c42   Christoph Lameter   slab: Rename list...
3656
   * This initializes kmem_cache_node or resizes various caches for all nodes.
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3657
   */
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
3658
  static int setup_kmem_cache_nodes(struct kmem_cache *cachep, gfp_t gfp)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3659
  {
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
3660
  	int ret;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3661
  	int node;
ce8eb6c42   Christoph Lameter   slab: Rename list...
3662
  	struct kmem_cache_node *n;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3663

9c09a95cf   Mel Gorman   slab: partially r...
3664
  	for_each_online_node(node) {
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
3665
3666
  		ret = setup_kmem_cache_node(cachep, node, gfp, true);
  		if (ret)
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3667
  			goto fail;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3668
  	}
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
3669

cafeb02e0   Christoph Lameter   [PATCH] alloc_kme...
3670
  	return 0;
0718dc2a8   Christoph Lameter   [PATCH] slab: fix...
3671

a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3672
  fail:
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3673
  	if (!cachep->list.next) {
0718dc2a8   Christoph Lameter   [PATCH] slab: fix...
3674
3675
3676
  		/* Cache is not active yet. Roll back what we did */
  		node--;
  		while (node >= 0) {
18bf85411   Christoph Lameter   slab: use get_nod...
3677
3678
  			n = get_node(cachep, node);
  			if (n) {
ce8eb6c42   Christoph Lameter   slab: Rename list...
3679
3680
3681
  				kfree(n->shared);
  				free_alien_cache(n->alien);
  				kfree(n);
6a67368c3   Christoph Lameter   slab: Rename node...
3682
  				cachep->node[node] = NULL;
0718dc2a8   Christoph Lameter   [PATCH] slab: fix...
3683
3684
3685
3686
  			}
  			node--;
  		}
  	}
cafeb02e0   Christoph Lameter   [PATCH] alloc_kme...
3687
  	return -ENOMEM;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3688
  }
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
3689
  /* Always called with the slab_mutex held */
943a451a8   Glauber Costa   slab: propagate t...
3690
  static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
83b519e8b   Pekka Enberg   slab: setup alloc...
3691
  				int batchcount, int shared, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3692
  {
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3693
3694
  	struct array_cache __percpu *cpu_cache, *prev;
  	int cpu;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3695

bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3696
3697
  	cpu_cache = alloc_kmem_cache_cpus(cachep, limit, batchcount);
  	if (!cpu_cache)
d2e7b7d0a   Siddha, Suresh B   [PATCH] fix poten...
3698
  		return -ENOMEM;
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3699
3700
  	prev = cachep->cpu_cache;
  	cachep->cpu_cache = cpu_cache;
a87c75fbc   Greg Thelen   slab: avoid IPIs ...
3701
3702
3703
3704
3705
3706
  	/*
  	 * Without a previous cpu_cache there's no need to synchronize remote
  	 * cpus, so skip the IPIs.
  	 */
  	if (prev)
  		kick_all_cpus_sync();
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3707

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3708
  	check_irq_on();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3709
3710
  	cachep->batchcount = batchcount;
  	cachep->limit = limit;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3711
  	cachep->shared = shared;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3712

bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3713
  	if (!prev)
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
3714
  		goto setup_node;
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3715
3716
  
  	for_each_online_cpu(cpu) {
97654dfa2   Joonsoo Kim   slab: defer slab_...
3717
  		LIST_HEAD(list);
18bf85411   Christoph Lameter   slab: use get_nod...
3718
3719
  		int node;
  		struct kmem_cache_node *n;
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3720
  		struct array_cache *ac = per_cpu_ptr(prev, cpu);
18bf85411   Christoph Lameter   slab: use get_nod...
3721

bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3722
  		node = cpu_to_mem(cpu);
18bf85411   Christoph Lameter   slab: use get_nod...
3723
3724
  		n = get_node(cachep, node);
  		spin_lock_irq(&n->list_lock);
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3725
  		free_block(cachep, ac->entry, ac->avail, node, &list);
18bf85411   Christoph Lameter   slab: use get_nod...
3726
  		spin_unlock_irq(&n->list_lock);
97654dfa2   Joonsoo Kim   slab: defer slab_...
3727
  		slabs_destroy(cachep, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3728
  	}
bf0dea23a   Joonsoo Kim   mm/slab: use perc...
3729
  	free_percpu(prev);
c3d332b6b   Joonsoo Kim   mm/slab: clean-up...
3730
3731
  setup_node:
  	return setup_kmem_cache_nodes(cachep, gfp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3732
  }
943a451a8   Glauber Costa   slab: propagate t...
3733
3734
3735
3736
  static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
  				int batchcount, int shared, gfp_t gfp)
  {
  	int ret;
426589f57   Vladimir Davydov   slab: link memcg ...
3737
  	struct kmem_cache *c;
943a451a8   Glauber Costa   slab: propagate t...
3738
3739
3740
3741
3742
3743
3744
3745
  
  	ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
  
  	if (slab_state < FULL)
  		return ret;
  
  	if ((ret < 0) || !is_root_cache(cachep))
  		return ret;
426589f57   Vladimir Davydov   slab: link memcg ...
3746
3747
3748
3749
  	lockdep_assert_held(&slab_mutex);
  	for_each_memcg_cache(c, cachep) {
  		/* return value determined by the root cache only */
  		__do_tune_cpucache(c, limit, batchcount, shared, gfp);
943a451a8   Glauber Costa   slab: propagate t...
3750
3751
3752
3753
  	}
  
  	return ret;
  }
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
3754
  /* Called with slab_mutex held always */
83b519e8b   Pekka Enberg   slab: setup alloc...
3755
  static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3756
3757
  {
  	int err;
943a451a8   Glauber Costa   slab: propagate t...
3758
3759
3760
  	int limit = 0;
  	int shared = 0;
  	int batchcount = 0;
7c00fce98   Thomas Garnier   mm: reorganize SL...
3761
  	err = cache_random_seq_create(cachep, cachep->num, gfp);
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
3762
3763
  	if (err)
  		goto end;
943a451a8   Glauber Costa   slab: propagate t...
3764
3765
3766
3767
3768
3769
  	if (!is_root_cache(cachep)) {
  		struct kmem_cache *root = memcg_root_cache(cachep);
  		limit = root->limit;
  		shared = root->shared;
  		batchcount = root->batchcount;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3770

943a451a8   Glauber Costa   slab: propagate t...
3771
3772
  	if (limit && shared && batchcount)
  		goto skip_setup;
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3773
3774
  	/*
  	 * The head array serves three purposes:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3775
3776
  	 * - create a LIFO ordering, i.e. return objects that are cache-warm
  	 * - reduce the number of spinlock operations.
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3777
  	 * - reduce the number of linked list operations on the slab and
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3778
3779
3780
3781
  	 *   bufctl chains: array operations are cheaper.
	 * The numbers are guessed; we should auto-tune them as described by
  	 * Bonwick.
  	 */
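	/*
	 * Editorial illustration of the heuristic below: with 4K pages, a
	 * cache of 512-byte objects takes the "> 256" branch, giving
	 * limit = 54, shared = 8 on SMP, and batchcount = (54 + 1) / 2 = 27.
	 */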
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3782
  	if (cachep->size > 131072)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3783
  		limit = 1;
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3784
  	else if (cachep->size > PAGE_SIZE)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3785
  		limit = 8;
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3786
  	else if (cachep->size > 1024)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3787
  		limit = 24;
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3788
  	else if (cachep->size > 256)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3789
3790
3791
  		limit = 54;
  	else
  		limit = 120;
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3792
3793
  	/*
  	 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3794
3795
3796
3797
3798
3799
3800
3801
  	 * allocation behaviour: Most allocs on one cpu, most free operations
	 * on another cpu. For these cases, efficient object passing between
  	 * cpus is necessary. This is provided by a shared array. The array
  	 * replaces Bonwick's magazine layer.
  	 * On uniprocessor, it's functionally equivalent (but less efficient)
  	 * to a larger limit. Thus disabled by default.
  	 */
  	shared = 0;
3b0efdfa1   Christoph Lameter   mm, sl[aou]b: Ext...
3802
  	if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3803
  		shared = 8;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3804
3805
  
  #if DEBUG
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3806
3807
3808
  	/*
	 * With debugging enabled, a large batchcount leads to excessively long
	 * periods with local interrupts disabled. Limit the batchcount accordingly.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3809
3810
3811
3812
  	 */
  	if (limit > 32)
  		limit = 32;
  #endif
943a451a8   Glauber Costa   slab: propagate t...
3813
3814
3815
  	batchcount = (limit + 1) / 2;
  skip_setup:
  	err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
c7ce4f60a   Thomas Garnier   mm: SLAB freelist...
3816
  end:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3817
  	if (err)
1170532bb   Joe Perches   mm: convert print...
3818
3819
  		pr_err("enable_cpucache failed for %s, error %d
  ",
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3820
  		       cachep->name, -err);
2ed3a4ef9   Christoph Lameter   [PATCH] slab: do ...
3821
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3822
  }
1b55253a7   Christoph Lameter   [PATCH] slab: rem...
3823
  /*
ce8eb6c42   Christoph Lameter   slab: Rename list...
3824
3825
 * Drain an array if it contains any elements, taking the node lock only if
 * necessary. Note that the node list_lock also protects the array_cache
b18e7e654   Christoph Lameter   [PATCH] slab: fix...
3826
   * if drain_array() is used on the shared array.
1b55253a7   Christoph Lameter   [PATCH] slab: rem...
3827
   */
ce8eb6c42   Christoph Lameter   slab: Rename list...
3828
  static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3829
  			 struct array_cache *ac, int node)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3830
  {
97654dfa2   Joonsoo Kim   slab: defer slab_...
3831
  	LIST_HEAD(list);
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3832
3833
3834
  
  	/* ac from n->shared can be freed if we don't hold the slab_mutex. */
  	check_mutex_acquired();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3835

1b55253a7   Christoph Lameter   [PATCH] slab: rem...
3836
3837
  	if (!ac || !ac->avail)
  		return;
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3838
3839
  
  	if (ac->touched) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3840
  		ac->touched = 0;
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3841
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3842
  	}
18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3843
3844
3845
3846
3847
3848
  
  	spin_lock_irq(&n->list_lock);
  	drain_array_locked(cachep, ac, node, false, &list);
  	spin_unlock_irq(&n->list_lock);
  
  	slabs_destroy(cachep, &list);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3849
3850
3851
3852
  }
  
  /**
   * cache_reap - Reclaim memory from caches.
05fb6bf0b   Randy Dunlap   [PATCH] kernel-do...
3853
   * @w: work descriptor
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3854
3855
3856
3857
3858
3859
   *
   * Called from workqueue/eventd every few seconds.
   * Purpose:
   * - clear the per-cpu caches for this CPU.
   * - return freeable pages to the main free memory pool.
   *
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3860
3861
   * If we cannot acquire the cache chain mutex then just give up - we'll try
   * again on the next iteration.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3862
   */
7c5cae368   Christoph Lameter   [PATCH] slab: use...
3863
  static void cache_reap(struct work_struct *w)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3864
  {
7a7c381d2   Christoph Hellwig   [PATCH] slab: sto...
3865
  	struct kmem_cache *searchp;
ce8eb6c42   Christoph Lameter   slab: Rename list...
3866
  	struct kmem_cache_node *n;
7d6e6d09d   Lee Schermerhorn   numa: slab: use n...
3867
  	int node = numa_mem_id();
bf6aede71   Jean Delvare   workqueue: add to...
3868
  	struct delayed_work *work = to_delayed_work(w);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3869

18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
3870
  	if (!mutex_trylock(&slab_mutex))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3871
		/* Give up. Set up the next iteration. */
7c5cae368   Christoph Lameter   [PATCH] slab: use...
3872
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3873

18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
3874
  	list_for_each_entry(searchp, &slab_caches, list) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3875
  		check_irq_on();
35386e3b0   Christoph Lameter   [PATCH] slab: cac...
3876
  		/*
ce8eb6c42   Christoph Lameter   slab: Rename list...
3877
  		 * We only take the node lock if absolutely necessary and we
35386e3b0   Christoph Lameter   [PATCH] slab: cac...
3878
3879
3880
  		 * have established with reasonable certainty that
  		 * we can do some work if the lock was obtained.
  		 */
18bf85411   Christoph Lameter   slab: use get_nod...
3881
  		n = get_node(searchp, node);
35386e3b0   Christoph Lameter   [PATCH] slab: cac...
3882

ce8eb6c42   Christoph Lameter   slab: Rename list...
3883
  		reap_alien(searchp, n);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3884

18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3885
  		drain_array(searchp, n, cpu_cache_get(searchp), node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3886

35386e3b0   Christoph Lameter   [PATCH] slab: cac...
3887
3888
3889
3890
  		/*
  		 * These are racy checks but it does not matter
  		 * if we skip one check or scan twice.
  		 */
ce8eb6c42   Christoph Lameter   slab: Rename list...
3891
  		if (time_after(n->next_reap, jiffies))
35386e3b0   Christoph Lameter   [PATCH] slab: cac...
3892
  			goto next;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3893

5f0985bb1   Jianyu Zhan   mm/slab.c: cleanu...
3894
  		n->next_reap = jiffies + REAPTIMEOUT_NODE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3895

18726ca8b   Joonsoo Kim   mm/slab: fix the ...
3896
  		drain_array(searchp, n, n->shared, node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3897

ce8eb6c42   Christoph Lameter   slab: Rename list...
3898
3899
  		if (n->free_touched)
  			n->free_touched = 0;
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
3900
3901
  		else {
  			int freed;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3902

ce8eb6c42   Christoph Lameter   slab: Rename list...
3903
  			freed = drain_freelist(searchp, n, (n->free_limit +
ed11d9eb2   Christoph Lameter   [PATCH] slab: con...
3904
3905
3906
  				5 * searchp->num - 1) / (5 * searchp->num));
  			STATS_ADD_REAPED(searchp, freed);
  		}
35386e3b0   Christoph Lameter   [PATCH] slab: cac...
3907
  next:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3908
3909
3910
  		cond_resched();
  	}
  	check_irq_on();
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
3911
  	mutex_unlock(&slab_mutex);
8fce4d8e3   Christoph Lameter   [PATCH] slab: Nod...
3912
  	next_reap_node();
7c5cae368   Christoph Lameter   [PATCH] slab: use...
3913
  out:
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
3914
  	/* Set up the next iteration */
a9f2a846f   Vlastimil Babka   mm, slab: resched...
3915
3916
  	schedule_delayed_work_on(smp_processor_id(), work,
  				round_jiffies_relative(REAPTIMEOUT_AC));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3917
  }
0d7561c61   Glauber Costa   sl[au]b: Process ...
3918
  void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3919
  {
f728b0a5d   Greg Thelen   mm, slab: faster ...
3920
  	unsigned long active_objs, num_objs, active_slabs;
bf00bd345   David Rientjes   mm, slab: maintai...
3921
3922
  	unsigned long total_slabs = 0, free_objs = 0, shared_avail = 0;
  	unsigned long free_slabs = 0;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3923
  	int node;
ce8eb6c42   Christoph Lameter   slab: Rename list...
3924
  	struct kmem_cache_node *n;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3925

18bf85411   Christoph Lameter   slab: use get_nod...
3926
  	for_each_kmem_cache_node(cachep, node, n) {
ca3b9b917   Ravikiran G Thirumalai   [PATCH] NUMA slab...
3927
  		check_irq_on();
ce8eb6c42   Christoph Lameter   slab: Rename list...
3928
  		spin_lock_irq(&n->list_lock);
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3929

bf00bd345   David Rientjes   mm, slab: maintai...
3930
3931
  		total_slabs += n->total_slabs;
  		free_slabs += n->free_slabs;
f728b0a5d   Greg Thelen   mm, slab: faster ...
3932
  		free_objs += n->free_objects;
07a63c41f   Aruna Ramakrishna   mm/slab: improve ...
3933

ce8eb6c42   Christoph Lameter   slab: Rename list...
3934
3935
  		if (n->shared)
  			shared_avail += n->shared->avail;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3936

ce8eb6c42   Christoph Lameter   slab: Rename list...
3937
  		spin_unlock_irq(&n->list_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3938
  	}
bf00bd345   David Rientjes   mm, slab: maintai...
3939
3940
  	num_objs = total_slabs * cachep->num;
  	active_slabs = total_slabs - free_slabs;
f728b0a5d   Greg Thelen   mm, slab: faster ...
3941
  	active_objs = num_objs - free_objs;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3942

0d7561c61   Glauber Costa   sl[au]b: Process ...
3943
3944
3945
  	sinfo->active_objs = active_objs;
  	sinfo->num_objs = num_objs;
  	sinfo->active_slabs = active_slabs;
bf00bd345   David Rientjes   mm, slab: maintai...
3946
  	sinfo->num_slabs = total_slabs;
0d7561c61   Glauber Costa   sl[au]b: Process ...
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
  	sinfo->shared_avail = shared_avail;
  	sinfo->limit = cachep->limit;
  	sinfo->batchcount = cachep->batchcount;
  	sinfo->shared = cachep->shared;
  	sinfo->objects_per_slab = cachep->num;
  	sinfo->cache_order = cachep->gfporder;
  }
  
  void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3957
  #if STATS
ce8eb6c42   Christoph Lameter   slab: Rename list...
3958
  	{			/* node stats */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3959
3960
3961
3962
3963
3964
  		unsigned long high = cachep->high_mark;
  		unsigned long allocs = cachep->num_allocations;
  		unsigned long grown = cachep->grown;
  		unsigned long reaped = cachep->reaped;
  		unsigned long errors = cachep->errors;
  		unsigned long max_freeable = cachep->max_freeable;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3965
  		unsigned long node_allocs = cachep->node_allocs;
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
3966
  		unsigned long node_frees = cachep->node_frees;
fb7faf331   Ravikiran G Thirumalai   [PATCH] slab: add...
3967
  		unsigned long overflows = cachep->node_overflow;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3968

756a025f0   Joe Perches   mm: coalesce spli...
3969
  		seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu %4lu %4lu %4lu %4lu %4lu",
e92dd4fd1   Joe Perches   slab: Fix continu...
3970
3971
3972
  			   allocs, high, grown,
  			   reaped, errors, max_freeable, node_allocs,
  			   node_frees, overflows);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3973
3974
3975
3976
3977
3978
3979
3980
3981
  	}
  	/* cpu stats */
  	{
  		unsigned long allochit = atomic_read(&cachep->allochit);
  		unsigned long allocmiss = atomic_read(&cachep->allocmiss);
  		unsigned long freehit = atomic_read(&cachep->freehit);
  		unsigned long freemiss = atomic_read(&cachep->freemiss);
  
  		seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3982
  			   allochit, allocmiss, freehit, freemiss);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3983
3984
  	}
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3985
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3986
3987
3988
3989
3990
3991
3992
3993
  #define MAX_SLABINFO_WRITE 128
  /**
   * slabinfo_write - Tuning for the slab allocator
   * @file: unused
   * @buffer: user buffer
   * @count: data length
   * @ppos: unused
   */
b7454ad3c   Glauber Costa   mm/sl[au]b: Move ...
3994
  ssize_t slabinfo_write(struct file *file, const char __user *buffer,
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3995
  		       size_t count, loff_t *ppos)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3996
  {
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
3997
  	char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3998
  	int limit, batchcount, shared, res;
7a7c381d2   Christoph Hellwig   [PATCH] slab: sto...
3999
  	struct kmem_cache *cachep;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
4000

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4001
4002
4003
4004
  	if (count > MAX_SLABINFO_WRITE)
  		return -EINVAL;
  	if (copy_from_user(&kbuf, buffer, count))
  		return -EFAULT;
b28a02de8   Pekka Enberg   [PATCH] slab: fix...
4005
  	kbuf[MAX_SLABINFO_WRITE] = '\0';
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
  
  	tmp = strchr(kbuf, ' ');
  	if (!tmp)
  		return -EINVAL;
  	*tmp = '\0';
  	tmp++;
  	if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
  		return -EINVAL;
  
  	/* Find the cache in the chain of caches. */
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
4016
  	mutex_lock(&slab_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4017
  	res = -EINVAL;
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
4018
  	list_for_each_entry(cachep, &slab_caches, list) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4019
  		if (!strcmp(cachep->name, kbuf)) {
a737b3e2f   Andrew Morton   [PATCH] slab cleanup
4020
4021
  			if (limit < 1 || batchcount < 1 ||
  					batchcount > limit || shared < 0) {
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
4022
  				res = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4023
  			} else {
e498be7da   Christoph Lameter   [PATCH] Numa-awar...
4024
  				res = do_tune_cpucache(cachep, limit,
83b519e8b   Pekka Enberg   slab: setup alloc...
4025
4026
  						       batchcount, shared,
  						       GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4027
4028
4029
4030
  			}
  			break;
  		}
  	}
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
4031
  	mutex_unlock(&slab_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4032
4033
4034
4035
  	if (res >= 0)
  		res = count;
  	return res;
  }
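
An illustrative note on the tuning interface above: slabinfo_write() expects the
buffer to contain a cache name followed by three integers, "cache-name limit
batchcount shared", and applies them with do_tune_cpucache(). The user-space
sketch below is only a hedged example (the "dentry" cache name and the values
are illustrative, and the write requires root on a CONFIG_SLAB kernel):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *cmd = "dentry 120 60 8";	/* name limit batchcount shared */
	int fd = open("/proc/slabinfo", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/slabinfo");
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write");
	close(fd);
	return 0;
}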
871751e25   Al Viro   [PATCH] slab: imp...
4036
4037
  
  #ifdef CONFIG_DEBUG_SLAB_LEAK
871751e25   Al Viro   [PATCH] slab: imp...
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
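  /*
   * Leak-table helper for /proc/slab_allocators: n[0] is the table capacity
   * in entries, n[1] the number of distinct callers recorded so far, followed
   * by (caller address, hit count) pairs kept sorted by address.  Returns 0
   * when the table is full so leaks_show() can grow the buffer and retry.
   */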
  static inline int add_caller(unsigned long *n, unsigned long v)
  {
  	unsigned long *p;
  	int l;
  	if (!v)
  		return 1;
  	l = n[1];
  	p = n + 2;
  	while (l) {
  		int i = l/2;
  		unsigned long *q = p + 2 * i;
  		if (*q == v) {
  			q[1]++;
  			return 1;
  		}
  		if (*q > v) {
  			l = i;
  		} else {
  			p = q + 2;
  			l -= i + 1;
  		}
  	}
  	if (++n[1] == n[0])
  		return 0;
  	memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n));
  	p[0] = v;
  	p[1] = 1;
  	return 1;
  }
8456a648c   Joonsoo Kim   slab: use struct ...
4067
4068
  static void handle_slab(unsigned long *n, struct kmem_cache *c,
  						struct page *page)
871751e25   Al Viro   [PATCH] slab: imp...
4069
4070
  {
  	void *p;
d31676dfd   Joonsoo Kim   mm/slab: alternat...
4071
4072
  	int i, j;
  	unsigned long v;
b1cb0982b   Joonsoo Kim   slab: change the ...
4073

871751e25   Al Viro   [PATCH] slab: imp...
4074
4075
  	if (n[0] == n[1])
  		return;
8456a648c   Joonsoo Kim   slab: use struct ...
4076
  	for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
d31676dfd   Joonsoo Kim   mm/slab: alternat...
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
  		bool active = true;
  
  		for (j = page->active; j < c->num; j++) {
  			if (get_free_obj(page, j) == i) {
  				active = false;
  				break;
  			}
  		}
  
  		if (!active)
871751e25   Al Viro   [PATCH] slab: imp...
4087
  			continue;
b1cb0982b   Joonsoo Kim   slab: change the ...
4088

d31676dfd   Joonsoo Kim   mm/slab: alternat...
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
  		/*
  		 * probe_kernel_read() is used for DEBUG_PAGEALLOC. The page
  		 * table mapping is only established when the object is
  		 * actually allocated, so we could otherwise mistakenly access
  		 * an unmapped object in the cpu cache.
  		 */
  		if (probe_kernel_read(&v, dbg_userword(c, p), sizeof(v)))
  			continue;
  
  		if (!add_caller(n, v))
871751e25   Al Viro   [PATCH] slab: imp...
4099
4100
4101
4102
4103
4104
4105
  			return;
  	}
  }
  
  static void show_symbol(struct seq_file *m, unsigned long address)
  {
  #ifdef CONFIG_KALLSYMS
871751e25   Al Viro   [PATCH] slab: imp...
4106
  	unsigned long offset, size;
9281acea6   Tejun Heo   kallsyms: make KS...
4107
  	char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN];
871751e25   Al Viro   [PATCH] slab: imp...
4108

a5c43dae7   Alexey Dobriyan   Fix race between ...
4109
  	if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
871751e25   Al Viro   [PATCH] slab: imp...
4110
  		seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
a5c43dae7   Alexey Dobriyan   Fix race between ...
4111
  		if (modname[0])
871751e25   Al Viro   [PATCH] slab: imp...
4112
4113
4114
4115
  			seq_printf(m, " [%s]", modname);
  		return;
  	}
  #endif
85c3e4a5a   Geert Uytterhoeven   mm/slab.c: do not...
4116
  	seq_printf(m, "%px", (void *)address);
871751e25   Al Viro   [PATCH] slab: imp...
4117
4118
4119
4120
  }
  
  static int leaks_show(struct seq_file *m, void *p)
  {
0672aa7c2   Thierry Reding   mm, slab: Build f...
4121
  	struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
8456a648c   Joonsoo Kim   slab: use struct ...
4122
  	struct page *page;
ce8eb6c42   Christoph Lameter   slab: Rename list...
4123
  	struct kmem_cache_node *n;
871751e25   Al Viro   [PATCH] slab: imp...
4124
  	const char *name;
db8450673   Christoph Lameter   slab: Fixup CONFI...
4125
  	unsigned long *x = m->private;
871751e25   Al Viro   [PATCH] slab: imp...
4126
4127
4128
4129
4130
4131
4132
  	int node;
  	int i;
  
  	if (!(cachep->flags & SLAB_STORE_USER))
  		return 0;
  	if (!(cachep->flags & SLAB_RED_ZONE))
  		return 0;
d31676dfd   Joonsoo Kim   mm/slab: alternat...
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
  	/*
  	 * Set store_user_clean and start to grab stored user information
  	 * for all objects on this cache. If any alloc/free request comes in
  	 * during the processing, the information would be wrong, so restart
  	 * the whole processing.
  	 */
  	do {
  		set_store_user_clean(cachep);
  		drain_cpu_caches(cachep);
  
  		x[1] = 0;
871751e25   Al Viro   [PATCH] slab: imp...
4144

d31676dfd   Joonsoo Kim   mm/slab: alternat...
4145
  		for_each_kmem_cache_node(cachep, node, n) {
871751e25   Al Viro   [PATCH] slab: imp...
4146

d31676dfd   Joonsoo Kim   mm/slab: alternat...
4147
4148
  			check_irq_on();
  			spin_lock_irq(&n->list_lock);
871751e25   Al Viro   [PATCH] slab: imp...
4149

d31676dfd   Joonsoo Kim   mm/slab: alternat...
4150
4151
4152
4153
4154
4155
4156
  			list_for_each_entry(page, &n->slabs_full, lru)
  				handle_slab(x, cachep, page);
  			list_for_each_entry(page, &n->slabs_partial, lru)
  				handle_slab(x, cachep, page);
  			spin_unlock_irq(&n->list_lock);
  		}
  	} while (!is_store_user_clean(cachep));
871751e25   Al Viro   [PATCH] slab: imp...
4157

871751e25   Al Viro   [PATCH] slab: imp...
4158
  	name = cachep->name;
db8450673   Christoph Lameter   slab: Fixup CONFI...
4159
  	if (x[0] == x[1]) {
871751e25   Al Viro   [PATCH] slab: imp...
4160
  		/* Increase the buffer size */
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
4161
  		mutex_unlock(&slab_mutex);
6396bb221   Kees Cook   treewide: kzalloc...
4162
4163
  		m->private = kcalloc(x[0] * 4, sizeof(unsigned long),
  				     GFP_KERNEL);
871751e25   Al Viro   [PATCH] slab: imp...
4164
4165
  		if (!m->private) {
  			/* Too bad, we are really out */
db8450673   Christoph Lameter   slab: Fixup CONFI...
4166
  			m->private = x;
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
4167
  			mutex_lock(&slab_mutex);
871751e25   Al Viro   [PATCH] slab: imp...
4168
4169
  			return -ENOMEM;
  		}
db8450673   Christoph Lameter   slab: Fixup CONFI...
4170
4171
  		*(unsigned long *)m->private = x[0] * 2;
  		kfree(x);
18004c5d4   Christoph Lameter   mm, sl[aou]b: Use...
4172
  		mutex_lock(&slab_mutex);
871751e25   Al Viro   [PATCH] slab: imp...
4173
4174
4175
4176
  		/* Now make sure this entry will be retried */
  		m->count = m->size;
  		return 0;
  	}
db8450673   Christoph Lameter   slab: Fixup CONFI...
4177
4178
4179
  	for (i = 0; i < x[1]; i++) {
  		seq_printf(m, "%s: %lu ", name, x[2*i+3]);
  		show_symbol(m, x[2*i+2]);
871751e25   Al Viro   [PATCH] slab: imp...
4180
4181
4182
  		seq_putc(m, '\n');
  	}
d2e7b7d0a   Siddha, Suresh B   [PATCH] fix poten...
4183

871751e25   Al Viro   [PATCH] slab: imp...
4184
4185
  	return 0;
  }
a0ec95a8e   Alexey Dobriyan   proc: move /proc/...
4186
  static const struct seq_operations slabstats_op = {
1df3b26f2   Vladimir Davydov   slab: print slabi...
4187
  	.start = slab_start,
276a2439c   Wanpeng Li   mm/slab: Give s_n...
4188
4189
  	.next = slab_next,
  	.stop = slab_stop,
871751e25   Al Viro   [PATCH] slab: imp...
4190
4191
  	.show = leaks_show,
  };
a0ec95a8e   Alexey Dobriyan   proc: move /proc/...
4192
4193
4194
  
  static int slabstats_open(struct inode *inode, struct file *file)
  {
b208ce329   Rob Jones   mm/slab.c: use __...
4195
4196
4197
4198
4199
4200
4201
4202
4203
  	unsigned long *n;
  
  	n = __seq_open_private(file, &slabstats_op, PAGE_SIZE);
  	if (!n)
  		return -ENOMEM;
  
  	*n = PAGE_SIZE / (2 * sizeof(unsigned long));
  
  	return 0;
a0ec95a8e   Alexey Dobriyan   proc: move /proc/...
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
  }
  
  static const struct file_operations proc_slabstats_operations = {
  	.open		= slabstats_open,
  	.read		= seq_read,
  	.llseek		= seq_lseek,
  	.release	= seq_release_private,
  };
  #endif
  
  static int __init slab_proc_init(void)
  {
  #ifdef CONFIG_DEBUG_SLAB_LEAK
  	proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
871751e25   Al Viro   [PATCH] slab: imp...
4218
  #endif
a0ec95a8e   Alexey Dobriyan   proc: move /proc/...
4219
4220
4221
  	return 0;
  }
  module_init(slab_proc_init);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4222

04385fc5e   Kees Cook   mm: SLAB hardened...
4223
4224
  #ifdef CONFIG_HARDENED_USERCOPY
  /*
afcc90f86   Kees Cook   usercopy: WARN() ...
4225
4226
4227
   * Rejects incorrectly sized objects and objects that are to be copied
   * to/from userspace but do not fall entirely within the containing slab
   * cache's usercopy region.
04385fc5e   Kees Cook   mm: SLAB hardened...
4228
4229
4230
4231
   *
   * Returns if the check passes; otherwise it either emits a warning (when
   * usercopy_fallback lets the copy proceed) or aborts via usercopy_abort().
   */
f4e6e289c   Kees Cook   usercopy: Include...
4232
4233
  void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
  			 bool to_user)
04385fc5e   Kees Cook   mm: SLAB hardened...
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
  {
  	struct kmem_cache *cachep;
  	unsigned int objnr;
  	unsigned long offset;
  
  	/* Find and validate object. */
  	cachep = page->slab_cache;
  	objnr = obj_to_index(cachep, page, (void *)ptr);
  	BUG_ON(objnr >= cachep->num);
  
  	/* Find offset within object. */
  	offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
afcc90f86   Kees Cook   usercopy: WARN() ...
4246
4247
4248
4249
  	/* Allow address range falling entirely within usercopy region. */
  	if (offset >= cachep->useroffset &&
  	    offset - cachep->useroffset <= cachep->usersize &&
  	    n <= cachep->useroffset - offset + cachep->usersize)
f4e6e289c   Kees Cook   usercopy: Include...
4250
  		return;
04385fc5e   Kees Cook   mm: SLAB hardened...
4251

afcc90f86   Kees Cook   usercopy: WARN() ...
4252
4253
4254
4255
4256
4257
  	/*
  	 * If the copy is still within the allocated object, produce
  	 * a warning instead of rejecting the copy. This is intended
  	 * to be a temporary method to find any missing usercopy
  	 * whitelists.
  	 */
2d891fbc3   Kees Cook   usercopy: Allow s...
4258
4259
  	if (usercopy_fallback &&
  	    offset <= cachep->object_size &&
afcc90f86   Kees Cook   usercopy: WARN() ...
4260
4261
4262
4263
  	    n <= cachep->object_size - offset) {
  		usercopy_warn("SLAB object", cachep->name, to_user, offset, n);
  		return;
  	}
04385fc5e   Kees Cook   mm: SLAB hardened...
4264

f4e6e289c   Kees Cook   usercopy: Include...
4265
  	usercopy_abort("SLAB object", cachep->name, to_user, offset, n);
04385fc5e   Kees Cook   mm: SLAB hardened...
4266
4267
  }
  #endif /* CONFIG_HARDENED_USERCOPY */
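
The whitelist test in __check_heap_object() above is a pure interval check on
the object offset. The stand-alone sketch below is not kernel code; the helper
name and the sample numbers are illustrative assumptions, chosen only to make
the accepted and rejected cases visible:

#include <stdbool.h>
#include <stdio.h>

/* Mirror of the kernel's range check: the n-byte copy at 'offset' inside the
 * object must fall entirely within [useroffset, useroffset + usersize). */
static bool range_is_whitelisted(unsigned long offset, unsigned long n,
				 unsigned long useroffset, unsigned long usersize)
{
	return offset >= useroffset &&
	       offset - useroffset <= usersize &&
	       n <= useroffset - offset + usersize;
}

int main(void)
{
	/* Suppose the usercopy region covers bytes 16..79 of each object. */
	printf("%d\n", range_is_whitelisted(16, 64, 16, 64));	/* 1: exact fit */
	printf("%d\n", range_is_whitelisted(32, 64, 16, 64));	/* 0: overruns region */
	return 0;
}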
00e145b6d   Manfred Spraul   [PATCH] slab: rem...
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
  /**
   * ksize - get the actual amount of memory allocated for a given object
   * @objp: Pointer to the object
   *
   * kmalloc may internally round up allocations and return more memory
   * than requested. ksize() can be used to determine the actual amount of
   * memory allocated. The caller may use this additional memory, even though
   * a smaller amount of memory was initially specified with the kmalloc call.
   * The caller must guarantee that objp points to a valid object previously
   * allocated with either kmalloc() or kmem_cache_alloc(). The object
   * must not be freed during the duration of the call.
   */
fd76bab2f   Pekka Enberg   slab: introduce k...
4280
  size_t ksize(const void *objp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4281
  {
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
4282
  	size_t size;
ef8b4520b   Christoph Lameter   Slab allocators: ...
4283
4284
  	BUG_ON(!objp);
  	if (unlikely(objp == ZERO_SIZE_PTR))
00e145b6d   Manfred Spraul   [PATCH] slab: rem...
4285
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4286

7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
4287
4288
4289
4290
  	size = virt_to_cache(objp)->object_size;
  	/* We assume that ksize callers could use the whole allocated area,
  	 * so we need to unpoison this area.
  	 */
4ebb31a42   Alexander Potapenko   mm, kasan: don't ...
4291
  	kasan_unpoison_shadow(objp, size);
7ed2f9e66   Alexander Potapenko   mm, kasan: SLAB s...
4292
4293
  
  	return size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
4294
  }
b1aabecd5   Kirill A. Shutemov   mm: Export symbol...
4295
  EXPORT_SYMBOL(ksize);
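
As the kerneldoc above notes, ksize() reports the usable size of an allocation,
which may exceed what was requested because kmalloc rounds up to its cache
sizes. A minimal in-kernel sketch (the function name and the example size are
illustrative assumptions, not part of slab.c):

#include <linux/printk.h>
#include <linux/slab.h>

static void ksize_demo(void)
{
	char *buf = kmalloc(100, GFP_KERNEL);	/* typically served from a larger cache */

	if (!buf)
		return;
	/* ksize(buf) >= 100; the caller may legally use the whole reported area */
	pr_info("requested 100 bytes, usable %zu\n", ksize(buf));
	kfree(buf);
}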