Commit 8a9de9cde83d1c913bcbe5bd8c6291f5536a576c

Authored by Christoph Lameter
Committed by Greg Kroah-Hartman
1 parent c45247fdfb

slub: Do not hold slub_lock when calling sysfs_slab_add()

commit 66c4c35c6bc5a1a452b024cf0364635b28fd94e4 upstream.

sysfs_slab_add() calls various sysfs functions that actually may
end up in userspace doing all sorts of things.

Release the slub_lock after adding the kmem_cache structure to the list.
At that point the address of the kmem_cache is not known so we are
guaranteed exclusive access to the following modifications to the
kmem_cache structure.

If sysfs_slab_add() fails, reacquire the slub_lock to remove the
kmem_cache structure from the list.

Reported-by: Sasha Levin <levinsasha928@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Showing 1 changed file with 2 additions and 1 deletion
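The changed hunk itself falls outside the excerpt below, which only shows the unchanged leading context of mm/slub.c. As a rough sketch (not the verbatim diff) of the flow the commit message describes, the allocation path at the end of kmem_cache_create() is shaped roughly like this after the change; all identifiers are from mm/slub.c of this era, and error labels outside this fragment are only referenced in comments:

	if (kmem_cache_open(s, n, size, align, flags, ctor)) {
		list_add(&s->list, &slab_caches);
		/*
		 * Drop slub_lock before calling sysfs_slab_add(): the
		 * sysfs code may end up in userspace.  Nobody else knows
		 * the address of the new kmem_cache yet, so the structure
		 * can still be modified safely without the lock.
		 */
		up_write(&slub_lock);
		if (sysfs_slab_add(s)) {
			/* Reacquire only to undo the list insertion. */
			down_write(&slub_lock);
			list_del(&s->list);
			kfree(n);
			kfree(s);
			goto err;	/* the err path drops slub_lock again */
		}
		return s;
	}

The key point is that sysfs_slab_add() now runs without slub_lock held; the semaphore is taken again only on failure, to unlink the half-constructed cache before it is freed.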

/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks or atomic operatios
 * and only uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter
 * (C) 2011 Linux Foundation, Christoph Lameter
 */

#include <linux/mm.h>
#include <linux/swap.h> /* struct reclaim_state */
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmemcheck.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>

#include <trace/events/kmem.h>

/*
 * Lock order:
 *   1. slub_lock (Global Semaphore)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 *   slub_lock
 *
 *   The role of the slub_lock is to protect the list of all the slabs
 *   and to synchronize major metadata changes to slab cache structures.
 *
 *   The slab_lock is only used for debugging and on arches that do not
 *   have the ability to do a cmpxchg_double. It only protects the second
 *   double word in the page struct. Meaning
 *	A. page->freelist	-> List of object free in a page
 *	B. page->counters	-> Counters of objects
 *	C. page->frozen		-> frozen state
 *
 *   If a slab is frozen then it is exempt from list management. It is not
 *   on any list. The processor that froze the slab is the one who can
 *   perform list operations on the page. Other processors may put objects
 *   onto the freelist but the processor that froze the slab is the only
 *   one that can retrieve the objects from the page's freelist.
 *
 *   The list_lock protects the partial and full list on each node and
 *   the partial slab counter. If taken then no new slabs may be added or
 *   removed from the lists nor make the number of partial slabs be modified.
 *   (Note that the total number of slabs is an atomic value that may be
 *   modified without taking the list lock).
 *
 *   The list_lock is a centralized lock and thus we avoid taking it as
 *   much as possible. As long as SLUB does not have to handle partial
 *   slabs, operations can continue without any centralized lock. F.e.
 *   allocating a long series of objects that fill up slabs does not require
 *   the list lock.
 *   Interrupts are disabled during allocation and deallocation in order to
 *   make the slab allocator safe to use in the context of an irq. In addition
 *   interrupts are disabled to ensure that the processor does not change
 *   while handling per_cpu slabs, due to kernel preemption.
 *
 * SLUB assigns one slab for allocation to each processor.
 * Allocations only occur from these slabs called cpu slabs.
 *
 * Slabs with free elements are kept on a partial list and during regular
 * operations no list for full slabs is used. If an object in a full slab is
 * freed then the slab will show up again on the partial lists.
 * We track full slabs for debugging purposes though because otherwise we
 * cannot scan all objects.
 *
 * Slabs are freed when they become empty. Teardown and setup is
 * minimal so we rely on the page allocators per cpu caches for
 * fast frees and allocs.
 *
 * Overloading of page flags that are otherwise used for LRU management.
 *
 * PageActive		The slab is frozen and exempt from list processing.
 *			This means that the slab is dedicated to a purpose
 *			such as satisfying allocations for a specific
 *			processor. Objects may be freed in the slab while
 *			it is frozen but slab_free will then skip the usual
 *			list operations. It is up to the processor holding
 *			the slab to integrate the slab into the slab lists
 *			when the slab is no longer needed.
 *
 *			One use of this flag is to mark slabs that are
 *			used for allocations. Then such a slab becomes a cpu
 *			slab. The cpu slab may be equipped with an additional
 *			freelist that allows lockless access to
 *			free objects in addition to the regular freelist
 *			that requires the slab lock.
 *
 * PageError		Slab requires special handling due to debug
 *			options set. This moves slab handling out of
 *			the fast path and disables lockless freelists.
 */

#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DEBUG_FREE)

static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#else
	return 0;
#endif
}

/*
 * Issues still to be resolved:
 *
 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
 *
 * - Variable sizing of the per node arrays
 */

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

/* Enable to log cmpxchg failures */
#undef SLUB_DEBUG_CMPXCHG

/*
 * Mininum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in the.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * Debugging flags that require metadata to be stored in the slab. These get
 * disabled when slub_debug=O is used and a cache's min order increases with
 * metadata.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

/*
 * Set of flags that will prevent slab merging
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB)

#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA | SLAB_NOTRACK)

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */

/* Internal SLUB flags */
#define __OBJECT_POISON		0x80000000UL /* Poison object */
#define __CMPXCHG_DOUBLE	0x40000000UL /* Use cmpxchg_double */

static int kmem_size = sizeof(struct kmem_cache);

#ifdef CONFIG_SMP
static struct notifier_block slab_notifier;
#endif

static enum {
	DOWN,		/* No slab functionality available */
	PARTIAL,	/* Kmem_cache_node works */
	UP,		/* Everything works but does not show up in sysfs */
	SYSFS		/* Sysfs up */
} slab_state = DOWN;

/* A list of all slab caches on the system */
static DECLARE_RWSEM(slub_lock);
static LIST_HEAD(slab_caches);

/*
 * Tracking user of a slab.
 */
#define TRACK_ADDRS_COUNT 16
struct track {
	unsigned long addr;	/* Called from address */
#ifdef CONFIG_STACKTRACE
	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Called from address */
#endif
	int cpu;		/* Was running on cpu */
	int pid;		/* Pid context */
	unsigned long when;	/* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };

#ifdef CONFIG_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);

#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s)
{
	kfree(s->name);
	kfree(s);
}

#endif

static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	__this_cpu_inc(s->cpu_slab->stat[si]);
#endif
}

/********************************************************************
 *			Core slab cache functions
 *******************************************************************/

int slab_is_available(void)
{
	return slab_state >= UP;
}

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
	return s->node[node];
}

/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
				struct page *page, const void *object)
{
	void *base;

	if (!object)
		return 1;

	base = page_address(page);
	if (object < base || object >= base + page->objects * s->size ||
		(object - base) % s->size) {
		return 0;
	}

	return 1;
}

static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	return *(void **)(object + s->offset);
}

static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
	void *p;

#ifdef CONFIG_DEBUG_PAGEALLOC
	probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p));
#else
	p = get_freepointer(s, object);
#endif
	return p;
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	*(void **)(object + s->offset) = fp;
}

/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
			__p += (__s)->size)

/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
{
	return (p - addr) / s->size;
}

static inline size_t slab_ksize(const struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Debugging requires use of the padding between object
	 * and whatever may come after it.
	 */
	if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
		return s->objsize;

#endif
	/*
	 * If we have the need to store the freelist pointer
	 * back there or track user information then we can
	 * only use the space before that information.
	 */
	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
		return s->inuse;
	/*
	 * Else we can use all the padding etc for the allocation
	 */
	return s->size;
}

static inline int order_objects(int order, unsigned long size, int reserved)
{
	return ((PAGE_SIZE << order) - reserved) / size;
}

static inline struct kmem_cache_order_objects oo_make(int order,
		unsigned long size, int reserved)
{
	struct kmem_cache_order_objects x = {
		(order << OO_SHIFT) + order_objects(order, size, reserved)
	};

	return x;
}

static inline int oo_order(struct kmem_cache_order_objects x)
{
	return x.x >> OO_SHIFT;
}

static inline int oo_objects(struct kmem_cache_order_objects x)
{
	return x.x & OO_MASK;
}

/*
 * Per slab locking using the pagelock
 */
static __always_inline void slab_lock(struct page *page)
{
	bit_spin_lock(PG_locked, &page->flags);
}

static __always_inline void slab_unlock(struct page *page)
{
	__bit_spin_unlock(PG_locked, &page->flags);
}

/* Interrupts must be disabled (for the fallback code to work right) */
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
		const char *n)
{
	VM_BUG_ON(!irqs_disabled());
#ifdef CONFIG_CMPXCHG_DOUBLE
	if (s->flags & __CMPXCHG_DOUBLE) {
		if (cmpxchg_double(&page->freelist,
			freelist_old, counters_old,
			freelist_new, counters_new))
			return 1;
	} else
#endif
	{
		slab_lock(page);
		if (page->freelist == freelist_old && page->counters == counters_old) {
			page->freelist = freelist_new;
			page->counters = counters_new;
			slab_unlock(page);
			return 1;
		}
		slab_unlock(page);
	}

	cpu_relax();
	stat(s, CMPXCHG_DOUBLE_FAIL);

#ifdef SLUB_DEBUG_CMPXCHG
	printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
#endif

	return 0;
}

static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
		const char *n)
{
#ifdef CONFIG_CMPXCHG_DOUBLE
	if (s->flags & __CMPXCHG_DOUBLE) {
		if (cmpxchg_double(&page->freelist,
			freelist_old, counters_old,
			freelist_new, counters_new))
			return 1;
	} else
#endif
	{
		unsigned long flags;

		local_irq_save(flags);
		slab_lock(page);
		if (page->freelist == freelist_old && page->counters == counters_old) {
			page->freelist = freelist_new;
			page->counters = counters_new;
			slab_unlock(page);
			local_irq_restore(flags);
			return 1;
		}
		slab_unlock(page);
		local_irq_restore(flags);
	}

	cpu_relax();
	stat(s, CMPXCHG_DOUBLE_FAIL);

#ifdef SLUB_DEBUG_CMPXCHG
	printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
#endif

	return 0;
}

#ifdef CONFIG_SLUB_DEBUG
/*
 * Determine a map of object in use on a page.
 *
 * Node listlock must be held to guarantee that the page does
 * not vanish from under us.
 */
static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
{
	void *p;
	void *addr = page_address(page);

	for (p = page->freelist; p; p = get_freepointer(s, p))
		set_bit(slab_index(p, s, addr), map);
}

/*
 * Debug settings:
 */
#ifdef CONFIG_SLUB_DEBUG_ON
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#else
static int slub_debug;
#endif

static char *slub_debug_slabs;
static int disable_higher_order_debug;

/*
 * Object debugging
 */
static void print_section(char *text, u8 *addr, unsigned int length)
{
	print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
			length, 1);
}

static struct track *get_track(struct kmem_cache *s, void *object,
	enum track_item alloc)
{
	struct track *p;

	if (s->offset)
		p = object + s->offset + sizeof(void *);
	else
		p = object + s->inuse;

	return p + alloc;
}

static void set_track(struct kmem_cache *s, void *object,
			enum track_item alloc, unsigned long addr)
{
	struct track *p = get_track(s, object, alloc);

	if (addr) {
#ifdef CONFIG_STACKTRACE
		struct stack_trace trace;
		int i;

		trace.nr_entries = 0;
		trace.max_entries = TRACK_ADDRS_COUNT;
		trace.entries = p->addrs;
		trace.skip = 3;
		save_stack_trace(&trace);

		/* See rant in lockdep.c */
		if (trace.nr_entries != 0 &&
		    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
			trace.nr_entries--;

		for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
			p->addrs[i] = 0;
#endif
		p->addr = addr;
		p->cpu = smp_processor_id();
		p->pid = current->pid;
		p->when = jiffies;
	} else
		memset(p, 0, sizeof(struct track));
}

static void init_tracking(struct kmem_cache *s, void *object)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	set_track(s, object, TRACK_FREE, 0UL);
	set_track(s, object, TRACK_ALLOC, 0UL);
}

static void print_track(const char *s, struct track *t)
{
	if (!t->addr)
		return;

	printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
		s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
#ifdef CONFIG_STACKTRACE
	{
		int i;
		for (i = 0; i < TRACK_ADDRS_COUNT; i++)
			if (t->addrs[i])
				printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
			else
				break;
	}
#endif
}

static void print_tracking(struct kmem_cache *s, void *object)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	print_track("Allocated", get_track(s, object, TRACK_ALLOC));
	print_track("Freed", get_track(s, object, TRACK_FREE));
}

static void print_page_info(struct page *page)
{
	printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
		page, page->objects, page->inuse, page->freelist, page->flags);

}

static void slab_bug(struct kmem_cache *s, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_ERR "========================================"
			"=====================================\n");
	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
	printk(KERN_ERR "----------------------------------------"
			"-------------------------------------\n\n");
}

static void slab_fix(struct kmem_cache *s, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
}

static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
	unsigned int off;	/* Offset of last byte */
	u8 *addr = page_address(page);

	print_tracking(s, p);

	print_page_info(page);

	printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
			p, p - addr, get_freepointer(s, p));

	if (p > addr + 16)
		print_section("Bytes b4 ", p - 16, 16);

	print_section("Object ", p, min_t(unsigned long, s->objsize,
				PAGE_SIZE));
	if (s->flags & SLAB_RED_ZONE)
		print_section("Redzone ", p + s->objsize,
			s->inuse - s->objsize);

	if (s->offset)
		off = s->offset + sizeof(void *);
	else
		off = s->inuse;

	if (s->flags & SLAB_STORE_USER)
		off += 2 * sizeof(struct track);

	if (off != s->size)
		/* Beginning of the filler is the free pointer */
		print_section("Padding ", p + off, s->size - off);

	dump_stack();
}

static void object_err(struct kmem_cache *s, struct page *page,
			u8 *object, char *reason)
{
	slab_bug(s, "%s", reason);
	print_trailer(s, page, object);
}

static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	slab_bug(s, "%s", buf);
	print_page_info(page);
	dump_stack();
}

static void init_object(struct kmem_cache *s, void *object, u8 val)
{
	u8 *p = object;

	if (s->flags & __OBJECT_POISON) {
		memset(p, POISON_FREE, s->objsize - 1);
		p[s->objsize - 1] = POISON_END;
	}

	if (s->flags & SLAB_RED_ZONE)
		memset(p + s->objsize, val, s->inuse - s->objsize);
}

static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
						void *from, void *to)
{
	slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
	memset(from, data, to - from);
}

static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
			u8 *object, char *what,
			u8 *start, unsigned int value, unsigned int bytes)
{
	u8 *fault;
	u8 *end;

	fault = memchr_inv(start, value, bytes);
	if (!fault)
		return 1;

	end = start + bytes;
	while (end > fault && end[-1] == value)
		end--;

	slab_bug(s, "%s overwritten", what);
	printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
					fault, end - 1, fault[0], value);
	print_trailer(s, page, object);

	restore_bytes(s, what, value, fault, end);
	return 0;
}

/*
 * Object layout:
 *
 * object address
 * 	Bytes of the object to be managed.
 * 	If the freepointer may overlay the object then the free
 * 	pointer is the first word of the object.
 *
 * 	Poisoning uses 0x6b (POISON_FREE) and the last byte is
 * 	0xa5 (POISON_END)
 *
 * object + s->objsize
 * 	Padding to reach word boundary. This is also used for Redzoning.
 * 	Padding is extended by another word if Redzoning is enabled and
 * 	objsize == inuse.
 *
 * 	We fill with 0xbb (RED_INACTIVE) for inactive objects and with
 * 	0xcc (RED_ACTIVE) for objects in use.
 *
 * object + s->inuse
 * 	Meta data starts here.
 *
 * 	A. Free pointer (if we cannot overwrite object on free)
 * 	B. Tracking data for SLAB_STORE_USER
 * 	C. Padding to reach required alignment boundary or at mininum
 * 		one word if debugging is on to be able to detect writes
 * 		before the word boundary.
 *
 * 	Padding is done using 0x5a (POISON_INUSE)
 *
 * object + s->size
 * 	Nothing is used beyond s->size.
 *
 * If slabcaches are merged then the objsize and inuse boundaries are mostly
 * ignored. And therefore no slab options that rely on these boundaries
 * may be used with merged slabcaches.
 */

static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
{
	unsigned long off = s->inuse;	/* The end of info */

	if (s->offset)
		/* Freepointer is placed after the object. */
		off += sizeof(void *);

	if (s->flags & SLAB_STORE_USER)
		/* We also have user information there */
		off += 2 * sizeof(struct track);

	if (s->size == off)
		return 1;

	return check_bytes_and_report(s, page, p, "Object padding",
				p + off, POISON_INUSE, s->size - off);
}

/* Check the pad bytes at the end of a slab page */
static int slab_pad_check(struct kmem_cache *s, struct page *page)
{
	u8 *start;
	u8 *fault;
	u8 *end;
	int length;
	int remainder;

	if (!(s->flags & SLAB_POISON))
		return 1;

	start = page_address(page);
	length = (PAGE_SIZE << compound_order(page)) - s->reserved;
	end = start + length;
	remainder = length % s->size;
	if (!remainder)
		return 1;

	fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
	if (!fault)
		return 1;
	while (end > fault && end[-1] == POISON_INUSE)
		end--;

	slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
	print_section("Padding ", end - remainder, remainder);

	restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
	return 0;
}

static int check_object(struct kmem_cache *s, struct page *page,
					void *object, u8 val)
{
	u8 *p = object;
	u8 *endobject = object + s->objsize;

	if (s->flags & SLAB_RED_ZONE) {
		if (!check_bytes_and_report(s, page, object, "Redzone",
			endobject, val, s->inuse - s->objsize))
			return 0;
	} else {
		if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
			check_bytes_and_report(s, page, p, "Alignment padding",
				endobject, POISON_INUSE, s->inuse - s->objsize);
		}
	}

	if (s->flags & SLAB_POISON) {
		if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
			(!check_bytes_and_report(s, page, p, "Poison", p,
					POISON_FREE, s->objsize - 1) ||
			!check_bytes_and_report(s, page, p, "Poison",
				p + s->objsize - 1, POISON_END, 1)))
			return 0;
		/*
		 * check_pad_bytes cleans up on its own.
		 */
		check_pad_bytes(s, page, p);
	}

	if (!s->offset && val == SLUB_RED_ACTIVE)
		/*
		 * Object and freepointer overlap. Cannot check
		 * freepointer while object is allocated.
		 */
		return 1;

	/* Check free pointer validity */
	if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
		object_err(s, page, p, "Freepointer corrupt");
		/*
		 * No choice but to zap it and thus lose the remainder
		 * of the free objects in this slab. May cause
		 * another error because the object count is now wrong.
		 */
		set_freepointer(s, p, NULL);
		return 0;
	}
	return 1;
}

static int check_slab(struct kmem_cache *s, struct page *page)
{
	int maxobj;

	VM_BUG_ON(!irqs_disabled());

	if (!PageSlab(page)) {
		slab_err(s, page, "Not a valid slab page");
		return 0;
	}

	maxobj = order_objects(compound_order(page), s->size, s->reserved);
	if (page->objects > maxobj) {
		slab_err(s, page, "objects %u > max %u",
			s->name, page->objects, maxobj);
		return 0;
	}
	if (page->inuse > page->objects) {
		slab_err(s, page, "inuse %u > max %u",
			s->name, page->inuse, page->objects);
		return 0;
	}
	/* Slab_pad_check fixes things up after itself */
	slab_pad_check(s, page);
	return 1;
}

/*
 * Determine if a certain object on a page is on the freelist. Must hold the
 * slab lock to guarantee that the chains are in a consistent state.
 */
static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
{
	int nr = 0;
	void *fp;
	void *object = NULL;
	unsigned long max_objects;

	fp = page->freelist;
	while (fp && nr <= page->objects) {
		if (fp == search)
			return 1;
		if (!check_valid_pointer(s, page, fp)) {
			if (object) {
				object_err(s, page, object,
					"Freechain corrupt");
				set_freepointer(s, object, NULL);
				break;
			} else {
				slab_err(s, page, "Freepointer corrupt");
				page->freelist = NULL;
				page->inuse = page->objects;
				slab_fix(s, "Freelist cleared");
				return 0;
			}
			break;
		}
		object = fp;
		fp = get_freepointer(s, object);
		nr++;
	}

	max_objects = order_objects(compound_order(page), s->size, s->reserved);
	if (max_objects > MAX_OBJS_PER_PAGE)
		max_objects = MAX_OBJS_PER_PAGE;

	if (page->objects != max_objects) {
		slab_err(s, page, "Wrong number of objects. Found %d but "
			"should be %d", page->objects, max_objects);
		page->objects = max_objects;
		slab_fix(s, "Number of objects adjusted.");
	}
	if (page->inuse != page->objects - nr) {
		slab_err(s, page, "Wrong object count. Counter is %d but "
			"counted were %d", page->inuse, page->objects - nr);
		page->inuse = page->objects - nr;
		slab_fix(s, "Object count adjusted.");
	}
	return search == NULL;
}

static void trace(struct kmem_cache *s, struct page *page, void *object,
								int alloc)
{
	if (s->flags & SLAB_TRACE) {
		printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
			s->name,
			alloc ? "alloc" : "free",
			object, page->inuse,
			page->freelist);

		if (!alloc)
			print_section("Object ", (void *)object, s->objsize);

		dump_stack();
	}
}

/*
 * Hooks for other subsystems that check memory allocations. In a typical
 * production configuration these hooks all should produce no code at all.
 */
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
{
	flags &= gfp_allowed_mask;
	lockdep_trace_alloc(flags);
	might_sleep_if(flags & __GFP_WAIT);

	return should_failslab(s->objsize, flags, s->flags);
}

static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
{
	flags &= gfp_allowed_mask;
	kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
	kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags);
}

static inline void slab_free_hook(struct kmem_cache *s, void *x)
{
	kmemleak_free_recursive(x, s->flags);

	/*
	 * Trouble is that we may no longer disable interupts in the fast path
	 * So in order to make the debug calls that expect irqs to be
	 * disabled we need to disable interrupts temporarily.
	 */
#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
	{
		unsigned long flags;

		local_irq_save(flags);
		kmemcheck_slab_free(s, x, s->objsize);
		debug_check_no_locks_freed(x, s->objsize);
		local_irq_restore(flags);
	}
#endif
	if (!(s->flags & SLAB_DEBUG_OBJECTS))
		debug_check_no_obj_freed(x, s->objsize);
}

/*
 * Tracking of fully allocated slabs for debugging purposes.
 *
 * list_lock must be held.
 */
static void add_full(struct kmem_cache *s,
	struct kmem_cache_node *n, struct page *page)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	list_add(&page->lru, &n->full);
}

/*
 * list_lock must be held.
 */
static void remove_full(struct kmem_cache *s, struct page *page)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	list_del(&page->lru);
}

/* Tracking of the number of slabs for debugging purposes */
static inline unsigned long slabs_node(struct kmem_cache *s, int node)
{
	struct kmem_cache_node *n = get_node(s, node);

	return atomic_long_read(&n->nr_slabs);
}

static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
{
	return atomic_long_read(&n->nr_slabs);
}

static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
{
	struct kmem_cache_node *n = get_node(s, node);

	/*
	 * May be called early in order to allocate a slab for the
	 * kmem_cache_node structure. Solve the chicken-egg
	 * dilemma by deferring the increment of the count during
	 * bootstrap (see early_kmem_cache_node_alloc).
	 */
	if (n) {
		atomic_long_inc(&n->nr_slabs);
		atomic_long_add(objects, &n->total_objects);
1021 } 1021 }
1022 } 1022 }
1023 static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) 1023 static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1024 { 1024 {
1025 struct kmem_cache_node *n = get_node(s, node); 1025 struct kmem_cache_node *n = get_node(s, node);
1026 1026
1027 atomic_long_dec(&n->nr_slabs); 1027 atomic_long_dec(&n->nr_slabs);
1028 atomic_long_sub(objects, &n->total_objects); 1028 atomic_long_sub(objects, &n->total_objects);
1029 } 1029 }
1030 1030
1031 /* Object debug checks for alloc/free paths */ 1031 /* Object debug checks for alloc/free paths */
1032 static void setup_object_debug(struct kmem_cache *s, struct page *page, 1032 static void setup_object_debug(struct kmem_cache *s, struct page *page,
1033 void *object) 1033 void *object)
1034 { 1034 {
1035 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))) 1035 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1036 return; 1036 return;
1037 1037
1038 init_object(s, object, SLUB_RED_INACTIVE); 1038 init_object(s, object, SLUB_RED_INACTIVE);
1039 init_tracking(s, object); 1039 init_tracking(s, object);
1040 } 1040 }
1041 1041
1042 static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page, 1042 static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page,
1043 void *object, unsigned long addr) 1043 void *object, unsigned long addr)
1044 { 1044 {
1045 if (!check_slab(s, page)) 1045 if (!check_slab(s, page))
1046 goto bad; 1046 goto bad;
1047 1047
1048 if (!check_valid_pointer(s, page, object)) { 1048 if (!check_valid_pointer(s, page, object)) {
1049 object_err(s, page, object, "Freelist Pointer check fails"); 1049 object_err(s, page, object, "Freelist Pointer check fails");
1050 goto bad; 1050 goto bad;
1051 } 1051 }
1052 1052
1053 if (!check_object(s, page, object, SLUB_RED_INACTIVE)) 1053 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1054 goto bad; 1054 goto bad;
1055 1055
1056 /* Success. Perform special debug activities for allocs */ 1056 /* Success. Perform special debug activities for allocs */
1057 if (s->flags & SLAB_STORE_USER) 1057 if (s->flags & SLAB_STORE_USER)
1058 set_track(s, object, TRACK_ALLOC, addr); 1058 set_track(s, object, TRACK_ALLOC, addr);
1059 trace(s, page, object, 1); 1059 trace(s, page, object, 1);
1060 init_object(s, object, SLUB_RED_ACTIVE); 1060 init_object(s, object, SLUB_RED_ACTIVE);
1061 return 1; 1061 return 1;
1062 1062
1063 bad: 1063 bad:
1064 if (PageSlab(page)) { 1064 if (PageSlab(page)) {
1065 /* 1065 /*
1066 * If this is a slab page then lets do the best we can 1066 * If this is a slab page then lets do the best we can
1067 * to avoid issues in the future. Marking all objects 1067 * to avoid issues in the future. Marking all objects
1068 * as used avoids touching the remaining objects. 1068 * as used avoids touching the remaining objects.
1069 */ 1069 */
1070 slab_fix(s, "Marking all objects used"); 1070 slab_fix(s, "Marking all objects used");
1071 page->inuse = page->objects; 1071 page->inuse = page->objects;
1072 page->freelist = NULL; 1072 page->freelist = NULL;
1073 } 1073 }
1074 return 0; 1074 return 0;
1075 } 1075 }
1076 1076
1077 static noinline int free_debug_processing(struct kmem_cache *s, 1077 static noinline int free_debug_processing(struct kmem_cache *s,
1078 struct page *page, void *object, unsigned long addr) 1078 struct page *page, void *object, unsigned long addr)
1079 { 1079 {
1080 unsigned long flags; 1080 unsigned long flags;
1081 int rc = 0; 1081 int rc = 0;
1082 1082
1083 local_irq_save(flags); 1083 local_irq_save(flags);
1084 slab_lock(page); 1084 slab_lock(page);
1085 1085
1086 if (!check_slab(s, page)) 1086 if (!check_slab(s, page))
1087 goto fail; 1087 goto fail;
1088 1088
1089 if (!check_valid_pointer(s, page, object)) { 1089 if (!check_valid_pointer(s, page, object)) {
1090 slab_err(s, page, "Invalid object pointer 0x%p", object); 1090 slab_err(s, page, "Invalid object pointer 0x%p", object);
1091 goto fail; 1091 goto fail;
1092 } 1092 }
1093 1093
1094 if (on_freelist(s, page, object)) { 1094 if (on_freelist(s, page, object)) {
1095 object_err(s, page, object, "Object already free"); 1095 object_err(s, page, object, "Object already free");
1096 goto fail; 1096 goto fail;
1097 } 1097 }
1098 1098
1099 if (!check_object(s, page, object, SLUB_RED_ACTIVE)) 1099 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1100 goto out; 1100 goto out;
1101 1101
1102 if (unlikely(s != page->slab)) { 1102 if (unlikely(s != page->slab)) {
1103 if (!PageSlab(page)) { 1103 if (!PageSlab(page)) {
1104 slab_err(s, page, "Attempt to free object(0x%p) " 1104 slab_err(s, page, "Attempt to free object(0x%p) "
1105 "outside of slab", object); 1105 "outside of slab", object);
1106 } else if (!page->slab) { 1106 } else if (!page->slab) {
1107 printk(KERN_ERR 1107 printk(KERN_ERR
1108 "SLUB <none>: no slab for object 0x%p.\n", 1108 "SLUB <none>: no slab for object 0x%p.\n",
1109 object); 1109 object);
1110 dump_stack(); 1110 dump_stack();
1111 } else 1111 } else
1112 object_err(s, page, object, 1112 object_err(s, page, object,
1113 "page slab pointer corrupt."); 1113 "page slab pointer corrupt.");
1114 goto fail; 1114 goto fail;
1115 } 1115 }
1116 1116
1117 if (s->flags & SLAB_STORE_USER) 1117 if (s->flags & SLAB_STORE_USER)
1118 set_track(s, object, TRACK_FREE, addr); 1118 set_track(s, object, TRACK_FREE, addr);
1119 trace(s, page, object, 0); 1119 trace(s, page, object, 0);
1120 init_object(s, object, SLUB_RED_INACTIVE); 1120 init_object(s, object, SLUB_RED_INACTIVE);
1121 rc = 1; 1121 rc = 1;
1122 out: 1122 out:
1123 slab_unlock(page); 1123 slab_unlock(page);
1124 local_irq_restore(flags); 1124 local_irq_restore(flags);
1125 return rc; 1125 return rc;
1126 1126
1127 fail: 1127 fail:
1128 slab_fix(s, "Object at 0x%p not freed", object); 1128 slab_fix(s, "Object at 0x%p not freed", object);
1129 goto out; 1129 goto out;
1130 } 1130 }
1131 1131
1132 static int __init setup_slub_debug(char *str) 1132 static int __init setup_slub_debug(char *str)
1133 { 1133 {
1134 slub_debug = DEBUG_DEFAULT_FLAGS; 1134 slub_debug = DEBUG_DEFAULT_FLAGS;
1135 if (*str++ != '=' || !*str) 1135 if (*str++ != '=' || !*str)
1136 /* 1136 /*
1137 * No options specified. Switch on full debugging. 1137 * No options specified. Switch on full debugging.
1138 */ 1138 */
1139 goto out; 1139 goto out;
1140 1140
1141 if (*str == ',') 1141 if (*str == ',')
1142 /* 1142 /*
1143 * No options but restriction on slabs. This means full 1143 * No options but restriction on slabs. This means full
1144 * debugging for slabs matching a pattern. 1144 * debugging for slabs matching a pattern.
1145 */ 1145 */
1146 goto check_slabs; 1146 goto check_slabs;
1147 1147
1148 if (tolower(*str) == 'o') { 1148 if (tolower(*str) == 'o') {
1149 /* 1149 /*
1150 * Avoid enabling debugging on caches if their minimum order 1150 * Avoid enabling debugging on caches if their minimum order
1151 * would increase as a result. 1151 * would increase as a result.
1152 */ 1152 */
1153 disable_higher_order_debug = 1; 1153 disable_higher_order_debug = 1;
1154 goto out; 1154 goto out;
1155 } 1155 }
1156 1156
1157 slub_debug = 0; 1157 slub_debug = 0;
1158 if (*str == '-') 1158 if (*str == '-')
1159 /* 1159 /*
1160 * Switch off all debugging measures. 1160 * Switch off all debugging measures.
1161 */ 1161 */
1162 goto out; 1162 goto out;
1163 1163
1164 /* 1164 /*
1165 * Determine which debug features should be switched on 1165 * Determine which debug features should be switched on
1166 */ 1166 */
1167 for (; *str && *str != ','; str++) { 1167 for (; *str && *str != ','; str++) {
1168 switch (tolower(*str)) { 1168 switch (tolower(*str)) {
1169 case 'f': 1169 case 'f':
1170 slub_debug |= SLAB_DEBUG_FREE; 1170 slub_debug |= SLAB_DEBUG_FREE;
1171 break; 1171 break;
1172 case 'z': 1172 case 'z':
1173 slub_debug |= SLAB_RED_ZONE; 1173 slub_debug |= SLAB_RED_ZONE;
1174 break; 1174 break;
1175 case 'p': 1175 case 'p':
1176 slub_debug |= SLAB_POISON; 1176 slub_debug |= SLAB_POISON;
1177 break; 1177 break;
1178 case 'u': 1178 case 'u':
1179 slub_debug |= SLAB_STORE_USER; 1179 slub_debug |= SLAB_STORE_USER;
1180 break; 1180 break;
1181 case 't': 1181 case 't':
1182 slub_debug |= SLAB_TRACE; 1182 slub_debug |= SLAB_TRACE;
1183 break; 1183 break;
1184 case 'a': 1184 case 'a':
1185 slub_debug |= SLAB_FAILSLAB; 1185 slub_debug |= SLAB_FAILSLAB;
1186 break; 1186 break;
1187 default: 1187 default:
1188 printk(KERN_ERR "slub_debug option '%c' " 1188 printk(KERN_ERR "slub_debug option '%c' "
1189 "unknown. skipped\n", *str); 1189 "unknown. skipped\n", *str);
1190 } 1190 }
1191 } 1191 }
1192 1192
1193 check_slabs: 1193 check_slabs:
1194 if (*str == ',') 1194 if (*str == ',')
1195 slub_debug_slabs = str + 1; 1195 slub_debug_slabs = str + 1;
1196 out: 1196 out:
1197 return 1; 1197 return 1;
1198 } 1198 }
1199 1199
1200 __setup("slub_debug", setup_slub_debug); 1200 __setup("slub_debug", setup_slub_debug);
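The parser above turns each letter after "slub_debug=" into one debug flag and treats anything after a comma as a cache-name pattern, which kmem_cache_flags() below matches as a prefix of the cache name. A rough user-space mirror of that option loop, useful for reasoning about what a given boot string enables (the flag names and values here are illustrative stand-ins, not the kernel's SLAB_* constants):

        #include <ctype.h>
        #include <stdio.h>

        #define F_DEBUG_FREE 0x01  /* 'f': consistency checks on free */
        #define F_RED_ZONE   0x02  /* 'z': red zoning around objects */
        #define F_POISON     0x04  /* 'p': poison freed objects */
        #define F_STORE_USER 0x08  /* 'u': record last alloc/free owner */
        #define F_TRACE      0x10  /* 't': trace every alloc/free */
        #define F_FAILSLAB   0x20  /* 'a': allow fault injection */

        /* Mirrors the letter loop in setup_slub_debug(): one flag per
         * option character, everything after ',' restricts the caches. */
        static unsigned parse_slub_debug(const char *str, const char **slabs)
        {
                unsigned flags = 0;

                *slabs = NULL;
                for (; *str && *str != ','; str++) {
                        switch (tolower((unsigned char)*str)) {
                        case 'f': flags |= F_DEBUG_FREE; break;
                        case 'z': flags |= F_RED_ZONE;   break;
                        case 'p': flags |= F_POISON;     break;
                        case 'u': flags |= F_STORE_USER; break;
                        case 't': flags |= F_TRACE;      break;
                        case 'a': flags |= F_FAILSLAB;   break;
                        default:
                                fprintf(stderr, "unknown option '%c'\n", *str);
                        }
                }
                if (*str == ',')
                        *slabs = str + 1;
                return flags;
        }

        int main(void)
        {
                const char *slabs;
                unsigned flags = parse_slub_debug("FZP,dentry", &slabs);

                printf("flags=0x%x slabs=%s\n", flags, slabs ? slabs : "(all)");
                return 0;
        }

So booting with slub_debug=FZP,dentry would, per the switch above, enable sanity checks, red zoning and poisoning only for caches whose names begin with "dentry", while a bare slub_debug switches on the default debug set for every cache.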
1201 1201
1202 static unsigned long kmem_cache_flags(unsigned long objsize, 1202 static unsigned long kmem_cache_flags(unsigned long objsize,
1203 unsigned long flags, const char *name, 1203 unsigned long flags, const char *name,
1204 void (*ctor)(void *)) 1204 void (*ctor)(void *))
1205 { 1205 {
1206 /* 1206 /*
1207 * Enable debugging if selected on the kernel commandline. 1207 * Enable debugging if selected on the kernel commandline.
1208 */ 1208 */
1209 if (slub_debug && (!slub_debug_slabs || 1209 if (slub_debug && (!slub_debug_slabs ||
1210 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))) 1210 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
1211 flags |= slub_debug; 1211 flags |= slub_debug;
1212 1212
1213 return flags; 1213 return flags;
1214 } 1214 }
1215 #else 1215 #else
1216 static inline void setup_object_debug(struct kmem_cache *s, 1216 static inline void setup_object_debug(struct kmem_cache *s,
1217 struct page *page, void *object) {} 1217 struct page *page, void *object) {}
1218 1218
1219 static inline int alloc_debug_processing(struct kmem_cache *s, 1219 static inline int alloc_debug_processing(struct kmem_cache *s,
1220 struct page *page, void *object, unsigned long addr) { return 0; } 1220 struct page *page, void *object, unsigned long addr) { return 0; }
1221 1221
1222 static inline int free_debug_processing(struct kmem_cache *s, 1222 static inline int free_debug_processing(struct kmem_cache *s,
1223 struct page *page, void *object, unsigned long addr) { return 0; } 1223 struct page *page, void *object, unsigned long addr) { return 0; }
1224 1224
1225 static inline int slab_pad_check(struct kmem_cache *s, struct page *page) 1225 static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1226 { return 1; } 1226 { return 1; }
1227 static inline int check_object(struct kmem_cache *s, struct page *page, 1227 static inline int check_object(struct kmem_cache *s, struct page *page,
1228 void *object, u8 val) { return 1; } 1228 void *object, u8 val) { return 1; }
1229 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, 1229 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1230 struct page *page) {} 1230 struct page *page) {}
1231 static inline void remove_full(struct kmem_cache *s, struct page *page) {} 1231 static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1232 static inline unsigned long kmem_cache_flags(unsigned long objsize, 1232 static inline unsigned long kmem_cache_flags(unsigned long objsize,
1233 unsigned long flags, const char *name, 1233 unsigned long flags, const char *name,
1234 void (*ctor)(void *)) 1234 void (*ctor)(void *))
1235 { 1235 {
1236 return flags; 1236 return flags;
1237 } 1237 }
1238 #define slub_debug 0 1238 #define slub_debug 0
1239 1239
1240 #define disable_higher_order_debug 0 1240 #define disable_higher_order_debug 0
1241 1241
1242 static inline unsigned long slabs_node(struct kmem_cache *s, int node) 1242 static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1243 { return 0; } 1243 { return 0; }
1244 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n) 1244 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1245 { return 0; } 1245 { return 0; }
1246 static inline void inc_slabs_node(struct kmem_cache *s, int node, 1246 static inline void inc_slabs_node(struct kmem_cache *s, int node,
1247 int objects) {} 1247 int objects) {}
1248 static inline void dec_slabs_node(struct kmem_cache *s, int node, 1248 static inline void dec_slabs_node(struct kmem_cache *s, int node,
1249 int objects) {} 1249 int objects) {}
1250 1250
1251 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) 1251 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1252 { return 0; } 1252 { return 0; }
1253 1253
1254 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, 1254 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
1255 void *object) {} 1255 void *object) {}
1256 1256
1257 static inline void slab_free_hook(struct kmem_cache *s, void *x) {} 1257 static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
1258 1258
1259 #endif /* CONFIG_SLUB_DEBUG */ 1259 #endif /* CONFIG_SLUB_DEBUG */
1260 1260
1261 /* 1261 /*
1262 * Slab allocation and freeing 1262 * Slab allocation and freeing
1263 */ 1263 */
1264 static inline struct page *alloc_slab_page(gfp_t flags, int node, 1264 static inline struct page *alloc_slab_page(gfp_t flags, int node,
1265 struct kmem_cache_order_objects oo) 1265 struct kmem_cache_order_objects oo)
1266 { 1266 {
1267 int order = oo_order(oo); 1267 int order = oo_order(oo);
1268 1268
1269 flags |= __GFP_NOTRACK; 1269 flags |= __GFP_NOTRACK;
1270 1270
1271 if (node == NUMA_NO_NODE) 1271 if (node == NUMA_NO_NODE)
1272 return alloc_pages(flags, order); 1272 return alloc_pages(flags, order);
1273 else 1273 else
1274 return alloc_pages_exact_node(node, flags, order); 1274 return alloc_pages_exact_node(node, flags, order);
1275 } 1275 }
1276 1276
1277 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) 1277 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1278 { 1278 {
1279 struct page *page; 1279 struct page *page;
1280 struct kmem_cache_order_objects oo = s->oo; 1280 struct kmem_cache_order_objects oo = s->oo;
1281 gfp_t alloc_gfp; 1281 gfp_t alloc_gfp;
1282 1282
1283 flags &= gfp_allowed_mask; 1283 flags &= gfp_allowed_mask;
1284 1284
1285 if (flags & __GFP_WAIT) 1285 if (flags & __GFP_WAIT)
1286 local_irq_enable(); 1286 local_irq_enable();
1287 1287
1288 flags |= s->allocflags; 1288 flags |= s->allocflags;
1289 1289
1290 /* 1290 /*
1291 * Let the initial higher-order allocation fail under memory pressure 1291 * Let the initial higher-order allocation fail under memory pressure
1292 * so we fall-back to the minimum order allocation. 1292 * so we fall-back to the minimum order allocation.
1293 */ 1293 */
1294 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; 1294 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1295 1295
1296 page = alloc_slab_page(alloc_gfp, node, oo); 1296 page = alloc_slab_page(alloc_gfp, node, oo);
1297 if (unlikely(!page)) { 1297 if (unlikely(!page)) {
1298 oo = s->min; 1298 oo = s->min;
1299 /* 1299 /*
1300 * Allocation may have failed due to fragmentation. 1300 * Allocation may have failed due to fragmentation.
1301 * Try a lower order alloc if possible 1301 * Try a lower order alloc if possible
1302 */ 1302 */
1303 page = alloc_slab_page(flags, node, oo); 1303 page = alloc_slab_page(flags, node, oo);
1304 1304
1305 if (page) 1305 if (page)
1306 stat(s, ORDER_FALLBACK); 1306 stat(s, ORDER_FALLBACK);
1307 } 1307 }
1308 1308
1309 if (flags & __GFP_WAIT) 1309 if (flags & __GFP_WAIT)
1310 local_irq_disable(); 1310 local_irq_disable();
1311 1311
1312 if (!page) 1312 if (!page)
1313 return NULL; 1313 return NULL;
1314 1314
1315 if (kmemcheck_enabled 1315 if (kmemcheck_enabled
1316 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { 1316 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1317 int pages = 1 << oo_order(oo); 1317 int pages = 1 << oo_order(oo);
1318 1318
1319 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node); 1319 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1320 1320
1321 /* 1321 /*
1322 * Objects from caches that have a constructor don't get 1322 * Objects from caches that have a constructor don't get
1323 * cleared when they're allocated, so we need to do it here. 1323 * cleared when they're allocated, so we need to do it here.
1324 */ 1324 */
1325 if (s->ctor) 1325 if (s->ctor)
1326 kmemcheck_mark_uninitialized_pages(page, pages); 1326 kmemcheck_mark_uninitialized_pages(page, pages);
1327 else 1327 else
1328 kmemcheck_mark_unallocated_pages(page, pages); 1328 kmemcheck_mark_unallocated_pages(page, pages);
1329 } 1329 }
1330 1330
1331 page->objects = oo_objects(oo); 1331 page->objects = oo_objects(oo);
1332 mod_zone_page_state(page_zone(page), 1332 mod_zone_page_state(page_zone(page),
1333 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1333 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1334 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1334 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1335 1 << oo_order(oo)); 1335 1 << oo_order(oo));
1336 1336
1337 return page; 1337 return page;
1338 } 1338 }
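To put numbers on the oo/min fallback above, assuming 4 KiB pages (PAGE_SIZE varies by architecture) and ignoring the metadata that debugging options add per object, the little calculation below shows what is at stake when the opportunistic higher-order allocation fails and the code retries at s->min:

        #include <stdio.h>

        #define PAGE_BYTES 4096u        /* assumed page size */

        /* Objects that fit a slab of 2^order pages, metadata ignored. */
        static unsigned objects_per_slab(unsigned order, unsigned object_size)
        {
                return (PAGE_BYTES << order) / object_size;
        }

        int main(void)
        {
                unsigned size = 256;    /* hypothetical object size */

                /* Preferred order 3: 32768 bytes, 128 objects per slab. */
                printf("order 3: %u objects\n", objects_per_slab(3, size));
                /* Fallback order 0: 4096 bytes, only 16 objects per slab. */
                printf("order 0: %u objects\n", objects_per_slab(0, size));
                return 0;
        }

The ORDER_FALLBACK statistic counts exactly those retries, so a growing value signals that fragmentation is forcing the cache down to its less efficient minimum order.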
1339 1339
1340 static void setup_object(struct kmem_cache *s, struct page *page, 1340 static void setup_object(struct kmem_cache *s, struct page *page,
1341 void *object) 1341 void *object)
1342 { 1342 {
1343 setup_object_debug(s, page, object); 1343 setup_object_debug(s, page, object);
1344 if (unlikely(s->ctor)) 1344 if (unlikely(s->ctor))
1345 s->ctor(object); 1345 s->ctor(object);
1346 } 1346 }
1347 1347
1348 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) 1348 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1349 { 1349 {
1350 struct page *page; 1350 struct page *page;
1351 void *start; 1351 void *start;
1352 void *last; 1352 void *last;
1353 void *p; 1353 void *p;
1354 1354
1355 BUG_ON(flags & GFP_SLAB_BUG_MASK); 1355 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1356 1356
1357 page = allocate_slab(s, 1357 page = allocate_slab(s,
1358 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); 1358 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1359 if (!page) 1359 if (!page)
1360 goto out; 1360 goto out;
1361 1361
1362 inc_slabs_node(s, page_to_nid(page), page->objects); 1362 inc_slabs_node(s, page_to_nid(page), page->objects);
1363 page->slab = s; 1363 page->slab = s;
1364 page->flags |= 1 << PG_slab; 1364 page->flags |= 1 << PG_slab;
1365 1365
1366 start = page_address(page); 1366 start = page_address(page);
1367 1367
1368 if (unlikely(s->flags & SLAB_POISON)) 1368 if (unlikely(s->flags & SLAB_POISON))
1369 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page)); 1369 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
1370 1370
1371 last = start; 1371 last = start;
1372 for_each_object(p, s, start, page->objects) { 1372 for_each_object(p, s, start, page->objects) {
1373 setup_object(s, page, last); 1373 setup_object(s, page, last);
1374 set_freepointer(s, last, p); 1374 set_freepointer(s, last, p);
1375 last = p; 1375 last = p;
1376 } 1376 }
1377 setup_object(s, page, last); 1377 setup_object(s, page, last);
1378 set_freepointer(s, last, NULL); 1378 set_freepointer(s, last, NULL);
1379 1379
1380 page->freelist = start; 1380 page->freelist = start;
1381 page->inuse = page->objects; 1381 page->inuse = page->objects;
1382 page->frozen = 1; 1382 page->frozen = 1;
1383 out: 1383 out:
1384 return page; 1384 return page;
1385 } 1385 }
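The loop above threads the free pointer through every object so a fresh slab comes out as a single freelist terminated by NULL, with page->freelist pointing at the first object. A stripped-down user-space version of that threading, assuming the next-free pointer lives at offset 0 of each object (real SLUB stores it at s->offset, which may be elsewhere):

        #include <stdio.h>
        #include <stdlib.h>

        /* Thread a freelist through a buffer of equally sized objects
         * (assumes nr >= 1): each object's first word points at the next
         * free object and the last one points at NULL. */
        static void *build_freelist(void *buf, size_t size, size_t nr)
        {
                char *obj = buf;
                size_t i;

                for (i = 0; i + 1 < nr; i++, obj += size)
                        *(void **)obj = obj + size;     /* set_freepointer(obj, next) */
                *(void **)obj = NULL;                   /* last object ends the list */
                return buf;                             /* page->freelist = start */
        }

        int main(void)
        {
                enum { SIZE = 64, NR = 4 };
                void *slab = calloc(NR, SIZE);
                void *p;
                int n = 0;

                if (!slab)
                        return 1;
                for (p = build_freelist(slab, SIZE, NR); p; p = *(void **)p)
                        n++;
                printf("freelist holds %d objects\n", n);       /* prints 4 */
                free(slab);
                return 0;
        }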
1386 1386
1387 static void __free_slab(struct kmem_cache *s, struct page *page) 1387 static void __free_slab(struct kmem_cache *s, struct page *page)
1388 { 1388 {
1389 int order = compound_order(page); 1389 int order = compound_order(page);
1390 int pages = 1 << order; 1390 int pages = 1 << order;
1391 1391
1392 if (kmem_cache_debug(s)) { 1392 if (kmem_cache_debug(s)) {
1393 void *p; 1393 void *p;
1394 1394
1395 slab_pad_check(s, page); 1395 slab_pad_check(s, page);
1396 for_each_object(p, s, page_address(page), 1396 for_each_object(p, s, page_address(page),
1397 page->objects) 1397 page->objects)
1398 check_object(s, page, p, SLUB_RED_INACTIVE); 1398 check_object(s, page, p, SLUB_RED_INACTIVE);
1399 } 1399 }
1400 1400
1401 kmemcheck_free_shadow(page, compound_order(page)); 1401 kmemcheck_free_shadow(page, compound_order(page));
1402 1402
1403 mod_zone_page_state(page_zone(page), 1403 mod_zone_page_state(page_zone(page),
1404 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1404 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1405 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1405 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1406 -pages); 1406 -pages);
1407 1407
1408 __ClearPageSlab(page); 1408 __ClearPageSlab(page);
1409 reset_page_mapcount(page); 1409 reset_page_mapcount(page);
1410 if (current->reclaim_state) 1410 if (current->reclaim_state)
1411 current->reclaim_state->reclaimed_slab += pages; 1411 current->reclaim_state->reclaimed_slab += pages;
1412 __free_pages(page, order); 1412 __free_pages(page, order);
1413 } 1413 }
1414 1414
1415 #define need_reserve_slab_rcu \ 1415 #define need_reserve_slab_rcu \
1416 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) 1416 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1417 1417
1418 static void rcu_free_slab(struct rcu_head *h) 1418 static void rcu_free_slab(struct rcu_head *h)
1419 { 1419 {
1420 struct page *page; 1420 struct page *page;
1421 1421
1422 if (need_reserve_slab_rcu) 1422 if (need_reserve_slab_rcu)
1423 page = virt_to_head_page(h); 1423 page = virt_to_head_page(h);
1424 else 1424 else
1425 page = container_of((struct list_head *)h, struct page, lru); 1425 page = container_of((struct list_head *)h, struct page, lru);
1426 1426
1427 __free_slab(page->slab, page); 1427 __free_slab(page->slab, page);
1428 } 1428 }
1429 1429
1430 static void free_slab(struct kmem_cache *s, struct page *page) 1430 static void free_slab(struct kmem_cache *s, struct page *page)
1431 { 1431 {
1432 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { 1432 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1433 struct rcu_head *head; 1433 struct rcu_head *head;
1434 1434
1435 if (need_reserve_slab_rcu) { 1435 if (need_reserve_slab_rcu) {
1436 int order = compound_order(page); 1436 int order = compound_order(page);
1437 int offset = (PAGE_SIZE << order) - s->reserved; 1437 int offset = (PAGE_SIZE << order) - s->reserved;
1438 1438
1439 VM_BUG_ON(s->reserved != sizeof(*head)); 1439 VM_BUG_ON(s->reserved != sizeof(*head));
1440 head = page_address(page) + offset; 1440 head = page_address(page) + offset;
1441 } else { 1441 } else {
1442 /* 1442 /*
1443 * RCU free overloads the RCU head over the LRU 1443 * RCU free overloads the RCU head over the LRU
1444 */ 1444 */
1445 head = (void *)&page->lru; 1445 head = (void *)&page->lru;
1446 } 1446 }
1447 1447
1448 call_rcu(head, rcu_free_slab); 1448 call_rcu(head, rcu_free_slab);
1449 } else 1449 } else
1450 __free_slab(s, page); 1450 __free_slab(s, page);
1451 } 1451 }
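The need_reserve_slab_rcu case above hinges on whether an rcu_head fits into the space page->lru already provides; when it does not, s->reserved bytes are kept free at the very end of each slab and the head is parked there just before the deferred free. A hedged user-space sketch of that "borrow the tail of the buffer for deferred-free bookkeeping" idea, with made-up names and no real RCU involved:

        #include <stdio.h>
        #include <stdlib.h>

        /* Stand-in for rcu_head: just enough to carry a callback. */
        struct deferred_head {
                void (*func)(struct deferred_head *);
        };

        #define SLAB_BYTES 4096u                        /* plays PAGE_SIZE << order */
        #define RESERVED   sizeof(struct deferred_head) /* plays s->reserved */

        /* Recover the slab start from the head parked in its tail, the way
         * rcu_free_slab() relies on virt_to_head_page() in the kernel. */
        static void release_slab(struct deferred_head *h)
        {
                char *slab = (char *)(h + 1) - SLAB_BYTES;

                printf("freeing slab at %p\n", (void *)slab);
                free(slab);
        }

        int main(void)
        {
                char *slab = malloc(SLAB_BYTES);
                struct deferred_head *head;

                if (!slab)
                        return 1;
                /* Park the head in the reserved bytes at the end of the slab,
                 * as free_slab() does when need_reserve_slab_rcu is true. */
                head = (struct deferred_head *)(slab + SLAB_BYTES - RESERVED);
                head->func = release_slab;
                head->func(head);       /* the kernel hands this to call_rcu() instead */
                return 0;
        }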
1452 1452
1453 static void discard_slab(struct kmem_cache *s, struct page *page) 1453 static void discard_slab(struct kmem_cache *s, struct page *page)
1454 { 1454 {
1455 dec_slabs_node(s, page_to_nid(page), page->objects); 1455 dec_slabs_node(s, page_to_nid(page), page->objects);
1456 free_slab(s, page); 1456 free_slab(s, page);
1457 } 1457 }
1458 1458
1459 /* 1459 /*
1460 * Management of partially allocated slabs. 1460 * Management of partially allocated slabs.
1461 * 1461 *
1462 * list_lock must be held. 1462 * list_lock must be held.
1463 */ 1463 */
1464 static inline void add_partial(struct kmem_cache_node *n, 1464 static inline void add_partial(struct kmem_cache_node *n,
1465 struct page *page, int tail) 1465 struct page *page, int tail)
1466 { 1466 {
1467 n->nr_partial++; 1467 n->nr_partial++;
1468 if (tail == DEACTIVATE_TO_TAIL) 1468 if (tail == DEACTIVATE_TO_TAIL)
1469 list_add_tail(&page->lru, &n->partial); 1469 list_add_tail(&page->lru, &n->partial);
1470 else 1470 else
1471 list_add(&page->lru, &n->partial); 1471 list_add(&page->lru, &n->partial);
1472 } 1472 }
1473 1473
1474 /* 1474 /*
1475 * list_lock must be held. 1475 * list_lock must be held.
1476 */ 1476 */
1477 static inline void remove_partial(struct kmem_cache_node *n, 1477 static inline void remove_partial(struct kmem_cache_node *n,
1478 struct page *page) 1478 struct page *page)
1479 { 1479 {
1480 list_del(&page->lru); 1480 list_del(&page->lru);
1481 n->nr_partial--; 1481 n->nr_partial--;
1482 } 1482 }
1483 1483
1484 /* 1484 /*
1485 * Lock slab, remove from the partial list and put the object into the 1485 * Lock slab, remove from the partial list and put the object into the
1486 * per cpu freelist. 1486 * per cpu freelist.
1487 * 1487 *
1488 * Returns a list of objects or NULL if it fails. 1488 * Returns a list of objects or NULL if it fails.
1489 * 1489 *
1490 * Must hold list_lock. 1490 * Must hold list_lock.
1491 */ 1491 */
1492 static inline void *acquire_slab(struct kmem_cache *s, 1492 static inline void *acquire_slab(struct kmem_cache *s,
1493 struct kmem_cache_node *n, struct page *page, 1493 struct kmem_cache_node *n, struct page *page,
1494 int mode) 1494 int mode)
1495 { 1495 {
1496 void *freelist; 1496 void *freelist;
1497 unsigned long counters; 1497 unsigned long counters;
1498 struct page new; 1498 struct page new;
1499 1499
1500 /* 1500 /*
1501 * Zap the freelist and set the frozen bit. 1501 * Zap the freelist and set the frozen bit.
1502 * The old freelist is the list of objects for the 1502 * The old freelist is the list of objects for the
1503 * per cpu allocation list. 1503 * per cpu allocation list.
1504 */ 1504 */
1505 do { 1505 do {
1506 freelist = page->freelist; 1506 freelist = page->freelist;
1507 counters = page->counters; 1507 counters = page->counters;
1508 new.counters = counters; 1508 new.counters = counters;
1509 if (mode) 1509 if (mode)
1510 new.inuse = page->objects; 1510 new.inuse = page->objects;
1511 1511
1512 VM_BUG_ON(new.frozen); 1512 VM_BUG_ON(new.frozen);
1513 new.frozen = 1; 1513 new.frozen = 1;
1514 1514
1515 } while (!__cmpxchg_double_slab(s, page, 1515 } while (!__cmpxchg_double_slab(s, page,
1516 freelist, counters, 1516 freelist, counters,
1517 NULL, new.counters, 1517 NULL, new.counters,
1518 "lock and freeze")); 1518 "lock and freeze"));
1519 1519
1520 remove_partial(n, page); 1520 remove_partial(n, page);
1521 return freelist; 1521 return freelist;
1522 } 1522 }
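acquire_slab() is one instance of the optimistic update pattern used throughout this file: snapshot freelist and counters, build the desired state locally, then publish it with a compare-and-exchange and retry if another CPU won the race. A minimal single-word user-space rendering of that loop with C11 atomics (the kernel's __cmpxchg_double_slab swaps freelist and counters as a pair, which this sketch cannot reproduce):

        #include <stdatomic.h>
        #include <stdio.h>

        /* Optimistically take the whole freelist: read it, try to swap in
         * NULL, and retry with the freshly observed value if another
         * thread changed it in the meantime. */
        static void *grab_freelist(_Atomic(void *) *freelist)
        {
                void *old = atomic_load(freelist);

                while (!atomic_compare_exchange_weak(freelist, &old, NULL))
                        ;       /* 'old' was updated with the latest value */
                return old;
        }

        int main(void)
        {
                static int object;
                _Atomic(void *) freelist = &object;

                printf("grabbed %p, freelist now %p\n",
                       grab_freelist(&freelist), atomic_load(&freelist));
                return 0;
        }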
1523 1523
1524 static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); 1524 static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1525 1525
1526 /* 1526 /*
1527 * Try to allocate a partial slab from a specific node. 1527 * Try to allocate a partial slab from a specific node.
1528 */ 1528 */
1529 static void *get_partial_node(struct kmem_cache *s, 1529 static void *get_partial_node(struct kmem_cache *s,
1530 struct kmem_cache_node *n, struct kmem_cache_cpu *c) 1530 struct kmem_cache_node *n, struct kmem_cache_cpu *c)
1531 { 1531 {
1532 struct page *page, *page2; 1532 struct page *page, *page2;
1533 void *object = NULL; 1533 void *object = NULL;
1534 1534
1535 /* 1535 /*
1536 * Racy check. If we mistakenly see no partial slabs then we 1536 * Racy check. If we mistakenly see no partial slabs then we
1537 * just allocate an empty slab. If we mistakenly try to get a 1537 * just allocate an empty slab. If we mistakenly try to get a
1538 * partial slab and there is none available then get_partials() 1538 * partial slab and there is none available then get_partials()
1539 * will return NULL. 1539 * will return NULL.
1540 */ 1540 */
1541 if (!n || !n->nr_partial) 1541 if (!n || !n->nr_partial)
1542 return NULL; 1542 return NULL;
1543 1543
1544 spin_lock(&n->list_lock); 1544 spin_lock(&n->list_lock);
1545 list_for_each_entry_safe(page, page2, &n->partial, lru) { 1545 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1546 void *t = acquire_slab(s, n, page, object == NULL); 1546 void *t = acquire_slab(s, n, page, object == NULL);
1547 int available; 1547 int available;
1548 1548
1549 if (!t) 1549 if (!t)
1550 break; 1550 break;
1551 1551
1552 if (!object) { 1552 if (!object) {
1553 c->page = page; 1553 c->page = page;
1554 c->node = page_to_nid(page); 1554 c->node = page_to_nid(page);
1555 stat(s, ALLOC_FROM_PARTIAL); 1555 stat(s, ALLOC_FROM_PARTIAL);
1556 object = t; 1556 object = t;
1557 available = page->objects - page->inuse; 1557 available = page->objects - page->inuse;
1558 } else { 1558 } else {
1559 page->freelist = t; 1559 page->freelist = t;
1560 available = put_cpu_partial(s, page, 0); 1560 available = put_cpu_partial(s, page, 0);
1561 } 1561 }
1562 if (kmem_cache_debug(s) || available > s->cpu_partial / 2) 1562 if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
1563 break; 1563 break;
1564 1564
1565 } 1565 }
1566 spin_unlock(&n->list_lock); 1566 spin_unlock(&n->list_lock);
1567 return object; 1567 return object;
1568 } 1568 }
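To make the stopping rule concrete: say s->cpu_partial is 30 (a made-up value), so the threshold in the check above is 15. If the first partial slab acquired becomes the cpu slab but only has 4 free objects, the walk continues; each further slab is parked on the per-cpu partial list by put_cpu_partial(), and the loop stops as soon as the running total of free objects it returns exceeds 15, or immediately after the first slab when the cache has debugging enabled.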
1569 1569
1570 /* 1570 /*
1571 * Get a page from somewhere. Search in increasing NUMA distances. 1571 * Get a page from somewhere. Search in increasing NUMA distances.
1572 */ 1572 */
1573 static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags, 1573 static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags,
1574 struct kmem_cache_cpu *c) 1574 struct kmem_cache_cpu *c)
1575 { 1575 {
1576 #ifdef CONFIG_NUMA 1576 #ifdef CONFIG_NUMA
1577 struct zonelist *zonelist; 1577 struct zonelist *zonelist;
1578 struct zoneref *z; 1578 struct zoneref *z;
1579 struct zone *zone; 1579 struct zone *zone;
1580 enum zone_type high_zoneidx = gfp_zone(flags); 1580 enum zone_type high_zoneidx = gfp_zone(flags);
1581 void *object; 1581 void *object;
1582 1582
1583 /* 1583 /*
1584 * The defrag ratio allows a configuration of the tradeoffs between 1584 * The defrag ratio allows a configuration of the tradeoffs between
1585 * inter node defragmentation and node local allocations. A lower 1585 * inter node defragmentation and node local allocations. A lower
1586 * defrag_ratio increases the tendency to do local allocations 1586 * defrag_ratio increases the tendency to do local allocations
1587 * instead of attempting to obtain partial slabs from other nodes. 1587 * instead of attempting to obtain partial slabs from other nodes.
1588 * 1588 *
1589 * If the defrag_ratio is set to 0 then kmalloc() always 1589 * If the defrag_ratio is set to 0 then kmalloc() always
1590 * returns node local objects. If the ratio is higher then kmalloc() 1590 * returns node local objects. If the ratio is higher then kmalloc()
1591 * may return off node objects because partial slabs are obtained 1591 * may return off node objects because partial slabs are obtained
1592 * from other nodes and filled up. 1592 * from other nodes and filled up.
1593 * 1593 *
1594 * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes 1594 * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
1595 * defrag_ratio = 1000) then every (well almost) allocation will 1595 * defrag_ratio = 1000) then every (well almost) allocation will
1596 * first attempt to defrag slab caches on other nodes. This means 1596 * first attempt to defrag slab caches on other nodes. This means
1597 * scanning over all nodes to look for partial slabs which may be 1597 * scanning over all nodes to look for partial slabs which may be
1598 * expensive if we do it every time we are trying to find a slab 1598 * expensive if we do it every time we are trying to find a slab
1599 * with available objects. 1599 * with available objects.
1600 */ 1600 */
1601 if (!s->remote_node_defrag_ratio || 1601 if (!s->remote_node_defrag_ratio ||
1602 get_cycles() % 1024 > s->remote_node_defrag_ratio) 1602 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1603 return NULL; 1603 return NULL;
1604 1604
1605 get_mems_allowed(); 1605 get_mems_allowed();
1606 zonelist = node_zonelist(slab_node(current->mempolicy), flags); 1606 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
1607 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 1607 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1608 struct kmem_cache_node *n; 1608 struct kmem_cache_node *n;
1609 1609
1610 n = get_node(s, zone_to_nid(zone)); 1610 n = get_node(s, zone_to_nid(zone));
1611 1611
1612 if (n && cpuset_zone_allowed_hardwall(zone, flags) && 1612 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1613 n->nr_partial > s->min_partial) { 1613 n->nr_partial > s->min_partial) {
1614 object = get_partial_node(s, n, c); 1614 object = get_partial_node(s, n, c);
1615 if (object) { 1615 if (object) {
1616 put_mems_allowed(); 1616 put_mems_allowed();
1617 return object; 1617 return object;
1618 } 1618 }
1619 } 1619 }
1620 } 1620 }
1621 put_mems_allowed(); 1621 put_mems_allowed();
1622 #endif 1622 #endif
1623 return NULL; 1623 return NULL;
1624 } 1624 }
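Taking the comment's own numbers, and assuming the low bits of get_cycles() are roughly uniform: with the sysfs file at its maximum of 100 the stored ratio is 1000, so the gate get_cycles() % 1024 > 1000 skips the cross-node search only for the 23 residues 1001..1023, i.e. on about 2% of calls; a stored ratio of 100 lets the search run only about 10% of the time (101 of 1024 residues), and a ratio of 0 never runs it, which is the always-node-local behaviour described above.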
1625 1625
1626 /* 1626 /*
1627 * Get a partial page, lock it and return it. 1627 * Get a partial page, lock it and return it.
1628 */ 1628 */
1629 static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, 1629 static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1630 struct kmem_cache_cpu *c) 1630 struct kmem_cache_cpu *c)
1631 { 1631 {
1632 void *object; 1632 void *object;
1633 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; 1633 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1634 1634
1635 object = get_partial_node(s, get_node(s, searchnode), c); 1635 object = get_partial_node(s, get_node(s, searchnode), c);
1636 if (object || node != NUMA_NO_NODE) 1636 if (object || node != NUMA_NO_NODE)
1637 return object; 1637 return object;
1638 1638
1639 return get_any_partial(s, flags, c); 1639 return get_any_partial(s, flags, c);
1640 } 1640 }
1641 1641
1642 #ifdef CONFIG_PREEMPT 1642 #ifdef CONFIG_PREEMPT
1643 /* 1643 /*
1644 * Calculate the next globally unique transaction for disambiguation 1644 * Calculate the next globally unique transaction for disambiguation
1645 * during cmpxchg. The transactions start with the cpu number and are then 1645 * during cmpxchg. The transactions start with the cpu number and are then
1646 * incremented by CONFIG_NR_CPUS. 1646 * incremented by CONFIG_NR_CPUS.
1647 */ 1647 */
1648 #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS) 1648 #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
1649 #else 1649 #else
1650 /* 1650 /*
1651 * No preemption supported therefore also no need to check for 1651 * No preemption supported therefore also no need to check for
1652 * different cpus. 1652 * different cpus.
1653 */ 1653 */
1654 #define TID_STEP 1 1654 #define TID_STEP 1
1655 #endif 1655 #endif
1656 1656
1657 static inline unsigned long next_tid(unsigned long tid) 1657 static inline unsigned long next_tid(unsigned long tid)
1658 { 1658 {
1659 return tid + TID_STEP; 1659 return tid + TID_STEP;
1660 } 1660 }
1661 1661
1662 static inline unsigned int tid_to_cpu(unsigned long tid) 1662 static inline unsigned int tid_to_cpu(unsigned long tid)
1663 { 1663 {
1664 return tid % TID_STEP; 1664 return tid % TID_STEP;
1665 } 1665 }
1666 1666
1667 static inline unsigned long tid_to_event(unsigned long tid) 1667 static inline unsigned long tid_to_event(unsigned long tid)
1668 { 1668 {
1669 return tid / TID_STEP; 1669 return tid / TID_STEP;
1670 } 1670 }
1671 1671
1672 static inline unsigned int init_tid(int cpu) 1672 static inline unsigned int init_tid(int cpu)
1673 { 1673 {
1674 return cpu; 1674 return cpu;
1675 } 1675 }
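A small standalone illustration of the tid encoding, assuming a preemptible configuration where CONFIG_NR_CPUS rounds up to 4 so TID_STEP is 4 (the real step depends on the kernel config):

        #include <stdio.h>

        #define TID_STEP 4      /* roundup_pow_of_two(CONFIG_NR_CPUS) for 3-4 CPUs */

        static unsigned long next_tid(unsigned long tid)     { return tid + TID_STEP; }
        static unsigned int  tid_to_cpu(unsigned long tid)   { return tid % TID_STEP; }
        static unsigned long tid_to_event(unsigned long tid) { return tid / TID_STEP; }
        static unsigned long init_tid(int cpu)               { return cpu; }

        int main(void)
        {
                unsigned long tid = init_tid(2);        /* cpu 2 starts at tid 2 */
                int i;

                for (i = 0; i < 3; i++)
                        tid = next_tid(tid);            /* three completed operations */

                /* cpu is still 2, the event counter is 3: tid = 3 * 4 + 2 = 14 */
                printf("tid=%lu cpu=%u event=%lu\n",
                       tid, tid_to_cpu(tid), tid_to_event(tid));
                return 0;
        }

Because the cpu number never leaves the low bits, note_cmpxchg_failure() below can tell a migration to another cpu apart from a plain interleaving of operations on the same cpu.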
1676 1676
1677 static inline void note_cmpxchg_failure(const char *n, 1677 static inline void note_cmpxchg_failure(const char *n,
1678 const struct kmem_cache *s, unsigned long tid) 1678 const struct kmem_cache *s, unsigned long tid)
1679 { 1679 {
1680 #ifdef SLUB_DEBUG_CMPXCHG 1680 #ifdef SLUB_DEBUG_CMPXCHG
1681 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid); 1681 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
1682 1682
1683 printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name); 1683 printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
1684 1684
1685 #ifdef CONFIG_PREEMPT 1685 #ifdef CONFIG_PREEMPT
1686 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid)) 1686 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
1687 printk("due to cpu change %d -> %d\n", 1687 printk("due to cpu change %d -> %d\n",
1688 tid_to_cpu(tid), tid_to_cpu(actual_tid)); 1688 tid_to_cpu(tid), tid_to_cpu(actual_tid));
1689 else 1689 else
1690 #endif 1690 #endif
1691 if (tid_to_event(tid) != tid_to_event(actual_tid)) 1691 if (tid_to_event(tid) != tid_to_event(actual_tid))
1692 printk("due to cpu running other code. Event %ld->%ld\n", 1692 printk("due to cpu running other code. Event %ld->%ld\n",
1693 tid_to_event(tid), tid_to_event(actual_tid)); 1693 tid_to_event(tid), tid_to_event(actual_tid));
1694 else 1694 else
1695 printk("for unknown reason: actual=%lx was=%lx target=%lx\n", 1695 printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
1696 actual_tid, tid, next_tid(tid)); 1696 actual_tid, tid, next_tid(tid));
1697 #endif 1697 #endif
1698 stat(s, CMPXCHG_DOUBLE_CPU_FAIL); 1698 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
1699 } 1699 }
1700 1700
1701 void init_kmem_cache_cpus(struct kmem_cache *s) 1701 void init_kmem_cache_cpus(struct kmem_cache *s)
1702 { 1702 {
1703 int cpu; 1703 int cpu;
1704 1704
1705 for_each_possible_cpu(cpu) 1705 for_each_possible_cpu(cpu)
1706 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); 1706 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1707 } 1707 }
1708 1708
1709 /* 1709 /*
1710 * Remove the cpu slab 1710 * Remove the cpu slab
1711 */ 1711 */
1712 static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1712 static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1713 { 1713 {
1714 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; 1714 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1715 struct page *page = c->page; 1715 struct page *page = c->page;
1716 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1716 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1717 int lock = 0; 1717 int lock = 0;
1718 enum slab_modes l = M_NONE, m = M_NONE; 1718 enum slab_modes l = M_NONE, m = M_NONE;
1719 void *freelist; 1719 void *freelist;
1720 void *nextfree; 1720 void *nextfree;
1721 int tail = DEACTIVATE_TO_HEAD; 1721 int tail = DEACTIVATE_TO_HEAD;
1722 struct page new; 1722 struct page new;
1723 struct page old; 1723 struct page old;
1724 1724
1725 if (page->freelist) { 1725 if (page->freelist) {
1726 stat(s, DEACTIVATE_REMOTE_FREES); 1726 stat(s, DEACTIVATE_REMOTE_FREES);
1727 tail = DEACTIVATE_TO_TAIL; 1727 tail = DEACTIVATE_TO_TAIL;
1728 } 1728 }
1729 1729
1730 c->tid = next_tid(c->tid); 1730 c->tid = next_tid(c->tid);
1731 c->page = NULL; 1731 c->page = NULL;
1732 freelist = c->freelist; 1732 freelist = c->freelist;
1733 c->freelist = NULL; 1733 c->freelist = NULL;
1734 1734
1735 /* 1735 /*
1736 * Stage one: Free all available per cpu objects back 1736 * Stage one: Free all available per cpu objects back
1737 * to the page freelist while it is still frozen. Leave the 1737 * to the page freelist while it is still frozen. Leave the
1738 * last one. 1738 * last one.
1739 * 1739 *
1740 * There is no need to take the list->lock because the page 1740 * There is no need to take the list->lock because the page
1741 * is still frozen. 1741 * is still frozen.
1742 */ 1742 */
1743 while (freelist && (nextfree = get_freepointer(s, freelist))) { 1743 while (freelist && (nextfree = get_freepointer(s, freelist))) {
1744 void *prior; 1744 void *prior;
1745 unsigned long counters; 1745 unsigned long counters;
1746 1746
1747 do { 1747 do {
1748 prior = page->freelist; 1748 prior = page->freelist;
1749 counters = page->counters; 1749 counters = page->counters;
1750 set_freepointer(s, freelist, prior); 1750 set_freepointer(s, freelist, prior);
1751 new.counters = counters; 1751 new.counters = counters;
1752 new.inuse--; 1752 new.inuse--;
1753 VM_BUG_ON(!new.frozen); 1753 VM_BUG_ON(!new.frozen);
1754 1754
1755 } while (!__cmpxchg_double_slab(s, page, 1755 } while (!__cmpxchg_double_slab(s, page,
1756 prior, counters, 1756 prior, counters,
1757 freelist, new.counters, 1757 freelist, new.counters,
1758 "drain percpu freelist")); 1758 "drain percpu freelist"));
1759 1759
1760 freelist = nextfree; 1760 freelist = nextfree;
1761 } 1761 }
1762 1762
1763 /* 1763 /*
1764 * Stage two: Ensure that the page is unfrozen while the 1764 * Stage two: Ensure that the page is unfrozen while the
1765 * list presence reflects the actual number of objects 1765 * list presence reflects the actual number of objects
1766 * during unfreeze. 1766 * during unfreeze.
1767 * 1767 *
1768 * We setup the list membership and then perform a cmpxchg 1768 * We setup the list membership and then perform a cmpxchg
1769 * with the count. If there is a mismatch then the page 1769 * with the count. If there is a mismatch then the page
1770 * is not unfrozen but the page is on the wrong list. 1770 * is not unfrozen but the page is on the wrong list.
1771 * 1771 *
1772 * Then we restart the process which may have to remove 1772 * Then we restart the process which may have to remove
1773 * the page from the list that we just put it on again 1773 * the page from the list that we just put it on again
1774 * because the number of objects in the slab may have 1774 * because the number of objects in the slab may have
1775 * changed. 1775 * changed.
1776 */ 1776 */
1777 redo: 1777 redo:
1778 1778
1779 old.freelist = page->freelist; 1779 old.freelist = page->freelist;
1780 old.counters = page->counters; 1780 old.counters = page->counters;
1781 VM_BUG_ON(!old.frozen); 1781 VM_BUG_ON(!old.frozen);
1782 1782
1783 /* Determine target state of the slab */ 1783 /* Determine target state of the slab */
1784 new.counters = old.counters; 1784 new.counters = old.counters;
1785 if (freelist) { 1785 if (freelist) {
1786 new.inuse--; 1786 new.inuse--;
1787 set_freepointer(s, freelist, old.freelist); 1787 set_freepointer(s, freelist, old.freelist);
1788 new.freelist = freelist; 1788 new.freelist = freelist;
1789 } else 1789 } else
1790 new.freelist = old.freelist; 1790 new.freelist = old.freelist;
1791 1791
1792 new.frozen = 0; 1792 new.frozen = 0;
1793 1793
1794 if (!new.inuse && n->nr_partial > s->min_partial) 1794 if (!new.inuse && n->nr_partial > s->min_partial)
1795 m = M_FREE; 1795 m = M_FREE;
1796 else if (new.freelist) { 1796 else if (new.freelist) {
1797 m = M_PARTIAL; 1797 m = M_PARTIAL;
1798 if (!lock) { 1798 if (!lock) {
1799 lock = 1; 1799 lock = 1;
1800 /* 1800 /*
1801 * Taking the spinlock removes the possibility 1801 * Taking the spinlock removes the possibility
1802 * that acquire_slab() will see a slab page that 1802 * that acquire_slab() will see a slab page that
1803 * is frozen 1803 * is frozen
1804 */ 1804 */
1805 spin_lock(&n->list_lock); 1805 spin_lock(&n->list_lock);
1806 } 1806 }
1807 } else { 1807 } else {
1808 m = M_FULL; 1808 m = M_FULL;
1809 if (kmem_cache_debug(s) && !lock) { 1809 if (kmem_cache_debug(s) && !lock) {
1810 lock = 1; 1810 lock = 1;
1811 /* 1811 /*
1812 * This also ensures that the scanning of full 1812 * This also ensures that the scanning of full
1813 * slabs from diagnostic functions will not see 1813 * slabs from diagnostic functions will not see
1814 * any frozen slabs. 1814 * any frozen slabs.
1815 */ 1815 */
1816 spin_lock(&n->list_lock); 1816 spin_lock(&n->list_lock);
1817 } 1817 }
1818 } 1818 }
1819 1819
1820 if (l != m) { 1820 if (l != m) {
1821 1821
1822 if (l == M_PARTIAL) 1822 if (l == M_PARTIAL)
1823 1823
1824 remove_partial(n, page); 1824 remove_partial(n, page);
1825 1825
1826 else if (l == M_FULL) 1826 else if (l == M_FULL)
1827 1827
1828 remove_full(s, page); 1828 remove_full(s, page);
1829 1829
1830 if (m == M_PARTIAL) { 1830 if (m == M_PARTIAL) {
1831 1831
1832 add_partial(n, page, tail); 1832 add_partial(n, page, tail);
1833 stat(s, tail); 1833 stat(s, tail);
1834 1834
1835 } else if (m == M_FULL) { 1835 } else if (m == M_FULL) {
1836 1836
1837 stat(s, DEACTIVATE_FULL); 1837 stat(s, DEACTIVATE_FULL);
1838 add_full(s, n, page); 1838 add_full(s, n, page);
1839 1839
1840 } 1840 }
1841 } 1841 }
1842 1842
1843 l = m; 1843 l = m;
1844 if (!__cmpxchg_double_slab(s, page, 1844 if (!__cmpxchg_double_slab(s, page,
1845 old.freelist, old.counters, 1845 old.freelist, old.counters,
1846 new.freelist, new.counters, 1846 new.freelist, new.counters,
1847 "unfreezing slab")) 1847 "unfreezing slab"))
1848 goto redo; 1848 goto redo;
1849 1849
1850 if (lock) 1850 if (lock)
1851 spin_unlock(&n->list_lock); 1851 spin_unlock(&n->list_lock);
1852 1852
1853 if (m == M_FREE) { 1853 if (m == M_FREE) {
1854 stat(s, DEACTIVATE_EMPTY); 1854 stat(s, DEACTIVATE_EMPTY);
1855 discard_slab(s, page); 1855 discard_slab(s, page);
1856 stat(s, FREE_SLAB); 1856 stat(s, FREE_SLAB);
1857 } 1857 }
1858 } 1858 }
1859 1859
1860 /* Unfreeze all the cpu partial slabs */ 1860 /* Unfreeze all the cpu partial slabs */
1861 static void unfreeze_partials(struct kmem_cache *s) 1861 static void unfreeze_partials(struct kmem_cache *s)
1862 { 1862 {
1863 struct kmem_cache_node *n = NULL; 1863 struct kmem_cache_node *n = NULL;
1864 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); 1864 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
1865 struct page *page, *discard_page = NULL; 1865 struct page *page, *discard_page = NULL;
1866 1866
1867 while ((page = c->partial)) { 1867 while ((page = c->partial)) {
1868 enum slab_modes { M_PARTIAL, M_FREE }; 1868 enum slab_modes { M_PARTIAL, M_FREE };
1869 enum slab_modes l, m; 1869 enum slab_modes l, m;
1870 struct page new; 1870 struct page new;
1871 struct page old; 1871 struct page old;
1872 1872
1873 c->partial = page->next; 1873 c->partial = page->next;
1874 l = M_FREE; 1874 l = M_FREE;
1875 1875
1876 do { 1876 do {
1877 1877
1878 old.freelist = page->freelist; 1878 old.freelist = page->freelist;
1879 old.counters = page->counters; 1879 old.counters = page->counters;
1880 VM_BUG_ON(!old.frozen); 1880 VM_BUG_ON(!old.frozen);
1881 1881
1882 new.counters = old.counters; 1882 new.counters = old.counters;
1883 new.freelist = old.freelist; 1883 new.freelist = old.freelist;
1884 1884
1885 new.frozen = 0; 1885 new.frozen = 0;
1886 1886
1887 if (!new.inuse && (!n || n->nr_partial > s->min_partial)) 1887 if (!new.inuse && (!n || n->nr_partial > s->min_partial))
1888 m = M_FREE; 1888 m = M_FREE;
1889 else { 1889 else {
1890 struct kmem_cache_node *n2 = get_node(s, 1890 struct kmem_cache_node *n2 = get_node(s,
1891 page_to_nid(page)); 1891 page_to_nid(page));
1892 1892
1893 m = M_PARTIAL; 1893 m = M_PARTIAL;
1894 if (n != n2) { 1894 if (n != n2) {
1895 if (n) 1895 if (n)
1896 spin_unlock(&n->list_lock); 1896 spin_unlock(&n->list_lock);
1897 1897
1898 n = n2; 1898 n = n2;
1899 spin_lock(&n->list_lock); 1899 spin_lock(&n->list_lock);
1900 } 1900 }
1901 } 1901 }
1902 1902
1903 if (l != m) { 1903 if (l != m) {
1904 if (l == M_PARTIAL) 1904 if (l == M_PARTIAL)
1905 remove_partial(n, page); 1905 remove_partial(n, page);
1906 else 1906 else
1907 add_partial(n, page, 1907 add_partial(n, page,
1908 DEACTIVATE_TO_TAIL); 1908 DEACTIVATE_TO_TAIL);
1909 1909
1910 l = m; 1910 l = m;
1911 } 1911 }
1912 1912
1913 } while (!cmpxchg_double_slab(s, page, 1913 } while (!cmpxchg_double_slab(s, page,
1914 old.freelist, old.counters, 1914 old.freelist, old.counters,
1915 new.freelist, new.counters, 1915 new.freelist, new.counters,
1916 "unfreezing slab")); 1916 "unfreezing slab"));
1917 1917
1918 if (m == M_FREE) { 1918 if (m == M_FREE) {
1919 page->next = discard_page; 1919 page->next = discard_page;
1920 discard_page = page; 1920 discard_page = page;
1921 } 1921 }
1922 } 1922 }
1923 1923
1924 if (n) 1924 if (n)
1925 spin_unlock(&n->list_lock); 1925 spin_unlock(&n->list_lock);
1926 1926
1927 while (discard_page) { 1927 while (discard_page) {
1928 page = discard_page; 1928 page = discard_page;
1929 discard_page = discard_page->next; 1929 discard_page = discard_page->next;
1930 1930
1931 stat(s, DEACTIVATE_EMPTY); 1931 stat(s, DEACTIVATE_EMPTY);
1932 discard_slab(s, page); 1932 discard_slab(s, page);
1933 stat(s, FREE_SLAB); 1933 stat(s, FREE_SLAB);
1934 } 1934 }
1935 } 1935 }
1936 1936
1937 /* 1937 /*
1938 * Put a page that was just frozen (in __slab_free) into a partial page 1938 * Put a page that was just frozen (in __slab_free) into a partial page
1939 * slot if available. This is done without interrupts disabled and without 1939 * slot if available. This is done without interrupts disabled and without
1940 * preemption disabled. The cmpxchg is racy and may put the partial page 1940 * preemption disabled. The cmpxchg is racy and may put the partial page
1941 * onto a random cpus partial slot. 1941 * onto a random cpus partial slot.
1942 * 1942 *
1943 * If we did not find a slot then simply move all the partials to the 1943 * If we did not find a slot then simply move all the partials to the
1944 * per node partial list. 1944 * per node partial list.
1945 */ 1945 */
1946 int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) 1946 int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
1947 { 1947 {
1948 struct page *oldpage; 1948 struct page *oldpage;
1949 int pages; 1949 int pages;
1950 int pobjects; 1950 int pobjects;
1951 1951
1952 do { 1952 do {
1953 pages = 0; 1953 pages = 0;
1954 pobjects = 0; 1954 pobjects = 0;
1955 oldpage = this_cpu_read(s->cpu_slab->partial); 1955 oldpage = this_cpu_read(s->cpu_slab->partial);
1956 1956
1957 if (oldpage) { 1957 if (oldpage) {
1958 pobjects = oldpage->pobjects; 1958 pobjects = oldpage->pobjects;
1959 pages = oldpage->pages; 1959 pages = oldpage->pages;
1960 if (drain && pobjects > s->cpu_partial) { 1960 if (drain && pobjects > s->cpu_partial) {
1961 unsigned long flags; 1961 unsigned long flags;
1962 /* 1962 /*
1963 * partial array is full. Move the existing 1963 * partial array is full. Move the existing
1964 * set to the per node partial list. 1964 * set to the per node partial list.
1965 */ 1965 */
1966 local_irq_save(flags); 1966 local_irq_save(flags);
1967 unfreeze_partials(s); 1967 unfreeze_partials(s);
1968 local_irq_restore(flags); 1968 local_irq_restore(flags);
1969 pobjects = 0; 1969 pobjects = 0;
1970 pages = 0; 1970 pages = 0;
1971 } 1971 }
1972 } 1972 }
1973 1973
1974 pages++; 1974 pages++;
1975 pobjects += page->objects - page->inuse; 1975 pobjects += page->objects - page->inuse;
1976 1976
1977 page->pages = pages; 1977 page->pages = pages;
1978 page->pobjects = pobjects; 1978 page->pobjects = pobjects;
1979 page->next = oldpage; 1979 page->next = oldpage;
1980 1980
1981 } while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); 1981 } while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
1982 stat(s, CPU_PARTIAL_FREE); 1982 stat(s, CPU_PARTIAL_FREE);
1983 return pobjects; 1983 return pobjects;
1984 } 1984 }
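To make the accounting concrete, suppose s->cpu_partial is 30 (a made-up value) and the cpu's current partial list head reports pobjects = 28. Freezing a page with 16 objects, 10 of them still allocated, does not hit the drain condition (28 is not above 30); the page goes on as the new head with pages incremented and pobjects = 28 + 6 = 34. The next call that arrives with drain set finds 34 > 30, moves the whole set to the per-node partial lists under local_irq_save(), and the per-cpu list starts over from the newly frozen page.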
1985 1985
1986 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1986 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1987 { 1987 {
1988 stat(s, CPUSLAB_FLUSH); 1988 stat(s, CPUSLAB_FLUSH);
1989 deactivate_slab(s, c); 1989 deactivate_slab(s, c);
1990 } 1990 }
1991 1991
1992 /* 1992 /*
1993 * Flush cpu slab. 1993 * Flush cpu slab.
1994 * 1994 *
1995 * Called from IPI handler with interrupts disabled. 1995 * Called from IPI handler with interrupts disabled.
1996 */ 1996 */
1997 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) 1997 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
1998 { 1998 {
1999 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 1999 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2000 2000
2001 if (likely(c)) { 2001 if (likely(c)) {
2002 if (c->page) 2002 if (c->page)
2003 flush_slab(s, c); 2003 flush_slab(s, c);
2004 2004
2005 unfreeze_partials(s); 2005 unfreeze_partials(s);
2006 } 2006 }
2007 } 2007 }
2008 2008
2009 static void flush_cpu_slab(void *d) 2009 static void flush_cpu_slab(void *d)
2010 { 2010 {
2011 struct kmem_cache *s = d; 2011 struct kmem_cache *s = d;
2012 2012
2013 __flush_cpu_slab(s, smp_processor_id()); 2013 __flush_cpu_slab(s, smp_processor_id());
2014 } 2014 }
2015 2015
2016 static void flush_all(struct kmem_cache *s) 2016 static void flush_all(struct kmem_cache *s)
2017 { 2017 {
2018 on_each_cpu(flush_cpu_slab, s, 1); 2018 on_each_cpu(flush_cpu_slab, s, 1);
2019 } 2019 }
2020 2020
2021 /* 2021 /*
2022 * Check if the objects in a per cpu structure fit numa 2022 * Check if the objects in a per cpu structure fit numa
2023 * locality expectations. 2023 * locality expectations.
2024 */ 2024 */
2025 static inline int node_match(struct kmem_cache_cpu *c, int node) 2025 static inline int node_match(struct kmem_cache_cpu *c, int node)
2026 { 2026 {
2027 #ifdef CONFIG_NUMA 2027 #ifdef CONFIG_NUMA
2028 if (node != NUMA_NO_NODE && c->node != node) 2028 if (node != NUMA_NO_NODE && c->node != node)
2029 return 0; 2029 return 0;
2030 #endif 2030 #endif
2031 return 1; 2031 return 1;
2032 } 2032 }
2033 2033
2034 static int count_free(struct page *page) 2034 static int count_free(struct page *page)
2035 { 2035 {
2036 return page->objects - page->inuse; 2036 return page->objects - page->inuse;
2037 } 2037 }
2038 2038
2039 static unsigned long count_partial(struct kmem_cache_node *n, 2039 static unsigned long count_partial(struct kmem_cache_node *n,
2040 int (*get_count)(struct page *)) 2040 int (*get_count)(struct page *))
2041 { 2041 {
2042 unsigned long flags; 2042 unsigned long flags;
2043 unsigned long x = 0; 2043 unsigned long x = 0;
2044 struct page *page; 2044 struct page *page;
2045 2045
2046 spin_lock_irqsave(&n->list_lock, flags); 2046 spin_lock_irqsave(&n->list_lock, flags);
2047 list_for_each_entry(page, &n->partial, lru) 2047 list_for_each_entry(page, &n->partial, lru)
2048 x += get_count(page); 2048 x += get_count(page);
2049 spin_unlock_irqrestore(&n->list_lock, flags); 2049 spin_unlock_irqrestore(&n->list_lock, flags);
2050 return x; 2050 return x;
2051 } 2051 }
2052 2052
2053 static inline unsigned long node_nr_objs(struct kmem_cache_node *n) 2053 static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2054 { 2054 {
2055 #ifdef CONFIG_SLUB_DEBUG 2055 #ifdef CONFIG_SLUB_DEBUG
2056 return atomic_long_read(&n->total_objects); 2056 return atomic_long_read(&n->total_objects);
2057 #else 2057 #else
2058 return 0; 2058 return 0;
2059 #endif 2059 #endif
2060 } 2060 }
2061 2061
2062 static noinline void 2062 static noinline void
2063 slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) 2063 slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2064 { 2064 {
2065 int node; 2065 int node;
2066 2066
2067 printk(KERN_WARNING 2067 printk(KERN_WARNING
2068 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", 2068 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2069 nid, gfpflags); 2069 nid, gfpflags);
2070 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " 2070 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2071 "default order: %d, min order: %d\n", s->name, s->objsize, 2071 "default order: %d, min order: %d\n", s->name, s->objsize,
2072 s->size, oo_order(s->oo), oo_order(s->min)); 2072 s->size, oo_order(s->oo), oo_order(s->min));
2073 2073
2074 if (oo_order(s->min) > get_order(s->objsize)) 2074 if (oo_order(s->min) > get_order(s->objsize))
2075 printk(KERN_WARNING " %s debugging increased min order, use " 2075 printk(KERN_WARNING " %s debugging increased min order, use "
2076 "slub_debug=O to disable.\n", s->name); 2076 "slub_debug=O to disable.\n", s->name);
2077 2077
2078 for_each_online_node(node) { 2078 for_each_online_node(node) {
2079 struct kmem_cache_node *n = get_node(s, node); 2079 struct kmem_cache_node *n = get_node(s, node);
2080 unsigned long nr_slabs; 2080 unsigned long nr_slabs;
2081 unsigned long nr_objs; 2081 unsigned long nr_objs;
2082 unsigned long nr_free; 2082 unsigned long nr_free;
2083 2083
2084 if (!n) 2084 if (!n)
2085 continue; 2085 continue;
2086 2086
2087 nr_free = count_partial(n, count_free); 2087 nr_free = count_partial(n, count_free);
2088 nr_slabs = node_nr_slabs(n); 2088 nr_slabs = node_nr_slabs(n);
2089 nr_objs = node_nr_objs(n); 2089 nr_objs = node_nr_objs(n);
2090 2090
2091 printk(KERN_WARNING 2091 printk(KERN_WARNING
2092 " node %d: slabs: %ld, objs: %ld, free: %ld\n", 2092 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
2093 node, nr_slabs, nr_objs, nr_free); 2093 node, nr_slabs, nr_objs, nr_free);
2094 } 2094 }
2095 } 2095 }
2096 2096
2097 static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, 2097 static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2098 int node, struct kmem_cache_cpu **pc) 2098 int node, struct kmem_cache_cpu **pc)
2099 { 2099 {
2100 void *object; 2100 void *object;
2101 struct kmem_cache_cpu *c; 2101 struct kmem_cache_cpu *c;
2102 struct page *page = new_slab(s, flags, node); 2102 struct page *page = new_slab(s, flags, node);
2103 2103
2104 if (page) { 2104 if (page) {
2105 c = __this_cpu_ptr(s->cpu_slab); 2105 c = __this_cpu_ptr(s->cpu_slab);
2106 if (c->page) 2106 if (c->page)
2107 flush_slab(s, c); 2107 flush_slab(s, c);
2108 2108
2109 /* 2109 /*
2110 * No other reference to the page yet so we can 2110 * No other reference to the page yet so we can
2111 * muck around with it freely without cmpxchg 2111 * muck around with it freely without cmpxchg
2112 */ 2112 */
2113 object = page->freelist; 2113 object = page->freelist;
2114 page->freelist = NULL; 2114 page->freelist = NULL;
2115 2115
2116 stat(s, ALLOC_SLAB); 2116 stat(s, ALLOC_SLAB);
2117 c->node = page_to_nid(page); 2117 c->node = page_to_nid(page);
2118 c->page = page; 2118 c->page = page;
2119 *pc = c; 2119 *pc = c;
2120 } else 2120 } else
2121 object = NULL; 2121 object = NULL;
2122 2122
2123 return object; 2123 return object;
2124 } 2124 }
2125 2125
2126 /* 2126 /*
2127 * Slow path. The lockless freelist is empty or we need to perform 2127 * Slow path. The lockless freelist is empty or we need to perform
2128 * debugging duties. 2128 * debugging duties.
2129 * 2129 *
2130 * Processing is still very fast if new objects have been freed to the 2130 * Processing is still very fast if new objects have been freed to the
2131 * regular freelist. In that case we simply take over the regular freelist 2131 * regular freelist. In that case we simply take over the regular freelist
2132 * as the lockless freelist and zap the regular freelist. 2132 * as the lockless freelist and zap the regular freelist.
2133 * 2133 *
2134 * If that is not working then we fall back to the partial lists. We take the 2134 * If that is not working then we fall back to the partial lists. We take the
2135 * first element of the freelist as the object to allocate now and move the 2135 * first element of the freelist as the object to allocate now and move the
2136 * rest of the freelist to the lockless freelist. 2136 * rest of the freelist to the lockless freelist.
2137 * 2137 *
2138 * And if we were unable to get a new slab from the partial slab lists then 2138 * And if we were unable to get a new slab from the partial slab lists then
2139 * we need to allocate a new slab. This is the slowest path since it involves 2139 * we need to allocate a new slab. This is the slowest path since it involves
2140 * a call to the page allocator and the setup of a new slab. 2140 * a call to the page allocator and the setup of a new slab.
2141 */ 2141 */
2142 static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, 2142 static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2143 unsigned long addr, struct kmem_cache_cpu *c) 2143 unsigned long addr, struct kmem_cache_cpu *c)
2144 { 2144 {
2145 void **object; 2145 void **object;
2146 unsigned long flags; 2146 unsigned long flags;
2147 struct page new; 2147 struct page new;
2148 unsigned long counters; 2148 unsigned long counters;
2149 2149
2150 local_irq_save(flags); 2150 local_irq_save(flags);
2151 #ifdef CONFIG_PREEMPT 2151 #ifdef CONFIG_PREEMPT
2152 /* 2152 /*
2153 * We may have been preempted and rescheduled on a different 2153 * We may have been preempted and rescheduled on a different
2154 * cpu before disabling interrupts. Need to reload cpu area 2154 * cpu before disabling interrupts. Need to reload cpu area
2155 * pointer. 2155 * pointer.
2156 */ 2156 */
2157 c = this_cpu_ptr(s->cpu_slab); 2157 c = this_cpu_ptr(s->cpu_slab);
2158 #endif 2158 #endif
2159 2159
2160 if (!c->page) 2160 if (!c->page)
2161 goto new_slab; 2161 goto new_slab;
2162 redo: 2162 redo:
2163 if (unlikely(!node_match(c, node))) { 2163 if (unlikely(!node_match(c, node))) {
2164 stat(s, ALLOC_NODE_MISMATCH); 2164 stat(s, ALLOC_NODE_MISMATCH);
2165 deactivate_slab(s, c); 2165 deactivate_slab(s, c);
2166 goto new_slab; 2166 goto new_slab;
2167 } 2167 }
2168 2168
2169 /* must check again c->freelist in case of cpu migration or IRQ */ 2169 /* must check again c->freelist in case of cpu migration or IRQ */
2170 object = c->freelist; 2170 object = c->freelist;
2171 if (object) 2171 if (object)
2172 goto load_freelist; 2172 goto load_freelist;
2173 2173
2174 stat(s, ALLOC_SLOWPATH); 2174 stat(s, ALLOC_SLOWPATH);
2175 2175
2176 do { 2176 do {
2177 object = c->page->freelist; 2177 object = c->page->freelist;
2178 counters = c->page->counters; 2178 counters = c->page->counters;
2179 new.counters = counters; 2179 new.counters = counters;
2180 VM_BUG_ON(!new.frozen); 2180 VM_BUG_ON(!new.frozen);
2181 2181
2182 /* 2182 /*
2183 * If there is no object left then we use this loop to 2183 * If there is no object left then we use this loop to
2184 * deactivate the slab which is simple since no objects 2184 * deactivate the slab which is simple since no objects
2185 * are left in the slab and therefore we do not need to 2185 * are left in the slab and therefore we do not need to
2186 * put the page back onto the partial list. 2186 * put the page back onto the partial list.
2187 * 2187 *
2188 * If there are objects left then we retrieve them 2188 * If there are objects left then we retrieve them
2189 * and use them to refill the per cpu queue. 2189 * and use them to refill the per cpu queue.
2190 */ 2190 */
2191 2191
2192 new.inuse = c->page->objects; 2192 new.inuse = c->page->objects;
2193 new.frozen = object != NULL; 2193 new.frozen = object != NULL;
2194 2194
2195 } while (!__cmpxchg_double_slab(s, c->page, 2195 } while (!__cmpxchg_double_slab(s, c->page,
2196 object, counters, 2196 object, counters,
2197 NULL, new.counters, 2197 NULL, new.counters,
2198 "__slab_alloc")); 2198 "__slab_alloc"));
2199 2199
2200 if (!object) { 2200 if (!object) {
2201 c->page = NULL; 2201 c->page = NULL;
2202 stat(s, DEACTIVATE_BYPASS); 2202 stat(s, DEACTIVATE_BYPASS);
2203 goto new_slab; 2203 goto new_slab;
2204 } 2204 }
2205 2205
2206 stat(s, ALLOC_REFILL); 2206 stat(s, ALLOC_REFILL);
2207 2207
2208 load_freelist: 2208 load_freelist:
2209 c->freelist = get_freepointer(s, object); 2209 c->freelist = get_freepointer(s, object);
2210 c->tid = next_tid(c->tid); 2210 c->tid = next_tid(c->tid);
2211 local_irq_restore(flags); 2211 local_irq_restore(flags);
2212 return object; 2212 return object;
2213 2213
2214 new_slab: 2214 new_slab:
2215 2215
2216 if (c->partial) { 2216 if (c->partial) {
2217 c->page = c->partial; 2217 c->page = c->partial;
2218 c->partial = c->page->next; 2218 c->partial = c->page->next;
2219 c->node = page_to_nid(c->page); 2219 c->node = page_to_nid(c->page);
2220 stat(s, CPU_PARTIAL_ALLOC); 2220 stat(s, CPU_PARTIAL_ALLOC);
2221 c->freelist = NULL; 2221 c->freelist = NULL;
2222 goto redo; 2222 goto redo;
2223 } 2223 }
2224 2224
2225 /* Then do expensive stuff like retrieving pages from the partial lists */ 2225 /* Then do expensive stuff like retrieving pages from the partial lists */
2226 object = get_partial(s, gfpflags, node, c); 2226 object = get_partial(s, gfpflags, node, c);
2227 2227
2228 if (unlikely(!object)) { 2228 if (unlikely(!object)) {
2229 2229
2230 object = new_slab_objects(s, gfpflags, node, &c); 2230 object = new_slab_objects(s, gfpflags, node, &c);
2231 2231
2232 if (unlikely(!object)) { 2232 if (unlikely(!object)) {
2233 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) 2233 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2234 slab_out_of_memory(s, gfpflags, node); 2234 slab_out_of_memory(s, gfpflags, node);
2235 2235
2236 local_irq_restore(flags); 2236 local_irq_restore(flags);
2237 return NULL; 2237 return NULL;
2238 } 2238 }
2239 } 2239 }
2240 2240
2241 if (likely(!kmem_cache_debug(s))) 2241 if (likely(!kmem_cache_debug(s)))
2242 goto load_freelist; 2242 goto load_freelist;
2243 2243
2244 /* Only entered in the debug case */ 2244 /* Only entered in the debug case */
2245 if (!alloc_debug_processing(s, c->page, object, addr)) 2245 if (!alloc_debug_processing(s, c->page, object, addr))
2246 goto new_slab; /* Slab failed checks. Next slab needed */ 2246 goto new_slab; /* Slab failed checks. Next slab needed */
2247 2247
2248 c->freelist = get_freepointer(s, object); 2248 c->freelist = get_freepointer(s, object);
2249 deactivate_slab(s, c); 2249 deactivate_slab(s, c);
2250 c->node = NUMA_NO_NODE; 2250 c->node = NUMA_NO_NODE;
2251 local_irq_restore(flags); 2251 local_irq_restore(flags);
2252 return object; 2252 return object;
2253 } 2253 }
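
Stripped of the locking details, the slow path above is a chain of fallbacks: take over the current page's freelist, then try the cpu partial list, then the node partial lists, and only then go to the page allocator. The stub below only illustrates that ordering; every helper is a hypothetical placeholder, not a kernel function.

/*
 * Order of fallbacks in the allocation slow path, as a stub model.
 * Each helper is a placeholder returning "empty" or an object.
 */
#include <stdio.h>
#include <stddef.h>

static void *take_over_page_freelist(void) { return NULL; }  /* cmpxchg of page->freelist */
static void *take_cpu_partial(void)        { return NULL; }  /* c->partial list */
static void *take_node_partial(void)       { return NULL; }  /* get_partial() */
static void *allocate_new_slab(void)       { static int obj; return &obj; }

static void *slow_alloc_model(void)
{
	void *object;

	if ((object = take_over_page_freelist()))
		return object;          /* refill from the current cpu slab */
	if ((object = take_cpu_partial()))
		return object;          /* cheap: no list_lock needed */
	if ((object = take_node_partial()))
		return object;          /* needs node->list_lock */
	return allocate_new_slab();     /* slowest: page allocator + slab setup */
}

int main(void)
{
	printf("got object at %p\n", slow_alloc_model());
	return 0;
}
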
2254 2254
2255 /* 2255 /*
2256 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) 2256 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
2257 * have the fastpath folded into their functions. So no function call 2257 * have the fastpath folded into their functions. So no function call
2258 * overhead for requests that can be satisfied on the fastpath. 2258 * overhead for requests that can be satisfied on the fastpath.
2259 * 2259 *
2260 * The fastpath works by first checking if the lockless freelist can be used. 2260 * The fastpath works by first checking if the lockless freelist can be used.
2261 * If not then __slab_alloc is called for slow processing. 2261 * If not then __slab_alloc is called for slow processing.
2262 * 2262 *
2263 * Otherwise we can simply pick the next object from the lockless free list. 2263 * Otherwise we can simply pick the next object from the lockless free list.
2264 */ 2264 */
2265 static __always_inline void *slab_alloc(struct kmem_cache *s, 2265 static __always_inline void *slab_alloc(struct kmem_cache *s,
2266 gfp_t gfpflags, int node, unsigned long addr) 2266 gfp_t gfpflags, int node, unsigned long addr)
2267 { 2267 {
2268 void **object; 2268 void **object;
2269 struct kmem_cache_cpu *c; 2269 struct kmem_cache_cpu *c;
2270 unsigned long tid; 2270 unsigned long tid;
2271 2271
2272 if (slab_pre_alloc_hook(s, gfpflags)) 2272 if (slab_pre_alloc_hook(s, gfpflags))
2273 return NULL; 2273 return NULL;
2274 2274
2275 redo: 2275 redo:
2276 2276
2277 /* 2277 /*
2278 * Must read kmem_cache cpu data via this cpu ptr. Preemption is 2278 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
2279 * enabled. We may switch back and forth between cpus while 2279 * enabled. We may switch back and forth between cpus while
2280 * reading from one cpu area. That does not matter as long 2280 * reading from one cpu area. That does not matter as long
2281 * as we end up on the original cpu again when doing the cmpxchg. 2281 * as we end up on the original cpu again when doing the cmpxchg.
2282 */ 2282 */
2283 c = __this_cpu_ptr(s->cpu_slab); 2283 c = __this_cpu_ptr(s->cpu_slab);
2284 2284
2285 /* 2285 /*
2286 * The transaction ids are globally unique per cpu and per operation on 2286 * The transaction ids are globally unique per cpu and per operation on
2287 * a per cpu queue. Thus they guarantee that the cmpxchg_double 2287 * a per cpu queue. Thus they guarantee that the cmpxchg_double

2288 * occurs on the right processor and that there was no operation on the 2288 * occurs on the right processor and that there was no operation on the
2289 * linked list in between. 2289 * linked list in between.
2290 */ 2290 */
2291 tid = c->tid; 2291 tid = c->tid;
2292 barrier(); 2292 barrier();
2293 2293
2294 object = c->freelist; 2294 object = c->freelist;
2295 if (unlikely(!object || !node_match(c, node))) 2295 if (unlikely(!object || !node_match(c, node)))
2296 2296
2297 object = __slab_alloc(s, gfpflags, node, addr, c); 2297 object = __slab_alloc(s, gfpflags, node, addr, c);
2298 2298
2299 else { 2299 else {
2300 /* 2300 /*
2301 * The cmpxchg will only match if there was no additional 2301 * The cmpxchg will only match if there was no additional
2302 * operation and if we are on the right processor. 2302 * operation and if we are on the right processor.
2303 * 2303 *
2304 * The cmpxchg does the following atomically (without lock semantics!) 2304 * The cmpxchg does the following atomically (without lock semantics!)
2305 * 1. Relocate first pointer to the current per cpu area. 2305 * 1. Relocate first pointer to the current per cpu area.
2306 * 2. Verify that tid and freelist have not been changed 2306 * 2. Verify that tid and freelist have not been changed
2307 * 3. If they were not changed replace tid and freelist 2307 * 3. If they were not changed replace tid and freelist
2308 * 2308 *
2309 * Since this is without lock semantics the protection is only against 2309 * Since this is without lock semantics the protection is only against
2310 * code executing on this cpu *not* from access by other cpus. 2310 * code executing on this cpu *not* from access by other cpus.
2311 */ 2311 */
2312 if (unlikely(!irqsafe_cpu_cmpxchg_double( 2312 if (unlikely(!irqsafe_cpu_cmpxchg_double(
2313 s->cpu_slab->freelist, s->cpu_slab->tid, 2313 s->cpu_slab->freelist, s->cpu_slab->tid,
2314 object, tid, 2314 object, tid,
2315 get_freepointer_safe(s, object), next_tid(tid)))) { 2315 get_freepointer_safe(s, object), next_tid(tid)))) {
2316 2316
2317 note_cmpxchg_failure("slab_alloc", s, tid); 2317 note_cmpxchg_failure("slab_alloc", s, tid);
2318 goto redo; 2318 goto redo;
2319 } 2319 }
2320 stat(s, ALLOC_FASTPATH); 2320 stat(s, ALLOC_FASTPATH);
2321 } 2321 }
2322 2322
2323 if (unlikely(gfpflags & __GFP_ZERO) && object) 2323 if (unlikely(gfpflags & __GFP_ZERO) && object)
2324 memset(object, 0, s->objsize); 2324 memset(object, 0, s->objsize);
2325 2325
2326 slab_post_alloc_hook(s, gfpflags, object); 2326 slab_post_alloc_hook(s, gfpflags, object);
2327 2327
2328 return object; 2328 return object;
2329 } 2329 }
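
The fastpath above (and the matching free fastpath further down) hinges on pairing the per-cpu freelist pointer with a transaction id and swapping both in a single double-width cmpxchg, so that any intervening operation, or a migration to another cpu, makes the cmpxchg fail and the operation is retried. The sketch below is a portable user-space model of the same idea: a 32-bit object index and a 32-bit tid packed into one 64-bit word so plain C11 atomics suffice, whereas the kernel uses a real this_cpu cmpxchg_double on {pointer, tid}.

/*
 * Model of the lockless freelist fastpath: head pointer (here an index)
 * and a transaction id are swapped together in one compare-and-exchange.
 * Illustrative only; not the kernel implementation.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdatomic.h>

#define NOBJ 16
#define NONE 0xffffffffu

static uint32_t next_idx[NOBJ];		/* per-object free pointer */
static _Atomic uint64_t freelist_head;	/* (index << 32) | tid */

static uint64_t pack(uint32_t idx, uint32_t tid)
{
	return ((uint64_t)idx << 32) | tid;
}

static int pop(void)			/* models the slab_alloc() fastpath */
{
	uint64_t old = atomic_load(&freelist_head);

	for (;;) {
		uint32_t idx = old >> 32;
		uint32_t tid = (uint32_t)old;
		uint64_t new;

		if (idx == NONE)
			return -1;	/* empty: would fall back to the slow path */

		new = pack(next_idx[idx], tid + 1);
		/* on failure 'old' is refreshed and we retry, like "goto redo" */
		if (atomic_compare_exchange_weak(&freelist_head, &old, new))
			return (int)idx;
	}
}

static void push(uint32_t idx)		/* models the slab_free() fastpath */
{
	uint64_t old = atomic_load(&freelist_head);

	for (;;) {
		uint64_t new;

		next_idx[idx] = (uint32_t)(old >> 32);	/* set_freepointer() */
		new = pack(idx, (uint32_t)old + 1);
		if (atomic_compare_exchange_weak(&freelist_head, &old, new))
			return;
	}
}

int main(void)
{
	for (uint32_t i = 0; i < NOBJ; i++)	/* freelist 0 -> 1 -> ... -> 15 */
		next_idx[i] = (i + 1 < NOBJ) ? i + 1 : NONE;
	atomic_store(&freelist_head, pack(0, 0));

	int a = pop(), b = pop();
	printf("allocated %d and %d\n", a, b);
	push((uint32_t)a);
	printf("allocated %d again after freeing it\n", pop());
	return 0;
}

Bumping the tid on every successful swap is what defeats the ABA problem: even if the head ends up holding the same value again, the tid differs and the stale cmpxchg fails.
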
2330 2330
2331 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) 2331 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2332 { 2332 {
2333 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); 2333 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2334 2334
2335 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); 2335 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
2336 2336
2337 return ret; 2337 return ret;
2338 } 2338 }
2339 EXPORT_SYMBOL(kmem_cache_alloc); 2339 EXPORT_SYMBOL(kmem_cache_alloc);
2340 2340
2341 #ifdef CONFIG_TRACING 2341 #ifdef CONFIG_TRACING
2342 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) 2342 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2343 { 2343 {
2344 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); 2344 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2345 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); 2345 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2346 return ret; 2346 return ret;
2347 } 2347 }
2348 EXPORT_SYMBOL(kmem_cache_alloc_trace); 2348 EXPORT_SYMBOL(kmem_cache_alloc_trace);
2349 2349
2350 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) 2350 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
2351 { 2351 {
2352 void *ret = kmalloc_order(size, flags, order); 2352 void *ret = kmalloc_order(size, flags, order);
2353 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); 2353 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
2354 return ret; 2354 return ret;
2355 } 2355 }
2356 EXPORT_SYMBOL(kmalloc_order_trace); 2356 EXPORT_SYMBOL(kmalloc_order_trace);
2357 #endif 2357 #endif
2358 2358
2359 #ifdef CONFIG_NUMA 2359 #ifdef CONFIG_NUMA
2360 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) 2360 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2361 { 2361 {
2362 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); 2362 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2363 2363
2364 trace_kmem_cache_alloc_node(_RET_IP_, ret, 2364 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2365 s->objsize, s->size, gfpflags, node); 2365 s->objsize, s->size, gfpflags, node);
2366 2366
2367 return ret; 2367 return ret;
2368 } 2368 }
2369 EXPORT_SYMBOL(kmem_cache_alloc_node); 2369 EXPORT_SYMBOL(kmem_cache_alloc_node);
2370 2370
2371 #ifdef CONFIG_TRACING 2371 #ifdef CONFIG_TRACING
2372 void *kmem_cache_alloc_node_trace(struct kmem_cache *s, 2372 void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2373 gfp_t gfpflags, 2373 gfp_t gfpflags,
2374 int node, size_t size) 2374 int node, size_t size)
2375 { 2375 {
2376 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); 2376 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2377 2377
2378 trace_kmalloc_node(_RET_IP_, ret, 2378 trace_kmalloc_node(_RET_IP_, ret,
2379 size, s->size, gfpflags, node); 2379 size, s->size, gfpflags, node);
2380 return ret; 2380 return ret;
2381 } 2381 }
2382 EXPORT_SYMBOL(kmem_cache_alloc_node_trace); 2382 EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2383 #endif 2383 #endif
2384 #endif 2384 #endif
2385 2385
2386 /* 2386 /*
2387 * Slow path handling. This may still be called frequently since objects 2387 * Slow path handling. This may still be called frequently since objects
2388 * have a longer lifetime than the cpu slabs in most processing loads. 2388 * have a longer lifetime than the cpu slabs in most processing loads.
2389 * 2389 *
2390 * So we still attempt to reduce cache line usage. Just take the slab 2390 * So we still attempt to reduce cache line usage. Just take the slab
2391 * lock and free the item. If there is no additional partial page 2391 * lock and free the item. If there is no additional partial page
2392 * handling required then we can return immediately. 2392 * handling required then we can return immediately.
2393 */ 2393 */
2394 static void __slab_free(struct kmem_cache *s, struct page *page, 2394 static void __slab_free(struct kmem_cache *s, struct page *page,
2395 void *x, unsigned long addr) 2395 void *x, unsigned long addr)
2396 { 2396 {
2397 void *prior; 2397 void *prior;
2398 void **object = (void *)x; 2398 void **object = (void *)x;
2399 int was_frozen; 2399 int was_frozen;
2400 int inuse; 2400 int inuse;
2401 struct page new; 2401 struct page new;
2402 unsigned long counters; 2402 unsigned long counters;
2403 struct kmem_cache_node *n = NULL; 2403 struct kmem_cache_node *n = NULL;
2404 unsigned long uninitialized_var(flags); 2404 unsigned long uninitialized_var(flags);
2405 2405
2406 stat(s, FREE_SLOWPATH); 2406 stat(s, FREE_SLOWPATH);
2407 2407
2408 if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr)) 2408 if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
2409 return; 2409 return;
2410 2410
2411 do { 2411 do {
2412 prior = page->freelist; 2412 prior = page->freelist;
2413 counters = page->counters; 2413 counters = page->counters;
2414 set_freepointer(s, object, prior); 2414 set_freepointer(s, object, prior);
2415 new.counters = counters; 2415 new.counters = counters;
2416 was_frozen = new.frozen; 2416 was_frozen = new.frozen;
2417 new.inuse--; 2417 new.inuse--;
2418 if ((!new.inuse || !prior) && !was_frozen && !n) { 2418 if ((!new.inuse || !prior) && !was_frozen && !n) {
2419 2419
2420 if (!kmem_cache_debug(s) && !prior) 2420 if (!kmem_cache_debug(s) && !prior)
2421 2421
2422 /* 2422 /*
2423 * Slab was on no list before and will be partially empty 2423 * Slab was on no list before and will be partially empty
2424 * We can defer the list move and instead freeze it. 2424 * We can defer the list move and instead freeze it.
2425 */ 2425 */
2426 new.frozen = 1; 2426 new.frozen = 1;
2427 2427
2428 else { /* Needs to be taken off a list */ 2428 else { /* Needs to be taken off a list */
2429 2429
2430 n = get_node(s, page_to_nid(page)); 2430 n = get_node(s, page_to_nid(page));
2431 /* 2431 /*
2432 * Speculatively acquire the list_lock. 2432 * Speculatively acquire the list_lock.
2433 * If the cmpxchg does not succeed then we may 2433 * If the cmpxchg does not succeed then we may
2434 * drop the list_lock without any processing. 2434 * drop the list_lock without any processing.
2435 * 2435 *
2436 * Otherwise the list_lock will synchronize with 2436 * Otherwise the list_lock will synchronize with
2437 * other processors updating the list of slabs. 2437 * other processors updating the list of slabs.
2438 */ 2438 */
2439 spin_lock_irqsave(&n->list_lock, flags); 2439 spin_lock_irqsave(&n->list_lock, flags);
2440 2440
2441 } 2441 }
2442 } 2442 }
2443 inuse = new.inuse; 2443 inuse = new.inuse;
2444 2444
2445 } while (!cmpxchg_double_slab(s, page, 2445 } while (!cmpxchg_double_slab(s, page,
2446 prior, counters, 2446 prior, counters,
2447 object, new.counters, 2447 object, new.counters,
2448 "__slab_free")); 2448 "__slab_free"));
2449 2449
2450 if (likely(!n)) { 2450 if (likely(!n)) {
2451 2451
2452 /* 2452 /*
2453 * If we just froze the page then put it onto the 2453 * If we just froze the page then put it onto the
2454 * per cpu partial list. 2454 * per cpu partial list.
2455 */ 2455 */
2456 if (new.frozen && !was_frozen) 2456 if (new.frozen && !was_frozen)
2457 put_cpu_partial(s, page, 1); 2457 put_cpu_partial(s, page, 1);
2458 2458
2459 /* 2459 /*
2460 * The list lock was not taken therefore no list 2460 * The list lock was not taken therefore no list
2461 * activity can be necessary. 2461 * activity can be necessary.
2462 */ 2462 */
2463 if (was_frozen) 2463 if (was_frozen)
2464 stat(s, FREE_FROZEN); 2464 stat(s, FREE_FROZEN);
2465 return; 2465 return;
2466 } 2466 }
2467 2467
2468 /* 2468 /*
2469 * was_frozen may have been set after we acquired the list_lock in 2469 * was_frozen may have been set after we acquired the list_lock in
2470 * an earlier loop. So we need to check it here again. 2470 * an earlier loop. So we need to check it here again.
2471 */ 2471 */
2472 if (was_frozen) 2472 if (was_frozen)
2473 stat(s, FREE_FROZEN); 2473 stat(s, FREE_FROZEN);
2474 else { 2474 else {
2475 if (unlikely(!inuse && n->nr_partial > s->min_partial)) 2475 if (unlikely(!inuse && n->nr_partial > s->min_partial))
2476 goto slab_empty; 2476 goto slab_empty;
2477 2477
2478 /* 2478 /*
2479 * Objects left in the slab. If it was not on the partial list before 2479 * Objects left in the slab. If it was not on the partial list before
2480 * then add it. 2480 * then add it.
2481 */ 2481 */
2482 if (unlikely(!prior)) { 2482 if (unlikely(!prior)) {
2483 remove_full(s, page); 2483 remove_full(s, page);
2484 add_partial(n, page, DEACTIVATE_TO_TAIL); 2484 add_partial(n, page, DEACTIVATE_TO_TAIL);
2485 stat(s, FREE_ADD_PARTIAL); 2485 stat(s, FREE_ADD_PARTIAL);
2486 } 2486 }
2487 } 2487 }
2488 spin_unlock_irqrestore(&n->list_lock, flags); 2488 spin_unlock_irqrestore(&n->list_lock, flags);
2489 return; 2489 return;
2490 2490
2491 slab_empty: 2491 slab_empty:
2492 if (prior) { 2492 if (prior) {
2493 /* 2493 /*
2494 * Slab on the partial list. 2494 * Slab on the partial list.
2495 */ 2495 */
2496 remove_partial(n, page); 2496 remove_partial(n, page);
2497 stat(s, FREE_REMOVE_PARTIAL); 2497 stat(s, FREE_REMOVE_PARTIAL);
2498 } else 2498 } else
2499 /* Slab must be on the full list */ 2499 /* Slab must be on the full list */
2500 remove_full(s, page); 2500 remove_full(s, page);
2501 2501
2502 spin_unlock_irqrestore(&n->list_lock, flags); 2502 spin_unlock_irqrestore(&n->list_lock, flags);
2503 stat(s, FREE_SLAB); 2503 stat(s, FREE_SLAB);
2504 discard_slab(s, page); 2504 discard_slab(s, page);
2505 } 2505 }
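
The free slow path above folds a lock acquisition into its cmpxchg retry loop: the node's list_lock is taken speculatively before the cmpxchg that decides the page's fate, and is simply dropped again if the final state needs no list manipulation. Below is a compact stand-alone illustration of that pattern, with a counter playing the role of page->inuse; everything here is an illustrative stand-in, not kernel code.

/*
 * "Speculative lock" pattern: take a lock that only some transitions
 * need before the compare-and-exchange that decides the transition,
 * and release it unused if the final state did not need it.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <pthread.h>

static _Atomic int inuse = 3;		/* objects still allocated */
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void free_one(void)
{
	bool locked = false;
	int old = atomic_load(&inuse);
	int new;

	do {
		new = old - 1;
		if (new == 0 && !locked) {
			/* Transition to "empty" may require list work:
			 * acquire the lock speculatively before the cmpxchg. */
			pthread_mutex_lock(&list_lock);
			locked = true;
		}
		/* on failure, 'old' is refreshed and the loop re-decides */
	} while (!atomic_compare_exchange_weak(&inuse, &old, new));

	if (!locked)
		return;			/* fast case: no list work needed */

	if (new == 0)
		printf("page empty: would be removed from its list\n");
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	free_one();
	free_one();
	free_one();			/* this one takes the lock */
	return 0;
}

Keeping the lock across a failed cmpxchg lets the retry re-evaluate the transition without a second lock round-trip; releasing it untouched is the "drop the list_lock without any processing" case described in the comment above.
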
2506 2506
2507 /* 2507 /*
2508 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that 2508 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
2509 * can perform fastpath freeing without additional function calls. 2509 * can perform fastpath freeing without additional function calls.
2510 * 2510 *
2511 * The fastpath is only possible if we are freeing to the current cpu slab 2511 * The fastpath is only possible if we are freeing to the current cpu slab
2512 * of this processor. This is typically the case if we have just allocated 2512 * of this processor. This is typically the case if we have just allocated
2513 * the item before. 2513 * the item before.
2514 * 2514 *
2515 * If fastpath is not possible then fall back to __slab_free where we deal 2515 * If fastpath is not possible then fall back to __slab_free where we deal
2516 * with all sorts of special processing. 2516 * with all sorts of special processing.
2517 */ 2517 */
2518 static __always_inline void slab_free(struct kmem_cache *s, 2518 static __always_inline void slab_free(struct kmem_cache *s,
2519 struct page *page, void *x, unsigned long addr) 2519 struct page *page, void *x, unsigned long addr)
2520 { 2520 {
2521 void **object = (void *)x; 2521 void **object = (void *)x;
2522 struct kmem_cache_cpu *c; 2522 struct kmem_cache_cpu *c;
2523 unsigned long tid; 2523 unsigned long tid;
2524 2524
2525 slab_free_hook(s, x); 2525 slab_free_hook(s, x);
2526 2526
2527 redo: 2527 redo:
2528 /* 2528 /*
2529 * Determine the current cpu's per cpu slab. 2529 * Determine the current cpu's per cpu slab.
2530 * The cpu may change afterward. However that does not matter since 2530 * The cpu may change afterward. However that does not matter since
2531 * data is retrieved via this pointer. If we are on the same cpu 2531 * data is retrieved via this pointer. If we are on the same cpu
2532 * during the cmpxchg then the free will succeed. 2532 * during the cmpxchg then the free will succeed.
2533 */ 2533 */
2534 c = __this_cpu_ptr(s->cpu_slab); 2534 c = __this_cpu_ptr(s->cpu_slab);
2535 2535
2536 tid = c->tid; 2536 tid = c->tid;
2537 barrier(); 2537 barrier();
2538 2538
2539 if (likely(page == c->page)) { 2539 if (likely(page == c->page)) {
2540 set_freepointer(s, object, c->freelist); 2540 set_freepointer(s, object, c->freelist);
2541 2541
2542 if (unlikely(!irqsafe_cpu_cmpxchg_double( 2542 if (unlikely(!irqsafe_cpu_cmpxchg_double(
2543 s->cpu_slab->freelist, s->cpu_slab->tid, 2543 s->cpu_slab->freelist, s->cpu_slab->tid,
2544 c->freelist, tid, 2544 c->freelist, tid,
2545 object, next_tid(tid)))) { 2545 object, next_tid(tid)))) {
2546 2546
2547 note_cmpxchg_failure("slab_free", s, tid); 2547 note_cmpxchg_failure("slab_free", s, tid);
2548 goto redo; 2548 goto redo;
2549 } 2549 }
2550 stat(s, FREE_FASTPATH); 2550 stat(s, FREE_FASTPATH);
2551 } else 2551 } else
2552 __slab_free(s, page, x, addr); 2552 __slab_free(s, page, x, addr);
2553 2553
2554 } 2554 }
2555 2555
2556 void kmem_cache_free(struct kmem_cache *s, void *x) 2556 void kmem_cache_free(struct kmem_cache *s, void *x)
2557 { 2557 {
2558 struct page *page; 2558 struct page *page;
2559 2559
2560 page = virt_to_head_page(x); 2560 page = virt_to_head_page(x);
2561 2561
2562 slab_free(s, page, x, _RET_IP_); 2562 slab_free(s, page, x, _RET_IP_);
2563 2563
2564 trace_kmem_cache_free(_RET_IP_, x); 2564 trace_kmem_cache_free(_RET_IP_, x);
2565 } 2565 }
2566 EXPORT_SYMBOL(kmem_cache_free); 2566 EXPORT_SYMBOL(kmem_cache_free);
2567 2567
2568 /* 2568 /*
2569 * Object placement in a slab is made very easy because we always start at 2569 * Object placement in a slab is made very easy because we always start at
2570 * offset 0. If we tune the size of the object to the alignment then we can 2570 * offset 0. If we tune the size of the object to the alignment then we can
2571 * get the required alignment by putting one properly sized object after 2571 * get the required alignment by putting one properly sized object after
2572 * another. 2572 * another.
2573 * 2573 *
2574 * Notice that the allocation order determines the sizes of the per cpu 2574 * Notice that the allocation order determines the sizes of the per cpu
2575 * caches. Each processor always has one slab available for allocations. 2575 * caches. Each processor always has one slab available for allocations.
2576 * Increasing the allocation order reduces the number of times that slabs 2576 * Increasing the allocation order reduces the number of times that slabs
2577 * must be moved on and off the partial lists and is therefore a factor in 2577 * must be moved on and off the partial lists and is therefore a factor in
2578 * locking overhead. 2578 * locking overhead.
2579 */ 2579 */
2580 2580
2581 /* 2581 /*
2582 * Minimum / Maximum order of slab pages. This influences locking overhead 2582 * Minimum / Maximum order of slab pages. This influences locking overhead
2583 * and slab fragmentation. A higher order reduces the number of partial slabs 2583 * and slab fragmentation. A higher order reduces the number of partial slabs
2584 * and increases the number of allocations possible without having to 2584 * and increases the number of allocations possible without having to
2585 * take the list_lock. 2585 * take the list_lock.
2586 */ 2586 */
2587 static int slub_min_order; 2587 static int slub_min_order;
2588 static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; 2588 static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
2589 static int slub_min_objects; 2589 static int slub_min_objects;
2590 2590
2591 /* 2591 /*
2592 * Merge control. If this is set then no merging of slab caches will occur. 2592 * Merge control. If this is set then no merging of slab caches will occur.
2593 * (Could be removed. This was introduced to pacify the merge skeptics.) 2593 * (Could be removed. This was introduced to pacify the merge skeptics.)
2594 */ 2594 */
2595 static int slub_nomerge; 2595 static int slub_nomerge;
2596 2596
2597 /* 2597 /*
2598 * Calculate the order of allocation given a slab object size. 2598 * Calculate the order of allocation given a slab object size.
2599 * 2599 *
2600 * The order of allocation has significant impact on performance and other 2600 * The order of allocation has significant impact on performance and other
2601 * system components. Generally order 0 allocations should be preferred since 2601 * system components. Generally order 0 allocations should be preferred since
2602 * order 0 does not cause fragmentation in the page allocator. Larger objects 2602 * order 0 does not cause fragmentation in the page allocator. Larger objects
2603 * be problematic to put into order 0 slabs because there may be too much 2603 * be problematic to put into order 0 slabs because there may be too much
2604 * unused space left. We go to a higher order if more than 1/16th of the slab 2604 * unused space left. We go to a higher order if more than 1/16th of the slab
2605 * would be wasted. 2605 * would be wasted.
2606 * 2606 *
2607 * In order to reach satisfactory performance we must ensure that a minimum 2607 * In order to reach satisfactory performance we must ensure that a minimum
2608 * number of objects is in one slab. Otherwise we may generate too much 2608 * number of objects is in one slab. Otherwise we may generate too much
2609 * activity on the partial lists which requires taking the list_lock. This is 2609 * activity on the partial lists which requires taking the list_lock. This is
2610 * less a concern for large slabs though which are rarely used. 2610 * less a concern for large slabs though which are rarely used.
2611 * 2611 *
2612 * slub_max_order specifies the order where we begin to stop considering the 2612 * slub_max_order specifies the order where we begin to stop considering the
2613 * number of objects in a slab as critical. If we reach slub_max_order then 2613 * number of objects in a slab as critical. If we reach slub_max_order then
2614 * we try to keep the page order as low as possible. So we accept more waste 2614 * we try to keep the page order as low as possible. So we accept more waste
2615 * of space in favor of a small page order. 2615 * of space in favor of a small page order.
2616 * 2616 *
2617 * Higher order allocations also allow the placement of more objects in a 2617 * Higher order allocations also allow the placement of more objects in a
2618 * slab and thereby reduce object handling overhead. If the user has 2618 * slab and thereby reduce object handling overhead. If the user has
2619 * requested a higher minimum order then we start with that one instead of 2619 * requested a higher minimum order then we start with that one instead of
2620 * the smallest order which will fit the object. 2620 * the smallest order which will fit the object.
2621 */ 2621 */
2622 static inline int slab_order(int size, int min_objects, 2622 static inline int slab_order(int size, int min_objects,
2623 int max_order, int fract_leftover, int reserved) 2623 int max_order, int fract_leftover, int reserved)
2624 { 2624 {
2625 int order; 2625 int order;
2626 int rem; 2626 int rem;
2627 int min_order = slub_min_order; 2627 int min_order = slub_min_order;
2628 2628
2629 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE) 2629 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
2630 return get_order(size * MAX_OBJS_PER_PAGE) - 1; 2630 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
2631 2631
2632 for (order = max(min_order, 2632 for (order = max(min_order,
2633 fls(min_objects * size - 1) - PAGE_SHIFT); 2633 fls(min_objects * size - 1) - PAGE_SHIFT);
2634 order <= max_order; order++) { 2634 order <= max_order; order++) {
2635 2635
2636 unsigned long slab_size = PAGE_SIZE << order; 2636 unsigned long slab_size = PAGE_SIZE << order;
2637 2637
2638 if (slab_size < min_objects * size + reserved) 2638 if (slab_size < min_objects * size + reserved)
2639 continue; 2639 continue;
2640 2640
2641 rem = (slab_size - reserved) % size; 2641 rem = (slab_size - reserved) % size;
2642 2642
2643 if (rem <= slab_size / fract_leftover) 2643 if (rem <= slab_size / fract_leftover)
2644 break; 2644 break;
2645 2645
2646 } 2646 }
2647 2647
2648 return order; 2648 return order;
2649 } 2649 }
2650 2650
2651 static inline int calculate_order(int size, int reserved) 2651 static inline int calculate_order(int size, int reserved)
2652 { 2652 {
2653 int order; 2653 int order;
2654 int min_objects; 2654 int min_objects;
2655 int fraction; 2655 int fraction;
2656 int max_objects; 2656 int max_objects;
2657 2657
2658 /* 2658 /*
2659 * Attempt to find best configuration for a slab. This 2659 * Attempt to find best configuration for a slab. This
2660 * works by first attempting to generate a layout with 2660 * works by first attempting to generate a layout with
2661 * the best configuration and backing off gradually. 2661 * the best configuration and backing off gradually.
2662 * 2662 *
2663 * First we reduce the acceptable waste in a slab. Then 2663 * First we reduce the acceptable waste in a slab. Then
2664 * we reduce the minimum objects required in a slab. 2664 * we reduce the minimum objects required in a slab.
2665 */ 2665 */
2666 min_objects = slub_min_objects; 2666 min_objects = slub_min_objects;
2667 if (!min_objects) 2667 if (!min_objects)
2668 min_objects = 4 * (fls(nr_cpu_ids) + 1); 2668 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2669 max_objects = order_objects(slub_max_order, size, reserved); 2669 max_objects = order_objects(slub_max_order, size, reserved);
2670 min_objects = min(min_objects, max_objects); 2670 min_objects = min(min_objects, max_objects);
2671 2671
2672 while (min_objects > 1) { 2672 while (min_objects > 1) {
2673 fraction = 16; 2673 fraction = 16;
2674 while (fraction >= 4) { 2674 while (fraction >= 4) {
2675 order = slab_order(size, min_objects, 2675 order = slab_order(size, min_objects,
2676 slub_max_order, fraction, reserved); 2676 slub_max_order, fraction, reserved);
2677 if (order <= slub_max_order) 2677 if (order <= slub_max_order)
2678 return order; 2678 return order;
2679 fraction /= 2; 2679 fraction /= 2;
2680 } 2680 }
2681 min_objects--; 2681 min_objects--;
2682 } 2682 }
2683 2683
2684 /* 2684 /*
2685 * We were unable to place multiple objects in a slab. Now 2685 * We were unable to place multiple objects in a slab. Now
2686 * lets see if we can place a single object there. 2686 * lets see if we can place a single object there.
2687 */ 2687 */
2688 order = slab_order(size, 1, slub_max_order, 1, reserved); 2688 order = slab_order(size, 1, slub_max_order, 1, reserved);
2689 if (order <= slub_max_order) 2689 if (order <= slub_max_order)
2690 return order; 2690 return order;
2691 2691
2692 /* 2692 /*
2693 * Doh this slab cannot be placed using slub_max_order. 2693 * Doh this slab cannot be placed using slub_max_order.
2694 */ 2694 */
2695 order = slab_order(size, 1, MAX_ORDER, 1, reserved); 2695 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2696 if (order < MAX_ORDER) 2696 if (order < MAX_ORDER)
2697 return order; 2697 return order;
2698 return -ENOSYS; 2698 return -ENOSYS;
2699 } 2699 }
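
The two routines above boil down to simple arithmetic: for each candidate order, check how many objects fit into PAGE_SIZE << order and whether the leftover stays under slab_size / fract_leftover. The model below replays that arithmetic outside the kernel for one hypothetical object size; it drops the reserved bytes and the fls()-based starting order, and the constants are picked for the example, not taken from a real cache.

/*
 * Replay of the slab_order() waste calculation: pick the lowest order
 * whose leftover space is at most slab_size / fract_leftover.
 * Constants are illustrative.
 */
#include <stdio.h>

#define PAGE_SIZE_MODEL 4096UL

static int model_slab_order(unsigned long size, int min_objects,
			    int max_order, int fract_leftover)
{
	for (int order = 0; order <= max_order; order++) {
		unsigned long slab_size = PAGE_SIZE_MODEL << order;
		unsigned long rem;

		if (slab_size < (unsigned long)min_objects * size)
			continue;		/* too few objects would fit */

		rem = slab_size % size;
		printf("order %d: %lu objects, %lu bytes wasted\n",
		       order, slab_size / size, rem);
		if (rem <= slab_size / fract_leftover)
			return order;
	}
	return -1;
}

int main(void)
{
	/* e.g. a 700-byte object, at least 8 per slab, waste <= 1/16 */
	int order = model_slab_order(700, 8, 3, 16);
	printf("chosen order: %d\n", order);
	return 0;
}

For a 700-byte object this settles on order 1: eleven objects per 8 KiB slab with 492 wasted bytes (under 1/16), whereas an order-0 slab cannot even hold the requested minimum of eight objects.
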
2700 2700
2701 /* 2701 /*
2702 * Figure out what the alignment of the objects will be. 2702 * Figure out what the alignment of the objects will be.
2703 */ 2703 */
2704 static unsigned long calculate_alignment(unsigned long flags, 2704 static unsigned long calculate_alignment(unsigned long flags,
2705 unsigned long align, unsigned long size) 2705 unsigned long align, unsigned long size)
2706 { 2706 {
2707 /* 2707 /*
2708 * If the user wants hardware cache aligned objects then follow that 2708 * If the user wants hardware cache aligned objects then follow that
2709 * suggestion if the object is sufficiently large. 2709 * suggestion if the object is sufficiently large.
2710 * 2710 *
2711 * The hardware cache alignment cannot override the specified 2711 * The hardware cache alignment cannot override the specified
2712 * alignment though. If that is greater then use it. 2712 * alignment though. If that is greater then use it.
2713 */ 2713 */
2714 if (flags & SLAB_HWCACHE_ALIGN) { 2714 if (flags & SLAB_HWCACHE_ALIGN) {
2715 unsigned long ralign = cache_line_size(); 2715 unsigned long ralign = cache_line_size();
2716 while (size <= ralign / 2) 2716 while (size <= ralign / 2)
2717 ralign /= 2; 2717 ralign /= 2;
2718 align = max(align, ralign); 2718 align = max(align, ralign);
2719 } 2719 }
2720 2720
2721 if (align < ARCH_SLAB_MINALIGN) 2721 if (align < ARCH_SLAB_MINALIGN)
2722 align = ARCH_SLAB_MINALIGN; 2722 align = ARCH_SLAB_MINALIGN;
2723 2723
2724 return ALIGN(align, sizeof(void *)); 2724 return ALIGN(align, sizeof(void *));
2725 } 2725 }
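
As a worked example of the loop above: with 64-byte cache lines and SLAB_HWCACHE_ALIGN, a 24-byte object halves ralign from 64 to 32 (24 fits in half of 64 but not in half of 32), so such objects end up 32-byte aligned instead of each burning a whole cache line. The snippet below just replays that arithmetic; the cache line size and minimum alignment are assumed values.

/*
 * Replay of the SLAB_HWCACHE_ALIGN arithmetic: shrink the cache-line
 * alignment while the object still fits in half of it, then clamp to a
 * minimum and to word alignment.  Values are illustrative.
 */
#include <stdio.h>

#define CACHE_LINE_MODEL 64UL
#define MINALIGN_MODEL   8UL

static unsigned long model_alignment(unsigned long align, unsigned long size)
{
	unsigned long ralign = CACHE_LINE_MODEL;

	while (size <= ralign / 2)
		ralign /= 2;
	if (ralign > align)
		align = ralign;
	if (align < MINALIGN_MODEL)
		align = MINALIGN_MODEL;
	return (align + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
}

int main(void)
{
	for (unsigned long size = 8; size <= 64; size *= 2)
		printf("object %3lu bytes -> align %lu\n",
		       size, model_alignment(sizeof(void *), size));
	return 0;
}

Because the final alignment always divides the cache line size and the object is no larger than it, several small objects can share a line while no object straddles two lines.
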
2726 2726
2727 static void 2727 static void
2728 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) 2728 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
2729 { 2729 {
2730 n->nr_partial = 0; 2730 n->nr_partial = 0;
2731 spin_lock_init(&n->list_lock); 2731 spin_lock_init(&n->list_lock);
2732 INIT_LIST_HEAD(&n->partial); 2732 INIT_LIST_HEAD(&n->partial);
2733 #ifdef CONFIG_SLUB_DEBUG 2733 #ifdef CONFIG_SLUB_DEBUG
2734 atomic_long_set(&n->nr_slabs, 0); 2734 atomic_long_set(&n->nr_slabs, 0);
2735 atomic_long_set(&n->total_objects, 0); 2735 atomic_long_set(&n->total_objects, 0);
2736 INIT_LIST_HEAD(&n->full); 2736 INIT_LIST_HEAD(&n->full);
2737 #endif 2737 #endif
2738 } 2738 }
2739 2739
2740 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) 2740 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2741 { 2741 {
2742 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < 2742 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2743 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); 2743 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
2744 2744
2745 /* 2745 /*
2746 * Must align to double word boundary for the double cmpxchg 2746 * Must align to double word boundary for the double cmpxchg
2747 * instructions to work; see __pcpu_double_call_return_bool(). 2747 * instructions to work; see __pcpu_double_call_return_bool().
2748 */ 2748 */
2749 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2749 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2750 2 * sizeof(void *)); 2750 2 * sizeof(void *));
2751 2751
2752 if (!s->cpu_slab) 2752 if (!s->cpu_slab)
2753 return 0; 2753 return 0;
2754 2754
2755 init_kmem_cache_cpus(s); 2755 init_kmem_cache_cpus(s);
2756 2756
2757 return 1; 2757 return 1;
2758 } 2758 }
2759 2759
2760 static struct kmem_cache *kmem_cache_node; 2760 static struct kmem_cache *kmem_cache_node;
2761 2761
2762 /* 2762 /*
2763 * No kmalloc_node yet so do it by hand. We know that this is the first 2763 * No kmalloc_node yet so do it by hand. We know that this is the first
2764 * slab on the node for this slabcache. There are no concurrent accesses 2764 * slab on the node for this slabcache. There are no concurrent accesses
2765 * possible. 2765 * possible.
2766 * 2766 *
2767 * Note that this function only works on the kmalloc_node_cache 2767 * Note that this function only works on the kmalloc_node_cache
2768 * when allocating for the kmalloc_node_cache. This is used for bootstrapping 2768 * when allocating for the kmalloc_node_cache. This is used for bootstrapping
2769 * memory on a fresh node that has no slab structures yet. 2769 * memory on a fresh node that has no slab structures yet.
2770 */ 2770 */
2771 static void early_kmem_cache_node_alloc(int node) 2771 static void early_kmem_cache_node_alloc(int node)
2772 { 2772 {
2773 struct page *page; 2773 struct page *page;
2774 struct kmem_cache_node *n; 2774 struct kmem_cache_node *n;
2775 2775
2776 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); 2776 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
2777 2777
2778 page = new_slab(kmem_cache_node, GFP_NOWAIT, node); 2778 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
2779 2779
2780 BUG_ON(!page); 2780 BUG_ON(!page);
2781 if (page_to_nid(page) != node) { 2781 if (page_to_nid(page) != node) {
2782 printk(KERN_ERR "SLUB: Unable to allocate memory from " 2782 printk(KERN_ERR "SLUB: Unable to allocate memory from "
2783 "node %d\n", node); 2783 "node %d\n", node);
2784 printk(KERN_ERR "SLUB: Allocating a useless per node structure " 2784 printk(KERN_ERR "SLUB: Allocating a useless per node structure "
2785 "in order to be able to continue\n"); 2785 "in order to be able to continue\n");
2786 } 2786 }
2787 2787
2788 n = page->freelist; 2788 n = page->freelist;
2789 BUG_ON(!n); 2789 BUG_ON(!n);
2790 page->freelist = get_freepointer(kmem_cache_node, n); 2790 page->freelist = get_freepointer(kmem_cache_node, n);
2791 page->inuse = 1; 2791 page->inuse = 1;
2792 page->frozen = 0; 2792 page->frozen = 0;
2793 kmem_cache_node->node[node] = n; 2793 kmem_cache_node->node[node] = n;
2794 #ifdef CONFIG_SLUB_DEBUG 2794 #ifdef CONFIG_SLUB_DEBUG
2795 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); 2795 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
2796 init_tracking(kmem_cache_node, n); 2796 init_tracking(kmem_cache_node, n);
2797 #endif 2797 #endif
2798 init_kmem_cache_node(n, kmem_cache_node); 2798 init_kmem_cache_node(n, kmem_cache_node);
2799 inc_slabs_node(kmem_cache_node, node, page->objects); 2799 inc_slabs_node(kmem_cache_node, node, page->objects);
2800 2800
2801 add_partial(n, page, DEACTIVATE_TO_HEAD); 2801 add_partial(n, page, DEACTIVATE_TO_HEAD);
2802 } 2802 }
2803 2803
2804 static void free_kmem_cache_nodes(struct kmem_cache *s) 2804 static void free_kmem_cache_nodes(struct kmem_cache *s)
2805 { 2805 {
2806 int node; 2806 int node;
2807 2807
2808 for_each_node_state(node, N_NORMAL_MEMORY) { 2808 for_each_node_state(node, N_NORMAL_MEMORY) {
2809 struct kmem_cache_node *n = s->node[node]; 2809 struct kmem_cache_node *n = s->node[node];
2810 2810
2811 if (n) 2811 if (n)
2812 kmem_cache_free(kmem_cache_node, n); 2812 kmem_cache_free(kmem_cache_node, n);
2813 2813
2814 s->node[node] = NULL; 2814 s->node[node] = NULL;
2815 } 2815 }
2816 } 2816 }
2817 2817
2818 static int init_kmem_cache_nodes(struct kmem_cache *s) 2818 static int init_kmem_cache_nodes(struct kmem_cache *s)
2819 { 2819 {
2820 int node; 2820 int node;
2821 2821
2822 for_each_node_state(node, N_NORMAL_MEMORY) { 2822 for_each_node_state(node, N_NORMAL_MEMORY) {
2823 struct kmem_cache_node *n; 2823 struct kmem_cache_node *n;
2824 2824
2825 if (slab_state == DOWN) { 2825 if (slab_state == DOWN) {
2826 early_kmem_cache_node_alloc(node); 2826 early_kmem_cache_node_alloc(node);
2827 continue; 2827 continue;
2828 } 2828 }
2829 n = kmem_cache_alloc_node(kmem_cache_node, 2829 n = kmem_cache_alloc_node(kmem_cache_node,
2830 GFP_KERNEL, node); 2830 GFP_KERNEL, node);
2831 2831
2832 if (!n) { 2832 if (!n) {
2833 free_kmem_cache_nodes(s); 2833 free_kmem_cache_nodes(s);
2834 return 0; 2834 return 0;
2835 } 2835 }
2836 2836
2837 s->node[node] = n; 2837 s->node[node] = n;
2838 init_kmem_cache_node(n, s); 2838 init_kmem_cache_node(n, s);
2839 } 2839 }
2840 return 1; 2840 return 1;
2841 } 2841 }
2842 2842
2843 static void set_min_partial(struct kmem_cache *s, unsigned long min) 2843 static void set_min_partial(struct kmem_cache *s, unsigned long min)
2844 { 2844 {
2845 if (min < MIN_PARTIAL) 2845 if (min < MIN_PARTIAL)
2846 min = MIN_PARTIAL; 2846 min = MIN_PARTIAL;
2847 else if (min > MAX_PARTIAL) 2847 else if (min > MAX_PARTIAL)
2848 min = MAX_PARTIAL; 2848 min = MAX_PARTIAL;
2849 s->min_partial = min; 2849 s->min_partial = min;
2850 } 2850 }
2851 2851
2852 /* 2852 /*
2853 * calculate_sizes() determines the order and the distribution of data within 2853 * calculate_sizes() determines the order and the distribution of data within
2854 * a slab object. 2854 * a slab object.
2855 */ 2855 */
2856 static int calculate_sizes(struct kmem_cache *s, int forced_order) 2856 static int calculate_sizes(struct kmem_cache *s, int forced_order)
2857 { 2857 {
2858 unsigned long flags = s->flags; 2858 unsigned long flags = s->flags;
2859 unsigned long size = s->objsize; 2859 unsigned long size = s->objsize;
2860 unsigned long align = s->align; 2860 unsigned long align = s->align;
2861 int order; 2861 int order;
2862 2862
2863 /* 2863 /*
2864 * Round up object size to the next word boundary. We can only 2864 * Round up object size to the next word boundary. We can only
2865 * place the free pointer at word boundaries and this determines 2865 * place the free pointer at word boundaries and this determines
2866 * the possible location of the free pointer. 2866 * the possible location of the free pointer.
2867 */ 2867 */
2868 size = ALIGN(size, sizeof(void *)); 2868 size = ALIGN(size, sizeof(void *));
2869 2869
2870 #ifdef CONFIG_SLUB_DEBUG 2870 #ifdef CONFIG_SLUB_DEBUG
2871 /* 2871 /*
2872 * Determine if we can poison the object itself. If the user of 2872 * Determine if we can poison the object itself. If the user of
2873 * the slab may touch the object after free or before allocation 2873 * the slab may touch the object after free or before allocation
2874 * then we should never poison the object itself. 2874 * then we should never poison the object itself.
2875 */ 2875 */
2876 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) && 2876 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
2877 !s->ctor) 2877 !s->ctor)
2878 s->flags |= __OBJECT_POISON; 2878 s->flags |= __OBJECT_POISON;
2879 else 2879 else
2880 s->flags &= ~__OBJECT_POISON; 2880 s->flags &= ~__OBJECT_POISON;
2881 2881
2882 2882
2883 /* 2883 /*
2884 * If we are Redzoning then check if there is some space between the 2884 * If we are Redzoning then check if there is some space between the
2885 * end of the object and the free pointer. If not then add an 2885 * end of the object and the free pointer. If not then add an
2886 * additional word to have some bytes to store Redzone information. 2886 * additional word to have some bytes to store Redzone information.
2887 */ 2887 */
2888 if ((flags & SLAB_RED_ZONE) && size == s->objsize) 2888 if ((flags & SLAB_RED_ZONE) && size == s->objsize)
2889 size += sizeof(void *); 2889 size += sizeof(void *);
2890 #endif 2890 #endif
2891 2891
2892 /* 2892 /*
2893 * With that we have determined the number of bytes in actual use 2893 * With that we have determined the number of bytes in actual use
2894 * by the object. This is the potential offset to the free pointer. 2894 * by the object. This is the potential offset to the free pointer.
2895 */ 2895 */
2896 s->inuse = size; 2896 s->inuse = size;
2897 2897
2898 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || 2898 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
2899 s->ctor)) { 2899 s->ctor)) {
2900 /* 2900 /*
2901 * Relocate free pointer after the object if it is not 2901 * Relocate free pointer after the object if it is not
2902 * permitted to overwrite the first word of the object on 2902 * permitted to overwrite the first word of the object on
2903 * kmem_cache_free. 2903 * kmem_cache_free.
2904 * 2904 *
2905 * This is the case if we do RCU, have a constructor or 2905 * This is the case if we do RCU, have a constructor or
2906 * destructor or are poisoning the objects. 2906 * destructor or are poisoning the objects.
2907 */ 2907 */
2908 s->offset = size; 2908 s->offset = size;
2909 size += sizeof(void *); 2909 size += sizeof(void *);
2910 } 2910 }
2911 2911
2912 #ifdef CONFIG_SLUB_DEBUG 2912 #ifdef CONFIG_SLUB_DEBUG
2913 if (flags & SLAB_STORE_USER) 2913 if (flags & SLAB_STORE_USER)
2914 /* 2914 /*
2915 * Need to store information about allocs and frees after 2915 * Need to store information about allocs and frees after
2916 * the object. 2916 * the object.
2917 */ 2917 */
2918 size += 2 * sizeof(struct track); 2918 size += 2 * sizeof(struct track);
2919 2919
2920 if (flags & SLAB_RED_ZONE) 2920 if (flags & SLAB_RED_ZONE)
2921 /* 2921 /*
2922 * Add some empty padding so that we can catch 2922 * Add some empty padding so that we can catch
2923 * overwrites from earlier objects rather than let 2923 * overwrites from earlier objects rather than let
2924 * tracking information or the free pointer be 2924 * tracking information or the free pointer be
2925 * corrupted if a user writes before the start 2925 * corrupted if a user writes before the start
2926 * of the object. 2926 * of the object.
2927 */ 2927 */
2928 size += sizeof(void *); 2928 size += sizeof(void *);
2929 #endif 2929 #endif
2930 2930
2931 /* 2931 /*
2932 * Determine the alignment based on various parameters that the 2932 * Determine the alignment based on various parameters that the
2933 * user specified and the dynamic determination of cache line size 2933 * user specified and the dynamic determination of cache line size
2934 * on bootup. 2934 * on bootup.
2935 */ 2935 */
2936 align = calculate_alignment(flags, align, s->objsize); 2936 align = calculate_alignment(flags, align, s->objsize);
2937 s->align = align; 2937 s->align = align;
2938 2938
2939 /* 2939 /*
2940 * SLUB stores one object immediately after another beginning from 2940 * SLUB stores one object immediately after another beginning from
2941 * offset 0. In order to align the objects we have to simply size 2941 * offset 0. In order to align the objects we have to simply size
2942 * each object to conform to the alignment. 2942 * each object to conform to the alignment.
2943 */ 2943 */
2944 size = ALIGN(size, align); 2944 size = ALIGN(size, align);
2945 s->size = size; 2945 s->size = size;
2946 if (forced_order >= 0) 2946 if (forced_order >= 0)
2947 order = forced_order; 2947 order = forced_order;
2948 else 2948 else
2949 order = calculate_order(size, s->reserved); 2949 order = calculate_order(size, s->reserved);
2950 2950
2951 if (order < 0) 2951 if (order < 0)
2952 return 0; 2952 return 0;
2953 2953
2954 s->allocflags = 0; 2954 s->allocflags = 0;
2955 if (order) 2955 if (order)
2956 s->allocflags |= __GFP_COMP; 2956 s->allocflags |= __GFP_COMP;
2957 2957
2958 if (s->flags & SLAB_CACHE_DMA) 2958 if (s->flags & SLAB_CACHE_DMA)
2959 s->allocflags |= SLUB_DMA; 2959 s->allocflags |= SLUB_DMA;
2960 2960
2961 if (s->flags & SLAB_RECLAIM_ACCOUNT) 2961 if (s->flags & SLAB_RECLAIM_ACCOUNT)
2962 s->allocflags |= __GFP_RECLAIMABLE; 2962 s->allocflags |= __GFP_RECLAIMABLE;
2963 2963
2964 /* 2964 /*
2965 * Determine the number of objects per slab 2965 * Determine the number of objects per slab
2966 */ 2966 */
2967 s->oo = oo_make(order, size, s->reserved); 2967 s->oo = oo_make(order, size, s->reserved);
2968 s->min = oo_make(get_order(size), size, s->reserved); 2968 s->min = oo_make(get_order(size), size, s->reserved);
2969 if (oo_objects(s->oo) > oo_objects(s->max)) 2969 if (oo_objects(s->oo) > oo_objects(s->max))
2970 s->max = s->oo; 2970 s->max = s->oo;
2971 2971
2972 return !!oo_objects(s->oo); 2972 return !!oo_objects(s->oo);
2973 2973
2974 } 2974 }
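
To make the size accounting in calculate_sizes() concrete, here is one hypothetical debug layout done by hand for a 56-byte object with poisoning, red zoning and allocation tracking enabled. The sizeof(struct track) value and the 64-byte alignment are assumptions for the example, not values computed by the real code.

/*
 * Hand-rolled layout mirroring the steps above: word-align the object,
 * add a red-zone word, relocate the free pointer, add two tracking
 * records and a trailing pad word, then align the whole object.
 * sizeof(struct track) is a guess here.
 */
#include <stdio.h>

#define WORD		sizeof(void *)
#define ALIGN_UP(x, a)	(((x) + (a) - 1) / (a) * (a))

int main(void)
{
	unsigned long objsize = 56;		/* what the caller asked for */
	unsigned long track   = 24;		/* assumed sizeof(struct track) */
	unsigned long align   = 64;		/* e.g. SLAB_HWCACHE_ALIGN */
	unsigned long size    = ALIGN_UP(objsize, WORD);	/* 56 */
	unsigned long offset;

	size += WORD;		/* red zone word after the object	-> 64  */
	offset = size;		/* free pointer relocated here		       */
	size += WORD;		/*					-> 72  */
	size += 2 * track;	/* alloc + free tracking		-> 120 */
	size += WORD;		/* red-zone padding before the object	-> 128 */
	size  = ALIGN_UP(size, align);				/* 128 */

	printf("objsize %lu -> slab object size %lu, free pointer at %lu\n",
	       objsize, size, offset);
	return 0;
}

Without the debug options the same object would stay at 64 bytes (56 rounded up to the cache line), which is the kind of growth the disable_higher_order_debug check in kmem_cache_open() below is watching for.
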
2975 2975
2976 static int kmem_cache_open(struct kmem_cache *s, 2976 static int kmem_cache_open(struct kmem_cache *s,
2977 const char *name, size_t size, 2977 const char *name, size_t size,
2978 size_t align, unsigned long flags, 2978 size_t align, unsigned long flags,
2979 void (*ctor)(void *)) 2979 void (*ctor)(void *))
2980 { 2980 {
2981 memset(s, 0, kmem_size); 2981 memset(s, 0, kmem_size);
2982 s->name = name; 2982 s->name = name;
2983 s->ctor = ctor; 2983 s->ctor = ctor;
2984 s->objsize = size; 2984 s->objsize = size;
2985 s->align = align; 2985 s->align = align;
2986 s->flags = kmem_cache_flags(size, flags, name, ctor); 2986 s->flags = kmem_cache_flags(size, flags, name, ctor);
2987 s->reserved = 0; 2987 s->reserved = 0;
2988 2988
2989 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) 2989 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
2990 s->reserved = sizeof(struct rcu_head); 2990 s->reserved = sizeof(struct rcu_head);
2991 2991
2992 if (!calculate_sizes(s, -1)) 2992 if (!calculate_sizes(s, -1))
2993 goto error; 2993 goto error;
2994 if (disable_higher_order_debug) { 2994 if (disable_higher_order_debug) {
2995 /* 2995 /*
2996 * Disable debugging flags that store metadata if the min slab 2996 * Disable debugging flags that store metadata if the min slab
2997 * order increased. 2997 * order increased.
2998 */ 2998 */
2999 if (get_order(s->size) > get_order(s->objsize)) { 2999 if (get_order(s->size) > get_order(s->objsize)) {
3000 s->flags &= ~DEBUG_METADATA_FLAGS; 3000 s->flags &= ~DEBUG_METADATA_FLAGS;
3001 s->offset = 0; 3001 s->offset = 0;
3002 if (!calculate_sizes(s, -1)) 3002 if (!calculate_sizes(s, -1))
3003 goto error; 3003 goto error;
3004 } 3004 }
3005 } 3005 }
3006 3006
3007 #ifdef CONFIG_CMPXCHG_DOUBLE 3007 #ifdef CONFIG_CMPXCHG_DOUBLE
3008 if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0) 3008 if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
3009 /* Enable fast mode */ 3009 /* Enable fast mode */
3010 s->flags |= __CMPXCHG_DOUBLE; 3010 s->flags |= __CMPXCHG_DOUBLE;
3011 #endif 3011 #endif
3012 3012
3013 /* 3013 /*
3014 * The larger the object size is, the more pages we want on the partial 3014 * The larger the object size is, the more pages we want on the partial
3015 * list to avoid pounding the page allocator excessively. 3015 * list to avoid pounding the page allocator excessively.
3016 */ 3016 */
3017 set_min_partial(s, ilog2(s->size) / 2); 3017 set_min_partial(s, ilog2(s->size) / 2);
3018 3018
3019 /* 3019 /*
3020 * cpu_partial determines the maximum number of objects kept in the 3020 * cpu_partial determines the maximum number of objects kept in the
3021 * per cpu partial lists of a processor. 3021 * per cpu partial lists of a processor.
3022 * 3022 *
3023 * Per cpu partial lists mainly contain slabs that just have one 3023 * Per cpu partial lists mainly contain slabs that just have one
3024 * object freed. If they are used for allocation then they can be 3024 * object freed. If they are used for allocation then they can be
3025 * filled up again with minimal effort. The slab will never hit the 3025 * filled up again with minimal effort. The slab will never hit the
3026 * per node partial lists and therefore no locking will be required. 3026 * per node partial lists and therefore no locking will be required.
3027 * 3027 *
3028 * This setting also determines 3028 * This setting also determines
3029 * 3029 *
3030 * A) The number of objects from per cpu partial slabs dumped to the 3030 * A) The number of objects from per cpu partial slabs dumped to the
3031 * per node list when we reach the limit. 3031 * per node list when we reach the limit.
3032 * B) The number of objects in cpu partial slabs to extract from the 3032 * B) The number of objects in cpu partial slabs to extract from the
3033 * per node list when we run out of per cpu objects. We only fetch 50% 3033 * per node list when we run out of per cpu objects. We only fetch 50%
3034 * to keep some capacity around for frees. 3034 * to keep some capacity around for frees.
3035 */ 3035 */
3036 if (s->size >= PAGE_SIZE) 3036 if (s->size >= PAGE_SIZE)
3037 s->cpu_partial = 2; 3037 s->cpu_partial = 2;
3038 else if (s->size >= 1024) 3038 else if (s->size >= 1024)
3039 s->cpu_partial = 6; 3039 s->cpu_partial = 6;
3040 else if (s->size >= 256) 3040 else if (s->size >= 256)
3041 s->cpu_partial = 13; 3041 s->cpu_partial = 13;
3042 else 3042 else
3043 s->cpu_partial = 30; 3043 s->cpu_partial = 30;
3044 3044
3045 s->refcount = 1; 3045 s->refcount = 1;
3046 #ifdef CONFIG_NUMA 3046 #ifdef CONFIG_NUMA
3047 s->remote_node_defrag_ratio = 1000; 3047 s->remote_node_defrag_ratio = 1000;
3048 #endif 3048 #endif
3049 if (!init_kmem_cache_nodes(s)) 3049 if (!init_kmem_cache_nodes(s))
3050 goto error; 3050 goto error;
3051 3051
3052 if (alloc_kmem_cache_cpus(s)) 3052 if (alloc_kmem_cache_cpus(s))
3053 return 1; 3053 return 1;
3054 3054
3055 free_kmem_cache_nodes(s); 3055 free_kmem_cache_nodes(s);
3056 error: 3056 error:
3057 if (flags & SLAB_PANIC) 3057 if (flags & SLAB_PANIC)
3058 panic("Cannot create slab %s size=%lu realsize=%u " 3058 panic("Cannot create slab %s size=%lu realsize=%u "
3059 "order=%u offset=%u flags=%lx\n", 3059 "order=%u offset=%u flags=%lx\n",
3060 s->name, (unsigned long)size, s->size, oo_order(s->oo), 3060 s->name, (unsigned long)size, s->size, oo_order(s->oo),
3061 s->offset, flags); 3061 s->offset, flags);
3062 return 0; 3062 return 0;
3063 } 3063 }
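To make the cpu_partial policy in kmem_cache_open() above concrete, here is a small userspace sketch that reproduces the thresholds from the code: the larger the object, the fewer objects are allowed to sit on a processor's partial lists. PAGE_SIZE is assumed to be 4096 here and the helper name is invented for the example.

/* Illustrative only: thresholds copied from kmem_cache_open() above. */
#include <stdio.h>

#define EX_PAGE_SIZE 4096UL

static unsigned int ex_cpu_partial(unsigned long size)
{
	if (size >= EX_PAGE_SIZE)
		return 2;
	else if (size >= 1024)
		return 6;
	else if (size >= 256)
		return 13;
	else
		return 30;
}

int main(void)
{
	unsigned long sizes[] = { 64, 512, 2048, 8192 };

	for (int i = 0; i < 4; i++)
		printf("object size %5lu -> cpu_partial %u\n",
		       sizes[i], ex_cpu_partial(sizes[i]));
	return 0;
}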
3064 3064
3065 /* 3065 /*
3066 * Determine the size of a slab object 3066 * Determine the size of a slab object
3067 */ 3067 */
3068 unsigned int kmem_cache_size(struct kmem_cache *s) 3068 unsigned int kmem_cache_size(struct kmem_cache *s)
3069 { 3069 {
3070 return s->objsize; 3070 return s->objsize;
3071 } 3071 }
3072 EXPORT_SYMBOL(kmem_cache_size); 3072 EXPORT_SYMBOL(kmem_cache_size);
3073 3073
3074 static void list_slab_objects(struct kmem_cache *s, struct page *page, 3074 static void list_slab_objects(struct kmem_cache *s, struct page *page,
3075 const char *text) 3075 const char *text)
3076 { 3076 {
3077 #ifdef CONFIG_SLUB_DEBUG 3077 #ifdef CONFIG_SLUB_DEBUG
3078 void *addr = page_address(page); 3078 void *addr = page_address(page);
3079 void *p; 3079 void *p;
3080 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * 3080 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
3081 sizeof(long), GFP_ATOMIC); 3081 sizeof(long), GFP_ATOMIC);
3082 if (!map) 3082 if (!map)
3083 return; 3083 return;
3084 slab_err(s, page, "%s", text); 3084 slab_err(s, page, "%s", text);
3085 slab_lock(page); 3085 slab_lock(page);
3086 3086
3087 get_map(s, page, map); 3087 get_map(s, page, map);
3088 for_each_object(p, s, addr, page->objects) { 3088 for_each_object(p, s, addr, page->objects) {
3089 3089
3090 if (!test_bit(slab_index(p, s, addr), map)) { 3090 if (!test_bit(slab_index(p, s, addr), map)) {
3091 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n", 3091 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
3092 p, p - addr); 3092 p, p - addr);
3093 print_tracking(s, p); 3093 print_tracking(s, p);
3094 } 3094 }
3095 } 3095 }
3096 slab_unlock(page); 3096 slab_unlock(page);
3097 kfree(map); 3097 kfree(map);
3098 #endif 3098 #endif
3099 } 3099 }
3100 3100
3101 /* 3101 /*
3102 * Attempt to free all partial slabs on a node. 3102 * Attempt to free all partial slabs on a node.
3103 * This is called from kmem_cache_close(). We must be the last thread 3103 * This is called from kmem_cache_close(). We must be the last thread
3104 * using the cache and therefore we do not need to lock anymore. 3104 * using the cache and therefore we do not need to lock anymore.
3105 */ 3105 */
3106 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) 3106 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3107 { 3107 {
3108 struct page *page, *h; 3108 struct page *page, *h;
3109 3109
3110 list_for_each_entry_safe(page, h, &n->partial, lru) { 3110 list_for_each_entry_safe(page, h, &n->partial, lru) {
3111 if (!page->inuse) { 3111 if (!page->inuse) {
3112 remove_partial(n, page); 3112 remove_partial(n, page);
3113 discard_slab(s, page); 3113 discard_slab(s, page);
3114 } else { 3114 } else {
3115 list_slab_objects(s, page, 3115 list_slab_objects(s, page,
3116 "Objects remaining on kmem_cache_close()"); 3116 "Objects remaining on kmem_cache_close()");
3117 } 3117 }
3118 } 3118 }
3119 } 3119 }
3120 3120
3121 /* 3121 /*
3122 * Release all resources used by a slab cache. 3122 * Release all resources used by a slab cache.
3123 */ 3123 */
3124 static inline int kmem_cache_close(struct kmem_cache *s) 3124 static inline int kmem_cache_close(struct kmem_cache *s)
3125 { 3125 {
3126 int node; 3126 int node;
3127 3127
3128 flush_all(s); 3128 flush_all(s);
3129 free_percpu(s->cpu_slab); 3129 free_percpu(s->cpu_slab);
3130 /* Attempt to free all objects */ 3130 /* Attempt to free all objects */
3131 for_each_node_state(node, N_NORMAL_MEMORY) { 3131 for_each_node_state(node, N_NORMAL_MEMORY) {
3132 struct kmem_cache_node *n = get_node(s, node); 3132 struct kmem_cache_node *n = get_node(s, node);
3133 3133
3134 free_partial(s, n); 3134 free_partial(s, n);
3135 if (n->nr_partial || slabs_node(s, node)) 3135 if (n->nr_partial || slabs_node(s, node))
3136 return 1; 3136 return 1;
3137 } 3137 }
3138 free_kmem_cache_nodes(s); 3138 free_kmem_cache_nodes(s);
3139 return 0; 3139 return 0;
3140 } 3140 }
3141 3141
3142 /* 3142 /*
3143 * Close a cache and release the kmem_cache structure 3143 * Close a cache and release the kmem_cache structure
3144 * (must be used for caches created using kmem_cache_create) 3144 * (must be used for caches created using kmem_cache_create)
3145 */ 3145 */
3146 void kmem_cache_destroy(struct kmem_cache *s) 3146 void kmem_cache_destroy(struct kmem_cache *s)
3147 { 3147 {
3148 down_write(&slub_lock); 3148 down_write(&slub_lock);
3149 s->refcount--; 3149 s->refcount--;
3150 if (!s->refcount) { 3150 if (!s->refcount) {
3151 list_del(&s->list); 3151 list_del(&s->list);
3152 up_write(&slub_lock); 3152 up_write(&slub_lock);
3153 if (kmem_cache_close(s)) { 3153 if (kmem_cache_close(s)) {
3154 printk(KERN_ERR "SLUB %s: %s called for cache that " 3154 printk(KERN_ERR "SLUB %s: %s called for cache that "
3155 "still has objects.\n", s->name, __func__); 3155 "still has objects.\n", s->name, __func__);
3156 dump_stack(); 3156 dump_stack();
3157 } 3157 }
3158 if (s->flags & SLAB_DESTROY_BY_RCU) 3158 if (s->flags & SLAB_DESTROY_BY_RCU)
3159 rcu_barrier(); 3159 rcu_barrier();
3160 sysfs_slab_remove(s); 3160 sysfs_slab_remove(s);
3161 } else 3161 } else
3162 up_write(&slub_lock); 3162 up_write(&slub_lock);
3163 } 3163 }
3164 EXPORT_SYMBOL(kmem_cache_destroy); 3164 EXPORT_SYMBOL(kmem_cache_destroy);
3165 3165
3166 /******************************************************************** 3166 /********************************************************************
3167 * Kmalloc subsystem 3167 * Kmalloc subsystem
3168 *******************************************************************/ 3168 *******************************************************************/
3169 3169
3170 struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; 3170 struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
3171 EXPORT_SYMBOL(kmalloc_caches); 3171 EXPORT_SYMBOL(kmalloc_caches);
3172 3172
3173 static struct kmem_cache *kmem_cache; 3173 static struct kmem_cache *kmem_cache;
3174 3174
3175 #ifdef CONFIG_ZONE_DMA 3175 #ifdef CONFIG_ZONE_DMA
3176 static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; 3176 static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
3177 #endif 3177 #endif
3178 3178
3179 static int __init setup_slub_min_order(char *str) 3179 static int __init setup_slub_min_order(char *str)
3180 { 3180 {
3181 get_option(&str, &slub_min_order); 3181 get_option(&str, &slub_min_order);
3182 3182
3183 return 1; 3183 return 1;
3184 } 3184 }
3185 3185
3186 __setup("slub_min_order=", setup_slub_min_order); 3186 __setup("slub_min_order=", setup_slub_min_order);
3187 3187
3188 static int __init setup_slub_max_order(char *str) 3188 static int __init setup_slub_max_order(char *str)
3189 { 3189 {
3190 get_option(&str, &slub_max_order); 3190 get_option(&str, &slub_max_order);
3191 slub_max_order = min(slub_max_order, MAX_ORDER - 1); 3191 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3192 3192
3193 return 1; 3193 return 1;
3194 } 3194 }
3195 3195
3196 __setup("slub_max_order=", setup_slub_max_order); 3196 __setup("slub_max_order=", setup_slub_max_order);
3197 3197
3198 static int __init setup_slub_min_objects(char *str) 3198 static int __init setup_slub_min_objects(char *str)
3199 { 3199 {
3200 get_option(&str, &slub_min_objects); 3200 get_option(&str, &slub_min_objects);
3201 3201
3202 return 1; 3202 return 1;
3203 } 3203 }
3204 3204
3205 __setup("slub_min_objects=", setup_slub_min_objects); 3205 __setup("slub_min_objects=", setup_slub_min_objects);
3206 3206
3207 static int __init setup_slub_nomerge(char *str) 3207 static int __init setup_slub_nomerge(char *str)
3208 { 3208 {
3209 slub_nomerge = 1; 3209 slub_nomerge = 1;
3210 return 1; 3210 return 1;
3211 } 3211 }
3212 3212
3213 __setup("slub_nomerge", setup_slub_nomerge); 3213 __setup("slub_nomerge", setup_slub_nomerge);
3214 3214
3215 static struct kmem_cache *__init create_kmalloc_cache(const char *name, 3215 static struct kmem_cache *__init create_kmalloc_cache(const char *name,
3216 int size, unsigned int flags) 3216 int size, unsigned int flags)
3217 { 3217 {
3218 struct kmem_cache *s; 3218 struct kmem_cache *s;
3219 3219
3220 s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); 3220 s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3221 3221
3222 /* 3222 /*
3223 * This function is called with IRQs disabled during early-boot on 3223 * This function is called with IRQs disabled during early-boot on
3224 * single CPU so there's no need to take slub_lock here. 3224 * single CPU so there's no need to take slub_lock here.
3225 */ 3225 */
3226 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, 3226 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
3227 flags, NULL)) 3227 flags, NULL))
3228 goto panic; 3228 goto panic;
3229 3229
3230 list_add(&s->list, &slab_caches); 3230 list_add(&s->list, &slab_caches);
3231 return s; 3231 return s;
3232 3232
3233 panic: 3233 panic:
3234 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); 3234 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
3235 return NULL; 3235 return NULL;
3236 } 3236 }
3237 3237
3238 /* 3238 /*
3239 * Conversion table for small slab sizes / 8 to the index in the 3239 * Conversion table for small slab sizes / 8 to the index in the
3240 * kmalloc array. This is necessary for slabs < 192 since we have non power 3240 * kmalloc array. This is necessary for slabs < 192 since we have non power
3241 * of two cache sizes there. The size of larger slabs can be determined using 3241 * of two cache sizes there. The size of larger slabs can be determined using
3242 * fls. 3242 * fls.
3243 */ 3243 */
3244 static s8 size_index[24] = { 3244 static s8 size_index[24] = {
3245 3, /* 8 */ 3245 3, /* 8 */
3246 4, /* 16 */ 3246 4, /* 16 */
3247 5, /* 24 */ 3247 5, /* 24 */
3248 5, /* 32 */ 3248 5, /* 32 */
3249 6, /* 40 */ 3249 6, /* 40 */
3250 6, /* 48 */ 3250 6, /* 48 */
3251 6, /* 56 */ 3251 6, /* 56 */
3252 6, /* 64 */ 3252 6, /* 64 */
3253 1, /* 72 */ 3253 1, /* 72 */
3254 1, /* 80 */ 3254 1, /* 80 */
3255 1, /* 88 */ 3255 1, /* 88 */
3256 1, /* 96 */ 3256 1, /* 96 */
3257 7, /* 104 */ 3257 7, /* 104 */
3258 7, /* 112 */ 3258 7, /* 112 */
3259 7, /* 120 */ 3259 7, /* 120 */
3260 7, /* 128 */ 3260 7, /* 128 */
3261 2, /* 136 */ 3261 2, /* 136 */
3262 2, /* 144 */ 3262 2, /* 144 */
3263 2, /* 152 */ 3263 2, /* 152 */
3264 2, /* 160 */ 3264 2, /* 160 */
3265 2, /* 168 */ 3265 2, /* 168 */
3266 2, /* 176 */ 3266 2, /* 176 */
3267 2, /* 184 */ 3267 2, /* 184 */
3268 2 /* 192 */ 3268 2 /* 192 */
3269 }; 3269 };
3270 3270
3271 static inline int size_index_elem(size_t bytes) 3271 static inline int size_index_elem(size_t bytes)
3272 { 3272 {
3273 return (bytes - 1) / 8; 3273 return (bytes - 1) / 8;
3274 } 3274 }
3275 3275
3276 static struct kmem_cache *get_slab(size_t size, gfp_t flags) 3276 static struct kmem_cache *get_slab(size_t size, gfp_t flags)
3277 { 3277 {
3278 int index; 3278 int index;
3279 3279
3280 if (size <= 192) { 3280 if (size <= 192) {
3281 if (!size) 3281 if (!size)
3282 return ZERO_SIZE_PTR; 3282 return ZERO_SIZE_PTR;
3283 3283
3284 index = size_index[size_index_elem(size)]; 3284 index = size_index[size_index_elem(size)];
3285 } else 3285 } else
3286 index = fls(size - 1); 3286 index = fls(size - 1);
3287 3287
3288 #ifdef CONFIG_ZONE_DMA 3288 #ifdef CONFIG_ZONE_DMA
3289 if (unlikely((flags & SLUB_DMA))) 3289 if (unlikely((flags & SLUB_DMA)))
3290 return kmalloc_dma_caches[index]; 3290 return kmalloc_dma_caches[index];
3291 3291
3292 #endif 3292 #endif
3293 return kmalloc_caches[index]; 3293 return kmalloc_caches[index];
3294 } 3294 }
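The table-plus-fls lookup in get_slab() above can be exercised in isolation. The sketch below is a hedged userspace rendering with the size_index[] contents copied from the listing and a simple stand-in for the kernel's fls(); it assumes the default table, i.e. none of the minimum-alignment patch-ups applied later in kmem_cache_init().

#include <stdio.h>

/* Copied from size_index[] above: kmalloc array index for sizes 8..192. */
static const signed char ex_size_index[24] = {
	3, 4, 5, 5, 6, 6, 6, 6,		/*   8 ..  64 */
	1, 1, 1, 1, 7, 7, 7, 7,		/*  72 .. 128 */
	2, 2, 2, 2, 2, 2, 2, 2		/* 136 .. 192 */
};

static int ex_fls(unsigned long x)	/* stand-in for the kernel's fls() */
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

static int ex_kmalloc_index(unsigned long size)
{
	if (!size)
		return -1;		/* the kernel returns ZERO_SIZE_PTR here */
	if (size <= 192)
		return ex_size_index[(size - 1) / 8];
	return ex_fls(size - 1);
}

int main(void)
{
	unsigned long sizes[] = { 8, 100, 192, 200, 1000, 4096 };

	for (int i = 0; i < 6; i++)
		printf("kmalloc(%4lu) -> cache index %d\n",
		       sizes[i], ex_kmalloc_index(sizes[i]));
	return 0;
}

For example, a 200-byte request falls through to fls(199) = 8, i.e. the 256-byte cache, while a 100-byte request is served from the 128-byte cache via the table.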
3295 3295
3296 void *__kmalloc(size_t size, gfp_t flags) 3296 void *__kmalloc(size_t size, gfp_t flags)
3297 { 3297 {
3298 struct kmem_cache *s; 3298 struct kmem_cache *s;
3299 void *ret; 3299 void *ret;
3300 3300
3301 if (unlikely(size > SLUB_MAX_SIZE)) 3301 if (unlikely(size > SLUB_MAX_SIZE))
3302 return kmalloc_large(size, flags); 3302 return kmalloc_large(size, flags);
3303 3303
3304 s = get_slab(size, flags); 3304 s = get_slab(size, flags);
3305 3305
3306 if (unlikely(ZERO_OR_NULL_PTR(s))) 3306 if (unlikely(ZERO_OR_NULL_PTR(s)))
3307 return s; 3307 return s;
3308 3308
3309 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_); 3309 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_);
3310 3310
3311 trace_kmalloc(_RET_IP_, ret, size, s->size, flags); 3311 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3312 3312
3313 return ret; 3313 return ret;
3314 } 3314 }
3315 EXPORT_SYMBOL(__kmalloc); 3315 EXPORT_SYMBOL(__kmalloc);
3316 3316
3317 #ifdef CONFIG_NUMA 3317 #ifdef CONFIG_NUMA
3318 static void *kmalloc_large_node(size_t size, gfp_t flags, int node) 3318 static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3319 { 3319 {
3320 struct page *page; 3320 struct page *page;
3321 void *ptr = NULL; 3321 void *ptr = NULL;
3322 3322
3323 flags |= __GFP_COMP | __GFP_NOTRACK; 3323 flags |= __GFP_COMP | __GFP_NOTRACK;
3324 page = alloc_pages_node(node, flags, get_order(size)); 3324 page = alloc_pages_node(node, flags, get_order(size));
3325 if (page) 3325 if (page)
3326 ptr = page_address(page); 3326 ptr = page_address(page);
3327 3327
3328 kmemleak_alloc(ptr, size, 1, flags); 3328 kmemleak_alloc(ptr, size, 1, flags);
3329 return ptr; 3329 return ptr;
3330 } 3330 }
3331 3331
3332 void *__kmalloc_node(size_t size, gfp_t flags, int node) 3332 void *__kmalloc_node(size_t size, gfp_t flags, int node)
3333 { 3333 {
3334 struct kmem_cache *s; 3334 struct kmem_cache *s;
3335 void *ret; 3335 void *ret;
3336 3336
3337 if (unlikely(size > SLUB_MAX_SIZE)) { 3337 if (unlikely(size > SLUB_MAX_SIZE)) {
3338 ret = kmalloc_large_node(size, flags, node); 3338 ret = kmalloc_large_node(size, flags, node);
3339 3339
3340 trace_kmalloc_node(_RET_IP_, ret, 3340 trace_kmalloc_node(_RET_IP_, ret,
3341 size, PAGE_SIZE << get_order(size), 3341 size, PAGE_SIZE << get_order(size),
3342 flags, node); 3342 flags, node);
3343 3343
3344 return ret; 3344 return ret;
3345 } 3345 }
3346 3346
3347 s = get_slab(size, flags); 3347 s = get_slab(size, flags);
3348 3348
3349 if (unlikely(ZERO_OR_NULL_PTR(s))) 3349 if (unlikely(ZERO_OR_NULL_PTR(s)))
3350 return s; 3350 return s;
3351 3351
3352 ret = slab_alloc(s, flags, node, _RET_IP_); 3352 ret = slab_alloc(s, flags, node, _RET_IP_);
3353 3353
3354 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); 3354 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3355 3355
3356 return ret; 3356 return ret;
3357 } 3357 }
3358 EXPORT_SYMBOL(__kmalloc_node); 3358 EXPORT_SYMBOL(__kmalloc_node);
3359 #endif 3359 #endif
3360 3360
3361 size_t ksize(const void *object) 3361 size_t ksize(const void *object)
3362 { 3362 {
3363 struct page *page; 3363 struct page *page;
3364 3364
3365 if (unlikely(object == ZERO_SIZE_PTR)) 3365 if (unlikely(object == ZERO_SIZE_PTR))
3366 return 0; 3366 return 0;
3367 3367
3368 page = virt_to_head_page(object); 3368 page = virt_to_head_page(object);
3369 3369
3370 if (unlikely(!PageSlab(page))) { 3370 if (unlikely(!PageSlab(page))) {
3371 WARN_ON(!PageCompound(page)); 3371 WARN_ON(!PageCompound(page));
3372 return PAGE_SIZE << compound_order(page); 3372 return PAGE_SIZE << compound_order(page);
3373 } 3373 }
3374 3374
3375 return slab_ksize(page->slab); 3375 return slab_ksize(page->slab);
3376 } 3376 }
3377 EXPORT_SYMBOL(ksize); 3377 EXPORT_SYMBOL(ksize);
3378 3378
3379 #ifdef CONFIG_SLUB_DEBUG 3379 #ifdef CONFIG_SLUB_DEBUG
3380 bool verify_mem_not_deleted(const void *x) 3380 bool verify_mem_not_deleted(const void *x)
3381 { 3381 {
3382 struct page *page; 3382 struct page *page;
3383 void *object = (void *)x; 3383 void *object = (void *)x;
3384 unsigned long flags; 3384 unsigned long flags;
3385 bool rv; 3385 bool rv;
3386 3386
3387 if (unlikely(ZERO_OR_NULL_PTR(x))) 3387 if (unlikely(ZERO_OR_NULL_PTR(x)))
3388 return false; 3388 return false;
3389 3389
3390 local_irq_save(flags); 3390 local_irq_save(flags);
3391 3391
3392 page = virt_to_head_page(x); 3392 page = virt_to_head_page(x);
3393 if (unlikely(!PageSlab(page))) { 3393 if (unlikely(!PageSlab(page))) {
3394 /* maybe it was from stack? */ 3394 /* maybe it was from stack? */
3395 rv = true; 3395 rv = true;
3396 goto out_unlock; 3396 goto out_unlock;
3397 } 3397 }
3398 3398
3399 slab_lock(page); 3399 slab_lock(page);
3400 if (on_freelist(page->slab, page, object)) { 3400 if (on_freelist(page->slab, page, object)) {
3401 object_err(page->slab, page, object, "Object is on free-list"); 3401 object_err(page->slab, page, object, "Object is on free-list");
3402 rv = false; 3402 rv = false;
3403 } else { 3403 } else {
3404 rv = true; 3404 rv = true;
3405 } 3405 }
3406 slab_unlock(page); 3406 slab_unlock(page);
3407 3407
3408 out_unlock: 3408 out_unlock:
3409 local_irq_restore(flags); 3409 local_irq_restore(flags);
3410 return rv; 3410 return rv;
3411 } 3411 }
3412 EXPORT_SYMBOL(verify_mem_not_deleted); 3412 EXPORT_SYMBOL(verify_mem_not_deleted);
3413 #endif 3413 #endif
3414 3414
3415 void kfree(const void *x) 3415 void kfree(const void *x)
3416 { 3416 {
3417 struct page *page; 3417 struct page *page;
3418 void *object = (void *)x; 3418 void *object = (void *)x;
3419 3419
3420 trace_kfree(_RET_IP_, x); 3420 trace_kfree(_RET_IP_, x);
3421 3421
3422 if (unlikely(ZERO_OR_NULL_PTR(x))) 3422 if (unlikely(ZERO_OR_NULL_PTR(x)))
3423 return; 3423 return;
3424 3424
3425 page = virt_to_head_page(x); 3425 page = virt_to_head_page(x);
3426 if (unlikely(!PageSlab(page))) { 3426 if (unlikely(!PageSlab(page))) {
3427 BUG_ON(!PageCompound(page)); 3427 BUG_ON(!PageCompound(page));
3428 kmemleak_free(x); 3428 kmemleak_free(x);
3429 put_page(page); 3429 put_page(page);
3430 return; 3430 return;
3431 } 3431 }
3432 slab_free(page->slab, page, object, _RET_IP_); 3432 slab_free(page->slab, page, object, _RET_IP_);
3433 } 3433 }
3434 EXPORT_SYMBOL(kfree); 3434 EXPORT_SYMBOL(kfree);
3435 3435
3436 /* 3436 /*
3437 * kmem_cache_shrink removes empty slabs from the partial lists and sorts 3437 * kmem_cache_shrink removes empty slabs from the partial lists and sorts
3438 * the remaining slabs by the number of items in use. The slabs with the 3438 * the remaining slabs by the number of items in use. The slabs with the
3439 * most items in use come first. New allocations will then fill those up 3439 * most items in use come first. New allocations will then fill those up
3440 * and thus they can be removed from the partial lists. 3440 * and thus they can be removed from the partial lists.
3441 * 3441 *
3442 * The slabs with the fewest items are placed last. This results in them 3442 * The slabs with the fewest items are placed last. This results in them
3443 * being allocated from last, increasing the chance that the last objects 3443 * being allocated from last, increasing the chance that the last objects
3444 * are freed in them. 3444 * are freed in them.
3445 */ 3445 */
3446 int kmem_cache_shrink(struct kmem_cache *s) 3446 int kmem_cache_shrink(struct kmem_cache *s)
3447 { 3447 {
3448 int node; 3448 int node;
3449 int i; 3449 int i;
3450 struct kmem_cache_node *n; 3450 struct kmem_cache_node *n;
3451 struct page *page; 3451 struct page *page;
3452 struct page *t; 3452 struct page *t;
3453 int objects = oo_objects(s->max); 3453 int objects = oo_objects(s->max);
3454 struct list_head *slabs_by_inuse = 3454 struct list_head *slabs_by_inuse =
3455 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL); 3455 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
3456 unsigned long flags; 3456 unsigned long flags;
3457 3457
3458 if (!slabs_by_inuse) 3458 if (!slabs_by_inuse)
3459 return -ENOMEM; 3459 return -ENOMEM;
3460 3460
3461 flush_all(s); 3461 flush_all(s);
3462 for_each_node_state(node, N_NORMAL_MEMORY) { 3462 for_each_node_state(node, N_NORMAL_MEMORY) {
3463 n = get_node(s, node); 3463 n = get_node(s, node);
3464 3464
3465 if (!n->nr_partial) 3465 if (!n->nr_partial)
3466 continue; 3466 continue;
3467 3467
3468 for (i = 0; i < objects; i++) 3468 for (i = 0; i < objects; i++)
3469 INIT_LIST_HEAD(slabs_by_inuse + i); 3469 INIT_LIST_HEAD(slabs_by_inuse + i);
3470 3470
3471 spin_lock_irqsave(&n->list_lock, flags); 3471 spin_lock_irqsave(&n->list_lock, flags);
3472 3472
3473 /* 3473 /*
3474 * Build lists indexed by the items in use in each slab. 3474 * Build lists indexed by the items in use in each slab.
3475 * 3475 *
3476 * Note that concurrent frees may occur while we hold the 3476 * Note that concurrent frees may occur while we hold the
3477 * list_lock. page->inuse here is the upper limit. 3477 * list_lock. page->inuse here is the upper limit.
3478 */ 3478 */
3479 list_for_each_entry_safe(page, t, &n->partial, lru) { 3479 list_for_each_entry_safe(page, t, &n->partial, lru) {
3480 list_move(&page->lru, slabs_by_inuse + page->inuse); 3480 list_move(&page->lru, slabs_by_inuse + page->inuse);
3481 if (!page->inuse) 3481 if (!page->inuse)
3482 n->nr_partial--; 3482 n->nr_partial--;
3483 } 3483 }
3484 3484
3485 /* 3485 /*
3486 * Rebuild the partial list with the slabs filled up most 3486 * Rebuild the partial list with the slabs filled up most
3487 * first and the least used slabs at the end. 3487 * first and the least used slabs at the end.
3488 */ 3488 */
3489 for (i = objects - 1; i > 0; i--) 3489 for (i = objects - 1; i > 0; i--)
3490 list_splice(slabs_by_inuse + i, n->partial.prev); 3490 list_splice(slabs_by_inuse + i, n->partial.prev);
3491 3491
3492 spin_unlock_irqrestore(&n->list_lock, flags); 3492 spin_unlock_irqrestore(&n->list_lock, flags);
3493 3493
3494 /* Release empty slabs */ 3494 /* Release empty slabs */
3495 list_for_each_entry_safe(page, t, slabs_by_inuse, lru) 3495 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
3496 discard_slab(s, page); 3496 discard_slab(s, page);
3497 } 3497 }
3498 3498
3499 kfree(slabs_by_inuse); 3499 kfree(slabs_by_inuse);
3500 return 0; 3500 return 0;
3501 } 3501 }
3502 EXPORT_SYMBOL(kmem_cache_shrink); 3502 EXPORT_SYMBOL(kmem_cache_shrink);
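The reordering done by kmem_cache_shrink() above is essentially a bucket sort keyed on page->inuse. The sketch below shows the same idea in plain userspace C with integers standing in for struct page; the slab counts and array bounds are made up for the example.

#include <stdio.h>

#define EX_MAX_OBJECTS 8	/* assumed objects-per-slab for the example */

int main(void)
{
	int inuse[] = { 3, 0, 7, 2, 7, 1 };	/* hypothetical partial slabs */
	int nslabs = 6;
	int buckets[EX_MAX_OBJECTS][8];
	int count[EX_MAX_OBJECTS] = { 0 };

	/* Bucket each slab by how many objects it still has in use. */
	for (int i = 0; i < nslabs; i++)
		buckets[inuse[i]][count[inuse[i]]++] = i;

	/* Rebuild "fullest first"; bucket 0 (empty slabs) would be discarded. */
	printf("new partial list order:\n");
	for (int b = EX_MAX_OBJECTS - 1; b > 0; b--)
		for (int i = 0; i < count[b]; i++)
			printf("  slab %d (inuse=%d)\n", buckets[b][i], b);
	return 0;
}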
3503 3503
3504 #if defined(CONFIG_MEMORY_HOTPLUG) 3504 #if defined(CONFIG_MEMORY_HOTPLUG)
3505 static int slab_mem_going_offline_callback(void *arg) 3505 static int slab_mem_going_offline_callback(void *arg)
3506 { 3506 {
3507 struct kmem_cache *s; 3507 struct kmem_cache *s;
3508 3508
3509 down_read(&slub_lock); 3509 down_read(&slub_lock);
3510 list_for_each_entry(s, &slab_caches, list) 3510 list_for_each_entry(s, &slab_caches, list)
3511 kmem_cache_shrink(s); 3511 kmem_cache_shrink(s);
3512 up_read(&slub_lock); 3512 up_read(&slub_lock);
3513 3513
3514 return 0; 3514 return 0;
3515 } 3515 }
3516 3516
3517 static void slab_mem_offline_callback(void *arg) 3517 static void slab_mem_offline_callback(void *arg)
3518 { 3518 {
3519 struct kmem_cache_node *n; 3519 struct kmem_cache_node *n;
3520 struct kmem_cache *s; 3520 struct kmem_cache *s;
3521 struct memory_notify *marg = arg; 3521 struct memory_notify *marg = arg;
3522 int offline_node; 3522 int offline_node;
3523 3523
3524 offline_node = marg->status_change_nid; 3524 offline_node = marg->status_change_nid;
3525 3525
3526 /* 3526 /*
3527 * If the node still has available memory, we still need its 3527 * If the node still has available memory, we still need its
3528 * kmem_cache_node structure. 3528 * kmem_cache_node structure.
3529 */ 3529 */
3530 if (offline_node < 0) 3530 if (offline_node < 0)
3531 return; 3531 return;
3532 3532
3533 down_read(&slub_lock); 3533 down_read(&slub_lock);
3534 list_for_each_entry(s, &slab_caches, list) { 3534 list_for_each_entry(s, &slab_caches, list) {
3535 n = get_node(s, offline_node); 3535 n = get_node(s, offline_node);
3536 if (n) { 3536 if (n) {
3537 /* 3537 /*
3538 * if n->nr_slabs > 0, slabs still exist on the node 3538 * if n->nr_slabs > 0, slabs still exist on the node
3539 * that is going down. We were unable to free them, 3539 * that is going down. We were unable to free them,
3540 * and offline_pages() function shouldn't call this 3540 * and offline_pages() function shouldn't call this
3541 * callback. So, we must fail. 3541 * callback. So, we must fail.
3542 */ 3542 */
3543 BUG_ON(slabs_node(s, offline_node)); 3543 BUG_ON(slabs_node(s, offline_node));
3544 3544
3545 s->node[offline_node] = NULL; 3545 s->node[offline_node] = NULL;
3546 kmem_cache_free(kmem_cache_node, n); 3546 kmem_cache_free(kmem_cache_node, n);
3547 } 3547 }
3548 } 3548 }
3549 up_read(&slub_lock); 3549 up_read(&slub_lock);
3550 } 3550 }
3551 3551
3552 static int slab_mem_going_online_callback(void *arg) 3552 static int slab_mem_going_online_callback(void *arg)
3553 { 3553 {
3554 struct kmem_cache_node *n; 3554 struct kmem_cache_node *n;
3555 struct kmem_cache *s; 3555 struct kmem_cache *s;
3556 struct memory_notify *marg = arg; 3556 struct memory_notify *marg = arg;
3557 int nid = marg->status_change_nid; 3557 int nid = marg->status_change_nid;
3558 int ret = 0; 3558 int ret = 0;
3559 3559
3560 /* 3560 /*
3561 * If the node's memory is already available, then kmem_cache_node is 3561 * If the node's memory is already available, then kmem_cache_node is
3562 * already created. Nothing to do. 3562 * already created. Nothing to do.
3563 */ 3563 */
3564 if (nid < 0) 3564 if (nid < 0)
3565 return 0; 3565 return 0;
3566 3566
3567 /* 3567 /*
3568 * We are bringing a node online. No memory is available yet. We must 3568 * We are bringing a node online. No memory is available yet. We must
3569 * allocate a kmem_cache_node structure in order to bring the node 3569 * allocate a kmem_cache_node structure in order to bring the node
3570 * online. 3570 * online.
3571 */ 3571 */
3572 down_read(&slub_lock); 3572 down_read(&slub_lock);
3573 list_for_each_entry(s, &slab_caches, list) { 3573 list_for_each_entry(s, &slab_caches, list) {
3574 /* 3574 /*
3575 * XXX: kmem_cache_alloc_node will fall back to other nodes 3575 * XXX: kmem_cache_alloc_node will fall back to other nodes
3576 * since memory is not yet available from the node that 3576 * since memory is not yet available from the node that
3577 * is brought up. 3577 * is brought up.
3578 */ 3578 */
3579 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL); 3579 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3580 if (!n) { 3580 if (!n) {
3581 ret = -ENOMEM; 3581 ret = -ENOMEM;
3582 goto out; 3582 goto out;
3583 } 3583 }
3584 init_kmem_cache_node(n, s); 3584 init_kmem_cache_node(n, s);
3585 s->node[nid] = n; 3585 s->node[nid] = n;
3586 } 3586 }
3587 out: 3587 out:
3588 up_read(&slub_lock); 3588 up_read(&slub_lock);
3589 return ret; 3589 return ret;
3590 } 3590 }
3591 3591
3592 static int slab_memory_callback(struct notifier_block *self, 3592 static int slab_memory_callback(struct notifier_block *self,
3593 unsigned long action, void *arg) 3593 unsigned long action, void *arg)
3594 { 3594 {
3595 int ret = 0; 3595 int ret = 0;
3596 3596
3597 switch (action) { 3597 switch (action) {
3598 case MEM_GOING_ONLINE: 3598 case MEM_GOING_ONLINE:
3599 ret = slab_mem_going_online_callback(arg); 3599 ret = slab_mem_going_online_callback(arg);
3600 break; 3600 break;
3601 case MEM_GOING_OFFLINE: 3601 case MEM_GOING_OFFLINE:
3602 ret = slab_mem_going_offline_callback(arg); 3602 ret = slab_mem_going_offline_callback(arg);
3603 break; 3603 break;
3604 case MEM_OFFLINE: 3604 case MEM_OFFLINE:
3605 case MEM_CANCEL_ONLINE: 3605 case MEM_CANCEL_ONLINE:
3606 slab_mem_offline_callback(arg); 3606 slab_mem_offline_callback(arg);
3607 break; 3607 break;
3608 case MEM_ONLINE: 3608 case MEM_ONLINE:
3609 case MEM_CANCEL_OFFLINE: 3609 case MEM_CANCEL_OFFLINE:
3610 break; 3610 break;
3611 } 3611 }
3612 if (ret) 3612 if (ret)
3613 ret = notifier_from_errno(ret); 3613 ret = notifier_from_errno(ret);
3614 else 3614 else
3615 ret = NOTIFY_OK; 3615 ret = NOTIFY_OK;
3616 return ret; 3616 return ret;
3617 } 3617 }
3618 3618
3619 #endif /* CONFIG_MEMORY_HOTPLUG */ 3619 #endif /* CONFIG_MEMORY_HOTPLUG */
3620 3620
3621 /******************************************************************** 3621 /********************************************************************
3622 * Basic setup of slabs 3622 * Basic setup of slabs
3623 *******************************************************************/ 3623 *******************************************************************/
3624 3624
3625 /* 3625 /*
3626 * Used for early kmem_cache structures that were allocated using 3626 * Used for early kmem_cache structures that were allocated using
3627 * the page allocator 3627 * the page allocator
3628 */ 3628 */
3629 3629
3630 static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s) 3630 static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
3631 { 3631 {
3632 int node; 3632 int node;
3633 3633
3634 list_add(&s->list, &slab_caches); 3634 list_add(&s->list, &slab_caches);
3635 s->refcount = -1; 3635 s->refcount = -1;
3636 3636
3637 for_each_node_state(node, N_NORMAL_MEMORY) { 3637 for_each_node_state(node, N_NORMAL_MEMORY) {
3638 struct kmem_cache_node *n = get_node(s, node); 3638 struct kmem_cache_node *n = get_node(s, node);
3639 struct page *p; 3639 struct page *p;
3640 3640
3641 if (n) { 3641 if (n) {
3642 list_for_each_entry(p, &n->partial, lru) 3642 list_for_each_entry(p, &n->partial, lru)
3643 p->slab = s; 3643 p->slab = s;
3644 3644
3645 #ifdef CONFIG_SLUB_DEBUG 3645 #ifdef CONFIG_SLUB_DEBUG
3646 list_for_each_entry(p, &n->full, lru) 3646 list_for_each_entry(p, &n->full, lru)
3647 p->slab = s; 3647 p->slab = s;
3648 #endif 3648 #endif
3649 } 3649 }
3650 } 3650 }
3651 } 3651 }
3652 3652
3653 void __init kmem_cache_init(void) 3653 void __init kmem_cache_init(void)
3654 { 3654 {
3655 int i; 3655 int i;
3656 int caches = 0; 3656 int caches = 0;
3657 struct kmem_cache *temp_kmem_cache; 3657 struct kmem_cache *temp_kmem_cache;
3658 int order; 3658 int order;
3659 struct kmem_cache *temp_kmem_cache_node; 3659 struct kmem_cache *temp_kmem_cache_node;
3660 unsigned long kmalloc_size; 3660 unsigned long kmalloc_size;
3661 3661
3662 kmem_size = offsetof(struct kmem_cache, node) + 3662 kmem_size = offsetof(struct kmem_cache, node) +
3663 nr_node_ids * sizeof(struct kmem_cache_node *); 3663 nr_node_ids * sizeof(struct kmem_cache_node *);
3664 3664
3665 /* Allocate two kmem_caches from the page allocator */ 3665 /* Allocate two kmem_caches from the page allocator */
3666 kmalloc_size = ALIGN(kmem_size, cache_line_size()); 3666 kmalloc_size = ALIGN(kmem_size, cache_line_size());
3667 order = get_order(2 * kmalloc_size); 3667 order = get_order(2 * kmalloc_size);
3668 kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); 3668 kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);
3669 3669
3670 /* 3670 /*
3671 * Must first have the slab cache available for the allocations of the 3671 * Must first have the slab cache available for the allocations of the
3672 * struct kmem_cache_node's. There is special bootstrap code in 3672 * struct kmem_cache_node's. There is special bootstrap code in
3673 * kmem_cache_open for slab_state == DOWN. 3673 * kmem_cache_open for slab_state == DOWN.
3674 */ 3674 */
3675 kmem_cache_node = (void *)kmem_cache + kmalloc_size; 3675 kmem_cache_node = (void *)kmem_cache + kmalloc_size;
3676 3676
3677 kmem_cache_open(kmem_cache_node, "kmem_cache_node", 3677 kmem_cache_open(kmem_cache_node, "kmem_cache_node",
3678 sizeof(struct kmem_cache_node), 3678 sizeof(struct kmem_cache_node),
3679 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 3679 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3680 3680
3681 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); 3681 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
3682 3682
3683 /* Able to allocate the per node structures */ 3683 /* Able to allocate the per node structures */
3684 slab_state = PARTIAL; 3684 slab_state = PARTIAL;
3685 3685
3686 temp_kmem_cache = kmem_cache; 3686 temp_kmem_cache = kmem_cache;
3687 kmem_cache_open(kmem_cache, "kmem_cache", kmem_size, 3687 kmem_cache_open(kmem_cache, "kmem_cache", kmem_size,
3688 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 3688 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3689 kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); 3689 kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3690 memcpy(kmem_cache, temp_kmem_cache, kmem_size); 3690 memcpy(kmem_cache, temp_kmem_cache, kmem_size);
3691 3691
3692 /* 3692 /*
3693 * Allocate kmem_cache_node properly from the kmem_cache slab. 3693 * Allocate kmem_cache_node properly from the kmem_cache slab.
3694 * kmem_cache_node is separately allocated so no need to 3694 * kmem_cache_node is separately allocated so no need to
3695 * update any list pointers. 3695 * update any list pointers.
3696 */ 3696 */
3697 temp_kmem_cache_node = kmem_cache_node; 3697 temp_kmem_cache_node = kmem_cache_node;
3698 3698
3699 kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); 3699 kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3700 memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size); 3700 memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);
3701 3701
3702 kmem_cache_bootstrap_fixup(kmem_cache_node); 3702 kmem_cache_bootstrap_fixup(kmem_cache_node);
3703 3703
3704 caches++; 3704 caches++;
3705 kmem_cache_bootstrap_fixup(kmem_cache); 3705 kmem_cache_bootstrap_fixup(kmem_cache);
3706 caches++; 3706 caches++;
3707 /* Free temporary boot structure */ 3707 /* Free temporary boot structure */
3708 free_pages((unsigned long)temp_kmem_cache, order); 3708 free_pages((unsigned long)temp_kmem_cache, order);
3709 3709
3710 /* Now we can use the kmem_cache to allocate kmalloc slabs */ 3710 /* Now we can use the kmem_cache to allocate kmalloc slabs */
3711 3711
3712 /* 3712 /*
3713 * Patch up the size_index table if we have strange large alignment 3713 * Patch up the size_index table if we have strange large alignment
3714 * requirements for the kmalloc array. This is only the case for 3714 * requirements for the kmalloc array. This is only the case for
3715 * MIPS, it seems. The standard arches will not generate any code here. 3715 * MIPS, it seems. The standard arches will not generate any code here.
3716 * 3716 *
3717 * Largest permitted alignment is 256 bytes due to the way we 3717 * Largest permitted alignment is 256 bytes due to the way we
3718 * handle the index determination for the smaller caches. 3718 * handle the index determination for the smaller caches.
3719 * 3719 *
3720 * Make sure that nothing crazy happens if someone starts tinkering 3720 * Make sure that nothing crazy happens if someone starts tinkering
3721 * around with ARCH_KMALLOC_MINALIGN 3721 * around with ARCH_KMALLOC_MINALIGN
3722 */ 3722 */
3723 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || 3723 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
3724 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); 3724 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
3725 3725
3726 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { 3726 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
3727 int elem = size_index_elem(i); 3727 int elem = size_index_elem(i);
3728 if (elem >= ARRAY_SIZE(size_index)) 3728 if (elem >= ARRAY_SIZE(size_index))
3729 break; 3729 break;
3730 size_index[elem] = KMALLOC_SHIFT_LOW; 3730 size_index[elem] = KMALLOC_SHIFT_LOW;
3731 } 3731 }
3732 3732
3733 if (KMALLOC_MIN_SIZE == 64) { 3733 if (KMALLOC_MIN_SIZE == 64) {
3734 /* 3734 /*
3735 * The 96 byte sized cache is not used if the alignment 3735 * The 96 byte sized cache is not used if the alignment
3736 * is 64 bytes. 3736 * is 64 bytes.
3737 */ 3737 */
3738 for (i = 64 + 8; i <= 96; i += 8) 3738 for (i = 64 + 8; i <= 96; i += 8)
3739 size_index[size_index_elem(i)] = 7; 3739 size_index[size_index_elem(i)] = 7;
3740 } else if (KMALLOC_MIN_SIZE == 128) { 3740 } else if (KMALLOC_MIN_SIZE == 128) {
3741 /* 3741 /*
3742 * The 192 byte sized cache is not used if the alignment 3742 * The 192 byte sized cache is not used if the alignment
3743 * is 128 bytes. Redirect kmalloc to use the 256 byte cache 3743 * is 128 bytes. Redirect kmalloc to use the 256 byte cache
3744 * instead. 3744 * instead.
3745 */ 3745 */
3746 for (i = 128 + 8; i <= 192; i += 8) 3746 for (i = 128 + 8; i <= 192; i += 8)
3747 size_index[size_index_elem(i)] = 8; 3747 size_index[size_index_elem(i)] = 8;
3748 } 3748 }
3749 3749
3750 /* Caches that are not of the two-to-the-power-of size */ 3750 /* Caches that are not of the two-to-the-power-of size */
3751 if (KMALLOC_MIN_SIZE <= 32) { 3751 if (KMALLOC_MIN_SIZE <= 32) {
3752 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0); 3752 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
3753 caches++; 3753 caches++;
3754 } 3754 }
3755 3755
3756 if (KMALLOC_MIN_SIZE <= 64) { 3756 if (KMALLOC_MIN_SIZE <= 64) {
3757 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0); 3757 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
3758 caches++; 3758 caches++;
3759 } 3759 }
3760 3760
3761 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { 3761 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3762 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0); 3762 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
3763 caches++; 3763 caches++;
3764 } 3764 }
3765 3765
3766 slab_state = UP; 3766 slab_state = UP;
3767 3767
3768 /* Provide the correct kmalloc names now that the caches are up */ 3768 /* Provide the correct kmalloc names now that the caches are up */
3769 if (KMALLOC_MIN_SIZE <= 32) { 3769 if (KMALLOC_MIN_SIZE <= 32) {
3770 kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT); 3770 kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
3771 BUG_ON(!kmalloc_caches[1]->name); 3771 BUG_ON(!kmalloc_caches[1]->name);
3772 } 3772 }
3773 3773
3774 if (KMALLOC_MIN_SIZE <= 64) { 3774 if (KMALLOC_MIN_SIZE <= 64) {
3775 kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT); 3775 kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
3776 BUG_ON(!kmalloc_caches[2]->name); 3776 BUG_ON(!kmalloc_caches[2]->name);
3777 } 3777 }
3778 3778
3779 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { 3779 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3780 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); 3780 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
3781 3781
3782 BUG_ON(!s); 3782 BUG_ON(!s);
3783 kmalloc_caches[i]->name = s; 3783 kmalloc_caches[i]->name = s;
3784 } 3784 }
3785 3785
3786 #ifdef CONFIG_SMP 3786 #ifdef CONFIG_SMP
3787 register_cpu_notifier(&slab_notifier); 3787 register_cpu_notifier(&slab_notifier);
3788 #endif 3788 #endif
3789 3789
3790 #ifdef CONFIG_ZONE_DMA 3790 #ifdef CONFIG_ZONE_DMA
3791 for (i = 0; i < SLUB_PAGE_SHIFT; i++) { 3791 for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
3792 struct kmem_cache *s = kmalloc_caches[i]; 3792 struct kmem_cache *s = kmalloc_caches[i];
3793 3793
3794 if (s && s->size) { 3794 if (s && s->size) {
3795 char *name = kasprintf(GFP_NOWAIT, 3795 char *name = kasprintf(GFP_NOWAIT,
3796 "dma-kmalloc-%d", s->objsize); 3796 "dma-kmalloc-%d", s->objsize);
3797 3797
3798 BUG_ON(!name); 3798 BUG_ON(!name);
3799 kmalloc_dma_caches[i] = create_kmalloc_cache(name, 3799 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3800 s->objsize, SLAB_CACHE_DMA); 3800 s->objsize, SLAB_CACHE_DMA);
3801 } 3801 }
3802 } 3802 }
3803 #endif 3803 #endif
3804 printk(KERN_INFO 3804 printk(KERN_INFO
3805 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," 3805 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
3806 " CPUs=%d, Nodes=%d\n", 3806 " CPUs=%d, Nodes=%d\n",
3807 caches, cache_line_size(), 3807 caches, cache_line_size(),
3808 slub_min_order, slub_max_order, slub_min_objects, 3808 slub_min_order, slub_max_order, slub_min_objects,
3809 nr_cpu_ids, nr_node_ids); 3809 nr_cpu_ids, nr_node_ids);
3810 } 3810 }
3811 3811
3812 void __init kmem_cache_init_late(void) 3812 void __init kmem_cache_init_late(void)
3813 { 3813 {
3814 } 3814 }
3815 3815
3816 /* 3816 /*
3817 * Find a mergeable slab cache 3817 * Find a mergeable slab cache
3818 */ 3818 */
3819 static int slab_unmergeable(struct kmem_cache *s) 3819 static int slab_unmergeable(struct kmem_cache *s)
3820 { 3820 {
3821 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE)) 3821 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3822 return 1; 3822 return 1;
3823 3823
3824 if (s->ctor) 3824 if (s->ctor)
3825 return 1; 3825 return 1;
3826 3826
3827 /* 3827 /*
3828 * We may have set a slab to be unmergeable during bootstrap. 3828 * We may have set a slab to be unmergeable during bootstrap.
3829 */ 3829 */
3830 if (s->refcount < 0) 3830 if (s->refcount < 0)
3831 return 1; 3831 return 1;
3832 3832
3833 return 0; 3833 return 0;
3834 } 3834 }
3835 3835
3836 static struct kmem_cache *find_mergeable(size_t size, 3836 static struct kmem_cache *find_mergeable(size_t size,
3837 size_t align, unsigned long flags, const char *name, 3837 size_t align, unsigned long flags, const char *name,
3838 void (*ctor)(void *)) 3838 void (*ctor)(void *))
3839 { 3839 {
3840 struct kmem_cache *s; 3840 struct kmem_cache *s;
3841 3841
3842 if (slub_nomerge || (flags & SLUB_NEVER_MERGE)) 3842 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3843 return NULL; 3843 return NULL;
3844 3844
3845 if (ctor) 3845 if (ctor)
3846 return NULL; 3846 return NULL;
3847 3847
3848 size = ALIGN(size, sizeof(void *)); 3848 size = ALIGN(size, sizeof(void *));
3849 align = calculate_alignment(flags, align, size); 3849 align = calculate_alignment(flags, align, size);
3850 size = ALIGN(size, align); 3850 size = ALIGN(size, align);
3851 flags = kmem_cache_flags(size, flags, name, NULL); 3851 flags = kmem_cache_flags(size, flags, name, NULL);
3852 3852
3853 list_for_each_entry(s, &slab_caches, list) { 3853 list_for_each_entry(s, &slab_caches, list) {
3854 if (slab_unmergeable(s)) 3854 if (slab_unmergeable(s))
3855 continue; 3855 continue;
3856 3856
3857 if (size > s->size) 3857 if (size > s->size)
3858 continue; 3858 continue;
3859 3859
3860 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME)) 3860 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3861 continue; 3861 continue;
3862 /* 3862 /*
3863 * Check if alignment is compatible. 3863 * Check if alignment is compatible.
3864 * Courtesy of Adrian Drzewiecki 3864 * Courtesy of Adrian Drzewiecki
3865 */ 3865 */
3866 if ((s->size & ~(align - 1)) != s->size) 3866 if ((s->size & ~(align - 1)) != s->size)
3867 continue; 3867 continue;
3868 3868
3869 if (s->size - size >= sizeof(void *)) 3869 if (s->size - size >= sizeof(void *))
3870 continue; 3870 continue;
3871 3871
3872 return s; 3872 return s;
3873 } 3873 }
3874 return NULL; 3874 return NULL;
3875 } 3875 }
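The checks in find_mergeable() above reduce to a handful of comparisons. Below is a hedged, simplified userspace sketch of just that decision; the struct, the flag mask and the helper name are stand-ins, and the preliminary size/alignment rounding done by the kernel is omitted.

/* Illustrative sketch of the merge test in find_mergeable() above. */
#include <stddef.h>
#include <stdio.h>

struct ex_cache {
	const char *name;
	size_t size;		/* object size after padding */
	unsigned long flags;
	int has_ctor;
};

#define EX_MERGE_SAME_MASK 0x1UL	/* hypothetical stand-in for SLUB_MERGE_SAME */

static int ex_can_merge(const struct ex_cache *s, size_t size,
			size_t align, unsigned long flags, int has_ctor)
{
	if (has_ctor || s->has_ctor)
		return 0;
	if (size > s->size)				/* must fit */
		return 0;
	if ((flags & EX_MERGE_SAME_MASK) != (s->flags & EX_MERGE_SAME_MASK))
		return 0;
	if ((s->size & ~(align - 1)) != s->size)	/* alignment compatible */
		return 0;
	if (s->size - size >= sizeof(void *))		/* don't waste a pointer */
		return 0;
	return 1;
}

int main(void)
{
	struct ex_cache existing = { "kmalloc-64", 64, 0, 0 };

	printf("60-byte request merges: %d\n", ex_can_merge(&existing, 60, 8, 0, 0));	/* 1 */
	printf("40-byte request merges: %d\n", ex_can_merge(&existing, 40, 8, 0, 0));	/* 0 */
	return 0;
}

The last check is why the 40-byte request above is not merged into a 64-byte cache: every object would waste at least a pointer's worth of space.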
3876 3876
3877 struct kmem_cache *kmem_cache_create(const char *name, size_t size, 3877 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3878 size_t align, unsigned long flags, void (*ctor)(void *)) 3878 size_t align, unsigned long flags, void (*ctor)(void *))
3879 { 3879 {
3880 struct kmem_cache *s; 3880 struct kmem_cache *s;
3881 char *n; 3881 char *n;
3882 3882
3883 if (WARN_ON(!name)) 3883 if (WARN_ON(!name))
3884 return NULL; 3884 return NULL;
3885 3885
3886 down_write(&slub_lock); 3886 down_write(&slub_lock);
3887 s = find_mergeable(size, align, flags, name, ctor); 3887 s = find_mergeable(size, align, flags, name, ctor);
3888 if (s) { 3888 if (s) {
3889 s->refcount++; 3889 s->refcount++;
3890 /* 3890 /*
3891 * Adjust the object sizes so that we clear 3891 * Adjust the object sizes so that we clear
3892 * the complete object on kzalloc. 3892 * the complete object on kzalloc.
3893 */ 3893 */
3894 s->objsize = max(s->objsize, (int)size); 3894 s->objsize = max(s->objsize, (int)size);
3895 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3895 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3896 3896
3897 if (sysfs_slab_alias(s, name)) { 3897 if (sysfs_slab_alias(s, name)) {
3898 s->refcount--; 3898 s->refcount--;
3899 goto err; 3899 goto err;
3900 } 3900 }
3901 up_write(&slub_lock); 3901 up_write(&slub_lock);
3902 return s; 3902 return s;
3903 } 3903 }
3904 3904
3905 n = kstrdup(name, GFP_KERNEL); 3905 n = kstrdup(name, GFP_KERNEL);
3906 if (!n) 3906 if (!n)
3907 goto err; 3907 goto err;
3908 3908
3909 s = kmalloc(kmem_size, GFP_KERNEL); 3909 s = kmalloc(kmem_size, GFP_KERNEL);
3910 if (s) { 3910 if (s) {
3911 if (kmem_cache_open(s, n, 3911 if (kmem_cache_open(s, n,
3912 size, align, flags, ctor)) { 3912 size, align, flags, ctor)) {
3913 list_add(&s->list, &slab_caches); 3913 list_add(&s->list, &slab_caches);
3914 up_write(&slub_lock);
3914 if (sysfs_slab_add(s)) { 3915 if (sysfs_slab_add(s)) {
3916 down_write(&slub_lock);
3915 list_del(&s->list); 3917 list_del(&s->list);
3916 kfree(n); 3918 kfree(n);
3917 kfree(s); 3919 kfree(s);
3918 goto err; 3920 goto err;
3919 } 3921 }
3920 up_write(&slub_lock);
3921 return s; 3922 return s;
3922 } 3923 }
3923 kfree(n); 3924 kfree(n);
3924 kfree(s); 3925 kfree(s);
3925 } 3926 }
3926 err: 3927 err:
3927 up_write(&slub_lock); 3928 up_write(&slub_lock);
3928 3929
3929 if (flags & SLAB_PANIC) 3930 if (flags & SLAB_PANIC)
3930 panic("Cannot create slabcache %s\n", name); 3931 panic("Cannot create slabcache %s\n", name);
3931 else 3932 else
3932 s = NULL; 3933 s = NULL;
3933 return s; 3934 return s;
3934 } 3935 }
3935 EXPORT_SYMBOL(kmem_cache_create); 3936 EXPORT_SYMBOL(kmem_cache_create);
3936 3937
3937 #ifdef CONFIG_SMP 3938 #ifdef CONFIG_SMP
3938 /* 3939 /*
3939 * Use the cpu notifier to ensure that the cpu slabs are flushed when 3940 * Use the cpu notifier to ensure that the cpu slabs are flushed when
3940 * necessary. 3941 * necessary.
3941 */ 3942 */
3942 static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, 3943 static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3943 unsigned long action, void *hcpu) 3944 unsigned long action, void *hcpu)
3944 { 3945 {
3945 long cpu = (long)hcpu; 3946 long cpu = (long)hcpu;
3946 struct kmem_cache *s; 3947 struct kmem_cache *s;
3947 unsigned long flags; 3948 unsigned long flags;
3948 3949
3949 switch (action) { 3950 switch (action) {
3950 case CPU_UP_CANCELED: 3951 case CPU_UP_CANCELED:
3951 case CPU_UP_CANCELED_FROZEN: 3952 case CPU_UP_CANCELED_FROZEN:
3952 case CPU_DEAD: 3953 case CPU_DEAD:
3953 case CPU_DEAD_FROZEN: 3954 case CPU_DEAD_FROZEN:
3954 down_read(&slub_lock); 3955 down_read(&slub_lock);
3955 list_for_each_entry(s, &slab_caches, list) { 3956 list_for_each_entry(s, &slab_caches, list) {
3956 local_irq_save(flags); 3957 local_irq_save(flags);
3957 __flush_cpu_slab(s, cpu); 3958 __flush_cpu_slab(s, cpu);
3958 local_irq_restore(flags); 3959 local_irq_restore(flags);
3959 } 3960 }
3960 up_read(&slub_lock); 3961 up_read(&slub_lock);
3961 break; 3962 break;
3962 default: 3963 default:
3963 break; 3964 break;
3964 } 3965 }
3965 return NOTIFY_OK; 3966 return NOTIFY_OK;
3966 } 3967 }
3967 3968
3968 static struct notifier_block __cpuinitdata slab_notifier = { 3969 static struct notifier_block __cpuinitdata slab_notifier = {
3969 .notifier_call = slab_cpuup_callback 3970 .notifier_call = slab_cpuup_callback
3970 }; 3971 };
3971 3972
3972 #endif 3973 #endif
3973 3974
3974 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) 3975 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3975 { 3976 {
3976 struct kmem_cache *s; 3977 struct kmem_cache *s;
3977 void *ret; 3978 void *ret;
3978 3979
3979 if (unlikely(size > SLUB_MAX_SIZE)) 3980 if (unlikely(size > SLUB_MAX_SIZE))
3980 return kmalloc_large(size, gfpflags); 3981 return kmalloc_large(size, gfpflags);
3981 3982
3982 s = get_slab(size, gfpflags); 3983 s = get_slab(size, gfpflags);
3983 3984
3984 if (unlikely(ZERO_OR_NULL_PTR(s))) 3985 if (unlikely(ZERO_OR_NULL_PTR(s)))
3985 return s; 3986 return s;
3986 3987
3987 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller); 3988 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
3988 3989
3989 /* Honor the call site pointer we received. */ 3990 /* Honor the call site pointer we received. */
3990 trace_kmalloc(caller, ret, size, s->size, gfpflags); 3991 trace_kmalloc(caller, ret, size, s->size, gfpflags);
3991 3992
3992 return ret; 3993 return ret;
3993 } 3994 }
3994 3995
3995 #ifdef CONFIG_NUMA 3996 #ifdef CONFIG_NUMA
3996 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, 3997 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3997 int node, unsigned long caller) 3998 int node, unsigned long caller)
3998 { 3999 {
3999 struct kmem_cache *s; 4000 struct kmem_cache *s;
4000 void *ret; 4001 void *ret;
4001 4002
4002 if (unlikely(size > SLUB_MAX_SIZE)) { 4003 if (unlikely(size > SLUB_MAX_SIZE)) {
4003 ret = kmalloc_large_node(size, gfpflags, node); 4004 ret = kmalloc_large_node(size, gfpflags, node);
4004 4005
4005 trace_kmalloc_node(caller, ret, 4006 trace_kmalloc_node(caller, ret,
4006 size, PAGE_SIZE << get_order(size), 4007 size, PAGE_SIZE << get_order(size),
4007 gfpflags, node); 4008 gfpflags, node);
4008 4009
4009 return ret; 4010 return ret;
4010 } 4011 }
4011 4012
4012 s = get_slab(size, gfpflags); 4013 s = get_slab(size, gfpflags);
4013 4014
4014 if (unlikely(ZERO_OR_NULL_PTR(s))) 4015 if (unlikely(ZERO_OR_NULL_PTR(s)))
4015 return s; 4016 return s;
4016 4017
4017 ret = slab_alloc(s, gfpflags, node, caller); 4018 ret = slab_alloc(s, gfpflags, node, caller);
4018 4019
4019 /* Honor the call site pointer we received. */ 4020 /* Honor the call site pointer we received. */
4020 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); 4021 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4021 4022
4022 return ret; 4023 return ret;
4023 } 4024 }
4024 #endif 4025 #endif
4025 4026
4026 #ifdef CONFIG_SYSFS 4027 #ifdef CONFIG_SYSFS
4027 static int count_inuse(struct page *page) 4028 static int count_inuse(struct page *page)
4028 { 4029 {
4029 return page->inuse; 4030 return page->inuse;
4030 } 4031 }
4031 4032
4032 static int count_total(struct page *page) 4033 static int count_total(struct page *page)
4033 { 4034 {
4034 return page->objects; 4035 return page->objects;
4035 } 4036 }
4036 #endif 4037 #endif
4037 4038
4038 #ifdef CONFIG_SLUB_DEBUG 4039 #ifdef CONFIG_SLUB_DEBUG
4039 static int validate_slab(struct kmem_cache *s, struct page *page, 4040 static int validate_slab(struct kmem_cache *s, struct page *page,
4040 unsigned long *map) 4041 unsigned long *map)
4041 { 4042 {
4042 void *p; 4043 void *p;
4043 void *addr = page_address(page); 4044 void *addr = page_address(page);
4044 4045
4045 if (!check_slab(s, page) || 4046 if (!check_slab(s, page) ||
4046 !on_freelist(s, page, NULL)) 4047 !on_freelist(s, page, NULL))
4047 return 0; 4048 return 0;
4048 4049
4049 /* Now we know that a valid freelist exists */ 4050 /* Now we know that a valid freelist exists */
4050 bitmap_zero(map, page->objects); 4051 bitmap_zero(map, page->objects);
4051 4052
4052 get_map(s, page, map); 4053 get_map(s, page, map);
4053 for_each_object(p, s, addr, page->objects) { 4054 for_each_object(p, s, addr, page->objects) {
4054 if (test_bit(slab_index(p, s, addr), map)) 4055 if (test_bit(slab_index(p, s, addr), map))
4055 if (!check_object(s, page, p, SLUB_RED_INACTIVE)) 4056 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4056 return 0; 4057 return 0;
4057 } 4058 }
4058 4059
4059 for_each_object(p, s, addr, page->objects) 4060 for_each_object(p, s, addr, page->objects)
4060 if (!test_bit(slab_index(p, s, addr), map)) 4061 if (!test_bit(slab_index(p, s, addr), map))
4061 if (!check_object(s, page, p, SLUB_RED_ACTIVE)) 4062 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4062 return 0; 4063 return 0;
4063 return 1; 4064 return 1;
4064 } 4065 }
4065 4066
4066 static void validate_slab_slab(struct kmem_cache *s, struct page *page, 4067 static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4067 unsigned long *map) 4068 unsigned long *map)
4068 { 4069 {
4069 slab_lock(page); 4070 slab_lock(page);
4070 validate_slab(s, page, map); 4071 validate_slab(s, page, map);
4071 slab_unlock(page); 4072 slab_unlock(page);
4072 } 4073 }
4073 4074
4074 static int validate_slab_node(struct kmem_cache *s, 4075 static int validate_slab_node(struct kmem_cache *s,
4075 struct kmem_cache_node *n, unsigned long *map) 4076 struct kmem_cache_node *n, unsigned long *map)
4076 { 4077 {
4077 unsigned long count = 0; 4078 unsigned long count = 0;
4078 struct page *page; 4079 struct page *page;
4079 unsigned long flags; 4080 unsigned long flags;
4080 4081
4081 spin_lock_irqsave(&n->list_lock, flags); 4082 spin_lock_irqsave(&n->list_lock, flags);
4082 4083
4083 list_for_each_entry(page, &n->partial, lru) { 4084 list_for_each_entry(page, &n->partial, lru) {
4084 validate_slab_slab(s, page, map); 4085 validate_slab_slab(s, page, map);
4085 count++; 4086 count++;
4086 } 4087 }
4087 if (count != n->nr_partial) 4088 if (count != n->nr_partial)
4088 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but " 4089 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
4089 "counter=%ld\n", s->name, count, n->nr_partial); 4090 "counter=%ld\n", s->name, count, n->nr_partial);
4090 4091
4091 if (!(s->flags & SLAB_STORE_USER)) 4092 if (!(s->flags & SLAB_STORE_USER))
4092 goto out; 4093 goto out;
4093 4094
4094 list_for_each_entry(page, &n->full, lru) { 4095 list_for_each_entry(page, &n->full, lru) {
4095 validate_slab_slab(s, page, map); 4096 validate_slab_slab(s, page, map);
4096 count++; 4097 count++;
4097 } 4098 }
4098 if (count != atomic_long_read(&n->nr_slabs)) 4099 if (count != atomic_long_read(&n->nr_slabs))
4099 printk(KERN_ERR "SLUB: %s %ld slabs counted but " 4100 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
4100 "counter=%ld\n", s->name, count, 4101 "counter=%ld\n", s->name, count,
4101 atomic_long_read(&n->nr_slabs)); 4102 atomic_long_read(&n->nr_slabs));
4102 4103
4103 out: 4104 out:
4104 spin_unlock_irqrestore(&n->list_lock, flags); 4105 spin_unlock_irqrestore(&n->list_lock, flags);
4105 return count; 4106 return count;
4106 } 4107 }
4107 4108
4108 static long validate_slab_cache(struct kmem_cache *s) 4109 static long validate_slab_cache(struct kmem_cache *s)
4109 { 4110 {
4110 int node; 4111 int node;
4111 unsigned long count = 0; 4112 unsigned long count = 0;
4112 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * 4113 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4113 sizeof(unsigned long), GFP_KERNEL); 4114 sizeof(unsigned long), GFP_KERNEL);
4114 4115
4115 if (!map) 4116 if (!map)
4116 return -ENOMEM; 4117 return -ENOMEM;
4117 4118
4118 flush_all(s); 4119 flush_all(s);
4119 for_each_node_state(node, N_NORMAL_MEMORY) { 4120 for_each_node_state(node, N_NORMAL_MEMORY) {
4120 struct kmem_cache_node *n = get_node(s, node); 4121 struct kmem_cache_node *n = get_node(s, node);
4121 4122
4122 count += validate_slab_node(s, n, map); 4123 count += validate_slab_node(s, n, map);
4123 } 4124 }
4124 kfree(map); 4125 kfree(map);
4125 return count; 4126 return count;
4126 } 4127 }
4127 /* 4128 /*
4128 * Generate lists of code addresses where slabcache objects are allocated 4129 * Generate lists of code addresses where slabcache objects are allocated
4129 * and freed. 4130 * and freed.
4130 */ 4131 */
4131 4132
4132 struct location { 4133 struct location {
4133 unsigned long count; 4134 unsigned long count;
4134 unsigned long addr; 4135 unsigned long addr;
4135 long long sum_time; 4136 long long sum_time;
4136 long min_time; 4137 long min_time;
4137 long max_time; 4138 long max_time;
4138 long min_pid; 4139 long min_pid;
4139 long max_pid; 4140 long max_pid;
4140 DECLARE_BITMAP(cpus, NR_CPUS); 4141 DECLARE_BITMAP(cpus, NR_CPUS);
4141 nodemask_t nodes; 4142 nodemask_t nodes;
4142 }; 4143 };
4143 4144
4144 struct loc_track { 4145 struct loc_track {
4145 unsigned long max; 4146 unsigned long max;
4146 unsigned long count; 4147 unsigned long count;
4147 struct location *loc; 4148 struct location *loc;
4148 }; 4149 };
4149 4150
4150 static void free_loc_track(struct loc_track *t) 4151 static void free_loc_track(struct loc_track *t)
4151 { 4152 {
4152 if (t->max) 4153 if (t->max)
4153 free_pages((unsigned long)t->loc, 4154 free_pages((unsigned long)t->loc,
4154 get_order(sizeof(struct location) * t->max)); 4155 get_order(sizeof(struct location) * t->max));
4155 } 4156 }
4156 4157
4157 static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags) 4158 static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4158 { 4159 {
4159 struct location *l; 4160 struct location *l;
4160 int order; 4161 int order;
4161 4162
4162 order = get_order(sizeof(struct location) * max); 4163 order = get_order(sizeof(struct location) * max);
4163 4164
4164 l = (void *)__get_free_pages(flags, order); 4165 l = (void *)__get_free_pages(flags, order);
4165 if (!l) 4166 if (!l)
4166 return 0; 4167 return 0;
4167 4168
4168 if (t->count) { 4169 if (t->count) {
4169 memcpy(l, t->loc, sizeof(struct location) * t->count); 4170 memcpy(l, t->loc, sizeof(struct location) * t->count);
4170 free_loc_track(t); 4171 free_loc_track(t);
4171 } 4172 }
4172 t->max = max; 4173 t->max = max;
4173 t->loc = l; 4174 t->loc = l;
4174 return 1; 4175 return 1;
4175 } 4176 }
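alloc_loc_track() is also how the location table grows: when add_location() runs out of room it asks for a buffer twice the current size (with GFP_ATOMIC, since it runs under the node's list_lock taken in list_locations()), copies the existing entries across and frees the old pages. A minimal userspace sketch of that grow-by-doubling step is below; malloc()/free() stand in for __get_free_pages(), and the initial capacity of 16 is an arbitrary stand-in for the PAGE_SIZE-sized first allocation.

/* Hedged sketch of the loc_track grow-by-doubling done by
 * alloc_loc_track(); heap allocations stand in for page allocations. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry { unsigned long addr; };

struct track_table {
        unsigned long max;      /* capacity */
        unsigned long count;    /* entries in use */
        struct entry *loc;
};

static int grow(struct track_table *t, unsigned long max)
{
        struct entry *l = malloc(max * sizeof(*l));

        if (!l)
                return 0;
        if (t->count) {
                memcpy(l, t->loc, t->count * sizeof(*l));
                free(t->loc);
        }
        t->max = max;
        t->loc = l;
        return 1;
}

int main(void)
{
        struct track_table t = { 0, 0, NULL };
        unsigned long i;

        for (i = 0; i < 100; i++) {
                if (t.count >= t.max && !grow(&t, t.max ? 2 * t.max : 16))
                        return 1;       /* give up, as add_location() does */
                t.loc[t.count++].addr = i;
        }
        printf("count=%lu max=%lu\n", t.count, t.max);  /* 100 entries, capacity 128 */
        free(t.loc);
        return 0;
}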
4176 4177
4177 static int add_location(struct loc_track *t, struct kmem_cache *s, 4178 static int add_location(struct loc_track *t, struct kmem_cache *s,
4178 const struct track *track) 4179 const struct track *track)
4179 { 4180 {
4180 long start, end, pos; 4181 long start, end, pos;
4181 struct location *l; 4182 struct location *l;
4182 unsigned long caddr; 4183 unsigned long caddr;
4183 unsigned long age = jiffies - track->when; 4184 unsigned long age = jiffies - track->when;
4184 4185
4185 start = -1; 4186 start = -1;
4186 end = t->count; 4187 end = t->count;
4187 4188
4188 for ( ; ; ) { 4189 for ( ; ; ) {
4189 pos = start + (end - start + 1) / 2; 4190 pos = start + (end - start + 1) / 2;
4190 4191
4191 /* 4192 /*
4192 * There is nothing at "end". If we end up there 4193 * There is nothing at "end". If we end up there
4193 * we need to add something to before end. 4194 * we need to add something to before end.
4194 */ 4195 */
4195 if (pos == end) 4196 if (pos == end)
4196 break; 4197 break;
4197 4198
4198 caddr = t->loc[pos].addr; 4199 caddr = t->loc[pos].addr;
4199 if (track->addr == caddr) { 4200 if (track->addr == caddr) {
4200 4201
4201 l = &t->loc[pos]; 4202 l = &t->loc[pos];
4202 l->count++; 4203 l->count++;
4203 if (track->when) { 4204 if (track->when) {
4204 l->sum_time += age; 4205 l->sum_time += age;
4205 if (age < l->min_time) 4206 if (age < l->min_time)
4206 l->min_time = age; 4207 l->min_time = age;
4207 if (age > l->max_time) 4208 if (age > l->max_time)
4208 l->max_time = age; 4209 l->max_time = age;
4209 4210
4210 if (track->pid < l->min_pid) 4211 if (track->pid < l->min_pid)
4211 l->min_pid = track->pid; 4212 l->min_pid = track->pid;
4212 if (track->pid > l->max_pid) 4213 if (track->pid > l->max_pid)
4213 l->max_pid = track->pid; 4214 l->max_pid = track->pid;
4214 4215
4215 cpumask_set_cpu(track->cpu, 4216 cpumask_set_cpu(track->cpu,
4216 to_cpumask(l->cpus)); 4217 to_cpumask(l->cpus));
4217 } 4218 }
4218 node_set(page_to_nid(virt_to_page(track)), l->nodes); 4219 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4219 return 1; 4220 return 1;
4220 } 4221 }
4221 4222
4222 if (track->addr < caddr) 4223 if (track->addr < caddr)
4223 end = pos; 4224 end = pos;
4224 else 4225 else
4225 start = pos; 4226 start = pos;
4226 } 4227 }
4227 4228
4228 /* 4229 /*
4229 * Not found. Insert new tracking element. 4230 * Not found. Insert new tracking element.
4230 */ 4231 */
4231 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC)) 4232 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4232 return 0; 4233 return 0;
4233 4234
4234 l = t->loc + pos; 4235 l = t->loc + pos;
4235 if (pos < t->count) 4236 if (pos < t->count)
4236 memmove(l + 1, l, 4237 memmove(l + 1, l,
4237 (t->count - pos) * sizeof(struct location)); 4238 (t->count - pos) * sizeof(struct location));
4238 t->count++; 4239 t->count++;
4239 l->count = 1; 4240 l->count = 1;
4240 l->addr = track->addr; 4241 l->addr = track->addr;
4241 l->sum_time = age; 4242 l->sum_time = age;
4242 l->min_time = age; 4243 l->min_time = age;
4243 l->max_time = age; 4244 l->max_time = age;
4244 l->min_pid = track->pid; 4245 l->min_pid = track->pid;
4245 l->max_pid = track->pid; 4246 l->max_pid = track->pid;
4246 cpumask_clear(to_cpumask(l->cpus)); 4247 cpumask_clear(to_cpumask(l->cpus));
4247 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus)); 4248 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4248 nodes_clear(l->nodes); 4249 nodes_clear(l->nodes);
4249 node_set(page_to_nid(virt_to_page(track)), l->nodes); 4250 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4250 return 1; 4251 return 1;
4251 } 4252 }
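add_location() keeps t->loc sorted by call-site address so that repeated allocations from the same place collapse into a single struct location whose counters and cpu/node masks are updated. New addresses are placed with the insertion-point binary search above (start = -1, end = t->count, probe the midpoint, stop once the probe hits end). The standalone sketch below isolates just that search; the address array and values are made up for illustration. Returning t->count (the "append" case) is what makes the memmove() in the insert path a no-op when the new address is already the largest.

/* Hedged sketch of add_location()'s insertion-point binary search.
 * Returns the index at which 'addr' sits or should be inserted so the
 * array stays sorted; the data is illustrative only. */
#include <stdio.h>

static long find_pos(const unsigned long *loc, long count, unsigned long addr)
{
        long start = -1;
        long end = count;
        long pos;

        for ( ; ; ) {
                pos = start + (end - start + 1) / 2;

                /* Nothing exists at "end"; landing there means insert before it. */
                if (pos == end)
                        return pos;

                if (addr == loc[pos])
                        return pos;             /* merge with existing entry */

                if (addr < loc[pos])
                        end = pos;
                else
                        start = pos;
        }
}

int main(void)
{
        unsigned long loc[] = { 0x100, 0x200, 0x300, 0x400 };

        printf("0x250 -> slot %ld\n", find_pos(loc, 4, 0x250)); /* 2: insert */
        printf("0x300 -> slot %ld\n", find_pos(loc, 4, 0x300)); /* 2: existing */
        printf("0x500 -> slot %ld\n", find_pos(loc, 4, 0x500)); /* 4: append */
        return 0;
}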
4252 4253
4253 static void process_slab(struct loc_track *t, struct kmem_cache *s, 4254 static void process_slab(struct loc_track *t, struct kmem_cache *s,
4254 struct page *page, enum track_item alloc, 4255 struct page *page, enum track_item alloc,
4255 unsigned long *map) 4256 unsigned long *map)
4256 { 4257 {
4257 void *addr = page_address(page); 4258 void *addr = page_address(page);
4258 void *p; 4259 void *p;
4259 4260
4260 bitmap_zero(map, page->objects); 4261 bitmap_zero(map, page->objects);
4261 get_map(s, page, map); 4262 get_map(s, page, map);
4262 4263
4263 for_each_object(p, s, addr, page->objects) 4264 for_each_object(p, s, addr, page->objects)
4264 if (!test_bit(slab_index(p, s, addr), map)) 4265 if (!test_bit(slab_index(p, s, addr), map))
4265 add_location(t, s, get_track(s, p, alloc)); 4266 add_location(t, s, get_track(s, p, alloc));
4266 } 4267 }
4267 4268
4268 static int list_locations(struct kmem_cache *s, char *buf, 4269 static int list_locations(struct kmem_cache *s, char *buf,
4269 enum track_item alloc) 4270 enum track_item alloc)
4270 { 4271 {
4271 int len = 0; 4272 int len = 0;
4272 unsigned long i; 4273 unsigned long i;
4273 struct loc_track t = { 0, 0, NULL }; 4274 struct loc_track t = { 0, 0, NULL };
4274 int node; 4275 int node;
4275 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * 4276 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4276 sizeof(unsigned long), GFP_KERNEL); 4277 sizeof(unsigned long), GFP_KERNEL);
4277 4278
4278 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), 4279 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4279 GFP_TEMPORARY)) { 4280 GFP_TEMPORARY)) {
4280 kfree(map); 4281 kfree(map);
4281 return sprintf(buf, "Out of memory\n"); 4282 return sprintf(buf, "Out of memory\n");
4282 } 4283 }
4283 /* Push back cpu slabs */ 4284 /* Push back cpu slabs */
4284 flush_all(s); 4285 flush_all(s);
4285 4286
4286 for_each_node_state(node, N_NORMAL_MEMORY) { 4287 for_each_node_state(node, N_NORMAL_MEMORY) {
4287 struct kmem_cache_node *n = get_node(s, node); 4288 struct kmem_cache_node *n = get_node(s, node);
4288 unsigned long flags; 4289 unsigned long flags;
4289 struct page *page; 4290 struct page *page;
4290 4291
4291 if (!atomic_long_read(&n->nr_slabs)) 4292 if (!atomic_long_read(&n->nr_slabs))
4292 continue; 4293 continue;
4293 4294
4294 spin_lock_irqsave(&n->list_lock, flags); 4295 spin_lock_irqsave(&n->list_lock, flags);
4295 list_for_each_entry(page, &n->partial, lru) 4296 list_for_each_entry(page, &n->partial, lru)
4296 process_slab(&t, s, page, alloc, map); 4297 process_slab(&t, s, page, alloc, map);
4297 list_for_each_entry(page, &n->full, lru) 4298 list_for_each_entry(page, &n->full, lru)
4298 process_slab(&t, s, page, alloc, map); 4299 process_slab(&t, s, page, alloc, map);
4299 spin_unlock_irqrestore(&n->list_lock, flags); 4300 spin_unlock_irqrestore(&n->list_lock, flags);
4300 } 4301 }
4301 4302
4302 for (i = 0; i < t.count; i++) { 4303 for (i = 0; i < t.count; i++) {
4303 struct location *l = &t.loc[i]; 4304 struct location *l = &t.loc[i];
4304 4305
4305 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100) 4306 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4306 break; 4307 break;
4307 len += sprintf(buf + len, "%7ld ", l->count); 4308 len += sprintf(buf + len, "%7ld ", l->count);
4308 4309
4309 if (l->addr) 4310 if (l->addr)
4310 len += sprintf(buf + len, "%pS", (void *)l->addr); 4311 len += sprintf(buf + len, "%pS", (void *)l->addr);
4311 else 4312 else
4312 len += sprintf(buf + len, "<not-available>"); 4313 len += sprintf(buf + len, "<not-available>");
4313 4314
4314 if (l->sum_time != l->min_time) { 4315 if (l->sum_time != l->min_time) {
4315 len += sprintf(buf + len, " age=%ld/%ld/%ld", 4316 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4316 l->min_time, 4317 l->min_time,
4317 (long)div_u64(l->sum_time, l->count), 4318 (long)div_u64(l->sum_time, l->count),
4318 l->max_time); 4319 l->max_time);
4319 } else 4320 } else
4320 len += sprintf(buf + len, " age=%ld", 4321 len += sprintf(buf + len, " age=%ld",
4321 l->min_time); 4322 l->min_time);
4322 4323
4323 if (l->min_pid != l->max_pid) 4324 if (l->min_pid != l->max_pid)
4324 len += sprintf(buf + len, " pid=%ld-%ld", 4325 len += sprintf(buf + len, " pid=%ld-%ld",
4325 l->min_pid, l->max_pid); 4326 l->min_pid, l->max_pid);
4326 else 4327 else
4327 len += sprintf(buf + len, " pid=%ld", 4328 len += sprintf(buf + len, " pid=%ld",
4328 l->min_pid); 4329 l->min_pid);
4329 4330
4330 if (num_online_cpus() > 1 && 4331 if (num_online_cpus() > 1 &&
4331 !cpumask_empty(to_cpumask(l->cpus)) && 4332 !cpumask_empty(to_cpumask(l->cpus)) &&
4332 len < PAGE_SIZE - 60) { 4333 len < PAGE_SIZE - 60) {
4333 len += sprintf(buf + len, " cpus="); 4334 len += sprintf(buf + len, " cpus=");
4334 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50, 4335 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4335 to_cpumask(l->cpus)); 4336 to_cpumask(l->cpus));
4336 } 4337 }
4337 4338
4338 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) && 4339 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4339 len < PAGE_SIZE - 60) { 4340 len < PAGE_SIZE - 60) {
4340 len += sprintf(buf + len, " nodes="); 4341 len += sprintf(buf + len, " nodes=");
4341 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50, 4342 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4342 l->nodes); 4343 l->nodes);
4343 } 4344 }
4344 4345
4345 len += sprintf(buf + len, "\n"); 4346 len += sprintf(buf + len, "\n");
4346 } 4347 }
4347 4348
4348 free_loc_track(&t); 4349 free_loc_track(&t);
4349 kfree(map); 4350 kfree(map);
4350 if (!t.count) 4351 if (!t.count)
4351 len += sprintf(buf, "No data\n"); 4352 len += sprintf(buf, "No data\n");
4352 return len; 4353 return len;
4353 } 4354 }
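Each struct location comes out as a single line: the hit count, the call-site symbol, then optional age=min/avg/max, a pid or pid range, and cpus=/nodes= lists, built by the sprintf() calls above and read back through alloc_calls/free_calls (their _show handlers further down forward here when SLAB_STORE_USER is set). The snippet below replays those format strings on invented values just to show the shape of one such line; %pS has no userspace equivalent, so a plain string stands in for the resolved symbol.

/* Hedged sketch: reproduce the per-location line format used by
 * list_locations() with invented values. */
#include <stdio.h>

int main(void)
{
        char buf[256];
        int len = 0;

        len += sprintf(buf + len, "%7ld ", 1234L);              /* l->count */
        len += sprintf(buf + len, "%s", "kmem_cache_alloc+0x42/0x90"); /* %pS stand-in */
        len += sprintf(buf + len, " age=%ld/%ld/%ld", 3L, 57L, 912L);
        len += sprintf(buf + len, " pid=%ld-%ld", 1L, 4711L);
        len += sprintf(buf + len, " cpus=%s", "0-3");
        len += sprintf(buf + len, " nodes=%s", "0");
        len += sprintf(buf + len, "\n");

        fputs(buf, stdout);
        return 0;
}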
4354 #endif 4355 #endif
4355 4356
4356 #ifdef SLUB_RESILIENCY_TEST 4357 #ifdef SLUB_RESILIENCY_TEST
4357 static void resiliency_test(void) 4358 static void resiliency_test(void)
4358 { 4359 {
4359 u8 *p; 4360 u8 *p;
4360 4361
4361 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10); 4362 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);
4362 4363
4363 printk(KERN_ERR "SLUB resiliency testing\n"); 4364 printk(KERN_ERR "SLUB resiliency testing\n");
4364 printk(KERN_ERR "-----------------------\n"); 4365 printk(KERN_ERR "-----------------------\n");
4365 printk(KERN_ERR "A. Corruption after allocation\n"); 4366 printk(KERN_ERR "A. Corruption after allocation\n");
4366 4367
4367 p = kzalloc(16, GFP_KERNEL); 4368 p = kzalloc(16, GFP_KERNEL);
4368 p[16] = 0x12; 4369 p[16] = 0x12;
4369 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" 4370 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
4370 " 0x12->0x%p\n\n", p + 16); 4371 " 0x12->0x%p\n\n", p + 16);
4371 4372
4372 validate_slab_cache(kmalloc_caches[4]); 4373 validate_slab_cache(kmalloc_caches[4]);
4373 4374
4374 /* Hmmm... The next two are dangerous */ 4375 /* Hmmm... The next two are dangerous */
4375 p = kzalloc(32, GFP_KERNEL); 4376 p = kzalloc(32, GFP_KERNEL);
4376 p[32 + sizeof(void *)] = 0x34; 4377 p[32 + sizeof(void *)] = 0x34;
4377 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" 4378 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
4378 " 0x34 -> -0x%p\n", p); 4379 " 0x34 -> -0x%p\n", p);
4379 printk(KERN_ERR 4380 printk(KERN_ERR
4380 "If allocated object is overwritten then not detectable\n\n"); 4381 "If allocated object is overwritten then not detectable\n\n");
4381 4382
4382 validate_slab_cache(kmalloc_caches[5]); 4383 validate_slab_cache(kmalloc_caches[5]);
4383 p = kzalloc(64, GFP_KERNEL); 4384 p = kzalloc(64, GFP_KERNEL);
4384 p += 64 + (get_cycles() & 0xff) * sizeof(void *); 4385 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4385 *p = 0x56; 4386 *p = 0x56;
4386 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", 4387 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4387 p); 4388 p);
4388 printk(KERN_ERR 4389 printk(KERN_ERR
4389 "If allocated object is overwritten then not detectable\n\n"); 4390 "If allocated object is overwritten then not detectable\n\n");
4390 validate_slab_cache(kmalloc_caches[6]); 4391 validate_slab_cache(kmalloc_caches[6]);
4391 4392
4392 printk(KERN_ERR "\nB. Corruption after free\n"); 4393 printk(KERN_ERR "\nB. Corruption after free\n");
4393 p = kzalloc(128, GFP_KERNEL); 4394 p = kzalloc(128, GFP_KERNEL);
4394 kfree(p); 4395 kfree(p);
4395 *p = 0x78; 4396 *p = 0x78;
4396 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); 4397 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4397 validate_slab_cache(kmalloc_caches[7]); 4398 validate_slab_cache(kmalloc_caches[7]);
4398 4399
4399 p = kzalloc(256, GFP_KERNEL); 4400 p = kzalloc(256, GFP_KERNEL);
4400 kfree(p); 4401 kfree(p);
4401 p[50] = 0x9a; 4402 p[50] = 0x9a;
4402 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", 4403 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
4403 p); 4404 p);
4404 validate_slab_cache(kmalloc_caches[8]); 4405 validate_slab_cache(kmalloc_caches[8]);
4405 4406
4406 p = kzalloc(512, GFP_KERNEL); 4407 p = kzalloc(512, GFP_KERNEL);
4407 kfree(p); 4408 kfree(p);
4408 p[512] = 0xab; 4409 p[512] = 0xab;
4409 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); 4410 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4410 validate_slab_cache(kmalloc_caches[9]); 4411 validate_slab_cache(kmalloc_caches[9]);
4411 } 4412 }
4412 #else 4413 #else
4413 #ifdef CONFIG_SYSFS 4414 #ifdef CONFIG_SYSFS
4414 static void resiliency_test(void) {}; 4415 static void resiliency_test(void) {};
4415 #endif 4416 #endif
4416 #endif 4417 #endif
4417 4418
4418 #ifdef CONFIG_SYSFS 4419 #ifdef CONFIG_SYSFS
4419 enum slab_stat_type { 4420 enum slab_stat_type {
4420 SL_ALL, /* All slabs */ 4421 SL_ALL, /* All slabs */
4421 SL_PARTIAL, /* Only partially allocated slabs */ 4422 SL_PARTIAL, /* Only partially allocated slabs */
4422 SL_CPU, /* Only slabs used for cpu caches */ 4423 SL_CPU, /* Only slabs used for cpu caches */
4423 SL_OBJECTS, /* Determine allocated objects not slabs */ 4424 SL_OBJECTS, /* Determine allocated objects not slabs */
4424 SL_TOTAL /* Determine object capacity not slabs */ 4425 SL_TOTAL /* Determine object capacity not slabs */
4425 }; 4426 };
4426 4427
4427 #define SO_ALL (1 << SL_ALL) 4428 #define SO_ALL (1 << SL_ALL)
4428 #define SO_PARTIAL (1 << SL_PARTIAL) 4429 #define SO_PARTIAL (1 << SL_PARTIAL)
4429 #define SO_CPU (1 << SL_CPU) 4430 #define SO_CPU (1 << SL_CPU)
4430 #define SO_OBJECTS (1 << SL_OBJECTS) 4431 #define SO_OBJECTS (1 << SL_OBJECTS)
4431 #define SO_TOTAL (1 << SL_TOTAL) 4432 #define SO_TOTAL (1 << SL_TOTAL)
4432 4433
4433 static ssize_t show_slab_objects(struct kmem_cache *s, 4434 static ssize_t show_slab_objects(struct kmem_cache *s,
4434 char *buf, unsigned long flags) 4435 char *buf, unsigned long flags)
4435 { 4436 {
4436 unsigned long total = 0; 4437 unsigned long total = 0;
4437 int node; 4438 int node;
4438 int x; 4439 int x;
4439 unsigned long *nodes; 4440 unsigned long *nodes;
4440 unsigned long *per_cpu; 4441 unsigned long *per_cpu;
4441 4442
4442 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); 4443 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4443 if (!nodes) 4444 if (!nodes)
4444 return -ENOMEM; 4445 return -ENOMEM;
4445 per_cpu = nodes + nr_node_ids; 4446 per_cpu = nodes + nr_node_ids;
4446 4447
4447 if (flags & SO_CPU) { 4448 if (flags & SO_CPU) {
4448 int cpu; 4449 int cpu;
4449 4450
4450 for_each_possible_cpu(cpu) { 4451 for_each_possible_cpu(cpu) {
4451 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 4452 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4452 int node = ACCESS_ONCE(c->node); 4453 int node = ACCESS_ONCE(c->node);
4453 struct page *page; 4454 struct page *page;
4454 4455
4455 if (node < 0) 4456 if (node < 0)
4456 continue; 4457 continue;
4457 page = ACCESS_ONCE(c->page); 4458 page = ACCESS_ONCE(c->page);
4458 if (page) { 4459 if (page) {
4459 if (flags & SO_TOTAL) 4460 if (flags & SO_TOTAL)
4460 x = page->objects; 4461 x = page->objects;
4461 else if (flags & SO_OBJECTS) 4462 else if (flags & SO_OBJECTS)
4462 x = page->inuse; 4463 x = page->inuse;
4463 else 4464 else
4464 x = 1; 4465 x = 1;
4465 4466
4466 total += x; 4467 total += x;
4467 nodes[node] += x; 4468 nodes[node] += x;
4468 } 4469 }
4469 page = c->partial; 4470 page = c->partial;
4470 4471
4471 if (page) { 4472 if (page) {
4472 x = page->pobjects; 4473 x = page->pobjects;
4473 total += x; 4474 total += x;
4474 nodes[node] += x; 4475 nodes[node] += x;
4475 } 4476 }
4476 per_cpu[node]++; 4477 per_cpu[node]++;
4477 } 4478 }
4478 } 4479 }
4479 4480
4480 lock_memory_hotplug(); 4481 lock_memory_hotplug();
4481 #ifdef CONFIG_SLUB_DEBUG 4482 #ifdef CONFIG_SLUB_DEBUG
4482 if (flags & SO_ALL) { 4483 if (flags & SO_ALL) {
4483 for_each_node_state(node, N_NORMAL_MEMORY) { 4484 for_each_node_state(node, N_NORMAL_MEMORY) {
4484 struct kmem_cache_node *n = get_node(s, node); 4485 struct kmem_cache_node *n = get_node(s, node);
4485 4486
4486 if (flags & SO_TOTAL) 4487 if (flags & SO_TOTAL)
4487 x = atomic_long_read(&n->total_objects); 4488 x = atomic_long_read(&n->total_objects);
4488 else if (flags & SO_OBJECTS) 4489 else if (flags & SO_OBJECTS)
4489 x = atomic_long_read(&n->total_objects) - 4490 x = atomic_long_read(&n->total_objects) -
4490 count_partial(n, count_free); 4491 count_partial(n, count_free);
4491 4492
4492 else 4493 else
4493 x = atomic_long_read(&n->nr_slabs); 4494 x = atomic_long_read(&n->nr_slabs);
4494 total += x; 4495 total += x;
4495 nodes[node] += x; 4496 nodes[node] += x;
4496 } 4497 }
4497 4498
4498 } else 4499 } else
4499 #endif 4500 #endif
4500 if (flags & SO_PARTIAL) { 4501 if (flags & SO_PARTIAL) {
4501 for_each_node_state(node, N_NORMAL_MEMORY) { 4502 for_each_node_state(node, N_NORMAL_MEMORY) {
4502 struct kmem_cache_node *n = get_node(s, node); 4503 struct kmem_cache_node *n = get_node(s, node);
4503 4504
4504 if (flags & SO_TOTAL) 4505 if (flags & SO_TOTAL)
4505 x = count_partial(n, count_total); 4506 x = count_partial(n, count_total);
4506 else if (flags & SO_OBJECTS) 4507 else if (flags & SO_OBJECTS)
4507 x = count_partial(n, count_inuse); 4508 x = count_partial(n, count_inuse);
4508 else 4509 else
4509 x = n->nr_partial; 4510 x = n->nr_partial;
4510 total += x; 4511 total += x;
4511 nodes[node] += x; 4512 nodes[node] += x;
4512 } 4513 }
4513 } 4514 }
4514 x = sprintf(buf, "%lu", total); 4515 x = sprintf(buf, "%lu", total);
4515 #ifdef CONFIG_NUMA 4516 #ifdef CONFIG_NUMA
4516 for_each_node_state(node, N_NORMAL_MEMORY) 4517 for_each_node_state(node, N_NORMAL_MEMORY)
4517 if (nodes[node]) 4518 if (nodes[node])
4518 x += sprintf(buf + x, " N%d=%lu", 4519 x += sprintf(buf + x, " N%d=%lu",
4519 node, nodes[node]); 4520 node, nodes[node]);
4520 #endif 4521 #endif
4521 unlock_memory_hotplug(); 4522 unlock_memory_hotplug();
4522 kfree(nodes); 4523 kfree(nodes);
4523 return x + sprintf(buf + x, "\n"); 4524 return x + sprintf(buf + x, "\n");
4524 } 4525 }
4525 4526
4526 #ifdef CONFIG_SLUB_DEBUG 4527 #ifdef CONFIG_SLUB_DEBUG
4527 static int any_slab_objects(struct kmem_cache *s) 4528 static int any_slab_objects(struct kmem_cache *s)
4528 { 4529 {
4529 int node; 4530 int node;
4530 4531
4531 for_each_online_node(node) { 4532 for_each_online_node(node) {
4532 struct kmem_cache_node *n = get_node(s, node); 4533 struct kmem_cache_node *n = get_node(s, node);
4533 4534
4534 if (!n) 4535 if (!n)
4535 continue; 4536 continue;
4536 4537
4537 if (atomic_long_read(&n->total_objects)) 4538 if (atomic_long_read(&n->total_objects))
4538 return 1; 4539 return 1;
4539 } 4540 }
4540 return 0; 4541 return 0;
4541 } 4542 }
4542 #endif 4543 #endif
4543 4544
4544 #define to_slab_attr(n) container_of(n, struct slab_attribute, attr) 4545 #define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4545 #define to_slab(n) container_of(n, struct kmem_cache, kobj) 4546 #define to_slab(n) container_of(n, struct kmem_cache, kobj)
4546 4547
4547 struct slab_attribute { 4548 struct slab_attribute {
4548 struct attribute attr; 4549 struct attribute attr;
4549 ssize_t (*show)(struct kmem_cache *s, char *buf); 4550 ssize_t (*show)(struct kmem_cache *s, char *buf);
4550 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count); 4551 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4551 }; 4552 };
4552 4553
4553 #define SLAB_ATTR_RO(_name) \ 4554 #define SLAB_ATTR_RO(_name) \
4554 static struct slab_attribute _name##_attr = \ 4555 static struct slab_attribute _name##_attr = \
4555 __ATTR(_name, 0400, _name##_show, NULL) 4556 __ATTR(_name, 0400, _name##_show, NULL)
4556 4557
4557 #define SLAB_ATTR(_name) \ 4558 #define SLAB_ATTR(_name) \
4558 static struct slab_attribute _name##_attr = \ 4559 static struct slab_attribute _name##_attr = \
4559 __ATTR(_name, 0600, _name##_show, _name##_store) 4560 __ATTR(_name, 0600, _name##_show, _name##_store)
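SLAB_ATTR_RO() and SLAB_ATTR() above stamp out one struct slab_attribute per sysfs file, pasting _name into both the variable name (_name##_attr) and the handler names, with mode 0400 for read-only attributes and 0600 for writable ones. The userspace mock below reproduces the same token-pasting pattern; struct mock_slab_attribute with plain name/mode fields is a simplified stand-in for the kernel's struct attribute and __ATTR(), not the real definition.

/* Hedged userspace mock of the SLAB_ATTR_RO() pattern: a static table
 * entry bundling a name, a mode and a show callback. */
#include <stdio.h>
#include <stddef.h>

struct mock_cache { int size; };

struct mock_slab_attribute {
        const char *name;
        unsigned int mode;
        int (*show)(struct mock_cache *s, char *buf);
        int (*store)(struct mock_cache *s, const char *buf, size_t len);
};

#define MOCK_SLAB_ATTR_RO(_name) \
        static struct mock_slab_attribute _name##_attr = \
                { #_name, 0400, _name##_show, NULL }

static int slab_size_show(struct mock_cache *s, char *buf)
{
        return sprintf(buf, "%d\n", s->size);
}
MOCK_SLAB_ATTR_RO(slab_size);   /* defines slab_size_attr */

int main(void)
{
        struct mock_cache s = { .size = 192 };
        char buf[32];

        slab_size_attr.show(&s, buf);
        printf("%s (mode %o): %s", slab_size_attr.name, slab_size_attr.mode, buf);
        return 0;
}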
4560 4561
4561 static ssize_t slab_size_show(struct kmem_cache *s, char *buf) 4562 static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4562 { 4563 {
4563 return sprintf(buf, "%d\n", s->size); 4564 return sprintf(buf, "%d\n", s->size);
4564 } 4565 }
4565 SLAB_ATTR_RO(slab_size); 4566 SLAB_ATTR_RO(slab_size);
4566 4567
4567 static ssize_t align_show(struct kmem_cache *s, char *buf) 4568 static ssize_t align_show(struct kmem_cache *s, char *buf)
4568 { 4569 {
4569 return sprintf(buf, "%d\n", s->align); 4570 return sprintf(buf, "%d\n", s->align);
4570 } 4571 }
4571 SLAB_ATTR_RO(align); 4572 SLAB_ATTR_RO(align);
4572 4573
4573 static ssize_t object_size_show(struct kmem_cache *s, char *buf) 4574 static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4574 { 4575 {
4575 return sprintf(buf, "%d\n", s->objsize); 4576 return sprintf(buf, "%d\n", s->objsize);
4576 } 4577 }
4577 SLAB_ATTR_RO(object_size); 4578 SLAB_ATTR_RO(object_size);
4578 4579
4579 static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf) 4580 static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4580 { 4581 {
4581 return sprintf(buf, "%d\n", oo_objects(s->oo)); 4582 return sprintf(buf, "%d\n", oo_objects(s->oo));
4582 } 4583 }
4583 SLAB_ATTR_RO(objs_per_slab); 4584 SLAB_ATTR_RO(objs_per_slab);
4584 4585
4585 static ssize_t order_store(struct kmem_cache *s, 4586 static ssize_t order_store(struct kmem_cache *s,
4586 const char *buf, size_t length) 4587 const char *buf, size_t length)
4587 { 4588 {
4588 unsigned long order; 4589 unsigned long order;
4589 int err; 4590 int err;
4590 4591
4591 err = strict_strtoul(buf, 10, &order); 4592 err = strict_strtoul(buf, 10, &order);
4592 if (err) 4593 if (err)
4593 return err; 4594 return err;
4594 4595
4595 if (order > slub_max_order || order < slub_min_order) 4596 if (order > slub_max_order || order < slub_min_order)
4596 return -EINVAL; 4597 return -EINVAL;
4597 4598
4598 calculate_sizes(s, order); 4599 calculate_sizes(s, order);
4599 return length; 4600 return length;
4600 } 4601 }
4601 4602
4602 static ssize_t order_show(struct kmem_cache *s, char *buf) 4603 static ssize_t order_show(struct kmem_cache *s, char *buf)
4603 { 4604 {
4604 return sprintf(buf, "%d\n", oo_order(s->oo)); 4605 return sprintf(buf, "%d\n", oo_order(s->oo));
4605 } 4606 }
4606 SLAB_ATTR(order); 4607 SLAB_ATTR(order);
4607 4608
4608 static ssize_t min_partial_show(struct kmem_cache *s, char *buf) 4609 static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4609 { 4610 {
4610 return sprintf(buf, "%lu\n", s->min_partial); 4611 return sprintf(buf, "%lu\n", s->min_partial);
4611 } 4612 }
4612 4613
4613 static ssize_t min_partial_store(struct kmem_cache *s, const char *buf, 4614 static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4614 size_t length) 4615 size_t length)
4615 { 4616 {
4616 unsigned long min; 4617 unsigned long min;
4617 int err; 4618 int err;
4618 4619
4619 err = strict_strtoul(buf, 10, &min); 4620 err = strict_strtoul(buf, 10, &min);
4620 if (err) 4621 if (err)
4621 return err; 4622 return err;
4622 4623
4623 set_min_partial(s, min); 4624 set_min_partial(s, min);
4624 return length; 4625 return length;
4625 } 4626 }
4626 SLAB_ATTR(min_partial); 4627 SLAB_ATTR(min_partial);
4627 4628
4628 static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf) 4629 static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4629 { 4630 {
4630 return sprintf(buf, "%u\n", s->cpu_partial); 4631 return sprintf(buf, "%u\n", s->cpu_partial);
4631 } 4632 }
4632 4633
4633 static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, 4634 static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4634 size_t length) 4635 size_t length)
4635 { 4636 {
4636 unsigned long objects; 4637 unsigned long objects;
4637 int err; 4638 int err;
4638 4639
4639 err = strict_strtoul(buf, 10, &objects); 4640 err = strict_strtoul(buf, 10, &objects);
4640 if (err) 4641 if (err)
4641 return err; 4642 return err;
4642 4643
4643 s->cpu_partial = objects; 4644 s->cpu_partial = objects;
4644 flush_all(s); 4645 flush_all(s);
4645 return length; 4646 return length;
4646 } 4647 }
4647 SLAB_ATTR(cpu_partial); 4648 SLAB_ATTR(cpu_partial);
4648 4649
4649 static ssize_t ctor_show(struct kmem_cache *s, char *buf) 4650 static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4650 { 4651 {
4651 if (!s->ctor) 4652 if (!s->ctor)
4652 return 0; 4653 return 0;
4653 return sprintf(buf, "%pS\n", s->ctor); 4654 return sprintf(buf, "%pS\n", s->ctor);
4654 } 4655 }
4655 SLAB_ATTR_RO(ctor); 4656 SLAB_ATTR_RO(ctor);
4656 4657
4657 static ssize_t aliases_show(struct kmem_cache *s, char *buf) 4658 static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4658 { 4659 {
4659 return sprintf(buf, "%d\n", s->refcount - 1); 4660 return sprintf(buf, "%d\n", s->refcount - 1);
4660 } 4661 }
4661 SLAB_ATTR_RO(aliases); 4662 SLAB_ATTR_RO(aliases);
4662 4663
4663 static ssize_t partial_show(struct kmem_cache *s, char *buf) 4664 static ssize_t partial_show(struct kmem_cache *s, char *buf)
4664 { 4665 {
4665 return show_slab_objects(s, buf, SO_PARTIAL); 4666 return show_slab_objects(s, buf, SO_PARTIAL);
4666 } 4667 }
4667 SLAB_ATTR_RO(partial); 4668 SLAB_ATTR_RO(partial);
4668 4669
4669 static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) 4670 static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4670 { 4671 {
4671 return show_slab_objects(s, buf, SO_CPU); 4672 return show_slab_objects(s, buf, SO_CPU);
4672 } 4673 }
4673 SLAB_ATTR_RO(cpu_slabs); 4674 SLAB_ATTR_RO(cpu_slabs);
4674 4675
4675 static ssize_t objects_show(struct kmem_cache *s, char *buf) 4676 static ssize_t objects_show(struct kmem_cache *s, char *buf)
4676 { 4677 {
4677 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS); 4678 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4678 } 4679 }
4679 SLAB_ATTR_RO(objects); 4680 SLAB_ATTR_RO(objects);
4680 4681
4681 static ssize_t objects_partial_show(struct kmem_cache *s, char *buf) 4682 static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4682 { 4683 {
4683 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS); 4684 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4684 } 4685 }
4685 SLAB_ATTR_RO(objects_partial); 4686 SLAB_ATTR_RO(objects_partial);
4686 4687
4687 static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf) 4688 static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4688 { 4689 {
4689 int objects = 0; 4690 int objects = 0;
4690 int pages = 0; 4691 int pages = 0;
4691 int cpu; 4692 int cpu;
4692 int len; 4693 int len;
4693 4694
4694 for_each_online_cpu(cpu) { 4695 for_each_online_cpu(cpu) {
4695 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial; 4696 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4696 4697
4697 if (page) { 4698 if (page) {
4698 pages += page->pages; 4699 pages += page->pages;
4699 objects += page->pobjects; 4700 objects += page->pobjects;
4700 } 4701 }
4701 } 4702 }
4702 4703
4703 len = sprintf(buf, "%d(%d)", objects, pages); 4704 len = sprintf(buf, "%d(%d)", objects, pages);
4704 4705
4705 #ifdef CONFIG_SMP 4706 #ifdef CONFIG_SMP
4706 for_each_online_cpu(cpu) { 4707 for_each_online_cpu(cpu) {
4707 struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial; 4708 struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;
4708 4709
4709 if (page && len < PAGE_SIZE - 20) 4710 if (page && len < PAGE_SIZE - 20)
4710 len += sprintf(buf + len, " C%d=%d(%d)", cpu, 4711 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4711 page->pobjects, page->pages); 4712 page->pobjects, page->pages);
4712 } 4713 }
4713 #endif 4714 #endif
4714 return len + sprintf(buf + len, "\n"); 4715 return len + sprintf(buf + len, "\n");
4715 } 4716 }
4716 SLAB_ATTR_RO(slabs_cpu_partial); 4717 SLAB_ATTR_RO(slabs_cpu_partial);
4717 4718
4718 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) 4719 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4719 { 4720 {
4720 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); 4721 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4721 } 4722 }
4722 4723
4723 static ssize_t reclaim_account_store(struct kmem_cache *s, 4724 static ssize_t reclaim_account_store(struct kmem_cache *s,
4724 const char *buf, size_t length) 4725 const char *buf, size_t length)
4725 { 4726 {
4726 s->flags &= ~SLAB_RECLAIM_ACCOUNT; 4727 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4727 if (buf[0] == '1') 4728 if (buf[0] == '1')
4728 s->flags |= SLAB_RECLAIM_ACCOUNT; 4729 s->flags |= SLAB_RECLAIM_ACCOUNT;
4729 return length; 4730 return length;
4730 } 4731 }
4731 SLAB_ATTR(reclaim_account); 4732 SLAB_ATTR(reclaim_account);
4732 4733
4733 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf) 4734 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4734 { 4735 {
4735 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN)); 4736 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4736 } 4737 }
4737 SLAB_ATTR_RO(hwcache_align); 4738 SLAB_ATTR_RO(hwcache_align);
4738 4739
4739 #ifdef CONFIG_ZONE_DMA 4740 #ifdef CONFIG_ZONE_DMA
4740 static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) 4741 static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4741 { 4742 {
4742 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA)); 4743 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4743 } 4744 }
4744 SLAB_ATTR_RO(cache_dma); 4745 SLAB_ATTR_RO(cache_dma);
4745 #endif 4746 #endif
4746 4747
4747 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) 4748 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4748 { 4749 {
4749 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU)); 4750 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4750 } 4751 }
4751 SLAB_ATTR_RO(destroy_by_rcu); 4752 SLAB_ATTR_RO(destroy_by_rcu);
4752 4753
4753 static ssize_t reserved_show(struct kmem_cache *s, char *buf) 4754 static ssize_t reserved_show(struct kmem_cache *s, char *buf)
4754 { 4755 {
4755 return sprintf(buf, "%d\n", s->reserved); 4756 return sprintf(buf, "%d\n", s->reserved);
4756 } 4757 }
4757 SLAB_ATTR_RO(reserved); 4758 SLAB_ATTR_RO(reserved);
4758 4759
4759 #ifdef CONFIG_SLUB_DEBUG 4760 #ifdef CONFIG_SLUB_DEBUG
4760 static ssize_t slabs_show(struct kmem_cache *s, char *buf) 4761 static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4761 { 4762 {
4762 return show_slab_objects(s, buf, SO_ALL); 4763 return show_slab_objects(s, buf, SO_ALL);
4763 } 4764 }
4764 SLAB_ATTR_RO(slabs); 4765 SLAB_ATTR_RO(slabs);
4765 4766
4766 static ssize_t total_objects_show(struct kmem_cache *s, char *buf) 4767 static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4767 { 4768 {
4768 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL); 4769 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4769 } 4770 }
4770 SLAB_ATTR_RO(total_objects); 4771 SLAB_ATTR_RO(total_objects);
4771 4772
4772 static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) 4773 static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4773 { 4774 {
4774 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); 4775 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4775 } 4776 }
4776 4777
4777 static ssize_t sanity_checks_store(struct kmem_cache *s, 4778 static ssize_t sanity_checks_store(struct kmem_cache *s,
4778 const char *buf, size_t length) 4779 const char *buf, size_t length)
4779 { 4780 {
4780 s->flags &= ~SLAB_DEBUG_FREE; 4781 s->flags &= ~SLAB_DEBUG_FREE;
4781 if (buf[0] == '1') { 4782 if (buf[0] == '1') {
4782 s->flags &= ~__CMPXCHG_DOUBLE; 4783 s->flags &= ~__CMPXCHG_DOUBLE;
4783 s->flags |= SLAB_DEBUG_FREE; 4784 s->flags |= SLAB_DEBUG_FREE;
4784 } 4785 }
4785 return length; 4786 return length;
4786 } 4787 }
4787 SLAB_ATTR(sanity_checks); 4788 SLAB_ATTR(sanity_checks);
4788 4789
4789 static ssize_t trace_show(struct kmem_cache *s, char *buf) 4790 static ssize_t trace_show(struct kmem_cache *s, char *buf)
4790 { 4791 {
4791 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE)); 4792 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4792 } 4793 }
4793 4794
4794 static ssize_t trace_store(struct kmem_cache *s, const char *buf, 4795 static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4795 size_t length) 4796 size_t length)
4796 { 4797 {
4797 s->flags &= ~SLAB_TRACE; 4798 s->flags &= ~SLAB_TRACE;
4798 if (buf[0] == '1') { 4799 if (buf[0] == '1') {
4799 s->flags &= ~__CMPXCHG_DOUBLE; 4800 s->flags &= ~__CMPXCHG_DOUBLE;
4800 s->flags |= SLAB_TRACE; 4801 s->flags |= SLAB_TRACE;
4801 } 4802 }
4802 return length; 4803 return length;
4803 } 4804 }
4804 SLAB_ATTR(trace); 4805 SLAB_ATTR(trace);
4805 4806
4806 static ssize_t red_zone_show(struct kmem_cache *s, char *buf) 4807 static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4807 { 4808 {
4808 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE)); 4809 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4809 } 4810 }
4810 4811
4811 static ssize_t red_zone_store(struct kmem_cache *s, 4812 static ssize_t red_zone_store(struct kmem_cache *s,
4812 const char *buf, size_t length) 4813 const char *buf, size_t length)
4813 { 4814 {
4814 if (any_slab_objects(s)) 4815 if (any_slab_objects(s))
4815 return -EBUSY; 4816 return -EBUSY;
4816 4817
4817 s->flags &= ~SLAB_RED_ZONE; 4818 s->flags &= ~SLAB_RED_ZONE;
4818 if (buf[0] == '1') { 4819 if (buf[0] == '1') {
4819 s->flags &= ~__CMPXCHG_DOUBLE; 4820 s->flags &= ~__CMPXCHG_DOUBLE;
4820 s->flags |= SLAB_RED_ZONE; 4821 s->flags |= SLAB_RED_ZONE;
4821 } 4822 }
4822 calculate_sizes(s, -1); 4823 calculate_sizes(s, -1);
4823 return length; 4824 return length;
4824 } 4825 }
4825 SLAB_ATTR(red_zone); 4826 SLAB_ATTR(red_zone);
4826 4827
4827 static ssize_t poison_show(struct kmem_cache *s, char *buf) 4828 static ssize_t poison_show(struct kmem_cache *s, char *buf)
4828 { 4829 {
4829 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON)); 4830 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4830 } 4831 }
4831 4832
4832 static ssize_t poison_store(struct kmem_cache *s, 4833 static ssize_t poison_store(struct kmem_cache *s,
4833 const char *buf, size_t length) 4834 const char *buf, size_t length)
4834 { 4835 {
4835 if (any_slab_objects(s)) 4836 if (any_slab_objects(s))
4836 return -EBUSY; 4837 return -EBUSY;
4837 4838
4838 s->flags &= ~SLAB_POISON; 4839 s->flags &= ~SLAB_POISON;
4839 if (buf[0] == '1') { 4840 if (buf[0] == '1') {
4840 s->flags &= ~__CMPXCHG_DOUBLE; 4841 s->flags &= ~__CMPXCHG_DOUBLE;
4841 s->flags |= SLAB_POISON; 4842 s->flags |= SLAB_POISON;
4842 } 4843 }
4843 calculate_sizes(s, -1); 4844 calculate_sizes(s, -1);
4844 return length; 4845 return length;
4845 } 4846 }
4846 SLAB_ATTR(poison); 4847 SLAB_ATTR(poison);
4847 4848
4848 static ssize_t store_user_show(struct kmem_cache *s, char *buf) 4849 static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4849 { 4850 {
4850 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER)); 4851 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4851 } 4852 }
4852 4853
4853 static ssize_t store_user_store(struct kmem_cache *s, 4854 static ssize_t store_user_store(struct kmem_cache *s,
4854 const char *buf, size_t length) 4855 const char *buf, size_t length)
4855 { 4856 {
4856 if (any_slab_objects(s)) 4857 if (any_slab_objects(s))
4857 return -EBUSY; 4858 return -EBUSY;
4858 4859
4859 s->flags &= ~SLAB_STORE_USER; 4860 s->flags &= ~SLAB_STORE_USER;
4860 if (buf[0] == '1') { 4861 if (buf[0] == '1') {
4861 s->flags &= ~__CMPXCHG_DOUBLE; 4862 s->flags &= ~__CMPXCHG_DOUBLE;
4862 s->flags |= SLAB_STORE_USER; 4863 s->flags |= SLAB_STORE_USER;
4863 } 4864 }
4864 calculate_sizes(s, -1); 4865 calculate_sizes(s, -1);
4865 return length; 4866 return length;
4866 } 4867 }
4867 SLAB_ATTR(store_user); 4868 SLAB_ATTR(store_user);
4868 4869
4869 static ssize_t validate_show(struct kmem_cache *s, char *buf) 4870 static ssize_t validate_show(struct kmem_cache *s, char *buf)
4870 { 4871 {
4871 return 0; 4872 return 0;
4872 } 4873 }
4873 4874
4874 static ssize_t validate_store(struct kmem_cache *s, 4875 static ssize_t validate_store(struct kmem_cache *s,
4875 const char *buf, size_t length) 4876 const char *buf, size_t length)
4876 { 4877 {
4877 int ret = -EINVAL; 4878 int ret = -EINVAL;
4878 4879
4879 if (buf[0] == '1') { 4880 if (buf[0] == '1') {
4880 ret = validate_slab_cache(s); 4881 ret = validate_slab_cache(s);
4881 if (ret >= 0) 4882 if (ret >= 0)
4882 ret = length; 4883 ret = length;
4883 } 4884 }
4884 return ret; 4885 return ret;
4885 } 4886 }
4886 SLAB_ATTR(validate); 4887 SLAB_ATTR(validate);
4887 4888
4888 static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) 4889 static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4889 { 4890 {
4890 if (!(s->flags & SLAB_STORE_USER)) 4891 if (!(s->flags & SLAB_STORE_USER))
4891 return -ENOSYS; 4892 return -ENOSYS;
4892 return list_locations(s, buf, TRACK_ALLOC); 4893 return list_locations(s, buf, TRACK_ALLOC);
4893 } 4894 }
4894 SLAB_ATTR_RO(alloc_calls); 4895 SLAB_ATTR_RO(alloc_calls);
4895 4896
4896 static ssize_t free_calls_show(struct kmem_cache *s, char *buf) 4897 static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4897 { 4898 {
4898 if (!(s->flags & SLAB_STORE_USER)) 4899 if (!(s->flags & SLAB_STORE_USER))
4899 return -ENOSYS; 4900 return -ENOSYS;
4900 return list_locations(s, buf, TRACK_FREE); 4901 return list_locations(s, buf, TRACK_FREE);
4901 } 4902 }
4902 SLAB_ATTR_RO(free_calls); 4903 SLAB_ATTR_RO(free_calls);
4903 #endif /* CONFIG_SLUB_DEBUG */ 4904 #endif /* CONFIG_SLUB_DEBUG */
4904 4905
4905 #ifdef CONFIG_FAILSLAB 4906 #ifdef CONFIG_FAILSLAB
4906 static ssize_t failslab_show(struct kmem_cache *s, char *buf) 4907 static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4907 { 4908 {
4908 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); 4909 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4909 } 4910 }
4910 4911
4911 static ssize_t failslab_store(struct kmem_cache *s, const char *buf, 4912 static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4912 size_t length) 4913 size_t length)
4913 { 4914 {
4914 s->flags &= ~SLAB_FAILSLAB; 4915 s->flags &= ~SLAB_FAILSLAB;
4915 if (buf[0] == '1') 4916 if (buf[0] == '1')
4916 s->flags |= SLAB_FAILSLAB; 4917 s->flags |= SLAB_FAILSLAB;
4917 return length; 4918 return length;
4918 } 4919 }
4919 SLAB_ATTR(failslab); 4920 SLAB_ATTR(failslab);
4920 #endif 4921 #endif
4921 4922
4922 static ssize_t shrink_show(struct kmem_cache *s, char *buf) 4923 static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4923 { 4924 {
4924 return 0; 4925 return 0;
4925 } 4926 }
4926 4927
4927 static ssize_t shrink_store(struct kmem_cache *s, 4928 static ssize_t shrink_store(struct kmem_cache *s,
4928 const char *buf, size_t length) 4929 const char *buf, size_t length)
4929 { 4930 {
4930 if (buf[0] == '1') { 4931 if (buf[0] == '1') {
4931 int rc = kmem_cache_shrink(s); 4932 int rc = kmem_cache_shrink(s);
4932 4933
4933 if (rc) 4934 if (rc)
4934 return rc; 4935 return rc;
4935 } else 4936 } else
4936 return -EINVAL; 4937 return -EINVAL;
4937 return length; 4938 return length;
4938 } 4939 }
4939 SLAB_ATTR(shrink); 4940 SLAB_ATTR(shrink);
4940 4941
4941 #ifdef CONFIG_NUMA 4942 #ifdef CONFIG_NUMA
4942 static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) 4943 static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4943 { 4944 {
4944 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10); 4945 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4945 } 4946 }
4946 4947
4947 static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, 4948 static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4948 const char *buf, size_t length) 4949 const char *buf, size_t length)
4949 { 4950 {
4950 unsigned long ratio; 4951 unsigned long ratio;
4951 int err; 4952 int err;
4952 4953
4953 err = strict_strtoul(buf, 10, &ratio); 4954 err = strict_strtoul(buf, 10, &ratio);
4954 if (err) 4955 if (err)
4955 return err; 4956 return err;
4956 4957
4957 if (ratio <= 100) 4958 if (ratio <= 100)
4958 s->remote_node_defrag_ratio = ratio * 10; 4959 s->remote_node_defrag_ratio = ratio * 10;
4959 4960
4960 return length; 4961 return length;
4961 } 4962 }
4962 SLAB_ATTR(remote_node_defrag_ratio); 4963 SLAB_ATTR(remote_node_defrag_ratio);
4963 #endif 4964 #endif
4964 4965
4965 #ifdef CONFIG_SLUB_STATS 4966 #ifdef CONFIG_SLUB_STATS
4966 static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) 4967 static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4967 { 4968 {
4968 unsigned long sum = 0; 4969 unsigned long sum = 0;
4969 int cpu; 4970 int cpu;
4970 int len; 4971 int len;
4971 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); 4972 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
4972 4973
4973 if (!data) 4974 if (!data)
4974 return -ENOMEM; 4975 return -ENOMEM;
4975 4976
4976 for_each_online_cpu(cpu) { 4977 for_each_online_cpu(cpu) {
4977 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si]; 4978 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
4978 4979
4979 data[cpu] = x; 4980 data[cpu] = x;
4980 sum += x; 4981 sum += x;
4981 } 4982 }
4982 4983
4983 len = sprintf(buf, "%lu", sum); 4984 len = sprintf(buf, "%lu", sum);
4984 4985
4985 #ifdef CONFIG_SMP 4986 #ifdef CONFIG_SMP
4986 for_each_online_cpu(cpu) { 4987 for_each_online_cpu(cpu) {
4987 if (data[cpu] && len < PAGE_SIZE - 20) 4988 if (data[cpu] && len < PAGE_SIZE - 20)
4988 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]); 4989 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
4989 } 4990 }
4990 #endif 4991 #endif
4991 kfree(data); 4992 kfree(data);
4992 return len + sprintf(buf + len, "\n"); 4993 return len + sprintf(buf + len, "\n");
4993 } 4994 }
4994 4995
4995 static void clear_stat(struct kmem_cache *s, enum stat_item si) 4996 static void clear_stat(struct kmem_cache *s, enum stat_item si)
4996 { 4997 {
4997 int cpu; 4998 int cpu;
4998 4999
4999 for_each_online_cpu(cpu) 5000 for_each_online_cpu(cpu)
5000 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0; 5001 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5001 } 5002 }
5002 5003
5003 #define STAT_ATTR(si, text) \ 5004 #define STAT_ATTR(si, text) \
5004 static ssize_t text##_show(struct kmem_cache *s, char *buf) \ 5005 static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5005 { \ 5006 { \
5006 return show_stat(s, buf, si); \ 5007 return show_stat(s, buf, si); \
5007 } \ 5008 } \
5008 static ssize_t text##_store(struct kmem_cache *s, \ 5009 static ssize_t text##_store(struct kmem_cache *s, \
5009 const char *buf, size_t length) \ 5010 const char *buf, size_t length) \
5010 { \ 5011 { \
5011 if (buf[0] != '0') \ 5012 if (buf[0] != '0') \
5012 return -EINVAL; \ 5013 return -EINVAL; \
5013 clear_stat(s, si); \ 5014 clear_stat(s, si); \
5014 return length; \ 5015 return length; \
5015 } \ 5016 } \
5016 SLAB_ATTR(text); \ 5017 SLAB_ATTR(text); \
5017 5018
5018 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); 5019 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5019 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); 5020 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5020 STAT_ATTR(FREE_FASTPATH, free_fastpath); 5021 STAT_ATTR(FREE_FASTPATH, free_fastpath);
5021 STAT_ATTR(FREE_SLOWPATH, free_slowpath); 5022 STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5022 STAT_ATTR(FREE_FROZEN, free_frozen); 5023 STAT_ATTR(FREE_FROZEN, free_frozen);
5023 STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial); 5024 STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5024 STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial); 5025 STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5025 STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial); 5026 STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5026 STAT_ATTR(ALLOC_SLAB, alloc_slab); 5027 STAT_ATTR(ALLOC_SLAB, alloc_slab);
5027 STAT_ATTR(ALLOC_REFILL, alloc_refill); 5028 STAT_ATTR(ALLOC_REFILL, alloc_refill);
5028 STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch); 5029 STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5029 STAT_ATTR(FREE_SLAB, free_slab); 5030 STAT_ATTR(FREE_SLAB, free_slab);
5030 STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush); 5031 STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5031 STAT_ATTR(DEACTIVATE_FULL, deactivate_full); 5032 STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5032 STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty); 5033 STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5033 STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); 5034 STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5034 STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); 5035 STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5035 STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); 5036 STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5036 STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass); 5037 STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5037 STAT_ATTR(ORDER_FALLBACK, order_fallback); 5038 STAT_ATTR(ORDER_FALLBACK, order_fallback);
5038 STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail); 5039 STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5039 STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail); 5040 STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5040 STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc); 5041 STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5041 STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free); 5042 STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5042 #endif 5043 #endif
5043 5044
5044 static struct attribute *slab_attrs[] = { 5045 static struct attribute *slab_attrs[] = {
5045 &slab_size_attr.attr, 5046 &slab_size_attr.attr,
5046 &object_size_attr.attr, 5047 &object_size_attr.attr,
5047 &objs_per_slab_attr.attr, 5048 &objs_per_slab_attr.attr,
5048 &order_attr.attr, 5049 &order_attr.attr,
5049 &min_partial_attr.attr, 5050 &min_partial_attr.attr,
5050 &cpu_partial_attr.attr, 5051 &cpu_partial_attr.attr,
5051 &objects_attr.attr, 5052 &objects_attr.attr,
5052 &objects_partial_attr.attr, 5053 &objects_partial_attr.attr,
5053 &partial_attr.attr, 5054 &partial_attr.attr,
5054 &cpu_slabs_attr.attr, 5055 &cpu_slabs_attr.attr,
5055 &ctor_attr.attr, 5056 &ctor_attr.attr,
5056 &aliases_attr.attr, 5057 &aliases_attr.attr,
5057 &align_attr.attr, 5058 &align_attr.attr,
5058 &hwcache_align_attr.attr, 5059 &hwcache_align_attr.attr,
5059 &reclaim_account_attr.attr, 5060 &reclaim_account_attr.attr,
5060 &destroy_by_rcu_attr.attr, 5061 &destroy_by_rcu_attr.attr,
5061 &shrink_attr.attr, 5062 &shrink_attr.attr,
5062 &reserved_attr.attr, 5063 &reserved_attr.attr,
5063 &slabs_cpu_partial_attr.attr, 5064 &slabs_cpu_partial_attr.attr,
5064 #ifdef CONFIG_SLUB_DEBUG 5065 #ifdef CONFIG_SLUB_DEBUG
5065 &total_objects_attr.attr, 5066 &total_objects_attr.attr,
5066 &slabs_attr.attr, 5067 &slabs_attr.attr,
5067 &sanity_checks_attr.attr, 5068 &sanity_checks_attr.attr,
5068 &trace_attr.attr, 5069 &trace_attr.attr,
5069 &red_zone_attr.attr, 5070 &red_zone_attr.attr,
5070 &poison_attr.attr, 5071 &poison_attr.attr,
5071 &store_user_attr.attr, 5072 &store_user_attr.attr,
5072 &validate_attr.attr, 5073 &validate_attr.attr,
5073 &alloc_calls_attr.attr, 5074 &alloc_calls_attr.attr,
5074 &free_calls_attr.attr, 5075 &free_calls_attr.attr,
5075 #endif 5076 #endif
5076 #ifdef CONFIG_ZONE_DMA 5077 #ifdef CONFIG_ZONE_DMA
5077 &cache_dma_attr.attr, 5078 &cache_dma_attr.attr,
5078 #endif 5079 #endif
5079 #ifdef CONFIG_NUMA 5080 #ifdef CONFIG_NUMA
5080 &remote_node_defrag_ratio_attr.attr, 5081 &remote_node_defrag_ratio_attr.attr,
5081 #endif 5082 #endif
5082 #ifdef CONFIG_SLUB_STATS 5083 #ifdef CONFIG_SLUB_STATS
5083 &alloc_fastpath_attr.attr, 5084 &alloc_fastpath_attr.attr,
5084 &alloc_slowpath_attr.attr, 5085 &alloc_slowpath_attr.attr,
5085 &free_fastpath_attr.attr, 5086 &free_fastpath_attr.attr,
5086 &free_slowpath_attr.attr, 5087 &free_slowpath_attr.attr,
5087 &free_frozen_attr.attr, 5088 &free_frozen_attr.attr,
5088 &free_add_partial_attr.attr, 5089 &free_add_partial_attr.attr,
5089 &free_remove_partial_attr.attr, 5090 &free_remove_partial_attr.attr,
5090 &alloc_from_partial_attr.attr, 5091 &alloc_from_partial_attr.attr,
5091 &alloc_slab_attr.attr, 5092 &alloc_slab_attr.attr,
5092 &alloc_refill_attr.attr, 5093 &alloc_refill_attr.attr,
5093 &alloc_node_mismatch_attr.attr, 5094 &alloc_node_mismatch_attr.attr,
5094 &free_slab_attr.attr, 5095 &free_slab_attr.attr,
5095 &cpuslab_flush_attr.attr, 5096 &cpuslab_flush_attr.attr,
5096 &deactivate_full_attr.attr, 5097 &deactivate_full_attr.attr,
5097 &deactivate_empty_attr.attr, 5098 &deactivate_empty_attr.attr,
5098 &deactivate_to_head_attr.attr, 5099 &deactivate_to_head_attr.attr,
5099 &deactivate_to_tail_attr.attr, 5100 &deactivate_to_tail_attr.attr,
5100 &deactivate_remote_frees_attr.attr, 5101 &deactivate_remote_frees_attr.attr,
5101 &deactivate_bypass_attr.attr, 5102 &deactivate_bypass_attr.attr,
5102 &order_fallback_attr.attr, 5103 &order_fallback_attr.attr,
5103 &cmpxchg_double_fail_attr.attr, 5104 &cmpxchg_double_fail_attr.attr,
5104 &cmpxchg_double_cpu_fail_attr.attr, 5105 &cmpxchg_double_cpu_fail_attr.attr,
5105 &cpu_partial_alloc_attr.attr, 5106 &cpu_partial_alloc_attr.attr,
5106 &cpu_partial_free_attr.attr, 5107 &cpu_partial_free_attr.attr,
5107 #endif 5108 #endif
5108 #ifdef CONFIG_FAILSLAB 5109 #ifdef CONFIG_FAILSLAB
5109 &failslab_attr.attr, 5110 &failslab_attr.attr,
5110 #endif 5111 #endif
5111 5112
5112 NULL 5113 NULL
5113 }; 5114 };
5114 5115
5115 static struct attribute_group slab_attr_group = { 5116 static struct attribute_group slab_attr_group = {
5116 .attrs = slab_attrs, 5117 .attrs = slab_attrs,
5117 }; 5118 };
5118 5119
5119 static ssize_t slab_attr_show(struct kobject *kobj, 5120 static ssize_t slab_attr_show(struct kobject *kobj,
5120 struct attribute *attr, 5121 struct attribute *attr,
5121 char *buf) 5122 char *buf)
5122 { 5123 {
5123 struct slab_attribute *attribute; 5124 struct slab_attribute *attribute;
5124 struct kmem_cache *s; 5125 struct kmem_cache *s;
5125 int err; 5126 int err;
5126 5127
5127 attribute = to_slab_attr(attr); 5128 attribute = to_slab_attr(attr);
5128 s = to_slab(kobj); 5129 s = to_slab(kobj);
5129 5130
5130 if (!attribute->show) 5131 if (!attribute->show)
5131 return -EIO; 5132 return -EIO;
5132 5133
5133 err = attribute->show(s, buf); 5134 err = attribute->show(s, buf);
5134 5135
5135 return err; 5136 return err;
5136 } 5137 }
5137 5138
static ssize_t slab_attr_store(struct kobject *kobj,
                                struct attribute *attr,
                                const char *buf, size_t len)
{
        struct slab_attribute *attribute;
        struct kmem_cache *s;
        int err;

        attribute = to_slab_attr(attr);
        s = to_slab(kobj);

        if (!attribute->store)
                return -EIO;

        err = attribute->store(s, buf, len);

        return err;
}

static void kmem_cache_release(struct kobject *kobj)
{
        struct kmem_cache *s = to_slab(kobj);

        kfree(s->name);
        kfree(s);
}

static const struct sysfs_ops slab_sysfs_ops = {
        .show = slab_attr_show,
        .store = slab_attr_store,
};

static struct kobj_type slab_ktype = {
        .sysfs_ops = &slab_sysfs_ops,
        .release = kmem_cache_release
};

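Every entry in slab_attrs surfaces as a plain file under /sys/kernel/slab/<cache>/ (the "slab" kset is added under kernel_kobj in slab_sysfs_init() below), and a read of such a file is dispatched through slab_sysfs_ops to slab_attr_show(). A minimal userspace sketch of that round trip; the cache directory "kmalloc-64" and the attribute "order" are illustrative examples and depend on the running kernel's configuration:

#include <stdio.h>

/* Hedged sketch: read one SLUB sysfs attribute from userspace.
 * The path below is an example; actual cache directories depend on
 * the running kernel's configuration and on cache merging. */
int main(void)
{
        const char *path = "/sys/kernel/slab/kmalloc-64/order";
        char buf[64];
        FILE *f = fopen(path, "r");

        if (!f) {
                perror(path);
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))   /* value formatted by the attribute's show() */
                printf("%s: %s", path, buf);
        fclose(f);
        return 0;
}

Writes to writable attributes take the symmetric path through slab_attr_store().
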
static int uevent_filter(struct kset *kset, struct kobject *kobj)
{
        struct kobj_type *ktype = get_ktype(kobj);

        if (ktype == &slab_ktype)
                return 1;
        return 0;
}

static const struct kset_uevent_ops slab_uevent_ops = {
        .filter = uevent_filter,
};

static struct kset *slab_kset;

#define ID_STR_LENGTH 64

/* Create a unique string id for a slab cache:
 *
 * Format       :[flags-]size
 */
static char *create_unique_id(struct kmem_cache *s)
{
        char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
        char *p = name;

        BUG_ON(!name);

        *p++ = ':';
        /*
         * First flags affecting slabcache operations. We will only
         * get here for aliasable slabs so we do not need to support
         * too many flags. The flags here must cover all flags that
         * are matched during merging to guarantee that the id is
         * unique.
         */
        if (s->flags & SLAB_CACHE_DMA)
                *p++ = 'd';
        if (s->flags & SLAB_RECLAIM_ACCOUNT)
                *p++ = 'a';
        if (s->flags & SLAB_DEBUG_FREE)
                *p++ = 'F';
        if (!(s->flags & SLAB_NOTRACK))
                *p++ = 't';
        if (p != name + 1)
                *p++ = '-';
        p += sprintf(p, "%07d", s->size);
        BUG_ON(p > name + ID_STR_LENGTH - 1);
        return name;
}

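So the id always starts with ':', followed by one character per matched flag in the fixed order d/a/F/t, a '-' whenever at least one flag character was emitted, and the object size zero-padded to seven digits. A small userspace mock-up of the same formatting, using placeholder flag bits rather than the kernel's SLAB_* values:

#include <stdio.h>

/* Hedged sketch mimicking create_unique_id()'s ":[flags-]size" format.
 * The flag bits are illustrative placeholders, not the kernel's values. */
#define EX_CACHE_DMA        0x1
#define EX_RECLAIM_ACCOUNT  0x2
#define EX_DEBUG_FREE       0x4
#define EX_NOTRACK          0x8

static void format_id(char *buf, unsigned long flags, int size)
{
        char *p = buf;

        *p++ = ':';
        if (flags & EX_CACHE_DMA)
                *p++ = 'd';
        if (flags & EX_RECLAIM_ACCOUNT)
                *p++ = 'a';
        if (flags & EX_DEBUG_FREE)
                *p++ = 'F';
        if (!(flags & EX_NOTRACK))
                *p++ = 't';
        if (p != buf + 1)
                *p++ = '-';
        sprintf(p, "%07d", size);
}

int main(void)
{
        char id[64];

        /* A DMA, reclaim-accounted, tracked cache of size 192 formats as ":dat-0000192". */
        format_id(id, EX_CACHE_DMA | EX_RECLAIM_ACCOUNT, 192);
        printf("%s\n", id);
        return 0;
}
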
static int sysfs_slab_add(struct kmem_cache *s)
{
        int err;
        const char *name;
        int unmergeable;

        if (slab_state < SYSFS)
                /* Defer until later */
                return 0;

        unmergeable = slab_unmergeable(s);
        if (unmergeable) {
                /*
                 * Slabcache can never be merged so we can use the name proper.
                 * This is typically the case for debug situations. In that
                 * case we can catch duplicate names easily.
                 */
                sysfs_remove_link(&slab_kset->kobj, s->name);
                name = s->name;
        } else {
                /*
                 * Create a unique name for the slab as a target
                 * for the symlinks.
                 */
                name = create_unique_id(s);
        }

        s->kobj.kset = slab_kset;
        err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
        if (err) {
                kobject_put(&s->kobj);
                return err;
        }

        err = sysfs_create_group(&s->kobj, &slab_attr_group);
        if (err) {
                kobject_del(&s->kobj);
                kobject_put(&s->kobj);
                return err;
        }
        kobject_uevent(&s->kobj, KOBJ_ADD);
        if (!unmergeable) {
                /* Setup first alias */
                sysfs_slab_alias(s, s->name);
                kfree(name);
        }
        return 0;
}

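For mergeable caches the sysfs directory created above is named after the unique id, and the human-readable cache name only exists as a symlink installed via sysfs_slab_alias(). A hedged userspace check of that layout; "kmalloc-96" is just an example name and may be a plain directory on kernels that disable merging:

#include <stdio.h>
#include <unistd.h>

/* Hedged sketch: show where a (possibly merged) cache name points.
 * "kmalloc-96" is an illustrative name; merged caches typically resolve
 * to a ":<flags>-<size>" directory named by create_unique_id(). */
int main(void)
{
        char target[256];
        ssize_t n = readlink("/sys/kernel/slab/kmalloc-96", target, sizeof(target) - 1);

        if (n < 0) {
                /* Not a symlink (unmergeable cache) or the cache does not exist. */
                perror("readlink");
                return 1;
        }
        target[n] = '\0';
        printf("kmalloc-96 -> %s\n", target);
        return 0;
}
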
static void sysfs_slab_remove(struct kmem_cache *s)
{
        if (slab_state < SYSFS)
                /*
                 * Sysfs has not been setup yet so no need to remove the
                 * cache from sysfs.
                 */
                return;

        kobject_uevent(&s->kobj, KOBJ_REMOVE);
        kobject_del(&s->kobj);
        kobject_put(&s->kobj);
}

/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we lose that information.
 */
struct saved_alias {
        struct kmem_cache *s;
        const char *name;
        struct saved_alias *next;
};

static struct saved_alias *alias_list;

static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
{
        struct saved_alias *al;

        if (slab_state == SYSFS) {
                /*
                 * If we have a leftover link then remove it.
                 */
                sysfs_remove_link(&slab_kset->kobj, name);
                return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
        }

        al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
        if (!al)
                return -ENOMEM;

        al->s = s;
        al->name = name;
        al->next = alias_list;
        alias_list = al;
        return 0;
}

static int __init slab_sysfs_init(void)
{
        struct kmem_cache *s;
        int err;

        down_write(&slub_lock);

        slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
        if (!slab_kset) {
                up_write(&slub_lock);
                printk(KERN_ERR "Cannot register slab subsystem.\n");
                return -ENOSYS;
        }

        slab_state = SYSFS;

        list_for_each_entry(s, &slab_caches, list) {
                err = sysfs_slab_add(s);
                if (err)
                        printk(KERN_ERR "SLUB: Unable to add boot slab %s"
                                " to sysfs\n", s->name);
        }

        while (alias_list) {
                struct saved_alias *al = alias_list;

                alias_list = alias_list->next;
                err = sysfs_slab_alias(al->s, al->name);
                if (err)
                        printk(KERN_ERR "SLUB: Unable to add boot slab alias"
                                " %s to sysfs\n", al->name);
                kfree(al);
        }

        up_write(&slub_lock);
        resiliency_test();
        return 0;
}

__initcall(slab_sysfs_init);
#endif /* CONFIG_SYSFS */

/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLABINFO
static void print_slabinfo_header(struct seq_file *m)
{
        seq_puts(m, "slabinfo - version: 2.1\n");
        seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
                 "<objperslab> <pagesperslab>");
        seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
        seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
        seq_putc(m, '\n');
}

static void *s_start(struct seq_file *m, loff_t *pos)
{
        loff_t n = *pos;

        down_read(&slub_lock);
        if (!n)
                print_slabinfo_header(m);

        return seq_list_start(&slab_caches, *pos);
}

static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
        return seq_list_next(p, &slab_caches, pos);
}

static void s_stop(struct seq_file *m, void *p)
{
        up_read(&slub_lock);
}

static int s_show(struct seq_file *m, void *p)
{
        unsigned long nr_partials = 0;
        unsigned long nr_slabs = 0;
        unsigned long nr_inuse = 0;
        unsigned long nr_objs = 0;
        unsigned long nr_free = 0;
        struct kmem_cache *s;
        int node;

        s = list_entry(p, struct kmem_cache, list);

        for_each_online_node(node) {
                struct kmem_cache_node *n = get_node(s, node);

                if (!n)
                        continue;

                nr_partials += n->nr_partial;
                nr_slabs += atomic_long_read(&n->nr_slabs);
                nr_objs += atomic_long_read(&n->total_objects);
                nr_free += count_partial(n, count_free);
        }

        nr_inuse = nr_objs - nr_free;

        seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
                   nr_objs, s->size, oo_objects(s->oo),
                   (1 << oo_order(s->oo)));
        seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
        seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
                   0UL);
        seq_putc(m, '\n');
        return 0;
}

static const struct seq_operations slabinfo_op = {
        .start = s_start,
        .next = s_next,
        .stop = s_stop,
        .show = s_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
        .open = slabinfo_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = seq_release,
};

static int __init slab_proc_init(void)
{
        proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
        return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */
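
Once slab_proc_init() has registered the file, every line userspace reads from /proc/slabinfo is produced by s_show() in the version 2.1 format printed by print_slabinfo_header(). A minimal reader sketch, assuming CONFIG_SLABINFO and sufficient privileges (the file is created with mode S_IRUSR):

#include <stdio.h>

/* Hedged sketch: dump the first few lines of /proc/slabinfo as emitted
 * by print_slabinfo_header() and s_show(). */
int main(void)
{
        char line[512];
        int count = 0;
        FILE *f = fopen("/proc/slabinfo", "r");

        if (!f) {
                perror("/proc/slabinfo");   /* requires CONFIG_SLABINFO and, typically, root */
                return 1;
        }
        while (fgets(line, sizeof(line), f) && count++ < 5)
                fputs(line, stdout);
        fclose(f);
        return 0;
}

The tunables and sharedavail columns are hardwired to zero in s_show() above, which keeps the /proc/slabinfo layout intact even though SLUB does not implement those SLAB-era per-cache tunables.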