Commit af4f8ba31a4e328677bec493ceeaf112ca193b65

Authored by Linus Torvalds

Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux

Pull slab updates from Pekka Enberg:
 "Mainly a bunch of SLUB fixes from Joonsoo Kim"

* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux:
  slub: use __SetPageSlab function to set PG_slab flag
  slub: fix a memory leak in get_partial_node()
  slub: remove unused argument of init_kmem_cache_node()
  slub: fix a possible memory leak
  Documentations: Fix slabinfo.c directory in vm/slub.txt
  slub: fix incorrect return type of get_any_partial()

Showing 2 changed files

Documentation/vm/slub.txt
1 Short users guide for SLUB 1 Short users guide for SLUB
2 -------------------------- 2 --------------------------
3 3
4 The basic philosophy of SLUB is very different from SLAB. SLAB 4 The basic philosophy of SLUB is very different from SLAB. SLAB
5 requires rebuilding the kernel to activate debug options for all 5 requires rebuilding the kernel to activate debug options for all
6 slab caches. SLUB always includes full debugging but it is off by default. 6 slab caches. SLUB always includes full debugging but it is off by default.
7 SLUB can enable debugging only for selected slabs in order to avoid 7 SLUB can enable debugging only for selected slabs in order to avoid
8 an impact on overall system performance which may make a bug more 8 an impact on overall system performance which may make a bug more
9 difficult to find. 9 difficult to find.
10 10
11 In order to switch debugging on one can add an option "slub_debug" 11 In order to switch debugging on one can add an option "slub_debug"
12 to the kernel command line. That will enable full debugging for 12 to the kernel command line. That will enable full debugging for
13 all slabs. 13 all slabs.
14 14
15 Typically one would then use the "slabinfo" command to get statistical 15 Typically one would then use the "slabinfo" command to get statistical
16 data and perform operations on the slabs. By default slabinfo only lists 16 data and perform operations on the slabs. By default slabinfo only lists
17 slabs that have data in them. See "slabinfo -h" for more options when 17 slabs that have data in them. See "slabinfo -h" for more options when
18 running the command. slabinfo can be compiled with 18 running the command. slabinfo can be compiled with
19 19
20 gcc -o slabinfo tools/slub/slabinfo.c 20 gcc -o slabinfo tools/vm/slabinfo.c
21 21
22 Some of the modes of operation of slabinfo require that slub debugging 22 Some of the modes of operation of slabinfo require that slub debugging
23 be enabled on the command line. F.e. no tracking information will be 23 be enabled on the command line. F.e. no tracking information will be
24 available without debugging on and validation can only partially 24 available without debugging on and validation can only partially
25 be performed if debugging was not switched on. 25 be performed if debugging was not switched on.
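
A minimal session, assuming the commands are run from the top of the kernel
source tree with slub_debug enabled, might look like:

	gcc -o slabinfo tools/vm/slabinfo.c
	./slabinfo -a
	./slabinfo -v

where -a lists the caches that were merged and -v validates all objects,
with the results going to the syslog.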
26 26
27 Some more sophisticated uses of slub_debug: 27 Some more sophisticated uses of slub_debug:
28 ------------------------------------------- 28 -------------------------------------------
29 29
30 Parameters may be given to slub_debug. If none is specified then full 30 Parameters may be given to slub_debug. If none is specified then full
31 debugging is enabled. Format: 31 debugging is enabled. Format:
32 32
33 slub_debug=<Debug-Options> Enable options for all slabs 33 slub_debug=<Debug-Options> Enable options for all slabs
34 slub_debug=<Debug-Options>,<slab name> 34 slub_debug=<Debug-Options>,<slab name>
35 Enable options only for select slabs 35 Enable options only for select slabs
36 36
37 Possible debug options are 37 Possible debug options are
38 F Sanity checks on (enables SLAB_DEBUG_FREE. Sorry 38 F Sanity checks on (enables SLAB_DEBUG_FREE. Sorry
39 SLAB legacy issues) 39 SLAB legacy issues)
40 Z Red zoning 40 Z Red zoning
41 P Poisoning (object and padding) 41 P Poisoning (object and padding)
42 U User tracking (free and alloc) 42 U User tracking (free and alloc)
43 T Trace (please only use on single slabs) 43 T Trace (please only use on single slabs)
44 A Toggle failslab filter mark for the cache 44 A Toggle failslab filter mark for the cache
45 O Switch debugging off for caches that would have 45 O Switch debugging off for caches that would have
46 caused higher minimum slab orders 46 caused higher minimum slab orders
47 - Switch all debugging off (useful if the kernel is 47 - Switch all debugging off (useful if the kernel is
48 configured with CONFIG_SLUB_DEBUG_ON) 48 configured with CONFIG_SLUB_DEBUG_ON)
49 49
50 F.e. in order to boot just with sanity checks and red zoning one would specify: 50 F.e. in order to boot just with sanity checks and red zoning one would specify:
51 51
52 slub_debug=FZ 52 slub_debug=FZ
53 53
54 Trying to find an issue in the dentry cache? Try 54 Trying to find an issue in the dentry cache? Try
55 55
56 slub_debug=,dentry 56 slub_debug=,dentry
57 57
58 to only enable debugging on the dentry cache. 58 to only enable debugging on the dentry cache.
59 59
60 Red zoning and tracking may realign the slab. We can just apply sanity checks 60 Red zoning and tracking may realign the slab. We can just apply sanity checks
61 to the dentry cache with 61 to the dentry cache with
62 62
63 slub_debug=F,dentry 63 slub_debug=F,dentry
64 64
65 Debugging options may require the minimum possible slab order to increase as 65 Debugging options may require the minimum possible slab order to increase as
66 a result of storing the metadata (for example, caches with PAGE_SIZE object 66 a result of storing the metadata (for example, caches with PAGE_SIZE object
67 sizes). This has a higher likelihood of resulting in slab allocation errors 67 sizes). This has a higher likelihood of resulting in slab allocation errors
68 in low memory situations or if there's high fragmentation of memory. To 68 in low memory situations or if there's high fragmentation of memory. To
69 switch off debugging for such caches by default, use 69 switch off debugging for such caches by default, use
70 70
71 slub_debug=O 71 slub_debug=O
72 72
73 In case you forgot to enable debugging on the kernel command line: It is 73 In case you forgot to enable debugging on the kernel command line: It is
74 possible to enable debugging manually when the kernel is up. Look at the 74 possible to enable debugging manually when the kernel is up. Look at the
75 contents of: 75 contents of:
76 76
77 /sys/kernel/slab/<slab name>/ 77 /sys/kernel/slab/<slab name>/
78 78
79 Look at the writable files. Writing 1 to them will enable the 79 Look at the writable files. Writing 1 to them will enable the
80 corresponding debug option. All options can be set on a slab that does 80 corresponding debug option. All options can be set on a slab that does
81 not contain objects. If the slab already contains objects then only sanity checks 81 not contain objects. If the slab already contains objects then only sanity checks
82 and tracing may be enabled. The other options may cause the realignment 82 and tracing may be enabled. The other options may cause the realignment
83 of objects. 83 of objects.
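
For example, assuming the usual SLUB sysfs attribute names, sanity checks
could be switched on for the dentry cache at runtime with:

	echo 1 > /sys/kernel/slab/dentry/sanity_checks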
84 84
85 Careful with tracing: It may spew out lots of information and never stop if 85 Careful with tracing: It may spew out lots of information and never stop if
86 used on the wrong slab. 86 used on the wrong slab.
87 87
88 Slab merging 88 Slab merging
89 ------------ 89 ------------
90 90
91 If no debug options are specified then SLUB may merge similar slabs together 91 If no debug options are specified then SLUB may merge similar slabs together
92 in order to reduce overhead and increase cache hotness of objects. 92 in order to reduce overhead and increase cache hotness of objects.
93 slabinfo -a displays which slabs were merged together. 93 slabinfo -a displays which slabs were merged together.
94 94
95 Slab validation 95 Slab validation
96 --------------- 96 ---------------
97 97
98 SLUB can validate all objects if the kernel was booted with slub_debug. In 98 SLUB can validate all objects if the kernel was booted with slub_debug. In
99 order to do so you must have the slabinfo tool. Then you can do 99 order to do so you must have the slabinfo tool. Then you can do
100 100
101 slabinfo -v 101 slabinfo -v
102 102
103 which will test all objects. Output will be generated to the syslog. 103 which will test all objects. Output will be generated to the syslog.
104 104
105 This also works in a more limited way if boot was without slab debug. 105 This also works in a more limited way if boot was without slab debug.
106 In that case slabinfo -v simply tests all reachable objects. Usually 106 In that case slabinfo -v simply tests all reachable objects. Usually
107 these are in the cpu slabs and the partial slabs. Full slabs are not 107 these are in the cpu slabs and the partial slabs. Full slabs are not
108 tracked by SLUB in a non debug situation. 108 tracked by SLUB in a non debug situation.
109 109
110 Getting more performance 110 Getting more performance
111 ------------------------ 111 ------------------------
112 112
113 To some degree SLUB's performance is limited by the need to take the 113 To some degree SLUB's performance is limited by the need to take the
114 list_lock once in a while to deal with partial slabs. That overhead is 114 list_lock once in a while to deal with partial slabs. That overhead is
115 governed by the order of the allocation for each slab. The allocations 115 governed by the order of the allocation for each slab. The allocations
116 can be influenced by kernel parameters: 116 can be influenced by kernel parameters:
117 117
118 slub_min_objects=x (default 4) 118 slub_min_objects=x (default 4)
119 slub_min_order=x (default 0) 119 slub_min_order=x (default 0)
120 slub_max_order=x (default 3 (PAGE_ALLOC_COSTLY_ORDER)) 120 slub_max_order=x (default 3 (PAGE_ALLOC_COSTLY_ORDER))
121 121
122 slub_min_objects allows specifying how many objects must at least fit 122 slub_min_objects allows specifying how many objects must at least fit
123 into one slab in order for the allocation order to be acceptable. 123 into one slab in order for the allocation order to be acceptable.
124 In general slub will be able to perform this number of allocations 124 In general slub will be able to perform this number of allocations
125 on a slab without consulting centralized resources (list_lock) where 125 on a slab without consulting centralized resources (list_lock) where
126 contention may occur. 126 contention may occur.
127 127
128 slub_min_order specifies a minimum order of slabs. It has a similar effect to 128 slub_min_order specifies a minimum order of slabs. It has a similar effect to
129 slub_min_objects. 129 slub_min_objects.
130 130
131 slub_max_order specifies the order at which slub_min_objects should no 131 slub_max_order specifies the order at which slub_min_objects should no
132 longer be checked. This is useful to avoid SLUB trying to generate 132 longer be checked. This is useful to avoid SLUB trying to generate
133 super large order pages to fit slub_min_objects of a slab cache with 133 super large order pages to fit slub_min_objects of a slab cache with
134 large object sizes into one high order page. Setting the command line 134 large object sizes into one high order page. Setting the command line
135 parameter debug_guardpage_minorder=N (N > 0) forces 135 parameter debug_guardpage_minorder=N (N > 0) forces
136 slub_max_order to 0, which results in the minimum possible order being 136 slub_max_order to 0, which results in the minimum possible order being
137 used for slab allocation. 137 used for slab allocation.
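
As an illustration, a boot command line asking for at least 16 objects per
slab while capping the slab order at 2 could combine these parameters:

	slub_min_objects=16 slub_max_order=2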
138 138
139 SLUB Debug output 139 SLUB Debug output
140 ----------------- 140 -----------------
141 141
142 Here is a sample of slub debug output: 142 Here is a sample of slub debug output:
143 143
144 ==================================================================== 144 ====================================================================
145 BUG kmalloc-8: Redzone overwritten 145 BUG kmalloc-8: Redzone overwritten
146 -------------------------------------------------------------------- 146 --------------------------------------------------------------------
147 147
148 INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc 148 INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
149 INFO: Slab 0xc528c530 flags=0x400000c3 inuse=61 fp=0xc90f6d58 149 INFO: Slab 0xc528c530 flags=0x400000c3 inuse=61 fp=0xc90f6d58
150 INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58 150 INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
151 INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554 151 INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
152 152
153 Bytes b4 0xc90f6d10: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ 153 Bytes b4 0xc90f6d10: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
154 Object 0xc90f6d20: 31 30 31 39 2e 30 30 35 1019.005 154 Object 0xc90f6d20: 31 30 31 39 2e 30 30 35 1019.005
155 Redzone 0xc90f6d28: 00 cc cc cc . 155 Redzone 0xc90f6d28: 00 cc cc cc .
156 Padding 0xc90f6d50: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ 156 Padding 0xc90f6d50: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
157 157
158 [<c010523d>] dump_trace+0x63/0x1eb 158 [<c010523d>] dump_trace+0x63/0x1eb
159 [<c01053df>] show_trace_log_lvl+0x1a/0x2f 159 [<c01053df>] show_trace_log_lvl+0x1a/0x2f
160 [<c010601d>] show_trace+0x12/0x14 160 [<c010601d>] show_trace+0x12/0x14
161 [<c0106035>] dump_stack+0x16/0x18 161 [<c0106035>] dump_stack+0x16/0x18
162 [<c017e0fa>] object_err+0x143/0x14b 162 [<c017e0fa>] object_err+0x143/0x14b
163 [<c017e2cc>] check_object+0x66/0x234 163 [<c017e2cc>] check_object+0x66/0x234
164 [<c017eb43>] __slab_free+0x239/0x384 164 [<c017eb43>] __slab_free+0x239/0x384
165 [<c017f446>] kfree+0xa6/0xc6 165 [<c017f446>] kfree+0xa6/0xc6
166 [<c02e2335>] get_modalias+0xb9/0xf5 166 [<c02e2335>] get_modalias+0xb9/0xf5
167 [<c02e23b7>] dmi_dev_uevent+0x27/0x3c 167 [<c02e23b7>] dmi_dev_uevent+0x27/0x3c
168 [<c027866a>] dev_uevent+0x1ad/0x1da 168 [<c027866a>] dev_uevent+0x1ad/0x1da
169 [<c0205024>] kobject_uevent_env+0x20a/0x45b 169 [<c0205024>] kobject_uevent_env+0x20a/0x45b
170 [<c020527f>] kobject_uevent+0xa/0xf 170 [<c020527f>] kobject_uevent+0xa/0xf
171 [<c02779f1>] store_uevent+0x4f/0x58 171 [<c02779f1>] store_uevent+0x4f/0x58
172 [<c027758e>] dev_attr_store+0x29/0x2f 172 [<c027758e>] dev_attr_store+0x29/0x2f
173 [<c01bec4f>] sysfs_write_file+0x16e/0x19c 173 [<c01bec4f>] sysfs_write_file+0x16e/0x19c
174 [<c0183ba7>] vfs_write+0xd1/0x15a 174 [<c0183ba7>] vfs_write+0xd1/0x15a
175 [<c01841d7>] sys_write+0x3d/0x72 175 [<c01841d7>] sys_write+0x3d/0x72
176 [<c0104112>] sysenter_past_esp+0x5f/0x99 176 [<c0104112>] sysenter_past_esp+0x5f/0x99
177 [<b7f7b410>] 0xb7f7b410 177 [<b7f7b410>] 0xb7f7b410
178 ======================= 178 =======================
179 179
180 FIX kmalloc-8: Restoring Redzone 0xc90f6d28-0xc90f6d2b=0xcc 180 FIX kmalloc-8: Restoring Redzone 0xc90f6d28-0xc90f6d2b=0xcc
181 181
182 If SLUB encounters a corrupted object (full detection requires the kernel 182 If SLUB encounters a corrupted object (full detection requires the kernel
183 to be booted with slub_debug) then the following output will be dumped 183 to be booted with slub_debug) then the following output will be dumped
184 into the syslog: 184 into the syslog:
185 185
186 1. Description of the problem encountered 186 1. Description of the problem encountered
187 187
188 This will be a message in the system log starting with 188 This will be a message in the system log starting with
189 189
190 =============================================== 190 ===============================================
191 BUG <slab cache affected>: <What went wrong> 191 BUG <slab cache affected>: <What went wrong>
192 ----------------------------------------------- 192 -----------------------------------------------
193 193
194 INFO: <corruption start>-<corruption_end> <more info> 194 INFO: <corruption start>-<corruption_end> <more info>
195 INFO: Slab <address> <slab information> 195 INFO: Slab <address> <slab information>
196 INFO: Object <address> <object information> 196 INFO: Object <address> <object information>
197 INFO: Allocated in <kernel function> age=<jiffies since alloc> cpu=<allocated by 197 INFO: Allocated in <kernel function> age=<jiffies since alloc> cpu=<allocated by
198 cpu> pid=<pid of the process> 198 cpu> pid=<pid of the process>
199 INFO: Freed in <kernel function> age=<jiffies since free> cpu=<freed by cpu> 199 INFO: Freed in <kernel function> age=<jiffies since free> cpu=<freed by cpu>
200 pid=<pid of the process> 200 pid=<pid of the process>
201 201
202 (Object allocation / free information is only available if SLAB_STORE_USER is 202 (Object allocation / free information is only available if SLAB_STORE_USER is
203 set for the slab. slub_debug sets that option) 203 set for the slab. slub_debug sets that option)
204 204
205 2. The object contents if an object was involved. 205 2. The object contents if an object was involved.
206 206
207 Various types of lines can follow the BUG SLUB line: 207 Various types of lines can follow the BUG SLUB line:
208 208
209 Bytes b4 <address> : <bytes> 209 Bytes b4 <address> : <bytes>
210 Shows a few bytes before the object where the problem was detected. 210 Shows a few bytes before the object where the problem was detected.
211 Can be useful if the corruption does not stop with the start of the 211 Can be useful if the corruption does not stop with the start of the
212 object. 212 object.
213 213
214 Object <address> : <bytes> 214 Object <address> : <bytes>
215 The bytes of the object. If the object is inactive then the bytes 215 The bytes of the object. If the object is inactive then the bytes
216 typically contain poison values. Any non-poison value shows a 216 typically contain poison values. Any non-poison value shows a
217 corruption by a write after free. 217 corruption by a write after free.
218 218
219 Redzone <address> : <bytes> 219 Redzone <address> : <bytes>
220 The Redzone following the object. The Redzone is used to detect 220 The Redzone following the object. The Redzone is used to detect
221 writes after the object. All bytes should always have the same 221 writes after the object. All bytes should always have the same
222 value. If there is any deviation then it is due to a write after 222 value. If there is any deviation then it is due to a write after
223 the object boundary. 223 the object boundary.
224 224
225 (Redzone information is only available if SLAB_RED_ZONE is set. 225 (Redzone information is only available if SLAB_RED_ZONE is set.
226 slub_debug sets that option) 226 slub_debug sets that option)
227 227
228 Padding <address> : <bytes> 228 Padding <address> : <bytes>
229 Unused data to fill up the space in order to get the next object 229 Unused data to fill up the space in order to get the next object
230 properly aligned. In the debug case we make sure that there are 230 properly aligned. In the debug case we make sure that there are
231 at least 4 bytes of padding. This allows the detection of writes 231 at least 4 bytes of padding. This allows the detection of writes
232 before the object. 232 before the object.
233 233
234 3. A stackdump 234 3. A stackdump
235 235
236 The stackdump describes the location where the error was detected. The cause 236 The stackdump describes the location where the error was detected. The cause
237 of the corruption is more likely to be found by looking at the function that 237 of the corruption is more likely to be found by looking at the function that
238 allocated or freed the object. 238 allocated or freed the object.
239 239
240 4. Report on how the problem was dealt with in order to ensure the continued 240 4. Report on how the problem was dealt with in order to ensure the continued
241 operation of the system. 241 operation of the system.
242 242
243 These are messages in the system log beginning with 243 These are messages in the system log beginning with
244 244
245 FIX <slab cache affected>: <corrective action taken> 245 FIX <slab cache affected>: <corrective action taken>
246 246
247 In the above sample SLUB found that the Redzone of an active object has 247 In the above sample SLUB found that the Redzone of an active object has
248 been overwritten. Here a string of 8 characters was written into a slab that 248 been overwritten. Here a string of 8 characters was written into a slab that
249 has the length of 8 characters. However, an 8 character string needs a 249 has the length of 8 characters. However, an 8 character string needs a
250 terminating 0. That zero has overwritten the first byte of the Redzone field. 250 terminating 0. That zero has overwritten the first byte of the Redzone field.
251 After reporting the details of the issue encountered the FIX SLUB message 251 After reporting the details of the issue encountered the FIX SLUB message
252 tells us that SLUB has restored the Redzone to its proper value and then 252 tells us that SLUB has restored the Redzone to its proper value and then
253 system operations continue. 253 system operations continue.
254 254
255 Emergency operations: 255 Emergency operations:
256 --------------------- 256 ---------------------
257 257
258 Minimal debugging (sanity checks alone) can be enabled by booting with 258 Minimal debugging (sanity checks alone) can be enabled by booting with
259 259
260 slub_debug=F 260 slub_debug=F
261 261
262 This will generally be enough to enable the resiliency features of slub, 262 This will generally be enough to enable the resiliency features of slub,
263 which will keep the system running even if a bad kernel component 263 which will keep the system running even if a bad kernel component
264 keeps corrupting objects. This may be important for production systems. 264 keeps corrupting objects. This may be important for production systems.
265 Performance will be impacted by the sanity checks and there will be a 265 Performance will be impacted by the sanity checks and there will be a
266 continual stream of error messages to the syslog but no additional memory 266 continual stream of error messages to the syslog but no additional memory
267 will be used (unlike full debugging). 267 will be used (unlike full debugging).
268 268
269 No guarantees. The kernel component still needs to be fixed. Performance 269 No guarantees. The kernel component still needs to be fixed. Performance
270 may be optimized further by locating the slab that experiences corruption 270 may be optimized further by locating the slab that experiences corruption
271 and enabling debugging only for that cache 271 and enabling debugging only for that cache
272 272
273 I.e. 273 I.e.
274 274
275 slub_debug=F,dentry 275 slub_debug=F,dentry
276 276
277 If the corruption occurs by writing after the end of the object then it 277 If the corruption occurs by writing after the end of the object then it
278 may be advisable to enable a Redzone to avoid corrupting the beginning 278 may be advisable to enable a Redzone to avoid corrupting the beginning
279 of other objects. 279 of other objects.
280 280
281 slub_debug=FZ,dentry 281 slub_debug=FZ,dentry
282 282
283 Christoph Lameter, May 30, 2007 283 Christoph Lameter, May 30, 2007
284 284
1 /* 1 /*
2 * SLUB: A slab allocator that limits cache line use instead of queuing 2 * SLUB: A slab allocator that limits cache line use instead of queuing
3 * objects in per cpu and per node lists. 3 * objects in per cpu and per node lists.
4 * 4 *
5 * The allocator synchronizes using per slab locks or atomic operations 5 * The allocator synchronizes using per slab locks or atomic operations
6 * and only uses a centralized lock to manage a pool of partial slabs. 6 * and only uses a centralized lock to manage a pool of partial slabs.
7 * 7 *
8 * (C) 2007 SGI, Christoph Lameter 8 * (C) 2007 SGI, Christoph Lameter
9 * (C) 2011 Linux Foundation, Christoph Lameter 9 * (C) 2011 Linux Foundation, Christoph Lameter
10 */ 10 */
11 11
12 #include <linux/mm.h> 12 #include <linux/mm.h>
13 #include <linux/swap.h> /* struct reclaim_state */ 13 #include <linux/swap.h> /* struct reclaim_state */
14 #include <linux/module.h> 14 #include <linux/module.h>
15 #include <linux/bit_spinlock.h> 15 #include <linux/bit_spinlock.h>
16 #include <linux/interrupt.h> 16 #include <linux/interrupt.h>
17 #include <linux/bitops.h> 17 #include <linux/bitops.h>
18 #include <linux/slab.h> 18 #include <linux/slab.h>
19 #include <linux/proc_fs.h> 19 #include <linux/proc_fs.h>
20 #include <linux/seq_file.h> 20 #include <linux/seq_file.h>
21 #include <linux/kmemcheck.h> 21 #include <linux/kmemcheck.h>
22 #include <linux/cpu.h> 22 #include <linux/cpu.h>
23 #include <linux/cpuset.h> 23 #include <linux/cpuset.h>
24 #include <linux/mempolicy.h> 24 #include <linux/mempolicy.h>
25 #include <linux/ctype.h> 25 #include <linux/ctype.h>
26 #include <linux/debugobjects.h> 26 #include <linux/debugobjects.h>
27 #include <linux/kallsyms.h> 27 #include <linux/kallsyms.h>
28 #include <linux/memory.h> 28 #include <linux/memory.h>
29 #include <linux/math64.h> 29 #include <linux/math64.h>
30 #include <linux/fault-inject.h> 30 #include <linux/fault-inject.h>
31 #include <linux/stacktrace.h> 31 #include <linux/stacktrace.h>
32 #include <linux/prefetch.h> 32 #include <linux/prefetch.h>
33 33
34 #include <trace/events/kmem.h> 34 #include <trace/events/kmem.h>
35 35
36 /* 36 /*
37 * Lock order: 37 * Lock order:
38 * 1. slub_lock (Global Semaphore) 38 * 1. slub_lock (Global Semaphore)
39 * 2. node->list_lock 39 * 2. node->list_lock
40 * 3. slab_lock(page) (Only on some arches and for debugging) 40 * 3. slab_lock(page) (Only on some arches and for debugging)
41 * 41 *
42 * slub_lock 42 * slub_lock
43 * 43 *
44 * The role of the slub_lock is to protect the list of all the slabs 44 * The role of the slub_lock is to protect the list of all the slabs
45 * and to synchronize major metadata changes to slab cache structures. 45 * and to synchronize major metadata changes to slab cache structures.
46 * 46 *
47 * The slab_lock is only used for debugging and on arches that do not 47 * The slab_lock is only used for debugging and on arches that do not
48 * have the ability to do a cmpxchg_double. It only protects the second 48 * have the ability to do a cmpxchg_double. It only protects the second
49 * double word in the page struct. Meaning 49 * double word in the page struct. Meaning
50 * A. page->freelist -> List of free objects in a page 50 * A. page->freelist -> List of free objects in a page
51 * B. page->counters -> Counters of objects 51 * B. page->counters -> Counters of objects
52 * C. page->frozen -> frozen state 52 * C. page->frozen -> frozen state
53 * 53 *
54 * If a slab is frozen then it is exempt from list management. It is not 54 * If a slab is frozen then it is exempt from list management. It is not
55 * on any list. The processor that froze the slab is the one who can 55 * on any list. The processor that froze the slab is the one who can
56 * perform list operations on the page. Other processors may put objects 56 * perform list operations on the page. Other processors may put objects
57 * onto the freelist but the processor that froze the slab is the only 57 * onto the freelist but the processor that froze the slab is the only
58 * one that can retrieve the objects from the page's freelist. 58 * one that can retrieve the objects from the page's freelist.
59 * 59 *
60 * The list_lock protects the partial and full list on each node and 60 * The list_lock protects the partial and full list on each node and
61 the partial slab counter. If taken then no new slabs may be added to or 61 the partial slab counter. If taken then no new slabs may be added to or
62 removed from the lists nor may the number of partial slabs be modified. 62 removed from the lists nor may the number of partial slabs be modified.
63 * (Note that the total number of slabs is an atomic value that may be 63 * (Note that the total number of slabs is an atomic value that may be
64 * modified without taking the list lock). 64 * modified without taking the list lock).
65 * 65 *
66 * The list_lock is a centralized lock and thus we avoid taking it as 66 * The list_lock is a centralized lock and thus we avoid taking it as
67 * much as possible. As long as SLUB does not have to handle partial 67 * much as possible. As long as SLUB does not have to handle partial
68 * slabs, operations can continue without any centralized lock. F.e. 68 * slabs, operations can continue without any centralized lock. F.e.
69 * allocating a long series of objects that fill up slabs does not require 69 * allocating a long series of objects that fill up slabs does not require
70 * the list lock. 70 * the list lock.
71 * Interrupts are disabled during allocation and deallocation in order to 71 * Interrupts are disabled during allocation and deallocation in order to
72 * make the slab allocator safe to use in the context of an irq. In addition 72 * make the slab allocator safe to use in the context of an irq. In addition
73 * interrupts are disabled to ensure that the processor does not change 73 * interrupts are disabled to ensure that the processor does not change
74 * while handling per_cpu slabs, due to kernel preemption. 74 * while handling per_cpu slabs, due to kernel preemption.
75 * 75 *
76 * SLUB assigns one slab for allocation to each processor. 76 * SLUB assigns one slab for allocation to each processor.
77 * Allocations only occur from these slabs called cpu slabs. 77 * Allocations only occur from these slabs called cpu slabs.
78 * 78 *
79 * Slabs with free elements are kept on a partial list and during regular 79 * Slabs with free elements are kept on a partial list and during regular
80 * operations no list for full slabs is used. If an object in a full slab is 80 * operations no list for full slabs is used. If an object in a full slab is
81 * freed then the slab will show up again on the partial lists. 81 * freed then the slab will show up again on the partial lists.
82 * We track full slabs for debugging purposes though because otherwise we 82 * We track full slabs for debugging purposes though because otherwise we
83 * cannot scan all objects. 83 * cannot scan all objects.
84 * 84 *
85 * Slabs are freed when they become empty. Teardown and setup is 85 * Slabs are freed when they become empty. Teardown and setup is
86 * minimal so we rely on the page allocators per cpu caches for 86 * minimal so we rely on the page allocators per cpu caches for
87 * fast frees and allocs. 87 * fast frees and allocs.
88 * 88 *
89 * Overloading of page flags that are otherwise used for LRU management. 89 * Overloading of page flags that are otherwise used for LRU management.
90 * 90 *
91 * PageActive The slab is frozen and exempt from list processing. 91 * PageActive The slab is frozen and exempt from list processing.
92 * This means that the slab is dedicated to a purpose 92 * This means that the slab is dedicated to a purpose
93 * such as satisfying allocations for a specific 93 * such as satisfying allocations for a specific
94 * processor. Objects may be freed in the slab while 94 * processor. Objects may be freed in the slab while
95 * it is frozen but slab_free will then skip the usual 95 * it is frozen but slab_free will then skip the usual
96 * list operations. It is up to the processor holding 96 * list operations. It is up to the processor holding
97 * the slab to integrate the slab into the slab lists 97 * the slab to integrate the slab into the slab lists
98 * when the slab is no longer needed. 98 * when the slab is no longer needed.
99 * 99 *
100 * One use of this flag is to mark slabs that are 100 * One use of this flag is to mark slabs that are
101 * used for allocations. Then such a slab becomes a cpu 101 * used for allocations. Then such a slab becomes a cpu
102 * slab. The cpu slab may be equipped with an additional 102 * slab. The cpu slab may be equipped with an additional
103 * freelist that allows lockless access to 103 * freelist that allows lockless access to
104 * free objects in addition to the regular freelist 104 * free objects in addition to the regular freelist
105 * that requires the slab lock. 105 * that requires the slab lock.
106 * 106 *
107 * PageError Slab requires special handling due to debug 107 * PageError Slab requires special handling due to debug
108 * options set. This moves slab handling out of 108 * options set. This moves slab handling out of
109 * the fast path and disables lockless freelists. 109 * the fast path and disables lockless freelists.
110 */ 110 */
111 111
112 #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ 112 #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
113 SLAB_TRACE | SLAB_DEBUG_FREE) 113 SLAB_TRACE | SLAB_DEBUG_FREE)
114 114
115 static inline int kmem_cache_debug(struct kmem_cache *s) 115 static inline int kmem_cache_debug(struct kmem_cache *s)
116 { 116 {
117 #ifdef CONFIG_SLUB_DEBUG 117 #ifdef CONFIG_SLUB_DEBUG
118 return unlikely(s->flags & SLAB_DEBUG_FLAGS); 118 return unlikely(s->flags & SLAB_DEBUG_FLAGS);
119 #else 119 #else
120 return 0; 120 return 0;
121 #endif 121 #endif
122 } 122 }
123 123
124 /* 124 /*
125 * Issues still to be resolved: 125 * Issues still to be resolved:
126 * 126 *
127 * - Support PAGE_ALLOC_DEBUG. Should be easy to do. 127 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
128 * 128 *
129 * - Variable sizing of the per node arrays 129 * - Variable sizing of the per node arrays
130 */ 130 */
131 131
132 /* Enable to test recovery from slab corruption on boot */ 132 /* Enable to test recovery from slab corruption on boot */
133 #undef SLUB_RESILIENCY_TEST 133 #undef SLUB_RESILIENCY_TEST
134 134
135 /* Enable to log cmpxchg failures */ 135 /* Enable to log cmpxchg failures */
136 #undef SLUB_DEBUG_CMPXCHG 136 #undef SLUB_DEBUG_CMPXCHG
137 137
138 /* 138 /*
139 * Minimum number of partial slabs. These will be left on the partial 139 * Minimum number of partial slabs. These will be left on the partial
140 * lists even if they are empty. kmem_cache_shrink may reclaim them. 140 * lists even if they are empty. kmem_cache_shrink may reclaim them.
141 */ 141 */
142 #define MIN_PARTIAL 5 142 #define MIN_PARTIAL 5
143 143
144 /* 144 /*
145 * Maximum number of desirable partial slabs. 145 * Maximum number of desirable partial slabs.
146 * The existence of more partial slabs makes kmem_cache_shrink 146 * The existence of more partial slabs makes kmem_cache_shrink
147 * sort the partial list by the number of objects in them. 147 * sort the partial list by the number of objects in them.
148 */ 148 */
149 #define MAX_PARTIAL 10 149 #define MAX_PARTIAL 10
150 150
151 #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \ 151 #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
152 SLAB_POISON | SLAB_STORE_USER) 152 SLAB_POISON | SLAB_STORE_USER)
153 153
154 /* 154 /*
155 * Debugging flags that require metadata to be stored in the slab. These get 155 * Debugging flags that require metadata to be stored in the slab. These get
156 * disabled when slub_debug=O is used and a cache's min order increases with 156 * disabled when slub_debug=O is used and a cache's min order increases with
157 * metadata. 157 * metadata.
158 */ 158 */
159 #define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) 159 #define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
160 160
161 /* 161 /*
162 * Set of flags that will prevent slab merging 162 * Set of flags that will prevent slab merging
163 */ 163 */
164 #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ 164 #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
165 SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ 165 SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
166 SLAB_FAILSLAB) 166 SLAB_FAILSLAB)
167 167
168 #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ 168 #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
169 SLAB_CACHE_DMA | SLAB_NOTRACK) 169 SLAB_CACHE_DMA | SLAB_NOTRACK)
170 170
171 #define OO_SHIFT 16 171 #define OO_SHIFT 16
172 #define OO_MASK ((1 << OO_SHIFT) - 1) 172 #define OO_MASK ((1 << OO_SHIFT) - 1)
173 #define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */ 173 #define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */
174 174
175 /* Internal SLUB flags */ 175 /* Internal SLUB flags */
176 #define __OBJECT_POISON 0x80000000UL /* Poison object */ 176 #define __OBJECT_POISON 0x80000000UL /* Poison object */
177 #define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */ 177 #define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */
178 178
179 static int kmem_size = sizeof(struct kmem_cache); 179 static int kmem_size = sizeof(struct kmem_cache);
180 180
181 #ifdef CONFIG_SMP 181 #ifdef CONFIG_SMP
182 static struct notifier_block slab_notifier; 182 static struct notifier_block slab_notifier;
183 #endif 183 #endif
184 184
185 static enum { 185 static enum {
186 DOWN, /* No slab functionality available */ 186 DOWN, /* No slab functionality available */
187 PARTIAL, /* Kmem_cache_node works */ 187 PARTIAL, /* Kmem_cache_node works */
188 UP, /* Everything works but does not show up in sysfs */ 188 UP, /* Everything works but does not show up in sysfs */
189 SYSFS /* Sysfs up */ 189 SYSFS /* Sysfs up */
190 } slab_state = DOWN; 190 } slab_state = DOWN;
191 191
192 /* A list of all slab caches on the system */ 192 /* A list of all slab caches on the system */
193 static DECLARE_RWSEM(slub_lock); 193 static DECLARE_RWSEM(slub_lock);
194 static LIST_HEAD(slab_caches); 194 static LIST_HEAD(slab_caches);
195 195
196 /* 196 /*
197 * Tracking user of a slab. 197 * Tracking user of a slab.
198 */ 198 */
199 #define TRACK_ADDRS_COUNT 16 199 #define TRACK_ADDRS_COUNT 16
200 struct track { 200 struct track {
201 unsigned long addr; /* Called from address */ 201 unsigned long addr; /* Called from address */
202 #ifdef CONFIG_STACKTRACE 202 #ifdef CONFIG_STACKTRACE
203 unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */ 203 unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
204 #endif 204 #endif
205 int cpu; /* Was running on cpu */ 205 int cpu; /* Was running on cpu */
206 int pid; /* Pid context */ 206 int pid; /* Pid context */
207 unsigned long when; /* When did the operation occur */ 207 unsigned long when; /* When did the operation occur */
208 }; 208 };
209 209
210 enum track_item { TRACK_ALLOC, TRACK_FREE }; 210 enum track_item { TRACK_ALLOC, TRACK_FREE };
211 211
212 #ifdef CONFIG_SYSFS 212 #ifdef CONFIG_SYSFS
213 static int sysfs_slab_add(struct kmem_cache *); 213 static int sysfs_slab_add(struct kmem_cache *);
214 static int sysfs_slab_alias(struct kmem_cache *, const char *); 214 static int sysfs_slab_alias(struct kmem_cache *, const char *);
215 static void sysfs_slab_remove(struct kmem_cache *); 215 static void sysfs_slab_remove(struct kmem_cache *);
216 216
217 #else 217 #else
218 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } 218 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
219 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) 219 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
220 { return 0; } 220 { return 0; }
221 static inline void sysfs_slab_remove(struct kmem_cache *s) 221 static inline void sysfs_slab_remove(struct kmem_cache *s)
222 { 222 {
223 kfree(s->name); 223 kfree(s->name);
224 kfree(s); 224 kfree(s);
225 } 225 }
226 226
227 #endif 227 #endif
228 228
229 static inline void stat(const struct kmem_cache *s, enum stat_item si) 229 static inline void stat(const struct kmem_cache *s, enum stat_item si)
230 { 230 {
231 #ifdef CONFIG_SLUB_STATS 231 #ifdef CONFIG_SLUB_STATS
232 __this_cpu_inc(s->cpu_slab->stat[si]); 232 __this_cpu_inc(s->cpu_slab->stat[si]);
233 #endif 233 #endif
234 } 234 }
235 235
236 /******************************************************************** 236 /********************************************************************
237 * Core slab cache functions 237 * Core slab cache functions
238 *******************************************************************/ 238 *******************************************************************/
239 239
240 int slab_is_available(void) 240 int slab_is_available(void)
241 { 241 {
242 return slab_state >= UP; 242 return slab_state >= UP;
243 } 243 }
244 244
245 static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) 245 static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
246 { 246 {
247 return s->node[node]; 247 return s->node[node];
248 } 248 }
249 249
250 /* Verify that a pointer has an address that is valid within a slab page */ 250 /* Verify that a pointer has an address that is valid within a slab page */
251 static inline int check_valid_pointer(struct kmem_cache *s, 251 static inline int check_valid_pointer(struct kmem_cache *s,
252 struct page *page, const void *object) 252 struct page *page, const void *object)
253 { 253 {
254 void *base; 254 void *base;
255 255
256 if (!object) 256 if (!object)
257 return 1; 257 return 1;
258 258
259 base = page_address(page); 259 base = page_address(page);
260 if (object < base || object >= base + page->objects * s->size || 260 if (object < base || object >= base + page->objects * s->size ||
261 (object - base) % s->size) { 261 (object - base) % s->size) {
262 return 0; 262 return 0;
263 } 263 }
264 264
265 return 1; 265 return 1;
266 } 266 }
267 267
268 static inline void *get_freepointer(struct kmem_cache *s, void *object) 268 static inline void *get_freepointer(struct kmem_cache *s, void *object)
269 { 269 {
270 return *(void **)(object + s->offset); 270 return *(void **)(object + s->offset);
271 } 271 }
272 272
273 static void prefetch_freepointer(const struct kmem_cache *s, void *object) 273 static void prefetch_freepointer(const struct kmem_cache *s, void *object)
274 { 274 {
275 prefetch(object + s->offset); 275 prefetch(object + s->offset);
276 } 276 }
277 277
278 static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) 278 static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
279 { 279 {
280 void *p; 280 void *p;
281 281
282 #ifdef CONFIG_DEBUG_PAGEALLOC 282 #ifdef CONFIG_DEBUG_PAGEALLOC
283 probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p)); 283 probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p));
284 #else 284 #else
285 p = get_freepointer(s, object); 285 p = get_freepointer(s, object);
286 #endif 286 #endif
287 return p; 287 return p;
288 } 288 }
289 289
290 static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) 290 static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
291 { 291 {
292 *(void **)(object + s->offset) = fp; 292 *(void **)(object + s->offset) = fp;
293 } 293 }
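
/*
 * Editor's illustration (not part of the original source): free objects are
 * chained through the objects themselves. The link word lives at offset
 * s->offset inside each object, so with s->offset == 0 the chain looks like:
 *
 *	page->freelist --> [object A | next: B] --> [object B | next: NULL]
 */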
294 294
295 /* Loop over all objects in a slab */ 295 /* Loop over all objects in a slab */
296 #define for_each_object(__p, __s, __addr, __objects) \ 296 #define for_each_object(__p, __s, __addr, __objects) \
297 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ 297 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
298 __p += (__s)->size) 298 __p += (__s)->size)
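
/*
 * Hypothetical usage sketch (not part of this file): count every object slot
 * in a slab page, whether allocated or free, by walking it with
 * for_each_object().
 *
 *	static int count_slots(struct kmem_cache *s, struct page *page)
 *	{
 *		void *p;
 *		void *addr = page_address(page);
 *		int n = 0;
 *
 *		for_each_object(p, s, addr, page->objects)
 *			n++;
 *		return n;
 *	}
 */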
299 299
300 /* Determine object index from a given position */ 300 /* Determine object index from a given position */
301 static inline int slab_index(void *p, struct kmem_cache *s, void *addr) 301 static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
302 { 302 {
303 return (p - addr) / s->size; 303 return (p - addr) / s->size;
304 } 304 }
305 305
306 static inline size_t slab_ksize(const struct kmem_cache *s) 306 static inline size_t slab_ksize(const struct kmem_cache *s)
307 { 307 {
308 #ifdef CONFIG_SLUB_DEBUG 308 #ifdef CONFIG_SLUB_DEBUG
309 /* 309 /*
310 * Debugging requires use of the padding between object 310 * Debugging requires use of the padding between object
311 * and whatever may come after it. 311 * and whatever may come after it.
312 */ 312 */
313 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) 313 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
314 return s->objsize; 314 return s->objsize;
315 315
316 #endif 316 #endif
317 /* 317 /*
318 * If we have the need to store the freelist pointer 318 * If we have the need to store the freelist pointer
319 * back there or track user information then we can 319 * back there or track user information then we can
320 * only use the space before that information. 320 * only use the space before that information.
321 */ 321 */
322 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) 322 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
323 return s->inuse; 323 return s->inuse;
324 /* 324 /*
325 * Else we can use all the padding etc for the allocation 325 * Else we can use all the padding etc for the allocation
326 */ 326 */
327 return s->size; 327 return s->size;
328 } 328 }
329 329
330 static inline int order_objects(int order, unsigned long size, int reserved) 330 static inline int order_objects(int order, unsigned long size, int reserved)
331 { 331 {
332 return ((PAGE_SIZE << order) - reserved) / size; 332 return ((PAGE_SIZE << order) - reserved) / size;
333 } 333 }
334 334
335 static inline struct kmem_cache_order_objects oo_make(int order, 335 static inline struct kmem_cache_order_objects oo_make(int order,
336 unsigned long size, int reserved) 336 unsigned long size, int reserved)
337 { 337 {
338 struct kmem_cache_order_objects x = { 338 struct kmem_cache_order_objects x = {
339 (order << OO_SHIFT) + order_objects(order, size, reserved) 339 (order << OO_SHIFT) + order_objects(order, size, reserved)
340 }; 340 };
341 341
342 return x; 342 return x;
343 } 343 }
344 344
345 static inline int oo_order(struct kmem_cache_order_objects x) 345 static inline int oo_order(struct kmem_cache_order_objects x)
346 { 346 {
347 return x.x >> OO_SHIFT; 347 return x.x >> OO_SHIFT;
348 } 348 }
349 349
350 static inline int oo_objects(struct kmem_cache_order_objects x) 350 static inline int oo_objects(struct kmem_cache_order_objects x)
351 { 351 {
352 return x.x & OO_MASK; 352 return x.x & OO_MASK;
353 } 353 }
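
/*
 * Editor's note: a worked example of the packing above. With OO_SHIFT == 16
 * as defined earlier, a 4KiB page (an assumption) and no reserved bytes,
 * oo_make(2, 64, 0) stores (2 << OO_SHIFT) + (16384 / 64) == (2 << 16) + 256,
 * so oo_order() returns 2 and oo_objects() returns 256.
 */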
354 354
355 /* 355 /*
356 * Per slab locking using the pagelock 356 * Per slab locking using the pagelock
357 */ 357 */
358 static __always_inline void slab_lock(struct page *page) 358 static __always_inline void slab_lock(struct page *page)
359 { 359 {
360 bit_spin_lock(PG_locked, &page->flags); 360 bit_spin_lock(PG_locked, &page->flags);
361 } 361 }
362 362
363 static __always_inline void slab_unlock(struct page *page) 363 static __always_inline void slab_unlock(struct page *page)
364 { 364 {
365 __bit_spin_unlock(PG_locked, &page->flags); 365 __bit_spin_unlock(PG_locked, &page->flags);
366 } 366 }
367 367
368 /* Interrupts must be disabled (for the fallback code to work right) */ 368 /* Interrupts must be disabled (for the fallback code to work right) */
369 static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page, 369 static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
370 void *freelist_old, unsigned long counters_old, 370 void *freelist_old, unsigned long counters_old,
371 void *freelist_new, unsigned long counters_new, 371 void *freelist_new, unsigned long counters_new,
372 const char *n) 372 const char *n)
373 { 373 {
374 VM_BUG_ON(!irqs_disabled()); 374 VM_BUG_ON(!irqs_disabled());
375 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ 375 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
376 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 376 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
377 if (s->flags & __CMPXCHG_DOUBLE) { 377 if (s->flags & __CMPXCHG_DOUBLE) {
378 if (cmpxchg_double(&page->freelist, &page->counters, 378 if (cmpxchg_double(&page->freelist, &page->counters,
379 freelist_old, counters_old, 379 freelist_old, counters_old,
380 freelist_new, counters_new)) 380 freelist_new, counters_new))
381 return 1; 381 return 1;
382 } else 382 } else
383 #endif 383 #endif
384 { 384 {
385 slab_lock(page); 385 slab_lock(page);
386 if (page->freelist == freelist_old && page->counters == counters_old) { 386 if (page->freelist == freelist_old && page->counters == counters_old) {
387 page->freelist = freelist_new; 387 page->freelist = freelist_new;
388 page->counters = counters_new; 388 page->counters = counters_new;
389 slab_unlock(page); 389 slab_unlock(page);
390 return 1; 390 return 1;
391 } 391 }
392 slab_unlock(page); 392 slab_unlock(page);
393 } 393 }
394 394
395 cpu_relax(); 395 cpu_relax();
396 stat(s, CMPXCHG_DOUBLE_FAIL); 396 stat(s, CMPXCHG_DOUBLE_FAIL);
397 397
398 #ifdef SLUB_DEBUG_CMPXCHG 398 #ifdef SLUB_DEBUG_CMPXCHG
399 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name); 399 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
400 #endif 400 #endif
401 401
402 return 0; 402 return 0;
403 } 403 }
404 404
405 static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, 405 static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
406 void *freelist_old, unsigned long counters_old, 406 void *freelist_old, unsigned long counters_old,
407 void *freelist_new, unsigned long counters_new, 407 void *freelist_new, unsigned long counters_new,
408 const char *n) 408 const char *n)
409 { 409 {
410 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ 410 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
411 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 411 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
412 if (s->flags & __CMPXCHG_DOUBLE) { 412 if (s->flags & __CMPXCHG_DOUBLE) {
413 if (cmpxchg_double(&page->freelist, &page->counters, 413 if (cmpxchg_double(&page->freelist, &page->counters,
414 freelist_old, counters_old, 414 freelist_old, counters_old,
415 freelist_new, counters_new)) 415 freelist_new, counters_new))
416 return 1; 416 return 1;
417 } else 417 } else
418 #endif 418 #endif
419 { 419 {
420 unsigned long flags; 420 unsigned long flags;
421 421
422 local_irq_save(flags); 422 local_irq_save(flags);
423 slab_lock(page); 423 slab_lock(page);
424 if (page->freelist == freelist_old && page->counters == counters_old) { 424 if (page->freelist == freelist_old && page->counters == counters_old) {
425 page->freelist = freelist_new; 425 page->freelist = freelist_new;
426 page->counters = counters_new; 426 page->counters = counters_new;
427 slab_unlock(page); 427 slab_unlock(page);
428 local_irq_restore(flags); 428 local_irq_restore(flags);
429 return 1; 429 return 1;
430 } 430 }
431 slab_unlock(page); 431 slab_unlock(page);
432 local_irq_restore(flags); 432 local_irq_restore(flags);
433 } 433 }
434 434
435 cpu_relax(); 435 cpu_relax();
436 stat(s, CMPXCHG_DOUBLE_FAIL); 436 stat(s, CMPXCHG_DOUBLE_FAIL);
437 437
438 #ifdef SLUB_DEBUG_CMPXCHG 438 #ifdef SLUB_DEBUG_CMPXCHG
439 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name); 439 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
440 #endif 440 #endif
441 441
442 return 0; 442 return 0;
443 } 443 }
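
/*
 * Editor's sketch of the typical calling pattern (not part of the original
 * source): callers snapshot the current freelist and counters, compute the
 * desired new values, and retry until the update succeeds, e.g.
 *
 *	do {
 *		prior = page->freelist;
 *		counters = page->counters;
 *		... compute new_freelist / new_counters ...
 *	} while (!cmpxchg_double_slab(s, page, prior, counters,
 *					new_freelist, new_counters,
 *					"some caller"));
 */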
444 444
445 #ifdef CONFIG_SLUB_DEBUG 445 #ifdef CONFIG_SLUB_DEBUG
446 /* 446 /*
447 * Determine a map of objects in use on a page. 447 * Determine a map of objects in use on a page.
448 * 448 *
449 * Node listlock must be held to guarantee that the page does 449 * Node listlock must be held to guarantee that the page does
450 * not vanish from under us. 450 * not vanish from under us.
451 */ 451 */
452 static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) 452 static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
453 { 453 {
454 void *p; 454 void *p;
455 void *addr = page_address(page); 455 void *addr = page_address(page);
456 456
457 for (p = page->freelist; p; p = get_freepointer(s, p)) 457 for (p = page->freelist; p; p = get_freepointer(s, p))
458 set_bit(slab_index(p, s, addr), map); 458 set_bit(slab_index(p, s, addr), map);
459 } 459 }
460 460
461 /* 461 /*
462 * Debug settings: 462 * Debug settings:
463 */ 463 */
464 #ifdef CONFIG_SLUB_DEBUG_ON 464 #ifdef CONFIG_SLUB_DEBUG_ON
465 static int slub_debug = DEBUG_DEFAULT_FLAGS; 465 static int slub_debug = DEBUG_DEFAULT_FLAGS;
466 #else 466 #else
467 static int slub_debug; 467 static int slub_debug;
468 #endif 468 #endif
469 469
470 static char *slub_debug_slabs; 470 static char *slub_debug_slabs;
471 static int disable_higher_order_debug; 471 static int disable_higher_order_debug;
472 472
473 /* 473 /*
474 * Object debugging 474 * Object debugging
475 */ 475 */
476 static void print_section(char *text, u8 *addr, unsigned int length) 476 static void print_section(char *text, u8 *addr, unsigned int length)
477 { 477 {
478 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, 478 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
479 length, 1); 479 length, 1);
480 } 480 }
481 481
482 static struct track *get_track(struct kmem_cache *s, void *object, 482 static struct track *get_track(struct kmem_cache *s, void *object,
483 enum track_item alloc) 483 enum track_item alloc)
484 { 484 {
485 struct track *p; 485 struct track *p;
486 486
487 if (s->offset) 487 if (s->offset)
488 p = object + s->offset + sizeof(void *); 488 p = object + s->offset + sizeof(void *);
489 else 489 else
490 p = object + s->inuse; 490 p = object + s->inuse;
491 491
492 return p + alloc; 492 return p + alloc;
493 } 493 }
494 494
495 static void set_track(struct kmem_cache *s, void *object, 495 static void set_track(struct kmem_cache *s, void *object,
496 enum track_item alloc, unsigned long addr) 496 enum track_item alloc, unsigned long addr)
497 { 497 {
498 struct track *p = get_track(s, object, alloc); 498 struct track *p = get_track(s, object, alloc);
499 499
500 if (addr) { 500 if (addr) {
501 #ifdef CONFIG_STACKTRACE 501 #ifdef CONFIG_STACKTRACE
502 struct stack_trace trace; 502 struct stack_trace trace;
503 int i; 503 int i;
504 504
505 trace.nr_entries = 0; 505 trace.nr_entries = 0;
506 trace.max_entries = TRACK_ADDRS_COUNT; 506 trace.max_entries = TRACK_ADDRS_COUNT;
507 trace.entries = p->addrs; 507 trace.entries = p->addrs;
508 trace.skip = 3; 508 trace.skip = 3;
509 save_stack_trace(&trace); 509 save_stack_trace(&trace);
510 510
511 /* See rant in lockdep.c */ 511 /* See rant in lockdep.c */
512 if (trace.nr_entries != 0 && 512 if (trace.nr_entries != 0 &&
513 trace.entries[trace.nr_entries - 1] == ULONG_MAX) 513 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
514 trace.nr_entries--; 514 trace.nr_entries--;
515 515
516 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++) 516 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
517 p->addrs[i] = 0; 517 p->addrs[i] = 0;
518 #endif 518 #endif
519 p->addr = addr; 519 p->addr = addr;
520 p->cpu = smp_processor_id(); 520 p->cpu = smp_processor_id();
521 p->pid = current->pid; 521 p->pid = current->pid;
522 p->when = jiffies; 522 p->when = jiffies;
523 } else 523 } else
524 memset(p, 0, sizeof(struct track)); 524 memset(p, 0, sizeof(struct track));
525 } 525 }
526 526
527 static void init_tracking(struct kmem_cache *s, void *object) 527 static void init_tracking(struct kmem_cache *s, void *object)
528 { 528 {
529 if (!(s->flags & SLAB_STORE_USER)) 529 if (!(s->flags & SLAB_STORE_USER))
530 return; 530 return;
531 531
532 set_track(s, object, TRACK_FREE, 0UL); 532 set_track(s, object, TRACK_FREE, 0UL);
533 set_track(s, object, TRACK_ALLOC, 0UL); 533 set_track(s, object, TRACK_ALLOC, 0UL);
534 } 534 }
535 535
536 static void print_track(const char *s, struct track *t) 536 static void print_track(const char *s, struct track *t)
537 { 537 {
538 if (!t->addr) 538 if (!t->addr)
539 return; 539 return;
540 540
541 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n", 541 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
542 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid); 542 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
543 #ifdef CONFIG_STACKTRACE 543 #ifdef CONFIG_STACKTRACE
544 { 544 {
545 int i; 545 int i;
546 for (i = 0; i < TRACK_ADDRS_COUNT; i++) 546 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
547 if (t->addrs[i]) 547 if (t->addrs[i])
548 printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]); 548 printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
549 else 549 else
550 break; 550 break;
551 } 551 }
552 #endif 552 #endif
553 } 553 }
554 554
555 static void print_tracking(struct kmem_cache *s, void *object) 555 static void print_tracking(struct kmem_cache *s, void *object)
556 { 556 {
557 if (!(s->flags & SLAB_STORE_USER)) 557 if (!(s->flags & SLAB_STORE_USER))
558 return; 558 return;
559 559
560 print_track("Allocated", get_track(s, object, TRACK_ALLOC)); 560 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
561 print_track("Freed", get_track(s, object, TRACK_FREE)); 561 print_track("Freed", get_track(s, object, TRACK_FREE));
562 } 562 }
563 563
564 static void print_page_info(struct page *page) 564 static void print_page_info(struct page *page)
565 { 565 {
566 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n", 566 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
567 page, page->objects, page->inuse, page->freelist, page->flags); 567 page, page->objects, page->inuse, page->freelist, page->flags);
568 568
569 } 569 }
570 570
571 static void slab_bug(struct kmem_cache *s, char *fmt, ...) 571 static void slab_bug(struct kmem_cache *s, char *fmt, ...)
572 { 572 {
573 va_list args; 573 va_list args;
574 char buf[100]; 574 char buf[100];
575 575
576 va_start(args, fmt); 576 va_start(args, fmt);
577 vsnprintf(buf, sizeof(buf), fmt, args); 577 vsnprintf(buf, sizeof(buf), fmt, args);
578 va_end(args); 578 va_end(args);
579 printk(KERN_ERR "========================================" 579 printk(KERN_ERR "========================================"
580 "=====================================\n"); 580 "=====================================\n");
581 printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf); 581 printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
582 printk(KERN_ERR "----------------------------------------" 582 printk(KERN_ERR "----------------------------------------"
583 "-------------------------------------\n\n"); 583 "-------------------------------------\n\n");
584 } 584 }
585 585
586 static void slab_fix(struct kmem_cache *s, char *fmt, ...) 586 static void slab_fix(struct kmem_cache *s, char *fmt, ...)
587 { 587 {
588 va_list args; 588 va_list args;
589 char buf[100]; 589 char buf[100];
590 590
591 va_start(args, fmt); 591 va_start(args, fmt);
592 vsnprintf(buf, sizeof(buf), fmt, args); 592 vsnprintf(buf, sizeof(buf), fmt, args);
593 va_end(args); 593 va_end(args);
594 printk(KERN_ERR "FIX %s: %s\n", s->name, buf); 594 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
595 } 595 }
596 596
597 static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) 597 static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
598 { 598 {
599 unsigned int off; /* Offset of last byte */ 599 unsigned int off; /* Offset of last byte */
600 u8 *addr = page_address(page); 600 u8 *addr = page_address(page);
601 601
602 print_tracking(s, p); 602 print_tracking(s, p);
603 603
604 print_page_info(page); 604 print_page_info(page);
605 605
606 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", 606 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
607 p, p - addr, get_freepointer(s, p)); 607 p, p - addr, get_freepointer(s, p));
608 608
609 if (p > addr + 16) 609 if (p > addr + 16)
610 print_section("Bytes b4 ", p - 16, 16); 610 print_section("Bytes b4 ", p - 16, 16);
611 611
612 print_section("Object ", p, min_t(unsigned long, s->objsize, 612 print_section("Object ", p, min_t(unsigned long, s->objsize,
613 PAGE_SIZE)); 613 PAGE_SIZE));
614 if (s->flags & SLAB_RED_ZONE) 614 if (s->flags & SLAB_RED_ZONE)
615 print_section("Redzone ", p + s->objsize, 615 print_section("Redzone ", p + s->objsize,
616 s->inuse - s->objsize); 616 s->inuse - s->objsize);
617 617
618 if (s->offset) 618 if (s->offset)
619 off = s->offset + sizeof(void *); 619 off = s->offset + sizeof(void *);
620 else 620 else
621 off = s->inuse; 621 off = s->inuse;
622 622
623 if (s->flags & SLAB_STORE_USER) 623 if (s->flags & SLAB_STORE_USER)
624 off += 2 * sizeof(struct track); 624 off += 2 * sizeof(struct track);
625 625
626 if (off != s->size) 626 if (off != s->size)
627 /* Beginning of the filler is the free pointer */ 627 /* Beginning of the filler is the free pointer */
628 print_section("Padding ", p + off, s->size - off); 628 print_section("Padding ", p + off, s->size - off);
629 629
630 dump_stack(); 630 dump_stack();
631 } 631 }
632 632
633 static void object_err(struct kmem_cache *s, struct page *page, 633 static void object_err(struct kmem_cache *s, struct page *page,
634 u8 *object, char *reason) 634 u8 *object, char *reason)
635 { 635 {
636 slab_bug(s, "%s", reason); 636 slab_bug(s, "%s", reason);
637 print_trailer(s, page, object); 637 print_trailer(s, page, object);
638 } 638 }
639 639
640 static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...) 640 static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
641 { 641 {
642 va_list args; 642 va_list args;
643 char buf[100]; 643 char buf[100];
644 644
645 va_start(args, fmt); 645 va_start(args, fmt);
646 vsnprintf(buf, sizeof(buf), fmt, args); 646 vsnprintf(buf, sizeof(buf), fmt, args);
647 va_end(args); 647 va_end(args);
648 slab_bug(s, "%s", buf); 648 slab_bug(s, "%s", buf);
649 print_page_info(page); 649 print_page_info(page);
650 dump_stack(); 650 dump_stack();
651 } 651 }
652 652
653 static void init_object(struct kmem_cache *s, void *object, u8 val) 653 static void init_object(struct kmem_cache *s, void *object, u8 val)
654 { 654 {
655 u8 *p = object; 655 u8 *p = object;
656 656
657 if (s->flags & __OBJECT_POISON) { 657 if (s->flags & __OBJECT_POISON) {
658 memset(p, POISON_FREE, s->objsize - 1); 658 memset(p, POISON_FREE, s->objsize - 1);
659 p[s->objsize - 1] = POISON_END; 659 p[s->objsize - 1] = POISON_END;
660 } 660 }
661 661
662 if (s->flags & SLAB_RED_ZONE) 662 if (s->flags & SLAB_RED_ZONE)
663 memset(p + s->objsize, val, s->inuse - s->objsize); 663 memset(p + s->objsize, val, s->inuse - s->objsize);
664 } 664 }
665 665
666 static void restore_bytes(struct kmem_cache *s, char *message, u8 data, 666 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
667 void *from, void *to) 667 void *from, void *to)
668 { 668 {
669 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data); 669 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
670 memset(from, data, to - from); 670 memset(from, data, to - from);
671 } 671 }
672 672
673 static int check_bytes_and_report(struct kmem_cache *s, struct page *page, 673 static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
674 u8 *object, char *what, 674 u8 *object, char *what,
675 u8 *start, unsigned int value, unsigned int bytes) 675 u8 *start, unsigned int value, unsigned int bytes)
676 { 676 {
677 u8 *fault; 677 u8 *fault;
678 u8 *end; 678 u8 *end;
679 679
680 fault = memchr_inv(start, value, bytes); 680 fault = memchr_inv(start, value, bytes);
681 if (!fault) 681 if (!fault)
682 return 1; 682 return 1;
683 683
684 end = start + bytes; 684 end = start + bytes;
685 while (end > fault && end[-1] == value) 685 while (end > fault && end[-1] == value)
686 end--; 686 end--;
687 687
688 slab_bug(s, "%s overwritten", what); 688 slab_bug(s, "%s overwritten", what);
689 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n", 689 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
690 fault, end - 1, fault[0], value); 690 fault, end - 1, fault[0], value);
691 print_trailer(s, page, object); 691 print_trailer(s, page, object);
692 692
693 restore_bytes(s, what, value, fault, end); 693 restore_bytes(s, what, value, fault, end);
694 return 0; 694 return 0;
695 } 695 }
696 696
697 /* 697 /*
698 * Object layout: 698 * Object layout:
699 * 699 *
700 * object address 700 * object address
701 * Bytes of the object to be managed. 701 * Bytes of the object to be managed.
702 * If the freepointer may overlay the object then the free 702 * If the freepointer may overlay the object then the free
703 * pointer is the first word of the object. 703 * pointer is the first word of the object.
704 * 704 *
705 * Poisoning uses 0x6b (POISON_FREE) and the last byte is 705 * Poisoning uses 0x6b (POISON_FREE) and the last byte is
706 * 0xa5 (POISON_END) 706 * 0xa5 (POISON_END)
707 * 707 *
708 * object + s->objsize 708 * object + s->objsize
709 * Padding to reach word boundary. This is also used for Redzoning. 709 * Padding to reach word boundary. This is also used for Redzoning.
710 * Padding is extended by another word if Redzoning is enabled and 710 * Padding is extended by another word if Redzoning is enabled and
711 * objsize == inuse. 711 * objsize == inuse.
712 * 712 *
713 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with 713 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with
714 * 0xcc (RED_ACTIVE) for objects in use. 714 * 0xcc (RED_ACTIVE) for objects in use.
715 * 715 *
716 * object + s->inuse 716 * object + s->inuse
717 * Meta data starts here. 717 * Meta data starts here.
718 * 718 *
719 * A. Free pointer (if we cannot overwrite object on free) 719 * A. Free pointer (if we cannot overwrite object on free)
720 * B. Tracking data for SLAB_STORE_USER 720 * B. Tracking data for SLAB_STORE_USER
721 * C. Padding to reach required alignment boundary or at minimum 721 * C. Padding to reach required alignment boundary or at minimum
722 * one word if debugging is on to be able to detect writes 722 * one word if debugging is on to be able to detect writes
723 * before the word boundary. 723 * before the word boundary.
724 * 724 *
725 * Padding is done using 0x5a (POISON_INUSE) 725 * Padding is done using 0x5a (POISON_INUSE)
726 * 726 *
727 * object + s->size 727 * object + s->size
728 * Nothing is used beyond s->size. 728 * Nothing is used beyond s->size.
729 * 729 *
730 * If slabcaches are merged then the objsize and inuse boundaries are mostly 730 * If slabcaches are merged then the objsize and inuse boundaries are mostly
731 * ignored. And therefore no slab options that rely on these boundaries 731 * ignored. And therefore no slab options that rely on these boundaries
732 * may be used with merged slabcaches. 732 * may be used with merged slabcaches.
733 */ 733 */
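/*
 * A worked example of the layout above (hypothetical sizes on 64-bit,
 * for illustration only): with objsize = 24, SLAB_RED_ZONE and
 * SLAB_STORE_USER set, and a free pointer that may not overwrite the
 * object, objsize is already word aligned so the red zone adds one
 * extra word and s->inuse = 32; the free pointer then sits at offset
 * 32, the two struct track records follow it, and whatever remains
 * before s->size is POISON_INUSE padding.  The real offsets depend on
 * the cache's flags and alignment.
 */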
734 734
735 static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) 735 static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
736 { 736 {
737 unsigned long off = s->inuse; /* The end of info */ 737 unsigned long off = s->inuse; /* The end of info */
738 738
739 if (s->offset) 739 if (s->offset)
740 /* Freepointer is placed after the object. */ 740 /* Freepointer is placed after the object. */
741 off += sizeof(void *); 741 off += sizeof(void *);
742 742
743 if (s->flags & SLAB_STORE_USER) 743 if (s->flags & SLAB_STORE_USER)
744 /* We also have user information there */ 744 /* We also have user information there */
745 off += 2 * sizeof(struct track); 745 off += 2 * sizeof(struct track);
746 746
747 if (s->size == off) 747 if (s->size == off)
748 return 1; 748 return 1;
749 749
750 return check_bytes_and_report(s, page, p, "Object padding", 750 return check_bytes_and_report(s, page, p, "Object padding",
751 p + off, POISON_INUSE, s->size - off); 751 p + off, POISON_INUSE, s->size - off);
752 } 752 }
753 753
754 /* Check the pad bytes at the end of a slab page */ 754 /* Check the pad bytes at the end of a slab page */
755 static int slab_pad_check(struct kmem_cache *s, struct page *page) 755 static int slab_pad_check(struct kmem_cache *s, struct page *page)
756 { 756 {
757 u8 *start; 757 u8 *start;
758 u8 *fault; 758 u8 *fault;
759 u8 *end; 759 u8 *end;
760 int length; 760 int length;
761 int remainder; 761 int remainder;
762 762
763 if (!(s->flags & SLAB_POISON)) 763 if (!(s->flags & SLAB_POISON))
764 return 1; 764 return 1;
765 765
766 start = page_address(page); 766 start = page_address(page);
767 length = (PAGE_SIZE << compound_order(page)) - s->reserved; 767 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
768 end = start + length; 768 end = start + length;
769 remainder = length % s->size; 769 remainder = length % s->size;
770 if (!remainder) 770 if (!remainder)
771 return 1; 771 return 1;
772 772
773 fault = memchr_inv(end - remainder, POISON_INUSE, remainder); 773 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
774 if (!fault) 774 if (!fault)
775 return 1; 775 return 1;
776 while (end > fault && end[-1] == POISON_INUSE) 776 while (end > fault && end[-1] == POISON_INUSE)
777 end--; 777 end--;
778 778
779 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); 779 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
780 print_section("Padding ", end - remainder, remainder); 780 print_section("Padding ", end - remainder, remainder);
781 781
782 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); 782 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
783 return 0; 783 return 0;
784 } 784 }
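/*
 * Two kinds of padding are verified: check_pad_bytes() covers the
 * per-object gap between the end of the metadata and s->size, while
 * slab_pad_check() covers the unused tail of the page left over after
 * the last whole object (excluding s->reserved bytes).  Both expect
 * POISON_INUSE (0x5a) and, like check_bytes_and_report(), repair the
 * bytes via restore_bytes() after reporting so one corruption does not
 * keep firing.
 */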
785 785
786 static int check_object(struct kmem_cache *s, struct page *page, 786 static int check_object(struct kmem_cache *s, struct page *page,
787 void *object, u8 val) 787 void *object, u8 val)
788 { 788 {
789 u8 *p = object; 789 u8 *p = object;
790 u8 *endobject = object + s->objsize; 790 u8 *endobject = object + s->objsize;
791 791
792 if (s->flags & SLAB_RED_ZONE) { 792 if (s->flags & SLAB_RED_ZONE) {
793 if (!check_bytes_and_report(s, page, object, "Redzone", 793 if (!check_bytes_and_report(s, page, object, "Redzone",
794 endobject, val, s->inuse - s->objsize)) 794 endobject, val, s->inuse - s->objsize))
795 return 0; 795 return 0;
796 } else { 796 } else {
797 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { 797 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
798 check_bytes_and_report(s, page, p, "Alignment padding", 798 check_bytes_and_report(s, page, p, "Alignment padding",
799 endobject, POISON_INUSE, s->inuse - s->objsize); 799 endobject, POISON_INUSE, s->inuse - s->objsize);
800 } 800 }
801 } 801 }
802 802
803 if (s->flags & SLAB_POISON) { 803 if (s->flags & SLAB_POISON) {
804 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && 804 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
805 (!check_bytes_and_report(s, page, p, "Poison", p, 805 (!check_bytes_and_report(s, page, p, "Poison", p,
806 POISON_FREE, s->objsize - 1) || 806 POISON_FREE, s->objsize - 1) ||
807 !check_bytes_and_report(s, page, p, "Poison", 807 !check_bytes_and_report(s, page, p, "Poison",
808 p + s->objsize - 1, POISON_END, 1))) 808 p + s->objsize - 1, POISON_END, 1)))
809 return 0; 809 return 0;
810 /* 810 /*
811 * check_pad_bytes cleans up on its own. 811 * check_pad_bytes cleans up on its own.
812 */ 812 */
813 check_pad_bytes(s, page, p); 813 check_pad_bytes(s, page, p);
814 } 814 }
815 815
816 if (!s->offset && val == SLUB_RED_ACTIVE) 816 if (!s->offset && val == SLUB_RED_ACTIVE)
817 /* 817 /*
818 * Object and freepointer overlap. Cannot check 818 * Object and freepointer overlap. Cannot check
819 * freepointer while object is allocated. 819 * freepointer while object is allocated.
820 */ 820 */
821 return 1; 821 return 1;
822 822
823 /* Check free pointer validity */ 823 /* Check free pointer validity */
824 if (!check_valid_pointer(s, page, get_freepointer(s, p))) { 824 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
825 object_err(s, page, p, "Freepointer corrupt"); 825 object_err(s, page, p, "Freepointer corrupt");
826 /* 826 /*
827 * No choice but to zap it and thus lose the remainder 827 * No choice but to zap it and thus lose the remainder
828 * of the free objects in this slab. May cause 828 * of the free objects in this slab. May cause
829 * another error because the object count is now wrong. 829 * another error because the object count is now wrong.
830 */ 830 */
831 set_freepointer(s, p, NULL); 831 set_freepointer(s, p, NULL);
832 return 0; 832 return 0;
833 } 833 }
834 return 1; 834 return 1;
835 } 835 }
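/*
 * check_object() is called with val == SLUB_RED_ACTIVE for objects that
 * should be in use and SLUB_RED_INACTIVE for free ones; the red zone
 * between objsize and inuse must match that state.  Poison can only be
 * verified on inactive objects, and a free pointer that overlays the
 * object (s->offset == 0) cannot be checked while the object is
 * allocated because user data has legitimately overwritten it.  A
 * corrupt free pointer is zapped to NULL, deliberately truncating the
 * freelist rather than following a bad chain.
 */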
836 836
837 static int check_slab(struct kmem_cache *s, struct page *page) 837 static int check_slab(struct kmem_cache *s, struct page *page)
838 { 838 {
839 int maxobj; 839 int maxobj;
840 840
841 VM_BUG_ON(!irqs_disabled()); 841 VM_BUG_ON(!irqs_disabled());
842 842
843 if (!PageSlab(page)) { 843 if (!PageSlab(page)) {
844 slab_err(s, page, "Not a valid slab page"); 844 slab_err(s, page, "Not a valid slab page");
845 return 0; 845 return 0;
846 } 846 }
847 847
848 maxobj = order_objects(compound_order(page), s->size, s->reserved); 848 maxobj = order_objects(compound_order(page), s->size, s->reserved);
849 if (page->objects > maxobj) { 849 if (page->objects > maxobj) {
850 slab_err(s, page, "objects %u > max %u", 850 slab_err(s, page, "objects %u > max %u",
851 s->name, page->objects, maxobj); 851 s->name, page->objects, maxobj);
852 return 0; 852 return 0;
853 } 853 }
854 if (page->inuse > page->objects) { 854 if (page->inuse > page->objects) {
855 slab_err(s, page, "inuse %u > max %u", 855 slab_err(s, page, "inuse %u > max %u",
856 s->name, page->inuse, page->objects); 856 s->name, page->inuse, page->objects);
857 return 0; 857 return 0;
858 } 858 }
859 /* Slab_pad_check fixes things up after itself */ 859 /* Slab_pad_check fixes things up after itself */
860 slab_pad_check(s, page); 860 slab_pad_check(s, page);
861 return 1; 861 return 1;
862 } 862 }
863 863
864 /* 864 /*
865 * Determine if a certain object on a page is on the freelist. Must hold the 865 * Determine if a certain object on a page is on the freelist. Must hold the
866 * slab lock to guarantee that the chains are in a consistent state. 866 * slab lock to guarantee that the chains are in a consistent state.
867 */ 867 */
868 static int on_freelist(struct kmem_cache *s, struct page *page, void *search) 868 static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
869 { 869 {
870 int nr = 0; 870 int nr = 0;
871 void *fp; 871 void *fp;
872 void *object = NULL; 872 void *object = NULL;
873 unsigned long max_objects; 873 unsigned long max_objects;
874 874
875 fp = page->freelist; 875 fp = page->freelist;
876 while (fp && nr <= page->objects) { 876 while (fp && nr <= page->objects) {
877 if (fp == search) 877 if (fp == search)
878 return 1; 878 return 1;
879 if (!check_valid_pointer(s, page, fp)) { 879 if (!check_valid_pointer(s, page, fp)) {
880 if (object) { 880 if (object) {
881 object_err(s, page, object, 881 object_err(s, page, object,
882 "Freechain corrupt"); 882 "Freechain corrupt");
883 set_freepointer(s, object, NULL); 883 set_freepointer(s, object, NULL);
884 break; 884 break;
885 } else { 885 } else {
886 slab_err(s, page, "Freepointer corrupt"); 886 slab_err(s, page, "Freepointer corrupt");
887 page->freelist = NULL; 887 page->freelist = NULL;
888 page->inuse = page->objects; 888 page->inuse = page->objects;
889 slab_fix(s, "Freelist cleared"); 889 slab_fix(s, "Freelist cleared");
890 return 0; 890 return 0;
891 } 891 }
892 break; 892 break;
893 } 893 }
894 object = fp; 894 object = fp;
895 fp = get_freepointer(s, object); 895 fp = get_freepointer(s, object);
896 nr++; 896 nr++;
897 } 897 }
898 898
899 max_objects = order_objects(compound_order(page), s->size, s->reserved); 899 max_objects = order_objects(compound_order(page), s->size, s->reserved);
900 if (max_objects > MAX_OBJS_PER_PAGE) 900 if (max_objects > MAX_OBJS_PER_PAGE)
901 max_objects = MAX_OBJS_PER_PAGE; 901 max_objects = MAX_OBJS_PER_PAGE;
902 902
903 if (page->objects != max_objects) { 903 if (page->objects != max_objects) {
904 slab_err(s, page, "Wrong number of objects. Found %d but " 904 slab_err(s, page, "Wrong number of objects. Found %d but "
905 "should be %d", page->objects, max_objects); 905 "should be %d", page->objects, max_objects);
906 page->objects = max_objects; 906 page->objects = max_objects;
907 slab_fix(s, "Number of objects adjusted."); 907 slab_fix(s, "Number of objects adjusted.");
908 } 908 }
909 if (page->inuse != page->objects - nr) { 909 if (page->inuse != page->objects - nr) {
910 slab_err(s, page, "Wrong object count. Counter is %d but " 910 slab_err(s, page, "Wrong object count. Counter is %d but "
911 "counted were %d", page->inuse, page->objects - nr); 911 "counted were %d", page->inuse, page->objects - nr);
912 page->inuse = page->objects - nr; 912 page->inuse = page->objects - nr;
913 slab_fix(s, "Object count adjusted."); 913 slab_fix(s, "Object count adjusted.");
914 } 914 }
915 return search == NULL; 915 return search == NULL;
916 } 916 }
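/*
 * on_freelist() bounds its walk by page->objects, so a corrupted or
 * cyclic freelist cannot loop forever.  Along the way it cross-checks
 * page->objects against what the page order allows and page->inuse
 * against the number of free objects it counted, adjusting both if they
 * disagree.  With search == NULL it is a pure consistency pass; with a
 * specific object it reports whether that object is already on the
 * freelist, which is how double frees are detected.
 */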
917 917
918 static void trace(struct kmem_cache *s, struct page *page, void *object, 918 static void trace(struct kmem_cache *s, struct page *page, void *object,
919 int alloc) 919 int alloc)
920 { 920 {
921 if (s->flags & SLAB_TRACE) { 921 if (s->flags & SLAB_TRACE) {
922 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n", 922 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
923 s->name, 923 s->name,
924 alloc ? "alloc" : "free", 924 alloc ? "alloc" : "free",
925 object, page->inuse, 925 object, page->inuse,
926 page->freelist); 926 page->freelist);
927 927
928 if (!alloc) 928 if (!alloc)
929 print_section("Object ", (void *)object, s->objsize); 929 print_section("Object ", (void *)object, s->objsize);
930 930
931 dump_stack(); 931 dump_stack();
932 } 932 }
933 } 933 }
934 934
935 /* 935 /*
936 * Hooks for other subsystems that check memory allocations. In a typical 936 * Hooks for other subsystems that check memory allocations. In a typical
937 * production configuration these hooks all should produce no code at all. 937 * production configuration these hooks all should produce no code at all.
938 */ 938 */
939 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) 939 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
940 { 940 {
941 flags &= gfp_allowed_mask; 941 flags &= gfp_allowed_mask;
942 lockdep_trace_alloc(flags); 942 lockdep_trace_alloc(flags);
943 might_sleep_if(flags & __GFP_WAIT); 943 might_sleep_if(flags & __GFP_WAIT);
944 944
945 return should_failslab(s->objsize, flags, s->flags); 945 return should_failslab(s->objsize, flags, s->flags);
946 } 946 }
947 947
948 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) 948 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
949 { 949 {
950 flags &= gfp_allowed_mask; 950 flags &= gfp_allowed_mask;
951 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); 951 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
952 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); 952 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags);
953 } 953 }
954 954
955 static inline void slab_free_hook(struct kmem_cache *s, void *x) 955 static inline void slab_free_hook(struct kmem_cache *s, void *x)
956 { 956 {
957 kmemleak_free_recursive(x, s->flags); 957 kmemleak_free_recursive(x, s->flags);
958 958
959 /* 959 /*
960 * Trouble is that we may no longer disable interrupts in the fast path. 960 * Trouble is that we may no longer disable interrupts in the fast path.
961 * So in order to make the debug calls that expect irqs to be 961 * So in order to make the debug calls that expect irqs to be
962 * disabled we need to disable interrupts temporarily. 962 * disabled we need to disable interrupts temporarily.
963 */ 963 */
964 #if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) 964 #if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
965 { 965 {
966 unsigned long flags; 966 unsigned long flags;
967 967
968 local_irq_save(flags); 968 local_irq_save(flags);
969 kmemcheck_slab_free(s, x, s->objsize); 969 kmemcheck_slab_free(s, x, s->objsize);
970 debug_check_no_locks_freed(x, s->objsize); 970 debug_check_no_locks_freed(x, s->objsize);
971 local_irq_restore(flags); 971 local_irq_restore(flags);
972 } 972 }
973 #endif 973 #endif
974 if (!(s->flags & SLAB_DEBUG_OBJECTS)) 974 if (!(s->flags & SLAB_DEBUG_OBJECTS))
975 debug_check_no_obj_freed(x, s->objsize); 975 debug_check_no_obj_freed(x, s->objsize);
976 } 976 }
977 977
978 /* 978 /*
979 * Tracking of fully allocated slabs for debugging purposes. 979 * Tracking of fully allocated slabs for debugging purposes.
980 * 980 *
981 * list_lock must be held. 981 * list_lock must be held.
982 */ 982 */
983 static void add_full(struct kmem_cache *s, 983 static void add_full(struct kmem_cache *s,
984 struct kmem_cache_node *n, struct page *page) 984 struct kmem_cache_node *n, struct page *page)
985 { 985 {
986 if (!(s->flags & SLAB_STORE_USER)) 986 if (!(s->flags & SLAB_STORE_USER))
987 return; 987 return;
988 988
989 list_add(&page->lru, &n->full); 989 list_add(&page->lru, &n->full);
990 } 990 }
991 991
992 /* 992 /*
993 * list_lock must be held. 993 * list_lock must be held.
994 */ 994 */
995 static void remove_full(struct kmem_cache *s, struct page *page) 995 static void remove_full(struct kmem_cache *s, struct page *page)
996 { 996 {
997 if (!(s->flags & SLAB_STORE_USER)) 997 if (!(s->flags & SLAB_STORE_USER))
998 return; 998 return;
999 999
1000 list_del(&page->lru); 1000 list_del(&page->lru);
1001 } 1001 }
1002 1002
1003 /* Tracking of the number of slabs for debugging purposes */ 1003 /* Tracking of the number of slabs for debugging purposes */
1004 static inline unsigned long slabs_node(struct kmem_cache *s, int node) 1004 static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1005 { 1005 {
1006 struct kmem_cache_node *n = get_node(s, node); 1006 struct kmem_cache_node *n = get_node(s, node);
1007 1007
1008 return atomic_long_read(&n->nr_slabs); 1008 return atomic_long_read(&n->nr_slabs);
1009 } 1009 }
1010 1010
1011 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n) 1011 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1012 { 1012 {
1013 return atomic_long_read(&n->nr_slabs); 1013 return atomic_long_read(&n->nr_slabs);
1014 } 1014 }
1015 1015
1016 static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) 1016 static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1017 { 1017 {
1018 struct kmem_cache_node *n = get_node(s, node); 1018 struct kmem_cache_node *n = get_node(s, node);
1019 1019
1020 /* 1020 /*
1021 * May be called early in order to allocate a slab for the 1021 * May be called early in order to allocate a slab for the
1022 * kmem_cache_node structure. Solve the chicken-egg 1022 * kmem_cache_node structure. Solve the chicken-egg
1023 * dilemma by deferring the increment of the count during 1023 * dilemma by deferring the increment of the count during
1024 * bootstrap (see early_kmem_cache_node_alloc). 1024 * bootstrap (see early_kmem_cache_node_alloc).
1025 */ 1025 */
1026 if (n) { 1026 if (n) {
1027 atomic_long_inc(&n->nr_slabs); 1027 atomic_long_inc(&n->nr_slabs);
1028 atomic_long_add(objects, &n->total_objects); 1028 atomic_long_add(objects, &n->total_objects);
1029 } 1029 }
1030 } 1030 }
1031 static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) 1031 static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1032 { 1032 {
1033 struct kmem_cache_node *n = get_node(s, node); 1033 struct kmem_cache_node *n = get_node(s, node);
1034 1034
1035 atomic_long_dec(&n->nr_slabs); 1035 atomic_long_dec(&n->nr_slabs);
1036 atomic_long_sub(objects, &n->total_objects); 1036 atomic_long_sub(objects, &n->total_objects);
1037 } 1037 }
1038 1038
1039 /* Object debug checks for alloc/free paths */ 1039 /* Object debug checks for alloc/free paths */
1040 static void setup_object_debug(struct kmem_cache *s, struct page *page, 1040 static void setup_object_debug(struct kmem_cache *s, struct page *page,
1041 void *object) 1041 void *object)
1042 { 1042 {
1043 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))) 1043 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1044 return; 1044 return;
1045 1045
1046 init_object(s, object, SLUB_RED_INACTIVE); 1046 init_object(s, object, SLUB_RED_INACTIVE);
1047 init_tracking(s, object); 1047 init_tracking(s, object);
1048 } 1048 }
1049 1049
1050 static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page, 1050 static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page,
1051 void *object, unsigned long addr) 1051 void *object, unsigned long addr)
1052 { 1052 {
1053 if (!check_slab(s, page)) 1053 if (!check_slab(s, page))
1054 goto bad; 1054 goto bad;
1055 1055
1056 if (!check_valid_pointer(s, page, object)) { 1056 if (!check_valid_pointer(s, page, object)) {
1057 object_err(s, page, object, "Freelist Pointer check fails"); 1057 object_err(s, page, object, "Freelist Pointer check fails");
1058 goto bad; 1058 goto bad;
1059 } 1059 }
1060 1060
1061 if (!check_object(s, page, object, SLUB_RED_INACTIVE)) 1061 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1062 goto bad; 1062 goto bad;
1063 1063
1064 /* Success: perform special debug activities for allocs */ 1064 /* Success: perform special debug activities for allocs */
1065 if (s->flags & SLAB_STORE_USER) 1065 if (s->flags & SLAB_STORE_USER)
1066 set_track(s, object, TRACK_ALLOC, addr); 1066 set_track(s, object, TRACK_ALLOC, addr);
1067 trace(s, page, object, 1); 1067 trace(s, page, object, 1);
1068 init_object(s, object, SLUB_RED_ACTIVE); 1068 init_object(s, object, SLUB_RED_ACTIVE);
1069 return 1; 1069 return 1;
1070 1070
1071 bad: 1071 bad:
1072 if (PageSlab(page)) { 1072 if (PageSlab(page)) {
1073 /* 1073 /*
1074 * If this is a slab page then let's do the best we can 1074 * If this is a slab page then let's do the best we can
1075 * to avoid issues in the future. Marking all objects 1075 * to avoid issues in the future. Marking all objects
1076 * as used avoids touching the remaining objects. 1076 * as used avoids touching the remaining objects.
1077 */ 1077 */
1078 slab_fix(s, "Marking all objects used"); 1078 slab_fix(s, "Marking all objects used");
1079 page->inuse = page->objects; 1079 page->inuse = page->objects;
1080 page->freelist = NULL; 1080 page->freelist = NULL;
1081 } 1081 }
1082 return 0; 1082 return 0;
1083 } 1083 }
1084 1084
1085 static noinline int free_debug_processing(struct kmem_cache *s, 1085 static noinline int free_debug_processing(struct kmem_cache *s,
1086 struct page *page, void *object, unsigned long addr) 1086 struct page *page, void *object, unsigned long addr)
1087 { 1087 {
1088 unsigned long flags; 1088 unsigned long flags;
1089 int rc = 0; 1089 int rc = 0;
1090 1090
1091 local_irq_save(flags); 1091 local_irq_save(flags);
1092 slab_lock(page); 1092 slab_lock(page);
1093 1093
1094 if (!check_slab(s, page)) 1094 if (!check_slab(s, page))
1095 goto fail; 1095 goto fail;
1096 1096
1097 if (!check_valid_pointer(s, page, object)) { 1097 if (!check_valid_pointer(s, page, object)) {
1098 slab_err(s, page, "Invalid object pointer 0x%p", object); 1098 slab_err(s, page, "Invalid object pointer 0x%p", object);
1099 goto fail; 1099 goto fail;
1100 } 1100 }
1101 1101
1102 if (on_freelist(s, page, object)) { 1102 if (on_freelist(s, page, object)) {
1103 object_err(s, page, object, "Object already free"); 1103 object_err(s, page, object, "Object already free");
1104 goto fail; 1104 goto fail;
1105 } 1105 }
1106 1106
1107 if (!check_object(s, page, object, SLUB_RED_ACTIVE)) 1107 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1108 goto out; 1108 goto out;
1109 1109
1110 if (unlikely(s != page->slab)) { 1110 if (unlikely(s != page->slab)) {
1111 if (!PageSlab(page)) { 1111 if (!PageSlab(page)) {
1112 slab_err(s, page, "Attempt to free object(0x%p) " 1112 slab_err(s, page, "Attempt to free object(0x%p) "
1113 "outside of slab", object); 1113 "outside of slab", object);
1114 } else if (!page->slab) { 1114 } else if (!page->slab) {
1115 printk(KERN_ERR 1115 printk(KERN_ERR
1116 "SLUB <none>: no slab for object 0x%p.\n", 1116 "SLUB <none>: no slab for object 0x%p.\n",
1117 object); 1117 object);
1118 dump_stack(); 1118 dump_stack();
1119 } else 1119 } else
1120 object_err(s, page, object, 1120 object_err(s, page, object,
1121 "page slab pointer corrupt."); 1121 "page slab pointer corrupt.");
1122 goto fail; 1122 goto fail;
1123 } 1123 }
1124 1124
1125 if (s->flags & SLAB_STORE_USER) 1125 if (s->flags & SLAB_STORE_USER)
1126 set_track(s, object, TRACK_FREE, addr); 1126 set_track(s, object, TRACK_FREE, addr);
1127 trace(s, page, object, 0); 1127 trace(s, page, object, 0);
1128 init_object(s, object, SLUB_RED_INACTIVE); 1128 init_object(s, object, SLUB_RED_INACTIVE);
1129 rc = 1; 1129 rc = 1;
1130 out: 1130 out:
1131 slab_unlock(page); 1131 slab_unlock(page);
1132 local_irq_restore(flags); 1132 local_irq_restore(flags);
1133 return rc; 1133 return rc;
1134 1134
1135 fail: 1135 fail:
1136 slab_fix(s, "Object at 0x%p not freed", object); 1136 slab_fix(s, "Object at 0x%p not freed", object);
1137 goto out; 1137 goto out;
1138 } 1138 }
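/*
 * The debug free path takes the slab lock with interrupts off and
 * checks, in order: that the page really is a slab, that the pointer is
 * a valid object address within it, that the object is not already on
 * the freelist (double free), that its red zone and poison are intact,
 * and that page->slab matches the cache being freed to.  Only then is
 * the free record updated and the object re-poisoned; otherwise
 * slab_fix() reports that the object was not freed and the caller skips
 * the free.
 */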
1139 1139
1140 static int __init setup_slub_debug(char *str) 1140 static int __init setup_slub_debug(char *str)
1141 { 1141 {
1142 slub_debug = DEBUG_DEFAULT_FLAGS; 1142 slub_debug = DEBUG_DEFAULT_FLAGS;
1143 if (*str++ != '=' || !*str) 1143 if (*str++ != '=' || !*str)
1144 /* 1144 /*
1145 * No options specified. Switch on full debugging. 1145 * No options specified. Switch on full debugging.
1146 */ 1146 */
1147 goto out; 1147 goto out;
1148 1148
1149 if (*str == ',') 1149 if (*str == ',')
1150 /* 1150 /*
1151 * No options but restriction on slabs. This means full 1151 * No options but restriction on slabs. This means full
1152 * debugging for slabs matching a pattern. 1152 * debugging for slabs matching a pattern.
1153 */ 1153 */
1154 goto check_slabs; 1154 goto check_slabs;
1155 1155
1156 if (tolower(*str) == 'o') { 1156 if (tolower(*str) == 'o') {
1157 /* 1157 /*
1158 * Avoid enabling debugging on caches if their minimum order 1158 * Avoid enabling debugging on caches if their minimum order
1159 * would increase as a result. 1159 * would increase as a result.
1160 */ 1160 */
1161 disable_higher_order_debug = 1; 1161 disable_higher_order_debug = 1;
1162 goto out; 1162 goto out;
1163 } 1163 }
1164 1164
1165 slub_debug = 0; 1165 slub_debug = 0;
1166 if (*str == '-') 1166 if (*str == '-')
1167 /* 1167 /*
1168 * Switch off all debugging measures. 1168 * Switch off all debugging measures.
1169 */ 1169 */
1170 goto out; 1170 goto out;
1171 1171
1172 /* 1172 /*
1173 * Determine which debug features should be switched on 1173 * Determine which debug features should be switched on
1174 */ 1174 */
1175 for (; *str && *str != ','; str++) { 1175 for (; *str && *str != ','; str++) {
1176 switch (tolower(*str)) { 1176 switch (tolower(*str)) {
1177 case 'f': 1177 case 'f':
1178 slub_debug |= SLAB_DEBUG_FREE; 1178 slub_debug |= SLAB_DEBUG_FREE;
1179 break; 1179 break;
1180 case 'z': 1180 case 'z':
1181 slub_debug |= SLAB_RED_ZONE; 1181 slub_debug |= SLAB_RED_ZONE;
1182 break; 1182 break;
1183 case 'p': 1183 case 'p':
1184 slub_debug |= SLAB_POISON; 1184 slub_debug |= SLAB_POISON;
1185 break; 1185 break;
1186 case 'u': 1186 case 'u':
1187 slub_debug |= SLAB_STORE_USER; 1187 slub_debug |= SLAB_STORE_USER;
1188 break; 1188 break;
1189 case 't': 1189 case 't':
1190 slub_debug |= SLAB_TRACE; 1190 slub_debug |= SLAB_TRACE;
1191 break; 1191 break;
1192 case 'a': 1192 case 'a':
1193 slub_debug |= SLAB_FAILSLAB; 1193 slub_debug |= SLAB_FAILSLAB;
1194 break; 1194 break;
1195 default: 1195 default:
1196 printk(KERN_ERR "slub_debug option '%c' " 1196 printk(KERN_ERR "slub_debug option '%c' "
1197 "unknown. skipped\n", *str); 1197 "unknown. skipped\n", *str);
1198 } 1198 }
1199 } 1199 }
1200 1200
1201 check_slabs: 1201 check_slabs:
1202 if (*str == ',') 1202 if (*str == ',')
1203 slub_debug_slabs = str + 1; 1203 slub_debug_slabs = str + 1;
1204 out: 1204 out:
1205 return 1; 1205 return 1;
1206 } 1206 }
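/*
 * Example command lines (hypothetical, built from the option letters
 * above): slub_debug=FZP enables sanity checks, red zoning and
 * poisoning for every cache; slub_debug=U,kmalloc-64 stores alloc/free
 * tracking only for caches whose name starts with "kmalloc-64", since
 * kmem_cache_flags() below matches the pattern with strncmp();
 * slub_debug=O keeps the default debugging but drops it for caches
 * whose minimum slab order would grow; slub_debug=- switches debugging
 * off entirely.
 */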
1207 1207
1208 __setup("slub_debug", setup_slub_debug); 1208 __setup("slub_debug", setup_slub_debug);
1209 1209
1210 static unsigned long kmem_cache_flags(unsigned long objsize, 1210 static unsigned long kmem_cache_flags(unsigned long objsize,
1211 unsigned long flags, const char *name, 1211 unsigned long flags, const char *name,
1212 void (*ctor)(void *)) 1212 void (*ctor)(void *))
1213 { 1213 {
1214 /* 1214 /*
1215 * Enable debugging if selected on the kernel commandline. 1215 * Enable debugging if selected on the kernel commandline.
1216 */ 1216 */
1217 if (slub_debug && (!slub_debug_slabs || 1217 if (slub_debug && (!slub_debug_slabs ||
1218 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))) 1218 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
1219 flags |= slub_debug; 1219 flags |= slub_debug;
1220 1220
1221 return flags; 1221 return flags;
1222 } 1222 }
1223 #else 1223 #else
1224 static inline void setup_object_debug(struct kmem_cache *s, 1224 static inline void setup_object_debug(struct kmem_cache *s,
1225 struct page *page, void *object) {} 1225 struct page *page, void *object) {}
1226 1226
1227 static inline int alloc_debug_processing(struct kmem_cache *s, 1227 static inline int alloc_debug_processing(struct kmem_cache *s,
1228 struct page *page, void *object, unsigned long addr) { return 0; } 1228 struct page *page, void *object, unsigned long addr) { return 0; }
1229 1229
1230 static inline int free_debug_processing(struct kmem_cache *s, 1230 static inline int free_debug_processing(struct kmem_cache *s,
1231 struct page *page, void *object, unsigned long addr) { return 0; } 1231 struct page *page, void *object, unsigned long addr) { return 0; }
1232 1232
1233 static inline int slab_pad_check(struct kmem_cache *s, struct page *page) 1233 static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1234 { return 1; } 1234 { return 1; }
1235 static inline int check_object(struct kmem_cache *s, struct page *page, 1235 static inline int check_object(struct kmem_cache *s, struct page *page,
1236 void *object, u8 val) { return 1; } 1236 void *object, u8 val) { return 1; }
1237 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, 1237 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1238 struct page *page) {} 1238 struct page *page) {}
1239 static inline void remove_full(struct kmem_cache *s, struct page *page) {} 1239 static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1240 static inline unsigned long kmem_cache_flags(unsigned long objsize, 1240 static inline unsigned long kmem_cache_flags(unsigned long objsize,
1241 unsigned long flags, const char *name, 1241 unsigned long flags, const char *name,
1242 void (*ctor)(void *)) 1242 void (*ctor)(void *))
1243 { 1243 {
1244 return flags; 1244 return flags;
1245 } 1245 }
1246 #define slub_debug 0 1246 #define slub_debug 0
1247 1247
1248 #define disable_higher_order_debug 0 1248 #define disable_higher_order_debug 0
1249 1249
1250 static inline unsigned long slabs_node(struct kmem_cache *s, int node) 1250 static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1251 { return 0; } 1251 { return 0; }
1252 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n) 1252 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1253 { return 0; } 1253 { return 0; }
1254 static inline void inc_slabs_node(struct kmem_cache *s, int node, 1254 static inline void inc_slabs_node(struct kmem_cache *s, int node,
1255 int objects) {} 1255 int objects) {}
1256 static inline void dec_slabs_node(struct kmem_cache *s, int node, 1256 static inline void dec_slabs_node(struct kmem_cache *s, int node,
1257 int objects) {} 1257 int objects) {}
1258 1258
1259 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) 1259 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1260 { return 0; } 1260 { return 0; }
1261 1261
1262 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, 1262 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
1263 void *object) {} 1263 void *object) {}
1264 1264
1265 static inline void slab_free_hook(struct kmem_cache *s, void *x) {} 1265 static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
1266 1266
1267 #endif /* CONFIG_SLUB_DEBUG */ 1267 #endif /* CONFIG_SLUB_DEBUG */
1268 1268
1269 /* 1269 /*
1270 * Slab allocation and freeing 1270 * Slab allocation and freeing
1271 */ 1271 */
1272 static inline struct page *alloc_slab_page(gfp_t flags, int node, 1272 static inline struct page *alloc_slab_page(gfp_t flags, int node,
1273 struct kmem_cache_order_objects oo) 1273 struct kmem_cache_order_objects oo)
1274 { 1274 {
1275 int order = oo_order(oo); 1275 int order = oo_order(oo);
1276 1276
1277 flags |= __GFP_NOTRACK; 1277 flags |= __GFP_NOTRACK;
1278 1278
1279 if (node == NUMA_NO_NODE) 1279 if (node == NUMA_NO_NODE)
1280 return alloc_pages(flags, order); 1280 return alloc_pages(flags, order);
1281 else 1281 else
1282 return alloc_pages_exact_node(node, flags, order); 1282 return alloc_pages_exact_node(node, flags, order);
1283 } 1283 }
1284 1284
1285 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) 1285 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1286 { 1286 {
1287 struct page *page; 1287 struct page *page;
1288 struct kmem_cache_order_objects oo = s->oo; 1288 struct kmem_cache_order_objects oo = s->oo;
1289 gfp_t alloc_gfp; 1289 gfp_t alloc_gfp;
1290 1290
1291 flags &= gfp_allowed_mask; 1291 flags &= gfp_allowed_mask;
1292 1292
1293 if (flags & __GFP_WAIT) 1293 if (flags & __GFP_WAIT)
1294 local_irq_enable(); 1294 local_irq_enable();
1295 1295
1296 flags |= s->allocflags; 1296 flags |= s->allocflags;
1297 1297
1298 /* 1298 /*
1299 * Let the initial higher-order allocation fail under memory pressure 1299 * Let the initial higher-order allocation fail under memory pressure
1300 * so we fall-back to the minimum order allocation. 1300 * so we fall-back to the minimum order allocation.
1301 */ 1301 */
1302 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; 1302 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1303 1303
1304 page = alloc_slab_page(alloc_gfp, node, oo); 1304 page = alloc_slab_page(alloc_gfp, node, oo);
1305 if (unlikely(!page)) { 1305 if (unlikely(!page)) {
1306 oo = s->min; 1306 oo = s->min;
1307 /* 1307 /*
1308 * Allocation may have failed due to fragmentation. 1308 * Allocation may have failed due to fragmentation.
1309 * Try a lower order alloc if possible 1309 * Try a lower order alloc if possible
1310 */ 1310 */
1311 page = alloc_slab_page(flags, node, oo); 1311 page = alloc_slab_page(flags, node, oo);
1312 1312
1313 if (page) 1313 if (page)
1314 stat(s, ORDER_FALLBACK); 1314 stat(s, ORDER_FALLBACK);
1315 } 1315 }
1316 1316
1317 if (flags & __GFP_WAIT) 1317 if (flags & __GFP_WAIT)
1318 local_irq_disable(); 1318 local_irq_disable();
1319 1319
1320 if (!page) 1320 if (!page)
1321 return NULL; 1321 return NULL;
1322 1322
1323 if (kmemcheck_enabled 1323 if (kmemcheck_enabled
1324 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { 1324 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1325 int pages = 1 << oo_order(oo); 1325 int pages = 1 << oo_order(oo);
1326 1326
1327 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node); 1327 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1328 1328
1329 /* 1329 /*
1330 * Objects from caches that have a constructor don't get 1330 * Objects from caches that have a constructor don't get
1331 * cleared when they're allocated, so we need to do it here. 1331 * cleared when they're allocated, so we need to do it here.
1332 */ 1332 */
1333 if (s->ctor) 1333 if (s->ctor)
1334 kmemcheck_mark_uninitialized_pages(page, pages); 1334 kmemcheck_mark_uninitialized_pages(page, pages);
1335 else 1335 else
1336 kmemcheck_mark_unallocated_pages(page, pages); 1336 kmemcheck_mark_unallocated_pages(page, pages);
1337 } 1337 }
1338 1338
1339 page->objects = oo_objects(oo); 1339 page->objects = oo_objects(oo);
1340 mod_zone_page_state(page_zone(page), 1340 mod_zone_page_state(page_zone(page),
1341 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1341 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1342 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1342 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1343 1 << oo_order(oo)); 1343 1 << oo_order(oo));
1344 1344
1345 return page; 1345 return page;
1346 } 1346 }
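/*
 * The first attempt uses the cache's preferred order (s->oo) with
 * __GFP_NOWARN | __GFP_NORETRY and __GFP_NOFAIL cleared, so a
 * higher-order allocation may fail quietly under memory pressure; only
 * then is the minimum order (s->min) tried and ORDER_FALLBACK counted.
 * Interrupts are re-enabled around the page allocation for __GFP_WAIT
 * requests, since those may sleep.
 */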
1347 1347
1348 static void setup_object(struct kmem_cache *s, struct page *page, 1348 static void setup_object(struct kmem_cache *s, struct page *page,
1349 void *object) 1349 void *object)
1350 { 1350 {
1351 setup_object_debug(s, page, object); 1351 setup_object_debug(s, page, object);
1352 if (unlikely(s->ctor)) 1352 if (unlikely(s->ctor))
1353 s->ctor(object); 1353 s->ctor(object);
1354 } 1354 }
1355 1355
1356 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) 1356 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1357 { 1357 {
1358 struct page *page; 1358 struct page *page;
1359 void *start; 1359 void *start;
1360 void *last; 1360 void *last;
1361 void *p; 1361 void *p;
1362 1362
1363 BUG_ON(flags & GFP_SLAB_BUG_MASK); 1363 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1364 1364
1365 page = allocate_slab(s, 1365 page = allocate_slab(s,
1366 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); 1366 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1367 if (!page) 1367 if (!page)
1368 goto out; 1368 goto out;
1369 1369
1370 inc_slabs_node(s, page_to_nid(page), page->objects); 1370 inc_slabs_node(s, page_to_nid(page), page->objects);
1371 page->slab = s; 1371 page->slab = s;
1372 page->flags |= 1 << PG_slab; 1372 __SetPageSlab(page);
1373 1373
1374 start = page_address(page); 1374 start = page_address(page);
1375 1375
1376 if (unlikely(s->flags & SLAB_POISON)) 1376 if (unlikely(s->flags & SLAB_POISON))
1377 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page)); 1377 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
1378 1378
1379 last = start; 1379 last = start;
1380 for_each_object(p, s, start, page->objects) { 1380 for_each_object(p, s, start, page->objects) {
1381 setup_object(s, page, last); 1381 setup_object(s, page, last);
1382 set_freepointer(s, last, p); 1382 set_freepointer(s, last, p);
1383 last = p; 1383 last = p;
1384 } 1384 }
1385 setup_object(s, page, last); 1385 setup_object(s, page, last);
1386 set_freepointer(s, last, NULL); 1386 set_freepointer(s, last, NULL);
1387 1387
1388 page->freelist = start; 1388 page->freelist = start;
1389 page->inuse = page->objects; 1389 page->inuse = page->objects;
1390 page->frozen = 1; 1390 page->frozen = 1;
1391 out: 1391 out:
1392 return page; 1392 return page;
1393 } 1393 }
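/*
 * The hunk above replaces the open-coded "page->flags |= 1 << PG_slab"
 * with __SetPageSlab(), the non-atomic page-flag setter.  The page is
 * freshly allocated and not yet visible to anyone else, so a non-atomic
 * set is sufficient, and it now pairs with the __ClearPageSlab() call
 * in __free_slab() below.
 */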
1394 1394
1395 static void __free_slab(struct kmem_cache *s, struct page *page) 1395 static void __free_slab(struct kmem_cache *s, struct page *page)
1396 { 1396 {
1397 int order = compound_order(page); 1397 int order = compound_order(page);
1398 int pages = 1 << order; 1398 int pages = 1 << order;
1399 1399
1400 if (kmem_cache_debug(s)) { 1400 if (kmem_cache_debug(s)) {
1401 void *p; 1401 void *p;
1402 1402
1403 slab_pad_check(s, page); 1403 slab_pad_check(s, page);
1404 for_each_object(p, s, page_address(page), 1404 for_each_object(p, s, page_address(page),
1405 page->objects) 1405 page->objects)
1406 check_object(s, page, p, SLUB_RED_INACTIVE); 1406 check_object(s, page, p, SLUB_RED_INACTIVE);
1407 } 1407 }
1408 1408
1409 kmemcheck_free_shadow(page, compound_order(page)); 1409 kmemcheck_free_shadow(page, compound_order(page));
1410 1410
1411 mod_zone_page_state(page_zone(page), 1411 mod_zone_page_state(page_zone(page),
1412 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1412 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1413 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1413 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1414 -pages); 1414 -pages);
1415 1415
1416 __ClearPageSlab(page); 1416 __ClearPageSlab(page);
1417 reset_page_mapcount(page); 1417 reset_page_mapcount(page);
1418 if (current->reclaim_state) 1418 if (current->reclaim_state)
1419 current->reclaim_state->reclaimed_slab += pages; 1419 current->reclaim_state->reclaimed_slab += pages;
1420 __free_pages(page, order); 1420 __free_pages(page, order);
1421 } 1421 }
1422 1422
1423 #define need_reserve_slab_rcu \ 1423 #define need_reserve_slab_rcu \
1424 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) 1424 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1425 1425
1426 static void rcu_free_slab(struct rcu_head *h) 1426 static void rcu_free_slab(struct rcu_head *h)
1427 { 1427 {
1428 struct page *page; 1428 struct page *page;
1429 1429
1430 if (need_reserve_slab_rcu) 1430 if (need_reserve_slab_rcu)
1431 page = virt_to_head_page(h); 1431 page = virt_to_head_page(h);
1432 else 1432 else
1433 page = container_of((struct list_head *)h, struct page, lru); 1433 page = container_of((struct list_head *)h, struct page, lru);
1434 1434
1435 __free_slab(page->slab, page); 1435 __free_slab(page->slab, page);
1436 } 1436 }
1437 1437
1438 static void free_slab(struct kmem_cache *s, struct page *page) 1438 static void free_slab(struct kmem_cache *s, struct page *page)
1439 { 1439 {
1440 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { 1440 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1441 struct rcu_head *head; 1441 struct rcu_head *head;
1442 1442
1443 if (need_reserve_slab_rcu) { 1443 if (need_reserve_slab_rcu) {
1444 int order = compound_order(page); 1444 int order = compound_order(page);
1445 int offset = (PAGE_SIZE << order) - s->reserved; 1445 int offset = (PAGE_SIZE << order) - s->reserved;
1446 1446
1447 VM_BUG_ON(s->reserved != sizeof(*head)); 1447 VM_BUG_ON(s->reserved != sizeof(*head));
1448 head = page_address(page) + offset; 1448 head = page_address(page) + offset;
1449 } else { 1449 } else {
1450 /* 1450 /*
1451 * RCU free overloads the RCU head over the LRU 1451 * RCU free overloads the RCU head over the LRU
1452 */ 1452 */
1453 head = (void *)&page->lru; 1453 head = (void *)&page->lru;
1454 } 1454 }
1455 1455
1456 call_rcu(head, rcu_free_slab); 1456 call_rcu(head, rcu_free_slab);
1457 } else 1457 } else
1458 __free_slab(s, page); 1458 __free_slab(s, page);
1459 } 1459 }
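/*
 * For SLAB_DESTROY_BY_RCU caches the pages may only be returned to the
 * page allocator after a grace period.  The rcu_head is normally
 * overlaid on page->lru, but when struct rcu_head is larger than the
 * lru list_head (need_reserve_slab_rcu) it is placed in the s->reserved
 * bytes kept at the end of the slab instead, and rcu_free_slab() maps
 * the head back to its page either way.
 */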
1460 1460
1461 static void discard_slab(struct kmem_cache *s, struct page *page) 1461 static void discard_slab(struct kmem_cache *s, struct page *page)
1462 { 1462 {
1463 dec_slabs_node(s, page_to_nid(page), page->objects); 1463 dec_slabs_node(s, page_to_nid(page), page->objects);
1464 free_slab(s, page); 1464 free_slab(s, page);
1465 } 1465 }
1466 1466
1467 /* 1467 /*
1468 * Management of partially allocated slabs. 1468 * Management of partially allocated slabs.
1469 * 1469 *
1470 * list_lock must be held. 1470 * list_lock must be held.
1471 */ 1471 */
1472 static inline void add_partial(struct kmem_cache_node *n, 1472 static inline void add_partial(struct kmem_cache_node *n,
1473 struct page *page, int tail) 1473 struct page *page, int tail)
1474 { 1474 {
1475 n->nr_partial++; 1475 n->nr_partial++;
1476 if (tail == DEACTIVATE_TO_TAIL) 1476 if (tail == DEACTIVATE_TO_TAIL)
1477 list_add_tail(&page->lru, &n->partial); 1477 list_add_tail(&page->lru, &n->partial);
1478 else 1478 else
1479 list_add(&page->lru, &n->partial); 1479 list_add(&page->lru, &n->partial);
1480 } 1480 }
1481 1481
1482 /* 1482 /*
1483 * list_lock must be held. 1483 * list_lock must be held.
1484 */ 1484 */
1485 static inline void remove_partial(struct kmem_cache_node *n, 1485 static inline void remove_partial(struct kmem_cache_node *n,
1486 struct page *page) 1486 struct page *page)
1487 { 1487 {
1488 list_del(&page->lru); 1488 list_del(&page->lru);
1489 n->nr_partial--; 1489 n->nr_partial--;
1490 } 1490 }
1491 1491
1492 /* 1492 /*
1493 * Lock slab, remove from the partial list and put the object into the 1493 * Lock slab, remove from the partial list and put the object into the
1494 * per cpu freelist. 1494 * per cpu freelist.
1495 * 1495 *
1496 * Returns a list of objects or NULL if it fails. 1496 * Returns a list of objects or NULL if it fails.
1497 * 1497 *
1498 * Must hold list_lock. 1498 * Must hold list_lock.
1499 */ 1499 */
1500 static inline void *acquire_slab(struct kmem_cache *s, 1500 static inline void *acquire_slab(struct kmem_cache *s,
1501 struct kmem_cache_node *n, struct page *page, 1501 struct kmem_cache_node *n, struct page *page,
1502 int mode) 1502 int mode)
1503 { 1503 {
1504 void *freelist; 1504 void *freelist;
1505 unsigned long counters; 1505 unsigned long counters;
1506 struct page new; 1506 struct page new;
1507 1507
1508 /* 1508 /*
1509 * Zap the freelist and set the frozen bit. 1509 * Zap the freelist and set the frozen bit.
1510 * The old freelist is the list of objects for the 1510 * The old freelist is the list of objects for the
1511 * per cpu allocation list. 1511 * per cpu allocation list.
1512 */ 1512 */
1513 do { 1513 do {
1514 freelist = page->freelist; 1514 freelist = page->freelist;
1515 counters = page->counters; 1515 counters = page->counters;
1516 new.counters = counters; 1516 new.counters = counters;
1517 if (mode) 1517 if (mode) {
1518 new.inuse = page->objects; 1518 new.inuse = page->objects;
1519 new.freelist = NULL;
1520 } else {
1521 new.freelist = freelist;
1522 }
1519 1523
1520 VM_BUG_ON(new.frozen); 1524 VM_BUG_ON(new.frozen);
1521 new.frozen = 1; 1525 new.frozen = 1;
1522 1526
1523 } while (!__cmpxchg_double_slab(s, page, 1527 } while (!__cmpxchg_double_slab(s, page,
1524 freelist, counters, 1528 freelist, counters,
1525 NULL, new.counters, 1529 new.freelist, new.counters,
1526 "lock and freeze")); 1530 "lock and freeze"));
1527 1531
1528 remove_partial(n, page); 1532 remove_partial(n, page);
1529 return freelist; 1533 return freelist;
1530 } 1534 }
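/*
 * The reworked cmpxchg above distinguishes the two callers: when the
 * page becomes the new cpu slab (mode != 0) the freelist is zapped to
 * NULL and inuse is raised to page->objects as before, but when the
 * page is only being frozen for the per-cpu partial list its freelist
 * is now left in place.  Previously the freelist was always cleared and
 * then restored with a plain store in get_partial_node(); an object
 * freed to the frozen page in that window could be overwritten and
 * lost, which appears to be the leak this hunk closes.
 */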
1531 1535
1532 static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); 1536 static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1533 1537
1534 /* 1538 /*
1535 * Try to allocate a partial slab from a specific node. 1539 * Try to allocate a partial slab from a specific node.
1536 */ 1540 */
1537 static void *get_partial_node(struct kmem_cache *s, 1541 static void *get_partial_node(struct kmem_cache *s,
1538 struct kmem_cache_node *n, struct kmem_cache_cpu *c) 1542 struct kmem_cache_node *n, struct kmem_cache_cpu *c)
1539 { 1543 {
1540 struct page *page, *page2; 1544 struct page *page, *page2;
1541 void *object = NULL; 1545 void *object = NULL;
1542 1546
1543 /* 1547 /*
1544 * Racy check. If we mistakenly see no partial slabs then we 1548 * Racy check. If we mistakenly see no partial slabs then we
1545 * just allocate an empty slab. If we mistakenly try to get a 1549 * just allocate an empty slab. If we mistakenly try to get a
1546 * partial slab and there is none available then get_partial_node() 1550 * partial slab and there is none available then get_partial_node()
1547 * will return NULL. 1551 * will return NULL.
1548 */ 1552 */
1549 if (!n || !n->nr_partial) 1553 if (!n || !n->nr_partial)
1550 return NULL; 1554 return NULL;
1551 1555
1552 spin_lock(&n->list_lock); 1556 spin_lock(&n->list_lock);
1553 list_for_each_entry_safe(page, page2, &n->partial, lru) { 1557 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1554 void *t = acquire_slab(s, n, page, object == NULL); 1558 void *t = acquire_slab(s, n, page, object == NULL);
1555 int available; 1559 int available;
1556 1560
1557 if (!t) 1561 if (!t)
1558 break; 1562 break;
1559 1563
1560 if (!object) { 1564 if (!object) {
1561 c->page = page; 1565 c->page = page;
1562 c->node = page_to_nid(page); 1566 c->node = page_to_nid(page);
1563 stat(s, ALLOC_FROM_PARTIAL); 1567 stat(s, ALLOC_FROM_PARTIAL);
1564 object = t; 1568 object = t;
1565 available = page->objects - page->inuse; 1569 available = page->objects - page->inuse;
1566 } else { 1570 } else {
1567 page->freelist = t;
1568 available = put_cpu_partial(s, page, 0); 1571 available = put_cpu_partial(s, page, 0);
1569 stat(s, CPU_PARTIAL_NODE); 1572 stat(s, CPU_PARTIAL_NODE);
1570 } 1573 }
1571 if (kmem_cache_debug(s) || available > s->cpu_partial / 2) 1574 if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
1572 break; 1575 break;
1573 1576
1574 } 1577 }
1575 spin_unlock(&n->list_lock); 1578 spin_unlock(&n->list_lock);
1576 return object; 1579 return object;
1577 } 1580 }
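/*
 * The dropped "page->freelist = t" assignment follows from the
 * acquire_slab() change: a page headed for the per-cpu partial list now
 * keeps its freelist across the freeze, so there is nothing to restore
 * here.  The first page acquired becomes the cpu slab and its freelist
 * is returned as the object chain; further pages are stashed via
 * put_cpu_partial() until roughly s->cpu_partial / 2 free objects have
 * been gathered, and kmem_cache_debug() caches stop after the first
 * page.
 */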
1578 1581
1579 /* 1582 /*
1580 * Get a page from somewhere. Search in increasing NUMA distances. 1583 * Get a page from somewhere. Search in increasing NUMA distances.
1581 */ 1584 */
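/*
 * The signature change below matches what the function actually hands
 * back: the object chain obtained from get_partial_node(), not a
 * struct page pointer, so the return type is now void *.
 */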
1582 static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags, 1585 static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1583 struct kmem_cache_cpu *c) 1586 struct kmem_cache_cpu *c)
1584 { 1587 {
1585 #ifdef CONFIG_NUMA 1588 #ifdef CONFIG_NUMA
1586 struct zonelist *zonelist; 1589 struct zonelist *zonelist;
1587 struct zoneref *z; 1590 struct zoneref *z;
1588 struct zone *zone; 1591 struct zone *zone;
1589 enum zone_type high_zoneidx = gfp_zone(flags); 1592 enum zone_type high_zoneidx = gfp_zone(flags);
1590 void *object; 1593 void *object;
1591 unsigned int cpuset_mems_cookie; 1594 unsigned int cpuset_mems_cookie;
1592 1595
1593 /* 1596 /*
1594 * The defrag ratio allows a configuration of the tradeoffs between 1597 * The defrag ratio allows a configuration of the tradeoffs between
1595 * inter node defragmentation and node local allocations. A lower 1598 * inter node defragmentation and node local allocations. A lower
1596 * defrag_ratio increases the tendency to do local allocations 1599 * defrag_ratio increases the tendency to do local allocations
1597 * instead of attempting to obtain partial slabs from other nodes. 1600 * instead of attempting to obtain partial slabs from other nodes.
1598 * 1601 *
1599 * If the defrag_ratio is set to 0 then kmalloc() always 1602 * If the defrag_ratio is set to 0 then kmalloc() always
1600 * returns node local objects. If the ratio is higher then kmalloc() 1603 * returns node local objects. If the ratio is higher then kmalloc()
1601 * may return off node objects because partial slabs are obtained 1604 * may return off node objects because partial slabs are obtained
1602 * from other nodes and filled up. 1605 * from other nodes and filled up.
1603 * 1606 *
1604 * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes 1607 * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
1605 * defrag_ratio = 1000) then every (well almost) allocation will 1608 * defrag_ratio = 1000) then every (well almost) allocation will
1606 * first attempt to defrag slab caches on other nodes. This means 1609 * first attempt to defrag slab caches on other nodes. This means
1607 * scanning over all nodes to look for partial slabs which may be 1610 * scanning over all nodes to look for partial slabs which may be
1608 * expensive if we do it every time we are trying to find a slab 1611 * expensive if we do it every time we are trying to find a slab
1609 * with available objects. 1612 * with available objects.
1610 */ 1613 */
1611 if (!s->remote_node_defrag_ratio || 1614 if (!s->remote_node_defrag_ratio ||
1612 get_cycles() % 1024 > s->remote_node_defrag_ratio) 1615 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1613 return NULL; 1616 return NULL;
1614 1617
1615 do { 1618 do {
1616 cpuset_mems_cookie = get_mems_allowed(); 1619 cpuset_mems_cookie = get_mems_allowed();
1617 zonelist = node_zonelist(slab_node(current->mempolicy), flags); 1620 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
1618 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 1621 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1619 struct kmem_cache_node *n; 1622 struct kmem_cache_node *n;
1620 1623
1621 n = get_node(s, zone_to_nid(zone)); 1624 n = get_node(s, zone_to_nid(zone));
1622 1625
1623 if (n && cpuset_zone_allowed_hardwall(zone, flags) && 1626 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1624 n->nr_partial > s->min_partial) { 1627 n->nr_partial > s->min_partial) {
1625 object = get_partial_node(s, n, c); 1628 object = get_partial_node(s, n, c);
1626 if (object) { 1629 if (object) {
1627 /* 1630 /*
1628 * Return the object even if 1631 * Return the object even if
1629 * put_mems_allowed indicated that 1632 * put_mems_allowed indicated that
1630 * the cpuset mems_allowed was 1633 * the cpuset mems_allowed was
1631 * updated in parallel. It's a 1634 * updated in parallel. It's a
1632 * harmless race between the alloc 1635 * harmless race between the alloc
1633 * and the cpuset update. 1636 * and the cpuset update.
1634 */ 1637 */
1635 put_mems_allowed(cpuset_mems_cookie); 1638 put_mems_allowed(cpuset_mems_cookie);
1636 return object; 1639 return object;
1637 } 1640 }
1638 } 1641 }
1639 } 1642 }
1640 } while (!put_mems_allowed(cpuset_mems_cookie)); 1643 } while (!put_mems_allowed(cpuset_mems_cookie));
1641 #endif 1644 #endif
1642 return NULL; 1645 return NULL;
1643 } 1646 }
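/*
 * Illustrative userspace sketch, not part of mm/slub.c: the
 * remote_node_defrag_ratio check above is probabilistic.  With the internal
 * ratio in the range 0..1000 (ten times the sysfs defrag_ratio value), a
 * remote-node search runs on roughly ratio/1024 of the slow-path
 * allocations.  rand() stands in for get_cycles() here.
 */
#include <stdio.h>
#include <stdlib.h>

static int try_remote_nodes(unsigned int ratio)
{
	if (!ratio || (unsigned int)(rand() % 1024) > ratio)
		return 0;	/* stay node local */
	return 1;		/* scan other nodes for partial slabs */
}

int main(void)
{
	unsigned int ratio = 1000;	/* sysfs defrag_ratio = 100 */
	int remote = 0;

	for (int i = 0; i < 100000; i++)
		remote += try_remote_nodes(ratio);
	printf("remote searches: %d of 100000 (expect about %u in 1024)\n",
	       remote, ratio);
	return 0;
}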
1644 1647
1645 /* 1648 /*
1646 * Get a partial page, lock it and return it. 1649 * Get a partial page, lock it and return it.
1647 */ 1650 */
1648 static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, 1651 static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1649 struct kmem_cache_cpu *c) 1652 struct kmem_cache_cpu *c)
1650 { 1653 {
1651 void *object; 1654 void *object;
1652 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; 1655 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1653 1656
1654 object = get_partial_node(s, get_node(s, searchnode), c); 1657 object = get_partial_node(s, get_node(s, searchnode), c);
1655 if (object || node != NUMA_NO_NODE) 1658 if (object || node != NUMA_NO_NODE)
1656 return object; 1659 return object;
1657 1660
1658 return get_any_partial(s, flags, c); 1661 return get_any_partial(s, flags, c);
1659 } 1662 }
1660 1663
1661 #ifdef CONFIG_PREEMPT 1664 #ifdef CONFIG_PREEMPT
1662 /* 1665 /*
1663 * Calculate the next globally unique transaction for disambiguation 1666 * Calculate the next globally unique transaction for disambiguation
1664 * during cmpxchg. The transactions start with the cpu number and are then 1667 * during cmpxchg. The transactions start with the cpu number and are then
1665 * incremented by CONFIG_NR_CPUS. 1668 * incremented by CONFIG_NR_CPUS.
1666 */ 1669 */
1667 #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS) 1670 #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
1668 #else 1671 #else
1669 /* 1672 /*
1670 * No preemption supported therefore also no need to check for 1673 * No preemption supported therefore also no need to check for
1671 * different cpus. 1674 * different cpus.
1672 */ 1675 */
1673 #define TID_STEP 1 1676 #define TID_STEP 1
1674 #endif 1677 #endif
1675 1678
1676 static inline unsigned long next_tid(unsigned long tid) 1679 static inline unsigned long next_tid(unsigned long tid)
1677 { 1680 {
1678 return tid + TID_STEP; 1681 return tid + TID_STEP;
1679 } 1682 }
1680 1683
1681 static inline unsigned int tid_to_cpu(unsigned long tid) 1684 static inline unsigned int tid_to_cpu(unsigned long tid)
1682 { 1685 {
1683 return tid % TID_STEP; 1686 return tid % TID_STEP;
1684 } 1687 }
1685 1688
1686 static inline unsigned long tid_to_event(unsigned long tid) 1689 static inline unsigned long tid_to_event(unsigned long tid)
1687 { 1690 {
1688 return tid / TID_STEP; 1691 return tid / TID_STEP;
1689 } 1692 }
1690 1693
1691 static inline unsigned int init_tid(int cpu) 1694 static inline unsigned int init_tid(int cpu)
1692 { 1695 {
1693 return cpu; 1696 return cpu;
1694 } 1697 }
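/*
 * Illustrative userspace sketch, not part of mm/slub.c: how a transaction
 * id packs a cpu number (low bits) and an event counter (high bits).
 * Assumes a preemptible configuration with NR_CPUS = 6, which rounds up to
 * TID_STEP = 8.
 */
#include <stdio.h>

#define SKETCH_TID_STEP 8UL	/* roundup_pow_of_two(6) */

int main(void)
{
	unsigned long tid = 5;	/* init_tid(5): cpu 5, event 0 */

	for (int i = 0; i < 3; i++)
		tid += SKETCH_TID_STEP;		/* next_tid(), three times */

	printf("tid=%lu cpu=%lu event=%lu\n",
	       tid, tid % SKETCH_TID_STEP, tid / SKETCH_TID_STEP);
	/* prints: tid=29 cpu=5 event=3 */
	return 0;
}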
1695 1698
1696 static inline void note_cmpxchg_failure(const char *n, 1699 static inline void note_cmpxchg_failure(const char *n,
1697 const struct kmem_cache *s, unsigned long tid) 1700 const struct kmem_cache *s, unsigned long tid)
1698 { 1701 {
1699 #ifdef SLUB_DEBUG_CMPXCHG 1702 #ifdef SLUB_DEBUG_CMPXCHG
1700 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid); 1703 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
1701 1704
1702 printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name); 1705 printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
1703 1706
1704 #ifdef CONFIG_PREEMPT 1707 #ifdef CONFIG_PREEMPT
1705 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid)) 1708 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
1706 printk("due to cpu change %d -> %d\n", 1709 printk("due to cpu change %d -> %d\n",
1707 tid_to_cpu(tid), tid_to_cpu(actual_tid)); 1710 tid_to_cpu(tid), tid_to_cpu(actual_tid));
1708 else 1711 else
1709 #endif 1712 #endif
1710 if (tid_to_event(tid) != tid_to_event(actual_tid)) 1713 if (tid_to_event(tid) != tid_to_event(actual_tid))
1711 printk("due to cpu running other code. Event %ld->%ld\n", 1714 printk("due to cpu running other code. Event %ld->%ld\n",
1712 tid_to_event(tid), tid_to_event(actual_tid)); 1715 tid_to_event(tid), tid_to_event(actual_tid));
1713 else 1716 else
1714 printk("for unknown reason: actual=%lx was=%lx target=%lx\n", 1717 printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
1715 actual_tid, tid, next_tid(tid)); 1718 actual_tid, tid, next_tid(tid));
1716 #endif 1719 #endif
1717 stat(s, CMPXCHG_DOUBLE_CPU_FAIL); 1720 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
1718 } 1721 }
1719 1722
1720 void init_kmem_cache_cpus(struct kmem_cache *s) 1723 void init_kmem_cache_cpus(struct kmem_cache *s)
1721 { 1724 {
1722 int cpu; 1725 int cpu;
1723 1726
1724 for_each_possible_cpu(cpu) 1727 for_each_possible_cpu(cpu)
1725 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); 1728 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1726 } 1729 }
1727 1730
1728 /* 1731 /*
1729 * Remove the cpu slab 1732 * Remove the cpu slab
1730 */ 1733 */
1731 static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1734 static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1732 { 1735 {
1733 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; 1736 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1734 struct page *page = c->page; 1737 struct page *page = c->page;
1735 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1738 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1736 int lock = 0; 1739 int lock = 0;
1737 enum slab_modes l = M_NONE, m = M_NONE; 1740 enum slab_modes l = M_NONE, m = M_NONE;
1738 void *freelist; 1741 void *freelist;
1739 void *nextfree; 1742 void *nextfree;
1740 int tail = DEACTIVATE_TO_HEAD; 1743 int tail = DEACTIVATE_TO_HEAD;
1741 struct page new; 1744 struct page new;
1742 struct page old; 1745 struct page old;
1743 1746
1744 if (page->freelist) { 1747 if (page->freelist) {
1745 stat(s, DEACTIVATE_REMOTE_FREES); 1748 stat(s, DEACTIVATE_REMOTE_FREES);
1746 tail = DEACTIVATE_TO_TAIL; 1749 tail = DEACTIVATE_TO_TAIL;
1747 } 1750 }
1748 1751
1749 c->tid = next_tid(c->tid); 1752 c->tid = next_tid(c->tid);
1750 c->page = NULL; 1753 c->page = NULL;
1751 freelist = c->freelist; 1754 freelist = c->freelist;
1752 c->freelist = NULL; 1755 c->freelist = NULL;
1753 1756
1754 /* 1757 /*
1755 * Stage one: Free all available per cpu objects back 1758 * Stage one: Free all available per cpu objects back
1756 * to the page freelist while it is still frozen. Leave the 1759 * to the page freelist while it is still frozen. Leave the
1757 * last one. 1760 * last one.
1758 * 1761 *
1759 * There is no need to take the list->lock because the page 1762 * There is no need to take the list->lock because the page
1760 * is still frozen. 1763 * is still frozen.
1761 */ 1764 */
1762 while (freelist && (nextfree = get_freepointer(s, freelist))) { 1765 while (freelist && (nextfree = get_freepointer(s, freelist))) {
1763 void *prior; 1766 void *prior;
1764 unsigned long counters; 1767 unsigned long counters;
1765 1768
1766 do { 1769 do {
1767 prior = page->freelist; 1770 prior = page->freelist;
1768 counters = page->counters; 1771 counters = page->counters;
1769 set_freepointer(s, freelist, prior); 1772 set_freepointer(s, freelist, prior);
1770 new.counters = counters; 1773 new.counters = counters;
1771 new.inuse--; 1774 new.inuse--;
1772 VM_BUG_ON(!new.frozen); 1775 VM_BUG_ON(!new.frozen);
1773 1776
1774 } while (!__cmpxchg_double_slab(s, page, 1777 } while (!__cmpxchg_double_slab(s, page,
1775 prior, counters, 1778 prior, counters,
1776 freelist, new.counters, 1779 freelist, new.counters,
1777 "drain percpu freelist")); 1780 "drain percpu freelist"));
1778 1781
1779 freelist = nextfree; 1782 freelist = nextfree;
1780 } 1783 }
1781 1784
1782 /* 1785 /*
1783 * Stage two: Ensure that the page is unfrozen while the 1786 * Stage two: Ensure that the page is unfrozen while the
1784 * list presence reflects the actual number of objects 1787 * list presence reflects the actual number of objects
1785 * during unfreeze. 1788 * during unfreeze.
1786 * 1789 *
1787 * We setup the list membership and then perform a cmpxchg 1790 * We setup the list membership and then perform a cmpxchg
1788 * with the count. If there is a mismatch then the page 1791 * with the count. If there is a mismatch then the page
1789 * is not unfrozen but the page is on the wrong list. 1792 * is not unfrozen but the page is on the wrong list.
1790 * 1793 *
1791 * Then we restart the process which may have to remove 1794 * Then we restart the process which may have to remove
1792 * the page from the list that we just put it on again 1795 * the page from the list that we just put it on again
1793 * because the number of objects in the slab may have 1796 * because the number of objects in the slab may have
1794 * changed. 1797 * changed.
1795 */ 1798 */
1796 redo: 1799 redo:
1797 1800
1798 old.freelist = page->freelist; 1801 old.freelist = page->freelist;
1799 old.counters = page->counters; 1802 old.counters = page->counters;
1800 VM_BUG_ON(!old.frozen); 1803 VM_BUG_ON(!old.frozen);
1801 1804
1802 /* Determine target state of the slab */ 1805 /* Determine target state of the slab */
1803 new.counters = old.counters; 1806 new.counters = old.counters;
1804 if (freelist) { 1807 if (freelist) {
1805 new.inuse--; 1808 new.inuse--;
1806 set_freepointer(s, freelist, old.freelist); 1809 set_freepointer(s, freelist, old.freelist);
1807 new.freelist = freelist; 1810 new.freelist = freelist;
1808 } else 1811 } else
1809 new.freelist = old.freelist; 1812 new.freelist = old.freelist;
1810 1813
1811 new.frozen = 0; 1814 new.frozen = 0;
1812 1815
1813 if (!new.inuse && n->nr_partial > s->min_partial) 1816 if (!new.inuse && n->nr_partial > s->min_partial)
1814 m = M_FREE; 1817 m = M_FREE;
1815 else if (new.freelist) { 1818 else if (new.freelist) {
1816 m = M_PARTIAL; 1819 m = M_PARTIAL;
1817 if (!lock) { 1820 if (!lock) {
1818 lock = 1; 1821 lock = 1;
1819 /* 1822 /*
1820 * Taking the spinlock removes the possibility 1823 * Taking the spinlock removes the possibility
1821 * that acquire_slab() will see a slab page that 1824 * that acquire_slab() will see a slab page that
1822 * is frozen 1825 * is frozen
1823 */ 1826 */
1824 spin_lock(&n->list_lock); 1827 spin_lock(&n->list_lock);
1825 } 1828 }
1826 } else { 1829 } else {
1827 m = M_FULL; 1830 m = M_FULL;
1828 if (kmem_cache_debug(s) && !lock) { 1831 if (kmem_cache_debug(s) && !lock) {
1829 lock = 1; 1832 lock = 1;
1830 /* 1833 /*
1831 * This also ensures that the scanning of full 1834 * This also ensures that the scanning of full
1832 * slabs from diagnostic functions will not see 1835 * slabs from diagnostic functions will not see
1833 * any frozen slabs. 1836 * any frozen slabs.
1834 */ 1837 */
1835 spin_lock(&n->list_lock); 1838 spin_lock(&n->list_lock);
1836 } 1839 }
1837 } 1840 }
1838 1841
1839 if (l != m) { 1842 if (l != m) {
1840 1843
1841 if (l == M_PARTIAL) 1844 if (l == M_PARTIAL)
1842 1845
1843 remove_partial(n, page); 1846 remove_partial(n, page);
1844 1847
1845 else if (l == M_FULL) 1848 else if (l == M_FULL)
1846 1849
1847 remove_full(s, page); 1850 remove_full(s, page);
1848 1851
1849 if (m == M_PARTIAL) { 1852 if (m == M_PARTIAL) {
1850 1853
1851 add_partial(n, page, tail); 1854 add_partial(n, page, tail);
1852 stat(s, tail); 1855 stat(s, tail);
1853 1856
1854 } else if (m == M_FULL) { 1857 } else if (m == M_FULL) {
1855 1858
1856 stat(s, DEACTIVATE_FULL); 1859 stat(s, DEACTIVATE_FULL);
1857 add_full(s, n, page); 1860 add_full(s, n, page);
1858 1861
1859 } 1862 }
1860 } 1863 }
1861 1864
1862 l = m; 1865 l = m;
1863 if (!__cmpxchg_double_slab(s, page, 1866 if (!__cmpxchg_double_slab(s, page,
1864 old.freelist, old.counters, 1867 old.freelist, old.counters,
1865 new.freelist, new.counters, 1868 new.freelist, new.counters,
1866 "unfreezing slab")) 1869 "unfreezing slab"))
1867 goto redo; 1870 goto redo;
1868 1871
1869 if (lock) 1872 if (lock)
1870 spin_unlock(&n->list_lock); 1873 spin_unlock(&n->list_lock);
1871 1874
1872 if (m == M_FREE) { 1875 if (m == M_FREE) {
1873 stat(s, DEACTIVATE_EMPTY); 1876 stat(s, DEACTIVATE_EMPTY);
1874 discard_slab(s, page); 1877 discard_slab(s, page);
1875 stat(s, FREE_SLAB); 1878 stat(s, FREE_SLAB);
1876 } 1879 }
1877 } 1880 }
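/*
 * Illustrative userspace sketch, not part of mm/slub.c: the stage-one drain
 * loop above walks a freelist in which every free object stores the pointer
 * to the next free object inside itself; the get_freepointer() and
 * set_freepointer() helpers (defined elsewhere in this file) read and write
 * that word at a per-cache offset.  Offset 0 is assumed here.
 */
#include <stdio.h>
#include <string.h>

#define OBJ_SIZE 32

static char slab[4][OBJ_SIZE];		/* four objects in one pretend slab */

static void *get_fp(void *obj)           { void *p; memcpy(&p, obj, sizeof(p)); return p; }
static void  set_fp(void *obj, void *fp) { memcpy(obj, &fp, sizeof(fp)); }

int main(void)
{
	void *freelist = NULL;

	/* build a freelist: each freed object points at the previous head */
	for (int i = 0; i < 4; i++) {
		set_fp(slab[i], freelist);
		freelist = slab[i];
	}

	/* walk it the way the drain loop does */
	for (void *obj = freelist; obj; obj = get_fp(obj))
		printf("free object at %p\n", obj);
	return 0;
}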
1878 1881
1879 /* Unfreeze all the cpu partial slabs */ 1882 /* Unfreeze all the cpu partial slabs */
1880 static void unfreeze_partials(struct kmem_cache *s) 1883 static void unfreeze_partials(struct kmem_cache *s)
1881 { 1884 {
1882 struct kmem_cache_node *n = NULL; 1885 struct kmem_cache_node *n = NULL;
1883 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); 1886 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
1884 struct page *page, *discard_page = NULL; 1887 struct page *page, *discard_page = NULL;
1885 1888
1886 while ((page = c->partial)) { 1889 while ((page = c->partial)) {
1887 enum slab_modes { M_PARTIAL, M_FREE }; 1890 enum slab_modes { M_PARTIAL, M_FREE };
1888 enum slab_modes l, m; 1891 enum slab_modes l, m;
1889 struct page new; 1892 struct page new;
1890 struct page old; 1893 struct page old;
1891 1894
1892 c->partial = page->next; 1895 c->partial = page->next;
1893 l = M_FREE; 1896 l = M_FREE;
1894 1897
1895 do { 1898 do {
1896 1899
1897 old.freelist = page->freelist; 1900 old.freelist = page->freelist;
1898 old.counters = page->counters; 1901 old.counters = page->counters;
1899 VM_BUG_ON(!old.frozen); 1902 VM_BUG_ON(!old.frozen);
1900 1903
1901 new.counters = old.counters; 1904 new.counters = old.counters;
1902 new.freelist = old.freelist; 1905 new.freelist = old.freelist;
1903 1906
1904 new.frozen = 0; 1907 new.frozen = 0;
1905 1908
1906 if (!new.inuse && (!n || n->nr_partial > s->min_partial)) 1909 if (!new.inuse && (!n || n->nr_partial > s->min_partial))
1907 m = M_FREE; 1910 m = M_FREE;
1908 else { 1911 else {
1909 struct kmem_cache_node *n2 = get_node(s, 1912 struct kmem_cache_node *n2 = get_node(s,
1910 page_to_nid(page)); 1913 page_to_nid(page));
1911 1914
1912 m = M_PARTIAL; 1915 m = M_PARTIAL;
1913 if (n != n2) { 1916 if (n != n2) {
1914 if (n) 1917 if (n)
1915 spin_unlock(&n->list_lock); 1918 spin_unlock(&n->list_lock);
1916 1919
1917 n = n2; 1920 n = n2;
1918 spin_lock(&n->list_lock); 1921 spin_lock(&n->list_lock);
1919 } 1922 }
1920 } 1923 }
1921 1924
1922 if (l != m) { 1925 if (l != m) {
1923 if (l == M_PARTIAL) { 1926 if (l == M_PARTIAL) {
1924 remove_partial(n, page); 1927 remove_partial(n, page);
1925 stat(s, FREE_REMOVE_PARTIAL); 1928 stat(s, FREE_REMOVE_PARTIAL);
1926 } else { 1929 } else {
1927 add_partial(n, page, 1930 add_partial(n, page,
1928 DEACTIVATE_TO_TAIL); 1931 DEACTIVATE_TO_TAIL);
1929 stat(s, FREE_ADD_PARTIAL); 1932 stat(s, FREE_ADD_PARTIAL);
1930 } 1933 }
1931 1934
1932 l = m; 1935 l = m;
1933 } 1936 }
1934 1937
1935 } while (!cmpxchg_double_slab(s, page, 1938 } while (!cmpxchg_double_slab(s, page,
1936 old.freelist, old.counters, 1939 old.freelist, old.counters,
1937 new.freelist, new.counters, 1940 new.freelist, new.counters,
1938 "unfreezing slab")); 1941 "unfreezing slab"));
1939 1942
1940 if (m == M_FREE) { 1943 if (m == M_FREE) {
1941 page->next = discard_page; 1944 page->next = discard_page;
1942 discard_page = page; 1945 discard_page = page;
1943 } 1946 }
1944 } 1947 }
1945 1948
1946 if (n) 1949 if (n)
1947 spin_unlock(&n->list_lock); 1950 spin_unlock(&n->list_lock);
1948 1951
1949 while (discard_page) { 1952 while (discard_page) {
1950 page = discard_page; 1953 page = discard_page;
1951 discard_page = discard_page->next; 1954 discard_page = discard_page->next;
1952 1955
1953 stat(s, DEACTIVATE_EMPTY); 1956 stat(s, DEACTIVATE_EMPTY);
1954 discard_slab(s, page); 1957 discard_slab(s, page);
1955 stat(s, FREE_SLAB); 1958 stat(s, FREE_SLAB);
1956 } 1959 }
1957 } 1960 }
1958 1961
1959 /* 1962 /*
1960 * Put a page that was just frozen (in __slab_free) into a partial page 1963 * Put a page that was just frozen (in __slab_free) into a partial page
1961 * slot if available. This is done without interrupts disabled and without 1964 * slot if available. This is done without interrupts disabled and without
1962 * preemption disabled. The cmpxchg is racy and may put the partial page 1965 * preemption disabled. The cmpxchg is racy and may put the partial page
1963 * onto a random cpu's partial slot. 1966 * onto a random cpu's partial slot.
1964 * 1967 *
1965 * If we did not find a slot then simply move all the partials to the 1968 * If we did not find a slot then simply move all the partials to the
1966 * per node partial list. 1969 * per node partial list.
1967 */ 1970 */
1968 int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) 1971 int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
1969 { 1972 {
1970 struct page *oldpage; 1973 struct page *oldpage;
1971 int pages; 1974 int pages;
1972 int pobjects; 1975 int pobjects;
1973 1976
1974 do { 1977 do {
1975 pages = 0; 1978 pages = 0;
1976 pobjects = 0; 1979 pobjects = 0;
1977 oldpage = this_cpu_read(s->cpu_slab->partial); 1980 oldpage = this_cpu_read(s->cpu_slab->partial);
1978 1981
1979 if (oldpage) { 1982 if (oldpage) {
1980 pobjects = oldpage->pobjects; 1983 pobjects = oldpage->pobjects;
1981 pages = oldpage->pages; 1984 pages = oldpage->pages;
1982 if (drain && pobjects > s->cpu_partial) { 1985 if (drain && pobjects > s->cpu_partial) {
1983 unsigned long flags; 1986 unsigned long flags;
1984 /* 1987 /*
1985 * partial array is full. Move the existing 1988 * partial array is full. Move the existing
1986 * set to the per node partial list. 1989 * set to the per node partial list.
1987 */ 1990 */
1988 local_irq_save(flags); 1991 local_irq_save(flags);
1989 unfreeze_partials(s); 1992 unfreeze_partials(s);
1990 local_irq_restore(flags); 1993 local_irq_restore(flags);
1991 pobjects = 0; 1994 pobjects = 0;
1992 pages = 0; 1995 pages = 0;
1993 stat(s, CPU_PARTIAL_DRAIN); 1996 stat(s, CPU_PARTIAL_DRAIN);
1994 } 1997 }
1995 } 1998 }
1996 1999
1997 pages++; 2000 pages++;
1998 pobjects += page->objects - page->inuse; 2001 pobjects += page->objects - page->inuse;
1999 2002
2000 page->pages = pages; 2003 page->pages = pages;
2001 page->pobjects = pobjects; 2004 page->pobjects = pobjects;
2002 page->next = oldpage; 2005 page->next = oldpage;
2003 2006
2004 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); 2007 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
2005 return pobjects; 2008 return pobjects;
2006 } 2009 }
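/*
 * Illustrative userspace sketch, not part of mm/slub.c: put_cpu_partial()
 * pushes the page onto the per-cpu partial list with a compare-and-exchange
 * instead of a lock (the kernel uses the preemption-safe this_cpu_cmpxchg on
 * a per-cpu pointer; plain C11 atomics are used here).
 */
#include <stdatomic.h>
#include <stdio.h>

struct node { struct node *next; int id; };

static _Atomic(struct node *) head;

static void push(struct node *n)
{
	struct node *old;

	do {
		old = atomic_load(&head);
		n->next = old;			/* page->next = oldpage */
	} while (!atomic_compare_exchange_weak(&head, &old, n));
}

int main(void)
{
	static struct node a = { .id = 1 }, b = { .id = 2 };

	push(&a);
	push(&b);
	for (struct node *n = atomic_load(&head); n; n = n->next)
		printf("page %d on the partial list\n", n->id);
	return 0;
}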
2007 2010
2008 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 2011 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2009 { 2012 {
2010 stat(s, CPUSLAB_FLUSH); 2013 stat(s, CPUSLAB_FLUSH);
2011 deactivate_slab(s, c); 2014 deactivate_slab(s, c);
2012 } 2015 }
2013 2016
2014 /* 2017 /*
2015 * Flush cpu slab. 2018 * Flush cpu slab.
2016 * 2019 *
2017 * Called from IPI handler with interrupts disabled. 2020 * Called from IPI handler with interrupts disabled.
2018 */ 2021 */
2019 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) 2022 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2020 { 2023 {
2021 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 2024 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2022 2025
2023 if (likely(c)) { 2026 if (likely(c)) {
2024 if (c->page) 2027 if (c->page)
2025 flush_slab(s, c); 2028 flush_slab(s, c);
2026 2029
2027 unfreeze_partials(s); 2030 unfreeze_partials(s);
2028 } 2031 }
2029 } 2032 }
2030 2033
2031 static void flush_cpu_slab(void *d) 2034 static void flush_cpu_slab(void *d)
2032 { 2035 {
2033 struct kmem_cache *s = d; 2036 struct kmem_cache *s = d;
2034 2037
2035 __flush_cpu_slab(s, smp_processor_id()); 2038 __flush_cpu_slab(s, smp_processor_id());
2036 } 2039 }
2037 2040
2038 static bool has_cpu_slab(int cpu, void *info) 2041 static bool has_cpu_slab(int cpu, void *info)
2039 { 2042 {
2040 struct kmem_cache *s = info; 2043 struct kmem_cache *s = info;
2041 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 2044 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2042 2045
2043 return c->page || c->partial; 2046 return c->page || c->partial;
2044 } 2047 }
2045 2048
2046 static void flush_all(struct kmem_cache *s) 2049 static void flush_all(struct kmem_cache *s)
2047 { 2050 {
2048 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC); 2051 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2049 } 2052 }
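/*
 * Illustrative userspace sketch, not part of mm/slub.c: on_each_cpu_cond()
 * sends the flush IPI only to cpus whose per-cpu state says there is
 * something to flush.  A serial model of that predicate-then-callback
 * pattern, with invented per-cpu contents:
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_cpu_slab { void *page, *partial; };

static struct fake_cpu_slab cpus[4] = {
	{ (void *)1, NULL }, { NULL, NULL }, { NULL, (void *)1 }, { NULL, NULL },
};

static bool cpu_has_work(int cpu) { return cpus[cpu].page || cpus[cpu].partial; }
static void flush_one(int cpu)    { printf("flushing cpu %d\n", cpu); }

int main(void)
{
	for (int cpu = 0; cpu < 4; cpu++)	/* on_each_cpu_cond(), serialized */
		if (cpu_has_work(cpu))
			flush_one(cpu);
	return 0;
}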
2050 2053
2051 /* 2054 /*
2052 * Check if the objects in a per cpu structure fit numa 2055 * Check if the objects in a per cpu structure fit numa
2053 * locality expectations. 2056 * locality expectations.
2054 */ 2057 */
2055 static inline int node_match(struct kmem_cache_cpu *c, int node) 2058 static inline int node_match(struct kmem_cache_cpu *c, int node)
2056 { 2059 {
2057 #ifdef CONFIG_NUMA 2060 #ifdef CONFIG_NUMA
2058 if (node != NUMA_NO_NODE && c->node != node) 2061 if (node != NUMA_NO_NODE && c->node != node)
2059 return 0; 2062 return 0;
2060 #endif 2063 #endif
2061 return 1; 2064 return 1;
2062 } 2065 }
2063 2066
2064 static int count_free(struct page *page) 2067 static int count_free(struct page *page)
2065 { 2068 {
2066 return page->objects - page->inuse; 2069 return page->objects - page->inuse;
2067 } 2070 }
2068 2071
2069 static unsigned long count_partial(struct kmem_cache_node *n, 2072 static unsigned long count_partial(struct kmem_cache_node *n,
2070 int (*get_count)(struct page *)) 2073 int (*get_count)(struct page *))
2071 { 2074 {
2072 unsigned long flags; 2075 unsigned long flags;
2073 unsigned long x = 0; 2076 unsigned long x = 0;
2074 struct page *page; 2077 struct page *page;
2075 2078
2076 spin_lock_irqsave(&n->list_lock, flags); 2079 spin_lock_irqsave(&n->list_lock, flags);
2077 list_for_each_entry(page, &n->partial, lru) 2080 list_for_each_entry(page, &n->partial, lru)
2078 x += get_count(page); 2081 x += get_count(page);
2079 spin_unlock_irqrestore(&n->list_lock, flags); 2082 spin_unlock_irqrestore(&n->list_lock, flags);
2080 return x; 2083 return x;
2081 } 2084 }
2082 2085
2083 static inline unsigned long node_nr_objs(struct kmem_cache_node *n) 2086 static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2084 { 2087 {
2085 #ifdef CONFIG_SLUB_DEBUG 2088 #ifdef CONFIG_SLUB_DEBUG
2086 return atomic_long_read(&n->total_objects); 2089 return atomic_long_read(&n->total_objects);
2087 #else 2090 #else
2088 return 0; 2091 return 0;
2089 #endif 2092 #endif
2090 } 2093 }
2091 2094
2092 static noinline void 2095 static noinline void
2093 slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) 2096 slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2094 { 2097 {
2095 int node; 2098 int node;
2096 2099
2097 printk(KERN_WARNING 2100 printk(KERN_WARNING
2098 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", 2101 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2099 nid, gfpflags); 2102 nid, gfpflags);
2100 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " 2103 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2101 "default order: %d, min order: %d\n", s->name, s->objsize, 2104 "default order: %d, min order: %d\n", s->name, s->objsize,
2102 s->size, oo_order(s->oo), oo_order(s->min)); 2105 s->size, oo_order(s->oo), oo_order(s->min));
2103 2106
2104 if (oo_order(s->min) > get_order(s->objsize)) 2107 if (oo_order(s->min) > get_order(s->objsize))
2105 printk(KERN_WARNING " %s debugging increased min order, use " 2108 printk(KERN_WARNING " %s debugging increased min order, use "
2106 "slub_debug=O to disable.\n", s->name); 2109 "slub_debug=O to disable.\n", s->name);
2107 2110
2108 for_each_online_node(node) { 2111 for_each_online_node(node) {
2109 struct kmem_cache_node *n = get_node(s, node); 2112 struct kmem_cache_node *n = get_node(s, node);
2110 unsigned long nr_slabs; 2113 unsigned long nr_slabs;
2111 unsigned long nr_objs; 2114 unsigned long nr_objs;
2112 unsigned long nr_free; 2115 unsigned long nr_free;
2113 2116
2114 if (!n) 2117 if (!n)
2115 continue; 2118 continue;
2116 2119
2117 nr_free = count_partial(n, count_free); 2120 nr_free = count_partial(n, count_free);
2118 nr_slabs = node_nr_slabs(n); 2121 nr_slabs = node_nr_slabs(n);
2119 nr_objs = node_nr_objs(n); 2122 nr_objs = node_nr_objs(n);
2120 2123
2121 printk(KERN_WARNING 2124 printk(KERN_WARNING
2122 " node %d: slabs: %ld, objs: %ld, free: %ld\n", 2125 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
2123 node, nr_slabs, nr_objs, nr_free); 2126 node, nr_slabs, nr_objs, nr_free);
2124 } 2127 }
2125 } 2128 }
2126 2129
2127 static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, 2130 static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2128 int node, struct kmem_cache_cpu **pc) 2131 int node, struct kmem_cache_cpu **pc)
2129 { 2132 {
2130 void *object; 2133 void *object;
2131 struct kmem_cache_cpu *c; 2134 struct kmem_cache_cpu *c;
2132 struct page *page = new_slab(s, flags, node); 2135 struct page *page = new_slab(s, flags, node);
2133 2136
2134 if (page) { 2137 if (page) {
2135 c = __this_cpu_ptr(s->cpu_slab); 2138 c = __this_cpu_ptr(s->cpu_slab);
2136 if (c->page) 2139 if (c->page)
2137 flush_slab(s, c); 2140 flush_slab(s, c);
2138 2141
2139 /* 2142 /*
2140 * No other reference to the page yet so we can 2143 * No other reference to the page yet so we can
2141 * muck around with it freely without cmpxchg 2144 * muck around with it freely without cmpxchg
2142 */ 2145 */
2143 object = page->freelist; 2146 object = page->freelist;
2144 page->freelist = NULL; 2147 page->freelist = NULL;
2145 2148
2146 stat(s, ALLOC_SLAB); 2149 stat(s, ALLOC_SLAB);
2147 c->node = page_to_nid(page); 2150 c->node = page_to_nid(page);
2148 c->page = page; 2151 c->page = page;
2149 *pc = c; 2152 *pc = c;
2150 } else 2153 } else
2151 object = NULL; 2154 object = NULL;
2152 2155
2153 return object; 2156 return object;
2154 } 2157 }
2155 2158
2156 /* 2159 /*
2157 * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist 2160 * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist
2158 * or deactivate the page. 2161 * or deactivate the page.
2159 * 2162 *
2160 * The page is still frozen if the return value is not NULL. 2163 * The page is still frozen if the return value is not NULL.
2161 * 2164 *
2162 * If this function returns NULL then the page has been unfrozen. 2165 * If this function returns NULL then the page has been unfrozen.
2163 */ 2166 */
2164 static inline void *get_freelist(struct kmem_cache *s, struct page *page) 2167 static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2165 { 2168 {
2166 struct page new; 2169 struct page new;
2167 unsigned long counters; 2170 unsigned long counters;
2168 void *freelist; 2171 void *freelist;
2169 2172
2170 do { 2173 do {
2171 freelist = page->freelist; 2174 freelist = page->freelist;
2172 counters = page->counters; 2175 counters = page->counters;
2173 new.counters = counters; 2176 new.counters = counters;
2174 VM_BUG_ON(!new.frozen); 2177 VM_BUG_ON(!new.frozen);
2175 2178
2176 new.inuse = page->objects; 2179 new.inuse = page->objects;
2177 new.frozen = freelist != NULL; 2180 new.frozen = freelist != NULL;
2178 2181
2179 } while (!cmpxchg_double_slab(s, page, 2182 } while (!cmpxchg_double_slab(s, page,
2180 freelist, counters, 2183 freelist, counters,
2181 NULL, new.counters, 2184 NULL, new.counters,
2182 "get_freelist")); 2185 "get_freelist"));
2183 2186
2184 return freelist; 2187 return freelist;
2185 } 2188 }
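/*
 * Illustrative userspace sketch, not part of mm/slub.c: the
 * read/compute/try-to-commit retry pattern used by get_freelist() and the
 * other cmpxchg_double_slab() loops, reduced to a single 64-bit word with
 * C11 atomics.  The kernel variant commits the freelist pointer and the
 * packed counters together.
 */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long state = 42;	/* stands in for page state */

static unsigned long claim_and_clear(void)
{
	unsigned long old, newval;

	do {
		old = atomic_load(&state);	/* snapshot the current state */
		newval = 0;			/* the state we want installed */
	} while (!atomic_compare_exchange_weak(&state, &old, newval));

	return old;	/* whatever we atomically took over */
}

int main(void)
{
	printf("claimed %lu, state is now %lu\n",
	       claim_and_clear(), (unsigned long)atomic_load(&state));
	return 0;
}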
2186 2189
2187 /* 2190 /*
2188 * Slow path. The lockless freelist is empty or we need to perform 2191 * Slow path. The lockless freelist is empty or we need to perform
2189 * debugging duties. 2192 * debugging duties.
2190 * 2193 *
2191 * Processing is still very fast if new objects have been freed to the 2194 * Processing is still very fast if new objects have been freed to the
2192 * regular freelist. In that case we simply take over the regular freelist 2195 * regular freelist. In that case we simply take over the regular freelist
2193 * as the lockless freelist and zap the regular freelist. 2196 * as the lockless freelist and zap the regular freelist.
2194 * 2197 *
2195 * If that is not working then we fall back to the partial lists. We take the 2198 * If that is not working then we fall back to the partial lists. We take the
2196 * first element of the freelist as the object to allocate now and move the 2199 * first element of the freelist as the object to allocate now and move the
2197 * rest of the freelist to the lockless freelist. 2200 * rest of the freelist to the lockless freelist.
2198 * 2201 *
2199 * And if we were unable to get a new slab from the partial slab lists then 2202 * And if we were unable to get a new slab from the partial slab lists then
2200 * we need to allocate a new slab. This is the slowest path since it involves 2203 * we need to allocate a new slab. This is the slowest path since it involves
2201 * a call to the page allocator and the setup of a new slab. 2204 * a call to the page allocator and the setup of a new slab.
2202 */ 2205 */
2203 static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, 2206 static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2204 unsigned long addr, struct kmem_cache_cpu *c) 2207 unsigned long addr, struct kmem_cache_cpu *c)
2205 { 2208 {
2206 void **object; 2209 void **object;
2207 unsigned long flags; 2210 unsigned long flags;
2208 2211
2209 local_irq_save(flags); 2212 local_irq_save(flags);
2210 #ifdef CONFIG_PREEMPT 2213 #ifdef CONFIG_PREEMPT
2211 /* 2214 /*
2212 * We may have been preempted and rescheduled on a different 2215 * We may have been preempted and rescheduled on a different
2213 * cpu before disabling interrupts. Need to reload cpu area 2216 * cpu before disabling interrupts. Need to reload cpu area
2214 * pointer. 2217 * pointer.
2215 */ 2218 */
2216 c = this_cpu_ptr(s->cpu_slab); 2219 c = this_cpu_ptr(s->cpu_slab);
2217 #endif 2220 #endif
2218 2221
2219 if (!c->page) 2222 if (!c->page)
2220 goto new_slab; 2223 goto new_slab;
2221 redo: 2224 redo:
2222 if (unlikely(!node_match(c, node))) { 2225 if (unlikely(!node_match(c, node))) {
2223 stat(s, ALLOC_NODE_MISMATCH); 2226 stat(s, ALLOC_NODE_MISMATCH);
2224 deactivate_slab(s, c); 2227 deactivate_slab(s, c);
2225 goto new_slab; 2228 goto new_slab;
2226 } 2229 }
2227 2230
2228 /* must check again c->freelist in case of cpu migration or IRQ */ 2231 /* must check again c->freelist in case of cpu migration or IRQ */
2229 object = c->freelist; 2232 object = c->freelist;
2230 if (object) 2233 if (object)
2231 goto load_freelist; 2234 goto load_freelist;
2232 2235
2233 stat(s, ALLOC_SLOWPATH); 2236 stat(s, ALLOC_SLOWPATH);
2234 2237
2235 object = get_freelist(s, c->page); 2238 object = get_freelist(s, c->page);
2236 2239
2237 if (!object) { 2240 if (!object) {
2238 c->page = NULL; 2241 c->page = NULL;
2239 stat(s, DEACTIVATE_BYPASS); 2242 stat(s, DEACTIVATE_BYPASS);
2240 goto new_slab; 2243 goto new_slab;
2241 } 2244 }
2242 2245
2243 stat(s, ALLOC_REFILL); 2246 stat(s, ALLOC_REFILL);
2244 2247
2245 load_freelist: 2248 load_freelist:
2246 c->freelist = get_freepointer(s, object); 2249 c->freelist = get_freepointer(s, object);
2247 c->tid = next_tid(c->tid); 2250 c->tid = next_tid(c->tid);
2248 local_irq_restore(flags); 2251 local_irq_restore(flags);
2249 return object; 2252 return object;
2250 2253
2251 new_slab: 2254 new_slab:
2252 2255
2253 if (c->partial) { 2256 if (c->partial) {
2254 c->page = c->partial; 2257 c->page = c->partial;
2255 c->partial = c->page->next; 2258 c->partial = c->page->next;
2256 c->node = page_to_nid(c->page); 2259 c->node = page_to_nid(c->page);
2257 stat(s, CPU_PARTIAL_ALLOC); 2260 stat(s, CPU_PARTIAL_ALLOC);
2258 c->freelist = NULL; 2261 c->freelist = NULL;
2259 goto redo; 2262 goto redo;
2260 } 2263 }
2261 2264
2262 /* Then do expensive stuff like retrieving pages from the partial lists */ 2265 /* Then do expensive stuff like retrieving pages from the partial lists */
2263 object = get_partial(s, gfpflags, node, c); 2266 object = get_partial(s, gfpflags, node, c);
2264 2267
2265 if (unlikely(!object)) { 2268 if (unlikely(!object)) {
2266 2269
2267 object = new_slab_objects(s, gfpflags, node, &c); 2270 object = new_slab_objects(s, gfpflags, node, &c);
2268 2271
2269 if (unlikely(!object)) { 2272 if (unlikely(!object)) {
2270 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) 2273 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2271 slab_out_of_memory(s, gfpflags, node); 2274 slab_out_of_memory(s, gfpflags, node);
2272 2275
2273 local_irq_restore(flags); 2276 local_irq_restore(flags);
2274 return NULL; 2277 return NULL;
2275 } 2278 }
2276 } 2279 }
2277 2280
2278 if (likely(!kmem_cache_debug(s))) 2281 if (likely(!kmem_cache_debug(s)))
2279 goto load_freelist; 2282 goto load_freelist;
2280 2283
2281 /* Only entered in the debug case */ 2284 /* Only entered in the debug case */
2282 if (!alloc_debug_processing(s, c->page, object, addr)) 2285 if (!alloc_debug_processing(s, c->page, object, addr))
2283 goto new_slab; /* Slab failed checks. Next slab needed */ 2286 goto new_slab; /* Slab failed checks. Next slab needed */
2284 2287
2285 c->freelist = get_freepointer(s, object); 2288 c->freelist = get_freepointer(s, object);
2286 deactivate_slab(s, c); 2289 deactivate_slab(s, c);
2287 c->node = NUMA_NO_NODE; 2290 c->node = NUMA_NO_NODE;
2288 local_irq_restore(flags); 2291 local_irq_restore(flags);
2289 return object; 2292 return object;
2290 } 2293 }
2291 2294
2292 /* 2295 /*
2293 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) 2296 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
2294 * have the fastpath folded into their functions. So no function call 2297 * have the fastpath folded into their functions. So no function call
2295 * overhead for requests that can be satisfied on the fastpath. 2298 * overhead for requests that can be satisfied on the fastpath.
2296 * 2299 *
2297 * The fastpath works by first checking if the lockless freelist can be used. 2300 * The fastpath works by first checking if the lockless freelist can be used.
2298 * If not then __slab_alloc is called for slow processing. 2301 * If not then __slab_alloc is called for slow processing.
2299 * 2302 *
2300 * Otherwise we can simply pick the next object from the lockless free list. 2303 * Otherwise we can simply pick the next object from the lockless free list.
2301 */ 2304 */
2302 static __always_inline void *slab_alloc(struct kmem_cache *s, 2305 static __always_inline void *slab_alloc(struct kmem_cache *s,
2303 gfp_t gfpflags, int node, unsigned long addr) 2306 gfp_t gfpflags, int node, unsigned long addr)
2304 { 2307 {
2305 void **object; 2308 void **object;
2306 struct kmem_cache_cpu *c; 2309 struct kmem_cache_cpu *c;
2307 unsigned long tid; 2310 unsigned long tid;
2308 2311
2309 if (slab_pre_alloc_hook(s, gfpflags)) 2312 if (slab_pre_alloc_hook(s, gfpflags))
2310 return NULL; 2313 return NULL;
2311 2314
2312 redo: 2315 redo:
2313 2316
2314 /* 2317 /*
2315 * Must read kmem_cache cpu data via this cpu ptr. Preemption is 2318 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
2316 * enabled. We may switch back and forth between cpus while 2319 * enabled. We may switch back and forth between cpus while
2317 * reading from one cpu area. That does not matter as long 2320 * reading from one cpu area. That does not matter as long
2318 * as we end up on the original cpu again when doing the cmpxchg. 2321 * as we end up on the original cpu again when doing the cmpxchg.
2319 */ 2322 */
2320 c = __this_cpu_ptr(s->cpu_slab); 2323 c = __this_cpu_ptr(s->cpu_slab);
2321 2324
2322 /* 2325 /*
2323 * The transaction ids are globally unique per cpu and per operation on 2326 * The transaction ids are globally unique per cpu and per operation on
2324 * a per cpu queue. Thus they can guarantee that the cmpxchg_double 2327 * a per cpu queue. Thus they can guarantee that the cmpxchg_double
2325 * occurs on the right processor and that there was no operation on the 2328 * occurs on the right processor and that there was no operation on the
2326 * linked list in between. 2329 * linked list in between.
2327 */ 2330 */
2328 tid = c->tid; 2331 tid = c->tid;
2329 barrier(); 2332 barrier();
2330 2333
2331 object = c->freelist; 2334 object = c->freelist;
2332 if (unlikely(!object || !node_match(c, node))) 2335 if (unlikely(!object || !node_match(c, node)))
2333 2336
2334 object = __slab_alloc(s, gfpflags, node, addr, c); 2337 object = __slab_alloc(s, gfpflags, node, addr, c);
2335 2338
2336 else { 2339 else {
2337 void *next_object = get_freepointer_safe(s, object); 2340 void *next_object = get_freepointer_safe(s, object);
2338 2341
2339 /* 2342 /*
2340 * The cmpxchg will only match if there was no additional 2343 * The cmpxchg will only match if there was no additional
2341 * operation and if we are on the right processor. 2344 * operation and if we are on the right processor.
2342 * 2345 *
2343 * The cmpxchg does the following atomically (without lock semantics!) 2346 * The cmpxchg does the following atomically (without lock semantics!)
2344 * 1. Relocate first pointer to the current per cpu area. 2347 * 1. Relocate first pointer to the current per cpu area.
2345 * 2. Verify that tid and freelist have not been changed 2348 * 2. Verify that tid and freelist have not been changed
2346 * 3. If they were not changed replace tid and freelist 2349 * 3. If they were not changed replace tid and freelist
2347 * 2350 *
2348 * Since this is without lock semantics the protection is only against 2351 * Since this is without lock semantics the protection is only against
2349 * code executing on this cpu *not* from access by other cpus. 2352 * code executing on this cpu *not* from access by other cpus.
2350 */ 2353 */
2351 if (unlikely(!this_cpu_cmpxchg_double( 2354 if (unlikely(!this_cpu_cmpxchg_double(
2352 s->cpu_slab->freelist, s->cpu_slab->tid, 2355 s->cpu_slab->freelist, s->cpu_slab->tid,
2353 object, tid, 2356 object, tid,
2354 next_object, next_tid(tid)))) { 2357 next_object, next_tid(tid)))) {
2355 2358
2356 note_cmpxchg_failure("slab_alloc", s, tid); 2359 note_cmpxchg_failure("slab_alloc", s, tid);
2357 goto redo; 2360 goto redo;
2358 } 2361 }
2359 prefetch_freepointer(s, next_object); 2362 prefetch_freepointer(s, next_object);
2360 stat(s, ALLOC_FASTPATH); 2363 stat(s, ALLOC_FASTPATH);
2361 } 2364 }
2362 2365
2363 if (unlikely(gfpflags & __GFP_ZERO) && object) 2366 if (unlikely(gfpflags & __GFP_ZERO) && object)
2364 memset(object, 0, s->objsize); 2367 memset(object, 0, s->objsize);
2365 2368
2366 slab_post_alloc_hook(s, gfpflags, object); 2369 slab_post_alloc_hook(s, gfpflags, object);
2367 2370
2368 return object; 2371 return object;
2369 } 2372 }
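/*
 * Illustrative userspace sketch, not part of mm/slub.c: why the fastpath
 * compares the transaction id together with the freelist.  Here a free
 * object index and a tid are packed into one 64-bit word so a plain C11
 * compare-and-exchange can stand in for this_cpu_cmpxchg_double(); the
 * layout and the index-based freelist are invented for the example.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t slot;	/* low 16 bits: free index, high bits: tid */

#define PACK(idx, tid)	(((uint64_t)(tid) << 16) | (uint16_t)(idx))

static int alloc_fast(uint16_t *idx_out)
{
	uint64_t old = atomic_load(&slot);
	uint16_t idx = old & 0xffff;
	uint64_t tid = old >> 16;

	if (!idx)
		return 0;	/* empty: the real code takes the slow path */
	/* commit only if neither the freelist nor the tid moved under us */
	if (!atomic_compare_exchange_strong(&slot, &old,
					    PACK(idx - 1, tid + 1)))
		return alloc_fast(idx_out);	/* like "goto redo" above */
	*idx_out = idx;
	return 1;
}

int main(void)
{
	uint16_t idx;

	atomic_store(&slot, PACK(3, 0));
	while (alloc_fast(&idx))
		printf("allocated object index %u\n", idx);
	return 0;
}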
2370 2373
2371 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) 2374 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2372 { 2375 {
2373 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); 2376 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2374 2377
2375 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); 2378 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
2376 2379
2377 return ret; 2380 return ret;
2378 } 2381 }
2379 EXPORT_SYMBOL(kmem_cache_alloc); 2382 EXPORT_SYMBOL(kmem_cache_alloc);
2380 2383
2381 #ifdef CONFIG_TRACING 2384 #ifdef CONFIG_TRACING
2382 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) 2385 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2383 { 2386 {
2384 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); 2387 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2385 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); 2388 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2386 return ret; 2389 return ret;
2387 } 2390 }
2388 EXPORT_SYMBOL(kmem_cache_alloc_trace); 2391 EXPORT_SYMBOL(kmem_cache_alloc_trace);
2389 2392
2390 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) 2393 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
2391 { 2394 {
2392 void *ret = kmalloc_order(size, flags, order); 2395 void *ret = kmalloc_order(size, flags, order);
2393 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); 2396 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
2394 return ret; 2397 return ret;
2395 } 2398 }
2396 EXPORT_SYMBOL(kmalloc_order_trace); 2399 EXPORT_SYMBOL(kmalloc_order_trace);
2397 #endif 2400 #endif
2398 2401
2399 #ifdef CONFIG_NUMA 2402 #ifdef CONFIG_NUMA
2400 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) 2403 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2401 { 2404 {
2402 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); 2405 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2403 2406
2404 trace_kmem_cache_alloc_node(_RET_IP_, ret, 2407 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2405 s->objsize, s->size, gfpflags, node); 2408 s->objsize, s->size, gfpflags, node);
2406 2409
2407 return ret; 2410 return ret;
2408 } 2411 }
2409 EXPORT_SYMBOL(kmem_cache_alloc_node); 2412 EXPORT_SYMBOL(kmem_cache_alloc_node);
2410 2413
2411 #ifdef CONFIG_TRACING 2414 #ifdef CONFIG_TRACING
2412 void *kmem_cache_alloc_node_trace(struct kmem_cache *s, 2415 void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2413 gfp_t gfpflags, 2416 gfp_t gfpflags,
2414 int node, size_t size) 2417 int node, size_t size)
2415 { 2418 {
2416 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); 2419 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2417 2420
2418 trace_kmalloc_node(_RET_IP_, ret, 2421 trace_kmalloc_node(_RET_IP_, ret,
2419 size, s->size, gfpflags, node); 2422 size, s->size, gfpflags, node);
2420 return ret; 2423 return ret;
2421 } 2424 }
2422 EXPORT_SYMBOL(kmem_cache_alloc_node_trace); 2425 EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2423 #endif 2426 #endif
2424 #endif 2427 #endif
2425 2428
2426 /* 2429 /*
2427 * Slow path handling. This may still be called frequently since objects 2430 * Slow path handling. This may still be called frequently since objects
2428 * have a longer lifetime than the cpu slabs in most processing loads. 2431 * have a longer lifetime than the cpu slabs in most processing loads.
2429 * 2432 *
2430 * So we still attempt to reduce cache line usage. Just take the slab 2433 * So we still attempt to reduce cache line usage. Just take the slab
2431 * lock and free the item. If there is no additional partial page 2434 * lock and free the item. If there is no additional partial page
2432 * handling required then we can return immediately. 2435 * handling required then we can return immediately.
2433 */ 2436 */
2434 static void __slab_free(struct kmem_cache *s, struct page *page, 2437 static void __slab_free(struct kmem_cache *s, struct page *page,
2435 void *x, unsigned long addr) 2438 void *x, unsigned long addr)
2436 { 2439 {
2437 void *prior; 2440 void *prior;
2438 void **object = (void *)x; 2441 void **object = (void *)x;
2439 int was_frozen; 2442 int was_frozen;
2440 int inuse; 2443 int inuse;
2441 struct page new; 2444 struct page new;
2442 unsigned long counters; 2445 unsigned long counters;
2443 struct kmem_cache_node *n = NULL; 2446 struct kmem_cache_node *n = NULL;
2444 unsigned long uninitialized_var(flags); 2447 unsigned long uninitialized_var(flags);
2445 2448
2446 stat(s, FREE_SLOWPATH); 2449 stat(s, FREE_SLOWPATH);
2447 2450
2448 if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr)) 2451 if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
2449 return; 2452 return;
2450 2453
2451 do { 2454 do {
2452 prior = page->freelist; 2455 prior = page->freelist;
2453 counters = page->counters; 2456 counters = page->counters;
2454 set_freepointer(s, object, prior); 2457 set_freepointer(s, object, prior);
2455 new.counters = counters; 2458 new.counters = counters;
2456 was_frozen = new.frozen; 2459 was_frozen = new.frozen;
2457 new.inuse--; 2460 new.inuse--;
2458 if ((!new.inuse || !prior) && !was_frozen && !n) { 2461 if ((!new.inuse || !prior) && !was_frozen && !n) {
2459 2462
2460 if (!kmem_cache_debug(s) && !prior) 2463 if (!kmem_cache_debug(s) && !prior)
2461 2464
2462 /* 2465 /*
2463 * Slab was on no list before and will be partially empty 2466 * Slab was on no list before and will be partially empty
2464 * We can defer the list move and instead freeze it. 2467 * We can defer the list move and instead freeze it.
2465 */ 2468 */
2466 new.frozen = 1; 2469 new.frozen = 1;
2467 2470
2468 else { /* Needs to be taken off a list */ 2471 else { /* Needs to be taken off a list */
2469 2472
2470 n = get_node(s, page_to_nid(page)); 2473 n = get_node(s, page_to_nid(page));
2471 /* 2474 /*
2472 * Speculatively acquire the list_lock. 2475 * Speculatively acquire the list_lock.
2473 * If the cmpxchg does not succeed then we may 2476 * If the cmpxchg does not succeed then we may
2474 * drop the list_lock without any processing. 2477 * drop the list_lock without any processing.
2475 * 2478 *
2476 * Otherwise the list_lock will synchronize with 2479 * Otherwise the list_lock will synchronize with
2477 * other processors updating the list of slabs. 2480 * other processors updating the list of slabs.
2478 */ 2481 */
2479 spin_lock_irqsave(&n->list_lock, flags); 2482 spin_lock_irqsave(&n->list_lock, flags);
2480 2483
2481 } 2484 }
2482 } 2485 }
2483 inuse = new.inuse; 2486 inuse = new.inuse;
2484 2487
2485 } while (!cmpxchg_double_slab(s, page, 2488 } while (!cmpxchg_double_slab(s, page,
2486 prior, counters, 2489 prior, counters,
2487 object, new.counters, 2490 object, new.counters,
2488 "__slab_free")); 2491 "__slab_free"));
2489 2492
2490 if (likely(!n)) { 2493 if (likely(!n)) {
2491 2494
2492 /* 2495 /*
2493 * If we just froze the page then put it onto the 2496 * If we just froze the page then put it onto the
2494 * per cpu partial list. 2497 * per cpu partial list.
2495 */ 2498 */
2496 if (new.frozen && !was_frozen) { 2499 if (new.frozen && !was_frozen) {
2497 put_cpu_partial(s, page, 1); 2500 put_cpu_partial(s, page, 1);
2498 stat(s, CPU_PARTIAL_FREE); 2501 stat(s, CPU_PARTIAL_FREE);
2499 } 2502 }
2500 /* 2503 /*
2501 * The list lock was not taken therefore no list 2504 * The list lock was not taken therefore no list
2502 * activity can be necessary. 2505 * activity can be necessary.
2503 */ 2506 */
2504 if (was_frozen) 2507 if (was_frozen)
2505 stat(s, FREE_FROZEN); 2508 stat(s, FREE_FROZEN);
2506 return; 2509 return;
2507 } 2510 }
2508 2511
2509 /* 2512 /*
2510 * was_frozen may have been set after we acquired the list_lock in 2513 * was_frozen may have been set after we acquired the list_lock in
2511 * an earlier loop. So we need to check it here again. 2514 * an earlier loop. So we need to check it here again.
2512 */ 2515 */
2513 if (was_frozen) 2516 if (was_frozen)
2514 stat(s, FREE_FROZEN); 2517 stat(s, FREE_FROZEN);
2515 else { 2518 else {
2516 if (unlikely(!inuse && n->nr_partial > s->min_partial)) 2519 if (unlikely(!inuse && n->nr_partial > s->min_partial))
2517 goto slab_empty; 2520 goto slab_empty;
2518 2521
2519 /* 2522 /*
2520 * Objects left in the slab. If it was not on the partial list before 2523 * Objects left in the slab. If it was not on the partial list before
2521 * then add it. 2524 * then add it.
2522 */ 2525 */
2523 if (unlikely(!prior)) { 2526 if (unlikely(!prior)) {
2524 remove_full(s, page); 2527 remove_full(s, page);
2525 add_partial(n, page, DEACTIVATE_TO_TAIL); 2528 add_partial(n, page, DEACTIVATE_TO_TAIL);
2526 stat(s, FREE_ADD_PARTIAL); 2529 stat(s, FREE_ADD_PARTIAL);
2527 } 2530 }
2528 } 2531 }
2529 spin_unlock_irqrestore(&n->list_lock, flags); 2532 spin_unlock_irqrestore(&n->list_lock, flags);
2530 return; 2533 return;
2531 2534
2532 slab_empty: 2535 slab_empty:
2533 if (prior) { 2536 if (prior) {
2534 /* 2537 /*
2535 * Slab on the partial list. 2538 * Slab on the partial list.
2536 */ 2539 */
2537 remove_partial(n, page); 2540 remove_partial(n, page);
2538 stat(s, FREE_REMOVE_PARTIAL); 2541 stat(s, FREE_REMOVE_PARTIAL);
2539 } else 2542 } else
2540 /* Slab must be on the full list */ 2543 /* Slab must be on the full list */
2541 remove_full(s, page); 2544 remove_full(s, page);
2542 2545
2543 spin_unlock_irqrestore(&n->list_lock, flags); 2546 spin_unlock_irqrestore(&n->list_lock, flags);
2544 stat(s, FREE_SLAB); 2547 stat(s, FREE_SLAB);
2545 discard_slab(s, page); 2548 discard_slab(s, page);
2546 } 2549 }
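/*
 * Illustrative sketch, not part of mm/slub.c: the list movement that
 * __slab_free() settles on, reduced to a pure decision function.  "had prior
 * free" means the page already had free objects, so it was not sitting on
 * the full list.  The enum names are invented for the example.
 */
#include <stdbool.h>
#include <stdio.h>

enum dest { STAY, FREEZE_TO_CPU_PARTIAL, ADD_TO_PARTIAL, DISCARD };

static enum dest slab_free_dest(bool was_frozen, bool had_prior_free,
				bool now_empty, bool node_has_spare_partials,
				bool debug)
{
	if (was_frozen)
		return STAY;			/* the owning cpu will handle it */
	if (!debug && !had_prior_free)
		return FREEZE_TO_CPU_PARTIAL;	/* was full; freeze and cache it */
	if (now_empty && node_has_spare_partials)
		return DISCARD;			/* hand the page back */
	if (!had_prior_free)
		return ADD_TO_PARTIAL;		/* move from full to partial list */
	return STAY;				/* already on the partial list */
}

int main(void)
{
	/* freeing into a full, unfrozen slab with debugging off: freeze it */
	printf("dest=%d\n", slab_free_dest(false, false, false, false, false));
	return 0;
}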
2547 2550
2548 /* 2551 /*
2549 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that 2552 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
2550 * can perform fastpath freeing without additional function calls. 2553 * can perform fastpath freeing without additional function calls.
2551 * 2554 *
2552 * The fastpath is only possible if we are freeing to the current cpu slab 2555 * The fastpath is only possible if we are freeing to the current cpu slab
2553 * of this processor. This is typically the case if we have just allocated 2556 * of this processor. This is typically the case if we have just allocated
2554 * the item before. 2557 * the item before.
2555 * 2558 *
2556 * If fastpath is not possible then fall back to __slab_free where we deal 2559 * If fastpath is not possible then fall back to __slab_free where we deal
2557 * with all sorts of special processing. 2560 * with all sorts of special processing.
2558 */ 2561 */
2559 static __always_inline void slab_free(struct kmem_cache *s, 2562 static __always_inline void slab_free(struct kmem_cache *s,
2560 struct page *page, void *x, unsigned long addr) 2563 struct page *page, void *x, unsigned long addr)
2561 { 2564 {
2562 void **object = (void *)x; 2565 void **object = (void *)x;
2563 struct kmem_cache_cpu *c; 2566 struct kmem_cache_cpu *c;
2564 unsigned long tid; 2567 unsigned long tid;
2565 2568
2566 slab_free_hook(s, x); 2569 slab_free_hook(s, x);
2567 2570
2568 redo: 2571 redo:
2569 /* 2572 /*
2570 * Determine the current cpu's per cpu slab. 2573 * Determine the current cpu's per cpu slab.
2571 * The cpu may change afterward. However that does not matter since 2574 * The cpu may change afterward. However that does not matter since
2572 * data is retrieved via this pointer. If we are on the same cpu 2575 * data is retrieved via this pointer. If we are on the same cpu
2573 * during the cmpxchg then the free will succeed. 2576 * during the cmpxchg then the free will succeed.
2574 */ 2577 */
2575 c = __this_cpu_ptr(s->cpu_slab); 2578 c = __this_cpu_ptr(s->cpu_slab);
2576 2579
2577 tid = c->tid; 2580 tid = c->tid;
2578 barrier(); 2581 barrier();
2579 2582
2580 if (likely(page == c->page)) { 2583 if (likely(page == c->page)) {
2581 set_freepointer(s, object, c->freelist); 2584 set_freepointer(s, object, c->freelist);
2582 2585
2583 if (unlikely(!this_cpu_cmpxchg_double( 2586 if (unlikely(!this_cpu_cmpxchg_double(
2584 s->cpu_slab->freelist, s->cpu_slab->tid, 2587 s->cpu_slab->freelist, s->cpu_slab->tid,
2585 c->freelist, tid, 2588 c->freelist, tid,
2586 object, next_tid(tid)))) { 2589 object, next_tid(tid)))) {
2587 2590
2588 note_cmpxchg_failure("slab_free", s, tid); 2591 note_cmpxchg_failure("slab_free", s, tid);
2589 goto redo; 2592 goto redo;
2590 } 2593 }
2591 stat(s, FREE_FASTPATH); 2594 stat(s, FREE_FASTPATH);
2592 } else 2595 } else
2593 __slab_free(s, page, x, addr); 2596 __slab_free(s, page, x, addr);
2594 2597
2595 } 2598 }
2596 2599
2597 void kmem_cache_free(struct kmem_cache *s, void *x) 2600 void kmem_cache_free(struct kmem_cache *s, void *x)
2598 { 2601 {
2599 struct page *page; 2602 struct page *page;
2600 2603
2601 page = virt_to_head_page(x); 2604 page = virt_to_head_page(x);
2602 2605
2603 slab_free(s, page, x, _RET_IP_); 2606 slab_free(s, page, x, _RET_IP_);
2604 2607
2605 trace_kmem_cache_free(_RET_IP_, x); 2608 trace_kmem_cache_free(_RET_IP_, x);
2606 } 2609 }
2607 EXPORT_SYMBOL(kmem_cache_free); 2610 EXPORT_SYMBOL(kmem_cache_free);
2608 2611
2609 /* 2612 /*
2610 * Object placement in a slab is made very easy because we always start at 2613 * Object placement in a slab is made very easy because we always start at
2611 * offset 0. If we tune the size of the object to the alignment then we can 2614 * offset 0. If we tune the size of the object to the alignment then we can
2612 * get the required alignment by putting one properly sized object after 2615 * get the required alignment by putting one properly sized object after
2613 * another. 2616 * another.
2614 * 2617 *
2615 * Notice that the allocation order determines the sizes of the per cpu 2618 * Notice that the allocation order determines the sizes of the per cpu
2616 * caches. Each processor always has one slab available for allocations. 2619 * caches. Each processor always has one slab available for allocations.
2617 * Increasing the allocation order reduces the number of times that slabs 2620 * Increasing the allocation order reduces the number of times that slabs
2618 * must be moved on and off the partial lists and is therefore a factor in 2621 * must be moved on and off the partial lists and is therefore a factor in
2619 * locking overhead. 2622 * locking overhead.
2620 */ 2623 */
2621 2624
2622 /* 2625 /*
2623 * Minimum / Maximum order of slab pages. This influences locking overhead 2626 * Minimum / Maximum order of slab pages. This influences locking overhead
2624 * and slab fragmentation. A higher order reduces the number of partial slabs 2627 * and slab fragmentation. A higher order reduces the number of partial slabs
2625 * and increases the number of allocations possible without having to 2628 * and increases the number of allocations possible without having to
2626 * take the list_lock. 2629 * take the list_lock.
2627 */ 2630 */
2628 static int slub_min_order; 2631 static int slub_min_order;
2629 static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; 2632 static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
2630 static int slub_min_objects; 2633 static int slub_min_objects;
2631 2634
2632 /* 2635 /*
2633 * Merge control. If this is set then no merging of slab caches will occur. 2636 * Merge control. If this is set then no merging of slab caches will occur.
2634 * (Could be removed. This was introduced to pacify the merge skeptics.) 2637 * (Could be removed. This was introduced to pacify the merge skeptics.)
2635 */ 2638 */
2636 static int slub_nomerge; 2639 static int slub_nomerge;
2637 2640
2638 /* 2641 /*
2639 * Calculate the order of allocation given a slab object size. 2642 * Calculate the order of allocation given a slab object size.
2640 * 2643 *
2641 * The order of allocation has significant impact on performance and other 2644 * The order of allocation has significant impact on performance and other
2642 * system components. Generally order 0 allocations should be preferred since 2645 * system components. Generally order 0 allocations should be preferred since
2643 * order 0 does not cause fragmentation in the page allocator. Larger objects 2646 * order 0 does not cause fragmentation in the page allocator. Larger objects
2644 * can be problematic to put into order 0 slabs because there may be too much 2647 * can be problematic to put into order 0 slabs because there may be too much
2645 * unused space left. We go to a higher order if more than 1/16th of the slab 2648 * unused space left. We go to a higher order if more than 1/16th of the slab
2646 * would be wasted. 2649 * would be wasted.
2647 * 2650 *
2648 * In order to reach satisfactory performance we must ensure that a minimum 2651 * In order to reach satisfactory performance we must ensure that a minimum
2649 * number of objects is in one slab. Otherwise we may generate too much 2652 * number of objects is in one slab. Otherwise we may generate too much
2650 * activity on the partial lists which requires taking the list_lock. This is 2653 * activity on the partial lists which requires taking the list_lock. This is
2651 * less of a concern for large slabs, though, which are rarely used. 2654 * less of a concern for large slabs, though, which are rarely used.
2652 * 2655 *
2653 * slub_max_order specifies the order at which we stop considering the 2656 * slub_max_order specifies the order at which we stop considering the
2654 * number of objects in a slab as critical. If we reach slub_max_order then 2657 * number of objects in a slab as critical. If we reach slub_max_order then
2655 * we try to keep the page order as low as possible. So we accept more waste 2658 * we try to keep the page order as low as possible. So we accept more waste
2656 * of space in favor of a small page order. 2659 * of space in favor of a small page order.
2657 * 2660 *
2658 * Higher order allocations also allow the placement of more objects in a 2661 * Higher order allocations also allow the placement of more objects in a
2659 * slab and thereby reduce object handling overhead. If the user has 2662 * slab and thereby reduce object handling overhead. If the user has
2660 * requested a higher minimum order then we start with that one instead of 2663 * requested a higher minimum order then we start with that one instead of
2661 * the smallest order which will fit the object. 2664 * the smallest order which will fit the object.
2662 */ 2665 */
2663 static inline int slab_order(int size, int min_objects, 2666 static inline int slab_order(int size, int min_objects,
2664 int max_order, int fract_leftover, int reserved) 2667 int max_order, int fract_leftover, int reserved)
2665 { 2668 {
2666 int order; 2669 int order;
2667 int rem; 2670 int rem;
2668 int min_order = slub_min_order; 2671 int min_order = slub_min_order;
2669 2672
2670 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE) 2673 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
2671 return get_order(size * MAX_OBJS_PER_PAGE) - 1; 2674 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
2672 2675
2673 for (order = max(min_order, 2676 for (order = max(min_order,
2674 fls(min_objects * size - 1) - PAGE_SHIFT); 2677 fls(min_objects * size - 1) - PAGE_SHIFT);
2675 order <= max_order; order++) { 2678 order <= max_order; order++) {
2676 2679
2677 unsigned long slab_size = PAGE_SIZE << order; 2680 unsigned long slab_size = PAGE_SIZE << order;
2678 2681
2679 if (slab_size < min_objects * size + reserved) 2682 if (slab_size < min_objects * size + reserved)
2680 continue; 2683 continue;
2681 2684
2682 rem = (slab_size - reserved) % size; 2685 rem = (slab_size - reserved) % size;
2683 2686
2684 if (rem <= slab_size / fract_leftover) 2687 if (rem <= slab_size / fract_leftover)
2685 break; 2688 break;
2686 2689
2687 } 2690 }
2688 2691
2689 return order; 2692 return order;
2690 } 2693 }
2691 2694
2692 static inline int calculate_order(int size, int reserved) 2695 static inline int calculate_order(int size, int reserved)
2693 { 2696 {
2694 int order; 2697 int order;
2695 int min_objects; 2698 int min_objects;
2696 int fraction; 2699 int fraction;
2697 int max_objects; 2700 int max_objects;
2698 2701
2699 /* 2702 /*
2700 * Attempt to find the best configuration for a slab. This 2703 * Attempt to find the best configuration for a slab. This
2701 * works by first attempting to generate a layout with 2704 * works by first attempting to generate a layout with
2702 * the best configuration and backing off gradually. 2705 * the best configuration and backing off gradually.
2703 * 2706 *
2704 * First we reduce the acceptable waste in a slab. Then 2707 * First we reduce the acceptable waste in a slab. Then
2705 * we reduce the minimum objects required in a slab. 2708 * we reduce the minimum objects required in a slab.
2706 */ 2709 */
2707 min_objects = slub_min_objects; 2710 min_objects = slub_min_objects;
2708 if (!min_objects) 2711 if (!min_objects)
2709 min_objects = 4 * (fls(nr_cpu_ids) + 1); 2712 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2710 max_objects = order_objects(slub_max_order, size, reserved); 2713 max_objects = order_objects(slub_max_order, size, reserved);
2711 min_objects = min(min_objects, max_objects); 2714 min_objects = min(min_objects, max_objects);
2712 2715
2713 while (min_objects > 1) { 2716 while (min_objects > 1) {
2714 fraction = 16; 2717 fraction = 16;
2715 while (fraction >= 4) { 2718 while (fraction >= 4) {
2716 order = slab_order(size, min_objects, 2719 order = slab_order(size, min_objects,
2717 slub_max_order, fraction, reserved); 2720 slub_max_order, fraction, reserved);
2718 if (order <= slub_max_order) 2721 if (order <= slub_max_order)
2719 return order; 2722 return order;
2720 fraction /= 2; 2723 fraction /= 2;
2721 } 2724 }
2722 min_objects--; 2725 min_objects--;
2723 } 2726 }
2724 2727
2725 /* 2728 /*
2726 * We were unable to place multiple objects in a slab. Now 2729 * We were unable to place multiple objects in a slab. Now
2727 * let's see if we can place a single object there. 2730 * let's see if we can place a single object there.
2728 */ 2731 */
2729 order = slab_order(size, 1, slub_max_order, 1, reserved); 2732 order = slab_order(size, 1, slub_max_order, 1, reserved);
2730 if (order <= slub_max_order) 2733 if (order <= slub_max_order)
2731 return order; 2734 return order;
2732 2735
2733 /* 2736 /*
2734 * Doh this slab cannot be placed using slub_max_order. 2737 * Doh this slab cannot be placed using slub_max_order.
2735 */ 2738 */
2736 order = slab_order(size, 1, MAX_ORDER, 1, reserved); 2739 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2737 if (order < MAX_ORDER) 2740 if (order < MAX_ORDER)
2738 return order; 2741 return order;
2739 return -ENOSYS; 2742 return -ENOSYS;
2740 } 2743 }
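
To make the back-off in calculate_order() concrete, the user-space program below re-derives the heuristic for a few object sizes. The 4 KiB page, the 4-CPU count and a slub_max_order of 3 are assumptions picked for the arithmetic, reserved bytes are ignored, and the early clamp against max_objects is replaced by the plain back-off loop, so this is only a sketch of the policy, not the kernel function.

    #include <stdio.h>

    #define PAGE_SHIFT     12
    #define PAGE_SIZE      (1UL << PAGE_SHIFT)
    #define SLUB_MAX_ORDER 3
    #define NR_CPUS        4

    static int fls_ul(unsigned long x)   /* 1-based index of highest set bit */
    {
        int r = 0;
        while (x) { r++; x >>= 1; }
        return r;
    }

    /* Lowest order <= max_order wasting at most 1/fract of the slab. */
    static int slab_order(int size, int min_objects, int max_order, int fract)
    {
        int order = fls_ul((unsigned long)min_objects * size - 1) - PAGE_SHIFT;

        if (order < 0)
            order = 0;
        for (; order <= max_order; order++) {
            unsigned long slab_size = PAGE_SIZE << order;

            if (slab_size < (unsigned long)min_objects * size)
                continue;
            if (slab_size % size <= slab_size / fract)
                break;
        }
        return order;
    }

    static int calculate_order(int size)
    {
        int min_objects = 4 * (fls_ul(NR_CPUS) + 1);     /* 16 for 4 CPUs */

        while (min_objects > 1) {
            for (int fract = 16; fract >= 4; fract /= 2) {
                int order = slab_order(size, min_objects,
                                       SLUB_MAX_ORDER, fract);
                if (order <= SLUB_MAX_ORDER)
                    return order;
            }
            min_objects--;
        }
        return slab_order(size, 1, SLUB_MAX_ORDER, 1);
    }

    int main(void)
    {
        int sizes[] = { 64, 256, 704, 3000 };

        for (int i = 0; i < 4; i++) {
            int order = calculate_order(sizes[i]);
            printf("size %4d -> order %d (%lu objects per slab)\n",
                   sizes[i], order, (PAGE_SIZE << order) / sizes[i]);
        }
        return 0;
    }

On this sketch's assumptions a 704-byte object ends up at order 2 with 23 objects per slab, while a 3000-byte object needs order 3 and fits only 10 objects.
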
2741 2744
2742 /* 2745 /*
2743 * Figure out what the alignment of the objects will be. 2746 * Figure out what the alignment of the objects will be.
2744 */ 2747 */
2745 static unsigned long calculate_alignment(unsigned long flags, 2748 static unsigned long calculate_alignment(unsigned long flags,
2746 unsigned long align, unsigned long size) 2749 unsigned long align, unsigned long size)
2747 { 2750 {
2748 /* 2751 /*
2749 * If the user wants hardware cache aligned objects then follow that 2752 * If the user wants hardware cache aligned objects then follow that
2750 * suggestion if the object is sufficiently large. 2753 * suggestion if the object is sufficiently large.
2751 * 2754 *
2752 * The hardware cache alignment cannot override the specified 2755 * The hardware cache alignment cannot override the specified
2753 * alignment though. If that is greater, then use it. 2756 * alignment though. If that is greater, then use it.
2754 */ 2757 */
2755 if (flags & SLAB_HWCACHE_ALIGN) { 2758 if (flags & SLAB_HWCACHE_ALIGN) {
2756 unsigned long ralign = cache_line_size(); 2759 unsigned long ralign = cache_line_size();
2757 while (size <= ralign / 2) 2760 while (size <= ralign / 2)
2758 ralign /= 2; 2761 ralign /= 2;
2759 align = max(align, ralign); 2762 align = max(align, ralign);
2760 } 2763 }
2761 2764
2762 if (align < ARCH_SLAB_MINALIGN) 2765 if (align < ARCH_SLAB_MINALIGN)
2763 align = ARCH_SLAB_MINALIGN; 2766 align = ARCH_SLAB_MINALIGN;
2764 2767
2765 return ALIGN(align, sizeof(void *)); 2768 return ALIGN(align, sizeof(void *));
2766 } 2769 }
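
The effect of the halving loop above is that a small object only pays for as much of the cache line as it actually needs. A tiny sketch of just the SLAB_HWCACHE_ALIGN rule, assuming a 64-byte line (the kernel asks cache_line_size() at runtime) and leaving out the ARCH_SLAB_MINALIGN and word-alignment steps:

    #include <stdio.h>

    static unsigned long hwcache_align(unsigned long size, unsigned long line)
    {
        unsigned long ralign = line;

        /* halve the line while the object still fits in half of it */
        while (size <= ralign / 2)
            ralign /= 2;
        return ralign;
    }

    int main(void)
    {
        unsigned long sizes[] = { 8, 20, 40, 70, 200 };

        for (int i = 0; i < 5; i++)
            printf("object %3lu bytes -> %lu-byte alignment\n",
                   sizes[i], hwcache_align(sizes[i], 64));
        return 0;   /* 8 -> 8, 20 -> 32, 40 -> 64, 70 -> 64, 200 -> 64 */
    }
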
2767 2770
2768 static void 2771 static void
2769 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) 2772 init_kmem_cache_node(struct kmem_cache_node *n)
2770 { 2773 {
2771 n->nr_partial = 0; 2774 n->nr_partial = 0;
2772 spin_lock_init(&n->list_lock); 2775 spin_lock_init(&n->list_lock);
2773 INIT_LIST_HEAD(&n->partial); 2776 INIT_LIST_HEAD(&n->partial);
2774 #ifdef CONFIG_SLUB_DEBUG 2777 #ifdef CONFIG_SLUB_DEBUG
2775 atomic_long_set(&n->nr_slabs, 0); 2778 atomic_long_set(&n->nr_slabs, 0);
2776 atomic_long_set(&n->total_objects, 0); 2779 atomic_long_set(&n->total_objects, 0);
2777 INIT_LIST_HEAD(&n->full); 2780 INIT_LIST_HEAD(&n->full);
2778 #endif 2781 #endif
2779 } 2782 }
2780 2783
2781 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) 2784 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2782 { 2785 {
2783 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < 2786 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2784 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); 2787 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
2785 2788
2786 /* 2789 /*
2787 * Must align to double word boundary for the double cmpxchg 2790 * Must align to double word boundary for the double cmpxchg
2788 * instructions to work; see __pcpu_double_call_return_bool(). 2791 * instructions to work; see __pcpu_double_call_return_bool().
2789 */ 2792 */
2790 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2793 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2791 2 * sizeof(void *)); 2794 2 * sizeof(void *));
2792 2795
2793 if (!s->cpu_slab) 2796 if (!s->cpu_slab)
2794 return 0; 2797 return 0;
2795 2798
2796 init_kmem_cache_cpus(s); 2799 init_kmem_cache_cpus(s);
2797 2800
2798 return 1; 2801 return 1;
2799 } 2802 }
2800 2803
2801 static struct kmem_cache *kmem_cache_node; 2804 static struct kmem_cache *kmem_cache_node;
2802 2805
2803 /* 2806 /*
2804 * No kmalloc_node yet so do it by hand. We know that this is the first 2807 * No kmalloc_node yet so do it by hand. We know that this is the first
2805 * slab on the node for this slabcache. There are no concurrent accesses 2808 * slab on the node for this slabcache. There are no concurrent accesses
2806 * possible. 2809 * possible.
2807 * 2810 *
2808 * Note that this function only works on the kmalloc_node_cache 2811 * Note that this function only works on the kmalloc_node_cache
2809 * when allocating for the kmalloc_node_cache. This is used for bootstrapping 2812 * when allocating for the kmalloc_node_cache. This is used for bootstrapping
2810 * memory on a fresh node that has no slab structures yet. 2813 * memory on a fresh node that has no slab structures yet.
2811 */ 2814 */
2812 static void early_kmem_cache_node_alloc(int node) 2815 static void early_kmem_cache_node_alloc(int node)
2813 { 2816 {
2814 struct page *page; 2817 struct page *page;
2815 struct kmem_cache_node *n; 2818 struct kmem_cache_node *n;
2816 2819
2817 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); 2820 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
2818 2821
2819 page = new_slab(kmem_cache_node, GFP_NOWAIT, node); 2822 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
2820 2823
2821 BUG_ON(!page); 2824 BUG_ON(!page);
2822 if (page_to_nid(page) != node) { 2825 if (page_to_nid(page) != node) {
2823 printk(KERN_ERR "SLUB: Unable to allocate memory from " 2826 printk(KERN_ERR "SLUB: Unable to allocate memory from "
2824 "node %d\n", node); 2827 "node %d\n", node);
2825 printk(KERN_ERR "SLUB: Allocating a useless per node structure " 2828 printk(KERN_ERR "SLUB: Allocating a useless per node structure "
2826 "in order to be able to continue\n"); 2829 "in order to be able to continue\n");
2827 } 2830 }
2828 2831
2829 n = page->freelist; 2832 n = page->freelist;
2830 BUG_ON(!n); 2833 BUG_ON(!n);
2831 page->freelist = get_freepointer(kmem_cache_node, n); 2834 page->freelist = get_freepointer(kmem_cache_node, n);
2832 page->inuse = 1; 2835 page->inuse = 1;
2833 page->frozen = 0; 2836 page->frozen = 0;
2834 kmem_cache_node->node[node] = n; 2837 kmem_cache_node->node[node] = n;
2835 #ifdef CONFIG_SLUB_DEBUG 2838 #ifdef CONFIG_SLUB_DEBUG
2836 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); 2839 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
2837 init_tracking(kmem_cache_node, n); 2840 init_tracking(kmem_cache_node, n);
2838 #endif 2841 #endif
2839 init_kmem_cache_node(n, kmem_cache_node); 2842 init_kmem_cache_node(n);
2840 inc_slabs_node(kmem_cache_node, node, page->objects); 2843 inc_slabs_node(kmem_cache_node, node, page->objects);
2841 2844
2842 add_partial(n, page, DEACTIVATE_TO_HEAD); 2845 add_partial(n, page, DEACTIVATE_TO_HEAD);
2843 } 2846 }
2844 2847
2845 static void free_kmem_cache_nodes(struct kmem_cache *s) 2848 static void free_kmem_cache_nodes(struct kmem_cache *s)
2846 { 2849 {
2847 int node; 2850 int node;
2848 2851
2849 for_each_node_state(node, N_NORMAL_MEMORY) { 2852 for_each_node_state(node, N_NORMAL_MEMORY) {
2850 struct kmem_cache_node *n = s->node[node]; 2853 struct kmem_cache_node *n = s->node[node];
2851 2854
2852 if (n) 2855 if (n)
2853 kmem_cache_free(kmem_cache_node, n); 2856 kmem_cache_free(kmem_cache_node, n);
2854 2857
2855 s->node[node] = NULL; 2858 s->node[node] = NULL;
2856 } 2859 }
2857 } 2860 }
2858 2861
2859 static int init_kmem_cache_nodes(struct kmem_cache *s) 2862 static int init_kmem_cache_nodes(struct kmem_cache *s)
2860 { 2863 {
2861 int node; 2864 int node;
2862 2865
2863 for_each_node_state(node, N_NORMAL_MEMORY) { 2866 for_each_node_state(node, N_NORMAL_MEMORY) {
2864 struct kmem_cache_node *n; 2867 struct kmem_cache_node *n;
2865 2868
2866 if (slab_state == DOWN) { 2869 if (slab_state == DOWN) {
2867 early_kmem_cache_node_alloc(node); 2870 early_kmem_cache_node_alloc(node);
2868 continue; 2871 continue;
2869 } 2872 }
2870 n = kmem_cache_alloc_node(kmem_cache_node, 2873 n = kmem_cache_alloc_node(kmem_cache_node,
2871 GFP_KERNEL, node); 2874 GFP_KERNEL, node);
2872 2875
2873 if (!n) { 2876 if (!n) {
2874 free_kmem_cache_nodes(s); 2877 free_kmem_cache_nodes(s);
2875 return 0; 2878 return 0;
2876 } 2879 }
2877 2880
2878 s->node[node] = n; 2881 s->node[node] = n;
2879 init_kmem_cache_node(n, s); 2882 init_kmem_cache_node(n);
2880 } 2883 }
2881 return 1; 2884 return 1;
2882 } 2885 }
2883 2886
2884 static void set_min_partial(struct kmem_cache *s, unsigned long min) 2887 static void set_min_partial(struct kmem_cache *s, unsigned long min)
2885 { 2888 {
2886 if (min < MIN_PARTIAL) 2889 if (min < MIN_PARTIAL)
2887 min = MIN_PARTIAL; 2890 min = MIN_PARTIAL;
2888 else if (min > MAX_PARTIAL) 2891 else if (min > MAX_PARTIAL)
2889 min = MAX_PARTIAL; 2892 min = MAX_PARTIAL;
2890 s->min_partial = min; 2893 s->min_partial = min;
2891 } 2894 }
2892 2895
2893 /* 2896 /*
2894 * calculate_sizes() determines the order and the distribution of data within 2897 * calculate_sizes() determines the order and the distribution of data within
2895 * a slab object. 2898 * a slab object.
2896 */ 2899 */
2897 static int calculate_sizes(struct kmem_cache *s, int forced_order) 2900 static int calculate_sizes(struct kmem_cache *s, int forced_order)
2898 { 2901 {
2899 unsigned long flags = s->flags; 2902 unsigned long flags = s->flags;
2900 unsigned long size = s->objsize; 2903 unsigned long size = s->objsize;
2901 unsigned long align = s->align; 2904 unsigned long align = s->align;
2902 int order; 2905 int order;
2903 2906
2904 /* 2907 /*
2905 * Round up object size to the next word boundary. We can only 2908 * Round up object size to the next word boundary. We can only
2906 * place the free pointer at word boundaries and this determines 2909 * place the free pointer at word boundaries and this determines
2907 * the possible location of the free pointer. 2910 * the possible location of the free pointer.
2908 */ 2911 */
2909 size = ALIGN(size, sizeof(void *)); 2912 size = ALIGN(size, sizeof(void *));
2910 2913
2911 #ifdef CONFIG_SLUB_DEBUG 2914 #ifdef CONFIG_SLUB_DEBUG
2912 /* 2915 /*
2913 * Determine if we can poison the object itself. If the user of 2916 * Determine if we can poison the object itself. If the user of
2914 * the slab may touch the object after free or before allocation 2917 * the slab may touch the object after free or before allocation
2915 * then we should never poison the object itself. 2918 * then we should never poison the object itself.
2916 */ 2919 */
2917 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) && 2920 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
2918 !s->ctor) 2921 !s->ctor)
2919 s->flags |= __OBJECT_POISON; 2922 s->flags |= __OBJECT_POISON;
2920 else 2923 else
2921 s->flags &= ~__OBJECT_POISON; 2924 s->flags &= ~__OBJECT_POISON;
2922 2925
2923 2926
2924 /* 2927 /*
2925 * If we are Redzoning then check if there is some space between the 2928 * If we are Redzoning then check if there is some space between the
2926 * end of the object and the free pointer. If not then add an 2929 * end of the object and the free pointer. If not then add an
2927 * additional word to have some bytes to store Redzone information. 2930 * additional word to have some bytes to store Redzone information.
2928 */ 2931 */
2929 if ((flags & SLAB_RED_ZONE) && size == s->objsize) 2932 if ((flags & SLAB_RED_ZONE) && size == s->objsize)
2930 size += sizeof(void *); 2933 size += sizeof(void *);
2931 #endif 2934 #endif
2932 2935
2933 /* 2936 /*
2934 * With that we have determined the number of bytes in actual use 2937 * With that we have determined the number of bytes in actual use
2935 * by the object. This is the potential offset to the free pointer. 2938 * by the object. This is the potential offset to the free pointer.
2936 */ 2939 */
2937 s->inuse = size; 2940 s->inuse = size;
2938 2941
2939 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || 2942 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
2940 s->ctor)) { 2943 s->ctor)) {
2941 /* 2944 /*
2942 * Relocate free pointer after the object if it is not 2945 * Relocate free pointer after the object if it is not
2943 * permitted to overwrite the first word of the object on 2946 * permitted to overwrite the first word of the object on
2944 * kmem_cache_free. 2947 * kmem_cache_free.
2945 * 2948 *
2946 * This is the case if we do RCU, have a constructor or 2949 * This is the case if we do RCU, have a constructor or
2947 * destructor or are poisoning the objects. 2950 * destructor or are poisoning the objects.
2948 */ 2951 */
2949 s->offset = size; 2952 s->offset = size;
2950 size += sizeof(void *); 2953 size += sizeof(void *);
2951 } 2954 }
2952 2955
2953 #ifdef CONFIG_SLUB_DEBUG 2956 #ifdef CONFIG_SLUB_DEBUG
2954 if (flags & SLAB_STORE_USER) 2957 if (flags & SLAB_STORE_USER)
2955 /* 2958 /*
2956 * Need to store information about allocs and frees after 2959 * Need to store information about allocs and frees after
2957 * the object. 2960 * the object.
2958 */ 2961 */
2959 size += 2 * sizeof(struct track); 2962 size += 2 * sizeof(struct track);
2960 2963
2961 if (flags & SLAB_RED_ZONE) 2964 if (flags & SLAB_RED_ZONE)
2962 /* 2965 /*
2963 * Add some empty padding so that we can catch 2966 * Add some empty padding so that we can catch
2964 * overwrites from earlier objects rather than let 2967 * overwrites from earlier objects rather than let
2965 * tracking information or the free pointer be 2968 * tracking information or the free pointer be
2966 * corrupted if a user writes before the start 2969 * corrupted if a user writes before the start
2967 * of the object. 2970 * of the object.
2968 */ 2971 */
2969 size += sizeof(void *); 2972 size += sizeof(void *);
2970 #endif 2973 #endif
2971 2974
2972 /* 2975 /*
2973 * Determine the alignment based on various parameters that the 2976 * Determine the alignment based on various parameters that the
2974 * user specified and the dynamic determination of cache line size 2977 * user specified and the dynamic determination of cache line size
2975 * on bootup. 2978 * on bootup.
2976 */ 2979 */
2977 align = calculate_alignment(flags, align, s->objsize); 2980 align = calculate_alignment(flags, align, s->objsize);
2978 s->align = align; 2981 s->align = align;
2979 2982
2980 /* 2983 /*
2981 * SLUB stores one object immediately after another beginning from 2984 * SLUB stores one object immediately after another beginning from
2982 * offset 0. In order to align the objects we have to simply size 2985 * offset 0. In order to align the objects we have to simply size
2983 * each object to conform to the alignment. 2986 * each object to conform to the alignment.
2984 */ 2987 */
2985 size = ALIGN(size, align); 2988 size = ALIGN(size, align);
2986 s->size = size; 2989 s->size = size;
2987 if (forced_order >= 0) 2990 if (forced_order >= 0)
2988 order = forced_order; 2991 order = forced_order;
2989 else 2992 else
2990 order = calculate_order(size, s->reserved); 2993 order = calculate_order(size, s->reserved);
2991 2994
2992 if (order < 0) 2995 if (order < 0)
2993 return 0; 2996 return 0;
2994 2997
2995 s->allocflags = 0; 2998 s->allocflags = 0;
2996 if (order) 2999 if (order)
2997 s->allocflags |= __GFP_COMP; 3000 s->allocflags |= __GFP_COMP;
2998 3001
2999 if (s->flags & SLAB_CACHE_DMA) 3002 if (s->flags & SLAB_CACHE_DMA)
3000 s->allocflags |= SLUB_DMA; 3003 s->allocflags |= SLUB_DMA;
3001 3004
3002 if (s->flags & SLAB_RECLAIM_ACCOUNT) 3005 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3003 s->allocflags |= __GFP_RECLAIMABLE; 3006 s->allocflags |= __GFP_RECLAIMABLE;
3004 3007
3005 /* 3008 /*
3006 * Determine the number of objects per slab 3009 * Determine the number of objects per slab
3007 */ 3010 */
3008 s->oo = oo_make(order, size, s->reserved); 3011 s->oo = oo_make(order, size, s->reserved);
3009 s->min = oo_make(get_order(size), size, s->reserved); 3012 s->min = oo_make(get_order(size), size, s->reserved);
3010 if (oo_objects(s->oo) > oo_objects(s->max)) 3013 if (oo_objects(s->oo) > oo_objects(s->max))
3011 s->max = s->oo; 3014 s->max = s->oo;
3012 3015
3013 return !!oo_objects(s->oo); 3016 return !!oo_objects(s->oo);
3014 3017
3015 } 3018 }
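
A rough worked example of the bookkeeping in calculate_sizes(): take a 64-bit build, a 56-byte object and full debugging (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER), and assume a 24-byte struct track purely for the arithmetic; the real track size depends on the configuration. The few lines below only mirror the individual steps, they are not the function itself.

    #include <stdio.h>

    int main(void)
    {
        unsigned long objsize = 56, wordsz = 8, track = 24;

        /* round the object size up to a word boundary (already aligned) */
        unsigned long size = (objsize + wordsz - 1) / wordsz * wordsz;  /* 56 */

        size += wordsz;               /* red zone word after the object -> 64 */
        unsigned long offset = size;  /* free pointer relocated past it       */
        size += wordsz;               /* room for the free pointer      -> 72 */
        size += 2 * track;            /* alloc and free tracking       -> 120 */
        size += wordsz;               /* red-zone pad before next slot -> 128 */

        printf("56-byte object -> %lu bytes per slot, free pointer at %lu\n",
               size, offset);
        return 0;
    }

The final ALIGN(size, align) step and the order calculation then run on that 128-byte footprint.
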
3016 3019
3017 static int kmem_cache_open(struct kmem_cache *s, 3020 static int kmem_cache_open(struct kmem_cache *s,
3018 const char *name, size_t size, 3021 const char *name, size_t size,
3019 size_t align, unsigned long flags, 3022 size_t align, unsigned long flags,
3020 void (*ctor)(void *)) 3023 void (*ctor)(void *))
3021 { 3024 {
3022 memset(s, 0, kmem_size); 3025 memset(s, 0, kmem_size);
3023 s->name = name; 3026 s->name = name;
3024 s->ctor = ctor; 3027 s->ctor = ctor;
3025 s->objsize = size; 3028 s->objsize = size;
3026 s->align = align; 3029 s->align = align;
3027 s->flags = kmem_cache_flags(size, flags, name, ctor); 3030 s->flags = kmem_cache_flags(size, flags, name, ctor);
3028 s->reserved = 0; 3031 s->reserved = 0;
3029 3032
3030 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) 3033 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
3031 s->reserved = sizeof(struct rcu_head); 3034 s->reserved = sizeof(struct rcu_head);
3032 3035
3033 if (!calculate_sizes(s, -1)) 3036 if (!calculate_sizes(s, -1))
3034 goto error; 3037 goto error;
3035 if (disable_higher_order_debug) { 3038 if (disable_higher_order_debug) {
3036 /* 3039 /*
3037 * Disable debugging flags that store metadata if the min slab 3040 * Disable debugging flags that store metadata if the min slab
3038 * order increased. 3041 * order increased.
3039 */ 3042 */
3040 if (get_order(s->size) > get_order(s->objsize)) { 3043 if (get_order(s->size) > get_order(s->objsize)) {
3041 s->flags &= ~DEBUG_METADATA_FLAGS; 3044 s->flags &= ~DEBUG_METADATA_FLAGS;
3042 s->offset = 0; 3045 s->offset = 0;
3043 if (!calculate_sizes(s, -1)) 3046 if (!calculate_sizes(s, -1))
3044 goto error; 3047 goto error;
3045 } 3048 }
3046 } 3049 }
3047 3050
3048 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ 3051 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3049 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) 3052 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3050 if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0) 3053 if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
3051 /* Enable fast mode */ 3054 /* Enable fast mode */
3052 s->flags |= __CMPXCHG_DOUBLE; 3055 s->flags |= __CMPXCHG_DOUBLE;
3053 #endif 3056 #endif
3054 3057
3055 /* 3058 /*
3056 * The larger the object size is, the more pages we want on the partial 3059 * The larger the object size is, the more pages we want on the partial
3057 * list to avoid pounding the page allocator excessively. 3060 * list to avoid pounding the page allocator excessively.
3058 */ 3061 */
3059 set_min_partial(s, ilog2(s->size) / 2); 3062 set_min_partial(s, ilog2(s->size) / 2);
3060 3063
3061 /* 3064 /*
3062 * cpu_partial determines the maximum number of objects kept in the 3065 * cpu_partial determines the maximum number of objects kept in the
3063 * per cpu partial lists of a processor. 3066 * per cpu partial lists of a processor.
3064 * 3067 *
3065 * Per cpu partial lists mainly contain slabs that just have one 3068 * Per cpu partial lists mainly contain slabs that just have one
3066 * object freed. If they are used for allocation then they can be 3069 * object freed. If they are used for allocation then they can be
3067 * filled up again with minimal effort. The slab will never hit the 3070 * filled up again with minimal effort. The slab will never hit the
3068 * per node partial lists and therefore no locking will be required. 3071 * per node partial lists and therefore no locking will be required.
3069 * 3072 *
3070 * This setting also determines 3073 * This setting also determines
3071 * 3074 *
3072 * A) The number of objects from per cpu partial slabs dumped to the 3075 * A) The number of objects from per cpu partial slabs dumped to the
3073 * per node list when we reach the limit. 3076 * per node list when we reach the limit.
3074 * B) The number of objects in cpu partial slabs to extract from the 3077 * B) The number of objects in cpu partial slabs to extract from the
3075 * per node list when we run out of per cpu objects. We only fetch 50% 3078 * per node list when we run out of per cpu objects. We only fetch 50%
3076 * to keep some capacity around for frees. 3079 * to keep some capacity around for frees.
3077 */ 3080 */
3078 if (kmem_cache_debug(s)) 3081 if (kmem_cache_debug(s))
3079 s->cpu_partial = 0; 3082 s->cpu_partial = 0;
3080 else if (s->size >= PAGE_SIZE) 3083 else if (s->size >= PAGE_SIZE)
3081 s->cpu_partial = 2; 3084 s->cpu_partial = 2;
3082 else if (s->size >= 1024) 3085 else if (s->size >= 1024)
3083 s->cpu_partial = 6; 3086 s->cpu_partial = 6;
3084 else if (s->size >= 256) 3087 else if (s->size >= 256)
3085 s->cpu_partial = 13; 3088 s->cpu_partial = 13;
3086 else 3089 else
3087 s->cpu_partial = 30; 3090 s->cpu_partial = 30;
3088 3091
3089 s->refcount = 1; 3092 s->refcount = 1;
3090 #ifdef CONFIG_NUMA 3093 #ifdef CONFIG_NUMA
3091 s->remote_node_defrag_ratio = 1000; 3094 s->remote_node_defrag_ratio = 1000;
3092 #endif 3095 #endif
3093 if (!init_kmem_cache_nodes(s)) 3096 if (!init_kmem_cache_nodes(s))
3094 goto error; 3097 goto error;
3095 3098
3096 if (alloc_kmem_cache_cpus(s)) 3099 if (alloc_kmem_cache_cpus(s))
3097 return 1; 3100 return 1;
3098 3101
3099 free_kmem_cache_nodes(s); 3102 free_kmem_cache_nodes(s);
3100 error: 3103 error:
3101 if (flags & SLAB_PANIC) 3104 if (flags & SLAB_PANIC)
3102 panic("Cannot create slab %s size=%lu realsize=%u " 3105 panic("Cannot create slab %s size=%lu realsize=%u "
3103 "order=%u offset=%u flags=%lx\n", 3106 "order=%u offset=%u flags=%lx\n",
3104 s->name, (unsigned long)size, s->size, oo_order(s->oo), 3107 s->name, (unsigned long)size, s->size, oo_order(s->oo),
3105 s->offset, flags); 3108 s->offset, flags);
3106 return 0; 3109 return 0;
3107 } 3110 }
3108 3111
3109 /* 3112 /*
3110 * Determine the size of a slab object 3113 * Determine the size of a slab object
3111 */ 3114 */
3112 unsigned int kmem_cache_size(struct kmem_cache *s) 3115 unsigned int kmem_cache_size(struct kmem_cache *s)
3113 { 3116 {
3114 return s->objsize; 3117 return s->objsize;
3115 } 3118 }
3116 EXPORT_SYMBOL(kmem_cache_size); 3119 EXPORT_SYMBOL(kmem_cache_size);
3117 3120
3118 static void list_slab_objects(struct kmem_cache *s, struct page *page, 3121 static void list_slab_objects(struct kmem_cache *s, struct page *page,
3119 const char *text) 3122 const char *text)
3120 { 3123 {
3121 #ifdef CONFIG_SLUB_DEBUG 3124 #ifdef CONFIG_SLUB_DEBUG
3122 void *addr = page_address(page); 3125 void *addr = page_address(page);
3123 void *p; 3126 void *p;
3124 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * 3127 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
3125 sizeof(long), GFP_ATOMIC); 3128 sizeof(long), GFP_ATOMIC);
3126 if (!map) 3129 if (!map)
3127 return; 3130 return;
3128 slab_err(s, page, "%s", text); 3131 slab_err(s, page, "%s", text);
3129 slab_lock(page); 3132 slab_lock(page);
3130 3133
3131 get_map(s, page, map); 3134 get_map(s, page, map);
3132 for_each_object(p, s, addr, page->objects) { 3135 for_each_object(p, s, addr, page->objects) {
3133 3136
3134 if (!test_bit(slab_index(p, s, addr), map)) { 3137 if (!test_bit(slab_index(p, s, addr), map)) {
3135 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n", 3138 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
3136 p, p - addr); 3139 p, p - addr);
3137 print_tracking(s, p); 3140 print_tracking(s, p);
3138 } 3141 }
3139 } 3142 }
3140 slab_unlock(page); 3143 slab_unlock(page);
3141 kfree(map); 3144 kfree(map);
3142 #endif 3145 #endif
3143 } 3146 }
3144 3147
3145 /* 3148 /*
3146 * Attempt to free all partial slabs on a node. 3149 * Attempt to free all partial slabs on a node.
3147 * This is called from kmem_cache_close(). We must be the last thread 3150 * This is called from kmem_cache_close(). We must be the last thread
3148 * using the cache and therefore we do not need to lock anymore. 3151 * using the cache and therefore we do not need to lock anymore.
3149 */ 3152 */
3150 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) 3153 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3151 { 3154 {
3152 struct page *page, *h; 3155 struct page *page, *h;
3153 3156
3154 list_for_each_entry_safe(page, h, &n->partial, lru) { 3157 list_for_each_entry_safe(page, h, &n->partial, lru) {
3155 if (!page->inuse) { 3158 if (!page->inuse) {
3156 remove_partial(n, page); 3159 remove_partial(n, page);
3157 discard_slab(s, page); 3160 discard_slab(s, page);
3158 } else { 3161 } else {
3159 list_slab_objects(s, page, 3162 list_slab_objects(s, page,
3160 "Objects remaining on kmem_cache_close()"); 3163 "Objects remaining on kmem_cache_close()");
3161 } 3164 }
3162 } 3165 }
3163 } 3166 }
3164 3167
3165 /* 3168 /*
3166 * Release all resources used by a slab cache. 3169 * Release all resources used by a slab cache.
3167 */ 3170 */
3168 static inline int kmem_cache_close(struct kmem_cache *s) 3171 static inline int kmem_cache_close(struct kmem_cache *s)
3169 { 3172 {
3170 int node; 3173 int node;
3171 3174
3172 flush_all(s); 3175 flush_all(s);
3173 free_percpu(s->cpu_slab); 3176 free_percpu(s->cpu_slab);
3174 /* Attempt to free all objects */ 3177 /* Attempt to free all objects */
3175 for_each_node_state(node, N_NORMAL_MEMORY) { 3178 for_each_node_state(node, N_NORMAL_MEMORY) {
3176 struct kmem_cache_node *n = get_node(s, node); 3179 struct kmem_cache_node *n = get_node(s, node);
3177 3180
3178 free_partial(s, n); 3181 free_partial(s, n);
3179 if (n->nr_partial || slabs_node(s, node)) 3182 if (n->nr_partial || slabs_node(s, node))
3180 return 1; 3183 return 1;
3181 } 3184 }
3182 free_kmem_cache_nodes(s); 3185 free_kmem_cache_nodes(s);
3183 return 0; 3186 return 0;
3184 } 3187 }
3185 3188
3186 /* 3189 /*
3187 * Close a cache and release the kmem_cache structure 3190 * Close a cache and release the kmem_cache structure
3188 * (must be used for caches created using kmem_cache_create) 3191 * (must be used for caches created using kmem_cache_create)
3189 */ 3192 */
3190 void kmem_cache_destroy(struct kmem_cache *s) 3193 void kmem_cache_destroy(struct kmem_cache *s)
3191 { 3194 {
3192 down_write(&slub_lock); 3195 down_write(&slub_lock);
3193 s->refcount--; 3196 s->refcount--;
3194 if (!s->refcount) { 3197 if (!s->refcount) {
3195 list_del(&s->list); 3198 list_del(&s->list);
3196 up_write(&slub_lock); 3199 up_write(&slub_lock);
3197 if (kmem_cache_close(s)) { 3200 if (kmem_cache_close(s)) {
3198 printk(KERN_ERR "SLUB %s: %s called for cache that " 3201 printk(KERN_ERR "SLUB %s: %s called for cache that "
3199 "still has objects.\n", s->name, __func__); 3202 "still has objects.\n", s->name, __func__);
3200 dump_stack(); 3203 dump_stack();
3201 } 3204 }
3202 if (s->flags & SLAB_DESTROY_BY_RCU) 3205 if (s->flags & SLAB_DESTROY_BY_RCU)
3203 rcu_barrier(); 3206 rcu_barrier();
3204 sysfs_slab_remove(s); 3207 sysfs_slab_remove(s);
3205 } else 3208 } else
3206 up_write(&slub_lock); 3209 up_write(&slub_lock);
3207 } 3210 }
3208 EXPORT_SYMBOL(kmem_cache_destroy); 3211 EXPORT_SYMBOL(kmem_cache_destroy);
3209 3212
3210 /******************************************************************** 3213 /********************************************************************
3211 * Kmalloc subsystem 3214 * Kmalloc subsystem
3212 *******************************************************************/ 3215 *******************************************************************/
3213 3216
3214 struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; 3217 struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
3215 EXPORT_SYMBOL(kmalloc_caches); 3218 EXPORT_SYMBOL(kmalloc_caches);
3216 3219
3217 static struct kmem_cache *kmem_cache; 3220 static struct kmem_cache *kmem_cache;
3218 3221
3219 #ifdef CONFIG_ZONE_DMA 3222 #ifdef CONFIG_ZONE_DMA
3220 static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; 3223 static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
3221 #endif 3224 #endif
3222 3225
3223 static int __init setup_slub_min_order(char *str) 3226 static int __init setup_slub_min_order(char *str)
3224 { 3227 {
3225 get_option(&str, &slub_min_order); 3228 get_option(&str, &slub_min_order);
3226 3229
3227 return 1; 3230 return 1;
3228 } 3231 }
3229 3232
3230 __setup("slub_min_order=", setup_slub_min_order); 3233 __setup("slub_min_order=", setup_slub_min_order);
3231 3234
3232 static int __init setup_slub_max_order(char *str) 3235 static int __init setup_slub_max_order(char *str)
3233 { 3236 {
3234 get_option(&str, &slub_max_order); 3237 get_option(&str, &slub_max_order);
3235 slub_max_order = min(slub_max_order, MAX_ORDER - 1); 3238 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3236 3239
3237 return 1; 3240 return 1;
3238 } 3241 }
3239 3242
3240 __setup("slub_max_order=", setup_slub_max_order); 3243 __setup("slub_max_order=", setup_slub_max_order);
3241 3244
3242 static int __init setup_slub_min_objects(char *str) 3245 static int __init setup_slub_min_objects(char *str)
3243 { 3246 {
3244 get_option(&str, &slub_min_objects); 3247 get_option(&str, &slub_min_objects);
3245 3248
3246 return 1; 3249 return 1;
3247 } 3250 }
3248 3251
3249 __setup("slub_min_objects=", setup_slub_min_objects); 3252 __setup("slub_min_objects=", setup_slub_min_objects);
3250 3253
3251 static int __init setup_slub_nomerge(char *str) 3254 static int __init setup_slub_nomerge(char *str)
3252 { 3255 {
3253 slub_nomerge = 1; 3256 slub_nomerge = 1;
3254 return 1; 3257 return 1;
3255 } 3258 }
3256 3259
3257 __setup("slub_nomerge", setup_slub_nomerge); 3260 __setup("slub_nomerge", setup_slub_nomerge);
3258 3261
3259 static struct kmem_cache *__init create_kmalloc_cache(const char *name, 3262 static struct kmem_cache *__init create_kmalloc_cache(const char *name,
3260 int size, unsigned int flags) 3263 int size, unsigned int flags)
3261 { 3264 {
3262 struct kmem_cache *s; 3265 struct kmem_cache *s;
3263 3266
3264 s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); 3267 s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3265 3268
3266 /* 3269 /*
3267 * This function is called with IRQs disabled during early-boot on 3270 * This function is called with IRQs disabled during early-boot on
3268 * single CPU so there's no need to take slub_lock here. 3271 * single CPU so there's no need to take slub_lock here.
3269 */ 3272 */
3270 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, 3273 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
3271 flags, NULL)) 3274 flags, NULL))
3272 goto panic; 3275 goto panic;
3273 3276
3274 list_add(&s->list, &slab_caches); 3277 list_add(&s->list, &slab_caches);
3275 return s; 3278 return s;
3276 3279
3277 panic: 3280 panic:
3278 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); 3281 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
3279 return NULL; 3282 return NULL;
3280 } 3283 }
3281 3284
3282 /* 3285 /*
3283 * Conversion table for small slabs sizes / 8 to the index in the 3286 * Conversion table for small slabs sizes / 8 to the index in the
3284 * kmalloc array. This is necessary for slabs < 192 since we have non power 3287 * kmalloc array. This is necessary for slabs < 192 since we have non power
3285 * of two cache sizes there. The size of larger slabs can be determined using 3288 * of two cache sizes there. The size of larger slabs can be determined using
3286 * fls. 3289 * fls.
3287 */ 3290 */
3288 static s8 size_index[24] = { 3291 static s8 size_index[24] = {
3289 3, /* 8 */ 3292 3, /* 8 */
3290 4, /* 16 */ 3293 4, /* 16 */
3291 5, /* 24 */ 3294 5, /* 24 */
3292 5, /* 32 */ 3295 5, /* 32 */
3293 6, /* 40 */ 3296 6, /* 40 */
3294 6, /* 48 */ 3297 6, /* 48 */
3295 6, /* 56 */ 3298 6, /* 56 */
3296 6, /* 64 */ 3299 6, /* 64 */
3297 1, /* 72 */ 3300 1, /* 72 */
3298 1, /* 80 */ 3301 1, /* 80 */
3299 1, /* 88 */ 3302 1, /* 88 */
3300 1, /* 96 */ 3303 1, /* 96 */
3301 7, /* 104 */ 3304 7, /* 104 */
3302 7, /* 112 */ 3305 7, /* 112 */
3303 7, /* 120 */ 3306 7, /* 120 */
3304 7, /* 128 */ 3307 7, /* 128 */
3305 2, /* 136 */ 3308 2, /* 136 */
3306 2, /* 144 */ 3309 2, /* 144 */
3307 2, /* 152 */ 3310 2, /* 152 */
3308 2, /* 160 */ 3311 2, /* 160 */
3309 2, /* 168 */ 3312 2, /* 168 */
3310 2, /* 176 */ 3313 2, /* 176 */
3311 2, /* 184 */ 3314 2, /* 184 */
3312 2 /* 192 */ 3315 2 /* 192 */
3313 }; 3316 };
3314 3317
3315 static inline int size_index_elem(size_t bytes) 3318 static inline int size_index_elem(size_t bytes)
3316 { 3319 {
3317 return (bytes - 1) / 8; 3320 return (bytes - 1) / 8;
3318 } 3321 }
3319 3322
3320 static struct kmem_cache *get_slab(size_t size, gfp_t flags) 3323 static struct kmem_cache *get_slab(size_t size, gfp_t flags)
3321 { 3324 {
3322 int index; 3325 int index;
3323 3326
3324 if (size <= 192) { 3327 if (size <= 192) {
3325 if (!size) 3328 if (!size)
3326 return ZERO_SIZE_PTR; 3329 return ZERO_SIZE_PTR;
3327 3330
3328 index = size_index[size_index_elem(size)]; 3331 index = size_index[size_index_elem(size)];
3329 } else 3332 } else
3330 index = fls(size - 1); 3333 index = fls(size - 1);
3331 3334
3332 #ifdef CONFIG_ZONE_DMA 3335 #ifdef CONFIG_ZONE_DMA
3333 if (unlikely((flags & SLUB_DMA))) 3336 if (unlikely((flags & SLUB_DMA)))
3334 return kmalloc_dma_caches[index]; 3337 return kmalloc_dma_caches[index];
3335 3338
3336 #endif 3339 #endif
3337 return kmalloc_caches[index]; 3340 return kmalloc_caches[index];
3338 } 3341 }
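
The table and the fls() fallback above can be exercised outside the kernel. The sketch below copies the conversion table and reimplements the lookup with a portable fls(); the helper name kmalloc_index_demo is made up for the demonstration, and the zero-size and DMA cases are left out.

    #include <stdio.h>
    #include <stddef.h>

    static const signed char size_index[24] = {
        3, 4, 5, 5, 6, 6, 6, 6, 1, 1, 1, 1,
        7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2,
    };

    static int fls_uint(unsigned int x)      /* 1-based highest set bit */
    {
        int r = 0;
        while (x) { r++; x >>= 1; }
        return r;
    }

    static int kmalloc_index_demo(size_t size)
    {
        if (size <= 192)
            return size_index[(size - 1) / 8];
        return fls_uint((unsigned int)(size - 1));
    }

    int main(void)
    {
        size_t sizes[] = { 8, 24, 100, 192, 200, 1000 };

        for (int i = 0; i < 6; i++)
            printf("kmalloc(%4zu) -> kmalloc_caches[%d]\n",
                   sizes[i], kmalloc_index_demo(sizes[i]));
        /* 8 -> 3 (kmalloc-8), 100 -> 7 (kmalloc-128), 200 -> 8 (kmalloc-256) */
        return 0;
    }
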
3339 3342
3340 void *__kmalloc(size_t size, gfp_t flags) 3343 void *__kmalloc(size_t size, gfp_t flags)
3341 { 3344 {
3342 struct kmem_cache *s; 3345 struct kmem_cache *s;
3343 void *ret; 3346 void *ret;
3344 3347
3345 if (unlikely(size > SLUB_MAX_SIZE)) 3348 if (unlikely(size > SLUB_MAX_SIZE))
3346 return kmalloc_large(size, flags); 3349 return kmalloc_large(size, flags);
3347 3350
3348 s = get_slab(size, flags); 3351 s = get_slab(size, flags);
3349 3352
3350 if (unlikely(ZERO_OR_NULL_PTR(s))) 3353 if (unlikely(ZERO_OR_NULL_PTR(s)))
3351 return s; 3354 return s;
3352 3355
3353 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_); 3356 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_);
3354 3357
3355 trace_kmalloc(_RET_IP_, ret, size, s->size, flags); 3358 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3356 3359
3357 return ret; 3360 return ret;
3358 } 3361 }
3359 EXPORT_SYMBOL(__kmalloc); 3362 EXPORT_SYMBOL(__kmalloc);
3360 3363
3361 #ifdef CONFIG_NUMA 3364 #ifdef CONFIG_NUMA
3362 static void *kmalloc_large_node(size_t size, gfp_t flags, int node) 3365 static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3363 { 3366 {
3364 struct page *page; 3367 struct page *page;
3365 void *ptr = NULL; 3368 void *ptr = NULL;
3366 3369
3367 flags |= __GFP_COMP | __GFP_NOTRACK; 3370 flags |= __GFP_COMP | __GFP_NOTRACK;
3368 page = alloc_pages_node(node, flags, get_order(size)); 3371 page = alloc_pages_node(node, flags, get_order(size));
3369 if (page) 3372 if (page)
3370 ptr = page_address(page); 3373 ptr = page_address(page);
3371 3374
3372 kmemleak_alloc(ptr, size, 1, flags); 3375 kmemleak_alloc(ptr, size, 1, flags);
3373 return ptr; 3376 return ptr;
3374 } 3377 }
3375 3378
3376 void *__kmalloc_node(size_t size, gfp_t flags, int node) 3379 void *__kmalloc_node(size_t size, gfp_t flags, int node)
3377 { 3380 {
3378 struct kmem_cache *s; 3381 struct kmem_cache *s;
3379 void *ret; 3382 void *ret;
3380 3383
3381 if (unlikely(size > SLUB_MAX_SIZE)) { 3384 if (unlikely(size > SLUB_MAX_SIZE)) {
3382 ret = kmalloc_large_node(size, flags, node); 3385 ret = kmalloc_large_node(size, flags, node);
3383 3386
3384 trace_kmalloc_node(_RET_IP_, ret, 3387 trace_kmalloc_node(_RET_IP_, ret,
3385 size, PAGE_SIZE << get_order(size), 3388 size, PAGE_SIZE << get_order(size),
3386 flags, node); 3389 flags, node);
3387 3390
3388 return ret; 3391 return ret;
3389 } 3392 }
3390 3393
3391 s = get_slab(size, flags); 3394 s = get_slab(size, flags);
3392 3395
3393 if (unlikely(ZERO_OR_NULL_PTR(s))) 3396 if (unlikely(ZERO_OR_NULL_PTR(s)))
3394 return s; 3397 return s;
3395 3398
3396 ret = slab_alloc(s, flags, node, _RET_IP_); 3399 ret = slab_alloc(s, flags, node, _RET_IP_);
3397 3400
3398 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); 3401 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3399 3402
3400 return ret; 3403 return ret;
3401 } 3404 }
3402 EXPORT_SYMBOL(__kmalloc_node); 3405 EXPORT_SYMBOL(__kmalloc_node);
3403 #endif 3406 #endif
3404 3407
3405 size_t ksize(const void *object) 3408 size_t ksize(const void *object)
3406 { 3409 {
3407 struct page *page; 3410 struct page *page;
3408 3411
3409 if (unlikely(object == ZERO_SIZE_PTR)) 3412 if (unlikely(object == ZERO_SIZE_PTR))
3410 return 0; 3413 return 0;
3411 3414
3412 page = virt_to_head_page(object); 3415 page = virt_to_head_page(object);
3413 3416
3414 if (unlikely(!PageSlab(page))) { 3417 if (unlikely(!PageSlab(page))) {
3415 WARN_ON(!PageCompound(page)); 3418 WARN_ON(!PageCompound(page));
3416 return PAGE_SIZE << compound_order(page); 3419 return PAGE_SIZE << compound_order(page);
3417 } 3420 }
3418 3421
3419 return slab_ksize(page->slab); 3422 return slab_ksize(page->slab);
3420 } 3423 }
3421 EXPORT_SYMBOL(ksize); 3424 EXPORT_SYMBOL(ksize);
3422 3425
3423 #ifdef CONFIG_SLUB_DEBUG 3426 #ifdef CONFIG_SLUB_DEBUG
3424 bool verify_mem_not_deleted(const void *x) 3427 bool verify_mem_not_deleted(const void *x)
3425 { 3428 {
3426 struct page *page; 3429 struct page *page;
3427 void *object = (void *)x; 3430 void *object = (void *)x;
3428 unsigned long flags; 3431 unsigned long flags;
3429 bool rv; 3432 bool rv;
3430 3433
3431 if (unlikely(ZERO_OR_NULL_PTR(x))) 3434 if (unlikely(ZERO_OR_NULL_PTR(x)))
3432 return false; 3435 return false;
3433 3436
3434 local_irq_save(flags); 3437 local_irq_save(flags);
3435 3438
3436 page = virt_to_head_page(x); 3439 page = virt_to_head_page(x);
3437 if (unlikely(!PageSlab(page))) { 3440 if (unlikely(!PageSlab(page))) {
3438 /* maybe it was from stack? */ 3441 /* maybe it was from stack? */
3439 rv = true; 3442 rv = true;
3440 goto out_unlock; 3443 goto out_unlock;
3441 } 3444 }
3442 3445
3443 slab_lock(page); 3446 slab_lock(page);
3444 if (on_freelist(page->slab, page, object)) { 3447 if (on_freelist(page->slab, page, object)) {
3445 object_err(page->slab, page, object, "Object is on free-list"); 3448 object_err(page->slab, page, object, "Object is on free-list");
3446 rv = false; 3449 rv = false;
3447 } else { 3450 } else {
3448 rv = true; 3451 rv = true;
3449 } 3452 }
3450 slab_unlock(page); 3453 slab_unlock(page);
3451 3454
3452 out_unlock: 3455 out_unlock:
3453 local_irq_restore(flags); 3456 local_irq_restore(flags);
3454 return rv; 3457 return rv;
3455 } 3458 }
3456 EXPORT_SYMBOL(verify_mem_not_deleted); 3459 EXPORT_SYMBOL(verify_mem_not_deleted);
3457 #endif 3460 #endif
3458 3461
3459 void kfree(const void *x) 3462 void kfree(const void *x)
3460 { 3463 {
3461 struct page *page; 3464 struct page *page;
3462 void *object = (void *)x; 3465 void *object = (void *)x;
3463 3466
3464 trace_kfree(_RET_IP_, x); 3467 trace_kfree(_RET_IP_, x);
3465 3468
3466 if (unlikely(ZERO_OR_NULL_PTR(x))) 3469 if (unlikely(ZERO_OR_NULL_PTR(x)))
3467 return; 3470 return;
3468 3471
3469 page = virt_to_head_page(x); 3472 page = virt_to_head_page(x);
3470 if (unlikely(!PageSlab(page))) { 3473 if (unlikely(!PageSlab(page))) {
3471 BUG_ON(!PageCompound(page)); 3474 BUG_ON(!PageCompound(page));
3472 kmemleak_free(x); 3475 kmemleak_free(x);
3473 put_page(page); 3476 put_page(page);
3474 return; 3477 return;
3475 } 3478 }
3476 slab_free(page->slab, page, object, _RET_IP_); 3479 slab_free(page->slab, page, object, _RET_IP_);
3477 } 3480 }
3478 EXPORT_SYMBOL(kfree); 3481 EXPORT_SYMBOL(kfree);
3479 3482
3480 /* 3483 /*
3481 * kmem_cache_shrink removes empty slabs from the partial lists and sorts 3484 * kmem_cache_shrink removes empty slabs from the partial lists and sorts
3482 * the remaining slabs by the number of items in use. The slabs with the 3485 * the remaining slabs by the number of items in use. The slabs with the
3483 * most items in use come first. New allocations will then fill those up 3486 * most items in use come first. New allocations will then fill those up
3484 * and thus they can be removed from the partial lists. 3487 * and thus they can be removed from the partial lists.
3485 * 3488 *
3486 * The slabs with the least items are placed last. This results in them 3489 * The slabs with the least items are placed last. This results in them
3487 * being allocated from last, increasing the chance that the last objects 3490 * being allocated from last, increasing the chance that the last objects
3488 * are freed in them. 3491 * are freed in them.
3489 */ 3492 */
3490 int kmem_cache_shrink(struct kmem_cache *s) 3493 int kmem_cache_shrink(struct kmem_cache *s)
3491 { 3494 {
3492 int node; 3495 int node;
3493 int i; 3496 int i;
3494 struct kmem_cache_node *n; 3497 struct kmem_cache_node *n;
3495 struct page *page; 3498 struct page *page;
3496 struct page *t; 3499 struct page *t;
3497 int objects = oo_objects(s->max); 3500 int objects = oo_objects(s->max);
3498 struct list_head *slabs_by_inuse = 3501 struct list_head *slabs_by_inuse =
3499 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL); 3502 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
3500 unsigned long flags; 3503 unsigned long flags;
3501 3504
3502 if (!slabs_by_inuse) 3505 if (!slabs_by_inuse)
3503 return -ENOMEM; 3506 return -ENOMEM;
3504 3507
3505 flush_all(s); 3508 flush_all(s);
3506 for_each_node_state(node, N_NORMAL_MEMORY) { 3509 for_each_node_state(node, N_NORMAL_MEMORY) {
3507 n = get_node(s, node); 3510 n = get_node(s, node);
3508 3511
3509 if (!n->nr_partial) 3512 if (!n->nr_partial)
3510 continue; 3513 continue;
3511 3514
3512 for (i = 0; i < objects; i++) 3515 for (i = 0; i < objects; i++)
3513 INIT_LIST_HEAD(slabs_by_inuse + i); 3516 INIT_LIST_HEAD(slabs_by_inuse + i);
3514 3517
3515 spin_lock_irqsave(&n->list_lock, flags); 3518 spin_lock_irqsave(&n->list_lock, flags);
3516 3519
3517 /* 3520 /*
3518 * Build lists indexed by the items in use in each slab. 3521 * Build lists indexed by the items in use in each slab.
3519 * 3522 *
3520 * Note that concurrent frees may occur while we hold the 3523 * Note that concurrent frees may occur while we hold the
3521 * list_lock. page->inuse here is the upper limit. 3524 * list_lock. page->inuse here is the upper limit.
3522 */ 3525 */
3523 list_for_each_entry_safe(page, t, &n->partial, lru) { 3526 list_for_each_entry_safe(page, t, &n->partial, lru) {
3524 list_move(&page->lru, slabs_by_inuse + page->inuse); 3527 list_move(&page->lru, slabs_by_inuse + page->inuse);
3525 if (!page->inuse) 3528 if (!page->inuse)
3526 n->nr_partial--; 3529 n->nr_partial--;
3527 } 3530 }
3528 3531
3529 /* 3532 /*
3530 * Rebuild the partial list with the slabs filled up most 3533 * Rebuild the partial list with the slabs filled up most
3531 * first and the least used slabs at the end. 3534 * first and the least used slabs at the end.
3532 */ 3535 */
3533 for (i = objects - 1; i > 0; i--) 3536 for (i = objects - 1; i > 0; i--)
3534 list_splice(slabs_by_inuse + i, n->partial.prev); 3537 list_splice(slabs_by_inuse + i, n->partial.prev);
3535 3538
3536 spin_unlock_irqrestore(&n->list_lock, flags); 3539 spin_unlock_irqrestore(&n->list_lock, flags);
3537 3540
3538 /* Release empty slabs */ 3541 /* Release empty slabs */
3539 list_for_each_entry_safe(page, t, slabs_by_inuse, lru) 3542 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
3540 discard_slab(s, page); 3543 discard_slab(s, page);
3541 } 3544 }
3542 3545
3543 kfree(slabs_by_inuse); 3546 kfree(slabs_by_inuse);
3544 return 0; 3547 return 0;
3545 } 3548 }
3546 EXPORT_SYMBOL(kmem_cache_shrink); 3549 EXPORT_SYMBOL(kmem_cache_shrink);
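
The defragmentation pass in kmem_cache_shrink() is essentially a bucket sort of the partial list keyed by page->inuse. The sketch below reproduces only that reordering in user space, with made-up slab numbers and an assumed capacity of four objects per slab; bucket 0 stands in for the empty slabs that discard_slab() would free.

    #include <stdio.h>

    #define OBJECTS 4                         /* assumed objects per slab */

    int main(void)
    {
        int inuse[] = { 2, 0, 3, 1, 3, 0 };   /* hypothetical partial slabs */
        int nslabs  = 6;
        int bucket[OBJECTS][8];               /* like slabs_by_inuse[]      */
        int count[OBJECTS] = { 0 };

        /* bucket each slab by how many objects it still has in use */
        for (int s = 0; s < nslabs; s++)
            bucket[inuse[s]][count[inuse[s]]++] = s;

        /* rebuild the list fullest-first, like the splice loop above */
        printf("new partial list:");
        for (int i = OBJECTS - 1; i > 0; i--)
            for (int k = 0; k < count[i]; k++)
                printf(" slab%d(inuse=%d)", bucket[i][k], i);

        printf("\ndiscarded empty slabs:");
        for (int k = 0; k < count[0]; k++)
            printf(" slab%d", bucket[0][k]);
        printf("\n");
        return 0;
    }
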
3547 3550
3548 #if defined(CONFIG_MEMORY_HOTPLUG) 3551 #if defined(CONFIG_MEMORY_HOTPLUG)
3549 static int slab_mem_going_offline_callback(void *arg) 3552 static int slab_mem_going_offline_callback(void *arg)
3550 { 3553 {
3551 struct kmem_cache *s; 3554 struct kmem_cache *s;
3552 3555
3553 down_read(&slub_lock); 3556 down_read(&slub_lock);
3554 list_for_each_entry(s, &slab_caches, list) 3557 list_for_each_entry(s, &slab_caches, list)
3555 kmem_cache_shrink(s); 3558 kmem_cache_shrink(s);
3556 up_read(&slub_lock); 3559 up_read(&slub_lock);
3557 3560
3558 return 0; 3561 return 0;
3559 } 3562 }
3560 3563
3561 static void slab_mem_offline_callback(void *arg) 3564 static void slab_mem_offline_callback(void *arg)
3562 { 3565 {
3563 struct kmem_cache_node *n; 3566 struct kmem_cache_node *n;
3564 struct kmem_cache *s; 3567 struct kmem_cache *s;
3565 struct memory_notify *marg = arg; 3568 struct memory_notify *marg = arg;
3566 int offline_node; 3569 int offline_node;
3567 3570
3568 offline_node = marg->status_change_nid; 3571 offline_node = marg->status_change_nid;
3569 3572
3570 /* 3573 /*
3571 * If the node still has available memory, we still need the 3574 * If the node still has available memory, we still need the
3572 * kmem_cache_node for it. 3575 * kmem_cache_node for it.
3573 */ 3576 */
3574 if (offline_node < 0) 3577 if (offline_node < 0)
3575 return; 3578 return;
3576 3579
3577 down_read(&slub_lock); 3580 down_read(&slub_lock);
3578 list_for_each_entry(s, &slab_caches, list) { 3581 list_for_each_entry(s, &slab_caches, list) {
3579 n = get_node(s, offline_node); 3582 n = get_node(s, offline_node);
3580 if (n) { 3583 if (n) {
3581 /* 3584 /*
3582 * if n->nr_slabs > 0, slabs still exist on the node 3585 * if n->nr_slabs > 0, slabs still exist on the node
3583 * that is going down. We were unable to free them, 3586 * that is going down. We were unable to free them,
3584 * and the offline_pages() function shouldn't call this 3587 * and the offline_pages() function shouldn't call this
3585 * callback. So, we must fail. 3588 * callback. So, we must fail.
3586 */ 3589 */
3587 BUG_ON(slabs_node(s, offline_node)); 3590 BUG_ON(slabs_node(s, offline_node));
3588 3591
3589 s->node[offline_node] = NULL; 3592 s->node[offline_node] = NULL;
3590 kmem_cache_free(kmem_cache_node, n); 3593 kmem_cache_free(kmem_cache_node, n);
3591 } 3594 }
3592 } 3595 }
3593 up_read(&slub_lock); 3596 up_read(&slub_lock);
3594 } 3597 }
3595 3598
3596 static int slab_mem_going_online_callback(void *arg) 3599 static int slab_mem_going_online_callback(void *arg)
3597 { 3600 {
3598 struct kmem_cache_node *n; 3601 struct kmem_cache_node *n;
3599 struct kmem_cache *s; 3602 struct kmem_cache *s;
3600 struct memory_notify *marg = arg; 3603 struct memory_notify *marg = arg;
3601 int nid = marg->status_change_nid; 3604 int nid = marg->status_change_nid;
3602 int ret = 0; 3605 int ret = 0;
3603 3606
3604 /* 3607 /*
3605 * If the node's memory is already available, then kmem_cache_node is 3608 * If the node's memory is already available, then kmem_cache_node is
3606 * already created. Nothing to do. 3609 * already created. Nothing to do.
3607 */ 3610 */
3608 if (nid < 0) 3611 if (nid < 0)
3609 return 0; 3612 return 0;
3610 3613
3611 /* 3614 /*
3612 * We are bringing a node online. No memory is available yet. We must 3615 * We are bringing a node online. No memory is available yet. We must
3613 * allocate a kmem_cache_node structure in order to bring the node 3616 * allocate a kmem_cache_node structure in order to bring the node
3614 * online. 3617 * online.
3615 */ 3618 */
3616 down_read(&slub_lock); 3619 down_read(&slub_lock);
3617 list_for_each_entry(s, &slab_caches, list) { 3620 list_for_each_entry(s, &slab_caches, list) {
3618 /* 3621 /*
3619 * XXX: kmem_cache_alloc_node will fall back to other nodes 3622 * XXX: kmem_cache_alloc_node will fall back to other nodes
3620 * since memory is not yet available from the node that 3623 * since memory is not yet available from the node that
3621 * is brought up. 3624 * is brought up.
3622 */ 3625 */
3623 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL); 3626 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3624 if (!n) { 3627 if (!n) {
3625 ret = -ENOMEM; 3628 ret = -ENOMEM;
3626 goto out; 3629 goto out;
3627 } 3630 }
3628 init_kmem_cache_node(n, s); 3631 init_kmem_cache_node(n);
3629 s->node[nid] = n; 3632 s->node[nid] = n;
3630 } 3633 }
3631 out: 3634 out:
3632 up_read(&slub_lock); 3635 up_read(&slub_lock);
3633 return ret; 3636 return ret;
3634 } 3637 }
3635 3638
3636 static int slab_memory_callback(struct notifier_block *self, 3639 static int slab_memory_callback(struct notifier_block *self,
3637 unsigned long action, void *arg) 3640 unsigned long action, void *arg)
3638 { 3641 {
3639 int ret = 0; 3642 int ret = 0;
3640 3643
3641 switch (action) { 3644 switch (action) {
3642 case MEM_GOING_ONLINE: 3645 case MEM_GOING_ONLINE:
3643 ret = slab_mem_going_online_callback(arg); 3646 ret = slab_mem_going_online_callback(arg);
3644 break; 3647 break;
3645 case MEM_GOING_OFFLINE: 3648 case MEM_GOING_OFFLINE:
3646 ret = slab_mem_going_offline_callback(arg); 3649 ret = slab_mem_going_offline_callback(arg);
3647 break; 3650 break;
3648 case MEM_OFFLINE: 3651 case MEM_OFFLINE:
3649 case MEM_CANCEL_ONLINE: 3652 case MEM_CANCEL_ONLINE:
3650 slab_mem_offline_callback(arg); 3653 slab_mem_offline_callback(arg);
3651 break; 3654 break;
3652 case MEM_ONLINE: 3655 case MEM_ONLINE:
3653 case MEM_CANCEL_OFFLINE: 3656 case MEM_CANCEL_OFFLINE:
3654 break; 3657 break;
3655 } 3658 }
3656 if (ret) 3659 if (ret)
3657 ret = notifier_from_errno(ret); 3660 ret = notifier_from_errno(ret);
3658 else 3661 else
3659 ret = NOTIFY_OK; 3662 ret = NOTIFY_OK;
3660 return ret; 3663 return ret;
3661 } 3664 }
3662 3665
3663 #endif /* CONFIG_MEMORY_HOTPLUG */ 3666 #endif /* CONFIG_MEMORY_HOTPLUG */
3664 3667
3665 /******************************************************************** 3668 /********************************************************************
3666 * Basic setup of slabs 3669 * Basic setup of slabs
3667 *******************************************************************/ 3670 *******************************************************************/
3668 3671
3669 /* 3672 /*
3670 * Used for early kmem_cache structures that were allocated using 3673 * Used for early kmem_cache structures that were allocated using
3671 * the page allocator 3674 * the page allocator
3672 */ 3675 */
3673 3676
3674 static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s) 3677 static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
3675 { 3678 {
3676 int node; 3679 int node;
3677 3680
3678 list_add(&s->list, &slab_caches); 3681 list_add(&s->list, &slab_caches);
3679 s->refcount = -1; 3682 s->refcount = -1;
3680 3683
3681 for_each_node_state(node, N_NORMAL_MEMORY) { 3684 for_each_node_state(node, N_NORMAL_MEMORY) {
3682 struct kmem_cache_node *n = get_node(s, node); 3685 struct kmem_cache_node *n = get_node(s, node);
3683 struct page *p; 3686 struct page *p;
3684 3687
3685 if (n) { 3688 if (n) {
3686 list_for_each_entry(p, &n->partial, lru) 3689 list_for_each_entry(p, &n->partial, lru)
3687 p->slab = s; 3690 p->slab = s;
3688 3691
3689 #ifdef CONFIG_SLUB_DEBUG 3692 #ifdef CONFIG_SLUB_DEBUG
3690 list_for_each_entry(p, &n->full, lru) 3693 list_for_each_entry(p, &n->full, lru)
3691 p->slab = s; 3694 p->slab = s;
3692 #endif 3695 #endif
3693 } 3696 }
3694 } 3697 }
3695 } 3698 }
3696 3699
3697 void __init kmem_cache_init(void) 3700 void __init kmem_cache_init(void)
3698 { 3701 {
3699 int i; 3702 int i;
3700 int caches = 0; 3703 int caches = 0;
3701 struct kmem_cache *temp_kmem_cache; 3704 struct kmem_cache *temp_kmem_cache;
3702 int order; 3705 int order;
3703 struct kmem_cache *temp_kmem_cache_node; 3706 struct kmem_cache *temp_kmem_cache_node;
3704 unsigned long kmalloc_size; 3707 unsigned long kmalloc_size;
3705 3708
3706 if (debug_guardpage_minorder()) 3709 if (debug_guardpage_minorder())
3707 slub_max_order = 0; 3710 slub_max_order = 0;
3708 3711
3709 kmem_size = offsetof(struct kmem_cache, node) + 3712 kmem_size = offsetof(struct kmem_cache, node) +
3710 nr_node_ids * sizeof(struct kmem_cache_node *); 3713 nr_node_ids * sizeof(struct kmem_cache_node *);
3711 3714
3712 /* Allocate two kmem_caches from the page allocator */ 3715 /* Allocate two kmem_caches from the page allocator */
3713 kmalloc_size = ALIGN(kmem_size, cache_line_size()); 3716 kmalloc_size = ALIGN(kmem_size, cache_line_size());
3714 order = get_order(2 * kmalloc_size); 3717 order = get_order(2 * kmalloc_size);
3715 kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); 3718 kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);
3716 3719
3717 /* 3720 /*
3718 * Must first have the slab cache available for the allocations of the 3721 * Must first have the slab cache available for the allocations of the
3719 * struct kmem_cache_node's. There is special bootstrap code in 3722 * struct kmem_cache_node's. There is special bootstrap code in
3720 * kmem_cache_open for slab_state == DOWN. 3723 * kmem_cache_open for slab_state == DOWN.
3721 */ 3724 */
3722 kmem_cache_node = (void *)kmem_cache + kmalloc_size; 3725 kmem_cache_node = (void *)kmem_cache + kmalloc_size;
3723 3726
3724 kmem_cache_open(kmem_cache_node, "kmem_cache_node", 3727 kmem_cache_open(kmem_cache_node, "kmem_cache_node",
3725 sizeof(struct kmem_cache_node), 3728 sizeof(struct kmem_cache_node),
3726 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 3729 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3727 3730
3728 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); 3731 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
3729 3732
3730 /* Able to allocate the per node structures */ 3733 /* Able to allocate the per node structures */
3731 slab_state = PARTIAL; 3734 slab_state = PARTIAL;
3732 3735
3733 temp_kmem_cache = kmem_cache; 3736 temp_kmem_cache = kmem_cache;
3734 kmem_cache_open(kmem_cache, "kmem_cache", kmem_size, 3737 kmem_cache_open(kmem_cache, "kmem_cache", kmem_size,
3735 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 3738 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3736 kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); 3739 kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3737 memcpy(kmem_cache, temp_kmem_cache, kmem_size); 3740 memcpy(kmem_cache, temp_kmem_cache, kmem_size);
3738 3741
3739 /* 3742 /*
3740 * Allocate kmem_cache_node properly from the kmem_cache slab. 3743 * Allocate kmem_cache_node properly from the kmem_cache slab.
3741 * kmem_cache_node is separately allocated so no need to 3744 * kmem_cache_node is separately allocated so no need to
3742 * update any list pointers. 3745 * update any list pointers.
3743 */ 3746 */
3744 temp_kmem_cache_node = kmem_cache_node; 3747 temp_kmem_cache_node = kmem_cache_node;
3745 3748
3746 kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); 3749 kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3747 memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size); 3750 memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);
3748 3751
3749 kmem_cache_bootstrap_fixup(kmem_cache_node); 3752 kmem_cache_bootstrap_fixup(kmem_cache_node);
3750 3753
3751 caches++; 3754 caches++;
3752 kmem_cache_bootstrap_fixup(kmem_cache); 3755 kmem_cache_bootstrap_fixup(kmem_cache);
3753 caches++; 3756 caches++;
3754 /* Free temporary boot structure */ 3757 /* Free temporary boot structure */
3755 free_pages((unsigned long)temp_kmem_cache, order); 3758 free_pages((unsigned long)temp_kmem_cache, order);
3756 3759
3757 /* Now we can use the kmem_cache to allocate kmalloc slabs */ 3760 /* Now we can use the kmem_cache to allocate kmalloc slabs */
3758 3761
3759 /* 3762 /*
3760 * Patch up the size_index table if we have strange large alignment 3763 * Patch up the size_index table if we have strange large alignment
3761 * requirements for the kmalloc array. This is only the case for 3764 * requirements for the kmalloc array. This is only the case for
3762 * MIPS it seems. The standard arches will not generate any code here. 3765 * MIPS it seems. The standard arches will not generate any code here.
3763 * 3766 *
3764 * Largest permitted alignment is 256 bytes due to the way we 3767 * Largest permitted alignment is 256 bytes due to the way we
3765 * handle the index determination for the smaller caches. 3768 * handle the index determination for the smaller caches.
3766 * 3769 *
3767 * Make sure that nothing crazy happens if someone starts tinkering 3770 * Make sure that nothing crazy happens if someone starts tinkering
3768 * around with ARCH_KMALLOC_MINALIGN 3771 * around with ARCH_KMALLOC_MINALIGN
3769 */ 3772 */
3770 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || 3773 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
3771 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); 3774 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
3772 3775
3773 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { 3776 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
3774 int elem = size_index_elem(i); 3777 int elem = size_index_elem(i);
3775 if (elem >= ARRAY_SIZE(size_index)) 3778 if (elem >= ARRAY_SIZE(size_index))
3776 break; 3779 break;
3777 size_index[elem] = KMALLOC_SHIFT_LOW; 3780 size_index[elem] = KMALLOC_SHIFT_LOW;
3778 } 3781 }
3779 3782
3780 if (KMALLOC_MIN_SIZE == 64) { 3783 if (KMALLOC_MIN_SIZE == 64) {
3781 /* 3784 /*
3782 * The 96 byte size cache is not used if the alignment 3785 * The 96 byte size cache is not used if the alignment
3783 * is 64 byte. 3786 * is 64 byte.
3784 */ 3787 */
3785 for (i = 64 + 8; i <= 96; i += 8) 3788 for (i = 64 + 8; i <= 96; i += 8)
3786 size_index[size_index_elem(i)] = 7; 3789 size_index[size_index_elem(i)] = 7;
3787 } else if (KMALLOC_MIN_SIZE == 128) { 3790 } else if (KMALLOC_MIN_SIZE == 128) {
3788 /* 3791 /*
3789 * The 192 byte sized cache is not used if the alignment 3792 * The 192 byte sized cache is not used if the alignment
3790 * is 128 byte. Redirect kmalloc to use the 256 byte cache 3793 * is 128 byte. Redirect kmalloc to use the 256 byte cache
3791 * instead. 3794 * instead.
3792 */ 3795 */
3793 for (i = 128 + 8; i <= 192; i += 8) 3796 for (i = 128 + 8; i <= 192; i += 8)
3794 size_index[size_index_elem(i)] = 8; 3797 size_index[size_index_elem(i)] = 8;
3795 } 3798 }
3796 3799
3797 /* Caches that are not of the two-to-the-power-of size */ 3800 /* Caches that are not of the two-to-the-power-of size */
3798 if (KMALLOC_MIN_SIZE <= 32) { 3801 if (KMALLOC_MIN_SIZE <= 32) {
3799 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0); 3802 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
3800 caches++; 3803 caches++;
3801 } 3804 }
3802 3805
3803 if (KMALLOC_MIN_SIZE <= 64) { 3806 if (KMALLOC_MIN_SIZE <= 64) {
3804 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0); 3807 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
3805 caches++; 3808 caches++;
3806 } 3809 }
3807 3810
3808 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { 3811 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3809 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0); 3812 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
3810 caches++; 3813 caches++;
3811 } 3814 }
3812 3815
3813 slab_state = UP; 3816 slab_state = UP;
3814 3817
3815 /* Provide the correct kmalloc names now that the caches are up */ 3818 /* Provide the correct kmalloc names now that the caches are up */
3816 if (KMALLOC_MIN_SIZE <= 32) { 3819 if (KMALLOC_MIN_SIZE <= 32) {
3817 kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT); 3820 kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
3818 BUG_ON(!kmalloc_caches[1]->name); 3821 BUG_ON(!kmalloc_caches[1]->name);
3819 } 3822 }
3820 3823
3821 if (KMALLOC_MIN_SIZE <= 64) { 3824 if (KMALLOC_MIN_SIZE <= 64) {
3822 kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT); 3825 kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
3823 BUG_ON(!kmalloc_caches[2]->name); 3826 BUG_ON(!kmalloc_caches[2]->name);
3824 } 3827 }
3825 3828
3826 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { 3829 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3827 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); 3830 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
3828 3831
3829 BUG_ON(!s); 3832 BUG_ON(!s);
3830 kmalloc_caches[i]->name = s; 3833 kmalloc_caches[i]->name = s;
3831 } 3834 }
3832 3835
3833 #ifdef CONFIG_SMP 3836 #ifdef CONFIG_SMP
3834 register_cpu_notifier(&slab_notifier); 3837 register_cpu_notifier(&slab_notifier);
3835 #endif 3838 #endif
3836 3839
3837 #ifdef CONFIG_ZONE_DMA 3840 #ifdef CONFIG_ZONE_DMA
3838 for (i = 0; i < SLUB_PAGE_SHIFT; i++) { 3841 for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
3839 struct kmem_cache *s = kmalloc_caches[i]; 3842 struct kmem_cache *s = kmalloc_caches[i];
3840 3843
3841 if (s && s->size) { 3844 if (s && s->size) {
3842 char *name = kasprintf(GFP_NOWAIT, 3845 char *name = kasprintf(GFP_NOWAIT,
3843 "dma-kmalloc-%d", s->objsize); 3846 "dma-kmalloc-%d", s->objsize);
3844 3847
3845 BUG_ON(!name); 3848 BUG_ON(!name);
3846 kmalloc_dma_caches[i] = create_kmalloc_cache(name, 3849 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3847 s->objsize, SLAB_CACHE_DMA); 3850 s->objsize, SLAB_CACHE_DMA);
3848 } 3851 }
3849 } 3852 }
3850 #endif 3853 #endif
3851 printk(KERN_INFO 3854 printk(KERN_INFO
3852 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," 3855 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
3853 " CPUs=%d, Nodes=%d\n", 3856 " CPUs=%d, Nodes=%d\n",
3854 caches, cache_line_size(), 3857 caches, cache_line_size(),
3855 slub_min_order, slub_max_order, slub_min_objects, 3858 slub_min_order, slub_max_order, slub_min_objects,
3856 nr_cpu_ids, nr_node_ids); 3859 nr_cpu_ids, nr_node_ids);
3857 } 3860 }
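
The size_index patching above only matters for the small, odd-sized caches (96 and 192 bytes); larger requests, up to the point where SLUB falls back to the page allocator, are simply routed to the next power-of-two kmalloc cache. A minimal standalone sketch of that two-step size-to-cache lookup, using a simplified demo_size_index table as a stand-in for the kernel's size_index[] (and ignoring the KMALLOC_MIN_SIZE adjustments made above), might look like this:

    #include <stdio.h>

    /* Simplified stand-in for SLUB's size_index[]: maps (size - 1) / 8 to a
     * cache index for sizes up to 192 bytes. Indices 1 and 2 stand for the
     * odd-sized 96- and 192-byte caches; index n otherwise means 2^n bytes. */
    static const int demo_size_index[24] = {
        3, 4, 5, 5, 6, 6, 6, 6,     /* sizes   1..64  */
        1, 1, 1, 1, 7, 7, 7, 7,     /* sizes  65..128 */
        2, 2, 2, 2, 2, 2, 2, 2,     /* sizes 129..192 */
    };

    /* Index of the smallest cache whose object size covers the request. */
    static int demo_kmalloc_index(unsigned int size)
    {
        int idx = 0;

        if (size <= 192)
            return demo_size_index[(size - 1) / 8];
        while ((1u << idx) < size)  /* power-of-two caches above 192 bytes */
            idx++;
        return idx;
    }

    int main(void)
    {
        unsigned int sizes[] = { 8, 30, 96, 100, 192, 200, 4096 };
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
            printf("kmalloc(%u) -> cache index %d\n",
                   sizes[i], demo_kmalloc_index(sizes[i]));
        return 0;
    }
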
3858 3861
3859 void __init kmem_cache_init_late(void) 3862 void __init kmem_cache_init_late(void)
3860 { 3863 {
3861 } 3864 }
3862 3865
3863 /* 3866 /*
3864 * Find a mergeable slab cache 3867 * Find a mergeable slab cache
3865 */ 3868 */
3866 static int slab_unmergeable(struct kmem_cache *s) 3869 static int slab_unmergeable(struct kmem_cache *s)
3867 { 3870 {
3868 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE)) 3871 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3869 return 1; 3872 return 1;
3870 3873
3871 if (s->ctor) 3874 if (s->ctor)
3872 return 1; 3875 return 1;
3873 3876
3874 /* 3877 /*
3875 * We may have set a slab to be unmergeable during bootstrap. 3878 * We may have set a slab to be unmergeable during bootstrap.
3876 */ 3879 */
3877 if (s->refcount < 0) 3880 if (s->refcount < 0)
3878 return 1; 3881 return 1;
3879 3882
3880 return 0; 3883 return 0;
3881 } 3884 }
3882 3885
3883 static struct kmem_cache *find_mergeable(size_t size, 3886 static struct kmem_cache *find_mergeable(size_t size,
3884 size_t align, unsigned long flags, const char *name, 3887 size_t align, unsigned long flags, const char *name,
3885 void (*ctor)(void *)) 3888 void (*ctor)(void *))
3886 { 3889 {
3887 struct kmem_cache *s; 3890 struct kmem_cache *s;
3888 3891
3889 if (slub_nomerge || (flags & SLUB_NEVER_MERGE)) 3892 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3890 return NULL; 3893 return NULL;
3891 3894
3892 if (ctor) 3895 if (ctor)
3893 return NULL; 3896 return NULL;
3894 3897
3895 size = ALIGN(size, sizeof(void *)); 3898 size = ALIGN(size, sizeof(void *));
3896 align = calculate_alignment(flags, align, size); 3899 align = calculate_alignment(flags, align, size);
3897 size = ALIGN(size, align); 3900 size = ALIGN(size, align);
3898 flags = kmem_cache_flags(size, flags, name, NULL); 3901 flags = kmem_cache_flags(size, flags, name, NULL);
3899 3902
3900 list_for_each_entry(s, &slab_caches, list) { 3903 list_for_each_entry(s, &slab_caches, list) {
3901 if (slab_unmergeable(s)) 3904 if (slab_unmergeable(s))
3902 continue; 3905 continue;
3903 3906
3904 if (size > s->size) 3907 if (size > s->size)
3905 continue; 3908 continue;
3906 3909
3907 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME)) 3910 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3908 continue; 3911 continue;
3909 /* 3912 /*
3910 * Check if alignment is compatible. 3913 * Check if alignment is compatible.
3911 * Courtesy of Adrian Drzewiecki 3914 * Courtesy of Adrian Drzewiecki
3912 */ 3915 */
3913 if ((s->size & ~(align - 1)) != s->size) 3916 if ((s->size & ~(align - 1)) != s->size)
3914 continue; 3917 continue;
3915 3918
3916 if (s->size - size >= sizeof(void *)) 3919 if (s->size - size >= sizeof(void *))
3917 continue; 3920 continue;
3918 3921
3919 return s; 3922 return s;
3920 } 3923 }
3921 return NULL; 3924 return NULL;
3922 } 3925 }
3923 3926
3924 struct kmem_cache *kmem_cache_create(const char *name, size_t size, 3927 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3925 size_t align, unsigned long flags, void (*ctor)(void *)) 3928 size_t align, unsigned long flags, void (*ctor)(void *))
3926 { 3929 {
3927 struct kmem_cache *s; 3930 struct kmem_cache *s;
3928 char *n; 3931 char *n;
3929 3932
3930 if (WARN_ON(!name)) 3933 if (WARN_ON(!name))
3931 return NULL; 3934 return NULL;
3932 3935
3933 down_write(&slub_lock); 3936 down_write(&slub_lock);
3934 s = find_mergeable(size, align, flags, name, ctor); 3937 s = find_mergeable(size, align, flags, name, ctor);
3935 if (s) { 3938 if (s) {
3936 s->refcount++; 3939 s->refcount++;
3937 /* 3940 /*
3938 * Adjust the object sizes so that we clear 3941 * Adjust the object sizes so that we clear
3939 * the complete object on kzalloc. 3942 * the complete object on kzalloc.
3940 */ 3943 */
3941 s->objsize = max(s->objsize, (int)size); 3944 s->objsize = max(s->objsize, (int)size);
3942 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3945 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3943 3946
3944 if (sysfs_slab_alias(s, name)) { 3947 if (sysfs_slab_alias(s, name)) {
3945 s->refcount--; 3948 s->refcount--;
3946 goto err; 3949 goto err;
3947 } 3950 }
3948 up_write(&slub_lock); 3951 up_write(&slub_lock);
3949 return s; 3952 return s;
3950 } 3953 }
3951 3954
3952 n = kstrdup(name, GFP_KERNEL); 3955 n = kstrdup(name, GFP_KERNEL);
3953 if (!n) 3956 if (!n)
3954 goto err; 3957 goto err;
3955 3958
3956 s = kmalloc(kmem_size, GFP_KERNEL); 3959 s = kmalloc(kmem_size, GFP_KERNEL);
3957 if (s) { 3960 if (s) {
3958 if (kmem_cache_open(s, n, 3961 if (kmem_cache_open(s, n,
3959 size, align, flags, ctor)) { 3962 size, align, flags, ctor)) {
3960 list_add(&s->list, &slab_caches); 3963 list_add(&s->list, &slab_caches);
3961 up_write(&slub_lock); 3964 up_write(&slub_lock);
3962 if (sysfs_slab_add(s)) { 3965 if (sysfs_slab_add(s)) {
3963 down_write(&slub_lock); 3966 down_write(&slub_lock);
3964 list_del(&s->list); 3967 list_del(&s->list);
3965 kfree(n); 3968 kfree(n);
3966 kfree(s); 3969 kfree(s);
3967 goto err; 3970 goto err;
3968 } 3971 }
3969 return s; 3972 return s;
3970 } 3973 }
3971 kfree(n);
3972 kfree(s); 3974 kfree(s);
3973 } 3975 }
3976 kfree(n);
3974 err: 3977 err:
3975 up_write(&slub_lock); 3978 up_write(&slub_lock);
3976 3979
3977 if (flags & SLAB_PANIC) 3980 if (flags & SLAB_PANIC)
3978 panic("Cannot create slabcache %s\n", name); 3981 panic("Cannot create slabcache %s\n", name);
3979 else 3982 else
3980 s = NULL; 3983 s = NULL;
3981 return s; 3984 return s;
3982 } 3985 }
3983 EXPORT_SYMBOL(kmem_cache_create); 3986 EXPORT_SYMBOL(kmem_cache_create);
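
The kfree(n) placement visible in the right-hand column is the point of the "slub: fix a possible memory leak" change in this series: the duplicated name string must be released both when the kmalloc() of the kmem_cache fails and when kmem_cache_open() fails, while the success path hands ownership of it to the new cache. A self-contained sketch of the same cleanup shape, with hypothetical demo_* stand-ins for the slab internals, could read:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct demo_cache { char *name; size_t size; };

    /* Pretend setup step that can fail; it rejects zero-sized caches. */
    static int demo_cache_open(struct demo_cache *s, char *name, size_t size)
    {
        if (!size)
            return 0;               /* failure, like kmem_cache_open() returning 0 */
        s->name = name;
        s->size = size;
        return 1;
    }

    static struct demo_cache *demo_cache_create(const char *name, size_t size)
    {
        struct demo_cache *s;
        char *n;

        n = malloc(strlen(name) + 1);   /* counterpart of kstrdup(name) */
        if (!n)
            return NULL;
        strcpy(n, name);

        s = malloc(sizeof(*s));
        if (s) {
            if (demo_cache_open(s, n, size))
                return s;           /* success: the cache now owns n */
            free(s);
        }
        free(n);    /* reached when malloc() or demo_cache_open() failed */
        return NULL;
    }

    int main(void)
    {
        struct demo_cache *c = demo_cache_create("demo", 64);

        if (c) {
            printf("created %s (%zu bytes)\n", c->name, c->size);
            free(c->name);
            free(c);
        }
        if (!demo_cache_create("broken", 0))
            printf("failed create leaked nothing\n");
        return 0;
    }

Placing the single free(n) after the if block covers both failure paths without duplicating it inside them, which is exactly what the moved kfree(n) does above.
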
3984 3987
3985 #ifdef CONFIG_SMP 3988 #ifdef CONFIG_SMP
3986 /* 3989 /*
3987 * Use the cpu notifier to ensure that the cpu slabs are flushed when 3990 * Use the cpu notifier to ensure that the cpu slabs are flushed when
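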
3988 * necessary. 3991 * necessary.
3989 */ 3992 */
3990 static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, 3993 static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3991 unsigned long action, void *hcpu) 3994 unsigned long action, void *hcpu)
3992 { 3995 {
3993 long cpu = (long)hcpu; 3996 long cpu = (long)hcpu;
3994 struct kmem_cache *s; 3997 struct kmem_cache *s;
3995 unsigned long flags; 3998 unsigned long flags;
3996 3999
3997 switch (action) { 4000 switch (action) {
3998 case CPU_UP_CANCELED: 4001 case CPU_UP_CANCELED:
3999 case CPU_UP_CANCELED_FROZEN: 4002 case CPU_UP_CANCELED_FROZEN:
4000 case CPU_DEAD: 4003 case CPU_DEAD:
4001 case CPU_DEAD_FROZEN: 4004 case CPU_DEAD_FROZEN:
4002 down_read(&slub_lock); 4005 down_read(&slub_lock);
4003 list_for_each_entry(s, &slab_caches, list) { 4006 list_for_each_entry(s, &slab_caches, list) {
4004 local_irq_save(flags); 4007 local_irq_save(flags);
4005 __flush_cpu_slab(s, cpu); 4008 __flush_cpu_slab(s, cpu);
4006 local_irq_restore(flags); 4009 local_irq_restore(flags);
4007 } 4010 }
4008 up_read(&slub_lock); 4011 up_read(&slub_lock);
4009 break; 4012 break;
4010 default: 4013 default:
4011 break; 4014 break;
4012 } 4015 }
4013 return NOTIFY_OK; 4016 return NOTIFY_OK;
4014 } 4017 }
4015 4018
4016 static struct notifier_block __cpuinitdata slab_notifier = { 4019 static struct notifier_block __cpuinitdata slab_notifier = {
4017 .notifier_call = slab_cpuup_callback 4020 .notifier_call = slab_cpuup_callback
4018 }; 4021 };
4019 4022
4020 #endif 4023 #endif
4021 4024
4022 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) 4025 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4023 { 4026 {
4024 struct kmem_cache *s; 4027 struct kmem_cache *s;
4025 void *ret; 4028 void *ret;
4026 4029
4027 if (unlikely(size > SLUB_MAX_SIZE)) 4030 if (unlikely(size > SLUB_MAX_SIZE))
4028 return kmalloc_large(size, gfpflags); 4031 return kmalloc_large(size, gfpflags);
4029 4032
4030 s = get_slab(size, gfpflags); 4033 s = get_slab(size, gfpflags);
4031 4034
4032 if (unlikely(ZERO_OR_NULL_PTR(s))) 4035 if (unlikely(ZERO_OR_NULL_PTR(s)))
4033 return s; 4036 return s;
4034 4037
4035 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller); 4038 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
4036 4039
4037 /* Honor the call site pointer we received. */ 4040 /* Honor the call site pointer we received. */
4038 trace_kmalloc(caller, ret, size, s->size, gfpflags); 4041 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4039 4042
4040 return ret; 4043 return ret;
4041 } 4044 }
4042 4045
4043 #ifdef CONFIG_NUMA 4046 #ifdef CONFIG_NUMA
4044 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, 4047 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4045 int node, unsigned long caller) 4048 int node, unsigned long caller)
4046 { 4049 {
4047 struct kmem_cache *s; 4050 struct kmem_cache *s;
4048 void *ret; 4051 void *ret;
4049 4052
4050 if (unlikely(size > SLUB_MAX_SIZE)) { 4053 if (unlikely(size > SLUB_MAX_SIZE)) {
4051 ret = kmalloc_large_node(size, gfpflags, node); 4054 ret = kmalloc_large_node(size, gfpflags, node);
4052 4055
4053 trace_kmalloc_node(caller, ret, 4056 trace_kmalloc_node(caller, ret,
4054 size, PAGE_SIZE << get_order(size), 4057 size, PAGE_SIZE << get_order(size),
4055 gfpflags, node); 4058 gfpflags, node);
4056 4059
4057 return ret; 4060 return ret;
4058 } 4061 }
4059 4062
4060 s = get_slab(size, gfpflags); 4063 s = get_slab(size, gfpflags);
4061 4064
4062 if (unlikely(ZERO_OR_NULL_PTR(s))) 4065 if (unlikely(ZERO_OR_NULL_PTR(s)))
4063 return s; 4066 return s;
4064 4067
4065 ret = slab_alloc(s, gfpflags, node, caller); 4068 ret = slab_alloc(s, gfpflags, node, caller);
4066 4069
4067 /* Honor the call site pointer we received. */ 4070 /* Honor the call site pointer we received. */
4068 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); 4071 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4069 4072
4070 return ret; 4073 return ret;
4071 } 4074 }
4072 #endif 4075 #endif
4073 4076
4074 #ifdef CONFIG_SYSFS 4077 #ifdef CONFIG_SYSFS
4075 static int count_inuse(struct page *page) 4078 static int count_inuse(struct page *page)
4076 { 4079 {
4077 return page->inuse; 4080 return page->inuse;
4078 } 4081 }
4079 4082
4080 static int count_total(struct page *page) 4083 static int count_total(struct page *page)
4081 { 4084 {
4082 return page->objects; 4085 return page->objects;
4083 } 4086 }
4084 #endif 4087 #endif
4085 4088
4086 #ifdef CONFIG_SLUB_DEBUG 4089 #ifdef CONFIG_SLUB_DEBUG
4087 static int validate_slab(struct kmem_cache *s, struct page *page, 4090 static int validate_slab(struct kmem_cache *s, struct page *page,
4088 unsigned long *map) 4091 unsigned long *map)
4089 { 4092 {
4090 void *p; 4093 void *p;
4091 void *addr = page_address(page); 4094 void *addr = page_address(page);
4092 4095
4093 if (!check_slab(s, page) || 4096 if (!check_slab(s, page) ||
4094 !on_freelist(s, page, NULL)) 4097 !on_freelist(s, page, NULL))
4095 return 0; 4098 return 0;
4096 4099
4097 /* Now we know that a valid freelist exists */ 4100 /* Now we know that a valid freelist exists */
4098 bitmap_zero(map, page->objects); 4101 bitmap_zero(map, page->objects);
4099 4102
4100 get_map(s, page, map); 4103 get_map(s, page, map);
4101 for_each_object(p, s, addr, page->objects) { 4104 for_each_object(p, s, addr, page->objects) {
4102 if (test_bit(slab_index(p, s, addr), map)) 4105 if (test_bit(slab_index(p, s, addr), map))
4103 if (!check_object(s, page, p, SLUB_RED_INACTIVE)) 4106 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4104 return 0; 4107 return 0;
4105 } 4108 }
4106 4109
4107 for_each_object(p, s, addr, page->objects) 4110 for_each_object(p, s, addr, page->objects)
4108 if (!test_bit(slab_index(p, s, addr), map)) 4111 if (!test_bit(slab_index(p, s, addr), map))
4109 if (!check_object(s, page, p, SLUB_RED_ACTIVE)) 4112 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4110 return 0; 4113 return 0;
4111 return 1; 4114 return 1;
4112 } 4115 }
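
validate_slab() above builds a bitmap of the objects reachable from the freelist via get_map() and then walks every object, checking free objects against the SLUB_RED_INACTIVE pattern and allocated ones against SLUB_RED_ACTIVE. A toy standalone illustration of that mark-then-classify structure, with a plain uint64_t standing in for the bitmap and printf() standing in for the real checks, might be:

    #include <stdint.h>
    #include <stdio.h>

    #define NOBJ 16

    int main(void)
    {
        uint64_t free_map = 0;  /* bit i set => object i is on the freelist */
        int free_idx[] = { 1, 4, 9 };
        unsigned int i;

        /* First pass: mark the free objects (the role of get_map()). */
        for (i = 0; i < sizeof(free_idx) / sizeof(free_idx[0]); i++)
            free_map |= 1ULL << free_idx[i];

        /* Second pass: classify every object by testing the bitmap. */
        for (i = 0; i < NOBJ; i++)
            printf("object %2u: %s\n", i,
                   (free_map >> i) & 1 ? "free      -> check as inactive"
                                       : "allocated -> check as active");
        return 0;
    }
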
4113 4116
4114 static void validate_slab_slab(struct kmem_cache *s, struct page *page, 4117 static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4115 unsigned long *map) 4118 unsigned long *map)
4116 { 4119 {
4117 slab_lock(page); 4120 slab_lock(page);
4118 validate_slab(s, page, map); 4121 validate_slab(s, page, map);
4119 slab_unlock(page); 4122 slab_unlock(page);
4120 } 4123 }
4121 4124
4122 static int validate_slab_node(struct kmem_cache *s, 4125 static int validate_slab_node(struct kmem_cache *s,
4123 struct kmem_cache_node *n, unsigned long *map) 4126 struct kmem_cache_node *n, unsigned long *map)
4124 { 4127 {
4125 unsigned long count = 0; 4128 unsigned long count = 0;
4126 struct page *page; 4129 struct page *page;
4127 unsigned long flags; 4130 unsigned long flags;
4128 4131
4129 spin_lock_irqsave(&n->list_lock, flags); 4132 spin_lock_irqsave(&n->list_lock, flags);
4130 4133
4131 list_for_each_entry(page, &n->partial, lru) { 4134 list_for_each_entry(page, &n->partial, lru) {
4132 validate_slab_slab(s, page, map); 4135 validate_slab_slab(s, page, map);
4133 count++; 4136 count++;
4134 } 4137 }
4135 if (count != n->nr_partial) 4138 if (count != n->nr_partial)
4136 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but " 4139 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
4137 "counter=%ld\n", s->name, count, n->nr_partial); 4140 "counter=%ld\n", s->name, count, n->nr_partial);
4138 4141
4139 if (!(s->flags & SLAB_STORE_USER)) 4142 if (!(s->flags & SLAB_STORE_USER))
4140 goto out; 4143 goto out;
4141 4144
4142 list_for_each_entry(page, &n->full, lru) { 4145 list_for_each_entry(page, &n->full, lru) {
4143 validate_slab_slab(s, page, map); 4146 validate_slab_slab(s, page, map);
4144 count++; 4147 count++;
4145 } 4148 }
4146 if (count != atomic_long_read(&n->nr_slabs)) 4149 if (count != atomic_long_read(&n->nr_slabs))
4147 printk(KERN_ERR "SLUB: %s %ld slabs counted but " 4150 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
4148 "counter=%ld\n", s->name, count, 4151 "counter=%ld\n", s->name, count,
4149 atomic_long_read(&n->nr_slabs)); 4152 atomic_long_read(&n->nr_slabs));
4150 4153
4151 out: 4154 out:
4152 spin_unlock_irqrestore(&n->list_lock, flags); 4155 spin_unlock_irqrestore(&n->list_lock, flags);
4153 return count; 4156 return count;
4154 } 4157 }
4155 4158
4156 static long validate_slab_cache(struct kmem_cache *s) 4159 static long validate_slab_cache(struct kmem_cache *s)
4157 { 4160 {
4158 int node; 4161 int node;
4159 unsigned long count = 0; 4162 unsigned long count = 0;
4160 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * 4163 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4161 sizeof(unsigned long), GFP_KERNEL); 4164 sizeof(unsigned long), GFP_KERNEL);
4162 4165
4163 if (!map) 4166 if (!map)
4164 return -ENOMEM; 4167 return -ENOMEM;
4165 4168
4166 flush_all(s); 4169 flush_all(s);
4167 for_each_node_state(node, N_NORMAL_MEMORY) { 4170 for_each_node_state(node, N_NORMAL_MEMORY) {
4168 struct kmem_cache_node *n = get_node(s, node); 4171 struct kmem_cache_node *n = get_node(s, node);
4169 4172
4170 count += validate_slab_node(s, n, map); 4173 count += validate_slab_node(s, n, map);
4171 } 4174 }
4172 kfree(map); 4175 kfree(map);
4173 return count; 4176 return count;
4174 } 4177 }
4175 /* 4178 /*
4176 * Generate lists of code addresses where slabcache objects are allocated 4179 * Generate lists of code addresses where slabcache objects are allocated
4177 * and freed. 4180 * and freed.
4178 */ 4181 */
4179 4182
4180 struct location { 4183 struct location {
4181 unsigned long count; 4184 unsigned long count;
4182 unsigned long addr; 4185 unsigned long addr;
4183 long long sum_time; 4186 long long sum_time;
4184 long min_time; 4187 long min_time;
4185 long max_time; 4188 long max_time;
4186 long min_pid; 4189 long min_pid;
4187 long max_pid; 4190 long max_pid;
4188 DECLARE_BITMAP(cpus, NR_CPUS); 4191 DECLARE_BITMAP(cpus, NR_CPUS);
4189 nodemask_t nodes; 4192 nodemask_t nodes;
4190 }; 4193 };
4191 4194
4192 struct loc_track { 4195 struct loc_track {
4193 unsigned long max; 4196 unsigned long max;
4194 unsigned long count; 4197 unsigned long count;
4195 struct location *loc; 4198 struct location *loc;
4196 }; 4199 };
4197 4200
4198 static void free_loc_track(struct loc_track *t) 4201 static void free_loc_track(struct loc_track *t)
4199 { 4202 {
4200 if (t->max) 4203 if (t->max)
4201 free_pages((unsigned long)t->loc, 4204 free_pages((unsigned long)t->loc,
4202 get_order(sizeof(struct location) * t->max)); 4205 get_order(sizeof(struct location) * t->max));
4203 } 4206 }
4204 4207
4205 static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags) 4208 static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4206 { 4209 {
4207 struct location *l; 4210 struct location *l;
4208 int order; 4211 int order;
4209 4212
4210 order = get_order(sizeof(struct location) * max); 4213 order = get_order(sizeof(struct location) * max);
4211 4214
4212 l = (void *)__get_free_pages(flags, order); 4215 l = (void *)__get_free_pages(flags, order);
4213 if (!l) 4216 if (!l)
4214 return 0; 4217 return 0;
4215 4218
4216 if (t->count) { 4219 if (t->count) {
4217 memcpy(l, t->loc, sizeof(struct location) * t->count); 4220 memcpy(l, t->loc, sizeof(struct location) * t->count);
4218 free_loc_track(t); 4221 free_loc_track(t);
4219 } 4222 }
4220 t->max = max; 4223 t->max = max;
4221 t->loc = l; 4224 t->loc = l;
4222 return 1; 4225 return 1;
4223 } 4226 }
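
alloc_loc_track() grows the location table by allocating a new, larger block first, copying the existing entries across, and only then releasing the old block, so a failed enlargement leaves the data already collected intact. The same grow-copy-swap pattern in plain userspace C, with malloc()/free() standing in for the page allocator and a hypothetical demo_track structure, can be sketched as:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct demo_track {
        size_t max;     /* capacity               */
        size_t count;   /* entries currently used */
        long *loc;      /* backing storage        */
    };

    /* Grow to 'max' entries; on failure the old contents remain valid. */
    static int demo_alloc_track(struct demo_track *t, size_t max)
    {
        long *l = malloc(max * sizeof(*l));

        if (!l)
            return 0;
        if (t->count) {
            memcpy(l, t->loc, t->count * sizeof(*l));
            free(t->loc);
        }
        t->max = max;
        t->loc = l;
        return 1;
    }

    int main(void)
    {
        struct demo_track t = { 0, 0, NULL };
        size_t i;

        if (!demo_alloc_track(&t, 4))
            return 1;
        for (i = 0; i < 10; i++) {
            if (t.count >= t.max && !demo_alloc_track(&t, 2 * t.max))
                break;      /* enlargement failed: keep what we have */
            t.loc[t.count++] = (long)(i * i);
        }
        printf("stored %zu entries, capacity %zu\n", t.count, t.max);
        free(t.loc);
        return 0;
    }

As in add_location() below, the caller doubles the capacity only once count reaches max, and simply stops adding entries if the enlargement fails.
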
4224 4227
4225 static int add_location(struct loc_track *t, struct kmem_cache *s, 4228 static int add_location(struct loc_track *t, struct kmem_cache *s,
4226 const struct track *track) 4229 const struct track *track)
4227 { 4230 {
4228 long start, end, pos; 4231 long start, end, pos;
4229 struct location *l; 4232 struct location *l;
4230 unsigned long caddr; 4233 unsigned long caddr;
4231 unsigned long age = jiffies - track->when; 4234 unsigned long age = jiffies - track->when;
4232 4235
4233 start = -1; 4236 start = -1;
4234 end = t->count; 4237 end = t->count;
4235 4238
4236 for ( ; ; ) { 4239 for ( ; ; ) {
4237 pos = start + (end - start + 1) / 2; 4240 pos = start + (end - start + 1) / 2;
4238 4241
4239 /* 4242 /*
4240 * There is nothing at "end". If we end up there 4243 * There is nothing at "end". If we end up there
4241 * we need to add something to before end. 4244 * we need to add something to before end.
4242 */ 4245 */
4243 if (pos == end) 4246 if (pos == end)
4244 break; 4247 break;
4245 4248
4246 caddr = t->loc[pos].addr; 4249 caddr = t->loc[pos].addr;
4247 if (track->addr == caddr) { 4250 if (track->addr == caddr) {
4248 4251
4249 l = &t->loc[pos]; 4252 l = &t->loc[pos];
4250 l->count++; 4253 l->count++;
4251 if (track->when) { 4254 if (track->when) {
4252 l->sum_time += age; 4255 l->sum_time += age;
4253 if (age < l->min_time) 4256 if (age < l->min_time)
4254 l->min_time = age; 4257 l->min_time = age;
4255 if (age > l->max_time) 4258 if (age > l->max_time)
4256 l->max_time = age; 4259 l->max_time = age;
4257 4260
4258 if (track->pid < l->min_pid) 4261 if (track->pid < l->min_pid)
4259 l->min_pid = track->pid; 4262 l->min_pid = track->pid;
4260 if (track->pid > l->max_pid) 4263 if (track->pid > l->max_pid)
4261 l->max_pid = track->pid; 4264 l->max_pid = track->pid;
4262 4265
4263 cpumask_set_cpu(track->cpu, 4266 cpumask_set_cpu(track->cpu,
4264 to_cpumask(l->cpus)); 4267 to_cpumask(l->cpus));
4265 } 4268 }
4266 node_set(page_to_nid(virt_to_page(track)), l->nodes); 4269 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4267 return 1; 4270 return 1;
4268 } 4271 }
4269 4272
4270 if (track->addr < caddr) 4273 if (track->addr < caddr)
4271 end = pos; 4274 end = pos;
4272 else 4275 else
4273 start = pos; 4276 start = pos;
4274 } 4277 }
4275 4278
4276 /* 4279 /*
4277 * Not found. Insert new tracking element. 4280 * Not found. Insert new tracking element.
4278 */ 4281 */
4279 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC)) 4282 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4280 return 0; 4283 return 0;
4281 4284
4282 l = t->loc + pos; 4285 l = t->loc + pos;
4283 if (pos < t->count) 4286 if (pos < t->count)
4284 memmove(l + 1, l, 4287 memmove(l + 1, l,
4285 (t->count - pos) * sizeof(struct location)); 4288 (t->count - pos) * sizeof(struct location));
4286 t->count++; 4289 t->count++;
4287 l->count = 1; 4290 l->count = 1;
4288 l->addr = track->addr; 4291 l->addr = track->addr;
4289 l->sum_time = age; 4292 l->sum_time = age;
4290 l->min_time = age; 4293 l->min_time = age;
4291 l->max_time = age; 4294 l->max_time = age;
4292 l->min_pid = track->pid; 4295 l->min_pid = track->pid;
4293 l->max_pid = track->pid; 4296 l->max_pid = track->pid;
4294 cpumask_clear(to_cpumask(l->cpus)); 4297 cpumask_clear(to_cpumask(l->cpus));
4295 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus)); 4298 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4296 nodes_clear(l->nodes); 4299 nodes_clear(l->nodes);
4297 node_set(page_to_nid(virt_to_page(track)), l->nodes); 4300 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4298 return 1; 4301 return 1;
4299 } 4302 }
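
add_location() keeps t->loc sorted by call-site address so every lookup is a binary search: start and end bracket the candidate range, pos probes the middle, and when the probe lands on end the new entry is spliced in at pos with memmove(). A compact standalone version of that search-or-insert step, operating on plain longs instead of struct location and using a fixed-size demo array, might read:

    #include <stdio.h>
    #include <string.h>

    #define DEMO_MAX 16

    static long demo_arr[DEMO_MAX];
    static long demo_count;

    /* Find 'addr' in the sorted array; if it is absent, insert it at the
     * position where the bisection stopped. Returns 0 only when full. */
    static int demo_add(long addr)
    {
        long start = -1, end = demo_count, pos;

        for (;;) {
            pos = start + (end - start + 1) / 2;
            if (pos == end)
                break;              /* nothing at 'end': insert here */
            if (demo_arr[pos] == addr)
                return 1;           /* existing entry found */
            if (addr < demo_arr[pos])
                end = pos;
            else
                start = pos;
        }

        if (demo_count >= DEMO_MAX)
            return 0;
        memmove(&demo_arr[pos + 1], &demo_arr[pos],
                (demo_count - pos) * sizeof(demo_arr[0]));
        demo_arr[pos] = addr;
        demo_count++;
        return 1;
    }

    int main(void)
    {
        long inputs[] = { 40, 10, 30, 10, 20, 40 };
        long i;

        for (i = 0; i < (long)(sizeof(inputs) / sizeof(inputs[0])); i++)
            demo_add(inputs[i]);
        for (i = 0; i < demo_count; i++)
            printf("%ld ", demo_arr[i]);
        printf("\n");               /* prints: 10 20 30 40 */
        return 0;
    }
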
4300 4303
4301 static void process_slab(struct loc_track *t, struct kmem_cache *s, 4304 static void process_slab(struct loc_track *t, struct kmem_cache *s,
4302 struct page *page, enum track_item alloc, 4305 struct page *page, enum track_item alloc,
4303 unsigned long *map) 4306 unsigned long *map)
4304 { 4307 {
4305 void *addr = page_address(page); 4308 void *addr = page_address(page);
4306 void *p; 4309 void *p;
4307 4310
4308 bitmap_zero(map, page->objects); 4311 bitmap_zero(map, page->objects);
4309 get_map(s, page, map); 4312 get_map(s, page, map);
4310 4313
4311 for_each_object(p, s, addr, page->objects) 4314 for_each_object(p, s, addr, page->objects)
4312 if (!test_bit(slab_index(p, s, addr), map)) 4315 if (!test_bit(slab_index(p, s, addr), map))
4313 add_location(t, s, get_track(s, p, alloc)); 4316 add_location(t, s, get_track(s, p, alloc));
4314 } 4317 }
4315 4318
4316 static int list_locations(struct kmem_cache *s, char *buf, 4319 static int list_locations(struct kmem_cache *s, char *buf,
4317 enum track_item alloc) 4320 enum track_item alloc)
4318 { 4321 {
4319 int len = 0; 4322 int len = 0;
4320 unsigned long i; 4323 unsigned long i;
4321 struct loc_track t = { 0, 0, NULL }; 4324 struct loc_track t = { 0, 0, NULL };
4322 int node; 4325 int node;
4323 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) * 4326 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4324 sizeof(unsigned long), GFP_KERNEL); 4327 sizeof(unsigned long), GFP_KERNEL);
4325 4328
4326 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), 4329 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4327 GFP_TEMPORARY)) { 4330 GFP_TEMPORARY)) {
4328 kfree(map); 4331 kfree(map);
4329 return sprintf(buf, "Out of memory\n"); 4332 return sprintf(buf, "Out of memory\n");
4330 } 4333 }
4331 /* Push back cpu slabs */ 4334 /* Push back cpu slabs */
4332 flush_all(s); 4335 flush_all(s);
4333 4336
4334 for_each_node_state(node, N_NORMAL_MEMORY) { 4337 for_each_node_state(node, N_NORMAL_MEMORY) {
4335 struct kmem_cache_node *n = get_node(s, node); 4338 struct kmem_cache_node *n = get_node(s, node);
4336 unsigned long flags; 4339 unsigned long flags;
4337 struct page *page; 4340 struct page *page;
4338 4341
4339 if (!atomic_long_read(&n->nr_slabs)) 4342 if (!atomic_long_read(&n->nr_slabs))
4340 continue; 4343 continue;
4341 4344
4342 spin_lock_irqsave(&n->list_lock, flags); 4345 spin_lock_irqsave(&n->list_lock, flags);
4343 list_for_each_entry(page, &n->partial, lru) 4346 list_for_each_entry(page, &n->partial, lru)
4344 process_slab(&t, s, page, alloc, map); 4347 process_slab(&t, s, page, alloc, map);
4345 list_for_each_entry(page, &n->full, lru) 4348 list_for_each_entry(page, &n->full, lru)
4346 process_slab(&t, s, page, alloc, map); 4349 process_slab(&t, s, page, alloc, map);
4347 spin_unlock_irqrestore(&n->list_lock, flags); 4350 spin_unlock_irqrestore(&n->list_lock, flags);
4348 } 4351 }
4349 4352
4350 for (i = 0; i < t.count; i++) { 4353 for (i = 0; i < t.count; i++) {
4351 struct location *l = &t.loc[i]; 4354 struct location *l = &t.loc[i];
4352 4355
4353 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100) 4356 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4354 break; 4357 break;
4355 len += sprintf(buf + len, "%7ld ", l->count); 4358 len += sprintf(buf + len, "%7ld ", l->count);
4356 4359
4357 if (l->addr) 4360 if (l->addr)
4358 len += sprintf(buf + len, "%pS", (void *)l->addr); 4361 len += sprintf(buf + len, "%pS", (void *)l->addr);
4359 else 4362 else
4360 len += sprintf(buf + len, "<not-available>"); 4363 len += sprintf(buf + len, "<not-available>");
4361 4364
4362 if (l->sum_time != l->min_time) { 4365 if (l->sum_time != l->min_time) {
4363 len += sprintf(buf + len, " age=%ld/%ld/%ld", 4366 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4364 l->min_time, 4367 l->min_time,
4365 (long)div_u64(l->sum_time, l->count), 4368 (long)div_u64(l->sum_time, l->count),
4366 l->max_time); 4369 l->max_time);
4367 } else 4370 } else
4368 len += sprintf(buf + len, " age=%ld", 4371 len += sprintf(buf + len, " age=%ld",
4369 l->min_time); 4372 l->min_time);
4370 4373
4371 if (l->min_pid != l->max_pid) 4374 if (l->min_pid != l->max_pid)
4372 len += sprintf(buf + len, " pid=%ld-%ld", 4375 len += sprintf(buf + len, " pid=%ld-%ld",
4373 l->min_pid, l->max_pid); 4376 l->min_pid, l->max_pid);
4374 else 4377 else
4375 len += sprintf(buf + len, " pid=%ld", 4378 len += sprintf(buf + len, " pid=%ld",
4376 l->min_pid); 4379 l->min_pid);
4377 4380
4378 if (num_online_cpus() > 1 && 4381 if (num_online_cpus() > 1 &&
4379 !cpumask_empty(to_cpumask(l->cpus)) && 4382 !cpumask_empty(to_cpumask(l->cpus)) &&
4380 len < PAGE_SIZE - 60) { 4383 len < PAGE_SIZE - 60) {
4381 len += sprintf(buf + len, " cpus="); 4384 len += sprintf(buf + len, " cpus=");
4382 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50, 4385 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4383 to_cpumask(l->cpus)); 4386 to_cpumask(l->cpus));
4384 } 4387 }
4385 4388
4386 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) && 4389 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4387 len < PAGE_SIZE - 60) { 4390 len < PAGE_SIZE - 60) {
4388 len += sprintf(buf + len, " nodes="); 4391 len += sprintf(buf + len, " nodes=");
4389 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50, 4392 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4390 l->nodes); 4393 l->nodes);
4391 } 4394 }
4392 4395
4393 len += sprintf(buf + len, "\n"); 4396 len += sprintf(buf + len, "\n");
4394 } 4397 }
4395 4398
4396 free_loc_track(&t); 4399 free_loc_track(&t);
4397 kfree(map); 4400 kfree(map);
4398 if (!t.count) 4401 if (!t.count)
4399 len += sprintf(buf, "No data\n"); 4402 len += sprintf(buf, "No data\n");
4400 return len; 4403 return len;
4401 } 4404 }
4402 #endif 4405 #endif
4403 4406
4404 #ifdef SLUB_RESILIENCY_TEST 4407 #ifdef SLUB_RESILIENCY_TEST
4405 static void resiliency_test(void) 4408 static void resiliency_test(void)
4406 { 4409 {
4407 u8 *p; 4410 u8 *p;
4408 4411
4409 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10); 4412 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);
4410 4413
4411 printk(KERN_ERR "SLUB resiliency testing\n"); 4414 printk(KERN_ERR "SLUB resiliency testing\n");
4412 printk(KERN_ERR "-----------------------\n"); 4415 printk(KERN_ERR "-----------------------\n");
4413 printk(KERN_ERR "A. Corruption after allocation\n"); 4416 printk(KERN_ERR "A. Corruption after allocation\n");
4414 4417
4415 p = kzalloc(16, GFP_KERNEL); 4418 p = kzalloc(16, GFP_KERNEL);
4416 p[16] = 0x12; 4419 p[16] = 0x12;
4417 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" 4420 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
4418 " 0x12->0x%p\n\n", p + 16); 4421 " 0x12->0x%p\n\n", p + 16);
4419 4422
4420 validate_slab_cache(kmalloc_caches[4]); 4423 validate_slab_cache(kmalloc_caches[4]);
4421 4424
4422 /* Hmmm... The next two are dangerous */ 4425 /* Hmmm... The next two are dangerous */
4423 p = kzalloc(32, GFP_KERNEL); 4426 p = kzalloc(32, GFP_KERNEL);
4424 p[32 + sizeof(void *)] = 0x34; 4427 p[32 + sizeof(void *)] = 0x34;
4425 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" 4428 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
4426 " 0x34 -> -0x%p\n", p); 4429 " 0x34 -> -0x%p\n", p);
4427 printk(KERN_ERR 4430 printk(KERN_ERR
4428 "If allocated object is overwritten then not detectable\n\n"); 4431 "If allocated object is overwritten then not detectable\n\n");
4429 4432
4430 validate_slab_cache(kmalloc_caches[5]); 4433 validate_slab_cache(kmalloc_caches[5]);
4431 p = kzalloc(64, GFP_KERNEL); 4434 p = kzalloc(64, GFP_KERNEL);
4432 p += 64 + (get_cycles() & 0xff) * sizeof(void *); 4435 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4433 *p = 0x56; 4436 *p = 0x56;
4434 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", 4437 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4435 p); 4438 p);
4436 printk(KERN_ERR 4439 printk(KERN_ERR
4437 "If allocated object is overwritten then not detectable\n\n"); 4440 "If allocated object is overwritten then not detectable\n\n");
4438 validate_slab_cache(kmalloc_caches[6]); 4441 validate_slab_cache(kmalloc_caches[6]);
4439 4442
4440 printk(KERN_ERR "\nB. Corruption after free\n"); 4443 printk(KERN_ERR "\nB. Corruption after free\n");
4441 p = kzalloc(128, GFP_KERNEL); 4444 p = kzalloc(128, GFP_KERNEL);
4442 kfree(p); 4445 kfree(p);
4443 *p = 0x78; 4446 *p = 0x78;
4444 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); 4447 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4445 validate_slab_cache(kmalloc_caches[7]); 4448 validate_slab_cache(kmalloc_caches[7]);
4446 4449
4447 p = kzalloc(256, GFP_KERNEL); 4450 p = kzalloc(256, GFP_KERNEL);
4448 kfree(p); 4451 kfree(p);
4449 p[50] = 0x9a; 4452 p[50] = 0x9a;
4450 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", 4453 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
4451 p); 4454 p);
4452 validate_slab_cache(kmalloc_caches[8]); 4455 validate_slab_cache(kmalloc_caches[8]);
4453 4456
4454 p = kzalloc(512, GFP_KERNEL); 4457 p = kzalloc(512, GFP_KERNEL);
4455 kfree(p); 4458 kfree(p);
4456 p[512] = 0xab; 4459 p[512] = 0xab;
4457 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); 4460 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4458 validate_slab_cache(kmalloc_caches[9]); 4461 validate_slab_cache(kmalloc_caches[9]);
4459 } 4462 }
4460 #else 4463 #else
4461 #ifdef CONFIG_SYSFS 4464 #ifdef CONFIG_SYSFS
4462 static void resiliency_test(void) {}; 4465 static void resiliency_test(void) {};
4463 #endif 4466 #endif
4464 #endif 4467 #endif
4465 4468
4466 #ifdef CONFIG_SYSFS 4469 #ifdef CONFIG_SYSFS
4467 enum slab_stat_type { 4470 enum slab_stat_type {
4468 SL_ALL, /* All slabs */ 4471 SL_ALL, /* All slabs */
4469 SL_PARTIAL, /* Only partially allocated slabs */ 4472 SL_PARTIAL, /* Only partially allocated slabs */
4470 SL_CPU, /* Only slabs used for cpu caches */ 4473 SL_CPU, /* Only slabs used for cpu caches */
4471 SL_OBJECTS, /* Determine allocated objects not slabs */ 4474 SL_OBJECTS, /* Determine allocated objects not slabs */
4472 SL_TOTAL /* Determine object capacity not slabs */ 4475 SL_TOTAL /* Determine object capacity not slabs */
4473 }; 4476 };
4474 4477
4475 #define SO_ALL (1 << SL_ALL) 4478 #define SO_ALL (1 << SL_ALL)
4476 #define SO_PARTIAL (1 << SL_PARTIAL) 4479 #define SO_PARTIAL (1 << SL_PARTIAL)
4477 #define SO_CPU (1 << SL_CPU) 4480 #define SO_CPU (1 << SL_CPU)
4478 #define SO_OBJECTS (1 << SL_OBJECTS) 4481 #define SO_OBJECTS (1 << SL_OBJECTS)
4479 #define SO_TOTAL (1 << SL_TOTAL) 4482 #define SO_TOTAL (1 << SL_TOTAL)
4480 4483
4481 static ssize_t show_slab_objects(struct kmem_cache *s, 4484 static ssize_t show_slab_objects(struct kmem_cache *s,
4482 char *buf, unsigned long flags) 4485 char *buf, unsigned long flags)
4483 { 4486 {
4484 unsigned long total = 0; 4487 unsigned long total = 0;
4485 int node; 4488 int node;
4486 int x; 4489 int x;
4487 unsigned long *nodes; 4490 unsigned long *nodes;
4488 unsigned long *per_cpu; 4491 unsigned long *per_cpu;
4489 4492
4490 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); 4493 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4491 if (!nodes) 4494 if (!nodes)
4492 return -ENOMEM; 4495 return -ENOMEM;
4493 per_cpu = nodes + nr_node_ids; 4496 per_cpu = nodes + nr_node_ids;
4494 4497
4495 if (flags & SO_CPU) { 4498 if (flags & SO_CPU) {
4496 int cpu; 4499 int cpu;
4497 4500
4498 for_each_possible_cpu(cpu) { 4501 for_each_possible_cpu(cpu) {
4499 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 4502 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4500 int node = ACCESS_ONCE(c->node); 4503 int node = ACCESS_ONCE(c->node);
4501 struct page *page; 4504 struct page *page;
4502 4505
4503 if (node < 0) 4506 if (node < 0)
4504 continue; 4507 continue;
4505 page = ACCESS_ONCE(c->page); 4508 page = ACCESS_ONCE(c->page);
4506 if (page) { 4509 if (page) {
4507 if (flags & SO_TOTAL) 4510 if (flags & SO_TOTAL)
4508 x = page->objects; 4511 x = page->objects;
4509 else if (flags & SO_OBJECTS) 4512 else if (flags & SO_OBJECTS)
4510 x = page->inuse; 4513 x = page->inuse;
4511 else 4514 else
4512 x = 1; 4515 x = 1;
4513 4516
4514 total += x; 4517 total += x;
4515 nodes[node] += x; 4518 nodes[node] += x;
4516 } 4519 }
4517 page = c->partial; 4520 page = c->partial;
4518 4521
4519 if (page) { 4522 if (page) {
4520 x = page->pobjects; 4523 x = page->pobjects;
4521 total += x; 4524 total += x;
4522 nodes[node] += x; 4525 nodes[node] += x;
4523 } 4526 }
4524 per_cpu[node]++; 4527 per_cpu[node]++;
4525 } 4528 }
4526 } 4529 }
4527 4530
4528 lock_memory_hotplug(); 4531 lock_memory_hotplug();
4529 #ifdef CONFIG_SLUB_DEBUG 4532 #ifdef CONFIG_SLUB_DEBUG
4530 if (flags & SO_ALL) { 4533 if (flags & SO_ALL) {
4531 for_each_node_state(node, N_NORMAL_MEMORY) { 4534 for_each_node_state(node, N_NORMAL_MEMORY) {
4532 struct kmem_cache_node *n = get_node(s, node); 4535 struct kmem_cache_node *n = get_node(s, node);
4533 4536
4534 if (flags & SO_TOTAL) 4537 if (flags & SO_TOTAL)
4535 x = atomic_long_read(&n->total_objects); 4538 x = atomic_long_read(&n->total_objects);
4536 else if (flags & SO_OBJECTS) 4539 else if (flags & SO_OBJECTS)
4537 x = atomic_long_read(&n->total_objects) - 4540 x = atomic_long_read(&n->total_objects) -
4538 count_partial(n, count_free); 4541 count_partial(n, count_free);
4539 4542
4540 else 4543 else
4541 x = atomic_long_read(&n->nr_slabs); 4544 x = atomic_long_read(&n->nr_slabs);
4542 total += x; 4545 total += x;
4543 nodes[node] += x; 4546 nodes[node] += x;
4544 } 4547 }
4545 4548
4546 } else 4549 } else
4547 #endif 4550 #endif
4548 if (flags & SO_PARTIAL) { 4551 if (flags & SO_PARTIAL) {
4549 for_each_node_state(node, N_NORMAL_MEMORY) { 4552 for_each_node_state(node, N_NORMAL_MEMORY) {
4550 struct kmem_cache_node *n = get_node(s, node); 4553 struct kmem_cache_node *n = get_node(s, node);
4551 4554
4552 if (flags & SO_TOTAL) 4555 if (flags & SO_TOTAL)
4553 x = count_partial(n, count_total); 4556 x = count_partial(n, count_total);
4554 else if (flags & SO_OBJECTS) 4557 else if (flags & SO_OBJECTS)
4555 x = count_partial(n, count_inuse); 4558 x = count_partial(n, count_inuse);
4556 else 4559 else
4557 x = n->nr_partial; 4560 x = n->nr_partial;
4558 total += x; 4561 total += x;
4559 nodes[node] += x; 4562 nodes[node] += x;
4560 } 4563 }
4561 } 4564 }
4562 x = sprintf(buf, "%lu", total); 4565 x = sprintf(buf, "%lu", total);
4563 #ifdef CONFIG_NUMA 4566 #ifdef CONFIG_NUMA
4564 for_each_node_state(node, N_NORMAL_MEMORY) 4567 for_each_node_state(node, N_NORMAL_MEMORY)
4565 if (nodes[node]) 4568 if (nodes[node])
4566 x += sprintf(buf + x, " N%d=%lu", 4569 x += sprintf(buf + x, " N%d=%lu",
4567 node, nodes[node]); 4570 node, nodes[node]);
4568 #endif 4571 #endif
4569 unlock_memory_hotplug(); 4572 unlock_memory_hotplug();
4570 kfree(nodes); 4573 kfree(nodes);
4571 return x + sprintf(buf + x, "\n"); 4574 return x + sprintf(buf + x, "\n");
4572 } 4575 }
4573 4576
4574 #ifdef CONFIG_SLUB_DEBUG 4577 #ifdef CONFIG_SLUB_DEBUG
4575 static int any_slab_objects(struct kmem_cache *s) 4578 static int any_slab_objects(struct kmem_cache *s)
4576 { 4579 {
4577 int node; 4580 int node;
4578 4581
4579 for_each_online_node(node) { 4582 for_each_online_node(node) {
4580 struct kmem_cache_node *n = get_node(s, node); 4583 struct kmem_cache_node *n = get_node(s, node);
4581 4584
4582 if (!n) 4585 if (!n)
4583 continue; 4586 continue;
4584 4587
4585 if (atomic_long_read(&n->total_objects)) 4588 if (atomic_long_read(&n->total_objects))
4586 return 1; 4589 return 1;
4587 } 4590 }
4588 return 0; 4591 return 0;
4589 } 4592 }
4590 #endif 4593 #endif
4591 4594
4592 #define to_slab_attr(n) container_of(n, struct slab_attribute, attr) 4595 #define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4593 #define to_slab(n) container_of(n, struct kmem_cache, kobj) 4596 #define to_slab(n) container_of(n, struct kmem_cache, kobj)
4594 4597
4595 struct slab_attribute { 4598 struct slab_attribute {
4596 struct attribute attr; 4599 struct attribute attr;
4597 ssize_t (*show)(struct kmem_cache *s, char *buf); 4600 ssize_t (*show)(struct kmem_cache *s, char *buf);
4598 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count); 4601 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4599 }; 4602 };
4600 4603
4601 #define SLAB_ATTR_RO(_name) \ 4604 #define SLAB_ATTR_RO(_name) \
4602 static struct slab_attribute _name##_attr = \ 4605 static struct slab_attribute _name##_attr = \
4603 __ATTR(_name, 0400, _name##_show, NULL) 4606 __ATTR(_name, 0400, _name##_show, NULL)
4604 4607
4605 #define SLAB_ATTR(_name) \ 4608 #define SLAB_ATTR(_name) \
4606 static struct slab_attribute _name##_attr = \ 4609 static struct slab_attribute _name##_attr = \
4607 __ATTR(_name, 0600, _name##_show, _name##_store) 4610 __ATTR(_name, 0600, _name##_show, _name##_store)
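
SLAB_ATTR() and SLAB_ATTR_RO() rely on token pasting: a single macro invocation stitches a _show (and optionally _store) function into a static attribute object named <name>_attr. A tiny standalone sketch of that trick, with a made-up demo_attribute structure in place of the sysfs machinery, might look like:

    #include <stdio.h>

    /* Simplified stand-in for the sysfs attribute plumbing used above. */
    struct demo_attribute {
        const char *name;
        int (*show)(char *buf);
        int (*store)(const char *buf);
    };

    /* Token pasting (##) turns one invocation into a fully wired attribute. */
    #define DEMO_ATTR_RO(_name) \
        static struct demo_attribute _name##_attr = { #_name, _name##_show, NULL }

    static int object_size_show(char *buf)
    {
        return sprintf(buf, "%d\n", 64);    /* pretend cache size */
    }
    DEMO_ATTR_RO(object_size);

    int main(void)
    {
        char buf[32];

        object_size_attr.show(buf);
        printf("%s: %s", object_size_attr.name, buf);
        return 0;
    }

Each SLAB_ATTR_RO(slab_size), SLAB_ATTR(order), and so on that follows in this file expands in the same way.
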
4608 4611
4609 static ssize_t slab_size_show(struct kmem_cache *s, char *buf) 4612 static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4610 { 4613 {
4611 return sprintf(buf, "%d\n", s->size); 4614 return sprintf(buf, "%d\n", s->size);
4612 } 4615 }
4613 SLAB_ATTR_RO(slab_size); 4616 SLAB_ATTR_RO(slab_size);
4614 4617
4615 static ssize_t align_show(struct kmem_cache *s, char *buf) 4618 static ssize_t align_show(struct kmem_cache *s, char *buf)
4616 { 4619 {
4617 return sprintf(buf, "%d\n", s->align); 4620 return sprintf(buf, "%d\n", s->align);
4618 } 4621 }
4619 SLAB_ATTR_RO(align); 4622 SLAB_ATTR_RO(align);
4620 4623
4621 static ssize_t object_size_show(struct kmem_cache *s, char *buf) 4624 static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4622 { 4625 {
4623 return sprintf(buf, "%d\n", s->objsize); 4626 return sprintf(buf, "%d\n", s->objsize);
4624 } 4627 }
4625 SLAB_ATTR_RO(object_size); 4628 SLAB_ATTR_RO(object_size);
4626 4629
4627 static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf) 4630 static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4628 { 4631 {
4629 return sprintf(buf, "%d\n", oo_objects(s->oo)); 4632 return sprintf(buf, "%d\n", oo_objects(s->oo));
4630 } 4633 }
4631 SLAB_ATTR_RO(objs_per_slab); 4634 SLAB_ATTR_RO(objs_per_slab);
4632 4635
4633 static ssize_t order_store(struct kmem_cache *s, 4636 static ssize_t order_store(struct kmem_cache *s,
4634 const char *buf, size_t length) 4637 const char *buf, size_t length)
4635 { 4638 {
4636 unsigned long order; 4639 unsigned long order;
4637 int err; 4640 int err;
4638 4641
4639 err = strict_strtoul(buf, 10, &order); 4642 err = strict_strtoul(buf, 10, &order);
4640 if (err) 4643 if (err)
4641 return err; 4644 return err;
4642 4645
4643 if (order > slub_max_order || order < slub_min_order) 4646 if (order > slub_max_order || order < slub_min_order)
4644 return -EINVAL; 4647 return -EINVAL;
4645 4648
4646 calculate_sizes(s, order); 4649 calculate_sizes(s, order);
4647 return length; 4650 return length;
4648 } 4651 }
4649 4652
4650 static ssize_t order_show(struct kmem_cache *s, char *buf) 4653 static ssize_t order_show(struct kmem_cache *s, char *buf)
4651 { 4654 {
4652 return sprintf(buf, "%d\n", oo_order(s->oo)); 4655 return sprintf(buf, "%d\n", oo_order(s->oo));
4653 } 4656 }
4654 SLAB_ATTR(order); 4657 SLAB_ATTR(order);
4655 4658
4656 static ssize_t min_partial_show(struct kmem_cache *s, char *buf) 4659 static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4657 { 4660 {
4658 return sprintf(buf, "%lu\n", s->min_partial); 4661 return sprintf(buf, "%lu\n", s->min_partial);
4659 } 4662 }
4660 4663
4661 static ssize_t min_partial_store(struct kmem_cache *s, const char *buf, 4664 static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4662 size_t length) 4665 size_t length)
4663 { 4666 {
4664 unsigned long min; 4667 unsigned long min;
4665 int err; 4668 int err;
4666 4669
4667 err = strict_strtoul(buf, 10, &min); 4670 err = strict_strtoul(buf, 10, &min);
4668 if (err) 4671 if (err)
4669 return err; 4672 return err;
4670 4673
4671 set_min_partial(s, min); 4674 set_min_partial(s, min);
4672 return length; 4675 return length;
4673 } 4676 }
4674 SLAB_ATTR(min_partial); 4677 SLAB_ATTR(min_partial);
4675 4678
4676 static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf) 4679 static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4677 { 4680 {
4678 return sprintf(buf, "%u\n", s->cpu_partial); 4681 return sprintf(buf, "%u\n", s->cpu_partial);
4679 } 4682 }
4680 4683
4681 static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, 4684 static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4682 size_t length) 4685 size_t length)
4683 { 4686 {
4684 unsigned long objects; 4687 unsigned long objects;
4685 int err; 4688 int err;
4686 4689
4687 err = strict_strtoul(buf, 10, &objects); 4690 err = strict_strtoul(buf, 10, &objects);
4688 if (err) 4691 if (err)
4689 return err; 4692 return err;
4690 if (objects && kmem_cache_debug(s)) 4693 if (objects && kmem_cache_debug(s))
4691 return -EINVAL; 4694 return -EINVAL;
4692 4695
4693 s->cpu_partial = objects; 4696 s->cpu_partial = objects;
4694 flush_all(s); 4697 flush_all(s);
4695 return length; 4698 return length;
4696 } 4699 }
4697 SLAB_ATTR(cpu_partial); 4700 SLAB_ATTR(cpu_partial);
4698 4701
4699 static ssize_t ctor_show(struct kmem_cache *s, char *buf) 4702 static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4700 { 4703 {
4701 if (!s->ctor) 4704 if (!s->ctor)
4702 return 0; 4705 return 0;
4703 return sprintf(buf, "%pS\n", s->ctor); 4706 return sprintf(buf, "%pS\n", s->ctor);
4704 } 4707 }
4705 SLAB_ATTR_RO(ctor); 4708 SLAB_ATTR_RO(ctor);
4706 4709
4707 static ssize_t aliases_show(struct kmem_cache *s, char *buf) 4710 static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4708 { 4711 {
4709 return sprintf(buf, "%d\n", s->refcount - 1); 4712 return sprintf(buf, "%d\n", s->refcount - 1);
4710 } 4713 }
4711 SLAB_ATTR_RO(aliases); 4714 SLAB_ATTR_RO(aliases);
4712 4715
4713 static ssize_t partial_show(struct kmem_cache *s, char *buf) 4716 static ssize_t partial_show(struct kmem_cache *s, char *buf)
4714 { 4717 {
4715 return show_slab_objects(s, buf, SO_PARTIAL); 4718 return show_slab_objects(s, buf, SO_PARTIAL);
4716 } 4719 }
4717 SLAB_ATTR_RO(partial); 4720 SLAB_ATTR_RO(partial);
4718 4721
4719 static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) 4722 static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4720 { 4723 {
4721 return show_slab_objects(s, buf, SO_CPU); 4724 return show_slab_objects(s, buf, SO_CPU);
4722 } 4725 }
4723 SLAB_ATTR_RO(cpu_slabs); 4726 SLAB_ATTR_RO(cpu_slabs);
4724 4727
4725 static ssize_t objects_show(struct kmem_cache *s, char *buf) 4728 static ssize_t objects_show(struct kmem_cache *s, char *buf)
4726 { 4729 {
4727 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS); 4730 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4728 } 4731 }
4729 SLAB_ATTR_RO(objects); 4732 SLAB_ATTR_RO(objects);
4730 4733
4731 static ssize_t objects_partial_show(struct kmem_cache *s, char *buf) 4734 static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4732 { 4735 {
4733 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS); 4736 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4734 } 4737 }
4735 SLAB_ATTR_RO(objects_partial); 4738 SLAB_ATTR_RO(objects_partial);
4736 4739
4737 static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf) 4740 static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4738 { 4741 {
4739 int objects = 0; 4742 int objects = 0;
4740 int pages = 0; 4743 int pages = 0;
4741 int cpu; 4744 int cpu;
4742 int len; 4745 int len;
4743 4746
4744 for_each_online_cpu(cpu) { 4747 for_each_online_cpu(cpu) {
4745 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial; 4748 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4746 4749
4747 if (page) { 4750 if (page) {
4748 pages += page->pages; 4751 pages += page->pages;
4749 objects += page->pobjects; 4752 objects += page->pobjects;
4750 } 4753 }
4751 } 4754 }
4752 4755
4753 len = sprintf(buf, "%d(%d)", objects, pages); 4756 len = sprintf(buf, "%d(%d)", objects, pages);
4754 4757
4755 #ifdef CONFIG_SMP 4758 #ifdef CONFIG_SMP
4756 for_each_online_cpu(cpu) { 4759 for_each_online_cpu(cpu) {
4757 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial; 4760 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4758 4761
4759 if (page && len < PAGE_SIZE - 20) 4762 if (page && len < PAGE_SIZE - 20)
4760 len += sprintf(buf + len, " C%d=%d(%d)", cpu, 4763 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4761 page->pobjects, page->pages); 4764 page->pobjects, page->pages);
4762 } 4765 }
4763 #endif 4766 #endif
4764 return len + sprintf(buf + len, "\n"); 4767 return len + sprintf(buf + len, "\n");
4765 } 4768 }
4766 SLAB_ATTR_RO(slabs_cpu_partial); 4769 SLAB_ATTR_RO(slabs_cpu_partial);
4767 4770
4768 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) 4771 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4769 { 4772 {
4770 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); 4773 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4771 } 4774 }
4772 4775
4773 static ssize_t reclaim_account_store(struct kmem_cache *s, 4776 static ssize_t reclaim_account_store(struct kmem_cache *s,
4774 const char *buf, size_t length) 4777 const char *buf, size_t length)
4775 { 4778 {
4776 s->flags &= ~SLAB_RECLAIM_ACCOUNT; 4779 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4777 if (buf[0] == '1') 4780 if (buf[0] == '1')
4778 s->flags |= SLAB_RECLAIM_ACCOUNT; 4781 s->flags |= SLAB_RECLAIM_ACCOUNT;
4779 return length; 4782 return length;
4780 } 4783 }
4781 SLAB_ATTR(reclaim_account); 4784 SLAB_ATTR(reclaim_account);
4782 4785
4783 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf) 4786 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4784 { 4787 {
4785 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN)); 4788 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4786 } 4789 }
4787 SLAB_ATTR_RO(hwcache_align); 4790 SLAB_ATTR_RO(hwcache_align);
4788 4791
4789 #ifdef CONFIG_ZONE_DMA 4792 #ifdef CONFIG_ZONE_DMA
4790 static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) 4793 static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4791 { 4794 {
4792 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA)); 4795 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4793 } 4796 }
4794 SLAB_ATTR_RO(cache_dma); 4797 SLAB_ATTR_RO(cache_dma);
4795 #endif 4798 #endif
4796 4799
4797 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) 4800 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4798 { 4801 {
4799 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU)); 4802 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4800 } 4803 }
4801 SLAB_ATTR_RO(destroy_by_rcu); 4804 SLAB_ATTR_RO(destroy_by_rcu);
4802 4805
4803 static ssize_t reserved_show(struct kmem_cache *s, char *buf) 4806 static ssize_t reserved_show(struct kmem_cache *s, char *buf)
4804 { 4807 {
4805 return sprintf(buf, "%d\n", s->reserved); 4808 return sprintf(buf, "%d\n", s->reserved);
4806 } 4809 }
4807 SLAB_ATTR_RO(reserved); 4810 SLAB_ATTR_RO(reserved);
4808 4811
4809 #ifdef CONFIG_SLUB_DEBUG 4812 #ifdef CONFIG_SLUB_DEBUG
4810 static ssize_t slabs_show(struct kmem_cache *s, char *buf) 4813 static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4811 { 4814 {
4812 return show_slab_objects(s, buf, SO_ALL); 4815 return show_slab_objects(s, buf, SO_ALL);
4813 } 4816 }
4814 SLAB_ATTR_RO(slabs); 4817 SLAB_ATTR_RO(slabs);
4815 4818
4816 static ssize_t total_objects_show(struct kmem_cache *s, char *buf) 4819 static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4817 { 4820 {
4818 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL); 4821 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4819 } 4822 }
4820 SLAB_ATTR_RO(total_objects); 4823 SLAB_ATTR_RO(total_objects);
4821 4824
4822 static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) 4825 static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4823 { 4826 {
4824 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); 4827 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4825 } 4828 }
4826 4829
4827 static ssize_t sanity_checks_store(struct kmem_cache *s, 4830 static ssize_t sanity_checks_store(struct kmem_cache *s,
4828 const char *buf, size_t length) 4831 const char *buf, size_t length)
4829 { 4832 {
4830 s->flags &= ~SLAB_DEBUG_FREE; 4833 s->flags &= ~SLAB_DEBUG_FREE;
4831 if (buf[0] == '1') { 4834 if (buf[0] == '1') {
4832 s->flags &= ~__CMPXCHG_DOUBLE; 4835 s->flags &= ~__CMPXCHG_DOUBLE;
4833 s->flags |= SLAB_DEBUG_FREE; 4836 s->flags |= SLAB_DEBUG_FREE;
4834 } 4837 }
4835 return length; 4838 return length;
4836 } 4839 }
4837 SLAB_ATTR(sanity_checks); 4840 SLAB_ATTR(sanity_checks);
4838 4841
4839 static ssize_t trace_show(struct kmem_cache *s, char *buf) 4842 static ssize_t trace_show(struct kmem_cache *s, char *buf)
4840 { 4843 {
4841 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE)); 4844 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4842 } 4845 }
4843 4846
4844 static ssize_t trace_store(struct kmem_cache *s, const char *buf, 4847 static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4845 size_t length) 4848 size_t length)
4846 { 4849 {
4847 s->flags &= ~SLAB_TRACE; 4850 s->flags &= ~SLAB_TRACE;
4848 if (buf[0] == '1') { 4851 if (buf[0] == '1') {
4849 s->flags &= ~__CMPXCHG_DOUBLE; 4852 s->flags &= ~__CMPXCHG_DOUBLE;
4850 s->flags |= SLAB_TRACE; 4853 s->flags |= SLAB_TRACE;
4851 } 4854 }
4852 return length; 4855 return length;
4853 } 4856 }
4854 SLAB_ATTR(trace); 4857 SLAB_ATTR(trace);
4855 4858
4856 static ssize_t red_zone_show(struct kmem_cache *s, char *buf) 4859 static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4857 { 4860 {
4858 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE)); 4861 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4859 } 4862 }
4860 4863
4861 static ssize_t red_zone_store(struct kmem_cache *s, 4864 static ssize_t red_zone_store(struct kmem_cache *s,
4862 const char *buf, size_t length) 4865 const char *buf, size_t length)
4863 { 4866 {
4864 if (any_slab_objects(s)) 4867 if (any_slab_objects(s))
4865 return -EBUSY; 4868 return -EBUSY;
4866 4869
4867 s->flags &= ~SLAB_RED_ZONE; 4870 s->flags &= ~SLAB_RED_ZONE;
4868 if (buf[0] == '1') { 4871 if (buf[0] == '1') {
4869 s->flags &= ~__CMPXCHG_DOUBLE; 4872 s->flags &= ~__CMPXCHG_DOUBLE;
4870 s->flags |= SLAB_RED_ZONE; 4873 s->flags |= SLAB_RED_ZONE;
4871 } 4874 }
4872 calculate_sizes(s, -1); 4875 calculate_sizes(s, -1);
4873 return length; 4876 return length;
4874 } 4877 }
4875 SLAB_ATTR(red_zone); 4878 SLAB_ATTR(red_zone);
4876 4879
4877 static ssize_t poison_show(struct kmem_cache *s, char *buf) 4880 static ssize_t poison_show(struct kmem_cache *s, char *buf)
4878 { 4881 {
4879 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON)); 4882 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4880 } 4883 }
4881 4884
4882 static ssize_t poison_store(struct kmem_cache *s, 4885 static ssize_t poison_store(struct kmem_cache *s,
4883 const char *buf, size_t length) 4886 const char *buf, size_t length)
4884 { 4887 {
4885 if (any_slab_objects(s)) 4888 if (any_slab_objects(s))
4886 return -EBUSY; 4889 return -EBUSY;
4887 4890
4888 s->flags &= ~SLAB_POISON; 4891 s->flags &= ~SLAB_POISON;
4889 if (buf[0] == '1') { 4892 if (buf[0] == '1') {
4890 s->flags &= ~__CMPXCHG_DOUBLE; 4893 s->flags &= ~__CMPXCHG_DOUBLE;
4891 s->flags |= SLAB_POISON; 4894 s->flags |= SLAB_POISON;
4892 } 4895 }
4893 calculate_sizes(s, -1); 4896 calculate_sizes(s, -1);
4894 return length; 4897 return length;
4895 } 4898 }
4896 SLAB_ATTR(poison); 4899 SLAB_ATTR(poison);
4897 4900
4898 static ssize_t store_user_show(struct kmem_cache *s, char *buf) 4901 static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4899 { 4902 {
4900 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER)); 4903 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4901 } 4904 }
4902 4905
4903 static ssize_t store_user_store(struct kmem_cache *s, 4906 static ssize_t store_user_store(struct kmem_cache *s,
4904 const char *buf, size_t length) 4907 const char *buf, size_t length)
4905 { 4908 {
4906 if (any_slab_objects(s)) 4909 if (any_slab_objects(s))
4907 return -EBUSY; 4910 return -EBUSY;
4908 4911
4909 s->flags &= ~SLAB_STORE_USER; 4912 s->flags &= ~SLAB_STORE_USER;
4910 if (buf[0] == '1') { 4913 if (buf[0] == '1') {
4911 s->flags &= ~__CMPXCHG_DOUBLE; 4914 s->flags &= ~__CMPXCHG_DOUBLE;
4912 s->flags |= SLAB_STORE_USER; 4915 s->flags |= SLAB_STORE_USER;
4913 } 4916 }
4914 calculate_sizes(s, -1); 4917 calculate_sizes(s, -1);
4915 return length; 4918 return length;
4916 } 4919 }
4917 SLAB_ATTR(store_user); 4920 SLAB_ATTR(store_user);
4918 4921
4919 static ssize_t validate_show(struct kmem_cache *s, char *buf) 4922 static ssize_t validate_show(struct kmem_cache *s, char *buf)
4920 { 4923 {
4921 return 0; 4924 return 0;
4922 } 4925 }
4923 4926
4924 static ssize_t validate_store(struct kmem_cache *s, 4927 static ssize_t validate_store(struct kmem_cache *s,
4925 const char *buf, size_t length) 4928 const char *buf, size_t length)
4926 { 4929 {
4927 int ret = -EINVAL; 4930 int ret = -EINVAL;
4928 4931
4929 if (buf[0] == '1') { 4932 if (buf[0] == '1') {
4930 ret = validate_slab_cache(s); 4933 ret = validate_slab_cache(s);
4931 if (ret >= 0) 4934 if (ret >= 0)
4932 ret = length; 4935 ret = length;
4933 } 4936 }
4934 return ret; 4937 return ret;
4935 } 4938 }
4936 SLAB_ATTR(validate); 4939 SLAB_ATTR(validate);
4937 4940
4938 static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) 4941 static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4939 { 4942 {
4940 if (!(s->flags & SLAB_STORE_USER)) 4943 if (!(s->flags & SLAB_STORE_USER))
4941 return -ENOSYS; 4944 return -ENOSYS;
4942 return list_locations(s, buf, TRACK_ALLOC); 4945 return list_locations(s, buf, TRACK_ALLOC);
4943 } 4946 }
4944 SLAB_ATTR_RO(alloc_calls); 4947 SLAB_ATTR_RO(alloc_calls);
4945 4948
4946 static ssize_t free_calls_show(struct kmem_cache *s, char *buf) 4949 static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4947 { 4950 {
4948 if (!(s->flags & SLAB_STORE_USER)) 4951 if (!(s->flags & SLAB_STORE_USER))
4949 return -ENOSYS; 4952 return -ENOSYS;
4950 return list_locations(s, buf, TRACK_FREE); 4953 return list_locations(s, buf, TRACK_FREE);
4951 } 4954 }
4952 SLAB_ATTR_RO(free_calls); 4955 SLAB_ATTR_RO(free_calls);
4953 #endif /* CONFIG_SLUB_DEBUG */ 4956 #endif /* CONFIG_SLUB_DEBUG */
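The alloc_calls and free_calls files above only return data when user tracking is on. A minimal userspace sketch, not kernel code, assuming a cache named "dentry" exists (the name is an arbitrary example) and that the machine may or may not have been booted with slub_debug=U: it reads the output of alloc_calls_show() and reports the -ENOSYS case that handler returns without SLAB_STORE_USER.

#include <errno.h>
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/slab/dentry/alloc_calls";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	/* alloc_calls_show() returns -ENOSYS without SLAB_STORE_USER */
	if (ferror(f) && errno == ENOSYS)
		fprintf(stderr, "no tracking data: boot with slub_debug=U\n");
	fclose(f);
	return 0;
}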
4954 4957
4955 #ifdef CONFIG_FAILSLAB 4958 #ifdef CONFIG_FAILSLAB
4956 static ssize_t failslab_show(struct kmem_cache *s, char *buf) 4959 static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4957 { 4960 {
4958 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); 4961 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4959 } 4962 }
4960 4963
4961 static ssize_t failslab_store(struct kmem_cache *s, const char *buf, 4964 static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4962 size_t length) 4965 size_t length)
4963 { 4966 {
4964 s->flags &= ~SLAB_FAILSLAB; 4967 s->flags &= ~SLAB_FAILSLAB;
4965 if (buf[0] == '1') 4968 if (buf[0] == '1')
4966 s->flags |= SLAB_FAILSLAB; 4969 s->flags |= SLAB_FAILSLAB;
4967 return length; 4970 return length;
4968 } 4971 }
4969 SLAB_ATTR(failslab); 4972 SLAB_ATTR(failslab);
4970 #endif 4973 #endif
4971 4974
4972 static ssize_t shrink_show(struct kmem_cache *s, char *buf) 4975 static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4973 { 4976 {
4974 return 0; 4977 return 0;
4975 } 4978 }
4976 4979
4977 static ssize_t shrink_store(struct kmem_cache *s, 4980 static ssize_t shrink_store(struct kmem_cache *s,
4978 const char *buf, size_t length) 4981 const char *buf, size_t length)
4979 { 4982 {
4980 if (buf[0] == '1') { 4983 if (buf[0] == '1') {
4981 int rc = kmem_cache_shrink(s); 4984 int rc = kmem_cache_shrink(s);
4982 4985
4983 if (rc) 4986 if (rc)
4984 return rc; 4987 return rc;
4985 } else 4988 } else
4986 return -EINVAL; 4989 return -EINVAL;
4987 return length; 4990 return length;
4988 } 4991 }
4989 SLAB_ATTR(shrink); 4992 SLAB_ATTR(shrink);
4990 4993
4991 #ifdef CONFIG_NUMA 4994 #ifdef CONFIG_NUMA
4992 static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) 4995 static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4993 { 4996 {
4994 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10); 4997 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4995 } 4998 }
4996 4999
4997 static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, 5000 static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4998 const char *buf, size_t length) 5001 const char *buf, size_t length)
4999 { 5002 {
5000 unsigned long ratio; 5003 unsigned long ratio;
5001 int err; 5004 int err;
5002 5005
5003 err = strict_strtoul(buf, 10, &ratio); 5006 err = strict_strtoul(buf, 10, &ratio);
5004 if (err) 5007 if (err)
5005 return err; 5008 return err;
5006 5009
5007 if (ratio <= 100) 5010 if (ratio <= 100)
5008 s->remote_node_defrag_ratio = ratio * 10; 5011 s->remote_node_defrag_ratio = ratio * 10;
5009 5012
5010 return length; 5013 return length;
5011 } 5014 }
5012 SLAB_ATTR(remote_node_defrag_ratio); 5015 SLAB_ATTR(remote_node_defrag_ratio);
5013 #endif 5016 #endif
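Note the factor of ten in the two handlers above: writing 30 to remote_node_defrag_ratio stores 300 in s->remote_node_defrag_ratio, and a later read divides by 10 and reports 30 again; values above 100 are silently ignored by the store handler, so the file effectively accepts a value between 0 and 100.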
5014 5017
5015 #ifdef CONFIG_SLUB_STATS 5018 #ifdef CONFIG_SLUB_STATS
5016 static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) 5019 static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5017 { 5020 {
5018 unsigned long sum = 0; 5021 unsigned long sum = 0;
5019 int cpu; 5022 int cpu;
5020 int len; 5023 int len;
5021 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); 5024 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
5022 5025
5023 if (!data) 5026 if (!data)
5024 return -ENOMEM; 5027 return -ENOMEM;
5025 5028
5026 for_each_online_cpu(cpu) { 5029 for_each_online_cpu(cpu) {
5027 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si]; 5030 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5028 5031
5029 data[cpu] = x; 5032 data[cpu] = x;
5030 sum += x; 5033 sum += x;
5031 } 5034 }
5032 5035
5033 len = sprintf(buf, "%lu", sum); 5036 len = sprintf(buf, "%lu", sum);
5034 5037
5035 #ifdef CONFIG_SMP 5038 #ifdef CONFIG_SMP
5036 for_each_online_cpu(cpu) { 5039 for_each_online_cpu(cpu) {
5037 if (data[cpu] && len < PAGE_SIZE - 20) 5040 if (data[cpu] && len < PAGE_SIZE - 20)
5038 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]); 5041 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
5039 } 5042 }
5040 #endif 5043 #endif
5041 kfree(data); 5044 kfree(data);
5042 return len + sprintf(buf + len, "\n"); 5045 return len + sprintf(buf + len, "\n");
5043 } 5046 }
5044 5047
5045 static void clear_stat(struct kmem_cache *s, enum stat_item si) 5048 static void clear_stat(struct kmem_cache *s, enum stat_item si)
5046 { 5049 {
5047 int cpu; 5050 int cpu;
5048 5051
5049 for_each_online_cpu(cpu) 5052 for_each_online_cpu(cpu)
5050 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0; 5053 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5051 } 5054 }
5052 5055
5053 #define STAT_ATTR(si, text) \ 5056 #define STAT_ATTR(si, text) \
5054 static ssize_t text##_show(struct kmem_cache *s, char *buf) \ 5057 static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5055 { \ 5058 { \
5056 return show_stat(s, buf, si); \ 5059 return show_stat(s, buf, si); \
5057 } \ 5060 } \
5058 static ssize_t text##_store(struct kmem_cache *s, \ 5061 static ssize_t text##_store(struct kmem_cache *s, \
5059 const char *buf, size_t length) \ 5062 const char *buf, size_t length) \
5060 { \ 5063 { \
5061 if (buf[0] != '0') \ 5064 if (buf[0] != '0') \
5062 return -EINVAL; \ 5065 return -EINVAL; \
5063 clear_stat(s, si); \ 5066 clear_stat(s, si); \
5064 return length; \ 5067 return length; \
5065 } \ 5068 } \
5066 SLAB_ATTR(text); \ 5069 SLAB_ATTR(text); \
5067 5070
5068 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); 5071 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5069 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); 5072 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5070 STAT_ATTR(FREE_FASTPATH, free_fastpath); 5073 STAT_ATTR(FREE_FASTPATH, free_fastpath);
5071 STAT_ATTR(FREE_SLOWPATH, free_slowpath); 5074 STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5072 STAT_ATTR(FREE_FROZEN, free_frozen); 5075 STAT_ATTR(FREE_FROZEN, free_frozen);
5073 STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial); 5076 STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5074 STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial); 5077 STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5075 STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial); 5078 STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5076 STAT_ATTR(ALLOC_SLAB, alloc_slab); 5079 STAT_ATTR(ALLOC_SLAB, alloc_slab);
5077 STAT_ATTR(ALLOC_REFILL, alloc_refill); 5080 STAT_ATTR(ALLOC_REFILL, alloc_refill);
5078 STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch); 5081 STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5079 STAT_ATTR(FREE_SLAB, free_slab); 5082 STAT_ATTR(FREE_SLAB, free_slab);
5080 STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush); 5083 STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5081 STAT_ATTR(DEACTIVATE_FULL, deactivate_full); 5084 STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5082 STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty); 5085 STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5083 STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); 5086 STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5084 STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); 5087 STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5085 STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); 5088 STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5086 STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass); 5089 STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5087 STAT_ATTR(ORDER_FALLBACK, order_fallback); 5090 STAT_ATTR(ORDER_FALLBACK, order_fallback);
5088 STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail); 5091 STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5089 STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail); 5092 STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5090 STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc); 5093 STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5091 STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free); 5094 STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5092 STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node); 5095 STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
5093 STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain); 5096 STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5094 #endif 5097 #endif
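For reference, the first STAT_ATTR() invocation above, STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath), expands to roughly the following (a sketch of the preprocessor output, reindented for readability; it relies on show_stat(), clear_stat() and SLAB_ATTR() defined earlier in this file):

/* Sketch of what STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath) expands to. */
static ssize_t alloc_fastpath_show(struct kmem_cache *s, char *buf)
{
	return show_stat(s, buf, ALLOC_FASTPATH);
}
static ssize_t alloc_fastpath_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	if (buf[0] != '0')		/* only writing "0" is accepted... */
		return -EINVAL;
	clear_stat(s, ALLOC_FASTPATH);	/* ...and it resets the counter */
	return length;
}
SLAB_ATTR(alloc_fastpath);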
5095 5098
5096 static struct attribute *slab_attrs[] = { 5099 static struct attribute *slab_attrs[] = {
5097 &slab_size_attr.attr, 5100 &slab_size_attr.attr,
5098 &object_size_attr.attr, 5101 &object_size_attr.attr,
5099 &objs_per_slab_attr.attr, 5102 &objs_per_slab_attr.attr,
5100 &order_attr.attr, 5103 &order_attr.attr,
5101 &min_partial_attr.attr, 5104 &min_partial_attr.attr,
5102 &cpu_partial_attr.attr, 5105 &cpu_partial_attr.attr,
5103 &objects_attr.attr, 5106 &objects_attr.attr,
5104 &objects_partial_attr.attr, 5107 &objects_partial_attr.attr,
5105 &partial_attr.attr, 5108 &partial_attr.attr,
5106 &cpu_slabs_attr.attr, 5109 &cpu_slabs_attr.attr,
5107 &ctor_attr.attr, 5110 &ctor_attr.attr,
5108 &aliases_attr.attr, 5111 &aliases_attr.attr,
5109 &align_attr.attr, 5112 &align_attr.attr,
5110 &hwcache_align_attr.attr, 5113 &hwcache_align_attr.attr,
5111 &reclaim_account_attr.attr, 5114 &reclaim_account_attr.attr,
5112 &destroy_by_rcu_attr.attr, 5115 &destroy_by_rcu_attr.attr,
5113 &shrink_attr.attr, 5116 &shrink_attr.attr,
5114 &reserved_attr.attr, 5117 &reserved_attr.attr,
5115 &slabs_cpu_partial_attr.attr, 5118 &slabs_cpu_partial_attr.attr,
5116 #ifdef CONFIG_SLUB_DEBUG 5119 #ifdef CONFIG_SLUB_DEBUG
5117 &total_objects_attr.attr, 5120 &total_objects_attr.attr,
5118 &slabs_attr.attr, 5121 &slabs_attr.attr,
5119 &sanity_checks_attr.attr, 5122 &sanity_checks_attr.attr,
5120 &trace_attr.attr, 5123 &trace_attr.attr,
5121 &red_zone_attr.attr, 5124 &red_zone_attr.attr,
5122 &poison_attr.attr, 5125 &poison_attr.attr,
5123 &store_user_attr.attr, 5126 &store_user_attr.attr,
5124 &validate_attr.attr, 5127 &validate_attr.attr,
5125 &alloc_calls_attr.attr, 5128 &alloc_calls_attr.attr,
5126 &free_calls_attr.attr, 5129 &free_calls_attr.attr,
5127 #endif 5130 #endif
5128 #ifdef CONFIG_ZONE_DMA 5131 #ifdef CONFIG_ZONE_DMA
5129 &cache_dma_attr.attr, 5132 &cache_dma_attr.attr,
5130 #endif 5133 #endif
5131 #ifdef CONFIG_NUMA 5134 #ifdef CONFIG_NUMA
5132 &remote_node_defrag_ratio_attr.attr, 5135 &remote_node_defrag_ratio_attr.attr,
5133 #endif 5136 #endif
5134 #ifdef CONFIG_SLUB_STATS 5137 #ifdef CONFIG_SLUB_STATS
5135 &alloc_fastpath_attr.attr, 5138 &alloc_fastpath_attr.attr,
5136 &alloc_slowpath_attr.attr, 5139 &alloc_slowpath_attr.attr,
5137 &free_fastpath_attr.attr, 5140 &free_fastpath_attr.attr,
5138 &free_slowpath_attr.attr, 5141 &free_slowpath_attr.attr,
5139 &free_frozen_attr.attr, 5142 &free_frozen_attr.attr,
5140 &free_add_partial_attr.attr, 5143 &free_add_partial_attr.attr,
5141 &free_remove_partial_attr.attr, 5144 &free_remove_partial_attr.attr,
5142 &alloc_from_partial_attr.attr, 5145 &alloc_from_partial_attr.attr,
5143 &alloc_slab_attr.attr, 5146 &alloc_slab_attr.attr,
5144 &alloc_refill_attr.attr, 5147 &alloc_refill_attr.attr,
5145 &alloc_node_mismatch_attr.attr, 5148 &alloc_node_mismatch_attr.attr,
5146 &free_slab_attr.attr, 5149 &free_slab_attr.attr,
5147 &cpuslab_flush_attr.attr, 5150 &cpuslab_flush_attr.attr,
5148 &deactivate_full_attr.attr, 5151 &deactivate_full_attr.attr,
5149 &deactivate_empty_attr.attr, 5152 &deactivate_empty_attr.attr,
5150 &deactivate_to_head_attr.attr, 5153 &deactivate_to_head_attr.attr,
5151 &deactivate_to_tail_attr.attr, 5154 &deactivate_to_tail_attr.attr,
5152 &deactivate_remote_frees_attr.attr, 5155 &deactivate_remote_frees_attr.attr,
5153 &deactivate_bypass_attr.attr, 5156 &deactivate_bypass_attr.attr,
5154 &order_fallback_attr.attr, 5157 &order_fallback_attr.attr,
5155 &cmpxchg_double_fail_attr.attr, 5158 &cmpxchg_double_fail_attr.attr,
5156 &cmpxchg_double_cpu_fail_attr.attr, 5159 &cmpxchg_double_cpu_fail_attr.attr,
5157 &cpu_partial_alloc_attr.attr, 5160 &cpu_partial_alloc_attr.attr,
5158 &cpu_partial_free_attr.attr, 5161 &cpu_partial_free_attr.attr,
5159 &cpu_partial_node_attr.attr, 5162 &cpu_partial_node_attr.attr,
5160 &cpu_partial_drain_attr.attr, 5163 &cpu_partial_drain_attr.attr,
5161 #endif 5164 #endif
5162 #ifdef CONFIG_FAILSLAB 5165 #ifdef CONFIG_FAILSLAB
5163 &failslab_attr.attr, 5166 &failslab_attr.attr,
5164 #endif 5167 #endif
5165 5168
5166 NULL 5169 NULL
5167 }; 5170 };
5168 5171
5169 static struct attribute_group slab_attr_group = { 5172 static struct attribute_group slab_attr_group = {
5170 .attrs = slab_attrs, 5173 .attrs = slab_attrs,
5171 }; 5174 };
5172 5175
5173 static ssize_t slab_attr_show(struct kobject *kobj, 5176 static ssize_t slab_attr_show(struct kobject *kobj,
5174 struct attribute *attr, 5177 struct attribute *attr,
5175 char *buf) 5178 char *buf)
5176 { 5179 {
5177 struct slab_attribute *attribute; 5180 struct slab_attribute *attribute;
5178 struct kmem_cache *s; 5181 struct kmem_cache *s;
5179 int err; 5182 int err;
5180 5183
5181 attribute = to_slab_attr(attr); 5184 attribute = to_slab_attr(attr);
5182 s = to_slab(kobj); 5185 s = to_slab(kobj);
5183 5186
5184 if (!attribute->show) 5187 if (!attribute->show)
5185 return -EIO; 5188 return -EIO;
5186 5189
5187 err = attribute->show(s, buf); 5190 err = attribute->show(s, buf);
5188 5191
5189 return err; 5192 return err;
5190 } 5193 }
5191 5194
5192 static ssize_t slab_attr_store(struct kobject *kobj, 5195 static ssize_t slab_attr_store(struct kobject *kobj,
5193 struct attribute *attr, 5196 struct attribute *attr,
5194 const char *buf, size_t len) 5197 const char *buf, size_t len)
5195 { 5198 {
5196 struct slab_attribute *attribute; 5199 struct slab_attribute *attribute;
5197 struct kmem_cache *s; 5200 struct kmem_cache *s;
5198 int err; 5201 int err;
5199 5202
5200 attribute = to_slab_attr(attr); 5203 attribute = to_slab_attr(attr);
5201 s = to_slab(kobj); 5204 s = to_slab(kobj);
5202 5205
5203 if (!attribute->store) 5206 if (!attribute->store)
5204 return -EIO; 5207 return -EIO;
5205 5208
5206 err = attribute->store(s, buf, len); 5209 err = attribute->store(s, buf, len);
5207 5210
5208 return err; 5211 return err;
5209 } 5212 }
5210 5213
5211 static void kmem_cache_release(struct kobject *kobj) 5214 static void kmem_cache_release(struct kobject *kobj)
5212 { 5215 {
5213 struct kmem_cache *s = to_slab(kobj); 5216 struct kmem_cache *s = to_slab(kobj);
5214 5217
5215 kfree(s->name); 5218 kfree(s->name);
5216 kfree(s); 5219 kfree(s);
5217 } 5220 }
5218 5221
5219 static const struct sysfs_ops slab_sysfs_ops = { 5222 static const struct sysfs_ops slab_sysfs_ops = {
5220 .show = slab_attr_show, 5223 .show = slab_attr_show,
5221 .store = slab_attr_store, 5224 .store = slab_attr_store,
5222 }; 5225 };
5223 5226
5224 static struct kobj_type slab_ktype = { 5227 static struct kobj_type slab_ktype = {
5225 .sysfs_ops = &slab_sysfs_ops, 5228 .sysfs_ops = &slab_sysfs_ops,
5226 .release = kmem_cache_release 5229 .release = kmem_cache_release
5227 }; 5230 };
5228 5231
5229 static int uevent_filter(struct kset *kset, struct kobject *kobj) 5232 static int uevent_filter(struct kset *kset, struct kobject *kobj)
5230 { 5233 {
5231 struct kobj_type *ktype = get_ktype(kobj); 5234 struct kobj_type *ktype = get_ktype(kobj);
5232 5235
5233 if (ktype == &slab_ktype) 5236 if (ktype == &slab_ktype)
5234 return 1; 5237 return 1;
5235 return 0; 5238 return 0;
5236 } 5239 }
5237 5240
5238 static const struct kset_uevent_ops slab_uevent_ops = { 5241 static const struct kset_uevent_ops slab_uevent_ops = {
5239 .filter = uevent_filter, 5242 .filter = uevent_filter,
5240 }; 5243 };
5241 5244
5242 static struct kset *slab_kset; 5245 static struct kset *slab_kset;
5243 5246
5244 #define ID_STR_LENGTH 64 5247 #define ID_STR_LENGTH 64
5245 5248
5246 /* Create a unique string id for a slab cache: 5249 /* Create a unique string id for a slab cache:
5247 * 5250 *
5248 * Format :[flags-]size 5251 * Format :[flags-]size
5249 */ 5252 */
5250 static char *create_unique_id(struct kmem_cache *s) 5253 static char *create_unique_id(struct kmem_cache *s)
5251 { 5254 {
5252 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL); 5255 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5253 char *p = name; 5256 char *p = name;
5254 5257
5255 BUG_ON(!name); 5258 BUG_ON(!name);
5256 5259
5257 *p++ = ':'; 5260 *p++ = ':';
5258 /* 5261 /*
5259 * First flags affecting slabcache operations. We will only 5262 * First flags affecting slabcache operations. We will only
5260 * get here for aliasable slabs so we do not need to support 5263 * get here for aliasable slabs so we do not need to support
5261 * too many flags. The flags here must cover all flags that 5264 * too many flags. The flags here must cover all flags that
5262 * are matched during merging to guarantee that the id is 5265 * are matched during merging to guarantee that the id is
5263 * unique. 5266 * unique.
5264 */ 5267 */
5265 if (s->flags & SLAB_CACHE_DMA) 5268 if (s->flags & SLAB_CACHE_DMA)
5266 *p++ = 'd'; 5269 *p++ = 'd';
5267 if (s->flags & SLAB_RECLAIM_ACCOUNT) 5270 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5268 *p++ = 'a'; 5271 *p++ = 'a';
5269 if (s->flags & SLAB_DEBUG_FREE) 5272 if (s->flags & SLAB_DEBUG_FREE)
5270 *p++ = 'F'; 5273 *p++ = 'F';
5271 if (!(s->flags & SLAB_NOTRACK)) 5274 if (!(s->flags & SLAB_NOTRACK))
5272 *p++ = 't'; 5275 *p++ = 't';
5273 if (p != name + 1) 5276 if (p != name + 1)
5274 *p++ = '-'; 5277 *p++ = '-';
5275 p += sprintf(p, "%07d", s->size); 5278 p += sprintf(p, "%07d", s->size);
5276 BUG_ON(p > name + ID_STR_LENGTH - 1); 5279 BUG_ON(p > name + ID_STR_LENGTH - 1);
5277 return name; 5280 return name;
5278 } 5281 }
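Worked examples of the id built above, derived from the flag tests in create_unique_id() rather than taken from a running system: a mergeable DMA cache of size 192 whose allocations are tracked (SLAB_NOTRACK clear) is named ":dt-0000192"; a cache with tracking and no other qualifying flags becomes ":t-0000192"; and a cache that sets SLAB_NOTRACK and none of the other flags ends up as plain ":0000064" for size 64, since no flag characters are emitted and the '-' separator is skipped.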
5279 5282
5280 static int sysfs_slab_add(struct kmem_cache *s) 5283 static int sysfs_slab_add(struct kmem_cache *s)
5281 { 5284 {
5282 int err; 5285 int err;
5283 const char *name; 5286 const char *name;
5284 int unmergeable; 5287 int unmergeable;
5285 5288
5286 if (slab_state < SYSFS) 5289 if (slab_state < SYSFS)
5287 /* Defer until later */ 5290 /* Defer until later */
5288 return 0; 5291 return 0;
5289 5292
5290 unmergeable = slab_unmergeable(s); 5293 unmergeable = slab_unmergeable(s);
5291 if (unmergeable) { 5294 if (unmergeable) {
5292 /* 5295 /*
5293 * Slabcache can never be merged so we can use the name proper. 5296 * Slabcache can never be merged so we can use the name proper.
5294 * This is typically the case for debug situations. In that 5297 * This is typically the case for debug situations. In that
5295 * case we can catch duplicate names easily. 5298 * case we can catch duplicate names easily.
5296 */ 5299 */
5297 sysfs_remove_link(&slab_kset->kobj, s->name); 5300 sysfs_remove_link(&slab_kset->kobj, s->name);
5298 name = s->name; 5301 name = s->name;
5299 } else { 5302 } else {
5300 /* 5303 /*
5301 * Create a unique name for the slab as a target 5304 * Create a unique name for the slab as a target
5302 * for the symlinks. 5305 * for the symlinks.
5303 */ 5306 */
5304 name = create_unique_id(s); 5307 name = create_unique_id(s);
5305 } 5308 }
5306 5309
5307 s->kobj.kset = slab_kset; 5310 s->kobj.kset = slab_kset;
5308 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name); 5311 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
5309 if (err) { 5312 if (err) {
5310 kobject_put(&s->kobj); 5313 kobject_put(&s->kobj);
5311 return err; 5314 return err;
5312 } 5315 }
5313 5316
5314 err = sysfs_create_group(&s->kobj, &slab_attr_group); 5317 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5315 if (err) { 5318 if (err) {
5316 kobject_del(&s->kobj); 5319 kobject_del(&s->kobj);
5317 kobject_put(&s->kobj); 5320 kobject_put(&s->kobj);
5318 return err; 5321 return err;
5319 } 5322 }
5320 kobject_uevent(&s->kobj, KOBJ_ADD); 5323 kobject_uevent(&s->kobj, KOBJ_ADD);
5321 if (!unmergeable) { 5324 if (!unmergeable) {
5322 /* Setup first alias */ 5325 /* Setup first alias */
5323 sysfs_slab_alias(s, s->name); 5326 sysfs_slab_alias(s, s->name);
5324 kfree(name); 5327 kfree(name);
5325 } 5328 }
5326 return 0; 5329 return 0;
5327 } 5330 }
5328 5331
5329 static void sysfs_slab_remove(struct kmem_cache *s) 5332 static void sysfs_slab_remove(struct kmem_cache *s)
5330 { 5333 {
5331 if (slab_state < SYSFS) 5334 if (slab_state < SYSFS)
5332 /* 5335 /*
5333 * Sysfs has not been setup yet so no need to remove the 5336 * Sysfs has not been setup yet so no need to remove the
5334 * cache from sysfs. 5337 * cache from sysfs.
5335 */ 5338 */
5336 return; 5339 return;
5337 5340
5338 kobject_uevent(&s->kobj, KOBJ_REMOVE); 5341 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5339 kobject_del(&s->kobj); 5342 kobject_del(&s->kobj);
5340 kobject_put(&s->kobj); 5343 kobject_put(&s->kobj);
5341 } 5344 }
5342 5345
5343 /* 5346 /*
5344 * Need to buffer aliases during bootup until sysfs becomes 5347 * Need to buffer aliases during bootup until sysfs becomes
5345 * available lest we lose that information. 5348 * available lest we lose that information.
5346 */ 5349 */
5347 struct saved_alias { 5350 struct saved_alias {
5348 struct kmem_cache *s; 5351 struct kmem_cache *s;
5349 const char *name; 5352 const char *name;
5350 struct saved_alias *next; 5353 struct saved_alias *next;
5351 }; 5354 };
5352 5355
5353 static struct saved_alias *alias_list; 5356 static struct saved_alias *alias_list;
5354 5357
5355 static int sysfs_slab_alias(struct kmem_cache *s, const char *name) 5358 static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5356 { 5359 {
5357 struct saved_alias *al; 5360 struct saved_alias *al;
5358 5361
5359 if (slab_state == SYSFS) { 5362 if (slab_state == SYSFS) {
5360 /* 5363 /*
5361 * If we have a leftover link then remove it. 5364 * If we have a leftover link then remove it.
5362 */ 5365 */
5363 sysfs_remove_link(&slab_kset->kobj, name); 5366 sysfs_remove_link(&slab_kset->kobj, name);
5364 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name); 5367 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5365 } 5368 }
5366 5369
5367 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL); 5370 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5368 if (!al) 5371 if (!al)
5369 return -ENOMEM; 5372 return -ENOMEM;
5370 5373
5371 al->s = s; 5374 al->s = s;
5372 al->name = name; 5375 al->name = name;
5373 al->next = alias_list; 5376 al->next = alias_list;
5374 alias_list = al; 5377 alias_list = al;
5375 return 0; 5378 return 0;
5376 } 5379 }
5377 5380
5378 static int __init slab_sysfs_init(void) 5381 static int __init slab_sysfs_init(void)
5379 { 5382 {
5380 struct kmem_cache *s; 5383 struct kmem_cache *s;
5381 int err; 5384 int err;
5382 5385
5383 down_write(&slub_lock); 5386 down_write(&slub_lock);
5384 5387
5385 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); 5388 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5386 if (!slab_kset) { 5389 if (!slab_kset) {
5387 up_write(&slub_lock); 5390 up_write(&slub_lock);
5388 printk(KERN_ERR "Cannot register slab subsystem.\n"); 5391 printk(KERN_ERR "Cannot register slab subsystem.\n");
5389 return -ENOSYS; 5392 return -ENOSYS;
5390 } 5393 }
5391 5394
5392 slab_state = SYSFS; 5395 slab_state = SYSFS;
5393 5396
5394 list_for_each_entry(s, &slab_caches, list) { 5397 list_for_each_entry(s, &slab_caches, list) {
5395 err = sysfs_slab_add(s); 5398 err = sysfs_slab_add(s);
5396 if (err) 5399 if (err)
5397 printk(KERN_ERR "SLUB: Unable to add boot slab %s" 5400 printk(KERN_ERR "SLUB: Unable to add boot slab %s"
5398 " to sysfs\n", s->name); 5401 " to sysfs\n", s->name);
5399 } 5402 }
5400 5403
5401 while (alias_list) { 5404 while (alias_list) {
5402 struct saved_alias *al = alias_list; 5405 struct saved_alias *al = alias_list;
5403 5406
5404 alias_list = alias_list->next; 5407 alias_list = alias_list->next;
5405 err = sysfs_slab_alias(al->s, al->name); 5408 err = sysfs_slab_alias(al->s, al->name);
5406 if (err) 5409 if (err)
5407 printk(KERN_ERR "SLUB: Unable to add boot slab alias" 5410 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
5408 " %s to sysfs\n", s->name); 5411 " %s to sysfs\n", s->name);
5409 kfree(al); 5412 kfree(al);
5410 } 5413 }
5411 5414
5412 up_write(&slub_lock); 5415 up_write(&slub_lock);
5413 resiliency_test(); 5416 resiliency_test();
5414 return 0; 5417 return 0;
5415 } 5418 }
5416 5419
5417 __initcall(slab_sysfs_init); 5420 __initcall(slab_sysfs_init);
5418 #endif /* CONFIG_SYSFS */ 5421 #endif /* CONFIG_SYSFS */
5419 5422
5420 /* 5423 /*
5421 * The /proc/slabinfo ABI 5424 * The /proc/slabinfo ABI
5422 */ 5425 */
5423 #ifdef CONFIG_SLABINFO 5426 #ifdef CONFIG_SLABINFO
5424 static void print_slabinfo_header(struct seq_file *m) 5427 static void print_slabinfo_header(struct seq_file *m)
5425 { 5428 {
5426 seq_puts(m, "slabinfo - version: 2.1\n"); 5429 seq_puts(m, "slabinfo - version: 2.1\n");
5427 seq_puts(m, "# name <active_objs> <num_objs> <objsize> " 5430 seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
5428 "<objperslab> <pagesperslab>"); 5431 "<objperslab> <pagesperslab>");
5429 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); 5432 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
5430 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); 5433 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
5431 seq_putc(m, '\n'); 5434 seq_putc(m, '\n');
5432 } 5435 }
5433 5436
5434 static void *s_start(struct seq_file *m, loff_t *pos) 5437 static void *s_start(struct seq_file *m, loff_t *pos)
5435 { 5438 {
5436 loff_t n = *pos; 5439 loff_t n = *pos;
5437 5440
5438 down_read(&slub_lock); 5441 down_read(&slub_lock);
5439 if (!n) 5442 if (!n)
5440 print_slabinfo_header(m); 5443 print_slabinfo_header(m);
5441 5444
5442 return seq_list_start(&slab_caches, *pos); 5445 return seq_list_start(&slab_caches, *pos);
5443 } 5446 }
5444 5447
5445 static void *s_next(struct seq_file *m, void *p, loff_t *pos) 5448 static void *s_next(struct seq_file *m, void *p, loff_t *pos)
5446 { 5449 {
5447 return seq_list_next(p, &slab_caches, pos); 5450 return seq_list_next(p, &slab_caches, pos);
5448 } 5451 }
5449 5452
5450 static void s_stop(struct seq_file *m, void *p) 5453 static void s_stop(struct seq_file *m, void *p)
5451 { 5454 {
5452 up_read(&slub_lock); 5455 up_read(&slub_lock);
5453 } 5456 }
5454 5457
5455 static int s_show(struct seq_file *m, void *p) 5458 static int s_show(struct seq_file *m, void *p)
5456 { 5459 {
5457 unsigned long nr_partials = 0; 5460 unsigned long nr_partials = 0;
5458 unsigned long nr_slabs = 0; 5461 unsigned long nr_slabs = 0;
5459 unsigned long nr_inuse = 0; 5462 unsigned long nr_inuse = 0;
5460 unsigned long nr_objs = 0; 5463 unsigned long nr_objs = 0;
5461 unsigned long nr_free = 0; 5464 unsigned long nr_free = 0;
5462 struct kmem_cache *s; 5465 struct kmem_cache *s;
5463 int node; 5466 int node;
5464 5467
5465 s = list_entry(p, struct kmem_cache, list); 5468 s = list_entry(p, struct kmem_cache, list);
5466 5469
5467 for_each_online_node(node) { 5470 for_each_online_node(node) {
5468 struct kmem_cache_node *n = get_node(s, node); 5471 struct kmem_cache_node *n = get_node(s, node);
5469 5472
5470 if (!n) 5473 if (!n)
5471 continue; 5474 continue;
5472 5475
5473 nr_partials += n->nr_partial; 5476 nr_partials += n->nr_partial;
5474 nr_slabs += atomic_long_read(&n->nr_slabs); 5477 nr_slabs += atomic_long_read(&n->nr_slabs);
5475 nr_objs += atomic_long_read(&n->total_objects); 5478 nr_objs += atomic_long_read(&n->total_objects);
5476 nr_free += count_partial(n, count_free); 5479 nr_free += count_partial(n, count_free);
5477 } 5480 }
5478 5481
5479 nr_inuse = nr_objs - nr_free; 5482 nr_inuse = nr_objs - nr_free;
5480 5483
5481 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse, 5484 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
5482 nr_objs, s->size, oo_objects(s->oo), 5485 nr_objs, s->size, oo_objects(s->oo),
5483 (1 << oo_order(s->oo))); 5486 (1 << oo_order(s->oo)));
5484 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0); 5487 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
5485 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs, 5488 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
5486 0UL); 5489 0UL);
5487 seq_putc(m, '\n'); 5490 seq_putc(m, '\n');
5488 return 0; 5491 return 0;
5489 } 5492 }
5490 5493
5491 static const struct seq_operations slabinfo_op = { 5494 static const struct seq_operations slabinfo_op = {
5492 .start = s_start, 5495 .start = s_start,
5493 .next = s_next, 5496 .next = s_next,
5494 .stop = s_stop, 5497 .stop = s_stop,
5495 .show = s_show, 5498 .show = s_show,
5496 }; 5499 };
5497 5500
5498 static int slabinfo_open(struct inode *inode, struct file *file) 5501 static int slabinfo_open(struct inode *inode, struct file *file)
5499 { 5502 {
5500 return seq_open(file, &slabinfo_op); 5503 return seq_open(file, &slabinfo_op);
5501 } 5504 }
5502 5505
5503 static const struct file_operations proc_slabinfo_operations = { 5506 static const struct file_operations proc_slabinfo_operations = {
5504 .open = slabinfo_open, 5507 .open = slabinfo_open,
5505 .read = seq_read, 5508 .read = seq_read,
5506 .llseek = seq_lseek, 5509 .llseek = seq_lseek,
5507 .release = seq_release, 5510 .release = seq_release,
5508 }; 5511 };
5509 5512
5510 static int __init slab_proc_init(void) 5513 static int __init slab_proc_init(void)
5511 { 5514 {
5512 proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations); 5515 proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
5513 return 0; 5516 return 0;
5514 } 5517 }
5515 module_init(slab_proc_init); 5518 module_init(slab_proc_init);
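To close the loop on the /proc interface: a minimal userspace sketch, not part of the kernel source, that consumes the version 2.1 format emitted by print_slabinfo_header() and s_show() above. The file is created with S_IRUSR, so it is readable by root only.

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/slabinfo", "r");
	char line[512];
	char name[64];
	unsigned long active, total;

	if (!f) {
		perror("/proc/slabinfo");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* skip the "slabinfo - version: 2.1" and "# name ..." headers */
		if (!strncmp(line, "slabinfo", 8) || line[0] == '#')
			continue;
		/* first three fields: name <active_objs> <num_objs> */
		if (sscanf(line, "%63s %lu %lu", name, &active, &total) == 3)
			printf("%-20s %lu/%lu objects in use\n",
			       name, active, total);
	}
	fclose(f);
	return 0;
}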