Commit a5c43dae7ae38c2a6b3e9a819bcf45f010bf6a4a

Authored by Alexey Dobriyan
Committed by Linus Torvalds
1 parent 9d65cb4a17

Fix race between cat /proc/slab_allocators and rmmod

Same story as with the cat /proc/*/wchan vs rmmod race, only
/proc/slab_allocators wants more info than just the symbol name.

Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 5 changed files with 61 additions and 7 deletions
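For context, a minimal sketch of the racy pattern this commit targets. The actual reader lives in mm/slab.c and is not shown in this excerpt, so the function below is illustrative only: kallsyms_lookup() copies the symbol name into namebuf, but modname is left pointing at the module's own data, and the lookup walks module structures without module_mutex, so a concurrent rmmod can free what is being read and later printed.

#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/seq_file.h>

/* Illustrative sketch, not part of the patch: a /proc reader resolving a
 * recorded caller address the pre-patch way. */
static void show_symbol_racy(struct seq_file *m, unsigned long address)
{
	unsigned long offset, size;
	char namebuf[KSYM_NAME_LEN + 1];
	char *modname;
	const char *name;

	name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
	if (name) {
		seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
		if (modname)
			seq_printf(m, " [%s]", modname); /* may already be freed by rmmod */
	} else {
		seq_printf(m, "%p", (void *)address);
	}
}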

include/linux/kallsyms.h
1 /* Rewritten and vastly simplified by Rusty Russell for in-kernel 1 /* Rewritten and vastly simplified by Rusty Russell for in-kernel
2 * module loader: 2 * module loader:
3 * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 3 * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
4 */ 4 */
5 #ifndef _LINUX_KALLSYMS_H 5 #ifndef _LINUX_KALLSYMS_H
6 #define _LINUX_KALLSYMS_H 6 #define _LINUX_KALLSYMS_H
7 7
8 8
9 #define KSYM_NAME_LEN 127 9 #define KSYM_NAME_LEN 127
10 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN + \ 10 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN + \
11 2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1) 11 2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1)
12 12
13 #ifdef CONFIG_KALLSYMS 13 #ifdef CONFIG_KALLSYMS
14 /* Lookup the address for a symbol. Returns 0 if not found. */ 14 /* Lookup the address for a symbol. Returns 0 if not found. */
15 unsigned long kallsyms_lookup_name(const char *name); 15 unsigned long kallsyms_lookup_name(const char *name);
16 16
17 extern int kallsyms_lookup_size_offset(unsigned long addr, 17 extern int kallsyms_lookup_size_offset(unsigned long addr,
18 unsigned long *symbolsize, 18 unsigned long *symbolsize,
19 unsigned long *offset); 19 unsigned long *offset);
20 20
21 /* Lookup an address. modname is set to NULL if it's in the kernel. */ 21 /* Lookup an address. modname is set to NULL if it's in the kernel. */
22 const char *kallsyms_lookup(unsigned long addr, 22 const char *kallsyms_lookup(unsigned long addr,
23 unsigned long *symbolsize, 23 unsigned long *symbolsize,
24 unsigned long *offset, 24 unsigned long *offset,
25 char **modname, char *namebuf); 25 char **modname, char *namebuf);
26 26
27 /* Look up a kernel symbol and return it in a text buffer. */ 27 /* Look up a kernel symbol and return it in a text buffer. */
28 extern int sprint_symbol(char *buffer, unsigned long address); 28 extern int sprint_symbol(char *buffer, unsigned long address);
29 29
30 /* Look up a kernel symbol and print it to the kernel messages. */ 30 /* Look up a kernel symbol and print it to the kernel messages. */
31 extern void __print_symbol(const char *fmt, unsigned long address); 31 extern void __print_symbol(const char *fmt, unsigned long address);
32 32
33 int lookup_symbol_name(unsigned long addr, char *symname); 33 int lookup_symbol_name(unsigned long addr, char *symname);
34 int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name);
34 35
35 #else /* !CONFIG_KALLSYMS */ 36 #else /* !CONFIG_KALLSYMS */
36 37
37 static inline unsigned long kallsyms_lookup_name(const char *name) 38 static inline unsigned long kallsyms_lookup_name(const char *name)
38 { 39 {
39 return 0; 40 return 0;
40 } 41 }
41 42
42 static inline int kallsyms_lookup_size_offset(unsigned long addr, 43 static inline int kallsyms_lookup_size_offset(unsigned long addr,
43 unsigned long *symbolsize, 44 unsigned long *symbolsize,
44 unsigned long *offset) 45 unsigned long *offset)
45 { 46 {
46 return 0; 47 return 0;
47 } 48 }
48 49
49 static inline const char *kallsyms_lookup(unsigned long addr, 50 static inline const char *kallsyms_lookup(unsigned long addr,
50 unsigned long *symbolsize, 51 unsigned long *symbolsize,
51 unsigned long *offset, 52 unsigned long *offset,
52 char **modname, char *namebuf) 53 char **modname, char *namebuf)
53 { 54 {
54 return NULL; 55 return NULL;
55 } 56 }
56 57
57 static inline int sprint_symbol(char *buffer, unsigned long addr) 58 static inline int sprint_symbol(char *buffer, unsigned long addr)
58 { 59 {
59 *buffer = '\0'; 60 *buffer = '\0';
60 return 0; 61 return 0;
61 } 62 }
62 63
63 static inline int lookup_symbol_name(unsigned long addr, char *symname) 64 static inline int lookup_symbol_name(unsigned long addr, char *symname)
65 {
66 return -ERANGE;
67 }
68
69 static inline int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name)
64 { 70 {
65 return -ERANGE; 71 return -ERANGE;
66 } 72 }
67 73
68 /* Stupid that this does nothing, but I didn't create this mess. */ 74 /* Stupid that this does nothing, but I didn't create this mess. */
69 #define __print_symbol(fmt, addr) 75 #define __print_symbol(fmt, addr)
70 #endif /*CONFIG_KALLSYMS*/ 76 #endif /*CONFIG_KALLSYMS*/
71 77
72 /* This macro allows us to keep printk typechecking */ 78 /* This macro allows us to keep printk typechecking */
73 static void __check_printsym_format(const char *fmt, ...) 79 static void __check_printsym_format(const char *fmt, ...)
74 __attribute__((format(printf,1,2))); 80 __attribute__((format(printf,1,2)));
75 static inline void __check_printsym_format(const char *fmt, ...) 81 static inline void __check_printsym_format(const char *fmt, ...)
76 { 82 {
77 } 83 }
78 /* ia64 and ppc64 use function descriptors, which contain the real address */ 84 /* ia64 and ppc64 use function descriptors, which contain the real address */
79 #if defined(CONFIG_IA64) || defined(CONFIG_PPC64) 85 #if defined(CONFIG_IA64) || defined(CONFIG_PPC64)
80 #define print_fn_descriptor_symbol(fmt, addr) \ 86 #define print_fn_descriptor_symbol(fmt, addr) \
81 do { \ 87 do { \
82 unsigned long *__faddr = (unsigned long*) addr; \ 88 unsigned long *__faddr = (unsigned long*) addr; \
83 print_symbol(fmt, __faddr[0]); \ 89 print_symbol(fmt, __faddr[0]); \
84 } while (0) 90 } while (0)
85 #else 91 #else
86 #define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr) 92 #define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr)
87 #endif 93 #endif
88 94
89 static inline void print_symbol(const char *fmt, unsigned long addr) 95 static inline void print_symbol(const char *fmt, unsigned long addr)
90 { 96 {
91 __check_printsym_format(fmt, ""); 97 __check_printsym_format(fmt, "");
92 __print_symbol(fmt, (unsigned long) 98 __print_symbol(fmt, (unsigned long)
93 __builtin_extract_return_addr((void *)addr)); 99 __builtin_extract_return_addr((void *)addr));
94 } 100 }
95 101
96 #ifndef CONFIG_64BIT 102 #ifndef CONFIG_64BIT
97 #define print_ip_sym(ip) \ 103 #define print_ip_sym(ip) \
98 do { \ 104 do { \
99 printk("[<%08lx>]", ip); \ 105 printk("[<%08lx>]", ip); \
100 print_symbol(" %s\n", ip); \ 106 print_symbol(" %s\n", ip); \
101 } while(0) 107 } while(0)
102 #else 108 #else
103 #define print_ip_sym(ip) \ 109 #define print_ip_sym(ip) \
104 do { \ 110 do { \
105 printk("[<%016lx>]", ip); \ 111 printk("[<%016lx>]", ip); \
106 print_symbol(" %s\n", ip); \ 112 print_symbol(" %s\n", ip); \
107 } while(0) 113 } while(0)
108 #endif 114 #endif
109 115
110 #endif /*_LINUX_KALLSYMS_H*/ 116 #endif /*_LINUX_KALLSYMS_H*/
111 117
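A minimal usage sketch for the lookup_symbol_attrs() declaration added above; the function name describe_address() and the exact buffer sizes are assumptions, not taken from the patch. The point of the new interface is that size, offset, modname and name are all copied into caller-owned storage, so nothing in the result can be invalidated by a concurrent rmmod; with CONFIG_KALLSYMS=n the stub simply returns -ERANGE.

#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/module.h>	/* MODULE_NAME_LEN */

/* Hypothetical caller of the new copy-out interface. */
static void describe_address(unsigned long addr)
{
	unsigned long size, offset;
	char name[KSYM_NAME_LEN + 1];
	char modname[MODULE_NAME_LEN];

	if (lookup_symbol_attrs(addr, &size, &offset, modname, name) == 0) {
		/* An empty modname is taken to mean a core kernel symbol. */
		if (modname[0])
			printk(KERN_DEBUG "%s+%#lx/%#lx [%s]\n",
			       name, offset, size, modname);
		else
			printk(KERN_DEBUG "%s+%#lx/%#lx\n", name, offset, size);
	} else {
		printk(KERN_DEBUG "no symbol found at %#lx\n", addr);
	}
}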
include/linux/module.h
1 #ifndef _LINUX_MODULE_H 1 #ifndef _LINUX_MODULE_H
2 #define _LINUX_MODULE_H 2 #define _LINUX_MODULE_H
3 /* 3 /*
4 * Dynamic loading of modules into the kernel. 4 * Dynamic loading of modules into the kernel.
5 * 5 *
6 * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996 6 * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996
7 * Rewritten again by Rusty Russell, 2002 7 * Rewritten again by Rusty Russell, 2002
8 */ 8 */
9 #include <linux/spinlock.h> 9 #include <linux/spinlock.h>
10 #include <linux/list.h> 10 #include <linux/list.h>
11 #include <linux/stat.h> 11 #include <linux/stat.h>
12 #include <linux/compiler.h> 12 #include <linux/compiler.h>
13 #include <linux/cache.h> 13 #include <linux/cache.h>
14 #include <linux/kmod.h> 14 #include <linux/kmod.h>
15 #include <linux/elf.h> 15 #include <linux/elf.h>
16 #include <linux/stringify.h> 16 #include <linux/stringify.h>
17 #include <linux/kobject.h> 17 #include <linux/kobject.h>
18 #include <linux/moduleparam.h> 18 #include <linux/moduleparam.h>
19 #include <asm/local.h> 19 #include <asm/local.h>
20 20
21 #include <asm/module.h> 21 #include <asm/module.h>
22 22
23 /* Not Yet Implemented */ 23 /* Not Yet Implemented */
24 #define MODULE_SUPPORTED_DEVICE(name) 24 #define MODULE_SUPPORTED_DEVICE(name)
25 25
26 /* v850 toolchain uses a `_' prefix for all user symbols */ 26 /* v850 toolchain uses a `_' prefix for all user symbols */
27 #ifndef MODULE_SYMBOL_PREFIX 27 #ifndef MODULE_SYMBOL_PREFIX
28 #define MODULE_SYMBOL_PREFIX "" 28 #define MODULE_SYMBOL_PREFIX ""
29 #endif 29 #endif
30 30
31 #define MODULE_NAME_LEN (64 - sizeof(unsigned long)) 31 #define MODULE_NAME_LEN (64 - sizeof(unsigned long))
32 32
33 struct kernel_symbol 33 struct kernel_symbol
34 { 34 {
35 unsigned long value; 35 unsigned long value;
36 const char *name; 36 const char *name;
37 }; 37 };
38 38
39 struct modversion_info 39 struct modversion_info
40 { 40 {
41 unsigned long crc; 41 unsigned long crc;
42 char name[MODULE_NAME_LEN]; 42 char name[MODULE_NAME_LEN];
43 }; 43 };
44 44
45 struct module; 45 struct module;
46 46
47 struct module_attribute { 47 struct module_attribute {
48 struct attribute attr; 48 struct attribute attr;
49 ssize_t (*show)(struct module_attribute *, struct module *, char *); 49 ssize_t (*show)(struct module_attribute *, struct module *, char *);
50 ssize_t (*store)(struct module_attribute *, struct module *, 50 ssize_t (*store)(struct module_attribute *, struct module *,
51 const char *, size_t count); 51 const char *, size_t count);
52 void (*setup)(struct module *, const char *); 52 void (*setup)(struct module *, const char *);
53 int (*test)(struct module *); 53 int (*test)(struct module *);
54 void (*free)(struct module *); 54 void (*free)(struct module *);
55 }; 55 };
56 56
57 struct module_kobject 57 struct module_kobject
58 { 58 {
59 struct kobject kobj; 59 struct kobject kobj;
60 struct module *mod; 60 struct module *mod;
61 struct kobject *drivers_dir; 61 struct kobject *drivers_dir;
62 }; 62 };
63 63
64 /* These are either module local, or the kernel's dummy ones. */ 64 /* These are either module local, or the kernel's dummy ones. */
65 extern int init_module(void); 65 extern int init_module(void);
66 extern void cleanup_module(void); 66 extern void cleanup_module(void);
67 67
68 /* Archs provide a method of finding the correct exception table. */ 68 /* Archs provide a method of finding the correct exception table. */
69 struct exception_table_entry; 69 struct exception_table_entry;
70 70
71 const struct exception_table_entry * 71 const struct exception_table_entry *
72 search_extable(const struct exception_table_entry *first, 72 search_extable(const struct exception_table_entry *first,
73 const struct exception_table_entry *last, 73 const struct exception_table_entry *last,
74 unsigned long value); 74 unsigned long value);
75 void sort_extable(struct exception_table_entry *start, 75 void sort_extable(struct exception_table_entry *start,
76 struct exception_table_entry *finish); 76 struct exception_table_entry *finish);
77 void sort_main_extable(void); 77 void sort_main_extable(void);
78 78
79 #ifdef MODULE 79 #ifdef MODULE
80 #define MODULE_GENERIC_TABLE(gtype,name) \ 80 #define MODULE_GENERIC_TABLE(gtype,name) \
81 extern const struct gtype##_id __mod_##gtype##_table \ 81 extern const struct gtype##_id __mod_##gtype##_table \
82 __attribute__ ((unused, alias(__stringify(name)))) 82 __attribute__ ((unused, alias(__stringify(name))))
83 83
84 extern struct module __this_module; 84 extern struct module __this_module;
85 #define THIS_MODULE (&__this_module) 85 #define THIS_MODULE (&__this_module)
86 #else /* !MODULE */ 86 #else /* !MODULE */
87 #define MODULE_GENERIC_TABLE(gtype,name) 87 #define MODULE_GENERIC_TABLE(gtype,name)
88 #define THIS_MODULE ((struct module *)0) 88 #define THIS_MODULE ((struct module *)0)
89 #endif 89 #endif
90 90
91 /* Generic info of form tag = "info" */ 91 /* Generic info of form tag = "info" */
92 #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) 92 #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info)
93 93
94 /* For userspace: you can also call me... */ 94 /* For userspace: you can also call me... */
95 #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias) 95 #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias)
96 96
97 /* 97 /*
98 * The following license idents are currently accepted as indicating free 98 * The following license idents are currently accepted as indicating free
99 * software modules 99 * software modules
100 * 100 *
101 * "GPL" [GNU Public License v2 or later] 101 * "GPL" [GNU Public License v2 or later]
102 * "GPL v2" [GNU Public License v2] 102 * "GPL v2" [GNU Public License v2]
103 * "GPL and additional rights" [GNU Public License v2 rights and more] 103 * "GPL and additional rights" [GNU Public License v2 rights and more]
104 * "Dual BSD/GPL" [GNU Public License v2 104 * "Dual BSD/GPL" [GNU Public License v2
105 * or BSD license choice] 105 * or BSD license choice]
106 * "Dual MIT/GPL" [GNU Public License v2 106 * "Dual MIT/GPL" [GNU Public License v2
107 * or MIT license choice] 107 * or MIT license choice]
108 * "Dual MPL/GPL" [GNU Public License v2 108 * "Dual MPL/GPL" [GNU Public License v2
109 * or Mozilla license choice] 109 * or Mozilla license choice]
110 * 110 *
111 * The following other idents are available 111 * The following other idents are available
112 * 112 *
113 * "Proprietary" [Non free products] 113 * "Proprietary" [Non free products]
114 * 114 *
115 * There are dual licensed components, but when running with Linux it is the 115 * There are dual licensed components, but when running with Linux it is the
116 * GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL 116 * GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL
117 * is a GPL combined work. 117 * is a GPL combined work.
118 * 118 *
119 * This exists for several reasons 119 * This exists for several reasons
120 * 1. So modinfo can show license info for users wanting to vet their setup 120 * 1. So modinfo can show license info for users wanting to vet their setup
121 * is free 121 * is free
122 * 2. So the community can ignore bug reports including proprietary modules 122 * 2. So the community can ignore bug reports including proprietary modules
123 * 3. So vendors can do likewise based on their own policies 123 * 3. So vendors can do likewise based on their own policies
124 */ 124 */
125 #define MODULE_LICENSE(_license) MODULE_INFO(license, _license) 125 #define MODULE_LICENSE(_license) MODULE_INFO(license, _license)
126 126
127 /* Author, ideally of form NAME <EMAIL>[, NAME <EMAIL>]*[ and NAME <EMAIL>] */ 127 /* Author, ideally of form NAME <EMAIL>[, NAME <EMAIL>]*[ and NAME <EMAIL>] */
128 #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) 128 #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)
129 129
130 /* What your module does. */ 130 /* What your module does. */
131 #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) 131 #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description)
132 132
133 /* One for each parameter, describing how to use it. Some files do 133 /* One for each parameter, describing how to use it. Some files do
134 multiple of these per line, so can't just use MODULE_INFO. */ 134 multiple of these per line, so can't just use MODULE_INFO. */
135 #define MODULE_PARM_DESC(_parm, desc) \ 135 #define MODULE_PARM_DESC(_parm, desc) \
136 __MODULE_INFO(parm, _parm, #_parm ":" desc) 136 __MODULE_INFO(parm, _parm, #_parm ":" desc)
137 137
138 #define MODULE_DEVICE_TABLE(type,name) \ 138 #define MODULE_DEVICE_TABLE(type,name) \
139 MODULE_GENERIC_TABLE(type##_device,name) 139 MODULE_GENERIC_TABLE(type##_device,name)
140 140
141 /* Version of form [<epoch>:]<version>[-<extra-version>]. 141 /* Version of form [<epoch>:]<version>[-<extra-version>].
142 Or for CVS/RCS ID version, everything but the number is stripped. 142 Or for CVS/RCS ID version, everything but the number is stripped.
143 <epoch>: A (small) unsigned integer which allows you to start versions 143 <epoch>: A (small) unsigned integer which allows you to start versions
144 anew. If not mentioned, it's zero. eg. "2:1.0" is after 144 anew. If not mentioned, it's zero. eg. "2:1.0" is after
145 "1:2.0". 145 "1:2.0".
146 <version>: The <version> may contain only alphanumerics and the 146 <version>: The <version> may contain only alphanumerics and the
147 character `.'. Ordered by numeric sort for numeric parts, 147 character `.'. Ordered by numeric sort for numeric parts,
148 ascii sort for ascii parts (as per RPM or DEB algorithm). 148 ascii sort for ascii parts (as per RPM or DEB algorithm).
149 <extraversion>: Like <version>, but inserted for local 149 <extraversion>: Like <version>, but inserted for local
150 customizations, eg "rh3" or "rusty1". 150 customizations, eg "rh3" or "rusty1".
151 151
152 Using this automatically adds a checksum of the .c files and the 152 Using this automatically adds a checksum of the .c files and the
153 local headers in "srcversion". 153 local headers in "srcversion".
154 */ 154 */
155 #define MODULE_VERSION(_version) MODULE_INFO(version, _version) 155 #define MODULE_VERSION(_version) MODULE_INFO(version, _version)
156 156
157 /* Optional firmware file (or files) needed by the module 157 /* Optional firmware file (or files) needed by the module
158 * format is simply firmware file name. Multiple firmware 158 * format is simply firmware file name. Multiple firmware
159 * files require multiple MODULE_FIRMWARE() specifiers */ 159 * files require multiple MODULE_FIRMWARE() specifiers */
160 #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) 160 #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware)
161 161
162 /* Given an address, look for it in the exception tables */ 162 /* Given an address, look for it in the exception tables */
163 const struct exception_table_entry *search_exception_tables(unsigned long add); 163 const struct exception_table_entry *search_exception_tables(unsigned long add);
164 164
165 struct notifier_block; 165 struct notifier_block;
166 166
167 #ifdef CONFIG_MODULES 167 #ifdef CONFIG_MODULES
168 168
169 /* Get/put a kernel symbol (calls must be symmetric) */ 169 /* Get/put a kernel symbol (calls must be symmetric) */
170 void *__symbol_get(const char *symbol); 170 void *__symbol_get(const char *symbol);
171 void *__symbol_get_gpl(const char *symbol); 171 void *__symbol_get_gpl(const char *symbol);
172 #define symbol_get(x) ((typeof(&x))(__symbol_get(MODULE_SYMBOL_PREFIX #x))) 172 #define symbol_get(x) ((typeof(&x))(__symbol_get(MODULE_SYMBOL_PREFIX #x)))
173 173
174 #ifndef __GENKSYMS__ 174 #ifndef __GENKSYMS__
175 #ifdef CONFIG_MODVERSIONS 175 #ifdef CONFIG_MODVERSIONS
176 /* Mark the CRC weak since genksyms apparently decides not to 176 /* Mark the CRC weak since genksyms apparently decides not to
177 * generate a checksums for some symbols */ 177 * generate a checksums for some symbols */
178 #define __CRC_SYMBOL(sym, sec) \ 178 #define __CRC_SYMBOL(sym, sec) \
179 extern void *__crc_##sym __attribute__((weak)); \ 179 extern void *__crc_##sym __attribute__((weak)); \
180 static const unsigned long __kcrctab_##sym \ 180 static const unsigned long __kcrctab_##sym \
181 __attribute_used__ \ 181 __attribute_used__ \
182 __attribute__((section("__kcrctab" sec), unused)) \ 182 __attribute__((section("__kcrctab" sec), unused)) \
183 = (unsigned long) &__crc_##sym; 183 = (unsigned long) &__crc_##sym;
184 #else 184 #else
185 #define __CRC_SYMBOL(sym, sec) 185 #define __CRC_SYMBOL(sym, sec)
186 #endif 186 #endif
187 187
188 /* For every exported symbol, place a struct in the __ksymtab section */ 188 /* For every exported symbol, place a struct in the __ksymtab section */
189 #define __EXPORT_SYMBOL(sym, sec) \ 189 #define __EXPORT_SYMBOL(sym, sec) \
190 extern typeof(sym) sym; \ 190 extern typeof(sym) sym; \
191 __CRC_SYMBOL(sym, sec) \ 191 __CRC_SYMBOL(sym, sec) \
192 static const char __kstrtab_##sym[] \ 192 static const char __kstrtab_##sym[] \
193 __attribute__((section("__ksymtab_strings"))) \ 193 __attribute__((section("__ksymtab_strings"))) \
194 = MODULE_SYMBOL_PREFIX #sym; \ 194 = MODULE_SYMBOL_PREFIX #sym; \
195 static const struct kernel_symbol __ksymtab_##sym \ 195 static const struct kernel_symbol __ksymtab_##sym \
196 __attribute_used__ \ 196 __attribute_used__ \
197 __attribute__((section("__ksymtab" sec), unused)) \ 197 __attribute__((section("__ksymtab" sec), unused)) \
198 = { (unsigned long)&sym, __kstrtab_##sym } 198 = { (unsigned long)&sym, __kstrtab_##sym }
199 199
200 #define EXPORT_SYMBOL(sym) \ 200 #define EXPORT_SYMBOL(sym) \
201 __EXPORT_SYMBOL(sym, "") 201 __EXPORT_SYMBOL(sym, "")
202 202
203 #define EXPORT_SYMBOL_GPL(sym) \ 203 #define EXPORT_SYMBOL_GPL(sym) \
204 __EXPORT_SYMBOL(sym, "_gpl") 204 __EXPORT_SYMBOL(sym, "_gpl")
205 205
206 #define EXPORT_SYMBOL_GPL_FUTURE(sym) \ 206 #define EXPORT_SYMBOL_GPL_FUTURE(sym) \
207 __EXPORT_SYMBOL(sym, "_gpl_future") 207 __EXPORT_SYMBOL(sym, "_gpl_future")
208 208
209 209
210 #ifdef CONFIG_UNUSED_SYMBOLS 210 #ifdef CONFIG_UNUSED_SYMBOLS
211 #define EXPORT_UNUSED_SYMBOL(sym) __EXPORT_SYMBOL(sym, "_unused") 211 #define EXPORT_UNUSED_SYMBOL(sym) __EXPORT_SYMBOL(sym, "_unused")
212 #define EXPORT_UNUSED_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_unused_gpl") 212 #define EXPORT_UNUSED_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_unused_gpl")
213 #else 213 #else
214 #define EXPORT_UNUSED_SYMBOL(sym) 214 #define EXPORT_UNUSED_SYMBOL(sym)
215 #define EXPORT_UNUSED_SYMBOL_GPL(sym) 215 #define EXPORT_UNUSED_SYMBOL_GPL(sym)
216 #endif 216 #endif
217 217
218 #endif 218 #endif
219 219
220 struct module_ref 220 struct module_ref
221 { 221 {
222 local_t count; 222 local_t count;
223 } ____cacheline_aligned; 223 } ____cacheline_aligned;
224 224
225 enum module_state 225 enum module_state
226 { 226 {
227 MODULE_STATE_LIVE, 227 MODULE_STATE_LIVE,
228 MODULE_STATE_COMING, 228 MODULE_STATE_COMING,
229 MODULE_STATE_GOING, 229 MODULE_STATE_GOING,
230 }; 230 };
231 231
232 /* Similar stuff for section attributes. */ 232 /* Similar stuff for section attributes. */
233 struct module_sect_attr 233 struct module_sect_attr
234 { 234 {
235 struct module_attribute mattr; 235 struct module_attribute mattr;
236 char *name; 236 char *name;
237 unsigned long address; 237 unsigned long address;
238 }; 238 };
239 239
240 struct module_sect_attrs 240 struct module_sect_attrs
241 { 241 {
242 struct attribute_group grp; 242 struct attribute_group grp;
243 int nsections; 243 int nsections;
244 struct module_sect_attr attrs[0]; 244 struct module_sect_attr attrs[0];
245 }; 245 };
246 246
247 struct module_param_attrs; 247 struct module_param_attrs;
248 248
249 struct module 249 struct module
250 { 250 {
251 enum module_state state; 251 enum module_state state;
252 252
253 /* Member of list of modules */ 253 /* Member of list of modules */
254 struct list_head list; 254 struct list_head list;
255 255
256 /* Unique handle for this module */ 256 /* Unique handle for this module */
257 char name[MODULE_NAME_LEN]; 257 char name[MODULE_NAME_LEN];
258 258
259 /* Sysfs stuff. */ 259 /* Sysfs stuff. */
260 struct module_kobject mkobj; 260 struct module_kobject mkobj;
261 struct module_param_attrs *param_attrs; 261 struct module_param_attrs *param_attrs;
262 struct module_attribute *modinfo_attrs; 262 struct module_attribute *modinfo_attrs;
263 const char *version; 263 const char *version;
264 const char *srcversion; 264 const char *srcversion;
265 struct kobject *holders_dir; 265 struct kobject *holders_dir;
266 266
267 /* Exported symbols */ 267 /* Exported symbols */
268 const struct kernel_symbol *syms; 268 const struct kernel_symbol *syms;
269 unsigned int num_syms; 269 unsigned int num_syms;
270 const unsigned long *crcs; 270 const unsigned long *crcs;
271 271
272 /* GPL-only exported symbols. */ 272 /* GPL-only exported symbols. */
273 const struct kernel_symbol *gpl_syms; 273 const struct kernel_symbol *gpl_syms;
274 unsigned int num_gpl_syms; 274 unsigned int num_gpl_syms;
275 const unsigned long *gpl_crcs; 275 const unsigned long *gpl_crcs;
276 276
277 /* unused exported symbols. */ 277 /* unused exported symbols. */
278 const struct kernel_symbol *unused_syms; 278 const struct kernel_symbol *unused_syms;
279 unsigned int num_unused_syms; 279 unsigned int num_unused_syms;
280 const unsigned long *unused_crcs; 280 const unsigned long *unused_crcs;
281 /* GPL-only, unused exported symbols. */ 281 /* GPL-only, unused exported symbols. */
282 const struct kernel_symbol *unused_gpl_syms; 282 const struct kernel_symbol *unused_gpl_syms;
283 unsigned int num_unused_gpl_syms; 283 unsigned int num_unused_gpl_syms;
284 const unsigned long *unused_gpl_crcs; 284 const unsigned long *unused_gpl_crcs;
285 285
286 /* symbols that will be GPL-only in the near future. */ 286 /* symbols that will be GPL-only in the near future. */
287 const struct kernel_symbol *gpl_future_syms; 287 const struct kernel_symbol *gpl_future_syms;
288 unsigned int num_gpl_future_syms; 288 unsigned int num_gpl_future_syms;
289 const unsigned long *gpl_future_crcs; 289 const unsigned long *gpl_future_crcs;
290 290
291 /* Exception table */ 291 /* Exception table */
292 unsigned int num_exentries; 292 unsigned int num_exentries;
293 const struct exception_table_entry *extable; 293 const struct exception_table_entry *extable;
294 294
295 /* Startup function. */ 295 /* Startup function. */
296 int (*init)(void); 296 int (*init)(void);
297 297
298 /* If this is non-NULL, vfree after init() returns */ 298 /* If this is non-NULL, vfree after init() returns */
299 void *module_init; 299 void *module_init;
300 300
301 /* Here is the actual code + data, vfree'd on unload. */ 301 /* Here is the actual code + data, vfree'd on unload. */
302 void *module_core; 302 void *module_core;
303 303
304 /* Here are the sizes of the init and core sections */ 304 /* Here are the sizes of the init and core sections */
305 unsigned long init_size, core_size; 305 unsigned long init_size, core_size;
306 306
307 /* The size of the executable code in each section. */ 307 /* The size of the executable code in each section. */
308 unsigned long init_text_size, core_text_size; 308 unsigned long init_text_size, core_text_size;
309 309
310 /* The handle returned from unwind_add_table. */ 310 /* The handle returned from unwind_add_table. */
311 void *unwind_info; 311 void *unwind_info;
312 312
313 /* Arch-specific module values */ 313 /* Arch-specific module values */
314 struct mod_arch_specific arch; 314 struct mod_arch_specific arch;
315 315
316 /* Am I unsafe to unload? */ 316 /* Am I unsafe to unload? */
317 int unsafe; 317 int unsafe;
318 318
319 unsigned int taints; /* same bits as kernel:tainted */ 319 unsigned int taints; /* same bits as kernel:tainted */
320 320
321 #ifdef CONFIG_GENERIC_BUG 321 #ifdef CONFIG_GENERIC_BUG
322 /* Support for BUG */ 322 /* Support for BUG */
323 struct list_head bug_list; 323 struct list_head bug_list;
324 struct bug_entry *bug_table; 324 struct bug_entry *bug_table;
325 unsigned num_bugs; 325 unsigned num_bugs;
326 #endif 326 #endif
327 327
328 #ifdef CONFIG_MODULE_UNLOAD 328 #ifdef CONFIG_MODULE_UNLOAD
329 /* Reference counts */ 329 /* Reference counts */
330 struct module_ref ref[NR_CPUS]; 330 struct module_ref ref[NR_CPUS];
331 331
332 /* What modules depend on me? */ 332 /* What modules depend on me? */
333 struct list_head modules_which_use_me; 333 struct list_head modules_which_use_me;
334 334
335 /* Who is waiting for us to be unloaded */ 335 /* Who is waiting for us to be unloaded */
336 struct task_struct *waiter; 336 struct task_struct *waiter;
337 337
338 /* Destruction function. */ 338 /* Destruction function. */
339 void (*exit)(void); 339 void (*exit)(void);
340 #endif 340 #endif
341 341
342 #ifdef CONFIG_KALLSYMS 342 #ifdef CONFIG_KALLSYMS
343 /* We keep the symbol and string tables for kallsyms. */ 343 /* We keep the symbol and string tables for kallsyms. */
344 Elf_Sym *symtab; 344 Elf_Sym *symtab;
345 unsigned long num_symtab; 345 unsigned long num_symtab;
346 char *strtab; 346 char *strtab;
347 347
348 /* Section attributes */ 348 /* Section attributes */
349 struct module_sect_attrs *sect_attrs; 349 struct module_sect_attrs *sect_attrs;
350 #endif 350 #endif
351 351
352 /* Per-cpu data. */ 352 /* Per-cpu data. */
353 void *percpu; 353 void *percpu;
354 354
355 /* The command line arguments (may be mangled). People like 355 /* The command line arguments (may be mangled). People like
356 keeping pointers to this stuff */ 356 keeping pointers to this stuff */
357 char *args; 357 char *args;
358 }; 358 };
359 359
360 /* FIXME: It'd be nice to isolate modules during init, too, so they 360 /* FIXME: It'd be nice to isolate modules during init, too, so they
361 aren't used before they (may) fail. But presently too much code 361 aren't used before they (may) fail. But presently too much code
362 (IDE & SCSI) require entry into the module during init.*/ 362 (IDE & SCSI) require entry into the module during init.*/
363 static inline int module_is_live(struct module *mod) 363 static inline int module_is_live(struct module *mod)
364 { 364 {
365 return mod->state != MODULE_STATE_GOING; 365 return mod->state != MODULE_STATE_GOING;
366 } 366 }
367 367
368 /* Is this address in a module? (second is with no locks, for oops) */ 368 /* Is this address in a module? (second is with no locks, for oops) */
369 struct module *module_text_address(unsigned long addr); 369 struct module *module_text_address(unsigned long addr);
370 struct module *__module_text_address(unsigned long addr); 370 struct module *__module_text_address(unsigned long addr);
371 int is_module_address(unsigned long addr); 371 int is_module_address(unsigned long addr);
372 372
373 /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if 373 /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
374 symnum out of range. */ 374 symnum out of range. */
375 int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, 375 int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
376 char *name, char *module_name, int *exported); 376 char *name, char *module_name, int *exported);
377 377
378 /* Look for this name: can be of form module:name. */ 378 /* Look for this name: can be of form module:name. */
379 unsigned long module_kallsyms_lookup_name(const char *name); 379 unsigned long module_kallsyms_lookup_name(const char *name);
380 380
381 extern void __module_put_and_exit(struct module *mod, long code) 381 extern void __module_put_and_exit(struct module *mod, long code)
382 __attribute__((noreturn)); 382 __attribute__((noreturn));
383 #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code); 383 #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code);
384 384
385 #ifdef CONFIG_MODULE_UNLOAD 385 #ifdef CONFIG_MODULE_UNLOAD
386 unsigned int module_refcount(struct module *mod); 386 unsigned int module_refcount(struct module *mod);
387 void __symbol_put(const char *symbol); 387 void __symbol_put(const char *symbol);
388 #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) 388 #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x)
389 void symbol_put_addr(void *addr); 389 void symbol_put_addr(void *addr);
390 390
391 /* Sometimes we know we already have a refcount, and it's easier not 391 /* Sometimes we know we already have a refcount, and it's easier not
392 to handle the error case (which only happens with rmmod --wait). */ 392 to handle the error case (which only happens with rmmod --wait). */
393 static inline void __module_get(struct module *module) 393 static inline void __module_get(struct module *module)
394 { 394 {
395 if (module) { 395 if (module) {
396 BUG_ON(module_refcount(module) == 0); 396 BUG_ON(module_refcount(module) == 0);
397 local_inc(&module->ref[get_cpu()].count); 397 local_inc(&module->ref[get_cpu()].count);
398 put_cpu(); 398 put_cpu();
399 } 399 }
400 } 400 }
401 401
402 static inline int try_module_get(struct module *module) 402 static inline int try_module_get(struct module *module)
403 { 403 {
404 int ret = 1; 404 int ret = 1;
405 405
406 if (module) { 406 if (module) {
407 unsigned int cpu = get_cpu(); 407 unsigned int cpu = get_cpu();
408 if (likely(module_is_live(module))) 408 if (likely(module_is_live(module)))
409 local_inc(&module->ref[cpu].count); 409 local_inc(&module->ref[cpu].count);
410 else 410 else
411 ret = 0; 411 ret = 0;
412 put_cpu(); 412 put_cpu();
413 } 413 }
414 return ret; 414 return ret;
415 } 415 }
416 416
417 extern void module_put(struct module *module); 417 extern void module_put(struct module *module);
418 418
419 #else /*!CONFIG_MODULE_UNLOAD*/ 419 #else /*!CONFIG_MODULE_UNLOAD*/
420 static inline int try_module_get(struct module *module) 420 static inline int try_module_get(struct module *module)
421 { 421 {
422 return !module || module_is_live(module); 422 return !module || module_is_live(module);
423 } 423 }
424 static inline void module_put(struct module *module) 424 static inline void module_put(struct module *module)
425 { 425 {
426 } 426 }
427 static inline void __module_get(struct module *module) 427 static inline void __module_get(struct module *module)
428 { 428 {
429 } 429 }
430 #define symbol_put(x) do { } while(0) 430 #define symbol_put(x) do { } while(0)
431 #define symbol_put_addr(p) do { } while(0) 431 #define symbol_put_addr(p) do { } while(0)
432 432
433 #endif /* CONFIG_MODULE_UNLOAD */ 433 #endif /* CONFIG_MODULE_UNLOAD */
434 434
435 /* This is a #define so the string doesn't get put in every .o file */ 435 /* This is a #define so the string doesn't get put in every .o file */
436 #define module_name(mod) \ 436 #define module_name(mod) \
437 ({ \ 437 ({ \
438 struct module *__mod = (mod); \ 438 struct module *__mod = (mod); \
439 __mod ? __mod->name : "kernel"; \ 439 __mod ? __mod->name : "kernel"; \
440 }) 440 })
441 441
442 #define __unsafe(mod) \ 442 #define __unsafe(mod) \
443 do { \ 443 do { \
444 if (mod && !(mod)->unsafe) { \ 444 if (mod && !(mod)->unsafe) { \
445 printk(KERN_WARNING \ 445 printk(KERN_WARNING \
446 "Module %s cannot be unloaded due to unsafe usage in" \ 446 "Module %s cannot be unloaded due to unsafe usage in" \
447 " %s:%u\n", (mod)->name, __FILE__, __LINE__); \ 447 " %s:%u\n", (mod)->name, __FILE__, __LINE__); \
448 (mod)->unsafe = 1; \ 448 (mod)->unsafe = 1; \
449 } \ 449 } \
450 } while(0) 450 } while(0)
451 451
452 /* For kallsyms to ask for address resolution. NULL means not found. */ 452 /* For kallsyms to ask for address resolution. NULL means not found. */
453 const char *module_address_lookup(unsigned long addr, 453 const char *module_address_lookup(unsigned long addr,
454 unsigned long *symbolsize, 454 unsigned long *symbolsize,
455 unsigned long *offset, 455 unsigned long *offset,
456 char **modname); 456 char **modname);
457 int lookup_module_symbol_name(unsigned long addr, char *symname); 457 int lookup_module_symbol_name(unsigned long addr, char *symname);
458 int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name);
458 459
459 /* For extable.c to search modules' exception tables. */ 460 /* For extable.c to search modules' exception tables. */
460 const struct exception_table_entry *search_module_extables(unsigned long addr); 461 const struct exception_table_entry *search_module_extables(unsigned long addr);
461 462
462 int register_module_notifier(struct notifier_block * nb); 463 int register_module_notifier(struct notifier_block * nb);
463 int unregister_module_notifier(struct notifier_block * nb); 464 int unregister_module_notifier(struct notifier_block * nb);
464 465
465 extern void print_modules(void); 466 extern void print_modules(void);
466 467
467 #else /* !CONFIG_MODULES... */ 468 #else /* !CONFIG_MODULES... */
468 #define EXPORT_SYMBOL(sym) 469 #define EXPORT_SYMBOL(sym)
469 #define EXPORT_SYMBOL_GPL(sym) 470 #define EXPORT_SYMBOL_GPL(sym)
470 #define EXPORT_SYMBOL_GPL_FUTURE(sym) 471 #define EXPORT_SYMBOL_GPL_FUTURE(sym)
471 #define EXPORT_UNUSED_SYMBOL(sym) 472 #define EXPORT_UNUSED_SYMBOL(sym)
472 #define EXPORT_UNUSED_SYMBOL_GPL(sym) 473 #define EXPORT_UNUSED_SYMBOL_GPL(sym)
473 474
474 /* Given an address, look for it in the exception tables. */ 475 /* Given an address, look for it in the exception tables. */
475 static inline const struct exception_table_entry * 476 static inline const struct exception_table_entry *
476 search_module_extables(unsigned long addr) 477 search_module_extables(unsigned long addr)
477 { 478 {
478 return NULL; 479 return NULL;
479 } 480 }
480 481
481 /* Is this address in a module? */ 482 /* Is this address in a module? */
482 static inline struct module *module_text_address(unsigned long addr) 483 static inline struct module *module_text_address(unsigned long addr)
483 { 484 {
484 return NULL; 485 return NULL;
485 } 486 }
486 487
487 /* Is this address in a module? (don't take a lock, we're oopsing) */ 488 /* Is this address in a module? (don't take a lock, we're oopsing) */
488 static inline struct module *__module_text_address(unsigned long addr) 489 static inline struct module *__module_text_address(unsigned long addr)
489 { 490 {
490 return NULL; 491 return NULL;
491 } 492 }
492 493
493 static inline int is_module_address(unsigned long addr) 494 static inline int is_module_address(unsigned long addr)
494 { 495 {
495 return 0; 496 return 0;
496 } 497 }
497 498
498 /* Get/put a kernel symbol (calls should be symmetric) */ 499 /* Get/put a kernel symbol (calls should be symmetric) */
499 #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) 500 #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); })
500 #define symbol_put(x) do { } while(0) 501 #define symbol_put(x) do { } while(0)
501 #define symbol_put_addr(x) do { } while(0) 502 #define symbol_put_addr(x) do { } while(0)
502 503
503 static inline void __module_get(struct module *module) 504 static inline void __module_get(struct module *module)
504 { 505 {
505 } 506 }
506 507
507 static inline int try_module_get(struct module *module) 508 static inline int try_module_get(struct module *module)
508 { 509 {
509 return 1; 510 return 1;
510 } 511 }
511 512
512 static inline void module_put(struct module *module) 513 static inline void module_put(struct module *module)
513 { 514 {
514 } 515 }
515 516
516 #define module_name(mod) "kernel" 517 #define module_name(mod) "kernel"
517 518
518 #define __unsafe(mod) 519 #define __unsafe(mod)
519 520
520 /* For kallsyms to ask for address resolution. NULL means not found. */ 521 /* For kallsyms to ask for address resolution. NULL means not found. */
521 static inline const char *module_address_lookup(unsigned long addr, 522 static inline const char *module_address_lookup(unsigned long addr,
522 unsigned long *symbolsize, 523 unsigned long *symbolsize,
523 unsigned long *offset, 524 unsigned long *offset,
524 char **modname) 525 char **modname)
525 { 526 {
526 return NULL; 527 return NULL;
527 } 528 }
528 529
529 static inline int lookup_module_symbol_name(unsigned long addr, char *symname) 530 static inline int lookup_module_symbol_name(unsigned long addr, char *symname)
531 {
532 return -ERANGE;
533 }
534
535 static inline int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name)
530 { 536 {
531 return -ERANGE; 537 return -ERANGE;
532 } 538 }
533 539
534 static inline int module_get_kallsym(unsigned int symnum, unsigned long *value, 540 static inline int module_get_kallsym(unsigned int symnum, unsigned long *value,
535 char *type, char *name, 541 char *type, char *name,
536 char *module_name, int *exported) 542 char *module_name, int *exported)
537 { 543 {
538 return -ERANGE; 544 return -ERANGE;
539 } 545 }
540 546
541 static inline unsigned long module_kallsyms_lookup_name(const char *name) 547 static inline unsigned long module_kallsyms_lookup_name(const char *name)
542 { 548 {
543 return 0; 549 return 0;
544 } 550 }
545 551
546 static inline int register_module_notifier(struct notifier_block * nb) 552 static inline int register_module_notifier(struct notifier_block * nb)
547 { 553 {
548 /* no events will happen anyway, so this can always succeed */ 554 /* no events will happen anyway, so this can always succeed */
549 return 0; 555 return 0;
550 } 556 }
551 557
552 static inline int unregister_module_notifier(struct notifier_block * nb) 558 static inline int unregister_module_notifier(struct notifier_block * nb)
553 { 559 {
554 return 0; 560 return 0;
555 } 561 }
556 562
557 #define module_put_and_exit(code) do_exit(code) 563 #define module_put_and_exit(code) do_exit(code)
558 564
559 static inline void print_modules(void) 565 static inline void print_modules(void)
560 { 566 {
561 } 567 }
562 568
563 #endif /* CONFIG_MODULES */ 569 #endif /* CONFIG_MODULES */
564 570
565 struct device_driver; 571 struct device_driver;
566 #ifdef CONFIG_SYSFS 572 #ifdef CONFIG_SYSFS
567 struct module; 573 struct module;
568 574
569 extern struct kset module_subsys; 575 extern struct kset module_subsys;
570 576
571 int mod_sysfs_init(struct module *mod); 577 int mod_sysfs_init(struct module *mod);
572 int mod_sysfs_setup(struct module *mod, 578 int mod_sysfs_setup(struct module *mod,
573 struct kernel_param *kparam, 579 struct kernel_param *kparam,
574 unsigned int num_params); 580 unsigned int num_params);
575 int module_add_modinfo_attrs(struct module *mod); 581 int module_add_modinfo_attrs(struct module *mod);
576 void module_remove_modinfo_attrs(struct module *mod); 582 void module_remove_modinfo_attrs(struct module *mod);
577 583
578 #else /* !CONFIG_SYSFS */ 584 #else /* !CONFIG_SYSFS */
579 585
580 static inline int mod_sysfs_init(struct module *mod) 586 static inline int mod_sysfs_init(struct module *mod)
581 { 587 {
582 return 0; 588 return 0;
583 } 589 }
584 590
585 static inline int mod_sysfs_setup(struct module *mod, 591 static inline int mod_sysfs_setup(struct module *mod,
586 struct kernel_param *kparam, 592 struct kernel_param *kparam,
587 unsigned int num_params) 593 unsigned int num_params)
588 { 594 {
589 return 0; 595 return 0;
590 } 596 }
591 597
592 static inline int module_add_modinfo_attrs(struct module *mod) 598 static inline int module_add_modinfo_attrs(struct module *mod)
593 { 599 {
594 return 0; 600 return 0;
595 } 601 }
596 602
597 static inline void module_remove_modinfo_attrs(struct module *mod) 603 static inline void module_remove_modinfo_attrs(struct module *mod)
598 { } 604 { }
599 605
600 #endif /* CONFIG_SYSFS */ 606 #endif /* CONFIG_SYSFS */
601 607
602 #if defined(CONFIG_SYSFS) && defined(CONFIG_MODULES) 608 #if defined(CONFIG_SYSFS) && defined(CONFIG_MODULES)
603 609
604 void module_add_driver(struct module *mod, struct device_driver *drv); 610 void module_add_driver(struct module *mod, struct device_driver *drv);
605 void module_remove_driver(struct device_driver *drv); 611 void module_remove_driver(struct device_driver *drv);
606 612
607 #else /* not both CONFIG_SYSFS && CONFIG_MODULES */ 613 #else /* not both CONFIG_SYSFS && CONFIG_MODULES */
608 614
609 static inline void module_add_driver(struct module *mod, struct device_driver *drv) 615 static inline void module_add_driver(struct module *mod, struct device_driver *drv)
610 { } 616 { }
611 617
612 static inline void module_remove_driver(struct device_driver *drv) 618 static inline void module_remove_driver(struct device_driver *drv)
613 { } 619 { }
614 620
615 #endif 621 #endif
616 622
617 #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) 623 #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x)
618 624
619 /* BELOW HERE ALL THESE ARE OBSOLETE AND WILL VANISH */ 625 /* BELOW HERE ALL THESE ARE OBSOLETE AND WILL VANISH */
620 626
621 #define __MODULE_STRING(x) __stringify(x) 627 #define __MODULE_STRING(x) __stringify(x)
622 628
623 #endif /* _LINUX_MODULE_H */ 629 #endif /* _LINUX_MODULE_H */
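The module side of the fix lives in kernel/module.c, which is not part of this excerpt. A sketch of what lookup_module_symbol_attrs() has to do, using placeholder helpers (find_module_by_address() and module_symbol_near() are illustrative names, not real kernel functions; the real code open-codes the module-list walk): resolve the symbol and copy both strings into the caller's buffers while module_mutex is held, so rmmod cannot free them mid-read.

#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/string.h>

/* Placeholder declarations -- illustrative names only, not real kernel
 * symbols; the real code walks the module list directly in kernel/module.c. */
extern struct mutex module_mutex;
struct module *find_module_by_address(unsigned long addr);
const char *module_symbol_near(struct module *mod, unsigned long addr,
			       unsigned long *size, unsigned long *offset);

/* Sketch of the locking and copy-out shape of the module-side lookup. */
int lookup_module_symbol_attrs_sketch(unsigned long addr, unsigned long *size,
				      unsigned long *offset, char *modname,
				      char *name)
{
	struct module *mod;
	const char *sym;
	int ret = -ERANGE;

	mutex_lock(&module_mutex);		/* excludes a concurrent rmmod */
	mod = find_module_by_address(addr);
	if (mod && module_is_live(mod)) {
		sym = module_symbol_near(mod, addr, size, offset);
		if (sym) {
			strlcpy(modname, mod->name, MODULE_NAME_LEN);
			strlcpy(name, sym, KSYM_NAME_LEN + 1);
			ret = 0;
		}
	}
	mutex_unlock(&module_mutex);
	return ret;
}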
624 630
kernel/kallsyms.c
1 /* 1 /*
2 * kallsyms.c: in-kernel printing of symbolic oopses and stack traces. 2 * kallsyms.c: in-kernel printing of symbolic oopses and stack traces.
3 * 3 *
4 * Rewritten and vastly simplified by Rusty Russell for in-kernel 4 * Rewritten and vastly simplified by Rusty Russell for in-kernel
5 * module loader: 5 * module loader:
6 * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 6 * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
7 * 7 *
8 * ChangeLog: 8 * ChangeLog:
9 * 9 *
10 * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com> 10 * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
11 * Changed the compression method from stem compression to "table lookup" 11 * Changed the compression method from stem compression to "table lookup"
12 * compression (see scripts/kallsyms.c for a more complete description) 12 * compression (see scripts/kallsyms.c for a more complete description)
13 */ 13 */
14 #include <linux/kallsyms.h> 14 #include <linux/kallsyms.h>
15 #include <linux/module.h> 15 #include <linux/module.h>
16 #include <linux/init.h> 16 #include <linux/init.h>
17 #include <linux/seq_file.h> 17 #include <linux/seq_file.h>
18 #include <linux/fs.h> 18 #include <linux/fs.h>
19 #include <linux/err.h> 19 #include <linux/err.h>
20 #include <linux/proc_fs.h> 20 #include <linux/proc_fs.h>
21 #include <linux/sched.h> /* for cond_resched */ 21 #include <linux/sched.h> /* for cond_resched */
22 #include <linux/mm.h> 22 #include <linux/mm.h>
23 #include <linux/ctype.h> 23 #include <linux/ctype.h>
24 24
25 #include <asm/sections.h> 25 #include <asm/sections.h>
26 26
27 #ifdef CONFIG_KALLSYMS_ALL 27 #ifdef CONFIG_KALLSYMS_ALL
28 #define all_var 1 28 #define all_var 1
29 #else 29 #else
30 #define all_var 0 30 #define all_var 0
31 #endif 31 #endif
32 32
33 /* These will be re-linked against their real values during the second link stage */ 33 /* These will be re-linked against their real values during the second link stage */
34 extern const unsigned long kallsyms_addresses[] __attribute__((weak)); 34 extern const unsigned long kallsyms_addresses[] __attribute__((weak));
35 extern const unsigned long kallsyms_num_syms __attribute__((weak)); 35 extern const unsigned long kallsyms_num_syms __attribute__((weak));
36 extern const u8 kallsyms_names[] __attribute__((weak)); 36 extern const u8 kallsyms_names[] __attribute__((weak));
37 37
38 extern const u8 kallsyms_token_table[] __attribute__((weak)); 38 extern const u8 kallsyms_token_table[] __attribute__((weak));
39 extern const u16 kallsyms_token_index[] __attribute__((weak)); 39 extern const u16 kallsyms_token_index[] __attribute__((weak));
40 40
41 extern const unsigned long kallsyms_markers[] __attribute__((weak)); 41 extern const unsigned long kallsyms_markers[] __attribute__((weak));
42 42
43 static inline int is_kernel_inittext(unsigned long addr) 43 static inline int is_kernel_inittext(unsigned long addr)
44 { 44 {
45 if (addr >= (unsigned long)_sinittext 45 if (addr >= (unsigned long)_sinittext
46 && addr <= (unsigned long)_einittext) 46 && addr <= (unsigned long)_einittext)
47 return 1; 47 return 1;
48 return 0; 48 return 0;
49 } 49 }
50 50
51 static inline int is_kernel_extratext(unsigned long addr) 51 static inline int is_kernel_extratext(unsigned long addr)
52 { 52 {
53 if (addr >= (unsigned long)_sextratext 53 if (addr >= (unsigned long)_sextratext
54 && addr <= (unsigned long)_eextratext) 54 && addr <= (unsigned long)_eextratext)
55 return 1; 55 return 1;
56 return 0; 56 return 0;
57 } 57 }
58 58
59 static inline int is_kernel_text(unsigned long addr) 59 static inline int is_kernel_text(unsigned long addr)
60 { 60 {
61 if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) 61 if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext)
62 return 1; 62 return 1;
63 return in_gate_area_no_task(addr); 63 return in_gate_area_no_task(addr);
64 } 64 }
65 65
66 static inline int is_kernel(unsigned long addr) 66 static inline int is_kernel(unsigned long addr)
67 { 67 {
68 if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) 68 if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
69 return 1; 69 return 1;
70 return in_gate_area_no_task(addr); 70 return in_gate_area_no_task(addr);
71 } 71 }
72 72
73 static int is_ksym_addr(unsigned long addr) 73 static int is_ksym_addr(unsigned long addr)
74 { 74 {
75 if (all_var) 75 if (all_var)
76 return is_kernel(addr); 76 return is_kernel(addr);
77 77
78 return is_kernel_text(addr) || is_kernel_inittext(addr) || 78 return is_kernel_text(addr) || is_kernel_inittext(addr) ||
79 is_kernel_extratext(addr); 79 is_kernel_extratext(addr);
80 } 80 }
81 81
82 /* expand a compressed symbol data into the resulting uncompressed string, 82 /* expand a compressed symbol data into the resulting uncompressed string,
83 given the offset to where the symbol is in the compressed stream */ 83 given the offset to where the symbol is in the compressed stream */
84 static unsigned int kallsyms_expand_symbol(unsigned int off, char *result) 84 static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
85 { 85 {
86 int len, skipped_first = 0; 86 int len, skipped_first = 0;
87 const u8 *tptr, *data; 87 const u8 *tptr, *data;
88 88
89 /* get the compressed symbol length from the first symbol byte */ 89 /* get the compressed symbol length from the first symbol byte */
90 data = &kallsyms_names[off]; 90 data = &kallsyms_names[off];
91 len = *data; 91 len = *data;
92 data++; 92 data++;
93 93
94 /* update the offset to return the offset for the next symbol on 94 /* update the offset to return the offset for the next symbol on
95 * the compressed stream */ 95 * the compressed stream */
96 off += len + 1; 96 off += len + 1;
97 97
98 /* for every byte on the compressed symbol data, copy the table 98 /* for every byte on the compressed symbol data, copy the table
99 entry for that byte */ 99 entry for that byte */
100 while(len) { 100 while(len) {
101 tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ]; 101 tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ];
102 data++; 102 data++;
103 len--; 103 len--;
104 104
105 while (*tptr) { 105 while (*tptr) {
106 if(skipped_first) { 106 if(skipped_first) {
107 *result = *tptr; 107 *result = *tptr;
108 result++; 108 result++;
109 } else 109 } else
110 skipped_first = 1; 110 skipped_first = 1;
111 tptr++; 111 tptr++;
112 } 112 }
113 } 113 }
114 114
115 *result = '\0'; 115 *result = '\0';
116 116
117 /* return to offset to the next symbol */ 117 /* return to offset to the next symbol */
118 return off; 118 return off;
119 } 119 }
120 120
121 /* get symbol type information. This is encoded as a single char at the 121 /* get symbol type information. This is encoded as a single char at the
122 * begining of the symbol name */ 122 * begining of the symbol name */
123 static char kallsyms_get_symbol_type(unsigned int off) 123 static char kallsyms_get_symbol_type(unsigned int off)
124 { 124 {
125 /* get just the first code, look it up in the token table, and return the 125 /* get just the first code, look it up in the token table, and return the
126 * first char from this token */ 126 * first char from this token */
127 return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ]; 127 return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ];
128 } 128 }
129 129
130 130
131 /* find the offset on the compressed stream given and index in the 131 /* find the offset on the compressed stream given and index in the
132 * kallsyms array */ 132 * kallsyms array */
133 static unsigned int get_symbol_offset(unsigned long pos) 133 static unsigned int get_symbol_offset(unsigned long pos)
134 { 134 {
135 const u8 *name; 135 const u8 *name;
136 int i; 136 int i;
137 137
138 /* use the closest marker we have. We have markers every 256 positions, 138 /* use the closest marker we have. We have markers every 256 positions,
139 * so that should be close enough */ 139 * so that should be close enough */
140 name = &kallsyms_names[ kallsyms_markers[pos>>8] ]; 140 name = &kallsyms_names[ kallsyms_markers[pos>>8] ];
141 141
142 /* sequentially scan all the symbols up to the point we're searching for. 142 /* sequentially scan all the symbols up to the point we're searching for.
143 * Every symbol is stored in a [<len>][<len> bytes of data] format, so we 143 * Every symbol is stored in a [<len>][<len> bytes of data] format, so we
144 * just need to add the len to the current pointer for every symbol we 144 * just need to add the len to the current pointer for every symbol we
145 * wish to skip */ 145 * wish to skip */
146 for(i = 0; i < (pos&0xFF); i++) 146 for(i = 0; i < (pos&0xFF); i++)
147 name = name + (*name) + 1; 147 name = name + (*name) + 1;
148 148
149 return name - kallsyms_names; 149 return name - kallsyms_names;
150 } 150 }
151 151
152 /* Lookup the address for this symbol. Returns 0 if not found. */ 152 /* Lookup the address for this symbol. Returns 0 if not found. */
153 unsigned long kallsyms_lookup_name(const char *name) 153 unsigned long kallsyms_lookup_name(const char *name)
154 { 154 {
155 char namebuf[KSYM_NAME_LEN+1]; 155 char namebuf[KSYM_NAME_LEN+1];
156 unsigned long i; 156 unsigned long i;
157 unsigned int off; 157 unsigned int off;
158 158
159 for (i = 0, off = 0; i < kallsyms_num_syms; i++) { 159 for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
160 off = kallsyms_expand_symbol(off, namebuf); 160 off = kallsyms_expand_symbol(off, namebuf);
161 161
162 if (strcmp(namebuf, name) == 0) 162 if (strcmp(namebuf, name) == 0)
163 return kallsyms_addresses[i]; 163 return kallsyms_addresses[i];
164 } 164 }
165 return module_kallsyms_lookup_name(name); 165 return module_kallsyms_lookup_name(name);
166 } 166 }
167 167
168 static unsigned long get_symbol_pos(unsigned long addr, 168 static unsigned long get_symbol_pos(unsigned long addr,
169 unsigned long *symbolsize, 169 unsigned long *symbolsize,
170 unsigned long *offset) 170 unsigned long *offset)
171 { 171 {
172 unsigned long symbol_start = 0, symbol_end = 0; 172 unsigned long symbol_start = 0, symbol_end = 0;
173 unsigned long i, low, high, mid; 173 unsigned long i, low, high, mid;
174 174
175 /* This kernel should never had been booted. */ 175 /* This kernel should never had been booted. */
176 BUG_ON(!kallsyms_addresses); 176 BUG_ON(!kallsyms_addresses);
177 177
178 /* do a binary search on the sorted kallsyms_addresses array */ 178 /* do a binary search on the sorted kallsyms_addresses array */
179 low = 0; 179 low = 0;
180 high = kallsyms_num_syms; 180 high = kallsyms_num_syms;
181 181
182 while (high - low > 1) { 182 while (high - low > 1) {
183 mid = (low + high) / 2; 183 mid = (low + high) / 2;
184 if (kallsyms_addresses[mid] <= addr) 184 if (kallsyms_addresses[mid] <= addr)
185 low = mid; 185 low = mid;
186 else 186 else
187 high = mid; 187 high = mid;
188 } 188 }
189 189
190 /* 190 /*
191 * search for the first aliased symbol. Aliased 191 * search for the first aliased symbol. Aliased
192 * symbols are symbols with the same address 192 * symbols are symbols with the same address
193 */ 193 */
194 while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low]) 194 while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low])
195 --low; 195 --low;
196 196
197 symbol_start = kallsyms_addresses[low]; 197 symbol_start = kallsyms_addresses[low];
198 198
199 /* Search for next non-aliased symbol */ 199 /* Search for next non-aliased symbol */
200 for (i = low + 1; i < kallsyms_num_syms; i++) { 200 for (i = low + 1; i < kallsyms_num_syms; i++) {
201 if (kallsyms_addresses[i] > symbol_start) { 201 if (kallsyms_addresses[i] > symbol_start) {
202 symbol_end = kallsyms_addresses[i]; 202 symbol_end = kallsyms_addresses[i];
203 break; 203 break;
204 } 204 }
205 } 205 }
206 206
207 /* if we found no next symbol, we use the end of the section */ 207 /* if we found no next symbol, we use the end of the section */
208 if (!symbol_end) { 208 if (!symbol_end) {
209 if (is_kernel_inittext(addr)) 209 if (is_kernel_inittext(addr))
210 symbol_end = (unsigned long)_einittext; 210 symbol_end = (unsigned long)_einittext;
211 else if (all_var) 211 else if (all_var)
212 symbol_end = (unsigned long)_end; 212 symbol_end = (unsigned long)_end;
213 else 213 else
214 symbol_end = (unsigned long)_etext; 214 symbol_end = (unsigned long)_etext;
215 } 215 }
216 216
217 if (symbolsize) 217 if (symbolsize)
218 *symbolsize = symbol_end - symbol_start; 218 *symbolsize = symbol_end - symbol_start;
219 if (offset) 219 if (offset)
220 *offset = addr - symbol_start; 220 *offset = addr - symbol_start;
221 221
222 return low; 222 return low;
223 } 223 }
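get_symbol_pos() reduces to three steps: a binary search over the sorted kallsyms_addresses array, a rewind over aliases (distinct symbols that share an address), and a forward scan for the next distinct address so that size and offset can be derived. A standalone sketch of that logic, using a made-up address table and a made-up section end in place of _etext:

/* Standalone sketch (not kernel code) of the get_symbol_pos() logic:
 * binary search on a sorted address table, rewind over aliases, then
 * derive size/offset from the next distinct address. */
#include <stdio.h>

static const unsigned long addrs[] = { 0x100, 0x180, 0x180, 0x200, 0x260 };
static const unsigned long nsyms = sizeof(addrs) / sizeof(addrs[0]);
static const unsigned long text_end = 0x300;    /* stand-in for _etext */

static unsigned long symbol_pos(unsigned long addr,
                                unsigned long *size, unsigned long *offset)
{
        unsigned long low = 0, high = nsyms, mid, i;
        unsigned long start, end = 0;

        while (high - low > 1) {                /* invariant: addrs[low] <= addr */
                mid = (low + high) / 2;
                if (addrs[mid] <= addr)
                        low = mid;
                else
                        high = mid;
        }

        while (low && addrs[low - 1] == addrs[low])     /* first alias */
                --low;

        start = addrs[low];
        for (i = low + 1; i < nsyms; i++)               /* next distinct address */
                if (addrs[i] > start) {
                        end = addrs[i];
                        break;
                }
        if (!end)
                end = text_end;

        *size = end - start;
        *offset = addr - start;
        return low;
}

int main(void)
{
        unsigned long size, offset;
        unsigned long pos = symbol_pos(0x1a0, &size, &offset);

        /* 0x1a0 falls in the aliased pair at 0x180: pos 1, offset 0x20, size 0x80 */
        printf("pos=%lu offset=%#lx size=%#lx\n", pos, offset, size);
        return 0;
}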
224 224
225 /* 225 /*
226 * Lookup an address but don't bother to find any names. 226 * Lookup an address but don't bother to find any names.
227 */ 227 */
228 int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, 228 int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
229 unsigned long *offset) 229 unsigned long *offset)
230 { 230 {
231 if (is_ksym_addr(addr)) 231 if (is_ksym_addr(addr))
232 return !!get_symbol_pos(addr, symbolsize, offset); 232 return !!get_symbol_pos(addr, symbolsize, offset);
233 233
234 return !!module_address_lookup(addr, symbolsize, offset, NULL); 234 return !!module_address_lookup(addr, symbolsize, offset, NULL);
235 } 235 }
236 236
237 /* 237 /*
238 * Lookup an address 238 * Lookup an address
239 * - modname is set to NULL if it's in the kernel 239 * - modname is set to NULL if it's in the kernel
240 * - we guarantee that the returned name is valid until we reschedule even if 240 * - we guarantee that the returned name is valid until we reschedule even if
241 * it resides in a module 241 * it resides in a module
242 * - we also guarantee that modname will be valid until rescheduled 242 * - we also guarantee that modname will be valid until rescheduled
243 */ 243 */
244 const char *kallsyms_lookup(unsigned long addr, 244 const char *kallsyms_lookup(unsigned long addr,
245 unsigned long *symbolsize, 245 unsigned long *symbolsize,
246 unsigned long *offset, 246 unsigned long *offset,
247 char **modname, char *namebuf) 247 char **modname, char *namebuf)
248 { 248 {
249 const char *msym; 249 const char *msym;
250 250
251 namebuf[KSYM_NAME_LEN] = 0; 251 namebuf[KSYM_NAME_LEN] = 0;
252 namebuf[0] = 0; 252 namebuf[0] = 0;
253 253
254 if (is_ksym_addr(addr)) { 254 if (is_ksym_addr(addr)) {
255 unsigned long pos; 255 unsigned long pos;
256 256
257 pos = get_symbol_pos(addr, symbolsize, offset); 257 pos = get_symbol_pos(addr, symbolsize, offset);
258 /* Grab name */ 258 /* Grab name */
259 kallsyms_expand_symbol(get_symbol_offset(pos), namebuf); 259 kallsyms_expand_symbol(get_symbol_offset(pos), namebuf);
260 *modname = NULL; 260 *modname = NULL;
261 return namebuf; 261 return namebuf;
262 } 262 }
263 263
264 /* see if it's in a module */ 264 /* see if it's in a module */
265 msym = module_address_lookup(addr, symbolsize, offset, modname); 265 msym = module_address_lookup(addr, symbolsize, offset, modname);
266 if (msym) 266 if (msym)
267 return strncpy(namebuf, msym, KSYM_NAME_LEN); 267 return strncpy(namebuf, msym, KSYM_NAME_LEN);
268 268
269 return NULL; 269 return NULL;
270 } 270 }
271 271
272 int lookup_symbol_name(unsigned long addr, char *symname) 272 int lookup_symbol_name(unsigned long addr, char *symname)
273 { 273 {
274 symname[0] = '\0'; 274 symname[0] = '\0';
275 symname[KSYM_NAME_LEN] = '\0'; 275 symname[KSYM_NAME_LEN] = '\0';
276 276
277 if (is_ksym_addr(addr)) { 277 if (is_ksym_addr(addr)) {
278 unsigned long pos; 278 unsigned long pos;
279 279
280 pos = get_symbol_pos(addr, NULL, NULL); 280 pos = get_symbol_pos(addr, NULL, NULL);
281 /* Grab name */ 281 /* Grab name */
282 kallsyms_expand_symbol(get_symbol_offset(pos), symname); 282 kallsyms_expand_symbol(get_symbol_offset(pos), symname);
283 return 0; 283 return 0;
284 } 284 }
285 /* see if it's in a module */ 285 /* see if it's in a module */
286 return lookup_module_symbol_name(addr, symname); 286 return lookup_module_symbol_name(addr, symname);
287 } 287 }
288 288
289 int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
290 unsigned long *offset, char *modname, char *name)
291 {
292 name[0] = '\0';
293 name[KSYM_NAME_LEN] = '\0';
294
295 if (is_ksym_addr(addr)) {
296 unsigned long pos;
297
298 pos = get_symbol_pos(addr, size, offset);
299 /* Grab name */
300 kallsyms_expand_symbol(get_symbol_offset(pos), name);
301 modname[0] = '\0';
302 return 0;
303 }
304 /* see if it's in a module */
305 return lookup_module_symbol_attrs(addr, size, offset, modname, name);
306 }
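Unlike kallsyms_lookup(), which hands back a modname pointer that still points into the module's own memory, lookup_symbol_attrs() copies both the symbol name and the module name into caller-owned buffers, so nothing dangles once it returns. A hedged sketch of a caller follows; the function and its error handling are hypothetical, it assumes <linux/kallsyms.h> and <linux/module.h> for the buffer-size constants, and it is a fragment rather than a standalone program:

/* Hypothetical caller sketch (not part of this patch): format
 * "name+offset/size [module]" from the copied-out results.  Assumes a
 * negative return means "not found", matching the 0-on-success path above. */
static int format_caller(char *buf, size_t buflen, unsigned long addr)
{
        unsigned long size, offset;
        char name[KSYM_NAME_LEN + 1];
        char modname[MODULE_NAME_LEN + 1];

        if (lookup_symbol_attrs(addr, &size, &offset, modname, name) < 0)
                return snprintf(buf, buflen, "0x%lx", addr);

        if (modname[0])
                return snprintf(buf, buflen, "%s+%#lx/%#lx [%s]",
                                name, offset, size, modname);
        return snprintf(buf, buflen, "%s+%#lx/%#lx", name, offset, size);
}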
307
289 /* Look up a kernel symbol and return it in a text buffer. */ 308 /* Look up a kernel symbol and return it in a text buffer. */
290 int sprint_symbol(char *buffer, unsigned long address) 309 int sprint_symbol(char *buffer, unsigned long address)
291 { 310 {
292 char *modname; 311 char *modname;
293 const char *name; 312 const char *name;
294 unsigned long offset, size; 313 unsigned long offset, size;
295 char namebuf[KSYM_NAME_LEN+1]; 314 char namebuf[KSYM_NAME_LEN+1];
296 315
297 name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); 316 name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
298 if (!name) 317 if (!name)
299 return sprintf(buffer, "0x%lx", address); 318 return sprintf(buffer, "0x%lx", address);
300 else { 319 else {
301 if (modname) 320 if (modname)
302 return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, 321 return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
303 size, modname); 322 size, modname);
304 else 323 else
305 return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); 324 return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
306 } 325 }
307 } 326 }
308 327
309 /* Look up a kernel symbol and print it to the kernel messages. */ 328 /* Look up a kernel symbol and print it to the kernel messages. */
310 void __print_symbol(const char *fmt, unsigned long address) 329 void __print_symbol(const char *fmt, unsigned long address)
311 { 330 {
312 char buffer[KSYM_SYMBOL_LEN]; 331 char buffer[KSYM_SYMBOL_LEN];
313 332
314 sprint_symbol(buffer, address); 333 sprint_symbol(buffer, address);
315 334
316 printk(fmt, buffer); 335 printk(fmt, buffer);
317 } 336 }
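A typical call site passes a format string containing a single %s plus the address of interest; for instance (the surrounding context is hypothetical, and callers normally go through the print_symbol() wrapper rather than calling __print_symbol() directly):

/* Hypothetical call site: prints something like
 * "Caller is foo_func+0x1c/0x40 [bar]" to the kernel log. */
__print_symbol("Caller is %s\n",
               (unsigned long)__builtin_return_address(0));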
318 337
319 /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */ 338 /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
320 struct kallsym_iter 339 struct kallsym_iter
321 { 340 {
322 loff_t pos; 341 loff_t pos;
323 unsigned long value; 342 unsigned long value;
324 unsigned int nameoff; /* If iterating in core kernel symbols */ 343 unsigned int nameoff; /* If iterating in core kernel symbols */
325 char type; 344 char type;
326 char name[KSYM_NAME_LEN+1]; 345 char name[KSYM_NAME_LEN+1];
327 char module_name[MODULE_NAME_LEN + 1]; 346 char module_name[MODULE_NAME_LEN + 1];
328 int exported; 347 int exported;
329 }; 348 };
330 349
331 static int get_ksymbol_mod(struct kallsym_iter *iter) 350 static int get_ksymbol_mod(struct kallsym_iter *iter)
332 { 351 {
333 if (module_get_kallsym(iter->pos - kallsyms_num_syms, &iter->value, 352 if (module_get_kallsym(iter->pos - kallsyms_num_syms, &iter->value,
334 &iter->type, iter->name, iter->module_name, 353 &iter->type, iter->name, iter->module_name,
335 &iter->exported) < 0) 354 &iter->exported) < 0)
336 return 0; 355 return 0;
337 return 1; 356 return 1;
338 } 357 }
339 358
340 /* Returns space to next name. */ 359 /* Returns space to next name. */
341 static unsigned long get_ksymbol_core(struct kallsym_iter *iter) 360 static unsigned long get_ksymbol_core(struct kallsym_iter *iter)
342 { 361 {
343 unsigned off = iter->nameoff; 362 unsigned off = iter->nameoff;
344 363
345 iter->module_name[0] = '\0'; 364 iter->module_name[0] = '\0';
346 iter->value = kallsyms_addresses[iter->pos]; 365 iter->value = kallsyms_addresses[iter->pos];
347 366
348 iter->type = kallsyms_get_symbol_type(off); 367 iter->type = kallsyms_get_symbol_type(off);
349 368
350 off = kallsyms_expand_symbol(off, iter->name); 369 off = kallsyms_expand_symbol(off, iter->name);
351 370
352 return off - iter->nameoff; 371 return off - iter->nameoff;
353 } 372 }
354 373
355 static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) 374 static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
356 { 375 {
357 iter->name[0] = '\0'; 376 iter->name[0] = '\0';
358 iter->nameoff = get_symbol_offset(new_pos); 377 iter->nameoff = get_symbol_offset(new_pos);
359 iter->pos = new_pos; 378 iter->pos = new_pos;
360 } 379 }
361 380
362 /* Returns false if pos at or past end of file. */ 381 /* Returns false if pos at or past end of file. */
363 static int update_iter(struct kallsym_iter *iter, loff_t pos) 382 static int update_iter(struct kallsym_iter *iter, loff_t pos)
364 { 383 {
365 /* Module symbols can be accessed randomly. */ 384 /* Module symbols can be accessed randomly. */
366 if (pos >= kallsyms_num_syms) { 385 if (pos >= kallsyms_num_syms) {
367 iter->pos = pos; 386 iter->pos = pos;
368 return get_ksymbol_mod(iter); 387 return get_ksymbol_mod(iter);
369 } 388 }
370 389
371 /* If we're not on the desired position, reset to new position. */ 390 /* If we're not on the desired position, reset to new position. */
372 if (pos != iter->pos) 391 if (pos != iter->pos)
373 reset_iter(iter, pos); 392 reset_iter(iter, pos);
374 393
375 iter->nameoff += get_ksymbol_core(iter); 394 iter->nameoff += get_ksymbol_core(iter);
376 iter->pos++; 395 iter->pos++;
377 396
378 return 1; 397 return 1;
379 } 398 }
380 399
381 static void *s_next(struct seq_file *m, void *p, loff_t *pos) 400 static void *s_next(struct seq_file *m, void *p, loff_t *pos)
382 { 401 {
383 (*pos)++; 402 (*pos)++;
384 403
385 if (!update_iter(m->private, *pos)) 404 if (!update_iter(m->private, *pos))
386 return NULL; 405 return NULL;
387 return p; 406 return p;
388 } 407 }
389 408
390 static void *s_start(struct seq_file *m, loff_t *pos) 409 static void *s_start(struct seq_file *m, loff_t *pos)
391 { 410 {
392 if (!update_iter(m->private, *pos)) 411 if (!update_iter(m->private, *pos))
393 return NULL; 412 return NULL;
394 return m->private; 413 return m->private;
395 } 414 }
396 415
397 static void s_stop(struct seq_file *m, void *p) 416 static void s_stop(struct seq_file *m, void *p)
398 { 417 {
399 } 418 }
400 419
401 static int s_show(struct seq_file *m, void *p) 420 static int s_show(struct seq_file *m, void *p)
402 { 421 {
403 struct kallsym_iter *iter = m->private; 422 struct kallsym_iter *iter = m->private;
404 423
405 /* Some debugging symbols have no name. Ignore them. */ 424 /* Some debugging symbols have no name. Ignore them. */
406 if (!iter->name[0]) 425 if (!iter->name[0])
407 return 0; 426 return 0;
408 427
409 if (iter->module_name[0]) { 428 if (iter->module_name[0]) {
410 char type; 429 char type;
411 430
412 /* Label it "global" if it is exported, 431 /* Label it "global" if it is exported,
413 * "local" if not exported. */ 432 * "local" if not exported. */
414 type = iter->exported ? toupper(iter->type) : 433 type = iter->exported ? toupper(iter->type) :
415 tolower(iter->type); 434 tolower(iter->type);
416 seq_printf(m, "%0*lx %c %s\t[%s]\n", 435 seq_printf(m, "%0*lx %c %s\t[%s]\n",
417 (int)(2*sizeof(void*)), 436 (int)(2*sizeof(void*)),
418 iter->value, type, iter->name, iter->module_name); 437 iter->value, type, iter->name, iter->module_name);
419 } else 438 } else
420 seq_printf(m, "%0*lx %c %s\n", 439 seq_printf(m, "%0*lx %c %s\n",
421 (int)(2*sizeof(void*)), 440 (int)(2*sizeof(void*)),
422 iter->value, iter->type, iter->name); 441 iter->value, iter->type, iter->name);
423 return 0; 442 return 0;
424 } 443 }
425 444
426 static const struct seq_operations kallsyms_op = { 445 static const struct seq_operations kallsyms_op = {
427 .start = s_start, 446 .start = s_start,
428 .next = s_next, 447 .next = s_next,
429 .stop = s_stop, 448 .stop = s_stop,
430 .show = s_show 449 .show = s_show
431 }; 450 };
432 451
433 static int kallsyms_open(struct inode *inode, struct file *file) 452 static int kallsyms_open(struct inode *inode, struct file *file)
434 { 453 {
435 /* We keep iterator in m->private, since normal case is to 454 /* We keep iterator in m->private, since normal case is to
436 * s_start from where we left off, so we avoid 455 * s_start from where we left off, so we avoid
437 * using get_symbol_offset for every symbol */ 456 * using get_symbol_offset for every symbol */
438 struct kallsym_iter *iter; 457 struct kallsym_iter *iter;
439 int ret; 458 int ret;
440 459
441 iter = kmalloc(sizeof(*iter), GFP_KERNEL); 460 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
442 if (!iter) 461 if (!iter)
443 return -ENOMEM; 462 return -ENOMEM;
444 reset_iter(iter, 0); 463 reset_iter(iter, 0);
445 464
446 ret = seq_open(file, &kallsyms_op); 465 ret = seq_open(file, &kallsyms_op);
447 if (ret == 0) 466 if (ret == 0)
448 ((struct seq_file *)file->private_data)->private = iter; 467 ((struct seq_file *)file->private_data)->private = iter;
449 else 468 else
450 kfree(iter); 469 kfree(iter);
451 return ret; 470 return ret;
452 } 471 }
453 472
454 static int kallsyms_release(struct inode *inode, struct file *file) 473 static int kallsyms_release(struct inode *inode, struct file *file)
455 { 474 {
456 struct seq_file *m = (struct seq_file *)file->private_data; 475 struct seq_file *m = (struct seq_file *)file->private_data;
457 kfree(m->private); 476 kfree(m->private);
458 return seq_release(inode, file); 477 return seq_release(inode, file);
459 } 478 }
460 479
461 static const struct file_operations kallsyms_operations = { 480 static const struct file_operations kallsyms_operations = {
462 .open = kallsyms_open, 481 .open = kallsyms_open,
463 .read = seq_read, 482 .read = seq_read,
464 .llseek = seq_lseek, 483 .llseek = seq_lseek,
465 .release = kallsyms_release, 484 .release = kallsyms_release,
466 }; 485 };
467 486
468 static int __init kallsyms_init(void) 487 static int __init kallsyms_init(void)
469 { 488 {
470 struct proc_dir_entry *entry; 489 struct proc_dir_entry *entry;
471 490
472 entry = create_proc_entry("kallsyms", 0444, NULL); 491 entry = create_proc_entry("kallsyms", 0444, NULL);
473 if (entry) 492 if (entry)
474 entry->proc_fops = &kallsyms_operations; 493 entry->proc_fops = &kallsyms_operations;
475 return 0; 494 return 0;
476 } 495 }
477 __initcall(kallsyms_init); 496 __initcall(kallsyms_init);
478 497
479 EXPORT_SYMBOL(__print_symbol); 498 EXPORT_SYMBOL(__print_symbol);
480 EXPORT_SYMBOL_GPL(sprint_symbol); 499 EXPORT_SYMBOL_GPL(sprint_symbol);
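The /proc/kallsyms plumbing above is the standard seq_file recipe and is easy to reuse. As a hedged illustration, a hypothetical module exposing a fixed string array through its own proc file would look roughly like this; all ex_* names and the file name "example" are invented, and it assumes the usual <linux/module.h>, <linux/proc_fs.h> and <linux/seq_file.h> includes plus the same create_proc_entry() API used above:

/* Hypothetical module sketch reusing the pattern above. */
static const char *ex_items[] = { "alpha", "beta", "gamma" };
#define EX_NITEMS ARRAY_SIZE(ex_items)

static void *ex_start(struct seq_file *m, loff_t *pos)
{
        return *pos < EX_NITEMS ? (void *)&ex_items[*pos] : NULL;
}

static void *ex_next(struct seq_file *m, void *p, loff_t *pos)
{
        (*pos)++;
        return *pos < EX_NITEMS ? (void *)&ex_items[*pos] : NULL;
}

static void ex_stop(struct seq_file *m, void *p)
{
}

static int ex_show(struct seq_file *m, void *p)
{
        seq_printf(m, "%s\n", *(const char **)p);
        return 0;
}

static const struct seq_operations ex_seq_ops = {
        .start = ex_start,
        .next  = ex_next,
        .stop  = ex_stop,
        .show  = ex_show,
};

static int ex_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &ex_seq_ops);
}

static const struct file_operations ex_fops = {
        .open    = ex_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};

static int __init ex_init(void)
{
        struct proc_dir_entry *entry;

        entry = create_proc_entry("example", 0444, NULL);
        if (entry)
                entry->proc_fops = &ex_fops;
        return 0;
}
__initcall(ex_init);

Because the array is small and randomly indexable, this sketch does not need the resumable iterator that the kallsyms code keeps in m->private.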
481 500
1 /* 1 /*
2 Copyright (C) 2002 Richard Henderson 2 Copyright (C) 2002 Richard Henderson
3 Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. 3 Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM.
4 4
5 This program is free software; you can redistribute it and/or modify 5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by 6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or 7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version. 8 (at your option) any later version.
9 9
10 This program is distributed in the hope that it will be useful, 10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details. 13 GNU General Public License for more details.
14 14
15 You should have received a copy of the GNU General Public License 15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software 16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 #include <linux/module.h> 19 #include <linux/module.h>
20 #include <linux/moduleloader.h> 20 #include <linux/moduleloader.h>
21 #include <linux/init.h> 21 #include <linux/init.h>
22 #include <linux/kallsyms.h> 22 #include <linux/kallsyms.h>
23 #include <linux/kernel.h> 23 #include <linux/kernel.h>
24 #include <linux/slab.h> 24 #include <linux/slab.h>
25 #include <linux/vmalloc.h> 25 #include <linux/vmalloc.h>
26 #include <linux/elf.h> 26 #include <linux/elf.h>
27 #include <linux/seq_file.h> 27 #include <linux/seq_file.h>
28 #include <linux/syscalls.h> 28 #include <linux/syscalls.h>
29 #include <linux/fcntl.h> 29 #include <linux/fcntl.h>
30 #include <linux/rcupdate.h> 30 #include <linux/rcupdate.h>
31 #include <linux/capability.h> 31 #include <linux/capability.h>
32 #include <linux/cpu.h> 32 #include <linux/cpu.h>
33 #include <linux/moduleparam.h> 33 #include <linux/moduleparam.h>
34 #include <linux/errno.h> 34 #include <linux/errno.h>
35 #include <linux/err.h> 35 #include <linux/err.h>
36 #include <linux/vermagic.h> 36 #include <linux/vermagic.h>
37 #include <linux/notifier.h> 37 #include <linux/notifier.h>
38 #include <linux/sched.h> 38 #include <linux/sched.h>
39 #include <linux/stop_machine.h> 39 #include <linux/stop_machine.h>
40 #include <linux/device.h> 40 #include <linux/device.h>
41 #include <linux/string.h> 41 #include <linux/string.h>
42 #include <linux/mutex.h> 42 #include <linux/mutex.h>
43 #include <linux/unwind.h> 43 #include <linux/unwind.h>
44 #include <asm/uaccess.h> 44 #include <asm/uaccess.h>
45 #include <asm/semaphore.h> 45 #include <asm/semaphore.h>
46 #include <asm/cacheflush.h> 46 #include <asm/cacheflush.h>
47 #include <linux/license.h> 47 #include <linux/license.h>
48 48
49 extern int module_sysfs_initialized; 49 extern int module_sysfs_initialized;
50 50
51 #if 0 51 #if 0
52 #define DEBUGP printk 52 #define DEBUGP printk
53 #else 53 #else
54 #define DEBUGP(fmt , a...) 54 #define DEBUGP(fmt , a...)
55 #endif 55 #endif
56 56
57 #ifndef ARCH_SHF_SMALL 57 #ifndef ARCH_SHF_SMALL
58 #define ARCH_SHF_SMALL 0 58 #define ARCH_SHF_SMALL 0
59 #endif 59 #endif
60 60
61 /* If this is set, the section belongs in the init part of the module */ 61 /* If this is set, the section belongs in the init part of the module */
62 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) 62 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
63 63
64 /* Protects module list */ 64 /* Protects module list */
65 static DEFINE_SPINLOCK(modlist_lock); 65 static DEFINE_SPINLOCK(modlist_lock);
66 66
67 /* List of modules, protected by module_mutex AND modlist_lock */ 67 /* List of modules, protected by module_mutex AND modlist_lock */
68 static DEFINE_MUTEX(module_mutex); 68 static DEFINE_MUTEX(module_mutex);
69 static LIST_HEAD(modules); 69 static LIST_HEAD(modules);
70 70
71 static BLOCKING_NOTIFIER_HEAD(module_notify_list); 71 static BLOCKING_NOTIFIER_HEAD(module_notify_list);
72 72
73 int register_module_notifier(struct notifier_block * nb) 73 int register_module_notifier(struct notifier_block * nb)
74 { 74 {
75 return blocking_notifier_chain_register(&module_notify_list, nb); 75 return blocking_notifier_chain_register(&module_notify_list, nb);
76 } 76 }
77 EXPORT_SYMBOL(register_module_notifier); 77 EXPORT_SYMBOL(register_module_notifier);
78 78
79 int unregister_module_notifier(struct notifier_block * nb) 79 int unregister_module_notifier(struct notifier_block * nb)
80 { 80 {
81 return blocking_notifier_chain_unregister(&module_notify_list, nb); 81 return blocking_notifier_chain_unregister(&module_notify_list, nb);
82 } 82 }
83 EXPORT_SYMBOL(unregister_module_notifier); 83 EXPORT_SYMBOL(unregister_module_notifier);
84 84
85 /* We require a truly strong try_module_get() */ 85 /* We require a truly strong try_module_get() */
86 static inline int strong_try_module_get(struct module *mod) 86 static inline int strong_try_module_get(struct module *mod)
87 { 87 {
88 if (mod && mod->state == MODULE_STATE_COMING) 88 if (mod && mod->state == MODULE_STATE_COMING)
89 return 0; 89 return 0;
90 return try_module_get(mod); 90 return try_module_get(mod);
91 } 91 }
92 92
93 static inline void add_taint_module(struct module *mod, unsigned flag) 93 static inline void add_taint_module(struct module *mod, unsigned flag)
94 { 94 {
95 add_taint(flag); 95 add_taint(flag);
96 mod->taints |= flag; 96 mod->taints |= flag;
97 } 97 }
98 98
99 /* A thread that wants to hold a reference to a module only while it 99 /* A thread that wants to hold a reference to a module only while it
100 * is running can call this to safely exit. 100 * is running can call this to safely exit.
101 * nfsd and lockd use this. 101 * nfsd and lockd use this.
102 */ 102 */
103 void __module_put_and_exit(struct module *mod, long code) 103 void __module_put_and_exit(struct module *mod, long code)
104 { 104 {
105 module_put(mod); 105 module_put(mod);
106 do_exit(code); 106 do_exit(code);
107 } 107 }
108 EXPORT_SYMBOL(__module_put_and_exit); 108 EXPORT_SYMBOL(__module_put_and_exit);
109 109
110 /* Find a module section: 0 means not found. */ 110 /* Find a module section: 0 means not found. */
111 static unsigned int find_sec(Elf_Ehdr *hdr, 111 static unsigned int find_sec(Elf_Ehdr *hdr,
112 Elf_Shdr *sechdrs, 112 Elf_Shdr *sechdrs,
113 const char *secstrings, 113 const char *secstrings,
114 const char *name) 114 const char *name)
115 { 115 {
116 unsigned int i; 116 unsigned int i;
117 117
118 for (i = 1; i < hdr->e_shnum; i++) 118 for (i = 1; i < hdr->e_shnum; i++)
119 /* Alloc bit cleared means "ignore it." */ 119 /* Alloc bit cleared means "ignore it." */
120 if ((sechdrs[i].sh_flags & SHF_ALLOC) 120 if ((sechdrs[i].sh_flags & SHF_ALLOC)
121 && strcmp(secstrings+sechdrs[i].sh_name, name) == 0) 121 && strcmp(secstrings+sechdrs[i].sh_name, name) == 0)
122 return i; 122 return i;
123 return 0; 123 return 0;
124 } 124 }
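find_sec() is a linear scan of the section header table, comparing names through the section string table and ignoring sections the loader will not keep in memory (SHF_ALLOC clear). A standalone userspace sketch with a tiny hand-built header table:

/* Standalone sketch (not kernel code): find a named SHF_ALLOC section the
 * way find_sec() does.  The in-memory "image" below is made up for the demo. */
#include <stdio.h>
#include <string.h>
#include <elf.h>

static const char secstrings[] = "\0.text\0.data.percpu\0.comment";

static unsigned int find_sec(const Elf64_Ehdr *hdr, const Elf64_Shdr *sechdrs,
                             const char *strtab, const char *name)
{
        /* Section 0 is the reserved null section, so start at 1. */
        for (unsigned int i = 1; i < hdr->e_shnum; i++)
                if ((sechdrs[i].sh_flags & SHF_ALLOC) &&
                    strcmp(strtab + sechdrs[i].sh_name, name) == 0)
                        return i;
        return 0;               /* 0 means "not found" */
}

int main(void)
{
        Elf64_Ehdr hdr = { .e_shnum = 3 };
        Elf64_Shdr sechdrs[3] = {
                { 0 },
                { .sh_name = 1, .sh_flags = SHF_ALLOC },  /* ".text" */
                { .sh_name = 7, .sh_flags = SHF_ALLOC },  /* ".data.percpu" */
        };

        printf("percpu section index: %u\n",
               find_sec(&hdr, sechdrs, secstrings, ".data.percpu"));
        return 0;
}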
125 125
126 /* Provided by the linker */ 126 /* Provided by the linker */
127 extern const struct kernel_symbol __start___ksymtab[]; 127 extern const struct kernel_symbol __start___ksymtab[];
128 extern const struct kernel_symbol __stop___ksymtab[]; 128 extern const struct kernel_symbol __stop___ksymtab[];
129 extern const struct kernel_symbol __start___ksymtab_gpl[]; 129 extern const struct kernel_symbol __start___ksymtab_gpl[];
130 extern const struct kernel_symbol __stop___ksymtab_gpl[]; 130 extern const struct kernel_symbol __stop___ksymtab_gpl[];
131 extern const struct kernel_symbol __start___ksymtab_gpl_future[]; 131 extern const struct kernel_symbol __start___ksymtab_gpl_future[];
132 extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; 132 extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
133 extern const struct kernel_symbol __start___ksymtab_unused[]; 133 extern const struct kernel_symbol __start___ksymtab_unused[];
134 extern const struct kernel_symbol __stop___ksymtab_unused[]; 134 extern const struct kernel_symbol __stop___ksymtab_unused[];
135 extern const struct kernel_symbol __start___ksymtab_unused_gpl[]; 135 extern const struct kernel_symbol __start___ksymtab_unused_gpl[];
136 extern const struct kernel_symbol __stop___ksymtab_unused_gpl[]; 136 extern const struct kernel_symbol __stop___ksymtab_unused_gpl[];
137 extern const struct kernel_symbol __start___ksymtab_gpl_future[]; 137 extern const struct kernel_symbol __start___ksymtab_gpl_future[];
138 extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; 138 extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
139 extern const unsigned long __start___kcrctab[]; 139 extern const unsigned long __start___kcrctab[];
140 extern const unsigned long __start___kcrctab_gpl[]; 140 extern const unsigned long __start___kcrctab_gpl[];
141 extern const unsigned long __start___kcrctab_gpl_future[]; 141 extern const unsigned long __start___kcrctab_gpl_future[];
142 extern const unsigned long __start___kcrctab_unused[]; 142 extern const unsigned long __start___kcrctab_unused[];
143 extern const unsigned long __start___kcrctab_unused_gpl[]; 143 extern const unsigned long __start___kcrctab_unused_gpl[];
144 144
145 #ifndef CONFIG_MODVERSIONS 145 #ifndef CONFIG_MODVERSIONS
146 #define symversion(base, idx) NULL 146 #define symversion(base, idx) NULL
147 #else 147 #else
148 #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL) 148 #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL)
149 #endif 149 #endif
150 150
151 /* lookup symbol in given range of kernel_symbols */ 151 /* lookup symbol in given range of kernel_symbols */
152 static const struct kernel_symbol *lookup_symbol(const char *name, 152 static const struct kernel_symbol *lookup_symbol(const char *name,
153 const struct kernel_symbol *start, 153 const struct kernel_symbol *start,
154 const struct kernel_symbol *stop) 154 const struct kernel_symbol *stop)
155 { 155 {
156 const struct kernel_symbol *ks = start; 156 const struct kernel_symbol *ks = start;
157 for (; ks < stop; ks++) 157 for (; ks < stop; ks++)
158 if (strcmp(ks->name, name) == 0) 158 if (strcmp(ks->name, name) == 0)
159 return ks; 159 return ks;
160 return NULL; 160 return NULL;
161 } 161 }
162 162
163 static void printk_unused_warning(const char *name) 163 static void printk_unused_warning(const char *name)
164 { 164 {
165 printk(KERN_WARNING "Symbol %s is marked as UNUSED, " 165 printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
166 "however this module is using it.\n", name); 166 "however this module is using it.\n", name);
167 printk(KERN_WARNING "This symbol will go away in the future.\n"); 167 printk(KERN_WARNING "This symbol will go away in the future.\n");
168 printk(KERN_WARNING "Please evaluate if this is the right api to use, " 168 printk(KERN_WARNING "Please evaluate if this is the right api to use, "
169 "and if it really is, submit a report to the linux kernel " 169 "and if it really is, submit a report to the linux kernel "
170 "mailing list together with your code for " 170 "mailing list together with your code for "
171 "inclusion.\n"); 171 "inclusion.\n");
172 } 172 }
173 173
174 /* Find a symbol, return value, crc and module which owns it */ 174 /* Find a symbol, return value, crc and module which owns it */
175 static unsigned long __find_symbol(const char *name, 175 static unsigned long __find_symbol(const char *name,
176 struct module **owner, 176 struct module **owner,
177 const unsigned long **crc, 177 const unsigned long **crc,
178 int gplok) 178 int gplok)
179 { 179 {
180 struct module *mod; 180 struct module *mod;
181 const struct kernel_symbol *ks; 181 const struct kernel_symbol *ks;
182 182
183 /* Core kernel first. */ 183 /* Core kernel first. */
184 *owner = NULL; 184 *owner = NULL;
185 ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); 185 ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab);
186 if (ks) { 186 if (ks) {
187 *crc = symversion(__start___kcrctab, (ks - __start___ksymtab)); 187 *crc = symversion(__start___kcrctab, (ks - __start___ksymtab));
188 return ks->value; 188 return ks->value;
189 } 189 }
190 if (gplok) { 190 if (gplok) {
191 ks = lookup_symbol(name, __start___ksymtab_gpl, 191 ks = lookup_symbol(name, __start___ksymtab_gpl,
192 __stop___ksymtab_gpl); 192 __stop___ksymtab_gpl);
193 if (ks) { 193 if (ks) {
194 *crc = symversion(__start___kcrctab_gpl, 194 *crc = symversion(__start___kcrctab_gpl,
195 (ks - __start___ksymtab_gpl)); 195 (ks - __start___ksymtab_gpl));
196 return ks->value; 196 return ks->value;
197 } 197 }
198 } 198 }
199 ks = lookup_symbol(name, __start___ksymtab_gpl_future, 199 ks = lookup_symbol(name, __start___ksymtab_gpl_future,
200 __stop___ksymtab_gpl_future); 200 __stop___ksymtab_gpl_future);
201 if (ks) { 201 if (ks) {
202 if (!gplok) { 202 if (!gplok) {
203 printk(KERN_WARNING "Symbol %s is being used " 203 printk(KERN_WARNING "Symbol %s is being used "
204 "by a non-GPL module, which will not " 204 "by a non-GPL module, which will not "
205 "be allowed in the future\n", name); 205 "be allowed in the future\n", name);
206 printk(KERN_WARNING "Please see the file " 206 printk(KERN_WARNING "Please see the file "
207 "Documentation/feature-removal-schedule.txt " 207 "Documentation/feature-removal-schedule.txt "
208 "in the kernel source tree for more " 208 "in the kernel source tree for more "
209 "details.\n"); 209 "details.\n");
210 } 210 }
211 *crc = symversion(__start___kcrctab_gpl_future, 211 *crc = symversion(__start___kcrctab_gpl_future,
212 (ks - __start___ksymtab_gpl_future)); 212 (ks - __start___ksymtab_gpl_future));
213 return ks->value; 213 return ks->value;
214 } 214 }
215 215
216 ks = lookup_symbol(name, __start___ksymtab_unused, 216 ks = lookup_symbol(name, __start___ksymtab_unused,
217 __stop___ksymtab_unused); 217 __stop___ksymtab_unused);
218 if (ks) { 218 if (ks) {
219 printk_unused_warning(name); 219 printk_unused_warning(name);
220 *crc = symversion(__start___kcrctab_unused, 220 *crc = symversion(__start___kcrctab_unused,
221 (ks - __start___ksymtab_unused)); 221 (ks - __start___ksymtab_unused));
222 return ks->value; 222 return ks->value;
223 } 223 }
224 224
225 if (gplok) 225 if (gplok)
226 ks = lookup_symbol(name, __start___ksymtab_unused_gpl, 226 ks = lookup_symbol(name, __start___ksymtab_unused_gpl,
227 __stop___ksymtab_unused_gpl); 227 __stop___ksymtab_unused_gpl);
228 if (ks) { 228 if (ks) {
229 printk_unused_warning(name); 229 printk_unused_warning(name);
230 *crc = symversion(__start___kcrctab_unused_gpl, 230 *crc = symversion(__start___kcrctab_unused_gpl,
231 (ks - __start___ksymtab_unused_gpl)); 231 (ks - __start___ksymtab_unused_gpl));
232 return ks->value; 232 return ks->value;
233 } 233 }
234 234
235 /* Now try modules. */ 235 /* Now try modules. */
236 list_for_each_entry(mod, &modules, list) { 236 list_for_each_entry(mod, &modules, list) {
237 *owner = mod; 237 *owner = mod;
238 ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); 238 ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms);
239 if (ks) { 239 if (ks) {
240 *crc = symversion(mod->crcs, (ks - mod->syms)); 240 *crc = symversion(mod->crcs, (ks - mod->syms));
241 return ks->value; 241 return ks->value;
242 } 242 }
243 243
244 if (gplok) { 244 if (gplok) {
245 ks = lookup_symbol(name, mod->gpl_syms, 245 ks = lookup_symbol(name, mod->gpl_syms,
246 mod->gpl_syms + mod->num_gpl_syms); 246 mod->gpl_syms + mod->num_gpl_syms);
247 if (ks) { 247 if (ks) {
248 *crc = symversion(mod->gpl_crcs, 248 *crc = symversion(mod->gpl_crcs,
249 (ks - mod->gpl_syms)); 249 (ks - mod->gpl_syms));
250 return ks->value; 250 return ks->value;
251 } 251 }
252 } 252 }
253 ks = lookup_symbol(name, mod->unused_syms, mod->unused_syms + mod->num_unused_syms); 253 ks = lookup_symbol(name, mod->unused_syms, mod->unused_syms + mod->num_unused_syms);
254 if (ks) { 254 if (ks) {
255 printk_unused_warning(name); 255 printk_unused_warning(name);
256 *crc = symversion(mod->unused_crcs, (ks - mod->unused_syms)); 256 *crc = symversion(mod->unused_crcs, (ks - mod->unused_syms));
257 return ks->value; 257 return ks->value;
258 } 258 }
259 259
260 if (gplok) { 260 if (gplok) {
261 ks = lookup_symbol(name, mod->unused_gpl_syms, 261 ks = lookup_symbol(name, mod->unused_gpl_syms,
262 mod->unused_gpl_syms + mod->num_unused_gpl_syms); 262 mod->unused_gpl_syms + mod->num_unused_gpl_syms);
263 if (ks) { 263 if (ks) {
264 printk_unused_warning(name); 264 printk_unused_warning(name);
265 *crc = symversion(mod->unused_gpl_crcs, 265 *crc = symversion(mod->unused_gpl_crcs,
266 (ks - mod->unused_gpl_syms)); 266 (ks - mod->unused_gpl_syms));
267 return ks->value; 267 return ks->value;
268 } 268 }
269 } 269 }
270 ks = lookup_symbol(name, mod->gpl_future_syms, 270 ks = lookup_symbol(name, mod->gpl_future_syms,
271 (mod->gpl_future_syms + 271 (mod->gpl_future_syms +
272 mod->num_gpl_future_syms)); 272 mod->num_gpl_future_syms));
273 if (ks) { 273 if (ks) {
274 if (!gplok) { 274 if (!gplok) {
275 printk(KERN_WARNING "Symbol %s is being used " 275 printk(KERN_WARNING "Symbol %s is being used "
276 "by a non-GPL module, which will not " 276 "by a non-GPL module, which will not "
277 "be allowed in the future\n", name); 277 "be allowed in the future\n", name);
278 printk(KERN_WARNING "Please see the file " 278 printk(KERN_WARNING "Please see the file "
279 "Documentation/feature-removal-schedule.txt " 279 "Documentation/feature-removal-schedule.txt "
280 "in the kernel source tree for more " 280 "in the kernel source tree for more "
281 "details.\n"); 281 "details.\n");
282 } 282 }
283 *crc = symversion(mod->gpl_future_crcs, 283 *crc = symversion(mod->gpl_future_crcs,
284 (ks - mod->gpl_future_syms)); 284 (ks - mod->gpl_future_syms));
285 return ks->value; 285 return ks->value;
286 } 286 }
287 } 287 }
288 DEBUGP("Failed to find symbol %s\n", name); 288 DEBUGP("Failed to find symbol %s\n", name);
289 return 0; 289 return 0;
290 } 290 }
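The lookup order here is fixed: the core kernel's export tables are consulted first (plain, then GPL-only when the importer is entitled to them, then the future-GPL and deprecated "unused" tables), and only afterwards each loaded module's tables in the same pattern, so the core kernel wins any name clash. A standalone sketch of that ordered, permission-filtered table search with made-up data:

/* Standalone sketch (not kernel code): search several symbol tables in
 * priority order, skipping tables the importer is not entitled to, the
 * way __find_symbol() walks the export sections.  Data is made up. */
#include <stdio.h>
#include <string.h>

struct sym { const char *name; unsigned long value; };

static const struct sym plain[] = { { "alpha", 0x100 }, { "beta", 0x110 } };
static const struct sym gpl[]   = { { "gamma", 0x200 } };

struct table {
        const struct sym *start;
        unsigned int n;
        int gpl_only;           /* only visible to GPL importers */
};

static const struct table tables[] = {
        { plain, 2, 0 },
        { gpl,   1, 1 },
};

static unsigned long find_symbol(const char *name, int gplok)
{
        for (unsigned int t = 0; t < sizeof(tables) / sizeof(tables[0]); t++) {
                if (tables[t].gpl_only && !gplok)
                        continue;
                for (unsigned int i = 0; i < tables[t].n; i++)
                        if (strcmp(tables[t].start[i].name, name) == 0)
                                return tables[t].start[i].value;
        }
        return 0;               /* 0 means "not found", as in the kernel */
}

int main(void)
{
        printf("gamma (gplok=0): %#lx\n", find_symbol("gamma", 0));  /* 0 */
        printf("gamma (gplok=1): %#lx\n", find_symbol("gamma", 1));  /* 0x200 */
        return 0;
}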
291 291
292 /* Search for module by name: must hold module_mutex. */ 292 /* Search for module by name: must hold module_mutex. */
293 static struct module *find_module(const char *name) 293 static struct module *find_module(const char *name)
294 { 294 {
295 struct module *mod; 295 struct module *mod;
296 296
297 list_for_each_entry(mod, &modules, list) { 297 list_for_each_entry(mod, &modules, list) {
298 if (strcmp(mod->name, name) == 0) 298 if (strcmp(mod->name, name) == 0)
299 return mod; 299 return mod;
300 } 300 }
301 return NULL; 301 return NULL;
302 } 302 }
303 303
304 #ifdef CONFIG_SMP 304 #ifdef CONFIG_SMP
305 /* Number of blocks used and allocated. */ 305 /* Number of blocks used and allocated. */
306 static unsigned int pcpu_num_used, pcpu_num_allocated; 306 static unsigned int pcpu_num_used, pcpu_num_allocated;
307 /* Size of each block. -ve means used. */ 307 /* Size of each block. -ve means used. */
308 static int *pcpu_size; 308 static int *pcpu_size;
309 309
310 static int split_block(unsigned int i, unsigned short size) 310 static int split_block(unsigned int i, unsigned short size)
311 { 311 {
312 /* Reallocation required? */ 312 /* Reallocation required? */
313 if (pcpu_num_used + 1 > pcpu_num_allocated) { 313 if (pcpu_num_used + 1 > pcpu_num_allocated) {
314 int *new; 314 int *new;
315 315
316 new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2, 316 new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2,
317 GFP_KERNEL); 317 GFP_KERNEL);
318 if (!new) 318 if (!new)
319 return 0; 319 return 0;
320 320
321 pcpu_num_allocated *= 2; 321 pcpu_num_allocated *= 2;
322 pcpu_size = new; 322 pcpu_size = new;
323 } 323 }
324 324
325 /* Insert a new subblock */ 325 /* Insert a new subblock */
326 memmove(&pcpu_size[i+1], &pcpu_size[i], 326 memmove(&pcpu_size[i+1], &pcpu_size[i],
327 sizeof(pcpu_size[0]) * (pcpu_num_used - i)); 327 sizeof(pcpu_size[0]) * (pcpu_num_used - i));
328 pcpu_num_used++; 328 pcpu_num_used++;
329 329
330 pcpu_size[i+1] -= size; 330 pcpu_size[i+1] -= size;
331 pcpu_size[i] = size; 331 pcpu_size[i] = size;
332 return 1; 332 return 1;
333 } 333 }
334 334
335 static inline unsigned int block_size(int val) 335 static inline unsigned int block_size(int val)
336 { 336 {
337 if (val < 0) 337 if (val < 0)
338 return -val; 338 return -val;
339 return val; 339 return val;
340 } 340 }
341 341
342 /* Created by linker magic */ 342 /* Created by linker magic */
343 extern char __per_cpu_start[], __per_cpu_end[]; 343 extern char __per_cpu_start[], __per_cpu_end[];
344 344
345 static void *percpu_modalloc(unsigned long size, unsigned long align, 345 static void *percpu_modalloc(unsigned long size, unsigned long align,
346 const char *name) 346 const char *name)
347 { 347 {
348 unsigned long extra; 348 unsigned long extra;
349 unsigned int i; 349 unsigned int i;
350 void *ptr; 350 void *ptr;
351 351
352 if (align > PAGE_SIZE) { 352 if (align > PAGE_SIZE) {
353 printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", 353 printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
354 name, align, PAGE_SIZE); 354 name, align, PAGE_SIZE);
355 align = PAGE_SIZE; 355 align = PAGE_SIZE;
356 } 356 }
357 357
358 ptr = __per_cpu_start; 358 ptr = __per_cpu_start;
359 for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { 359 for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
360 /* Extra for alignment requirement. */ 360 /* Extra for alignment requirement. */
361 extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; 361 extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
362 BUG_ON(i == 0 && extra != 0); 362 BUG_ON(i == 0 && extra != 0);
363 363
364 if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size) 364 if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size)
365 continue; 365 continue;
366 366
367 /* Transfer extra to previous block. */ 367 /* Transfer extra to previous block. */
368 if (pcpu_size[i-1] < 0) 368 if (pcpu_size[i-1] < 0)
369 pcpu_size[i-1] -= extra; 369 pcpu_size[i-1] -= extra;
370 else 370 else
371 pcpu_size[i-1] += extra; 371 pcpu_size[i-1] += extra;
372 pcpu_size[i] -= extra; 372 pcpu_size[i] -= extra;
373 ptr += extra; 373 ptr += extra;
374 374
375 /* Split block if warranted */ 375 /* Split block if warranted */
376 if (pcpu_size[i] - size > sizeof(unsigned long)) 376 if (pcpu_size[i] - size > sizeof(unsigned long))
377 if (!split_block(i, size)) 377 if (!split_block(i, size))
378 return NULL; 378 return NULL;
379 379
380 /* Mark allocated */ 380 /* Mark allocated */
381 pcpu_size[i] = -pcpu_size[i]; 381 pcpu_size[i] = -pcpu_size[i];
382 return ptr; 382 return ptr;
383 } 383 }
384 384
385 printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", 385 printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
386 size); 386 size);
387 return NULL; 387 return NULL;
388 } 388 }
389 389
390 static void percpu_modfree(void *freeme) 390 static void percpu_modfree(void *freeme)
391 { 391 {
392 unsigned int i; 392 unsigned int i;
393 void *ptr = __per_cpu_start + block_size(pcpu_size[0]); 393 void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
394 394
395 /* First entry is core kernel percpu data. */ 395 /* First entry is core kernel percpu data. */
396 for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { 396 for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
397 if (ptr == freeme) { 397 if (ptr == freeme) {
398 pcpu_size[i] = -pcpu_size[i]; 398 pcpu_size[i] = -pcpu_size[i];
399 goto free; 399 goto free;
400 } 400 }
401 } 401 }
402 BUG(); 402 BUG();
403 403
404 free: 404 free:
405 /* Merge with previous? */ 405 /* Merge with previous? */
406 if (pcpu_size[i-1] >= 0) { 406 if (pcpu_size[i-1] >= 0) {
407 pcpu_size[i-1] += pcpu_size[i]; 407 pcpu_size[i-1] += pcpu_size[i];
408 pcpu_num_used--; 408 pcpu_num_used--;
409 memmove(&pcpu_size[i], &pcpu_size[i+1], 409 memmove(&pcpu_size[i], &pcpu_size[i+1],
410 (pcpu_num_used - i) * sizeof(pcpu_size[0])); 410 (pcpu_num_used - i) * sizeof(pcpu_size[0]));
411 i--; 411 i--;
412 } 412 }
413 /* Merge with next? */ 413 /* Merge with next? */
414 if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) { 414 if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) {
415 pcpu_size[i] += pcpu_size[i+1]; 415 pcpu_size[i] += pcpu_size[i+1];
416 pcpu_num_used--; 416 pcpu_num_used--;
417 memmove(&pcpu_size[i+1], &pcpu_size[i+2], 417 memmove(&pcpu_size[i+1], &pcpu_size[i+2],
418 (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0])); 418 (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0]));
419 } 419 }
420 } 420 }
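The per-cpu reservation is managed as one array of block sizes in which a negative entry means "in use": allocation is first-fit, an oversized free block is split, and freeing flips the sign back and coalesces with free neighbours. A compact userspace sketch of that bookkeeping with made-up numbers; alignment handling and the transfer of slack to the previous block are left out, and no real memory is managed, only the split/merge arithmetic:

/* Standalone sketch (not kernel code): the pcpu_size[]-style bookkeeping,
 * reduced to its essentials.  Entry 0 is the permanently used "static"
 * block; negative sizes mean "allocated". */
#include <stdio.h>
#include <string.h>

static int blocks[16] = { -64, 192 };   /* made-up: 64 used, 192 free */
static unsigned int nblocks = 2;

static int alloc_block(int size)
{
        for (unsigned int i = 1; i < nblocks; i++) {
                if (blocks[i] < size)           /* used (negative) or too small */
                        continue;
                if (blocks[i] > size) {         /* split off the remainder */
                        memmove(&blocks[i + 1], &blocks[i],
                                (nblocks - i) * sizeof(blocks[0]));
                        nblocks++;
                        blocks[i + 1] -= size;
                        blocks[i] = size;
                }
                blocks[i] = -blocks[i];         /* mark allocated */
                return i;
        }
        return -1;
}

static void free_block(int i)
{
        blocks[i] = -blocks[i];                 /* mark free again */
        if (blocks[i - 1] >= 0) {               /* merge with previous free block */
                blocks[i - 1] += blocks[i];
                nblocks--;
                memmove(&blocks[i], &blocks[i + 1],
                        (nblocks - i) * sizeof(blocks[0]));
                i--;
        }
        if (i + 1 < (int)nblocks && blocks[i + 1] >= 0) {   /* merge with next */
                blocks[i] += blocks[i + 1];
                nblocks--;
                memmove(&blocks[i + 1], &blocks[i + 2],
                        (nblocks - (i + 1)) * sizeof(blocks[0]));
        }
}

int main(void)
{
        int a = alloc_block(32), b = alloc_block(48);

        free_block(a);
        free_block(b);          /* everything coalesces back into one free block */
        for (unsigned int i = 0; i < nblocks; i++)
                printf("block %u: %d\n", i, blocks[i]);
        return 0;
}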
421 421
422 static unsigned int find_pcpusec(Elf_Ehdr *hdr, 422 static unsigned int find_pcpusec(Elf_Ehdr *hdr,
423 Elf_Shdr *sechdrs, 423 Elf_Shdr *sechdrs,
424 const char *secstrings) 424 const char *secstrings)
425 { 425 {
426 return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); 426 return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
427 } 427 }
428 428
429 static int percpu_modinit(void) 429 static int percpu_modinit(void)
430 { 430 {
431 pcpu_num_used = 2; 431 pcpu_num_used = 2;
432 pcpu_num_allocated = 2; 432 pcpu_num_allocated = 2;
433 pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, 433 pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
434 GFP_KERNEL); 434 GFP_KERNEL);
435 /* Static in-kernel percpu data (used). */ 435 /* Static in-kernel percpu data (used). */
436 pcpu_size[0] = -(__per_cpu_end-__per_cpu_start); 436 pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
437 /* Free room. */ 437 /* Free room. */
438 pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; 438 pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
439 if (pcpu_size[1] < 0) { 439 if (pcpu_size[1] < 0) {
440 printk(KERN_ERR "No per-cpu room for modules.\n"); 440 printk(KERN_ERR "No per-cpu room for modules.\n");
441 pcpu_num_used = 1; 441 pcpu_num_used = 1;
442 } 442 }
443 443
444 return 0; 444 return 0;
445 } 445 }
446 __initcall(percpu_modinit); 446 __initcall(percpu_modinit);
447 #else /* ... !CONFIG_SMP */ 447 #else /* ... !CONFIG_SMP */
448 static inline void *percpu_modalloc(unsigned long size, unsigned long align, 448 static inline void *percpu_modalloc(unsigned long size, unsigned long align,
449 const char *name) 449 const char *name)
450 { 450 {
451 return NULL; 451 return NULL;
452 } 452 }
453 static inline void percpu_modfree(void *pcpuptr) 453 static inline void percpu_modfree(void *pcpuptr)
454 { 454 {
455 BUG(); 455 BUG();
456 } 456 }
457 static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, 457 static inline unsigned int find_pcpusec(Elf_Ehdr *hdr,
458 Elf_Shdr *sechdrs, 458 Elf_Shdr *sechdrs,
459 const char *secstrings) 459 const char *secstrings)
460 { 460 {
461 return 0; 461 return 0;
462 } 462 }
463 static inline void percpu_modcopy(void *pcpudst, const void *src, 463 static inline void percpu_modcopy(void *pcpudst, const void *src,
464 unsigned long size) 464 unsigned long size)
465 { 465 {
466 /* pcpusec should be 0, and size of that section should be 0. */ 466 /* pcpusec should be 0, and size of that section should be 0. */
467 BUG_ON(size != 0); 467 BUG_ON(size != 0);
468 } 468 }
469 #endif /* CONFIG_SMP */ 469 #endif /* CONFIG_SMP */
470 470
471 #define MODINFO_ATTR(field) \ 471 #define MODINFO_ATTR(field) \
472 static void setup_modinfo_##field(struct module *mod, const char *s) \ 472 static void setup_modinfo_##field(struct module *mod, const char *s) \
473 { \ 473 { \
474 mod->field = kstrdup(s, GFP_KERNEL); \ 474 mod->field = kstrdup(s, GFP_KERNEL); \
475 } \ 475 } \
476 static ssize_t show_modinfo_##field(struct module_attribute *mattr, \ 476 static ssize_t show_modinfo_##field(struct module_attribute *mattr, \
477 struct module *mod, char *buffer) \ 477 struct module *mod, char *buffer) \
478 { \ 478 { \
479 return sprintf(buffer, "%s\n", mod->field); \ 479 return sprintf(buffer, "%s\n", mod->field); \
480 } \ 480 } \
481 static int modinfo_##field##_exists(struct module *mod) \ 481 static int modinfo_##field##_exists(struct module *mod) \
482 { \ 482 { \
483 return mod->field != NULL; \ 483 return mod->field != NULL; \
484 } \ 484 } \
485 static void free_modinfo_##field(struct module *mod) \ 485 static void free_modinfo_##field(struct module *mod) \
486 { \ 486 { \
487 kfree(mod->field); \ 487 kfree(mod->field); \
488 mod->field = NULL; \ 488 mod->field = NULL; \
489 } \ 489 } \
490 static struct module_attribute modinfo_##field = { \ 490 static struct module_attribute modinfo_##field = { \
491 .attr = { .name = __stringify(field), .mode = 0444, \ 491 .attr = { .name = __stringify(field), .mode = 0444, \
492 .owner = THIS_MODULE }, \ 492 .owner = THIS_MODULE }, \
493 .show = show_modinfo_##field, \ 493 .show = show_modinfo_##field, \
494 .setup = setup_modinfo_##field, \ 494 .setup = setup_modinfo_##field, \
495 .test = modinfo_##field##_exists, \ 495 .test = modinfo_##field##_exists, \
496 .free = free_modinfo_##field, \ 496 .free = free_modinfo_##field, \
497 }; 497 };
498 498
499 MODINFO_ATTR(version); 499 MODINFO_ATTR(version);
500 MODINFO_ATTR(srcversion); 500 MODINFO_ATTR(srcversion);
501 501
502 #ifdef CONFIG_MODULE_UNLOAD 502 #ifdef CONFIG_MODULE_UNLOAD
503 /* Init the unload section of the module. */ 503 /* Init the unload section of the module. */
504 static void module_unload_init(struct module *mod) 504 static void module_unload_init(struct module *mod)
505 { 505 {
506 unsigned int i; 506 unsigned int i;
507 507
508 INIT_LIST_HEAD(&mod->modules_which_use_me); 508 INIT_LIST_HEAD(&mod->modules_which_use_me);
509 for (i = 0; i < NR_CPUS; i++) 509 for (i = 0; i < NR_CPUS; i++)
510 local_set(&mod->ref[i].count, 0); 510 local_set(&mod->ref[i].count, 0);
511 /* Hold reference count during initialization. */ 511 /* Hold reference count during initialization. */
512 local_set(&mod->ref[raw_smp_processor_id()].count, 1); 512 local_set(&mod->ref[raw_smp_processor_id()].count, 1);
513 /* Backwards-compatibility macros put the refcount during init. */ 513 /* Backwards-compatibility macros put the refcount during init. */
514 mod->waiter = current; 514 mod->waiter = current;
515 } 515 }
516 516
517 /* modules using other modules */ 517 /* modules using other modules */
518 struct module_use 518 struct module_use
519 { 519 {
520 struct list_head list; 520 struct list_head list;
521 struct module *module_which_uses; 521 struct module *module_which_uses;
522 }; 522 };
523 523
524 /* Does a already use b? */ 524 /* Does a already use b? */
525 static int already_uses(struct module *a, struct module *b) 525 static int already_uses(struct module *a, struct module *b)
526 { 526 {
527 struct module_use *use; 527 struct module_use *use;
528 528
529 list_for_each_entry(use, &b->modules_which_use_me, list) { 529 list_for_each_entry(use, &b->modules_which_use_me, list) {
530 if (use->module_which_uses == a) { 530 if (use->module_which_uses == a) {
531 DEBUGP("%s uses %s!\n", a->name, b->name); 531 DEBUGP("%s uses %s!\n", a->name, b->name);
532 return 1; 532 return 1;
533 } 533 }
534 } 534 }
535 DEBUGP("%s does not use %s!\n", a->name, b->name); 535 DEBUGP("%s does not use %s!\n", a->name, b->name);
536 return 0; 536 return 0;
537 } 537 }
538 538
539 /* Module a uses b */ 539 /* Module a uses b */
540 static int use_module(struct module *a, struct module *b) 540 static int use_module(struct module *a, struct module *b)
541 { 541 {
542 struct module_use *use; 542 struct module_use *use;
543 int no_warn; 543 int no_warn;
544 544
545 if (b == NULL || already_uses(a, b)) return 1; 545 if (b == NULL || already_uses(a, b)) return 1;
546 546
547 if (!strong_try_module_get(b)) 547 if (!strong_try_module_get(b))
548 return 0; 548 return 0;
549 549
550 DEBUGP("Allocating new usage for %s.\n", a->name); 550 DEBUGP("Allocating new usage for %s.\n", a->name);
551 use = kmalloc(sizeof(*use), GFP_ATOMIC); 551 use = kmalloc(sizeof(*use), GFP_ATOMIC);
552 if (!use) { 552 if (!use) {
553 printk("%s: out of memory loading\n", a->name); 553 printk("%s: out of memory loading\n", a->name);
554 module_put(b); 554 module_put(b);
555 return 0; 555 return 0;
556 } 556 }
557 557
558 use->module_which_uses = a; 558 use->module_which_uses = a;
559 list_add(&use->list, &b->modules_which_use_me); 559 list_add(&use->list, &b->modules_which_use_me);
560 no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); 560 no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name);
561 return 1; 561 return 1;
562 } 562 }
563 563
564 /* Clear the unload stuff of the module. */ 564 /* Clear the unload stuff of the module. */
565 static void module_unload_free(struct module *mod) 565 static void module_unload_free(struct module *mod)
566 { 566 {
567 struct module *i; 567 struct module *i;
568 568
569 list_for_each_entry(i, &modules, list) { 569 list_for_each_entry(i, &modules, list) {
570 struct module_use *use; 570 struct module_use *use;
571 571
572 list_for_each_entry(use, &i->modules_which_use_me, list) { 572 list_for_each_entry(use, &i->modules_which_use_me, list) {
573 if (use->module_which_uses == mod) { 573 if (use->module_which_uses == mod) {
574 DEBUGP("%s unusing %s\n", mod->name, i->name); 574 DEBUGP("%s unusing %s\n", mod->name, i->name);
575 module_put(i); 575 module_put(i);
576 list_del(&use->list); 576 list_del(&use->list);
577 kfree(use); 577 kfree(use);
578 sysfs_remove_link(i->holders_dir, mod->name); 578 sysfs_remove_link(i->holders_dir, mod->name);
579 /* There can be at most one match. */ 579 /* There can be at most one match. */
580 break; 580 break;
581 } 581 }
582 } 582 }
583 } 583 }
584 } 584 }
585 585
586 #ifdef CONFIG_MODULE_FORCE_UNLOAD 586 #ifdef CONFIG_MODULE_FORCE_UNLOAD
587 static inline int try_force_unload(unsigned int flags) 587 static inline int try_force_unload(unsigned int flags)
588 { 588 {
589 int ret = (flags & O_TRUNC); 589 int ret = (flags & O_TRUNC);
590 if (ret) 590 if (ret)
591 add_taint(TAINT_FORCED_RMMOD); 591 add_taint(TAINT_FORCED_RMMOD);
592 return ret; 592 return ret;
593 } 593 }
594 #else 594 #else
595 static inline int try_force_unload(unsigned int flags) 595 static inline int try_force_unload(unsigned int flags)
596 { 596 {
597 return 0; 597 return 0;
598 } 598 }
599 #endif /* CONFIG_MODULE_FORCE_UNLOAD */ 599 #endif /* CONFIG_MODULE_FORCE_UNLOAD */
600 600
601 struct stopref 601 struct stopref
602 { 602 {
603 struct module *mod; 603 struct module *mod;
604 int flags; 604 int flags;
605 int *forced; 605 int *forced;
606 }; 606 };
607 607
608 /* Whole machine is stopped with interrupts off when this runs. */ 608 /* Whole machine is stopped with interrupts off when this runs. */
609 static int __try_stop_module(void *_sref) 609 static int __try_stop_module(void *_sref)
610 { 610 {
611 struct stopref *sref = _sref; 611 struct stopref *sref = _sref;
612 612
613 /* If it's not unused, quit unless we are told to block. */ 613 /* If it's not unused, quit unless we are told to block. */
614 if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) { 614 if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) {
615 if (!(*sref->forced = try_force_unload(sref->flags))) 615 if (!(*sref->forced = try_force_unload(sref->flags)))
616 return -EWOULDBLOCK; 616 return -EWOULDBLOCK;
617 } 617 }
618 618
619 /* Mark it as dying. */ 619 /* Mark it as dying. */
620 sref->mod->state = MODULE_STATE_GOING; 620 sref->mod->state = MODULE_STATE_GOING;
621 return 0; 621 return 0;
622 } 622 }
623 623
624 static int try_stop_module(struct module *mod, int flags, int *forced) 624 static int try_stop_module(struct module *mod, int flags, int *forced)
625 { 625 {
626 struct stopref sref = { mod, flags, forced }; 626 struct stopref sref = { mod, flags, forced };
627 627
628 return stop_machine_run(__try_stop_module, &sref, NR_CPUS); 628 return stop_machine_run(__try_stop_module, &sref, NR_CPUS);
629 } 629 }
630 630
631 unsigned int module_refcount(struct module *mod) 631 unsigned int module_refcount(struct module *mod)
632 { 632 {
633 unsigned int i, total = 0; 633 unsigned int i, total = 0;
634 634
635 for (i = 0; i < NR_CPUS; i++) 635 for (i = 0; i < NR_CPUS; i++)
636 total += local_read(&mod->ref[i].count); 636 total += local_read(&mod->ref[i].count);
637 return total; 637 return total;
638 } 638 }
639 EXPORT_SYMBOL(module_refcount); 639 EXPORT_SYMBOL(module_refcount);
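module_refcount() only ever sums the per-CPU local_t counters; the get/put paths (see module_put() below) touch only the counter of the CPU they run on, so individual slots can go negative when a put happens on a different CPU than the matching get, and only the total is meaningful. A standalone sketch of that counting scheme, with plain longs standing in for local_t and a made-up NCPU:

/* Standalone sketch (not kernel code): a per-CPU style counter where each
 * context updates its own slot and readers sum all slots. */
#include <stdio.h>

#define NCPU 4

struct counter {
        long percpu[NCPU];
};

static void counter_inc(struct counter *c, int cpu) { c->percpu[cpu]++; }
static void counter_dec(struct counter *c, int cpu) { c->percpu[cpu]--; }

static long counter_read(struct counter *c)
{
        long total = 0;

        for (int cpu = 0; cpu < NCPU; cpu++)
                total += c->percpu[cpu];
        return total;
}

int main(void)
{
        struct counter refs = { { 0 } };

        counter_inc(&refs, 0);          /* get on CPU 0 */
        counter_inc(&refs, 2);          /* get on CPU 2 */
        counter_dec(&refs, 1);          /* the put may run on another CPU */

        printf("refcount = %ld\n", counter_read(&refs));        /* prints 1 */
        return 0;
}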
640 640
641 /* This exists whether we can unload or not */ 641 /* This exists whether we can unload or not */
642 static void free_module(struct module *mod); 642 static void free_module(struct module *mod);
643 643
644 static void wait_for_zero_refcount(struct module *mod) 644 static void wait_for_zero_refcount(struct module *mod)
645 { 645 {
646 /* Since we might sleep for some time, drop the mutex first */ 646 /* Since we might sleep for some time, drop the mutex first */
647 mutex_unlock(&module_mutex); 647 mutex_unlock(&module_mutex);
648 for (;;) { 648 for (;;) {
649 DEBUGP("Looking at refcount...\n"); 649 DEBUGP("Looking at refcount...\n");
650 set_current_state(TASK_UNINTERRUPTIBLE); 650 set_current_state(TASK_UNINTERRUPTIBLE);
651 if (module_refcount(mod) == 0) 651 if (module_refcount(mod) == 0)
652 break; 652 break;
653 schedule(); 653 schedule();
654 } 654 }
655 current->state = TASK_RUNNING; 655 current->state = TASK_RUNNING;
656 mutex_lock(&module_mutex); 656 mutex_lock(&module_mutex);
657 } 657 }
658 658
659 asmlinkage long 659 asmlinkage long
660 sys_delete_module(const char __user *name_user, unsigned int flags) 660 sys_delete_module(const char __user *name_user, unsigned int flags)
661 { 661 {
662 struct module *mod; 662 struct module *mod;
663 char name[MODULE_NAME_LEN]; 663 char name[MODULE_NAME_LEN];
664 int ret, forced = 0; 664 int ret, forced = 0;
665 665
666 if (!capable(CAP_SYS_MODULE)) 666 if (!capable(CAP_SYS_MODULE))
667 return -EPERM; 667 return -EPERM;
668 668
669 if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) 669 if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0)
670 return -EFAULT; 670 return -EFAULT;
671 name[MODULE_NAME_LEN-1] = '\0'; 671 name[MODULE_NAME_LEN-1] = '\0';
672 672
673 if (mutex_lock_interruptible(&module_mutex) != 0) 673 if (mutex_lock_interruptible(&module_mutex) != 0)
674 return -EINTR; 674 return -EINTR;
675 675
676 mod = find_module(name); 676 mod = find_module(name);
677 if (!mod) { 677 if (!mod) {
678 ret = -ENOENT; 678 ret = -ENOENT;
679 goto out; 679 goto out;
680 } 680 }
681 681
682 if (!list_empty(&mod->modules_which_use_me)) { 682 if (!list_empty(&mod->modules_which_use_me)) {
683 /* Other modules depend on us: get rid of them first. */ 683 /* Other modules depend on us: get rid of them first. */
684 ret = -EWOULDBLOCK; 684 ret = -EWOULDBLOCK;
685 goto out; 685 goto out;
686 } 686 }
687 687
688 /* Doing init or already dying? */ 688 /* Doing init or already dying? */
689 if (mod->state != MODULE_STATE_LIVE) { 689 if (mod->state != MODULE_STATE_LIVE) {
690 /* FIXME: if (force), slam module count and wake up 690 /* FIXME: if (force), slam module count and wake up
691 waiter --RR */ 691 waiter --RR */
692 DEBUGP("%s already dying\n", mod->name); 692 DEBUGP("%s already dying\n", mod->name);
693 ret = -EBUSY; 693 ret = -EBUSY;
694 goto out; 694 goto out;
695 } 695 }
696 696
697 /* If it has an init func, it must have an exit func to unload */ 697 /* If it has an init func, it must have an exit func to unload */
698 if ((mod->init != NULL && mod->exit == NULL) 698 if ((mod->init != NULL && mod->exit == NULL)
699 || mod->unsafe) { 699 || mod->unsafe) {
700 forced = try_force_unload(flags); 700 forced = try_force_unload(flags);
701 if (!forced) { 701 if (!forced) {
702 /* This module can't be removed */ 702 /* This module can't be removed */
703 ret = -EBUSY; 703 ret = -EBUSY;
704 goto out; 704 goto out;
705 } 705 }
706 } 706 }
707 707
708 /* Set this up before setting mod->state */ 708 /* Set this up before setting mod->state */
709 mod->waiter = current; 709 mod->waiter = current;
710 710
711 /* Stop the machine so refcounts can't move and disable module. */ 711 /* Stop the machine so refcounts can't move and disable module. */
712 ret = try_stop_module(mod, flags, &forced); 712 ret = try_stop_module(mod, flags, &forced);
713 if (ret != 0) 713 if (ret != 0)
714 goto out; 714 goto out;
715 715
716 /* Never wait if forced. */ 716 /* Never wait if forced. */
717 if (!forced && module_refcount(mod) != 0) 717 if (!forced && module_refcount(mod) != 0)
718 wait_for_zero_refcount(mod); 718 wait_for_zero_refcount(mod);
719 719
720 /* Final destruction now that no one is using it. */ 720 /* Final destruction now that no one is using it. */
721 if (mod->exit != NULL) { 721 if (mod->exit != NULL) {
722 mutex_unlock(&module_mutex); 722 mutex_unlock(&module_mutex);
723 mod->exit(); 723 mod->exit();
724 mutex_lock(&module_mutex); 724 mutex_lock(&module_mutex);
725 } 725 }
726 free_module(mod); 726 free_module(mod);
727 727
728 out: 728 out:
729 mutex_unlock(&module_mutex); 729 mutex_unlock(&module_mutex);
730 return ret; 730 return ret;
731 } 731 }
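From userspace this path is reached through the delete_module(2) system call, which is what rmmod issues; the flags argument is reused for the O_NONBLOCK and (with CONFIG_MODULE_FORCE_UNLOAD) O_TRUNC checks seen above. A minimal, hedged userspace sketch; the module name "foo" is made up and the call needs CAP_SYS_MODULE:

/* Userspace sketch: ask the kernel to unload module "foo" without
 * blocking; fails with EWOULDBLOCK if the refcount is still non-zero. */
#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
        if (syscall(SYS_delete_module, "foo", O_NONBLOCK) != 0)
                perror("delete_module");
        return 0;
}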
732 732
733 static void print_unload_info(struct seq_file *m, struct module *mod) 733 static void print_unload_info(struct seq_file *m, struct module *mod)
734 { 734 {
735 struct module_use *use; 735 struct module_use *use;
736 int printed_something = 0; 736 int printed_something = 0;
737 737
738 seq_printf(m, " %u ", module_refcount(mod)); 738 seq_printf(m, " %u ", module_refcount(mod));
739 739
740 /* Always include a trailing , so userspace can differentiate 740 /* Always include a trailing , so userspace can differentiate
741 between this and the old multi-field proc format. */ 741 between this and the old multi-field proc format. */
742 list_for_each_entry(use, &mod->modules_which_use_me, list) { 742 list_for_each_entry(use, &mod->modules_which_use_me, list) {
743 printed_something = 1; 743 printed_something = 1;
744 seq_printf(m, "%s,", use->module_which_uses->name); 744 seq_printf(m, "%s,", use->module_which_uses->name);
745 } 745 }
746 746
747 if (mod->unsafe) { 747 if (mod->unsafe) {
748 printed_something = 1; 748 printed_something = 1;
749 seq_printf(m, "[unsafe],"); 749 seq_printf(m, "[unsafe],");
750 } 750 }
751 751
752 if (mod->init != NULL && mod->exit == NULL) { 752 if (mod->init != NULL && mod->exit == NULL) {
753 printed_something = 1; 753 printed_something = 1;
754 seq_printf(m, "[permanent],"); 754 seq_printf(m, "[permanent],");
755 } 755 }
756 756
757 if (!printed_something) 757 if (!printed_something)
758 seq_printf(m, "-"); 758 seq_printf(m, "-");
759 } 759 }
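
print_unload_info() above produces the third and fourth columns of a /proc/modules line: the reference count, then either "-" or a comma-terminated list of users (possibly including the [unsafe]/[permanent] markers). As a rough userspace illustration -- not part of this patch -- the following splits that column; the per-line layout assumed here is name, size, refcount, users, state, address.

/* Illustration only: split the "used by" column of /proc/modules,
 * which print_unload_info() fills in. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/modules", "r");
	char line[512];

	if (!f) {
		perror("/proc/modules");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		char name[64], users[256];
		unsigned long size;
		unsigned int refcnt;

		if (sscanf(line, "%63s %lu %u %255s",
			   name, &size, &refcnt, users) != 4)
			continue;	/* e.g. "- -" when CONFIG_MODULE_UNLOAD is off */
		printf("%s: refcount %u, used by:", name, refcnt);
		if (strcmp(users, "-") == 0) {
			printf(" nothing\n");
			continue;
		}
		/* the guaranteed trailing ',' means every token is a real entry */
		for (char *p = strtok(users, ","); p; p = strtok(NULL, ","))
			printf(" %s", p);
		printf("\n");
	}
	fclose(f);
	return 0;
}
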
760 760
761 void __symbol_put(const char *symbol) 761 void __symbol_put(const char *symbol)
762 { 762 {
763 struct module *owner; 763 struct module *owner;
764 unsigned long flags; 764 unsigned long flags;
765 const unsigned long *crc; 765 const unsigned long *crc;
766 766
767 spin_lock_irqsave(&modlist_lock, flags); 767 spin_lock_irqsave(&modlist_lock, flags);
768 if (!__find_symbol(symbol, &owner, &crc, 1)) 768 if (!__find_symbol(symbol, &owner, &crc, 1))
769 BUG(); 769 BUG();
770 module_put(owner); 770 module_put(owner);
771 spin_unlock_irqrestore(&modlist_lock, flags); 771 spin_unlock_irqrestore(&modlist_lock, flags);
772 } 772 }
773 EXPORT_SYMBOL(__symbol_put); 773 EXPORT_SYMBOL(__symbol_put);
774 774
775 void symbol_put_addr(void *addr) 775 void symbol_put_addr(void *addr)
776 { 776 {
777 struct module *modaddr; 777 struct module *modaddr;
778 778
779 if (core_kernel_text((unsigned long)addr)) 779 if (core_kernel_text((unsigned long)addr))
780 return; 780 return;
781 781
782 if (!(modaddr = module_text_address((unsigned long)addr))) 782 if (!(modaddr = module_text_address((unsigned long)addr)))
783 BUG(); 783 BUG();
784 module_put(modaddr); 784 module_put(modaddr);
785 } 785 }
786 EXPORT_SYMBOL_GPL(symbol_put_addr); 786 EXPORT_SYMBOL_GPL(symbol_put_addr);
787 787
788 static ssize_t show_refcnt(struct module_attribute *mattr, 788 static ssize_t show_refcnt(struct module_attribute *mattr,
789 struct module *mod, char *buffer) 789 struct module *mod, char *buffer)
790 { 790 {
791 /* sysfs holds a reference */ 791 /* sysfs holds a reference */
792 return sprintf(buffer, "%u\n", module_refcount(mod)-1); 792 return sprintf(buffer, "%u\n", module_refcount(mod)-1);
793 } 793 }
794 794
795 static struct module_attribute refcnt = { 795 static struct module_attribute refcnt = {
796 .attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE }, 796 .attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE },
797 .show = show_refcnt, 797 .show = show_refcnt,
798 }; 798 };
799 799
800 void module_put(struct module *module) 800 void module_put(struct module *module)
801 { 801 {
802 if (module) { 802 if (module) {
803 unsigned int cpu = get_cpu(); 803 unsigned int cpu = get_cpu();
804 local_dec(&module->ref[cpu].count); 804 local_dec(&module->ref[cpu].count);
805 /* Maybe they're waiting for us to drop reference? */ 805 /* Maybe they're waiting for us to drop reference? */
806 if (unlikely(!module_is_live(module))) 806 if (unlikely(!module_is_live(module)))
807 wake_up_process(module->waiter); 807 wake_up_process(module->waiter);
808 put_cpu(); 808 put_cpu();
809 } 809 }
810 } 810 }
811 EXPORT_SYMBOL(module_put); 811 EXPORT_SYMBOL(module_put);
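
module_put() above only touches the counter of the CPU it happens to run on; the actual reference count is the sum that module_refcount() computes over all CPUs, so an individual per-CPU counter may legitimately go negative. A standalone sketch of that split-counter idea (the slot count and helper names are invented):

/* Sketch of split reference counting: each "CPU" (a slot here) has
 * its own counter and the true count is the sum over all slots. */
#include <stdatomic.h>
#include <stdio.h>

#define NSLOTS 4			/* stands in for NR_CPUS */

static atomic_long ref[NSLOTS];

static void ref_get(int slot) { atomic_fetch_add(&ref[slot], 1); }
static void ref_put(int slot) { atomic_fetch_sub(&ref[slot], 1); }

static long ref_total(void)
{
	long sum = 0;

	for (int i = 0; i < NSLOTS; i++)
		sum += atomic_load(&ref[i]);
	return sum;
}

int main(void)
{
	ref_get(0);		/* reference taken on one CPU ...        */
	ref_get(1);
	ref_put(3);		/* ... may be dropped on another, so a
				   single slot can go negative           */
	printf("total = %ld\n", ref_total());	/* only the sum matters: 1 */
	return 0;
}
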
812 812
813 #else /* !CONFIG_MODULE_UNLOAD */ 813 #else /* !CONFIG_MODULE_UNLOAD */
814 static void print_unload_info(struct seq_file *m, struct module *mod) 814 static void print_unload_info(struct seq_file *m, struct module *mod)
815 { 815 {
816 /* We don't know the usage count, or what modules are using it. */ 816 /* We don't know the usage count, or what modules are using it. */
817 seq_printf(m, " - -"); 817 seq_printf(m, " - -");
818 } 818 }
819 819
820 static inline void module_unload_free(struct module *mod) 820 static inline void module_unload_free(struct module *mod)
821 { 821 {
822 } 822 }
823 823
824 static inline int use_module(struct module *a, struct module *b) 824 static inline int use_module(struct module *a, struct module *b)
825 { 825 {
826 return strong_try_module_get(b); 826 return strong_try_module_get(b);
827 } 827 }
828 828
829 static inline void module_unload_init(struct module *mod) 829 static inline void module_unload_init(struct module *mod)
830 { 830 {
831 } 831 }
832 #endif /* CONFIG_MODULE_UNLOAD */ 832 #endif /* CONFIG_MODULE_UNLOAD */
833 833
834 static ssize_t show_initstate(struct module_attribute *mattr, 834 static ssize_t show_initstate(struct module_attribute *mattr,
835 struct module *mod, char *buffer) 835 struct module *mod, char *buffer)
836 { 836 {
837 const char *state = "unknown"; 837 const char *state = "unknown";
838 838
839 switch (mod->state) { 839 switch (mod->state) {
840 case MODULE_STATE_LIVE: 840 case MODULE_STATE_LIVE:
841 state = "live"; 841 state = "live";
842 break; 842 break;
843 case MODULE_STATE_COMING: 843 case MODULE_STATE_COMING:
844 state = "coming"; 844 state = "coming";
845 break; 845 break;
846 case MODULE_STATE_GOING: 846 case MODULE_STATE_GOING:
847 state = "going"; 847 state = "going";
848 break; 848 break;
849 } 849 }
850 return sprintf(buffer, "%s\n", state); 850 return sprintf(buffer, "%s\n", state);
851 } 851 }
852 852
853 static struct module_attribute initstate = { 853 static struct module_attribute initstate = {
854 .attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE }, 854 .attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE },
855 .show = show_initstate, 855 .show = show_initstate,
856 }; 856 };
857 857
858 static struct module_attribute *modinfo_attrs[] = { 858 static struct module_attribute *modinfo_attrs[] = {
859 &modinfo_version, 859 &modinfo_version,
860 &modinfo_srcversion, 860 &modinfo_srcversion,
861 &initstate, 861 &initstate,
862 #ifdef CONFIG_MODULE_UNLOAD 862 #ifdef CONFIG_MODULE_UNLOAD
863 &refcnt, 863 &refcnt,
864 #endif 864 #endif
865 NULL, 865 NULL,
866 }; 866 };
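
modinfo_attrs above is a NULL-terminated table of attribute descriptors; the sysfs code walks it and invokes each ->show() to render one read-only file such as /sys/module/<name>/initstate or refcnt. A stripped-down userspace imitation of that callback-table pattern (the structs and names below are mine, not the kernel's):

/* Minimal callback-table sketch in the spirit of modinfo_attrs:
 * each entry names an attribute and supplies a show() that formats
 * its value into a buffer. */
#include <stdio.h>

struct fake_module {
	const char *name;
	int state;			/* 0 = live, 1 = coming, 2 = going */
	unsigned int refcnt;
};

struct fake_attr {
	const char *name;
	int (*show)(const struct fake_module *m, char *buf, size_t len);
};

static int show_state(const struct fake_module *m, char *buf, size_t len)
{
	static const char *names[] = { "live", "coming", "going" };

	return snprintf(buf, len, "%s\n", names[m->state]);
}

static int show_refcnt(const struct fake_module *m, char *buf, size_t len)
{
	return snprintf(buf, len, "%u\n", m->refcnt);
}

static const struct fake_attr attrs[] = {
	{ "initstate",	show_state },
	{ "refcnt",	show_refcnt },
	{ NULL,		NULL },		/* table is NULL-terminated */
};

int main(void)
{
	struct fake_module m = { "example", 0, 3 };
	char buf[64];

	for (const struct fake_attr *a = attrs; a->name; a++) {
		a->show(&m, buf, sizeof(buf));
		printf("/sys/module/%s/%s -> %s", m.name, a->name, buf);
	}
	return 0;
}
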
867 867
868 static const char vermagic[] = VERMAGIC_STRING; 868 static const char vermagic[] = VERMAGIC_STRING;
869 869
870 #ifdef CONFIG_MODVERSIONS 870 #ifdef CONFIG_MODVERSIONS
871 static int check_version(Elf_Shdr *sechdrs, 871 static int check_version(Elf_Shdr *sechdrs,
872 unsigned int versindex, 872 unsigned int versindex,
873 const char *symname, 873 const char *symname,
874 struct module *mod, 874 struct module *mod,
875 const unsigned long *crc) 875 const unsigned long *crc)
876 { 876 {
877 unsigned int i, num_versions; 877 unsigned int i, num_versions;
878 struct modversion_info *versions; 878 struct modversion_info *versions;
879 879
880 /* Exporting module didn't supply crcs? OK, we're already tainted. */ 880 /* Exporting module didn't supply crcs? OK, we're already tainted. */
881 if (!crc) 881 if (!crc)
882 return 1; 882 return 1;
883 883
884 versions = (void *) sechdrs[versindex].sh_addr; 884 versions = (void *) sechdrs[versindex].sh_addr;
885 num_versions = sechdrs[versindex].sh_size 885 num_versions = sechdrs[versindex].sh_size
886 / sizeof(struct modversion_info); 886 / sizeof(struct modversion_info);
887 887
888 for (i = 0; i < num_versions; i++) { 888 for (i = 0; i < num_versions; i++) {
889 if (strcmp(versions[i].name, symname) != 0) 889 if (strcmp(versions[i].name, symname) != 0)
890 continue; 890 continue;
891 891
892 if (versions[i].crc == *crc) 892 if (versions[i].crc == *crc)
893 return 1; 893 return 1;
894 printk("%s: disagrees about version of symbol %s\n", 894 printk("%s: disagrees about version of symbol %s\n",
895 mod->name, symname); 895 mod->name, symname);
896 DEBUGP("Found checksum %lX vs module %lX\n", 896 DEBUGP("Found checksum %lX vs module %lX\n",
897 *crc, versions[i].crc); 897 *crc, versions[i].crc);
898 return 0; 898 return 0;
899 } 899 }
900 /* Not in module's version table. OK, but that taints the kernel. */ 900 /* Not in module's version table. OK, but that taints the kernel. */
901 if (!(tainted & TAINT_FORCED_MODULE)) 901 if (!(tainted & TAINT_FORCED_MODULE))
902 printk("%s: no version for \"%s\" found: kernel tainted.\n", 902 printk("%s: no version for \"%s\" found: kernel tainted.\n",
903 mod->name, symname); 903 mod->name, symname);
904 add_taint_module(mod, TAINT_FORCED_MODULE); 904 add_taint_module(mod, TAINT_FORCED_MODULE);
905 return 1; 905 return 1;
906 } 906 }
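
check_version() above scans the importing module's __versions table for the symbol name and insists the recorded CRC matches the exporter's; a symbol missing from the table is allowed but taints the kernel. The same lookup over a toy table (the entries and CRC values are invented, and the struct layout is simplified relative to the real modversion_info):

/* Toy version of the __versions CRC lookup done by check_version(). */
#include <stdio.h>
#include <string.h>

struct toy_version {
	unsigned long crc;
	char name[64];
};

static int crc_ok(const struct toy_version *v, unsigned int n,
		  const char *symname, unsigned long crc)
{
	for (unsigned int i = 0; i < n; i++) {
		if (strcmp(v[i].name, symname) != 0)
			continue;
		return v[i].crc == crc;	/* found: CRCs must agree */
	}
	return 1;	/* not listed: the kernel would taint but allow it */
}

int main(void)
{
	/* invented symbols and CRCs */
	struct toy_version vers[] = {
		{ 0x12345678, "printk" },
		{ 0x9abcdef0, "kmalloc" },
	};

	printf("%d\n", crc_ok(vers, 2, "printk",  0x12345678));	/* 1 */
	printf("%d\n", crc_ok(vers, 2, "kmalloc", 0xdeadbeef));		/* 0 */
	return 0;
}
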
907 907
908 static inline int check_modstruct_version(Elf_Shdr *sechdrs, 908 static inline int check_modstruct_version(Elf_Shdr *sechdrs,
909 unsigned int versindex, 909 unsigned int versindex,
910 struct module *mod) 910 struct module *mod)
911 { 911 {
912 const unsigned long *crc; 912 const unsigned long *crc;
913 struct module *owner; 913 struct module *owner;
914 914
915 if (!__find_symbol("struct_module", &owner, &crc, 1)) 915 if (!__find_symbol("struct_module", &owner, &crc, 1))
916 BUG(); 916 BUG();
917 return check_version(sechdrs, versindex, "struct_module", mod, 917 return check_version(sechdrs, versindex, "struct_module", mod,
918 crc); 918 crc);
919 } 919 }
920 920
921 /* First part is kernel version, which we ignore. */ 921 /* First part is kernel version, which we ignore. */
922 static inline int same_magic(const char *amagic, const char *bmagic) 922 static inline int same_magic(const char *amagic, const char *bmagic)
923 { 923 {
924 amagic += strcspn(amagic, " "); 924 amagic += strcspn(amagic, " ");
925 bmagic += strcspn(bmagic, " "); 925 bmagic += strcspn(bmagic, " ");
926 return strcmp(amagic, bmagic) == 0; 926 return strcmp(amagic, bmagic) == 0;
927 } 927 }
928 #else 928 #else
929 static inline int check_version(Elf_Shdr *sechdrs, 929 static inline int check_version(Elf_Shdr *sechdrs,
930 unsigned int versindex, 930 unsigned int versindex,
931 const char *symname, 931 const char *symname,
932 struct module *mod, 932 struct module *mod,
933 const unsigned long *crc) 933 const unsigned long *crc)
934 { 934 {
935 return 1; 935 return 1;
936 } 936 }
937 937
938 static inline int check_modstruct_version(Elf_Shdr *sechdrs, 938 static inline int check_modstruct_version(Elf_Shdr *sechdrs,
939 unsigned int versindex, 939 unsigned int versindex,
940 struct module *mod) 940 struct module *mod)
941 { 941 {
942 return 1; 942 return 1;
943 } 943 }
944 944
945 static inline int same_magic(const char *amagic, const char *bmagic) 945 static inline int same_magic(const char *amagic, const char *bmagic)
946 { 946 {
947 return strcmp(amagic, bmagic) == 0; 947 return strcmp(amagic, bmagic) == 0;
948 } 948 }
949 #endif /* CONFIG_MODVERSIONS */ 949 #endif /* CONFIG_MODVERSIONS */
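
The CONFIG_MODVERSIONS variant of same_magic() above deliberately skips everything up to the first space -- the kernel release string -- and compares only the remaining option flags, since the per-symbol CRCs already cover version compatibility. A quick userspace check of that behaviour on made-up vermagic strings:

/* Illustration of the vermagic comparison: the leading kernel
 * release is ignored, the trailing option flags must match. */
#include <stdio.h>
#include <string.h>

static int same_magic(const char *amagic, const char *bmagic)
{
	amagic += strcspn(amagic, " ");
	bmagic += strcspn(bmagic, " ");
	return strcmp(amagic, bmagic) == 0;
}

int main(void)
{
	/* example strings only; the real thing comes from VERMAGIC_STRING */
	const char *a = "2.6.22 SMP mod_unload 686";
	const char *b = "2.6.21 SMP mod_unload 686";
	const char *c = "2.6.22 mod_unload 686";

	printf("%d\n", same_magic(a, b));	/* 1: only the release differs */
	printf("%d\n", same_magic(a, c));	/* 0: option flags differ      */
	return 0;
}
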
950 950
951 /* Resolve a symbol for this module. I.e. if we find one, record usage. 951 /* Resolve a symbol for this module. I.e. if we find one, record usage.
952 Must be holding module_mutex. */ 952 Must be holding module_mutex. */
953 static unsigned long resolve_symbol(Elf_Shdr *sechdrs, 953 static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
954 unsigned int versindex, 954 unsigned int versindex,
955 const char *name, 955 const char *name,
956 struct module *mod) 956 struct module *mod)
957 { 957 {
958 struct module *owner; 958 struct module *owner;
959 unsigned long ret; 959 unsigned long ret;
960 const unsigned long *crc; 960 const unsigned long *crc;
961 961
962 ret = __find_symbol(name, &owner, &crc, 962 ret = __find_symbol(name, &owner, &crc,
963 !(mod->taints & TAINT_PROPRIETARY_MODULE)); 963 !(mod->taints & TAINT_PROPRIETARY_MODULE));
964 if (ret) { 964 if (ret) {
965 /* use_module can fail due to OOM, or module unloading */ 965 /* use_module can fail due to OOM, or module unloading */
966 if (!check_version(sechdrs, versindex, name, mod, crc) || 966 if (!check_version(sechdrs, versindex, name, mod, crc) ||
967 !use_module(mod, owner)) 967 !use_module(mod, owner))
968 ret = 0; 968 ret = 0;
969 } 969 }
970 return ret; 970 return ret;
971 } 971 }
972 972
973 973
974 /* 974 /*
975 * /sys/module/foo/sections stuff 975 * /sys/module/foo/sections stuff
976 * J. Corbet <corbet@lwn.net> 976 * J. Corbet <corbet@lwn.net>
977 */ 977 */
978 #ifdef CONFIG_KALLSYMS 978 #ifdef CONFIG_KALLSYMS
979 static ssize_t module_sect_show(struct module_attribute *mattr, 979 static ssize_t module_sect_show(struct module_attribute *mattr,
980 struct module *mod, char *buf) 980 struct module *mod, char *buf)
981 { 981 {
982 struct module_sect_attr *sattr = 982 struct module_sect_attr *sattr =
983 container_of(mattr, struct module_sect_attr, mattr); 983 container_of(mattr, struct module_sect_attr, mattr);
984 return sprintf(buf, "0x%lx\n", sattr->address); 984 return sprintf(buf, "0x%lx\n", sattr->address);
985 } 985 }
986 986
987 static void free_sect_attrs(struct module_sect_attrs *sect_attrs) 987 static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
988 { 988 {
989 int section; 989 int section;
990 990
991 for (section = 0; section < sect_attrs->nsections; section++) 991 for (section = 0; section < sect_attrs->nsections; section++)
992 kfree(sect_attrs->attrs[section].name); 992 kfree(sect_attrs->attrs[section].name);
993 kfree(sect_attrs); 993 kfree(sect_attrs);
994 } 994 }
995 995
996 static void add_sect_attrs(struct module *mod, unsigned int nsect, 996 static void add_sect_attrs(struct module *mod, unsigned int nsect,
997 char *secstrings, Elf_Shdr *sechdrs) 997 char *secstrings, Elf_Shdr *sechdrs)
998 { 998 {
999 unsigned int nloaded = 0, i, size[2]; 999 unsigned int nloaded = 0, i, size[2];
1000 struct module_sect_attrs *sect_attrs; 1000 struct module_sect_attrs *sect_attrs;
1001 struct module_sect_attr *sattr; 1001 struct module_sect_attr *sattr;
1002 struct attribute **gattr; 1002 struct attribute **gattr;
1003 1003
1004 /* Count loaded sections and allocate structures */ 1004 /* Count loaded sections and allocate structures */
1005 for (i = 0; i < nsect; i++) 1005 for (i = 0; i < nsect; i++)
1006 if (sechdrs[i].sh_flags & SHF_ALLOC) 1006 if (sechdrs[i].sh_flags & SHF_ALLOC)
1007 nloaded++; 1007 nloaded++;
1008 size[0] = ALIGN(sizeof(*sect_attrs) 1008 size[0] = ALIGN(sizeof(*sect_attrs)
1009 + nloaded * sizeof(sect_attrs->attrs[0]), 1009 + nloaded * sizeof(sect_attrs->attrs[0]),
1010 sizeof(sect_attrs->grp.attrs[0])); 1010 sizeof(sect_attrs->grp.attrs[0]));
1011 size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]); 1011 size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]);
1012 sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL); 1012 sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL);
1013 if (sect_attrs == NULL) 1013 if (sect_attrs == NULL)
1014 return; 1014 return;
1015 1015
1016 /* Setup section attributes. */ 1016 /* Setup section attributes. */
1017 sect_attrs->grp.name = "sections"; 1017 sect_attrs->grp.name = "sections";
1018 sect_attrs->grp.attrs = (void *)sect_attrs + size[0]; 1018 sect_attrs->grp.attrs = (void *)sect_attrs + size[0];
1019 1019
1020 sect_attrs->nsections = 0; 1020 sect_attrs->nsections = 0;
1021 sattr = &sect_attrs->attrs[0]; 1021 sattr = &sect_attrs->attrs[0];
1022 gattr = &sect_attrs->grp.attrs[0]; 1022 gattr = &sect_attrs->grp.attrs[0];
1023 for (i = 0; i < nsect; i++) { 1023 for (i = 0; i < nsect; i++) {
1024 if (! (sechdrs[i].sh_flags & SHF_ALLOC)) 1024 if (! (sechdrs[i].sh_flags & SHF_ALLOC))
1025 continue; 1025 continue;
1026 sattr->address = sechdrs[i].sh_addr; 1026 sattr->address = sechdrs[i].sh_addr;
1027 sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, 1027 sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
1028 GFP_KERNEL); 1028 GFP_KERNEL);
1029 if (sattr->name == NULL) 1029 if (sattr->name == NULL)
1030 goto out; 1030 goto out;
1031 sect_attrs->nsections++; 1031 sect_attrs->nsections++;
1032 sattr->mattr.show = module_sect_show; 1032 sattr->mattr.show = module_sect_show;
1033 sattr->mattr.store = NULL; 1033 sattr->mattr.store = NULL;
1034 sattr->mattr.attr.name = sattr->name; 1034 sattr->mattr.attr.name = sattr->name;
1035 sattr->mattr.attr.owner = mod; 1035 sattr->mattr.attr.owner = mod;
1036 sattr->mattr.attr.mode = S_IRUGO; 1036 sattr->mattr.attr.mode = S_IRUGO;
1037 *(gattr++) = &(sattr++)->mattr.attr; 1037 *(gattr++) = &(sattr++)->mattr.attr;
1038 } 1038 }
1039 *gattr = NULL; 1039 *gattr = NULL;
1040 1040
1041 if (sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp)) 1041 if (sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp))
1042 goto out; 1042 goto out;
1043 1043
1044 mod->sect_attrs = sect_attrs; 1044 mod->sect_attrs = sect_attrs;
1045 return; 1045 return;
1046 out: 1046 out:
1047 free_sect_attrs(sect_attrs); 1047 free_sect_attrs(sect_attrs);
1048 } 1048 }
1049 1049
1050 static void remove_sect_attrs(struct module *mod) 1050 static void remove_sect_attrs(struct module *mod)
1051 { 1051 {
1052 if (mod->sect_attrs) { 1052 if (mod->sect_attrs) {
1053 sysfs_remove_group(&mod->mkobj.kobj, 1053 sysfs_remove_group(&mod->mkobj.kobj,
1054 &mod->sect_attrs->grp); 1054 &mod->sect_attrs->grp);
1055 /* We are positive that no one is using any sect attrs 1055 /* We are positive that no one is using any sect attrs
1056 * at this point. Deallocate immediately. */ 1056 * at this point. Deallocate immediately. */
1057 free_sect_attrs(mod->sect_attrs); 1057 free_sect_attrs(mod->sect_attrs);
1058 mod->sect_attrs = NULL; 1058 mod->sect_attrs = NULL;
1059 } 1059 }
1060 } 1060 }
1061 1061
1062 #else 1062 #else
1063 1063
1064 static inline void add_sect_attrs(struct module *mod, unsigned int nsect, 1064 static inline void add_sect_attrs(struct module *mod, unsigned int nsect,
1065 char *sectstrings, Elf_Shdr *sechdrs) 1065 char *sectstrings, Elf_Shdr *sechdrs)
1066 { 1066 {
1067 } 1067 }
1068 1068
1069 static inline void remove_sect_attrs(struct module *mod) 1069 static inline void remove_sect_attrs(struct module *mod)
1070 { 1070 {
1071 } 1071 }
1072 #endif /* CONFIG_KALLSYMS */ 1072 #endif /* CONFIG_KALLSYMS */
1073 1073
1074 #ifdef CONFIG_SYSFS 1074 #ifdef CONFIG_SYSFS
1075 int module_add_modinfo_attrs(struct module *mod) 1075 int module_add_modinfo_attrs(struct module *mod)
1076 { 1076 {
1077 struct module_attribute *attr; 1077 struct module_attribute *attr;
1078 struct module_attribute *temp_attr; 1078 struct module_attribute *temp_attr;
1079 int error = 0; 1079 int error = 0;
1080 int i; 1080 int i;
1081 1081
1082 mod->modinfo_attrs = kzalloc((sizeof(struct module_attribute) * 1082 mod->modinfo_attrs = kzalloc((sizeof(struct module_attribute) *
1083 (ARRAY_SIZE(modinfo_attrs) + 1)), 1083 (ARRAY_SIZE(modinfo_attrs) + 1)),
1084 GFP_KERNEL); 1084 GFP_KERNEL);
1085 if (!mod->modinfo_attrs) 1085 if (!mod->modinfo_attrs)
1086 return -ENOMEM; 1086 return -ENOMEM;
1087 1087
1088 temp_attr = mod->modinfo_attrs; 1088 temp_attr = mod->modinfo_attrs;
1089 for (i = 0; (attr = modinfo_attrs[i]) && !error; i++) { 1089 for (i = 0; (attr = modinfo_attrs[i]) && !error; i++) {
1090 if (!attr->test || 1090 if (!attr->test ||
1091 (attr->test && attr->test(mod))) { 1091 (attr->test && attr->test(mod))) {
1092 memcpy(temp_attr, attr, sizeof(*temp_attr)); 1092 memcpy(temp_attr, attr, sizeof(*temp_attr));
1093 temp_attr->attr.owner = mod; 1093 temp_attr->attr.owner = mod;
1094 error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); 1094 error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr);
1095 ++temp_attr; 1095 ++temp_attr;
1096 } 1096 }
1097 } 1097 }
1098 return error; 1098 return error;
1099 } 1099 }
1100 1100
1101 void module_remove_modinfo_attrs(struct module *mod) 1101 void module_remove_modinfo_attrs(struct module *mod)
1102 { 1102 {
1103 struct module_attribute *attr; 1103 struct module_attribute *attr;
1104 int i; 1104 int i;
1105 1105
1106 for (i = 0; (attr = &mod->modinfo_attrs[i]); i++) { 1106 for (i = 0; (attr = &mod->modinfo_attrs[i]); i++) {
1107 /* pick a field to test for end of list */ 1107 /* pick a field to test for end of list */
1108 if (!attr->attr.name) 1108 if (!attr->attr.name)
1109 break; 1109 break;
1110 sysfs_remove_file(&mod->mkobj.kobj,&attr->attr); 1110 sysfs_remove_file(&mod->mkobj.kobj,&attr->attr);
1111 if (attr->free) 1111 if (attr->free)
1112 attr->free(mod); 1112 attr->free(mod);
1113 } 1113 }
1114 kfree(mod->modinfo_attrs); 1114 kfree(mod->modinfo_attrs);
1115 } 1115 }
1116 #endif 1116 #endif
1117 1117
1118 #ifdef CONFIG_SYSFS 1118 #ifdef CONFIG_SYSFS
1119 int mod_sysfs_init(struct module *mod) 1119 int mod_sysfs_init(struct module *mod)
1120 { 1120 {
1121 int err; 1121 int err;
1122 1122
1123 if (!module_sysfs_initialized) { 1123 if (!module_sysfs_initialized) {
1124 printk(KERN_ERR "%s: module sysfs not initialized\n", 1124 printk(KERN_ERR "%s: module sysfs not initialized\n",
1125 mod->name); 1125 mod->name);
1126 err = -EINVAL; 1126 err = -EINVAL;
1127 goto out; 1127 goto out;
1128 } 1128 }
1129 memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj)); 1129 memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj));
1130 err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name); 1130 err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name);
1131 if (err) 1131 if (err)
1132 goto out; 1132 goto out;
1133 kobj_set_kset_s(&mod->mkobj, module_subsys); 1133 kobj_set_kset_s(&mod->mkobj, module_subsys);
1134 mod->mkobj.mod = mod; 1134 mod->mkobj.mod = mod;
1135 1135
1136 kobject_init(&mod->mkobj.kobj); 1136 kobject_init(&mod->mkobj.kobj);
1137 1137
1138 out: 1138 out:
1139 return err; 1139 return err;
1140 } 1140 }
1141 1141
1142 int mod_sysfs_setup(struct module *mod, 1142 int mod_sysfs_setup(struct module *mod,
1143 struct kernel_param *kparam, 1143 struct kernel_param *kparam,
1144 unsigned int num_params) 1144 unsigned int num_params)
1145 { 1145 {
1146 int err; 1146 int err;
1147 1147
1148 /* delay uevent until full sysfs population */ 1148 /* delay uevent until full sysfs population */
1149 err = kobject_add(&mod->mkobj.kobj); 1149 err = kobject_add(&mod->mkobj.kobj);
1150 if (err) 1150 if (err)
1151 goto out; 1151 goto out;
1152 1152
1153 mod->holders_dir = kobject_add_dir(&mod->mkobj.kobj, "holders"); 1153 mod->holders_dir = kobject_add_dir(&mod->mkobj.kobj, "holders");
1154 if (!mod->holders_dir) { 1154 if (!mod->holders_dir) {
1155 err = -ENOMEM; 1155 err = -ENOMEM;
1156 goto out_unreg; 1156 goto out_unreg;
1157 } 1157 }
1158 1158
1159 err = module_param_sysfs_setup(mod, kparam, num_params); 1159 err = module_param_sysfs_setup(mod, kparam, num_params);
1160 if (err) 1160 if (err)
1161 goto out_unreg_holders; 1161 goto out_unreg_holders;
1162 1162
1163 err = module_add_modinfo_attrs(mod); 1163 err = module_add_modinfo_attrs(mod);
1164 if (err) 1164 if (err)
1165 goto out_unreg_param; 1165 goto out_unreg_param;
1166 1166
1167 kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); 1167 kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
1168 return 0; 1168 return 0;
1169 1169
1170 out_unreg_param: 1170 out_unreg_param:
1171 module_param_sysfs_remove(mod); 1171 module_param_sysfs_remove(mod);
1172 out_unreg_holders: 1172 out_unreg_holders:
1173 kobject_unregister(mod->holders_dir); 1173 kobject_unregister(mod->holders_dir);
1174 out_unreg: 1174 out_unreg:
1175 kobject_del(&mod->mkobj.kobj); 1175 kobject_del(&mod->mkobj.kobj);
1176 kobject_put(&mod->mkobj.kobj); 1176 kobject_put(&mod->mkobj.kobj);
1177 out: 1177 out:
1178 return err; 1178 return err;
1179 } 1179 }
1180 #endif 1180 #endif
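
mod_sysfs_setup() above uses the usual kernel error-unwind style: each step that can fail jumps to a label that undoes only the steps already completed, in reverse order, before returning the error. A generic standalone sketch of that pattern (all function names below are invented):

/* Generic sketch of the goto-unwind pattern. */
#include <stdio.h>

static int setup_a(void) { puts("setup a"); return 0; }
static int setup_b(void) { puts("setup b"); return 0; }
static int setup_c(void) { puts("setup c"); return -1; }	/* pretend this fails */
static void undo_b(void) { puts("undo b"); }
static void undo_a(void) { puts("undo a"); }

static int do_setup(void)
{
	int err;

	err = setup_a();
	if (err)
		goto out;
	err = setup_b();
	if (err)
		goto out_undo_a;
	err = setup_c();
	if (err)
		goto out_undo_b;
	return 0;

out_undo_b:
	undo_b();
out_undo_a:
	undo_a();
out:
	return err;
}

int main(void)
{
	printf("do_setup() = %d\n", do_setup());
	return 0;
}
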
1181 1181
1182 static void mod_kobject_remove(struct module *mod) 1182 static void mod_kobject_remove(struct module *mod)
1183 { 1183 {
1184 module_remove_modinfo_attrs(mod); 1184 module_remove_modinfo_attrs(mod);
1185 module_param_sysfs_remove(mod); 1185 module_param_sysfs_remove(mod);
1186 kobject_unregister(mod->mkobj.drivers_dir); 1186 kobject_unregister(mod->mkobj.drivers_dir);
1187 kobject_unregister(mod->holders_dir); 1187 kobject_unregister(mod->holders_dir);
1188 kobject_unregister(&mod->mkobj.kobj); 1188 kobject_unregister(&mod->mkobj.kobj);
1189 } 1189 }
1190 1190
1191 /* 1191 /*
1192 * unlink the module while the whole machine is stopped with interrupts off 1192 * unlink the module while the whole machine is stopped with interrupts off
1193 * - this defends against kallsyms not taking locks 1193 * - this defends against kallsyms not taking locks
1194 */ 1194 */
1195 static int __unlink_module(void *_mod) 1195 static int __unlink_module(void *_mod)
1196 { 1196 {
1197 struct module *mod = _mod; 1197 struct module *mod = _mod;
1198 list_del(&mod->list); 1198 list_del(&mod->list);
1199 return 0; 1199 return 0;
1200 } 1200 }
1201 1201
1202 /* Free a module, remove from lists, etc (must hold module mutex). */ 1202 /* Free a module, remove from lists, etc (must hold module mutex). */
1203 static void free_module(struct module *mod) 1203 static void free_module(struct module *mod)
1204 { 1204 {
1205 /* Delete from various lists */ 1205 /* Delete from various lists */
1206 stop_machine_run(__unlink_module, mod, NR_CPUS); 1206 stop_machine_run(__unlink_module, mod, NR_CPUS);
1207 remove_sect_attrs(mod); 1207 remove_sect_attrs(mod);
1208 mod_kobject_remove(mod); 1208 mod_kobject_remove(mod);
1209 1209
1210 unwind_remove_table(mod->unwind_info, 0); 1210 unwind_remove_table(mod->unwind_info, 0);
1211 1211
1212 /* Arch-specific cleanup. */ 1212 /* Arch-specific cleanup. */
1213 module_arch_cleanup(mod); 1213 module_arch_cleanup(mod);
1214 1214
1215 /* Module unload stuff */ 1215 /* Module unload stuff */
1216 module_unload_free(mod); 1216 module_unload_free(mod);
1217 1217
1218 /* This may be NULL, but that's OK */ 1218 /* This may be NULL, but that's OK */
1219 module_free(mod, mod->module_init); 1219 module_free(mod, mod->module_init);
1220 kfree(mod->args); 1220 kfree(mod->args);
1221 if (mod->percpu) 1221 if (mod->percpu)
1222 percpu_modfree(mod->percpu); 1222 percpu_modfree(mod->percpu);
1223 1223
1224 /* Free lock-classes: */ 1224 /* Free lock-classes: */
1225 lockdep_free_key_range(mod->module_core, mod->core_size); 1225 lockdep_free_key_range(mod->module_core, mod->core_size);
1226 1226
1227 /* Finally, free the core (containing the module structure) */ 1227 /* Finally, free the core (containing the module structure) */
1228 module_free(mod, mod->module_core); 1228 module_free(mod, mod->module_core);
1229 } 1229 }
1230 1230
1231 void *__symbol_get(const char *symbol) 1231 void *__symbol_get(const char *symbol)
1232 { 1232 {
1233 struct module *owner; 1233 struct module *owner;
1234 unsigned long value, flags; 1234 unsigned long value, flags;
1235 const unsigned long *crc; 1235 const unsigned long *crc;
1236 1236
1237 spin_lock_irqsave(&modlist_lock, flags); 1237 spin_lock_irqsave(&modlist_lock, flags);
1238 value = __find_symbol(symbol, &owner, &crc, 1); 1238 value = __find_symbol(symbol, &owner, &crc, 1);
1239 if (value && !strong_try_module_get(owner)) 1239 if (value && !strong_try_module_get(owner))
1240 value = 0; 1240 value = 0;
1241 spin_unlock_irqrestore(&modlist_lock, flags); 1241 spin_unlock_irqrestore(&modlist_lock, flags);
1242 1242
1243 return (void *)value; 1243 return (void *)value;
1244 } 1244 }
1245 EXPORT_SYMBOL_GPL(__symbol_get); 1245 EXPORT_SYMBOL_GPL(__symbol_get);
1246 1246
1247 /* 1247 /*
1248 * Ensure that an exported symbol [global namespace] does not already exist 1248 * Ensure that an exported symbol [global namespace] does not already exist
1249 * in the kernel or in some other module's exported symbol table. 1249 * in the kernel or in some other module's exported symbol table.
1250 */ 1250 */
1251 static int verify_export_symbols(struct module *mod) 1251 static int verify_export_symbols(struct module *mod)
1252 { 1252 {
1253 const char *name = NULL; 1253 const char *name = NULL;
1254 unsigned long i, ret = 0; 1254 unsigned long i, ret = 0;
1255 struct module *owner; 1255 struct module *owner;
1256 const unsigned long *crc; 1256 const unsigned long *crc;
1257 1257
1258 for (i = 0; i < mod->num_syms; i++) 1258 for (i = 0; i < mod->num_syms; i++)
1259 if (__find_symbol(mod->syms[i].name, &owner, &crc, 1)) { 1259 if (__find_symbol(mod->syms[i].name, &owner, &crc, 1)) {
1260 name = mod->syms[i].name; 1260 name = mod->syms[i].name;
1261 ret = -ENOEXEC; 1261 ret = -ENOEXEC;
1262 goto dup; 1262 goto dup;
1263 } 1263 }
1264 1264
1265 for (i = 0; i < mod->num_gpl_syms; i++) 1265 for (i = 0; i < mod->num_gpl_syms; i++)
1266 if (__find_symbol(mod->gpl_syms[i].name, &owner, &crc, 1)) { 1266 if (__find_symbol(mod->gpl_syms[i].name, &owner, &crc, 1)) {
1267 name = mod->gpl_syms[i].name; 1267 name = mod->gpl_syms[i].name;
1268 ret = -ENOEXEC; 1268 ret = -ENOEXEC;
1269 goto dup; 1269 goto dup;
1270 } 1270 }
1271 1271
1272 dup: 1272 dup:
1273 if (ret) 1273 if (ret)
1274 printk(KERN_ERR "%s: exports duplicate symbol %s (owned by %s)\n", 1274 printk(KERN_ERR "%s: exports duplicate symbol %s (owned by %s)\n",
1275 mod->name, name, module_name(owner)); 1275 mod->name, name, module_name(owner));
1276 1276
1277 return ret; 1277 return ret;
1278 } 1278 }
1279 1279
1280 /* Change all symbols so that sh_value encodes the pointer directly. */ 1280 /* Change all symbols so that sh_value encodes the pointer directly. */
1281 static int simplify_symbols(Elf_Shdr *sechdrs, 1281 static int simplify_symbols(Elf_Shdr *sechdrs,
1282 unsigned int symindex, 1282 unsigned int symindex,
1283 const char *strtab, 1283 const char *strtab,
1284 unsigned int versindex, 1284 unsigned int versindex,
1285 unsigned int pcpuindex, 1285 unsigned int pcpuindex,
1286 struct module *mod) 1286 struct module *mod)
1287 { 1287 {
1288 Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; 1288 Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr;
1289 unsigned long secbase; 1289 unsigned long secbase;
1290 unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym); 1290 unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
1291 int ret = 0; 1291 int ret = 0;
1292 1292
1293 for (i = 1; i < n; i++) { 1293 for (i = 1; i < n; i++) {
1294 switch (sym[i].st_shndx) { 1294 switch (sym[i].st_shndx) {
1295 case SHN_COMMON: 1295 case SHN_COMMON:
1296 /* We compiled with -fno-common. These are not 1296 /* We compiled with -fno-common. These are not
1297 supposed to happen. */ 1297 supposed to happen. */
1298 DEBUGP("Common symbol: %s\n", strtab + sym[i].st_name); 1298 DEBUGP("Common symbol: %s\n", strtab + sym[i].st_name);
1299 printk("%s: please compile with -fno-common\n", 1299 printk("%s: please compile with -fno-common\n",
1300 mod->name); 1300 mod->name);
1301 ret = -ENOEXEC; 1301 ret = -ENOEXEC;
1302 break; 1302 break;
1303 1303
1304 case SHN_ABS: 1304 case SHN_ABS:
1305 /* Don't need to do anything */ 1305 /* Don't need to do anything */
1306 DEBUGP("Absolute symbol: 0x%08lx\n", 1306 DEBUGP("Absolute symbol: 0x%08lx\n",
1307 (long)sym[i].st_value); 1307 (long)sym[i].st_value);
1308 break; 1308 break;
1309 1309
1310 case SHN_UNDEF: 1310 case SHN_UNDEF:
1311 sym[i].st_value 1311 sym[i].st_value
1312 = resolve_symbol(sechdrs, versindex, 1312 = resolve_symbol(sechdrs, versindex,
1313 strtab + sym[i].st_name, mod); 1313 strtab + sym[i].st_name, mod);
1314 1314
1315 /* Ok if resolved. */ 1315 /* Ok if resolved. */
1316 if (sym[i].st_value != 0) 1316 if (sym[i].st_value != 0)
1317 break; 1317 break;
1318 /* Ok if weak. */ 1318 /* Ok if weak. */
1319 if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) 1319 if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK)
1320 break; 1320 break;
1321 1321
1322 printk(KERN_WARNING "%s: Unknown symbol %s\n", 1322 printk(KERN_WARNING "%s: Unknown symbol %s\n",
1323 mod->name, strtab + sym[i].st_name); 1323 mod->name, strtab + sym[i].st_name);
1324 ret = -ENOENT; 1324 ret = -ENOENT;
1325 break; 1325 break;
1326 1326
1327 default: 1327 default:
1328 /* Divert to percpu allocation if a percpu var. */ 1328 /* Divert to percpu allocation if a percpu var. */
1329 if (sym[i].st_shndx == pcpuindex) 1329 if (sym[i].st_shndx == pcpuindex)
1330 secbase = (unsigned long)mod->percpu; 1330 secbase = (unsigned long)mod->percpu;
1331 else 1331 else
1332 secbase = sechdrs[sym[i].st_shndx].sh_addr; 1332 secbase = sechdrs[sym[i].st_shndx].sh_addr;
1333 sym[i].st_value += secbase; 1333 sym[i].st_value += secbase;
1334 break; 1334 break;
1335 } 1335 }
1336 } 1336 }
1337 1337
1338 return ret; 1338 return ret;
1339 } 1339 }
1340 1340
1341 /* Update size with this section: return offset. */ 1341 /* Update size with this section: return offset. */
1342 static long get_offset(unsigned long *size, Elf_Shdr *sechdr) 1342 static long get_offset(unsigned long *size, Elf_Shdr *sechdr)
1343 { 1343 {
1344 long ret; 1344 long ret;
1345 1345
1346 ret = ALIGN(*size, sechdr->sh_addralign ?: 1); 1346 ret = ALIGN(*size, sechdr->sh_addralign ?: 1);
1347 *size = ret + sechdr->sh_size; 1347 *size = ret + sechdr->sh_size;
1348 return ret; 1348 return ret;
1349 } 1349 }
1350 1350
1351 /* Lay out the SHF_ALLOC sections in a way not dissimilar to how ld 1351 /* Lay out the SHF_ALLOC sections in a way not dissimilar to how ld
1352 might -- code, read-only data, read-write data, small data. Tally 1352 might -- code, read-only data, read-write data, small data. Tally
1353 sizes, and place the offsets into sh_entsize fields: high bit means it 1353 sizes, and place the offsets into sh_entsize fields: high bit means it
1354 belongs in init. */ 1354 belongs in init. */
1355 static void layout_sections(struct module *mod, 1355 static void layout_sections(struct module *mod,
1356 const Elf_Ehdr *hdr, 1356 const Elf_Ehdr *hdr,
1357 Elf_Shdr *sechdrs, 1357 Elf_Shdr *sechdrs,
1358 const char *secstrings) 1358 const char *secstrings)
1359 { 1359 {
1360 static unsigned long const masks[][2] = { 1360 static unsigned long const masks[][2] = {
1361 /* NOTE: all executable code must be the first section 1361 /* NOTE: all executable code must be the first section
1362 * in this array; otherwise modify the text_size 1362 * in this array; otherwise modify the text_size
1363 * finder in the two loops below */ 1363 * finder in the two loops below */
1364 { SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL }, 1364 { SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL },
1365 { SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL }, 1365 { SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL },
1366 { SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL }, 1366 { SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL },
1367 { ARCH_SHF_SMALL | SHF_ALLOC, 0 } 1367 { ARCH_SHF_SMALL | SHF_ALLOC, 0 }
1368 }; 1368 };
1369 unsigned int m, i; 1369 unsigned int m, i;
1370 1370
1371 for (i = 0; i < hdr->e_shnum; i++) 1371 for (i = 0; i < hdr->e_shnum; i++)
1372 sechdrs[i].sh_entsize = ~0UL; 1372 sechdrs[i].sh_entsize = ~0UL;
1373 1373
1374 DEBUGP("Core section allocation order:\n"); 1374 DEBUGP("Core section allocation order:\n");
1375 for (m = 0; m < ARRAY_SIZE(masks); ++m) { 1375 for (m = 0; m < ARRAY_SIZE(masks); ++m) {
1376 for (i = 0; i < hdr->e_shnum; ++i) { 1376 for (i = 0; i < hdr->e_shnum; ++i) {
1377 Elf_Shdr *s = &sechdrs[i]; 1377 Elf_Shdr *s = &sechdrs[i];
1378 1378
1379 if ((s->sh_flags & masks[m][0]) != masks[m][0] 1379 if ((s->sh_flags & masks[m][0]) != masks[m][0]
1380 || (s->sh_flags & masks[m][1]) 1380 || (s->sh_flags & masks[m][1])
1381 || s->sh_entsize != ~0UL 1381 || s->sh_entsize != ~0UL
1382 || strncmp(secstrings + s->sh_name, 1382 || strncmp(secstrings + s->sh_name,
1383 ".init", 5) == 0) 1383 ".init", 5) == 0)
1384 continue; 1384 continue;
1385 s->sh_entsize = get_offset(&mod->core_size, s); 1385 s->sh_entsize = get_offset(&mod->core_size, s);
1386 DEBUGP("\t%s\n", secstrings + s->sh_name); 1386 DEBUGP("\t%s\n", secstrings + s->sh_name);
1387 } 1387 }
1388 if (m == 0) 1388 if (m == 0)
1389 mod->core_text_size = mod->core_size; 1389 mod->core_text_size = mod->core_size;
1390 } 1390 }
1391 1391
1392 DEBUGP("Init section allocation order:\n"); 1392 DEBUGP("Init section allocation order:\n");
1393 for (m = 0; m < ARRAY_SIZE(masks); ++m) { 1393 for (m = 0; m < ARRAY_SIZE(masks); ++m) {
1394 for (i = 0; i < hdr->e_shnum; ++i) { 1394 for (i = 0; i < hdr->e_shnum; ++i) {
1395 Elf_Shdr *s = &sechdrs[i]; 1395 Elf_Shdr *s = &sechdrs[i];
1396 1396
1397 if ((s->sh_flags & masks[m][0]) != masks[m][0] 1397 if ((s->sh_flags & masks[m][0]) != masks[m][0]
1398 || (s->sh_flags & masks[m][1]) 1398 || (s->sh_flags & masks[m][1])
1399 || s->sh_entsize != ~0UL 1399 || s->sh_entsize != ~0UL
1400 || strncmp(secstrings + s->sh_name, 1400 || strncmp(secstrings + s->sh_name,
1401 ".init", 5) != 0) 1401 ".init", 5) != 0)
1402 continue; 1402 continue;
1403 s->sh_entsize = (get_offset(&mod->init_size, s) 1403 s->sh_entsize = (get_offset(&mod->init_size, s)
1404 | INIT_OFFSET_MASK); 1404 | INIT_OFFSET_MASK);
1405 DEBUGP("\t%s\n", secstrings + s->sh_name); 1405 DEBUGP("\t%s\n", secstrings + s->sh_name);
1406 } 1406 }
1407 if (m == 0) 1407 if (m == 0)
1408 mod->init_text_size = mod->init_size; 1408 mod->init_text_size = mod->init_size;
1409 } 1409 }
1410 } 1410 }
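
get_offset() above rounds the running size up to the section's alignment, hands back the old (aligned) value as that section's offset, and bumps the total; layout_sections() applies it once per section class to build core_size and init_size. A standalone worked example with invented section sizes and alignments (the ALIGN() below mirrors the kernel's round-up macro):

/* Worked example of the offset accumulation done by get_offset(). */
#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

struct sect {
	const char *name;
	unsigned long size, align;
};

static unsigned long get_offset(unsigned long *total, const struct sect *s)
{
	unsigned long off = ALIGN(*total, s->align ? s->align : 1);

	*total = off + s->size;
	return off;
}

int main(void)
{
	struct sect secs[] = {
		{ ".text",	0x1234, 16 },
		{ ".rodata",	0x0040,  8 },
		{ ".data",	0x0100, 32 },
	};
	unsigned long core_size = 0;

	for (int i = 0; i < 3; i++)
		printf("%-8s offset 0x%lx\n", secs[i].name,
		       get_offset(&core_size, &secs[i]));
	printf("core_size = 0x%lx\n", core_size);
	return 0;
}
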
1411 1411
1412 static void set_license(struct module *mod, const char *license) 1412 static void set_license(struct module *mod, const char *license)
1413 { 1413 {
1414 if (!license) 1414 if (!license)
1415 license = "unspecified"; 1415 license = "unspecified";
1416 1416
1417 if (!license_is_gpl_compatible(license)) { 1417 if (!license_is_gpl_compatible(license)) {
1418 if (!(tainted & TAINT_PROPRIETARY_MODULE)) 1418 if (!(tainted & TAINT_PROPRIETARY_MODULE))
1419 printk(KERN_WARNING "%s: module license '%s' taints " 1419 printk(KERN_WARNING "%s: module license '%s' taints "
1420 "kernel.\n", mod->name, license); 1420 "kernel.\n", mod->name, license);
1421 add_taint_module(mod, TAINT_PROPRIETARY_MODULE); 1421 add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
1422 } 1422 }
1423 } 1423 }
1424 1424
1425 /* Parse tag=value strings from .modinfo section */ 1425 /* Parse tag=value strings from .modinfo section */
1426 static char *next_string(char *string, unsigned long *secsize) 1426 static char *next_string(char *string, unsigned long *secsize)
1427 { 1427 {
1428 /* Skip non-zero chars */ 1428 /* Skip non-zero chars */
1429 while (string[0]) { 1429 while (string[0]) {
1430 string++; 1430 string++;
1431 if ((*secsize)-- <= 1) 1431 if ((*secsize)-- <= 1)
1432 return NULL; 1432 return NULL;
1433 } 1433 }
1434 1434
1435 /* Skip any zero padding. */ 1435 /* Skip any zero padding. */
1436 while (!string[0]) { 1436 while (!string[0]) {
1437 string++; 1437 string++;
1438 if ((*secsize)-- <= 1) 1438 if ((*secsize)-- <= 1)
1439 return NULL; 1439 return NULL;
1440 } 1440 }
1441 return string; 1441 return string;
1442 } 1442 }
1443 1443
1444 static char *get_modinfo(Elf_Shdr *sechdrs, 1444 static char *get_modinfo(Elf_Shdr *sechdrs,
1445 unsigned int info, 1445 unsigned int info,
1446 const char *tag) 1446 const char *tag)
1447 { 1447 {
1448 char *p; 1448 char *p;
1449 unsigned int taglen = strlen(tag); 1449 unsigned int taglen = strlen(tag);
1450 unsigned long size = sechdrs[info].sh_size; 1450 unsigned long size = sechdrs[info].sh_size;
1451 1451
1452 for (p = (char *)sechdrs[info].sh_addr; p; p = next_string(p, &size)) { 1452 for (p = (char *)sechdrs[info].sh_addr; p; p = next_string(p, &size)) {
1453 if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=') 1453 if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=')
1454 return p + taglen + 1; 1454 return p + taglen + 1;
1455 } 1455 }
1456 return NULL; 1456 return NULL;
1457 } 1457 }
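
next_string()/get_modinfo() above treat the .modinfo section as a flat run of NUL-terminated "tag=value" strings and return a pointer just past "tag=" for the requested tag. The same walk over an in-memory buffer (the buffer contents here are invented):

/* Illustration of scanning a .modinfo-style blob of NUL-separated
 * "tag=value" strings. */
#include <stdio.h>
#include <string.h>

static char *next_string(char *string, unsigned long *secsize)
{
	while (string[0]) {		/* skip over the current string */
		string++;
		if ((*secsize)-- <= 1)
			return NULL;
	}
	while (!string[0]) {		/* skip the NUL padding */
		string++;
		if ((*secsize)-- <= 1)
			return NULL;
	}
	return string;
}

static char *get_info(char *sec, unsigned long size, const char *tag)
{
	unsigned int taglen = strlen(tag);

	for (char *p = sec; p; p = next_string(p, &size))
		if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=')
			return p + taglen + 1;
	return NULL;
}

int main(void)
{
	/* fake .modinfo contents: strings separated by NULs */
	char blob[] = "license=GPL\0author=Jane Doe\0vermagic=2.6.22 SMP 686\0";

	printf("license  = %s\n", get_info(blob, sizeof(blob), "license"));
	printf("vermagic = %s\n", get_info(blob, sizeof(blob), "vermagic"));
	return 0;
}
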
1458 1458
1459 static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, 1459 static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs,
1460 unsigned int infoindex) 1460 unsigned int infoindex)
1461 { 1461 {
1462 struct module_attribute *attr; 1462 struct module_attribute *attr;
1463 int i; 1463 int i;
1464 1464
1465 for (i = 0; (attr = modinfo_attrs[i]); i++) { 1465 for (i = 0; (attr = modinfo_attrs[i]); i++) {
1466 if (attr->setup) 1466 if (attr->setup)
1467 attr->setup(mod, 1467 attr->setup(mod,
1468 get_modinfo(sechdrs, 1468 get_modinfo(sechdrs,
1469 infoindex, 1469 infoindex,
1470 attr->attr.name)); 1470 attr->attr.name));
1471 } 1471 }
1472 } 1472 }
1473 1473
1474 #ifdef CONFIG_KALLSYMS 1474 #ifdef CONFIG_KALLSYMS
1475 static int is_exported(const char *name, const struct module *mod) 1475 static int is_exported(const char *name, const struct module *mod)
1476 { 1476 {
1477 if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab)) 1477 if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab))
1478 return 1; 1478 return 1;
1479 else 1479 else
1480 if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms)) 1480 if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms))
1481 return 1; 1481 return 1;
1482 else 1482 else
1483 return 0; 1483 return 0;
1484 } 1484 }
1485 1485
1486 /* As per nm */ 1486 /* As per nm */
1487 static char elf_type(const Elf_Sym *sym, 1487 static char elf_type(const Elf_Sym *sym,
1488 Elf_Shdr *sechdrs, 1488 Elf_Shdr *sechdrs,
1489 const char *secstrings, 1489 const char *secstrings,
1490 struct module *mod) 1490 struct module *mod)
1491 { 1491 {
1492 if (ELF_ST_BIND(sym->st_info) == STB_WEAK) { 1492 if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
1493 if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT) 1493 if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT)
1494 return 'v'; 1494 return 'v';
1495 else 1495 else
1496 return 'w'; 1496 return 'w';
1497 } 1497 }
1498 if (sym->st_shndx == SHN_UNDEF) 1498 if (sym->st_shndx == SHN_UNDEF)
1499 return 'U'; 1499 return 'U';
1500 if (sym->st_shndx == SHN_ABS) 1500 if (sym->st_shndx == SHN_ABS)
1501 return 'a'; 1501 return 'a';
1502 if (sym->st_shndx >= SHN_LORESERVE) 1502 if (sym->st_shndx >= SHN_LORESERVE)
1503 return '?'; 1503 return '?';
1504 if (sechdrs[sym->st_shndx].sh_flags & SHF_EXECINSTR) 1504 if (sechdrs[sym->st_shndx].sh_flags & SHF_EXECINSTR)
1505 return 't'; 1505 return 't';
1506 if (sechdrs[sym->st_shndx].sh_flags & SHF_ALLOC 1506 if (sechdrs[sym->st_shndx].sh_flags & SHF_ALLOC
1507 && sechdrs[sym->st_shndx].sh_type != SHT_NOBITS) { 1507 && sechdrs[sym->st_shndx].sh_type != SHT_NOBITS) {
1508 if (!(sechdrs[sym->st_shndx].sh_flags & SHF_WRITE)) 1508 if (!(sechdrs[sym->st_shndx].sh_flags & SHF_WRITE))
1509 return 'r'; 1509 return 'r';
1510 else if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) 1510 else if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL)
1511 return 'g'; 1511 return 'g';
1512 else 1512 else
1513 return 'd'; 1513 return 'd';
1514 } 1514 }
1515 if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) { 1515 if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
1516 if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) 1516 if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL)
1517 return 's'; 1517 return 's';
1518 else 1518 else
1519 return 'b'; 1519 return 'b';
1520 } 1520 }
1521 if (strncmp(secstrings + sechdrs[sym->st_shndx].sh_name, 1521 if (strncmp(secstrings + sechdrs[sym->st_shndx].sh_name,
1522 ".debug", strlen(".debug")) == 0) 1522 ".debug", strlen(".debug")) == 0)
1523 return 'n'; 1523 return 'n';
1524 return '?'; 1524 return '?';
1525 } 1525 }
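
elf_type() above reproduces nm's one-letter symbol classes from the symbol's binding/type and the flags of the section it lives in. A cut-down userspace version of the same decision chain using the standard <elf.h> definitions; ARCH_SHF_SMALL has no portable equivalent, so the 'g'/'s' small-section and '.debug' cases are dropped here:

/* Simplified nm-style classifier: same ordering of tests as
 * elf_type() above, minus the arch-specific cases. */
#include <elf.h>
#include <stdio.h>
#include <string.h>

static char sym_class(const Elf64_Sym *sym, const Elf64_Shdr *shdrs)
{
	const Elf64_Shdr *sec;

	if (ELF64_ST_BIND(sym->st_info) == STB_WEAK)
		return ELF64_ST_TYPE(sym->st_info) == STT_OBJECT ? 'v' : 'w';
	if (sym->st_shndx == SHN_UNDEF)
		return 'U';
	if (sym->st_shndx == SHN_ABS)
		return 'a';
	if (sym->st_shndx >= SHN_LORESERVE)
		return '?';

	sec = &shdrs[sym->st_shndx];
	if (sec->sh_flags & SHF_EXECINSTR)
		return 't';
	if ((sec->sh_flags & SHF_ALLOC) && sec->sh_type != SHT_NOBITS)
		return (sec->sh_flags & SHF_WRITE) ? 'd' : 'r';
	if (sec->sh_type == SHT_NOBITS)
		return 'b';
	return '?';
}

int main(void)
{
	/* hand-built section table: [1] .text, [2] .bss */
	Elf64_Shdr shdrs[3];
	Elf64_Sym text_sym = { .st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC),
			       .st_shndx = 1 };
	Elf64_Sym bss_sym  = { .st_info = ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
			       .st_shndx = 2 };

	memset(shdrs, 0, sizeof(shdrs));
	shdrs[1].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
	shdrs[2].sh_flags = SHF_ALLOC | SHF_WRITE;
	shdrs[2].sh_type  = SHT_NOBITS;

	printf("%c %c\n", sym_class(&text_sym, shdrs),	/* prints: t b */
	       sym_class(&bss_sym, shdrs));
	return 0;
}
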
1526 1526
1527 static void add_kallsyms(struct module *mod, 1527 static void add_kallsyms(struct module *mod,
1528 Elf_Shdr *sechdrs, 1528 Elf_Shdr *sechdrs,
1529 unsigned int symindex, 1529 unsigned int symindex,
1530 unsigned int strindex, 1530 unsigned int strindex,
1531 const char *secstrings) 1531 const char *secstrings)
1532 { 1532 {
1533 unsigned int i; 1533 unsigned int i;
1534 1534
1535 mod->symtab = (void *)sechdrs[symindex].sh_addr; 1535 mod->symtab = (void *)sechdrs[symindex].sh_addr;
1536 mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); 1536 mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
1537 mod->strtab = (void *)sechdrs[strindex].sh_addr; 1537 mod->strtab = (void *)sechdrs[strindex].sh_addr;
1538 1538
1539 /* Set types up while we still have access to sections. */ 1539 /* Set types up while we still have access to sections. */
1540 for (i = 0; i < mod->num_symtab; i++) 1540 for (i = 0; i < mod->num_symtab; i++)
1541 mod->symtab[i].st_info 1541 mod->symtab[i].st_info
1542 = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); 1542 = elf_type(&mod->symtab[i], sechdrs, secstrings, mod);
1543 } 1543 }
1544 #else 1544 #else
1545 static inline void add_kallsyms(struct module *mod, 1545 static inline void add_kallsyms(struct module *mod,
1546 Elf_Shdr *sechdrs, 1546 Elf_Shdr *sechdrs,
1547 unsigned int symindex, 1547 unsigned int symindex,
1548 unsigned int strindex, 1548 unsigned int strindex,
1549 const char *secstrings) 1549 const char *secstrings)
1550 { 1550 {
1551 } 1551 }
1552 #endif /* CONFIG_KALLSYMS */ 1552 #endif /* CONFIG_KALLSYMS */
1553 1553
1554 /* Allocate and load the module: note that size of section 0 is always 1554 /* Allocate and load the module: note that size of section 0 is always
1555 zero, and we rely on this for optional sections. */ 1555 zero, and we rely on this for optional sections. */
1556 static struct module *load_module(void __user *umod, 1556 static struct module *load_module(void __user *umod,
1557 unsigned long len, 1557 unsigned long len,
1558 const char __user *uargs) 1558 const char __user *uargs)
1559 { 1559 {
1560 Elf_Ehdr *hdr; 1560 Elf_Ehdr *hdr;
1561 Elf_Shdr *sechdrs; 1561 Elf_Shdr *sechdrs;
1562 char *secstrings, *args, *modmagic, *strtab = NULL; 1562 char *secstrings, *args, *modmagic, *strtab = NULL;
1563 unsigned int i; 1563 unsigned int i;
1564 unsigned int symindex = 0; 1564 unsigned int symindex = 0;
1565 unsigned int strindex = 0; 1565 unsigned int strindex = 0;
1566 unsigned int setupindex; 1566 unsigned int setupindex;
1567 unsigned int exindex; 1567 unsigned int exindex;
1568 unsigned int exportindex; 1568 unsigned int exportindex;
1569 unsigned int modindex; 1569 unsigned int modindex;
1570 unsigned int obsparmindex; 1570 unsigned int obsparmindex;
1571 unsigned int infoindex; 1571 unsigned int infoindex;
1572 unsigned int gplindex; 1572 unsigned int gplindex;
1573 unsigned int crcindex; 1573 unsigned int crcindex;
1574 unsigned int gplcrcindex; 1574 unsigned int gplcrcindex;
1575 unsigned int versindex; 1575 unsigned int versindex;
1576 unsigned int pcpuindex; 1576 unsigned int pcpuindex;
1577 unsigned int gplfutureindex; 1577 unsigned int gplfutureindex;
1578 unsigned int gplfuturecrcindex; 1578 unsigned int gplfuturecrcindex;
1579 unsigned int unwindex = 0; 1579 unsigned int unwindex = 0;
1580 unsigned int unusedindex; 1580 unsigned int unusedindex;
1581 unsigned int unusedcrcindex; 1581 unsigned int unusedcrcindex;
1582 unsigned int unusedgplindex; 1582 unsigned int unusedgplindex;
1583 unsigned int unusedgplcrcindex; 1583 unsigned int unusedgplcrcindex;
1584 struct module *mod; 1584 struct module *mod;
1585 long err = 0; 1585 long err = 0;
1586 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ 1586 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
1587 struct exception_table_entry *extable; 1587 struct exception_table_entry *extable;
1588 mm_segment_t old_fs; 1588 mm_segment_t old_fs;
1589 1589
1590 DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", 1590 DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
1591 umod, len, uargs); 1591 umod, len, uargs);
1592 if (len < sizeof(*hdr)) 1592 if (len < sizeof(*hdr))
1593 return ERR_PTR(-ENOEXEC); 1593 return ERR_PTR(-ENOEXEC);
1594 1594
1595 /* Suck in entire file: we'll want most of it. */ 1595 /* Suck in entire file: we'll want most of it. */
1596 /* vmalloc barfs on "unusual" numbers. Check here */ 1596 /* vmalloc barfs on "unusual" numbers. Check here */
1597 if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) 1597 if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
1598 return ERR_PTR(-ENOMEM); 1598 return ERR_PTR(-ENOMEM);
1599 if (copy_from_user(hdr, umod, len) != 0) { 1599 if (copy_from_user(hdr, umod, len) != 0) {
1600 err = -EFAULT; 1600 err = -EFAULT;
1601 goto free_hdr; 1601 goto free_hdr;
1602 } 1602 }
1603 1603
1604 /* Sanity checks against insmoding binaries or wrong arch, 1604 /* Sanity checks against insmoding binaries or wrong arch,
1605 weird elf version */ 1605 weird elf version */
1606 if (memcmp(hdr->e_ident, ELFMAG, 4) != 0 1606 if (memcmp(hdr->e_ident, ELFMAG, 4) != 0
1607 || hdr->e_type != ET_REL 1607 || hdr->e_type != ET_REL
1608 || !elf_check_arch(hdr) 1608 || !elf_check_arch(hdr)
1609 || hdr->e_shentsize != sizeof(*sechdrs)) { 1609 || hdr->e_shentsize != sizeof(*sechdrs)) {
1610 err = -ENOEXEC; 1610 err = -ENOEXEC;
1611 goto free_hdr; 1611 goto free_hdr;
1612 } 1612 }
1613 1613
1614 if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) 1614 if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr))
1615 goto truncated; 1615 goto truncated;
1616 1616
1617 /* Convenience variables */ 1617 /* Convenience variables */
1618 sechdrs = (void *)hdr + hdr->e_shoff; 1618 sechdrs = (void *)hdr + hdr->e_shoff;
1619 secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 1619 secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
1620 sechdrs[0].sh_addr = 0; 1620 sechdrs[0].sh_addr = 0;
1621 1621
1622 for (i = 1; i < hdr->e_shnum; i++) { 1622 for (i = 1; i < hdr->e_shnum; i++) {
1623 if (sechdrs[i].sh_type != SHT_NOBITS 1623 if (sechdrs[i].sh_type != SHT_NOBITS
1624 && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) 1624 && len < sechdrs[i].sh_offset + sechdrs[i].sh_size)
1625 goto truncated; 1625 goto truncated;
1626 1626
1627 /* Mark all sections sh_addr with their address in the 1627 /* Mark all sections sh_addr with their address in the
1628 temporary image. */ 1628 temporary image. */
1629 sechdrs[i].sh_addr = (size_t)hdr + sechdrs[i].sh_offset; 1629 sechdrs[i].sh_addr = (size_t)hdr + sechdrs[i].sh_offset;
1630 1630
1631 /* Internal symbols and strings. */ 1631 /* Internal symbols and strings. */
1632 if (sechdrs[i].sh_type == SHT_SYMTAB) { 1632 if (sechdrs[i].sh_type == SHT_SYMTAB) {
1633 symindex = i; 1633 symindex = i;
1634 strindex = sechdrs[i].sh_link; 1634 strindex = sechdrs[i].sh_link;
1635 strtab = (char *)hdr + sechdrs[strindex].sh_offset; 1635 strtab = (char *)hdr + sechdrs[strindex].sh_offset;
1636 } 1636 }
1637 #ifndef CONFIG_MODULE_UNLOAD 1637 #ifndef CONFIG_MODULE_UNLOAD
1638 /* Don't load .exit sections */ 1638 /* Don't load .exit sections */
1639 if (strncmp(secstrings+sechdrs[i].sh_name, ".exit", 5) == 0) 1639 if (strncmp(secstrings+sechdrs[i].sh_name, ".exit", 5) == 0)
1640 sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; 1640 sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
1641 #endif 1641 #endif
1642 } 1642 }
1643 1643
1644 modindex = find_sec(hdr, sechdrs, secstrings, 1644 modindex = find_sec(hdr, sechdrs, secstrings,
1645 ".gnu.linkonce.this_module"); 1645 ".gnu.linkonce.this_module");
1646 if (!modindex) { 1646 if (!modindex) {
1647 printk(KERN_WARNING "No module found in object\n"); 1647 printk(KERN_WARNING "No module found in object\n");
1648 err = -ENOEXEC; 1648 err = -ENOEXEC;
1649 goto free_hdr; 1649 goto free_hdr;
1650 } 1650 }
1651 mod = (void *)sechdrs[modindex].sh_addr; 1651 mod = (void *)sechdrs[modindex].sh_addr;
1652 1652
1653 if (symindex == 0) { 1653 if (symindex == 0) {
1654 printk(KERN_WARNING "%s: module has no symbols (stripped?)\n", 1654 printk(KERN_WARNING "%s: module has no symbols (stripped?)\n",
1655 mod->name); 1655 mod->name);
1656 err = -ENOEXEC; 1656 err = -ENOEXEC;
1657 goto free_hdr; 1657 goto free_hdr;
1658 } 1658 }
1659 1659
1660 /* Optional sections */ 1660 /* Optional sections */
1661 exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab"); 1661 exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
1662 gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl"); 1662 gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
1663 gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future"); 1663 gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future");
1664 unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused"); 1664 unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
1665 unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl"); 1665 unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
1666 crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab"); 1666 crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab");
1667 gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl"); 1667 gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl");
1668 gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future"); 1668 gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future");
1669 unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused"); 1669 unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused");
1670 unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl"); 1670 unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl");
1671 setupindex = find_sec(hdr, sechdrs, secstrings, "__param"); 1671 setupindex = find_sec(hdr, sechdrs, secstrings, "__param");
1672 exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table"); 1672 exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table");
1673 obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm"); 1673 obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
1674 versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); 1674 versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
1675 infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); 1675 infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
1676 pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); 1676 pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
1677 #ifdef ARCH_UNWIND_SECTION_NAME 1677 #ifdef ARCH_UNWIND_SECTION_NAME
1678 unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); 1678 unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
1679 #endif 1679 #endif
1680 1680
1681 /* Don't keep modinfo section */ 1681 /* Don't keep modinfo section */
1682 sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; 1682 sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
1683 #ifdef CONFIG_KALLSYMS 1683 #ifdef CONFIG_KALLSYMS
1684 /* Keep symbol and string tables for decoding later. */ 1684 /* Keep symbol and string tables for decoding later. */
1685 sechdrs[symindex].sh_flags |= SHF_ALLOC; 1685 sechdrs[symindex].sh_flags |= SHF_ALLOC;
1686 sechdrs[strindex].sh_flags |= SHF_ALLOC; 1686 sechdrs[strindex].sh_flags |= SHF_ALLOC;
1687 #endif 1687 #endif
1688 if (unwindex) 1688 if (unwindex)
1689 sechdrs[unwindex].sh_flags |= SHF_ALLOC; 1689 sechdrs[unwindex].sh_flags |= SHF_ALLOC;
1690 1690
1691 /* Check module struct version now, before we try to use module. */ 1691 /* Check module struct version now, before we try to use module. */
1692 if (!check_modstruct_version(sechdrs, versindex, mod)) { 1692 if (!check_modstruct_version(sechdrs, versindex, mod)) {
1693 err = -ENOEXEC; 1693 err = -ENOEXEC;
1694 goto free_hdr; 1694 goto free_hdr;
1695 } 1695 }
1696 1696
1697 modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); 1697 modmagic = get_modinfo(sechdrs, infoindex, "vermagic");
1698 /* This is allowed: modprobe --force will invalidate it. */ 1698 /* This is allowed: modprobe --force will invalidate it. */
1699 if (!modmagic) { 1699 if (!modmagic) {
1700 add_taint_module(mod, TAINT_FORCED_MODULE); 1700 add_taint_module(mod, TAINT_FORCED_MODULE);
1701 printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", 1701 printk(KERN_WARNING "%s: no version magic, tainting kernel.\n",
1702 mod->name); 1702 mod->name);
1703 } else if (!same_magic(modmagic, vermagic)) { 1703 } else if (!same_magic(modmagic, vermagic)) {
1704 printk(KERN_ERR "%s: version magic '%s' should be '%s'\n", 1704 printk(KERN_ERR "%s: version magic '%s' should be '%s'\n",
1705 mod->name, modmagic, vermagic); 1705 mod->name, modmagic, vermagic);
1706 err = -ENOEXEC; 1706 err = -ENOEXEC;
1707 goto free_hdr; 1707 goto free_hdr;
1708 } 1708 }
1709 1709
1710 /* Now copy in args */ 1710 /* Now copy in args */
1711 args = strndup_user(uargs, ~0UL >> 1); 1711 args = strndup_user(uargs, ~0UL >> 1);
1712 if (IS_ERR(args)) { 1712 if (IS_ERR(args)) {
1713 err = PTR_ERR(args); 1713 err = PTR_ERR(args);
1714 goto free_hdr; 1714 goto free_hdr;
1715 } 1715 }
1716 1716
1717 if (find_module(mod->name)) { 1717 if (find_module(mod->name)) {
1718 err = -EEXIST; 1718 err = -EEXIST;
1719 goto free_mod; 1719 goto free_mod;
1720 } 1720 }
1721 1721
1722 mod->state = MODULE_STATE_COMING; 1722 mod->state = MODULE_STATE_COMING;
1723 1723
1724 /* Allow arches to frob section contents and sizes. */ 1724 /* Allow arches to frob section contents and sizes. */
1725 err = module_frob_arch_sections(hdr, sechdrs, secstrings, mod); 1725 err = module_frob_arch_sections(hdr, sechdrs, secstrings, mod);
1726 if (err < 0) 1726 if (err < 0)
1727 goto free_mod; 1727 goto free_mod;
1728 1728
1729 if (pcpuindex) { 1729 if (pcpuindex) {
1730 /* We have a special allocation for this section. */ 1730 /* We have a special allocation for this section. */
1731 percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, 1731 percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
1732 sechdrs[pcpuindex].sh_addralign, 1732 sechdrs[pcpuindex].sh_addralign,
1733 mod->name); 1733 mod->name);
1734 if (!percpu) { 1734 if (!percpu) {
1735 err = -ENOMEM; 1735 err = -ENOMEM;
1736 goto free_mod; 1736 goto free_mod;
1737 } 1737 }
1738 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; 1738 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
1739 mod->percpu = percpu; 1739 mod->percpu = percpu;
1740 } 1740 }
1741 1741
1742 /* Determine total sizes, and put offsets in sh_entsize. For now 1742 /* Determine total sizes, and put offsets in sh_entsize. For now
1743 this is done generically; there doesn't appear to be any 1743 this is done generically; there doesn't appear to be any
1744 special cases for the architectures. */ 1744 special cases for the architectures. */
1745 layout_sections(mod, hdr, sechdrs, secstrings); 1745 layout_sections(mod, hdr, sechdrs, secstrings);
1746 1746
1747 /* Do the allocs. */ 1747 /* Do the allocs. */
1748 ptr = module_alloc(mod->core_size); 1748 ptr = module_alloc(mod->core_size);
1749 if (!ptr) { 1749 if (!ptr) {
1750 err = -ENOMEM; 1750 err = -ENOMEM;
1751 goto free_percpu; 1751 goto free_percpu;
1752 } 1752 }
1753 memset(ptr, 0, mod->core_size); 1753 memset(ptr, 0, mod->core_size);
1754 mod->module_core = ptr; 1754 mod->module_core = ptr;
1755 1755
1756 ptr = module_alloc(mod->init_size); 1756 ptr = module_alloc(mod->init_size);
1757 if (!ptr && mod->init_size) { 1757 if (!ptr && mod->init_size) {
1758 err = -ENOMEM; 1758 err = -ENOMEM;
1759 goto free_core; 1759 goto free_core;
1760 } 1760 }
1761 memset(ptr, 0, mod->init_size); 1761 memset(ptr, 0, mod->init_size);
1762 mod->module_init = ptr; 1762 mod->module_init = ptr;
1763 1763
1764 /* Transfer each section which specifies SHF_ALLOC */ 1764 /* Transfer each section which specifies SHF_ALLOC */
1765 DEBUGP("final section addresses:\n"); 1765 DEBUGP("final section addresses:\n");
1766 for (i = 0; i < hdr->e_shnum; i++) { 1766 for (i = 0; i < hdr->e_shnum; i++) {
1767 void *dest; 1767 void *dest;
1768 1768
1769 if (!(sechdrs[i].sh_flags & SHF_ALLOC)) 1769 if (!(sechdrs[i].sh_flags & SHF_ALLOC))
1770 continue; 1770 continue;
1771 1771
1772 if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) 1772 if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK)
1773 dest = mod->module_init 1773 dest = mod->module_init
1774 + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); 1774 + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK);
1775 else 1775 else
1776 dest = mod->module_core + sechdrs[i].sh_entsize; 1776 dest = mod->module_core + sechdrs[i].sh_entsize;
1777 1777
1778 if (sechdrs[i].sh_type != SHT_NOBITS) 1778 if (sechdrs[i].sh_type != SHT_NOBITS)
1779 memcpy(dest, (void *)sechdrs[i].sh_addr, 1779 memcpy(dest, (void *)sechdrs[i].sh_addr,
1780 sechdrs[i].sh_size); 1780 sechdrs[i].sh_size);
1781 /* Update sh_addr to point to copy in image. */ 1781 /* Update sh_addr to point to copy in image. */
1782 sechdrs[i].sh_addr = (unsigned long)dest; 1782 sechdrs[i].sh_addr = (unsigned long)dest;
1783 DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); 1783 DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name);
1784 } 1784 }
1785 /* Module has been moved. */ 1785 /* Module has been moved. */
1786 mod = (void *)sechdrs[modindex].sh_addr; 1786 mod = (void *)sechdrs[modindex].sh_addr;
1787 1787
1788 /* Now we've moved module, initialize linked lists, etc. */ 1788 /* Now we've moved module, initialize linked lists, etc. */
1789 module_unload_init(mod); 1789 module_unload_init(mod);
1790 1790
1791 /* Initialize kobject, so we can reference it. */ 1791 /* Initialize kobject, so we can reference it. */
1792 if (mod_sysfs_init(mod) != 0) 1792 if (mod_sysfs_init(mod) != 0)
1793 goto cleanup; 1793 goto cleanup;
1794 1794
1795 /* Set up license info based on the info section */ 1795 /* Set up license info based on the info section */
1796 set_license(mod, get_modinfo(sechdrs, infoindex, "license")); 1796 set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
1797 1797
1798 if (strcmp(mod->name, "ndiswrapper") == 0) 1798 if (strcmp(mod->name, "ndiswrapper") == 0)
1799 add_taint(TAINT_PROPRIETARY_MODULE); 1799 add_taint(TAINT_PROPRIETARY_MODULE);
1800 if (strcmp(mod->name, "driverloader") == 0) 1800 if (strcmp(mod->name, "driverloader") == 0)
1801 add_taint_module(mod, TAINT_PROPRIETARY_MODULE); 1801 add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
1802 1802
1803 /* Set up MODINFO_ATTR fields */ 1803 /* Set up MODINFO_ATTR fields */
1804 setup_modinfo(mod, sechdrs, infoindex); 1804 setup_modinfo(mod, sechdrs, infoindex);
1805 1805
1806 /* Fix up syms, so that st_value is a pointer to location. */ 1806 /* Fix up syms, so that st_value is a pointer to location. */
1807 err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex, 1807 err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex,
1808 mod); 1808 mod);
1809 if (err < 0) 1809 if (err < 0)
1810 goto cleanup; 1810 goto cleanup;
1811 1811
1812 /* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */ 1812 /* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */
1813 mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms); 1813 mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms);
1814 mod->syms = (void *)sechdrs[exportindex].sh_addr; 1814 mod->syms = (void *)sechdrs[exportindex].sh_addr;
1815 if (crcindex) 1815 if (crcindex)
1816 mod->crcs = (void *)sechdrs[crcindex].sh_addr; 1816 mod->crcs = (void *)sechdrs[crcindex].sh_addr;
1817 mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms); 1817 mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms);
1818 mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr; 1818 mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr;
1819 if (gplcrcindex) 1819 if (gplcrcindex)
1820 mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr; 1820 mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr;
1821 mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size / 1821 mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size /
1822 sizeof(*mod->gpl_future_syms); 1822 sizeof(*mod->gpl_future_syms);
1823 mod->num_unused_syms = sechdrs[unusedindex].sh_size / 1823 mod->num_unused_syms = sechdrs[unusedindex].sh_size /
1824 sizeof(*mod->unused_syms); 1824 sizeof(*mod->unused_syms);
1825 mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size / 1825 mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
1826 sizeof(*mod->unused_gpl_syms); 1826 sizeof(*mod->unused_gpl_syms);
1827 mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr; 1827 mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
1828 if (gplfuturecrcindex) 1828 if (gplfuturecrcindex)
1829 mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr; 1829 mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
1830 1830
1831 mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr; 1831 mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
1832 if (unusedcrcindex) 1832 if (unusedcrcindex)
1833 mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr; 1833 mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr;
1834 mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr; 1834 mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr;
1835 if (unusedgplcrcindex) 1835 if (unusedgplcrcindex)
1836 mod->unused_gpl_crcs = (void *)sechdrs[unusedgplcrcindex].sh_addr; 1836 mod->unused_gpl_crcs = (void *)sechdrs[unusedgplcrcindex].sh_addr;
1837 1837
1838 #ifdef CONFIG_MODVERSIONS 1838 #ifdef CONFIG_MODVERSIONS
1839 if ((mod->num_syms && !crcindex) || 1839 if ((mod->num_syms && !crcindex) ||
1840 (mod->num_gpl_syms && !gplcrcindex) || 1840 (mod->num_gpl_syms && !gplcrcindex) ||
1841 (mod->num_gpl_future_syms && !gplfuturecrcindex) || 1841 (mod->num_gpl_future_syms && !gplfuturecrcindex) ||
1842 (mod->num_unused_syms && !unusedcrcindex) || 1842 (mod->num_unused_syms && !unusedcrcindex) ||
1843 (mod->num_unused_gpl_syms && !unusedgplcrcindex)) { 1843 (mod->num_unused_gpl_syms && !unusedgplcrcindex)) {
1844 printk(KERN_WARNING "%s: No versions for exported symbols." 1844 printk(KERN_WARNING "%s: No versions for exported symbols."
1845 " Tainting kernel.\n", mod->name); 1845 " Tainting kernel.\n", mod->name);
1846 add_taint_module(mod, TAINT_FORCED_MODULE); 1846 add_taint_module(mod, TAINT_FORCED_MODULE);
1847 } 1847 }
1848 #endif 1848 #endif
1849 1849
1850 /* Now do relocations. */ 1850 /* Now do relocations. */
1851 for (i = 1; i < hdr->e_shnum; i++) { 1851 for (i = 1; i < hdr->e_shnum; i++) {
1852 const char *strtab = (char *)sechdrs[strindex].sh_addr; 1852 const char *strtab = (char *)sechdrs[strindex].sh_addr;
1853 unsigned int info = sechdrs[i].sh_info; 1853 unsigned int info = sechdrs[i].sh_info;
1854 1854
1855 /* Not a valid relocation section? */ 1855 /* Not a valid relocation section? */
1856 if (info >= hdr->e_shnum) 1856 if (info >= hdr->e_shnum)
1857 continue; 1857 continue;
1858 1858
1859 /* Don't bother with non-allocated sections */ 1859 /* Don't bother with non-allocated sections */
1860 if (!(sechdrs[info].sh_flags & SHF_ALLOC)) 1860 if (!(sechdrs[info].sh_flags & SHF_ALLOC))
1861 continue; 1861 continue;
1862 1862
1863 if (sechdrs[i].sh_type == SHT_REL) 1863 if (sechdrs[i].sh_type == SHT_REL)
1864 err = apply_relocate(sechdrs, strtab, symindex, i,mod); 1864 err = apply_relocate(sechdrs, strtab, symindex, i,mod);
1865 else if (sechdrs[i].sh_type == SHT_RELA) 1865 else if (sechdrs[i].sh_type == SHT_RELA)
1866 err = apply_relocate_add(sechdrs, strtab, symindex, i, 1866 err = apply_relocate_add(sechdrs, strtab, symindex, i,
1867 mod); 1867 mod);
1868 if (err < 0) 1868 if (err < 0)
1869 goto cleanup; 1869 goto cleanup;
1870 } 1870 }
1871 1871
1872 /* Find duplicate symbols */ 1872 /* Find duplicate symbols */
1873 err = verify_export_symbols(mod); 1873 err = verify_export_symbols(mod);
1874 1874
1875 if (err < 0) 1875 if (err < 0)
1876 goto cleanup; 1876 goto cleanup;
1877 1877
1878 /* Set up and sort exception table */ 1878 /* Set up and sort exception table */
1879 mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); 1879 mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable);
1880 mod->extable = extable = (void *)sechdrs[exindex].sh_addr; 1880 mod->extable = extable = (void *)sechdrs[exindex].sh_addr;
1881 sort_extable(extable, extable + mod->num_exentries); 1881 sort_extable(extable, extable + mod->num_exentries);
1882 1882
1883 /* Finally, copy percpu area over. */ 1883 /* Finally, copy percpu area over. */
1884 percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, 1884 percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
1885 sechdrs[pcpuindex].sh_size); 1885 sechdrs[pcpuindex].sh_size);
1886 1886
1887 add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); 1887 add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
1888 1888
1889 err = module_finalize(hdr, sechdrs, mod); 1889 err = module_finalize(hdr, sechdrs, mod);
1890 if (err < 0) 1890 if (err < 0)
1891 goto cleanup; 1891 goto cleanup;
1892 1892
1893 /* flush the icache in correct context */ 1893 /* flush the icache in correct context */
1894 old_fs = get_fs(); 1894 old_fs = get_fs();
1895 set_fs(KERNEL_DS); 1895 set_fs(KERNEL_DS);
1896 1896
1897 /* 1897 /*
1898 * Flush the instruction cache, since we've played with text. 1898 * Flush the instruction cache, since we've played with text.
1899 * Do it before processing of module parameters, so the module 1899 * Do it before processing of module parameters, so the module
1900 * can provide parameter accessor functions of its own. 1900 * can provide parameter accessor functions of its own.
1901 */ 1901 */
1902 if (mod->module_init) 1902 if (mod->module_init)
1903 flush_icache_range((unsigned long)mod->module_init, 1903 flush_icache_range((unsigned long)mod->module_init,
1904 (unsigned long)mod->module_init 1904 (unsigned long)mod->module_init
1905 + mod->init_size); 1905 + mod->init_size);
1906 flush_icache_range((unsigned long)mod->module_core, 1906 flush_icache_range((unsigned long)mod->module_core,
1907 (unsigned long)mod->module_core + mod->core_size); 1907 (unsigned long)mod->module_core + mod->core_size);
1908 1908
1909 set_fs(old_fs); 1909 set_fs(old_fs);
1910 1910
1911 mod->args = args; 1911 mod->args = args;
1912 if (obsparmindex) 1912 if (obsparmindex)
1913 printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", 1913 printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
1914 mod->name); 1914 mod->name);
1915 1915
1916 /* Size of section 0 is 0, so this works well if no params */ 1916 /* Size of section 0 is 0, so this works well if no params */
1917 err = parse_args(mod->name, mod->args, 1917 err = parse_args(mod->name, mod->args,
1918 (struct kernel_param *) 1918 (struct kernel_param *)
1919 sechdrs[setupindex].sh_addr, 1919 sechdrs[setupindex].sh_addr,
1920 sechdrs[setupindex].sh_size 1920 sechdrs[setupindex].sh_size
1921 / sizeof(struct kernel_param), 1921 / sizeof(struct kernel_param),
1922 NULL); 1922 NULL);
1923 if (err < 0) 1923 if (err < 0)
1924 goto arch_cleanup; 1924 goto arch_cleanup;
1925 1925
1926 err = mod_sysfs_setup(mod, 1926 err = mod_sysfs_setup(mod,
1927 (struct kernel_param *) 1927 (struct kernel_param *)
1928 sechdrs[setupindex].sh_addr, 1928 sechdrs[setupindex].sh_addr,
1929 sechdrs[setupindex].sh_size 1929 sechdrs[setupindex].sh_size
1930 / sizeof(struct kernel_param)); 1930 / sizeof(struct kernel_param));
1931 if (err < 0) 1931 if (err < 0)
1932 goto arch_cleanup; 1932 goto arch_cleanup;
1933 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 1933 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
1934 1934
1935 /* Size of section 0 is 0, so this works well if no unwind info. */ 1935 /* Size of section 0 is 0, so this works well if no unwind info. */
1936 mod->unwind_info = unwind_add_table(mod, 1936 mod->unwind_info = unwind_add_table(mod,
1937 (void *)sechdrs[unwindex].sh_addr, 1937 (void *)sechdrs[unwindex].sh_addr,
1938 sechdrs[unwindex].sh_size); 1938 sechdrs[unwindex].sh_size);
1939 1939
1940 /* Get rid of temporary copy */ 1940 /* Get rid of temporary copy */
1941 vfree(hdr); 1941 vfree(hdr);
1942 1942
1943 /* Done! */ 1943 /* Done! */
1944 return mod; 1944 return mod;
1945 1945
1946 arch_cleanup: 1946 arch_cleanup:
1947 module_arch_cleanup(mod); 1947 module_arch_cleanup(mod);
1948 cleanup: 1948 cleanup:
1949 module_unload_free(mod); 1949 module_unload_free(mod);
1950 module_free(mod, mod->module_init); 1950 module_free(mod, mod->module_init);
1951 free_core: 1951 free_core:
1952 module_free(mod, mod->module_core); 1952 module_free(mod, mod->module_core);
1953 free_percpu: 1953 free_percpu:
1954 if (percpu) 1954 if (percpu)
1955 percpu_modfree(percpu); 1955 percpu_modfree(percpu);
1956 free_mod: 1956 free_mod:
1957 kfree(args); 1957 kfree(args);
1958 free_hdr: 1958 free_hdr:
1959 vfree(hdr); 1959 vfree(hdr);
1960 return ERR_PTR(err); 1960 return ERR_PTR(err);
1961 1961
1962 truncated: 1962 truncated:
1963 printk(KERN_ERR "Module len %lu truncated\n", len); 1963 printk(KERN_ERR "Module len %lu truncated\n", len);
1964 err = -ENOEXEC; 1964 err = -ENOEXEC;
1965 goto free_hdr; 1965 goto free_hdr;
1966 } 1966 }
1967 1967
1968 /* 1968 /*
1969 * link the module while the whole machine is stopped with interrupts off 1969 * link the module while the whole machine is stopped with interrupts off
1970 * - this defends against kallsyms not taking locks 1970 * - this defends against kallsyms not taking locks
1971 */ 1971 */
1972 static int __link_module(void *_mod) 1972 static int __link_module(void *_mod)
1973 { 1973 {
1974 struct module *mod = _mod; 1974 struct module *mod = _mod;
1975 list_add(&mod->list, &modules); 1975 list_add(&mod->list, &modules);
1976 return 0; 1976 return 0;
1977 } 1977 }
1978 1978
1979 /* This is where the real work happens */ 1979 /* This is where the real work happens */
1980 asmlinkage long 1980 asmlinkage long
1981 sys_init_module(void __user *umod, 1981 sys_init_module(void __user *umod,
1982 unsigned long len, 1982 unsigned long len,
1983 const char __user *uargs) 1983 const char __user *uargs)
1984 { 1984 {
1985 struct module *mod; 1985 struct module *mod;
1986 int ret = 0; 1986 int ret = 0;
1987 1987
1988 /* Must have permission */ 1988 /* Must have permission */
1989 if (!capable(CAP_SYS_MODULE)) 1989 if (!capable(CAP_SYS_MODULE))
1990 return -EPERM; 1990 return -EPERM;
1991 1991
1992 /* Only one module load at a time, please */ 1992 /* Only one module load at a time, please */
1993 if (mutex_lock_interruptible(&module_mutex) != 0) 1993 if (mutex_lock_interruptible(&module_mutex) != 0)
1994 return -EINTR; 1994 return -EINTR;
1995 1995
1996 /* Do all the hard work */ 1996 /* Do all the hard work */
1997 mod = load_module(umod, len, uargs); 1997 mod = load_module(umod, len, uargs);
1998 if (IS_ERR(mod)) { 1998 if (IS_ERR(mod)) {
1999 mutex_unlock(&module_mutex); 1999 mutex_unlock(&module_mutex);
2000 return PTR_ERR(mod); 2000 return PTR_ERR(mod);
2001 } 2001 }
2002 2002
2003 /* Now sew it into the lists. They won't access us, since 2003 /* Now sew it into the lists. They won't access us, since
2004 strong_try_module_get() will fail. */ 2004 strong_try_module_get() will fail. */
2005 stop_machine_run(__link_module, mod, NR_CPUS); 2005 stop_machine_run(__link_module, mod, NR_CPUS);
2006 2006
2007 /* Drop lock so they can recurse */ 2007 /* Drop lock so they can recurse */
2008 mutex_unlock(&module_mutex); 2008 mutex_unlock(&module_mutex);
2009 2009
2010 blocking_notifier_call_chain(&module_notify_list, 2010 blocking_notifier_call_chain(&module_notify_list,
2011 MODULE_STATE_COMING, mod); 2011 MODULE_STATE_COMING, mod);
2012 2012
2013 /* Start the module */ 2013 /* Start the module */
2014 if (mod->init != NULL) 2014 if (mod->init != NULL)
2015 ret = mod->init(); 2015 ret = mod->init();
2016 if (ret < 0) { 2016 if (ret < 0) {
2017 /* Init routine failed: abort. Try to protect us from 2017 /* Init routine failed: abort. Try to protect us from
2018 buggy refcounters. */ 2018 buggy refcounters. */
2019 mod->state = MODULE_STATE_GOING; 2019 mod->state = MODULE_STATE_GOING;
2020 synchronize_sched(); 2020 synchronize_sched();
2021 if (mod->unsafe) 2021 if (mod->unsafe)
2022 printk(KERN_ERR "%s: module is now stuck!\n", 2022 printk(KERN_ERR "%s: module is now stuck!\n",
2023 mod->name); 2023 mod->name);
2024 else { 2024 else {
2025 module_put(mod); 2025 module_put(mod);
2026 mutex_lock(&module_mutex); 2026 mutex_lock(&module_mutex);
2027 free_module(mod); 2027 free_module(mod);
2028 mutex_unlock(&module_mutex); 2028 mutex_unlock(&module_mutex);
2029 } 2029 }
2030 return ret; 2030 return ret;
2031 } 2031 }
2032 2032
2033 /* Now it's a first class citizen! */ 2033 /* Now it's a first class citizen! */
2034 mutex_lock(&module_mutex); 2034 mutex_lock(&module_mutex);
2035 mod->state = MODULE_STATE_LIVE; 2035 mod->state = MODULE_STATE_LIVE;
2036 /* Drop initial reference. */ 2036 /* Drop initial reference. */
2037 module_put(mod); 2037 module_put(mod);
2038 unwind_remove_table(mod->unwind_info, 1); 2038 unwind_remove_table(mod->unwind_info, 1);
2039 module_free(mod, mod->module_init); 2039 module_free(mod, mod->module_init);
2040 mod->module_init = NULL; 2040 mod->module_init = NULL;
2041 mod->init_size = 0; 2041 mod->init_size = 0;
2042 mod->init_text_size = 0; 2042 mod->init_text_size = 0;
2043 mutex_unlock(&module_mutex); 2043 mutex_unlock(&module_mutex);
2044 2044
2045 return 0; 2045 return 0;
2046 } 2046 }
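For context, sys_init_module() above is reached from userspace through the init_module syscall with exactly the (image, len, args) triple it receives. A minimal sketch of driving it by hand follows; it is not part of this patch, the module path is invented, and error handling is abbreviated.

/* Hedged illustration: load a module image via the raw init_module syscall.
 * Requires CAP_SYS_MODULE, mirroring the capable() check in sys_init_module. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        const char *path = argc > 1 ? argv[1] : "example.ko";   /* hypothetical */
        struct stat st;
        int fd = open(path, O_RDONLY);

        if (fd < 0 || fstat(fd, &st) < 0) {
                perror(path);
                return 1;
        }
        void *image = malloc(st.st_size);
        if (!image || read(fd, image, st.st_size) != st.st_size) {
                perror("read");
                return 1;
        }
        /* Same argument order as sys_init_module(umod, len, uargs). */
        if (syscall(SYS_init_module, image, (unsigned long)st.st_size, "") != 0) {
                perror("init_module");
                return 1;
        }
        return 0;
}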
2047 2047
2048 static inline int within(unsigned long addr, void *start, unsigned long size) 2048 static inline int within(unsigned long addr, void *start, unsigned long size)
2049 { 2049 {
2050 return ((void *)addr >= start && (void *)addr < start + size); 2050 return ((void *)addr >= start && (void *)addr < start + size);
2051 } 2051 }
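within() treats [start, start + size) as a half-open range. The tiny standalone check below (addresses taken from a local array, relying on GCC's void-pointer arithmetic just as the kernel does) shows the boundary cases:

#include <stdio.h>

static int within(unsigned long addr, void *start, unsigned long size)
{
        /* void-pointer arithmetic is a GCC extension the kernel relies on */
        return ((void *)addr >= start && (void *)addr < start + size);
}

int main(void)
{
        char region[64];                        /* stand-in for module_core */
        unsigned long base = (unsigned long)region;

        printf("%d\n", within(base, region, sizeof(region)));       /* 1: first byte  */
        printf("%d\n", within(base + 63, region, sizeof(region)));  /* 1: last byte   */
        printf("%d\n", within(base + 64, region, sizeof(region)));  /* 0: one past end */
        return 0;
}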
2052 2052
2053 #ifdef CONFIG_KALLSYMS 2053 #ifdef CONFIG_KALLSYMS
2054 /* 2054 /*
2055 * This ignores the intensely annoying "mapping symbols" found 2055 * This ignores the intensely annoying "mapping symbols" found
2056 * in ARM ELF files: $a, $t and $d. 2056 * in ARM ELF files: $a, $t and $d.
2057 */ 2057 */
2058 static inline int is_arm_mapping_symbol(const char *str) 2058 static inline int is_arm_mapping_symbol(const char *str)
2059 { 2059 {
2060 return str[0] == '$' && strchr("atd", str[1]) 2060 return str[0] == '$' && strchr("atd", str[1])
2061 && (str[2] == '\0' || str[2] == '.'); 2061 && (str[2] == '\0' || str[2] == '.');
2062 } 2062 }
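A quick standalone check (the sample names are invented) of which strings the filter above drops:

#include <stdio.h>
#include <string.h>

static int is_arm_mapping_symbol(const char *str)
{
        return str[0] == '$' && strchr("atd", str[1])
               && (str[2] == '\0' || str[2] == '.');
}

int main(void)
{
        const char *syms[] = { "$a", "$d.14", "$t", "$x", "do_work", "$area" };

        for (unsigned i = 0; i < sizeof(syms) / sizeof(syms[0]); i++)
                printf("%-8s -> %s\n", syms[i],
                       is_arm_mapping_symbol(syms[i]) ? "skip" : "keep");
        return 0;               /* $a, $d.14, $t are skipped; the rest are kept */
}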
2063 2063
2064 static const char *get_ksymbol(struct module *mod, 2064 static const char *get_ksymbol(struct module *mod,
2065 unsigned long addr, 2065 unsigned long addr,
2066 unsigned long *size, 2066 unsigned long *size,
2067 unsigned long *offset) 2067 unsigned long *offset)
2068 { 2068 {
2069 unsigned int i, best = 0; 2069 unsigned int i, best = 0;
2070 unsigned long nextval; 2070 unsigned long nextval;
2071 2071
2072 /* At worst, next value is at end of module */ 2072 /* At worst, next value is at end of module */
2073 if (within(addr, mod->module_init, mod->init_size)) 2073 if (within(addr, mod->module_init, mod->init_size))
2074 nextval = (unsigned long)mod->module_init+mod->init_text_size; 2074 nextval = (unsigned long)mod->module_init+mod->init_text_size;
2075 else 2075 else
2076 nextval = (unsigned long)mod->module_core+mod->core_text_size; 2076 nextval = (unsigned long)mod->module_core+mod->core_text_size;
2077 2077
2078 /* Scan for closest preceding symbol, and next symbol. (ELF 2078 /* Scan for closest preceding symbol, and next symbol. (ELF
2079 starts real symbols at 1). */ 2079 starts real symbols at 1). */
2080 for (i = 1; i < mod->num_symtab; i++) { 2080 for (i = 1; i < mod->num_symtab; i++) {
2081 if (mod->symtab[i].st_shndx == SHN_UNDEF) 2081 if (mod->symtab[i].st_shndx == SHN_UNDEF)
2082 continue; 2082 continue;
2083 2083
2084 /* We ignore unnamed symbols: they're uninformative 2084 /* We ignore unnamed symbols: they're uninformative
2085 * and inserted at a whim. */ 2085 * and inserted at a whim. */
2086 if (mod->symtab[i].st_value <= addr 2086 if (mod->symtab[i].st_value <= addr
2087 && mod->symtab[i].st_value > mod->symtab[best].st_value 2087 && mod->symtab[i].st_value > mod->symtab[best].st_value
2088 && *(mod->strtab + mod->symtab[i].st_name) != '\0' 2088 && *(mod->strtab + mod->symtab[i].st_name) != '\0'
2089 && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name)) 2089 && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
2090 best = i; 2090 best = i;
2091 if (mod->symtab[i].st_value > addr 2091 if (mod->symtab[i].st_value > addr
2092 && mod->symtab[i].st_value < nextval 2092 && mod->symtab[i].st_value < nextval
2093 && *(mod->strtab + mod->symtab[i].st_name) != '\0' 2093 && *(mod->strtab + mod->symtab[i].st_name) != '\0'
2094 && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name)) 2094 && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
2095 nextval = mod->symtab[i].st_value; 2095 nextval = mod->symtab[i].st_value;
2096 } 2096 }
2097 2097
2098 if (!best) 2098 if (!best)
2099 return NULL; 2099 return NULL;
2100 2100
2101 if (size) 2101 if (size)
2102 *size = nextval - mod->symtab[best].st_value; 2102 *size = nextval - mod->symtab[best].st_value;
2103 if (offset) 2103 if (offset)
2104 *offset = addr - mod->symtab[best].st_value; 2104 *offset = addr - mod->symtab[best].st_value;
2105 return mod->strtab + mod->symtab[best].st_name; 2105 return mod->strtab + mod->symtab[best].st_name;
2106 } 2106 }
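For reference, a self-contained analogue of the scan above, run over a fabricated symbol table, shows how "best" and "nextval" turn into the name+offset/size triple that callers print:

#include <stdio.h>

struct sym { unsigned long value; const char *name; };

/* Fabricated, unsorted "symtab"; entry 0 plays the role of the ELF null symbol. */
static const struct sym symtab[] = {
        { 0x0000, "" },
        { 0x1000, "init_foo" },
        { 0x1400, "foo_work" },
        { 0x1900, "foo_exit" },
};
static const unsigned long region_end = 0x2000;   /* "end of module text" */

int main(void)
{
        unsigned long addr = 0x1450;
        unsigned long nextval = region_end;
        unsigned int i, best = 0;

        for (i = 1; i < sizeof(symtab) / sizeof(symtab[0]); i++) {
                if (symtab[i].value <= addr && symtab[i].value > symtab[best].value)
                        best = i;                      /* closest preceding symbol */
                if (symtab[i].value > addr && symtab[i].value < nextval)
                        nextval = symtab[i].value;     /* start of the next symbol */
        }
        if (best)
                printf("%s+%#lx/%#lx\n", symtab[best].name,
                       addr - symtab[best].value, nextval - symtab[best].value);
        return 0;               /* prints foo_work+0x50/0x500 */
}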
2107 2107
2108 /* For kallsyms to ask for address resolution. NULL means not found. 2108 /* For kallsyms to ask for address resolution. NULL means not found.
2109 We don't lock, as this is used for oops resolution and races are a 2109 We don't lock, as this is used for oops resolution and races are a
2110 lesser concern. */ 2110 lesser concern. */
2111 const char *module_address_lookup(unsigned long addr, 2111 const char *module_address_lookup(unsigned long addr,
2112 unsigned long *size, 2112 unsigned long *size,
2113 unsigned long *offset, 2113 unsigned long *offset,
2114 char **modname) 2114 char **modname)
2115 { 2115 {
2116 struct module *mod; 2116 struct module *mod;
2117 2117
2118 list_for_each_entry(mod, &modules, list) { 2118 list_for_each_entry(mod, &modules, list) {
2119 if (within(addr, mod->module_init, mod->init_size) 2119 if (within(addr, mod->module_init, mod->init_size)
2120 || within(addr, mod->module_core, mod->core_size)) { 2120 || within(addr, mod->module_core, mod->core_size)) {
2121 if (modname) 2121 if (modname)
2122 *modname = mod->name; 2122 *modname = mod->name;
2123 return get_ksymbol(mod, addr, size, offset); 2123 return get_ksymbol(mod, addr, size, offset);
2124 } 2124 }
2125 } 2125 }
2126 return NULL; 2126 return NULL;
2127 } 2127 }
2128 2128
2129 int lookup_module_symbol_name(unsigned long addr, char *symname) 2129 int lookup_module_symbol_name(unsigned long addr, char *symname)
2130 { 2130 {
2131 struct module *mod; 2131 struct module *mod;
2132 2132
2133 mutex_lock(&module_mutex); 2133 mutex_lock(&module_mutex);
2134 list_for_each_entry(mod, &modules, list) { 2134 list_for_each_entry(mod, &modules, list) {
2135 if (within(addr, mod->module_init, mod->init_size) || 2135 if (within(addr, mod->module_init, mod->init_size) ||
2136 within(addr, mod->module_core, mod->core_size)) { 2136 within(addr, mod->module_core, mod->core_size)) {
2137 const char *sym; 2137 const char *sym;
2138 2138
2139 sym = get_ksymbol(mod, addr, NULL, NULL); 2139 sym = get_ksymbol(mod, addr, NULL, NULL);
2140 if (!sym) 2140 if (!sym)
2141 goto out; 2141 goto out;
2142 strlcpy(symname, sym, KSYM_NAME_LEN + 1); 2142 strlcpy(symname, sym, KSYM_NAME_LEN + 1);
2143 mutex_unlock(&module_mutex); 2143 mutex_unlock(&module_mutex);
2144 return 0; 2144 return 0;
2145 } 2145 }
2146 } 2146 }
2147 out: 2147 out:
2148 mutex_unlock(&module_mutex); 2148 mutex_unlock(&module_mutex);
2149 return -ERANGE; 2149 return -ERANGE;
2150 } 2150 }
2151 2151
2152 int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
2153 unsigned long *offset, char *modname, char *name)
2154 {
2155 struct module *mod;
2156
2157 mutex_lock(&module_mutex);
2158 list_for_each_entry(mod, &modules, list) {
2159 if (within(addr, mod->module_init, mod->init_size) ||
2160 within(addr, mod->module_core, mod->core_size)) {
2161 const char *sym;
2162
2163 sym = get_ksymbol(mod, addr, size, offset);
2164 if (!sym)
2165 goto out;
2166 if (modname)
2167 strlcpy(modname, mod->name, MODULE_NAME_LEN + 1);
2168 if (name)
2169 strlcpy(name, sym, KSYM_NAME_LEN + 1);
2170 mutex_unlock(&module_mutex);
2171 return 0;
2172 }
2173 }
2174 out:
2175 mutex_unlock(&module_mutex);
2176 return -ERANGE;
2177 }
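The helper added above copies the symbol and module names into caller-supplied buffers while module_mutex is held, rather than returning pointers into the module's own string table that a concurrent rmmod could free. The standalone sketch below (userspace, invented names, build with -pthread) mirrors that copy-under-lock pattern:

/* Userspace analogue: copy strings out while the list lock is held, so the
 * caller never keeps pointers into memory a concurrent unload could free. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NAME_MAX_LEN 64

struct fake_module {
        char *name;                      /* freed when the module is unloaded */
        struct fake_module *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct fake_module *modules;

/* Safe variant: the result lives in the caller's buffer, like the code above. */
static int lookup_name_copy(const char *wanted, char *buf)
{
        int ret = -1;

        pthread_mutex_lock(&list_lock);
        for (struct fake_module *m = modules; m; m = m->next) {
                if (strcmp(m->name, wanted) == 0) {
                        strncpy(buf, m->name, NAME_MAX_LEN - 1);
                        buf[NAME_MAX_LEN - 1] = '\0';
                        ret = 0;
                        break;
                }
        }
        pthread_mutex_unlock(&list_lock);
        return ret;             /* buf stays valid even if the module goes away */
}

int main(void)
{
        struct fake_module m = { .name = strdup("demo_mod"), .next = NULL };
        char buf[NAME_MAX_LEN];

        modules = &m;
        if (lookup_name_copy("demo_mod", buf) == 0)
                printf("found %s\n", buf);
        modules = NULL;
        free(m.name);           /* "rmmod": the copy in buf is unaffected */
        return 0;
}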
2178
2152 int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, 2179 int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
2153 char *name, char *module_name, int *exported) 2180 char *name, char *module_name, int *exported)
2154 { 2181 {
2155 struct module *mod; 2182 struct module *mod;
2156 2183
2157 mutex_lock(&module_mutex); 2184 mutex_lock(&module_mutex);
2158 list_for_each_entry(mod, &modules, list) { 2185 list_for_each_entry(mod, &modules, list) {
2159 if (symnum < mod->num_symtab) { 2186 if (symnum < mod->num_symtab) {
2160 *value = mod->symtab[symnum].st_value; 2187 *value = mod->symtab[symnum].st_value;
2161 *type = mod->symtab[symnum].st_info; 2188 *type = mod->symtab[symnum].st_info;
2162 strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, 2189 strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
2163 KSYM_NAME_LEN + 1); 2190 KSYM_NAME_LEN + 1);
2164 strlcpy(module_name, mod->name, MODULE_NAME_LEN + 1); 2191 strlcpy(module_name, mod->name, MODULE_NAME_LEN + 1);
2165 *exported = is_exported(name, mod); 2192 *exported = is_exported(name, mod);
2166 mutex_unlock(&module_mutex); 2193 mutex_unlock(&module_mutex);
2167 return 0; 2194 return 0;
2168 } 2195 }
2169 symnum -= mod->num_symtab; 2196 symnum -= mod->num_symtab;
2170 } 2197 }
2171 mutex_unlock(&module_mutex); 2198 mutex_unlock(&module_mutex);
2172 return -ERANGE; 2199 return -ERANGE;
2173 } 2200 }
2174 2201
2175 static unsigned long mod_find_symname(struct module *mod, const char *name) 2202 static unsigned long mod_find_symname(struct module *mod, const char *name)
2176 { 2203 {
2177 unsigned int i; 2204 unsigned int i;
2178 2205
2179 for (i = 0; i < mod->num_symtab; i++) 2206 for (i = 0; i < mod->num_symtab; i++)
2180 if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 && 2207 if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
2181 mod->symtab[i].st_info != 'U') 2208 mod->symtab[i].st_info != 'U')
2182 return mod->symtab[i].st_value; 2209 return mod->symtab[i].st_value;
2183 return 0; 2210 return 0;
2184 } 2211 }
2185 2212
2186 /* Look for this name: can be of form module:name. */ 2213 /* Look for this name: can be of form module:name. */
2187 unsigned long module_kallsyms_lookup_name(const char *name) 2214 unsigned long module_kallsyms_lookup_name(const char *name)
2188 { 2215 {
2189 struct module *mod; 2216 struct module *mod;
2190 char *colon; 2217 char *colon;
2191 unsigned long ret = 0; 2218 unsigned long ret = 0;
2192 2219
2193 /* Don't lock: we're in enough trouble already. */ 2220 /* Don't lock: we're in enough trouble already. */
2194 if ((colon = strchr(name, ':')) != NULL) { 2221 if ((colon = strchr(name, ':')) != NULL) {
2195 *colon = '\0'; 2222 *colon = '\0';
2196 if ((mod = find_module(name)) != NULL) 2223 if ((mod = find_module(name)) != NULL)
2197 ret = mod_find_symname(mod, colon+1); 2224 ret = mod_find_symname(mod, colon+1);
2198 *colon = ':'; 2225 *colon = ':';
2199 } else { 2226 } else {
2200 list_for_each_entry(mod, &modules, list) 2227 list_for_each_entry(mod, &modules, list)
2201 if ((ret = mod_find_symname(mod, name)) != 0) 2228 if ((ret = mod_find_symname(mod, name)) != 0)
2202 break; 2229 break;
2203 } 2230 }
2204 return ret; 2231 return ret;
2205 } 2232 }
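The colon handling above splits "module:symbol" in place by temporarily writing a NUL and then restoring the ':'. A standalone sketch with a made-up lookup table keeps that trick visible:

#include <stdio.h>
#include <string.h>

/* Stand-in for find_module()/mod_find_symname(): entirely made-up data. */
static unsigned long fake_lookup(const char *modname, const char *symname)
{
        if (strcmp(modname, "e1000") == 0 && strcmp(symname, "e1000_up") == 0)
                return 0xc0ffee00UL;
        return 0;
}

static unsigned long lookup(char *name)
{
        unsigned long ret = 0;
        char *colon = strchr(name, ':');

        if (colon) {
                *colon = '\0';                     /* split "module:symbol" in place */
                ret = fake_lookup(name, colon + 1);
                *colon = ':';                      /* restore the caller's string */
        } else {
                ret = fake_lookup("e1000", name);  /* "search every module" stand-in */
        }
        return ret;
}

int main(void)
{
        char query[] = "e1000:e1000_up";

        printf("%#lx\n", lookup(query));
        printf("%s\n", query);                     /* unchanged: e1000:e1000_up */
        return 0;
}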
2206 #endif /* CONFIG_KALLSYMS */ 2233 #endif /* CONFIG_KALLSYMS */
2207 2234
2208 /* Called by the /proc file system to return a list of modules. */ 2235 /* Called by the /proc file system to return a list of modules. */
2209 static void *m_start(struct seq_file *m, loff_t *pos) 2236 static void *m_start(struct seq_file *m, loff_t *pos)
2210 { 2237 {
2211 struct list_head *i; 2238 struct list_head *i;
2212 loff_t n = 0; 2239 loff_t n = 0;
2213 2240
2214 mutex_lock(&module_mutex); 2241 mutex_lock(&module_mutex);
2215 list_for_each(i, &modules) { 2242 list_for_each(i, &modules) {
2216 if (n++ == *pos) 2243 if (n++ == *pos)
2217 break; 2244 break;
2218 } 2245 }
2219 if (i == &modules) 2246 if (i == &modules)
2220 return NULL; 2247 return NULL;
2221 return i; 2248 return i;
2222 } 2249 }
2223 2250
2224 static void *m_next(struct seq_file *m, void *p, loff_t *pos) 2251 static void *m_next(struct seq_file *m, void *p, loff_t *pos)
2225 { 2252 {
2226 struct list_head *i = p; 2253 struct list_head *i = p;
2227 (*pos)++; 2254 (*pos)++;
2228 if (i->next == &modules) 2255 if (i->next == &modules)
2229 return NULL; 2256 return NULL;
2230 return i->next; 2257 return i->next;
2231 } 2258 }
2232 2259
2233 static void m_stop(struct seq_file *m, void *p) 2260 static void m_stop(struct seq_file *m, void *p)
2234 { 2261 {
2235 mutex_unlock(&module_mutex); 2262 mutex_unlock(&module_mutex);
2236 } 2263 }
2237 2264
2238 static char *taint_flags(unsigned int taints, char *buf) 2265 static char *taint_flags(unsigned int taints, char *buf)
2239 { 2266 {
2240 int bx = 0; 2267 int bx = 0;
2241 2268
2242 if (taints) { 2269 if (taints) {
2243 buf[bx++] = '('; 2270 buf[bx++] = '(';
2244 if (taints & TAINT_PROPRIETARY_MODULE) 2271 if (taints & TAINT_PROPRIETARY_MODULE)
2245 buf[bx++] = 'P'; 2272 buf[bx++] = 'P';
2246 if (taints & TAINT_FORCED_MODULE) 2273 if (taints & TAINT_FORCED_MODULE)
2247 buf[bx++] = 'F'; 2274 buf[bx++] = 'F';
2248 /* 2275 /*
2249 * TAINT_FORCED_RMMOD: could be added. 2276 * TAINT_FORCED_RMMOD: could be added.
2250 * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't 2277 * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
2251 * apply to modules. 2278 * apply to modules.
2252 */ 2279 */
2253 buf[bx++] = ')'; 2280 buf[bx++] = ')';
2254 } 2281 }
2255 buf[bx] = '\0'; 2282 buf[bx] = '\0';
2256 2283
2257 return buf; 2284 return buf;
2258 } 2285 }
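taint_flags() emits at most "(PF)" plus the terminating NUL, which is why the callers below get away with an 8-byte buffer. A standalone version, with the two taint values assumed to be simple bitmasks purely for the example, prints the possible forms:

#include <stdio.h>

/* Flag values assumed for the example; only their use as bitmasks matters. */
#define TAINT_PROPRIETARY_MODULE 1
#define TAINT_FORCED_MODULE      2

static char *taint_flags(unsigned int taints, char *buf)
{
        int bx = 0;

        if (taints) {
                buf[bx++] = '(';
                if (taints & TAINT_PROPRIETARY_MODULE)
                        buf[bx++] = 'P';
                if (taints & TAINT_FORCED_MODULE)
                        buf[bx++] = 'F';
                buf[bx++] = ')';
        }
        buf[bx] = '\0';
        return buf;
}

int main(void)
{
        char buf[8];

        printf("[%s]\n", taint_flags(0, buf));                          /* []     */
        printf("[%s]\n", taint_flags(TAINT_PROPRIETARY_MODULE, buf));   /* [(P)]  */
        printf("[%s]\n", taint_flags(TAINT_PROPRIETARY_MODULE |
                                     TAINT_FORCED_MODULE, buf));        /* [(PF)] */
        return 0;
}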
2259 2286
2260 static int m_show(struct seq_file *m, void *p) 2287 static int m_show(struct seq_file *m, void *p)
2261 { 2288 {
2262 struct module *mod = list_entry(p, struct module, list); 2289 struct module *mod = list_entry(p, struct module, list);
2263 char buf[8]; 2290 char buf[8];
2264 2291
2265 seq_printf(m, "%s %lu", 2292 seq_printf(m, "%s %lu",
2266 mod->name, mod->init_size + mod->core_size); 2293 mod->name, mod->init_size + mod->core_size);
2267 print_unload_info(m, mod); 2294 print_unload_info(m, mod);
2268 2295
2269 /* Informative for users. */ 2296 /* Informative for users. */
2270 seq_printf(m, " %s", 2297 seq_printf(m, " %s",
2271 mod->state == MODULE_STATE_GOING ? "Unloading": 2298 mod->state == MODULE_STATE_GOING ? "Unloading":
2272 mod->state == MODULE_STATE_COMING ? "Loading": 2299 mod->state == MODULE_STATE_COMING ? "Loading":
2273 "Live"); 2300 "Live");
2274 /* Used by oprofile and other similar tools. */ 2301 /* Used by oprofile and other similar tools. */
2275 seq_printf(m, " 0x%p", mod->module_core); 2302 seq_printf(m, " 0x%p", mod->module_core);
2276 2303
2277 /* Taints info */ 2304 /* Taints info */
2278 if (mod->taints) 2305 if (mod->taints)
2279 seq_printf(m, " %s", taint_flags(mod->taints, buf)); 2306 seq_printf(m, " %s", taint_flags(mod->taints, buf));
2280 2307
2281 seq_printf(m, "\n"); 2308 seq_printf(m, "\n");
2282 return 0; 2309 return 0;
2283 } 2310 }
2284 2311
2285 /* Format: modulename size refcount deps address 2312 /* Format: modulename size refcount deps address
2286 2313
2287 Where refcount is a number or -, and deps is a comma-separated list 2314 Where refcount is a number or -, and deps is a comma-separated list
2288 of depends or -. 2315 of depends or -.
2289 */ 2316 */
2290 const struct seq_operations modules_op = { 2317 const struct seq_operations modules_op = {
2291 .start = m_start, 2318 .start = m_start,
2292 .next = m_next, 2319 .next = m_next,
2293 .stop = m_stop, 2320 .stop = m_stop,
2294 .show = m_show 2321 .show = m_show
2295 }; 2322 };
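For orientation only, m_show() renders each module as one line of /proc/modules in the "modulename size refcount deps address" shape described in the comment above, with the taint suffix appended when present. The entries below are invented values, not output of this kernel:

usb_storage 73153 1 - Live 0xf8a4d000
fakefw 184220 2 fakenet,fakecrypto, Live 0xf89c3000 (P)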
2296 2323
2297 /* Given an address, look for it in the module exception tables. */ 2324 /* Given an address, look for it in the module exception tables. */
2298 const struct exception_table_entry *search_module_extables(unsigned long addr) 2325 const struct exception_table_entry *search_module_extables(unsigned long addr)
2299 { 2326 {
2300 unsigned long flags; 2327 unsigned long flags;
2301 const struct exception_table_entry *e = NULL; 2328 const struct exception_table_entry *e = NULL;
2302 struct module *mod; 2329 struct module *mod;
2303 2330
2304 spin_lock_irqsave(&modlist_lock, flags); 2331 spin_lock_irqsave(&modlist_lock, flags);
2305 list_for_each_entry(mod, &modules, list) { 2332 list_for_each_entry(mod, &modules, list) {
2306 if (mod->num_exentries == 0) 2333 if (mod->num_exentries == 0)
2307 continue; 2334 continue;
2308 2335
2309 e = search_extable(mod->extable, 2336 e = search_extable(mod->extable,
2310 mod->extable + mod->num_exentries - 1, 2337 mod->extable + mod->num_exentries - 1,
2311 addr); 2338 addr);
2312 if (e) 2339 if (e)
2313 break; 2340 break;
2314 } 2341 }
2315 spin_unlock_irqrestore(&modlist_lock, flags); 2342 spin_unlock_irqrestore(&modlist_lock, flags);
2316 2343
2317 /* Now, if we found one, we are running inside it, hence 2344 /* Now, if we found one, we are running inside it, hence
2318 we cannot unload the module, and no refcnt is needed. */ 2345 we cannot unload the module, and no refcnt is needed. */
2319 return e; 2346 return e;
2320 } 2347 }
2321 2348
2322 /* 2349 /*
2323 * Is this a valid module address? 2350 * Is this a valid module address?
2324 */ 2351 */
2325 int is_module_address(unsigned long addr) 2352 int is_module_address(unsigned long addr)
2326 { 2353 {
2327 unsigned long flags; 2354 unsigned long flags;
2328 struct module *mod; 2355 struct module *mod;
2329 2356
2330 spin_lock_irqsave(&modlist_lock, flags); 2357 spin_lock_irqsave(&modlist_lock, flags);
2331 2358
2332 list_for_each_entry(mod, &modules, list) { 2359 list_for_each_entry(mod, &modules, list) {
2333 if (within(addr, mod->module_core, mod->core_size)) { 2360 if (within(addr, mod->module_core, mod->core_size)) {
2334 spin_unlock_irqrestore(&modlist_lock, flags); 2361 spin_unlock_irqrestore(&modlist_lock, flags);
2335 return 1; 2362 return 1;
2336 } 2363 }
2337 } 2364 }
2338 2365
2339 spin_unlock_irqrestore(&modlist_lock, flags); 2366 spin_unlock_irqrestore(&modlist_lock, flags);
2340 2367
2341 return 0; 2368 return 0;
2342 } 2369 }
2343 2370
2344 2371
2345 /* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ 2372 /* Is this a valid kernel address? We don't grab the lock: we are oopsing. */
2346 struct module *__module_text_address(unsigned long addr) 2373 struct module *__module_text_address(unsigned long addr)
2347 { 2374 {
2348 struct module *mod; 2375 struct module *mod;
2349 2376
2350 list_for_each_entry(mod, &modules, list) 2377 list_for_each_entry(mod, &modules, list)
2351 if (within(addr, mod->module_init, mod->init_text_size) 2378 if (within(addr, mod->module_init, mod->init_text_size)
2352 || within(addr, mod->module_core, mod->core_text_size)) 2379 || within(addr, mod->module_core, mod->core_text_size))
2353 return mod; 2380 return mod;
2354 return NULL; 2381 return NULL;
2355 } 2382 }
2356 2383
2357 struct module *module_text_address(unsigned long addr) 2384 struct module *module_text_address(unsigned long addr)
2358 { 2385 {
2359 struct module *mod; 2386 struct module *mod;
2360 unsigned long flags; 2387 unsigned long flags;
2361 2388
2362 spin_lock_irqsave(&modlist_lock, flags); 2389 spin_lock_irqsave(&modlist_lock, flags);
2363 mod = __module_text_address(addr); 2390 mod = __module_text_address(addr);
2364 spin_unlock_irqrestore(&modlist_lock, flags); 2391 spin_unlock_irqrestore(&modlist_lock, flags);
2365 2392
2366 return mod; 2393 return mod;
2367 } 2394 }
2368 2395
2369 /* Don't grab lock, we're oopsing. */ 2396 /* Don't grab lock, we're oopsing. */
2370 void print_modules(void) 2397 void print_modules(void)
2371 { 2398 {
2372 struct module *mod; 2399 struct module *mod;
2373 char buf[8]; 2400 char buf[8];
2374 2401
2375 printk("Modules linked in:"); 2402 printk("Modules linked in:");
2376 list_for_each_entry(mod, &modules, list) 2403 list_for_each_entry(mod, &modules, list)
2377 printk(" %s%s", mod->name, taint_flags(mod->taints, buf)); 2404 printk(" %s%s", mod->name, taint_flags(mod->taints, buf));
2378 printk("\n"); 2405 printk("\n");
2379 } 2406 }
2380 2407
2381 #ifdef CONFIG_SYSFS 2408 #ifdef CONFIG_SYSFS
2382 static char *make_driver_name(struct device_driver *drv) 2409 static char *make_driver_name(struct device_driver *drv)
2383 { 2410 {
2384 char *driver_name; 2411 char *driver_name;
2385 2412
2386 driver_name = kmalloc(strlen(drv->name) + strlen(drv->bus->name) + 2, 2413 driver_name = kmalloc(strlen(drv->name) + strlen(drv->bus->name) + 2,
2387 GFP_KERNEL); 2414 GFP_KERNEL);
2388 if (!driver_name) 2415 if (!driver_name)
2389 return NULL; 2416 return NULL;
2390 2417
2391 sprintf(driver_name, "%s:%s", drv->bus->name, drv->name); 2418 sprintf(driver_name, "%s:%s", drv->bus->name, drv->name);
2392 return driver_name; 2419 return driver_name;
2393 } 2420 }
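The kmalloc length in make_driver_name() reserves the two string lengths plus two extra bytes, one for the ':' separator and one for the trailing NUL. A standalone check with invented bus and driver names:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *make_driver_name(const char *bus, const char *drv)
{
        /* +2: one byte for ':' and one for the terminating NUL. */
        char *name = malloc(strlen(bus) + strlen(drv) + 2);

        if (!name)
                return NULL;
        sprintf(name, "%s:%s", bus, drv);
        return name;
}

int main(void)
{
        char *name = make_driver_name("pci", "e1000");   /* invented example */

        if (name) {
                printf("%s (%zu bytes incl. NUL)\n", name, strlen(name) + 1);
                free(name);
        }
        return 0;               /* prints pci:e1000 (10 bytes incl. NUL) */
}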
2394 2421
2395 static void module_create_drivers_dir(struct module_kobject *mk) 2422 static void module_create_drivers_dir(struct module_kobject *mk)
2396 { 2423 {
2397 if (!mk || mk->drivers_dir) 2424 if (!mk || mk->drivers_dir)
2398 return; 2425 return;
2399 2426
2400 mk->drivers_dir = kobject_add_dir(&mk->kobj, "drivers"); 2427 mk->drivers_dir = kobject_add_dir(&mk->kobj, "drivers");
2401 } 2428 }
2402 2429
2403 void module_add_driver(struct module *mod, struct device_driver *drv) 2430 void module_add_driver(struct module *mod, struct device_driver *drv)
2404 { 2431 {
2405 char *driver_name; 2432 char *driver_name;
2406 int no_warn; 2433 int no_warn;
2407 struct module_kobject *mk = NULL; 2434 struct module_kobject *mk = NULL;
2408 2435
2409 if (!drv) 2436 if (!drv)
2410 return; 2437 return;
2411 2438
2412 if (mod) 2439 if (mod)
2413 mk = &mod->mkobj; 2440 mk = &mod->mkobj;
2414 else if (drv->mod_name) { 2441 else if (drv->mod_name) {
2415 struct kobject *mkobj; 2442 struct kobject *mkobj;
2416 2443
2417 /* Lookup built-in module entry in /sys/modules */ 2444 /* Lookup built-in module entry in /sys/modules */
2418 mkobj = kset_find_obj(&module_subsys, drv->mod_name); 2445 mkobj = kset_find_obj(&module_subsys, drv->mod_name);
2419 if (mkobj) { 2446 if (mkobj) {
2420 mk = container_of(mkobj, struct module_kobject, kobj); 2447 mk = container_of(mkobj, struct module_kobject, kobj);
2421 /* remember our module structure */ 2448 /* remember our module structure */
2422 drv->mkobj = mk; 2449 drv->mkobj = mk;
2423 /* kset_find_obj took a reference */ 2450 /* kset_find_obj took a reference */
2424 kobject_put(mkobj); 2451 kobject_put(mkobj);
2425 } 2452 }
2426 } 2453 }
2427 2454
2428 if (!mk) 2455 if (!mk)
2429 return; 2456 return;
2430 2457
2431 /* Don't check return codes; these calls are idempotent */ 2458 /* Don't check return codes; these calls are idempotent */
2432 no_warn = sysfs_create_link(&drv->kobj, &mk->kobj, "module"); 2459 no_warn = sysfs_create_link(&drv->kobj, &mk->kobj, "module");
2433 driver_name = make_driver_name(drv); 2460 driver_name = make_driver_name(drv);
2434 if (driver_name) { 2461 if (driver_name) {
2435 module_create_drivers_dir(mk); 2462 module_create_drivers_dir(mk);
2436 no_warn = sysfs_create_link(mk->drivers_dir, &drv->kobj, 2463 no_warn = sysfs_create_link(mk->drivers_dir, &drv->kobj,
2437 driver_name); 2464 driver_name);
2438 kfree(driver_name); 2465 kfree(driver_name);
2439 } 2466 }
2440 } 2467 }
2441 EXPORT_SYMBOL(module_add_driver); 2468 EXPORT_SYMBOL(module_add_driver);
2442 2469
2443 void module_remove_driver(struct device_driver *drv) 2470 void module_remove_driver(struct device_driver *drv)
2444 { 2471 {
2445 struct module_kobject *mk = NULL; 2472 struct module_kobject *mk = NULL;
2446 char *driver_name; 2473 char *driver_name;
2447 2474
2448 if (!drv) 2475 if (!drv)
2449 return; 2476 return;
2450 2477
2451 sysfs_remove_link(&drv->kobj, "module"); 2478 sysfs_remove_link(&drv->kobj, "module");
2452 2479
2453 if (drv->owner) 2480 if (drv->owner)
2454 mk = &drv->owner->mkobj; 2481 mk = &drv->owner->mkobj;
2455 else if (drv->mkobj) 2482 else if (drv->mkobj)
2456 mk = drv->mkobj; 2483 mk = drv->mkobj;
2457 if (mk && mk->drivers_dir) { 2484 if (mk && mk->drivers_dir) {
2458 driver_name = make_driver_name(drv); 2485 driver_name = make_driver_name(drv);
2459 if (driver_name) { 2486 if (driver_name) {
2460 sysfs_remove_link(mk->drivers_dir, driver_name); 2487 sysfs_remove_link(mk->drivers_dir, driver_name);
2461 kfree(driver_name); 2488 kfree(driver_name);
2462 } 2489 }
2463 } 2490 }
2464 } 2491 }
2465 EXPORT_SYMBOL(module_remove_driver); 2492 EXPORT_SYMBOL(module_remove_driver);
2466 #endif 2493 #endif
2467 2494
2468 #ifdef CONFIG_MODVERSIONS 2495 #ifdef CONFIG_MODVERSIONS
2469 /* Generate the signature for struct module here, too, for modversions. */ 2496 /* Generate the signature for struct module here, too, for modversions. */
2470 void struct_module(struct module *mod) { return; } 2497 void struct_module(struct module *mod) { return; }
2471 EXPORT_SYMBOL(struct_module); 2498 EXPORT_SYMBOL(struct_module);
2472 #endif 2499 #endif
2473 2500
1 /* 1 /*
2 * linux/mm/slab.c 2 * linux/mm/slab.c
3 * Written by Mark Hemment, 1996/97. 3 * Written by Mark Hemment, 1996/97.
4 * (markhe@nextd.demon.co.uk) 4 * (markhe@nextd.demon.co.uk)
5 * 5 *
6 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli 6 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
7 * 7 *
8 * Major cleanup, different bufctl logic, per-cpu arrays 8 * Major cleanup, different bufctl logic, per-cpu arrays
9 * (c) 2000 Manfred Spraul 9 * (c) 2000 Manfred Spraul
10 * 10 *
11 * Cleanup, make the head arrays unconditional, preparation for NUMA 11 * Cleanup, make the head arrays unconditional, preparation for NUMA
12 * (c) 2002 Manfred Spraul 12 * (c) 2002 Manfred Spraul
13 * 13 *
14 * An implementation of the Slab Allocator as described in outline in; 14 * An implementation of the Slab Allocator as described in outline in;
15 * UNIX Internals: The New Frontiers by Uresh Vahalia 15 * UNIX Internals: The New Frontiers by Uresh Vahalia
16 * Pub: Prentice Hall ISBN 0-13-101908-2 16 * Pub: Prentice Hall ISBN 0-13-101908-2
17 * or with a little more detail in; 17 * or with a little more detail in;
18 * The Slab Allocator: An Object-Caching Kernel Memory Allocator 18 * The Slab Allocator: An Object-Caching Kernel Memory Allocator
19 * Jeff Bonwick (Sun Microsystems). 19 * Jeff Bonwick (Sun Microsystems).
20 * Presented at: USENIX Summer 1994 Technical Conference 20 * Presented at: USENIX Summer 1994 Technical Conference
21 * 21 *
22 * The memory is organized in caches, one cache for each object type. 22 * The memory is organized in caches, one cache for each object type.
23 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct) 23 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
24 * Each cache consists of many slabs (they are small (usually one 24 * Each cache consists of many slabs (they are small (usually one
25 * page long) and always contiguous), and each slab contains multiple 25 * page long) and always contiguous), and each slab contains multiple
26 * initialized objects. 26 * initialized objects.
27 * 27 *
28 * This means, that your constructor is used only for newly allocated 28 * This means, that your constructor is used only for newly allocated
29 * slabs and you must pass objects with the same initializations to 29 * slabs and you must pass objects with the same initializations to
30 * kmem_cache_free. 30 * kmem_cache_free.
31 * 31 *
32 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM, 32 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
33 * normal). If you need a special memory type, then you must create a new 33 * normal). If you need a special memory type, then you must create a new
34 * cache for that memory type. 34 * cache for that memory type.
35 * 35 *
36 * In order to reduce fragmentation, the slabs are sorted in 3 groups: 36 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
37 * full slabs with 0 free objects 37 * full slabs with 0 free objects
38 * partial slabs 38 * partial slabs
39 * empty slabs with no allocated objects 39 * empty slabs with no allocated objects
40 * 40 *
41 * If partial slabs exist, then new allocations come from these slabs, 41 * If partial slabs exist, then new allocations come from these slabs,
42 * otherwise from empty slabs or new slabs are allocated. 42 * otherwise from empty slabs or new slabs are allocated.
43 * 43 *
44 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache 44 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
45 * during kmem_cache_destroy(). The caller must prevent concurrent allocs. 45 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
46 * 46 *
47 * Each cache has a short per-cpu head array, most allocs 47 * Each cache has a short per-cpu head array, most allocs
48 * and frees go into that array, and if that array overflows, then 1/2 48 * and frees go into that array, and if that array overflows, then 1/2
49 * of the entries in the array are given back into the global cache. 49 * of the entries in the array are given back into the global cache.
50 * The head array is strictly LIFO and should improve the cache hit rates. 50 * The head array is strictly LIFO and should improve the cache hit rates.
51 * On SMP, it additionally reduces the spinlock operations. 51 * On SMP, it additionally reduces the spinlock operations.
52 * 52 *
53 * The c_cpuarray may not be read with enabled local interrupts - 53 * The c_cpuarray may not be read with enabled local interrupts -
54 * it's changed with a smp_call_function(). 54 * it's changed with a smp_call_function().
55 * 55 *
56 * SMP synchronization: 56 * SMP synchronization:
57 * constructors and destructors are called without any locking. 57 * constructors and destructors are called without any locking.
58 * Several members in struct kmem_cache and struct slab never change, they 58 * Several members in struct kmem_cache and struct slab never change, they
59 * are accessed without any locking. 59 * are accessed without any locking.
60 * The per-cpu arrays are never accessed from the wrong cpu, no locking, 60 * The per-cpu arrays are never accessed from the wrong cpu, no locking,
61 * and local interrupts are disabled so slab code is preempt-safe. 61 * and local interrupts are disabled so slab code is preempt-safe.
62 * The non-constant members are protected with a per-cache irq spinlock. 62 * The non-constant members are protected with a per-cache irq spinlock.
63 * 63 *
64 * Many thanks to Mark Hemment, who wrote another per-cpu slab patch 64 * Many thanks to Mark Hemment, who wrote another per-cpu slab patch
65 * in 2000 - many ideas in the current implementation are derived from 65 * in 2000 - many ideas in the current implementation are derived from
66 * his patch. 66 * his patch.
67 * 67 *
68 * Further notes from the original documentation: 68 * Further notes from the original documentation:
69 * 69 *
70 * 11 April '97. Started multi-threading - markhe 70 * 11 April '97. Started multi-threading - markhe
71 * The global cache-chain is protected by the mutex 'cache_chain_mutex'. 71 * The global cache-chain is protected by the mutex 'cache_chain_mutex'.
72 * The sem is only needed when accessing/extending the cache-chain, which 72 * The sem is only needed when accessing/extending the cache-chain, which
73 * can never happen inside an interrupt (kmem_cache_create(), 73 * can never happen inside an interrupt (kmem_cache_create(),
74 * kmem_cache_shrink() and kmem_cache_reap()). 74 * kmem_cache_shrink() and kmem_cache_reap()).
75 * 75 *
76 * At present, each engine can be growing a cache. This should be blocked. 76 * At present, each engine can be growing a cache. This should be blocked.
77 * 77 *
78 * 15 March 2005. NUMA slab allocator. 78 * 15 March 2005. NUMA slab allocator.
79 * Shai Fultheim <shai@scalex86.org>. 79 * Shai Fultheim <shai@scalex86.org>.
80 * Shobhit Dayal <shobhit@calsoftinc.com> 80 * Shobhit Dayal <shobhit@calsoftinc.com>
81 * Alok N Kataria <alokk@calsoftinc.com> 81 * Alok N Kataria <alokk@calsoftinc.com>
82 * Christoph Lameter <christoph@lameter.com> 82 * Christoph Lameter <christoph@lameter.com>
83 * 83 *
84 * Modified the slab allocator to be node aware on NUMA systems. 84 * Modified the slab allocator to be node aware on NUMA systems.
85 * Each node has its own list of partial, free and full slabs. 85 * Each node has its own list of partial, free and full slabs.
86 * All object allocations for a node occur from node specific slab lists. 86 * All object allocations for a node occur from node specific slab lists.
87 */ 87 */
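As a purely illustrative toy of the three-list policy described in the header comment (allocations prefer partial slabs, then empty ones, otherwise a new slab is grown, and a slab that fills up moves to the full list), with none of the real locking, per-CPU arrays or bufctl machinery:

#include <stdio.h>
#include <stdlib.h>

/* Toy model only: each "slab" is just a counter of used objects. */
struct toy_slab {
        int used, capacity;
        struct toy_slab *next;
};

struct toy_cache {
        struct toy_slab *full, *partial, *empty;   /* the three lists */
        int objs_per_slab;
};

static struct toy_slab *pop(struct toy_slab **list)
{
        struct toy_slab *s = *list;

        if (s)
                *list = s->next;
        return s;
}

static void push(struct toy_slab **list, struct toy_slab *s)
{
        s->next = *list;
        *list = s;
}

/* Allocation policy from the comment above: partial first, then empty,
 * then grow the cache with a brand new slab. */
static void toy_alloc(struct toy_cache *c)
{
        struct toy_slab *s = pop(&c->partial);

        if (!s)
                s = pop(&c->empty);
        if (!s) {
                s = calloc(1, sizeof(*s));          /* "grow": new slab */
                if (!s)
                        exit(1);
                s->capacity = c->objs_per_slab;
        }
        s->used++;
        push(s->used == s->capacity ? &c->full : &c->partial, s);
}

int main(void)
{
        struct toy_cache c = { .objs_per_slab = 2 };
        struct toy_slab *s;
        int full = 0, partial = 0;

        for (int i = 0; i < 5; i++)
                toy_alloc(&c);
        for (s = c.full; s; s = s->next)
                full++;
        for (s = c.partial; s; s = s->next)
                partial++;
        printf("full=%d partial=%d\n", full, partial);  /* full=2 partial=1 */
        return 0;
}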
88 88
89 #include <linux/slab.h> 89 #include <linux/slab.h>
90 #include <linux/mm.h> 90 #include <linux/mm.h>
91 #include <linux/poison.h> 91 #include <linux/poison.h>
92 #include <linux/swap.h> 92 #include <linux/swap.h>
93 #include <linux/cache.h> 93 #include <linux/cache.h>
94 #include <linux/interrupt.h> 94 #include <linux/interrupt.h>
95 #include <linux/init.h> 95 #include <linux/init.h>
96 #include <linux/compiler.h> 96 #include <linux/compiler.h>
97 #include <linux/cpuset.h> 97 #include <linux/cpuset.h>
98 #include <linux/seq_file.h> 98 #include <linux/seq_file.h>
99 #include <linux/notifier.h> 99 #include <linux/notifier.h>
100 #include <linux/kallsyms.h> 100 #include <linux/kallsyms.h>
101 #include <linux/cpu.h> 101 #include <linux/cpu.h>
102 #include <linux/sysctl.h> 102 #include <linux/sysctl.h>
103 #include <linux/module.h> 103 #include <linux/module.h>
104 #include <linux/rcupdate.h> 104 #include <linux/rcupdate.h>
105 #include <linux/string.h> 105 #include <linux/string.h>
106 #include <linux/uaccess.h> 106 #include <linux/uaccess.h>
107 #include <linux/nodemask.h> 107 #include <linux/nodemask.h>
108 #include <linux/mempolicy.h> 108 #include <linux/mempolicy.h>
109 #include <linux/mutex.h> 109 #include <linux/mutex.h>
110 #include <linux/fault-inject.h> 110 #include <linux/fault-inject.h>
111 #include <linux/rtmutex.h> 111 #include <linux/rtmutex.h>
112 #include <linux/reciprocal_div.h> 112 #include <linux/reciprocal_div.h>
113 113
114 #include <asm/cacheflush.h> 114 #include <asm/cacheflush.h>
115 #include <asm/tlbflush.h> 115 #include <asm/tlbflush.h>
116 #include <asm/page.h> 116 #include <asm/page.h>
117 117
118 /* 118 /*
119 * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. 119 * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
120 * 0 for faster, smaller code (especially in the critical paths). 120 * 0 for faster, smaller code (especially in the critical paths).
121 * 121 *
122 * STATS - 1 to collect stats for /proc/slabinfo. 122 * STATS - 1 to collect stats for /proc/slabinfo.
123 * 0 for faster, smaller code (especially in the critical paths). 123 * 0 for faster, smaller code (especially in the critical paths).
124 * 124 *
125 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible) 125 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
126 */ 126 */
127 127
128 #ifdef CONFIG_DEBUG_SLAB 128 #ifdef CONFIG_DEBUG_SLAB
129 #define DEBUG 1 129 #define DEBUG 1
130 #define STATS 1 130 #define STATS 1
131 #define FORCED_DEBUG 1 131 #define FORCED_DEBUG 1
132 #else 132 #else
133 #define DEBUG 0 133 #define DEBUG 0
134 #define STATS 0 134 #define STATS 0
135 #define FORCED_DEBUG 0 135 #define FORCED_DEBUG 0
136 #endif 136 #endif
137 137
138 /* Shouldn't this be in a header file somewhere? */ 138 /* Shouldn't this be in a header file somewhere? */
139 #define BYTES_PER_WORD sizeof(void *) 139 #define BYTES_PER_WORD sizeof(void *)
140 140
141 #ifndef cache_line_size 141 #ifndef cache_line_size
142 #define cache_line_size() L1_CACHE_BYTES 142 #define cache_line_size() L1_CACHE_BYTES
143 #endif 143 #endif
144 144
145 #ifndef ARCH_KMALLOC_MINALIGN 145 #ifndef ARCH_KMALLOC_MINALIGN
146 /* 146 /*
147 * Enforce a minimum alignment for the kmalloc caches. 147 * Enforce a minimum alignment for the kmalloc caches.
148 * Usually, the kmalloc caches are cache_line_size() aligned, except when 148 * Usually, the kmalloc caches are cache_line_size() aligned, except when
149 * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned. 149 * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
150 * Some archs want to perform DMA into kmalloc caches and need a guaranteed 150 * Some archs want to perform DMA into kmalloc caches and need a guaranteed
151 * alignment larger than the alignment of a 64-bit integer. 151 * alignment larger than the alignment of a 64-bit integer.
152 * ARCH_KMALLOC_MINALIGN allows that. 152 * ARCH_KMALLOC_MINALIGN allows that.
153 * Note that increasing this value may disable some debug features. 153 * Note that increasing this value may disable some debug features.
154 */ 154 */
155 #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) 155 #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
156 #endif 156 #endif
157 157
158 #ifndef ARCH_SLAB_MINALIGN 158 #ifndef ARCH_SLAB_MINALIGN
159 /* 159 /*
160 * Enforce a minimum alignment for all caches. 160 * Enforce a minimum alignment for all caches.
161 * Intended for archs that get misalignment faults even for BYTES_PER_WORD 161 * Intended for archs that get misalignment faults even for BYTES_PER_WORD
162 * aligned buffers. Includes ARCH_KMALLOC_MINALIGN. 162 * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
163 * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables 163 * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
164 * some debug features. 164 * some debug features.
165 */ 165 */
166 #define ARCH_SLAB_MINALIGN 0 166 #define ARCH_SLAB_MINALIGN 0
167 #endif 167 #endif
168 168
169 #ifndef ARCH_KMALLOC_FLAGS 169 #ifndef ARCH_KMALLOC_FLAGS
170 #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN 170 #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
171 #endif 171 #endif
172 172
173 /* Legal flag mask for kmem_cache_create(). */ 173 /* Legal flag mask for kmem_cache_create(). */
174 #if DEBUG 174 #if DEBUG
175 # define CREATE_MASK (SLAB_RED_ZONE | \ 175 # define CREATE_MASK (SLAB_RED_ZONE | \
176 SLAB_POISON | SLAB_HWCACHE_ALIGN | \ 176 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
177 SLAB_CACHE_DMA | \ 177 SLAB_CACHE_DMA | \
178 SLAB_STORE_USER | \ 178 SLAB_STORE_USER | \
179 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ 179 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
180 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) 180 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
181 #else 181 #else
182 # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ 182 # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \
183 SLAB_CACHE_DMA | \ 183 SLAB_CACHE_DMA | \
184 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ 184 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
185 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) 185 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
186 #endif 186 #endif
187 187
188 /* 188 /*
189 * kmem_bufctl_t: 189 * kmem_bufctl_t:
190 * 190 *
191 * Bufctls are used for linking objs within a slab, via 191 * Bufctls are used for linking objs within a slab, via
192 * linked offsets. 192 * linked offsets.
193 * 193 *
194 * This implementation relies on "struct page" for locating the cache & 194 * This implementation relies on "struct page" for locating the cache &
195 * slab an object belongs to. 195 * slab an object belongs to.
196 * This allows the bufctl structure to be small (one int), but limits 196 * This allows the bufctl structure to be small (one int), but limits
197 * the number of objects a slab (not a cache) can contain when off-slab 197 * the number of objects a slab (not a cache) can contain when off-slab
198 * bufctls are used. The limit is the size of the largest general cache 198 * bufctls are used. The limit is the size of the largest general cache
199 * that does not use off-slab slabs. 199 * that does not use off-slab slabs.
200 * For 32bit archs with 4 kB pages, this is 56. 200 * For 32bit archs with 4 kB pages, this is 56.
201 * This is not serious, as it is only for large objects, when it is unwise 201 * This is not serious, as it is only for large objects, when it is unwise
202 * to have too many per slab. 202 * to have too many per slab.
203 * Note: This limit can be raised by introducing a general cache whose size 203 * Note: This limit can be raised by introducing a general cache whose size
204 * is less than 512 (PAGE_SIZE<<3), but greater than 256. 204 * is less than 512 (PAGE_SIZE<<3), but greater than 256.
205 */ 205 */
206 206
207 typedef unsigned int kmem_bufctl_t; 207 typedef unsigned int kmem_bufctl_t;
208 #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) 208 #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
209 #define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) 209 #define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
210 #define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) 210 #define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2)
211 #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) 211 #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3)
212 212
213 /* 213 /*
214 * struct slab 214 * struct slab
215 * 215 *
216 * Manages the objs in a slab. Placed either at the beginning of mem allocated 216 * Manages the objs in a slab. Placed either at the beginning of mem allocated
217 * for a slab, or allocated from a general cache. 217 * for a slab, or allocated from a general cache.
218 * Slabs are chained into three lists: fully used, partial, fully free slabs. 218 * Slabs are chained into three lists: fully used, partial, fully free slabs.
219 */ 219 */
220 struct slab { 220 struct slab {
221 struct list_head list; 221 struct list_head list;
222 unsigned long colouroff; 222 unsigned long colouroff;
223 void *s_mem; /* including colour offset */ 223 void *s_mem; /* including colour offset */
224 unsigned int inuse; /* num of objs active in slab */ 224 unsigned int inuse; /* num of objs active in slab */
225 kmem_bufctl_t free; 225 kmem_bufctl_t free;
226 unsigned short nodeid; 226 unsigned short nodeid;
227 }; 227 };
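To make the bufctl linkage above concrete, here is a small illustrative helper (not a function from this file) that pops an object off a slab's free list. It assumes the on-slab layout where the kmem_bufctl_t array sits directly behind struct slab, and it inlines the index-to-object arithmetic that a later helper in this file performs:

/* Illustration only: take the first free object off a slab. */
static void *slab_pop_free_obj_demo(struct kmem_cache *cachep, struct slab *slabp)
{
        kmem_bufctl_t *bufctl = (kmem_bufctl_t *)(slabp + 1);   /* on-slab bufctl array */
        void *objp;

        if (slabp->free == BUFCTL_END)          /* no free objects left */
                return NULL;

        /* object i lives at s_mem + i * buffer_size */
        objp = slabp->s_mem + cachep->buffer_size * slabp->free;

        /* the bufctl entry of the old head holds the next free index */
        slabp->free = bufctl[slabp->free];
        slabp->inuse++;
        return objp;
}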
228 228
229 /* 229 /*
230 * struct slab_rcu 230 * struct slab_rcu
231 * 231 *
232 * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to 232 * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
233 * arrange for kmem_freepages to be called via RCU. This is useful if 233 * arrange for kmem_freepages to be called via RCU. This is useful if
234 * we need to approach a kernel structure obliquely, from its address 234 * we need to approach a kernel structure obliquely, from its address
235 * obtained without the usual locking. We can lock the structure to 235 * obtained without the usual locking. We can lock the structure to
236 * stabilize it and check it's still at the given address, only if we 236 * stabilize it and check it's still at the given address, only if we
237 * can be sure that the memory has not been meanwhile reused for some 237 * can be sure that the memory has not been meanwhile reused for some
238 * other kind of object (which our subsystem's lock might corrupt). 238 * other kind of object (which our subsystem's lock might corrupt).
239 * 239 *
240 * rcu_read_lock before reading the address, then rcu_read_unlock after 240 * rcu_read_lock before reading the address, then rcu_read_unlock after
241 * taking the spinlock within the structure expected at that address. 241 * taking the spinlock within the structure expected at that address.
242 * 242 *
243 * We assume struct slab_rcu can overlay struct slab when destroying. 243 * We assume struct slab_rcu can overlay struct slab when destroying.
244 */ 244 */
245 struct slab_rcu { 245 struct slab_rcu {
246 struct rcu_head head; 246 struct rcu_head head;
247 struct kmem_cache *cachep; 247 struct kmem_cache *cachep;
248 void *addr; 248 void *addr;
249 }; 249 };
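The comment above describes a usage pattern on the caller's side rather than code in this file. A minimal sketch of that pattern follows; my_obj, my_table, my_hash and my_obj_lookup_demo are hypothetical names, and the sketch assumes the objects come from a SLAB_DESTROY_BY_RCU cache, so the memory stays typed as my_obj for the duration of the RCU read side even if the object is freed:

struct my_obj {
        spinlock_t lock;
        unsigned long key;
};

static struct my_obj *my_table[256];            /* entries published with rcu_assign_pointer() */

static unsigned int my_hash(unsigned long key)
{
        return key & 255;
}

/* Illustration only: look up and lock an object whose memory may be recycled,
 * but only through a SLAB_DESTROY_BY_RCU cache of my_obj. */
static struct my_obj *my_obj_lookup_demo(unsigned long key)
{
        struct my_obj *obj;

        rcu_read_lock();
        obj = rcu_dereference(my_table[my_hash(key)]);
        if (obj) {
                spin_lock(&obj->lock);          /* safe: still my_obj-typed slab memory */
                if (obj->key != key) {          /* ...but possibly reused: re-check identity */
                        spin_unlock(&obj->lock);
                        obj = NULL;
                }
        }
        rcu_read_unlock();
        return obj;                             /* locked and validated, or NULL */
}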
250 250
251 /* 251 /*
252 * struct array_cache 252 * struct array_cache
253 * 253 *
254 * Purpose: 254 * Purpose:
255 * - LIFO ordering, to hand out cache-warm objects from _alloc 255 * - LIFO ordering, to hand out cache-warm objects from _alloc
256 * - reduce the number of linked list operations 256 * - reduce the number of linked list operations
257 * - reduce spinlock operations 257 * - reduce spinlock operations
258 * 258 *
259 * The limit is stored in the per-cpu structure to reduce the data cache 259 * The limit is stored in the per-cpu structure to reduce the data cache
260 * footprint. 260 * footprint.
261 * 261 *
262 */ 262 */
263 struct array_cache { 263 struct array_cache {
264 unsigned int avail; 264 unsigned int avail;
265 unsigned int limit; 265 unsigned int limit;
266 unsigned int batchcount; 266 unsigned int batchcount;
267 unsigned int touched; 267 unsigned int touched;
268 spinlock_t lock; 268 spinlock_t lock;
269 void *entry[0]; /* 269 void *entry[0]; /*
270 * Must have this definition in here for the proper 270 * Must have this definition in here for the proper
271 * alignment of array_cache. Also simplifies accessing 271 * alignment of array_cache. Also simplifies accessing
272 * the entries. 272 * the entries.
273 * [0] is for gcc 2.95. It should really be []. 273 * [0] is for gcc 2.95. It should really be [].
274 */ 274 */
275 }; 275 };
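A sketch of the LIFO behaviour this structure is built for; the two helpers are illustrative rather than functions from this file, and they ignore the refill/flush slow paths the real allocation and free paths fall back to when the array is empty or full:

/* Fast-path free: push onto the top of the per-cpu stack (caller checked avail < limit). */
static inline void ac_put_demo(struct array_cache *ac, void *objp)
{
        ac->entry[ac->avail++] = objp;
}

/* Fast-path alloc: pop the most recently freed, i.e. cache-warm, object
 * (caller checked avail > 0). */
static inline void *ac_get_demo(struct array_cache *ac)
{
        ac->touched = 1;
        return ac->entry[--ac->avail];
}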
276 276
277 /* 277 /*
278 * bootstrap: The caches do not work without cpuarrays anymore, but the 278 * bootstrap: The caches do not work without cpuarrays anymore, but the
279 * cpuarrays are allocated from the generic caches... 279 * cpuarrays are allocated from the generic caches...
280 */ 280 */
281 #define BOOT_CPUCACHE_ENTRIES 1 281 #define BOOT_CPUCACHE_ENTRIES 1
282 struct arraycache_init { 282 struct arraycache_init {
283 struct array_cache cache; 283 struct array_cache cache;
284 void *entries[BOOT_CPUCACHE_ENTRIES]; 284 void *entries[BOOT_CPUCACHE_ENTRIES];
285 }; 285 };
286 286
287 /* 287 /*
288 * The slab lists for all objects. 288 * The slab lists for all objects.
289 */ 289 */
290 struct kmem_list3 { 290 struct kmem_list3 {
291 struct list_head slabs_partial; /* partial list first, better asm code */ 291 struct list_head slabs_partial; /* partial list first, better asm code */
292 struct list_head slabs_full; 292 struct list_head slabs_full;
293 struct list_head slabs_free; 293 struct list_head slabs_free;
294 unsigned long free_objects; 294 unsigned long free_objects;
295 unsigned int free_limit; 295 unsigned int free_limit;
296 unsigned int colour_next; /* Per-node cache coloring */ 296 unsigned int colour_next; /* Per-node cache coloring */
297 spinlock_t list_lock; 297 spinlock_t list_lock;
298 struct array_cache *shared; /* shared per node */ 298 struct array_cache *shared; /* shared per node */
299 struct array_cache **alien; /* on other nodes */ 299 struct array_cache **alien; /* on other nodes */
300 unsigned long next_reap; /* updated without locking */ 300 unsigned long next_reap; /* updated without locking */
301 int free_touched; /* updated without locking */ 301 int free_touched; /* updated without locking */
302 }; 302 };
303 303
304 /* 304 /*
305 * Need this for bootstrapping a per node allocator. 305 * Need this for bootstrapping a per node allocator.
306 */ 306 */
307 #define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1) 307 #define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1)
308 struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; 308 struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
309 #define CACHE_CACHE 0 309 #define CACHE_CACHE 0
310 #define SIZE_AC 1 310 #define SIZE_AC 1
311 #define SIZE_L3 (1 + MAX_NUMNODES) 311 #define SIZE_L3 (1 + MAX_NUMNODES)
312 312
313 static int drain_freelist(struct kmem_cache *cache, 313 static int drain_freelist(struct kmem_cache *cache,
314 struct kmem_list3 *l3, int tofree); 314 struct kmem_list3 *l3, int tofree);
315 static void free_block(struct kmem_cache *cachep, void **objpp, int len, 315 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
316 int node); 316 int node);
317 static int enable_cpucache(struct kmem_cache *cachep); 317 static int enable_cpucache(struct kmem_cache *cachep);
318 static void cache_reap(struct work_struct *unused); 318 static void cache_reap(struct work_struct *unused);
319 319
320 /* 320 /*
321 * This function must be completely optimized away if a constant is passed to 321 * This function must be completely optimized away if a constant is passed to
322 * it. Mostly the same as what is in linux/slab.h except it returns an index. 322 * it. Mostly the same as what is in linux/slab.h except it returns an index.
323 */ 323 */
324 static __always_inline int index_of(const size_t size) 324 static __always_inline int index_of(const size_t size)
325 { 325 {
326 extern void __bad_size(void); 326 extern void __bad_size(void);
327 327
328 if (__builtin_constant_p(size)) { 328 if (__builtin_constant_p(size)) {
329 int i = 0; 329 int i = 0;
330 330
331 #define CACHE(x) \ 331 #define CACHE(x) \
332 if (size <=x) \ 332 if (size <=x) \
333 return i; \ 333 return i; \
334 else \ 334 else \
335 i++; 335 i++;
336 #include "linux/kmalloc_sizes.h" 336 #include "linux/kmalloc_sizes.h"
337 #undef CACHE 337 #undef CACHE
338 __bad_size(); 338 __bad_size();
339 } else 339 } else
340 __bad_size(); 340 __bad_size();
341 return 0; 341 return 0;
342 } 342 }
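What the CACHE() trick above expands to is easier to see on a concrete size. The sizes in kmalloc_sizes.h depend on PAGE_SIZE and L1_CACHE_BYTES, so the list below (32, 64, 96, 128, 192, ...) is only the common 4 kB-page example; with a constant argument the compiler folds the whole ladder down to a single integer:

/* Roughly what index_of(192) becomes once the CACHE() lines are pasted in. */
static int index_of_192_demo(void)
{
        int i = 0;

        if (192 <= 32) return i; else i++;
        if (192 <= 64) return i; else i++;
        if (192 <= 96) return i; else i++;
        if (192 <= 128) return i; else i++;
        if (192 <= 192) return i; else i++;     /* taken: the whole ladder folds to 4 */
        /* ...larger sizes elided; a non-constant size would hit __bad_size()... */
        return 0;
}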
343 343
344 static int slab_early_init = 1; 344 static int slab_early_init = 1;
345 345
346 #define INDEX_AC index_of(sizeof(struct arraycache_init)) 346 #define INDEX_AC index_of(sizeof(struct arraycache_init))
347 #define INDEX_L3 index_of(sizeof(struct kmem_list3)) 347 #define INDEX_L3 index_of(sizeof(struct kmem_list3))
348 348
349 static void kmem_list3_init(struct kmem_list3 *parent) 349 static void kmem_list3_init(struct kmem_list3 *parent)
350 { 350 {
351 INIT_LIST_HEAD(&parent->slabs_full); 351 INIT_LIST_HEAD(&parent->slabs_full);
352 INIT_LIST_HEAD(&parent->slabs_partial); 352 INIT_LIST_HEAD(&parent->slabs_partial);
353 INIT_LIST_HEAD(&parent->slabs_free); 353 INIT_LIST_HEAD(&parent->slabs_free);
354 parent->shared = NULL; 354 parent->shared = NULL;
355 parent->alien = NULL; 355 parent->alien = NULL;
356 parent->colour_next = 0; 356 parent->colour_next = 0;
357 spin_lock_init(&parent->list_lock); 357 spin_lock_init(&parent->list_lock);
358 parent->free_objects = 0; 358 parent->free_objects = 0;
359 parent->free_touched = 0; 359 parent->free_touched = 0;
360 } 360 }
361 361
362 #define MAKE_LIST(cachep, listp, slab, nodeid) \ 362 #define MAKE_LIST(cachep, listp, slab, nodeid) \
363 do { \ 363 do { \
364 INIT_LIST_HEAD(listp); \ 364 INIT_LIST_HEAD(listp); \
365 list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ 365 list_splice(&(cachep->nodelists[nodeid]->slab), listp); \
366 } while (0) 366 } while (0)
367 367
368 #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ 368 #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
369 do { \ 369 do { \
370 MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ 370 MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \
371 MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ 371 MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
372 MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ 372 MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
373 } while (0) 373 } while (0)
374 374
375 /* 375 /*
376 * struct kmem_cache 376 * struct kmem_cache
377 * 377 *
378 * manages a cache. 378 * manages a cache.
379 */ 379 */
380 380
381 struct kmem_cache { 381 struct kmem_cache {
382 /* 1) per-cpu data, touched during every alloc/free */ 382 /* 1) per-cpu data, touched during every alloc/free */
383 struct array_cache *array[NR_CPUS]; 383 struct array_cache *array[NR_CPUS];
384 /* 2) Cache tunables. Protected by cache_chain_mutex */ 384 /* 2) Cache tunables. Protected by cache_chain_mutex */
385 unsigned int batchcount; 385 unsigned int batchcount;
386 unsigned int limit; 386 unsigned int limit;
387 unsigned int shared; 387 unsigned int shared;
388 388
389 unsigned int buffer_size; 389 unsigned int buffer_size;
390 u32 reciprocal_buffer_size; 390 u32 reciprocal_buffer_size;
391 /* 3) touched by every alloc & free from the backend */ 391 /* 3) touched by every alloc & free from the backend */
392 392
393 unsigned int flags; /* constant flags */ 393 unsigned int flags; /* constant flags */
394 unsigned int num; /* # of objs per slab */ 394 unsigned int num; /* # of objs per slab */
395 395
396 /* 4) cache_grow/shrink */ 396 /* 4) cache_grow/shrink */
397 /* order of pgs per slab (2^n) */ 397 /* order of pgs per slab (2^n) */
398 unsigned int gfporder; 398 unsigned int gfporder;
399 399
400 /* force GFP flags, e.g. GFP_DMA */ 400 /* force GFP flags, e.g. GFP_DMA */
401 gfp_t gfpflags; 401 gfp_t gfpflags;
402 402
403 size_t colour; /* cache colouring range */ 403 size_t colour; /* cache colouring range */
404 unsigned int colour_off; /* colour offset */ 404 unsigned int colour_off; /* colour offset */
405 struct kmem_cache *slabp_cache; 405 struct kmem_cache *slabp_cache;
406 unsigned int slab_size; 406 unsigned int slab_size;
407 unsigned int dflags; /* dynamic flags */ 407 unsigned int dflags; /* dynamic flags */
408 408
409 /* constructor func */ 409 /* constructor func */
410 void (*ctor) (void *, struct kmem_cache *, unsigned long); 410 void (*ctor) (void *, struct kmem_cache *, unsigned long);
411 411
412 /* de-constructor func */ 412 /* de-constructor func */
413 void (*dtor) (void *, struct kmem_cache *, unsigned long); 413 void (*dtor) (void *, struct kmem_cache *, unsigned long);
414 414
415 /* 5) cache creation/removal */ 415 /* 5) cache creation/removal */
416 const char *name; 416 const char *name;
417 struct list_head next; 417 struct list_head next;
418 418
419 /* 6) statistics */ 419 /* 6) statistics */
420 #if STATS 420 #if STATS
421 unsigned long num_active; 421 unsigned long num_active;
422 unsigned long num_allocations; 422 unsigned long num_allocations;
423 unsigned long high_mark; 423 unsigned long high_mark;
424 unsigned long grown; 424 unsigned long grown;
425 unsigned long reaped; 425 unsigned long reaped;
426 unsigned long errors; 426 unsigned long errors;
427 unsigned long max_freeable; 427 unsigned long max_freeable;
428 unsigned long node_allocs; 428 unsigned long node_allocs;
429 unsigned long node_frees; 429 unsigned long node_frees;
430 unsigned long node_overflow; 430 unsigned long node_overflow;
431 atomic_t allochit; 431 atomic_t allochit;
432 atomic_t allocmiss; 432 atomic_t allocmiss;
433 atomic_t freehit; 433 atomic_t freehit;
434 atomic_t freemiss; 434 atomic_t freemiss;
435 #endif 435 #endif
436 #if DEBUG 436 #if DEBUG
437 /* 437 /*
438 * If debugging is enabled, then the allocator can add additional 438 * If debugging is enabled, then the allocator can add additional
439 * fields and/or padding to every object. buffer_size contains the total 439 * fields and/or padding to every object. buffer_size contains the total
440 * object size including these internal fields; the following two 440 * object size including these internal fields; the following two
441 * variables contain the offset to the user object and its size. 441 * variables contain the offset to the user object and its size.
442 */ 442 */
443 int obj_offset; 443 int obj_offset;
444 int obj_size; 444 int obj_size;
445 #endif 445 #endif
446 /* 446 /*
447 * We put nodelists[] at the end of kmem_cache, because we want to size 447 * We put nodelists[] at the end of kmem_cache, because we want to size
448 * this array to nr_node_ids slots instead of MAX_NUMNODES 448 * this array to nr_node_ids slots instead of MAX_NUMNODES
449 * (see kmem_cache_init()) 449 * (see kmem_cache_init())
450 * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache 450 * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
451 * is statically defined, so we reserve the max number of nodes. 451 * is statically defined, so we reserve the max number of nodes.
452 */ 452 */
453 struct kmem_list3 *nodelists[MAX_NUMNODES]; 453 struct kmem_list3 *nodelists[MAX_NUMNODES];
454 /* 454 /*
455 * Do not add fields after nodelists[] 455 * Do not add fields after nodelists[]
456 */ 456 */
457 }; 457 };
458 458
459 #define CFLGS_OFF_SLAB (0x80000000UL) 459 #define CFLGS_OFF_SLAB (0x80000000UL)
460 #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) 460 #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
461 461
462 #define BATCHREFILL_LIMIT 16 462 #define BATCHREFILL_LIMIT 16
463 /* 463 /*
464 * Optimization question: fewer reaps means a lower probability of unnecessary 464 * Optimization question: fewer reaps means a lower probability of unnecessary
465 * cpucache drain/refill cycles. 465 * cpucache drain/refill cycles.
466 * 466 *
467 * OTOH the cpuarrays can contain lots of objects, 467 * OTOH the cpuarrays can contain lots of objects,
468 * which could lock up otherwise freeable slabs. 468 * which could lock up otherwise freeable slabs.
469 */ 469 */
470 #define REAPTIMEOUT_CPUC (2*HZ) 470 #define REAPTIMEOUT_CPUC (2*HZ)
471 #define REAPTIMEOUT_LIST3 (4*HZ) 471 #define REAPTIMEOUT_LIST3 (4*HZ)
472 472
473 #if STATS 473 #if STATS
474 #define STATS_INC_ACTIVE(x) ((x)->num_active++) 474 #define STATS_INC_ACTIVE(x) ((x)->num_active++)
475 #define STATS_DEC_ACTIVE(x) ((x)->num_active--) 475 #define STATS_DEC_ACTIVE(x) ((x)->num_active--)
476 #define STATS_INC_ALLOCED(x) ((x)->num_allocations++) 476 #define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
477 #define STATS_INC_GROWN(x) ((x)->grown++) 477 #define STATS_INC_GROWN(x) ((x)->grown++)
478 #define STATS_ADD_REAPED(x,y) ((x)->reaped += (y)) 478 #define STATS_ADD_REAPED(x,y) ((x)->reaped += (y))
479 #define STATS_SET_HIGH(x) \ 479 #define STATS_SET_HIGH(x) \
480 do { \ 480 do { \
481 if ((x)->num_active > (x)->high_mark) \ 481 if ((x)->num_active > (x)->high_mark) \
482 (x)->high_mark = (x)->num_active; \ 482 (x)->high_mark = (x)->num_active; \
483 } while (0) 483 } while (0)
484 #define STATS_INC_ERR(x) ((x)->errors++) 484 #define STATS_INC_ERR(x) ((x)->errors++)
485 #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) 485 #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
486 #define STATS_INC_NODEFREES(x) ((x)->node_frees++) 486 #define STATS_INC_NODEFREES(x) ((x)->node_frees++)
487 #define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++) 487 #define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++)
488 #define STATS_SET_FREEABLE(x, i) \ 488 #define STATS_SET_FREEABLE(x, i) \
489 do { \ 489 do { \
490 if ((x)->max_freeable < i) \ 490 if ((x)->max_freeable < i) \
491 (x)->max_freeable = i; \ 491 (x)->max_freeable = i; \
492 } while (0) 492 } while (0)
493 #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) 493 #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
494 #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) 494 #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
495 #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) 495 #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
496 #define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss) 496 #define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
497 #else 497 #else
498 #define STATS_INC_ACTIVE(x) do { } while (0) 498 #define STATS_INC_ACTIVE(x) do { } while (0)
499 #define STATS_DEC_ACTIVE(x) do { } while (0) 499 #define STATS_DEC_ACTIVE(x) do { } while (0)
500 #define STATS_INC_ALLOCED(x) do { } while (0) 500 #define STATS_INC_ALLOCED(x) do { } while (0)
501 #define STATS_INC_GROWN(x) do { } while (0) 501 #define STATS_INC_GROWN(x) do { } while (0)
502 #define STATS_ADD_REAPED(x,y) do { } while (0) 502 #define STATS_ADD_REAPED(x,y) do { } while (0)
503 #define STATS_SET_HIGH(x) do { } while (0) 503 #define STATS_SET_HIGH(x) do { } while (0)
504 #define STATS_INC_ERR(x) do { } while (0) 504 #define STATS_INC_ERR(x) do { } while (0)
505 #define STATS_INC_NODEALLOCS(x) do { } while (0) 505 #define STATS_INC_NODEALLOCS(x) do { } while (0)
506 #define STATS_INC_NODEFREES(x) do { } while (0) 506 #define STATS_INC_NODEFREES(x) do { } while (0)
507 #define STATS_INC_ACOVERFLOW(x) do { } while (0) 507 #define STATS_INC_ACOVERFLOW(x) do { } while (0)
508 #define STATS_SET_FREEABLE(x, i) do { } while (0) 508 #define STATS_SET_FREEABLE(x, i) do { } while (0)
509 #define STATS_INC_ALLOCHIT(x) do { } while (0) 509 #define STATS_INC_ALLOCHIT(x) do { } while (0)
510 #define STATS_INC_ALLOCMISS(x) do { } while (0) 510 #define STATS_INC_ALLOCMISS(x) do { } while (0)
511 #define STATS_INC_FREEHIT(x) do { } while (0) 511 #define STATS_INC_FREEHIT(x) do { } while (0)
512 #define STATS_INC_FREEMISS(x) do { } while (0) 512 #define STATS_INC_FREEMISS(x) do { } while (0)
513 #endif 513 #endif
514 514
515 #if DEBUG 515 #if DEBUG
516 516
517 /* 517 /*
518 * memory layout of objects: 518 * memory layout of objects:
519 * 0 : objp 519 * 0 : objp
520 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that 520 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
521 * the end of an object is aligned with the end of the real 521 * the end of an object is aligned with the end of the real
522 * allocation. Catches writes behind the end of the allocation. 522 * allocation. Catches writes behind the end of the allocation.
523 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1: 523 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
524 * redzone word. 524 * redzone word.
525 * cachep->obj_offset: The real object. 525 * cachep->obj_offset: The real object.
526 * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] 526 * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
527 * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address 527 * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address
528 * [BYTES_PER_WORD long] 528 * [BYTES_PER_WORD long]
529 */ 529 */
530 static int obj_offset(struct kmem_cache *cachep) 530 static int obj_offset(struct kmem_cache *cachep)
531 { 531 {
532 return cachep->obj_offset; 532 return cachep->obj_offset;
533 } 533 }
534 534
535 static int obj_size(struct kmem_cache *cachep) 535 static int obj_size(struct kmem_cache *cachep)
536 { 536 {
537 return cachep->obj_size; 537 return cachep->obj_size;
538 } 538 }
539 539
540 static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp) 540 static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
541 { 541 {
542 BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); 542 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
543 return (unsigned long long*) (objp + obj_offset(cachep) - 543 return (unsigned long long*) (objp + obj_offset(cachep) -
544 sizeof(unsigned long long)); 544 sizeof(unsigned long long));
545 } 545 }
546 546
547 static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp) 547 static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
548 { 548 {
549 BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); 549 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
550 if (cachep->flags & SLAB_STORE_USER) 550 if (cachep->flags & SLAB_STORE_USER)
551 return (unsigned long long *)(objp + cachep->buffer_size - 551 return (unsigned long long *)(objp + cachep->buffer_size -
552 sizeof(unsigned long long) - 552 sizeof(unsigned long long) -
553 BYTES_PER_WORD); 553 BYTES_PER_WORD);
554 return (unsigned long long *) (objp + cachep->buffer_size - 554 return (unsigned long long *) (objp + cachep->buffer_size -
555 sizeof(unsigned long long)); 555 sizeof(unsigned long long));
556 } 556 }
557 557
558 static void **dbg_userword(struct kmem_cache *cachep, void *objp) 558 static void **dbg_userword(struct kmem_cache *cachep, void *objp)
559 { 559 {
560 BUG_ON(!(cachep->flags & SLAB_STORE_USER)); 560 BUG_ON(!(cachep->flags & SLAB_STORE_USER));
561 return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD); 561 return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
562 } 562 }
563 563
564 #else 564 #else
565 565
566 #define obj_offset(x) 0 566 #define obj_offset(x) 0
567 #define obj_size(cachep) (cachep->buffer_size) 567 #define obj_size(cachep) (cachep->buffer_size)
568 #define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) 568 #define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
569 #define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) 569 #define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
570 #define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;}) 570 #define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
571 571
572 #endif 572 #endif
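A worked instance of the layout comment and debug accessors above, assuming a 64-bit build where BYTES_PER_WORD == sizeof(unsigned long long) == 8, an object size of 100 bytes padded into buffer_size == 128, obj_offset == 8, and both SLAB_RED_ZONE and SLAB_STORE_USER set. All numbers are illustrative, not dumped from a real cache:

/*
 *   objp +   0 .. objp +   7   first red zone     -> dbg_redzone1()
 *   objp +   8 .. objp + 107   the object itself  -> obj_offset() .. + obj_size()
 *   objp + 108 .. objp + 111   padding
 *   objp + 112 .. objp + 119   second red zone    -> dbg_redzone2() = buffer_size - 8 - 8
 *   objp + 120 .. objp + 127   last caller        -> dbg_userword() = buffer_size - 8
 */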
573 573
574 /* 574 /*
575 * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp 575 * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp
576 * order. 576 * order.
577 */ 577 */
578 #if defined(CONFIG_LARGE_ALLOCS) 578 #if defined(CONFIG_LARGE_ALLOCS)
579 #define MAX_OBJ_ORDER 13 /* up to 32Mb */ 579 #define MAX_OBJ_ORDER 13 /* up to 32Mb */
580 #define MAX_GFP_ORDER 13 /* up to 32Mb */ 580 #define MAX_GFP_ORDER 13 /* up to 32Mb */
581 #elif defined(CONFIG_MMU) 581 #elif defined(CONFIG_MMU)
582 #define MAX_OBJ_ORDER 5 /* 32 pages */ 582 #define MAX_OBJ_ORDER 5 /* 32 pages */
583 #define MAX_GFP_ORDER 5 /* 32 pages */ 583 #define MAX_GFP_ORDER 5 /* 32 pages */
584 #else 584 #else
585 #define MAX_OBJ_ORDER 8 /* up to 1Mb */ 585 #define MAX_OBJ_ORDER 8 /* up to 1Mb */
586 #define MAX_GFP_ORDER 8 /* up to 1Mb */ 586 #define MAX_GFP_ORDER 8 /* up to 1Mb */
587 #endif 587 #endif
588 588
589 /* 589 /*
590 * Do not go above this order unless 0 objects fit into the slab. 590 * Do not go above this order unless 0 objects fit into the slab.
591 */ 591 */
592 #define BREAK_GFP_ORDER_HI 1 592 #define BREAK_GFP_ORDER_HI 1
593 #define BREAK_GFP_ORDER_LO 0 593 #define BREAK_GFP_ORDER_LO 0
594 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; 594 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
595 595
596 /* 596 /*
597 * Functions for storing/retrieving the cachep and/or slab from the page 597 * Functions for storing/retrieving the cachep and/or slab from the page
598 * allocator. These are used to find the slab an obj belongs to. With kfree(), 598 * allocator. These are used to find the slab an obj belongs to. With kfree(),
599 * these are used to find the cache an obj belongs to. 599 * these are used to find the cache an obj belongs to.
600 */ 600 */
601 static inline void page_set_cache(struct page *page, struct kmem_cache *cache) 601 static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
602 { 602 {
603 page->lru.next = (struct list_head *)cache; 603 page->lru.next = (struct list_head *)cache;
604 } 604 }
605 605
606 static inline struct kmem_cache *page_get_cache(struct page *page) 606 static inline struct kmem_cache *page_get_cache(struct page *page)
607 { 607 {
608 page = compound_head(page); 608 page = compound_head(page);
609 BUG_ON(!PageSlab(page)); 609 BUG_ON(!PageSlab(page));
610 return (struct kmem_cache *)page->lru.next; 610 return (struct kmem_cache *)page->lru.next;
611 } 611 }
612 612
613 static inline void page_set_slab(struct page *page, struct slab *slab) 613 static inline void page_set_slab(struct page *page, struct slab *slab)
614 { 614 {
615 page->lru.prev = (struct list_head *)slab; 615 page->lru.prev = (struct list_head *)slab;
616 } 616 }
617 617
618 static inline struct slab *page_get_slab(struct page *page) 618 static inline struct slab *page_get_slab(struct page *page)
619 { 619 {
620 BUG_ON(!PageSlab(page)); 620 BUG_ON(!PageSlab(page));
621 return (struct slab *)page->lru.prev; 621 return (struct slab *)page->lru.prev;
622 } 622 }
623 623
624 static inline struct kmem_cache *virt_to_cache(const void *obj) 624 static inline struct kmem_cache *virt_to_cache(const void *obj)
625 { 625 {
626 struct page *page = virt_to_head_page(obj); 626 struct page *page = virt_to_head_page(obj);
627 return page_get_cache(page); 627 return page_get_cache(page);
628 } 628 }
629 629
630 static inline struct slab *virt_to_slab(const void *obj) 630 static inline struct slab *virt_to_slab(const void *obj)
631 { 631 {
632 struct page *page = virt_to_head_page(obj); 632 struct page *page = virt_to_head_page(obj);
633 return page_get_slab(page); 633 return page_get_slab(page);
634 } 634 }
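A sketch of how a free path walks back from a raw object pointer to its owning cache and slab using the helpers above; show_owner_demo() is illustrative and leaves out everything the real free paths do beyond the lookup:

/* Illustration only: report which cache and slab an object belongs to. */
static void show_owner_demo(const void *objp)
{
        struct page *page = virt_to_head_page(objp);
        struct kmem_cache *cachep = page_get_cache(page);       /* stashed in page->lru.next */
        struct slab *slabp = page_get_slab(page);               /* stashed in page->lru.prev */

        printk(KERN_DEBUG "%p: cache %s, slab %p, %u objects in use\n",
               objp, cachep->name, slabp, slabp->inuse);
}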
635 635
636 static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, 636 static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
637 unsigned int idx) 637 unsigned int idx)
638 { 638 {
639 return slab->s_mem + cache->buffer_size * idx; 639 return slab->s_mem + cache->buffer_size * idx;
640 } 640 }
641 641
642 /* 642 /*
643 * We want to avoid an expensive divide : (offset / cache->buffer_size) 643 * We want to avoid an expensive divide : (offset / cache->buffer_size)
644 * Using the fact that buffer_size is a constant for a particular cache, 644 * Using the fact that buffer_size is a constant for a particular cache,
645 * we can replace (offset / cache->buffer_size) by 645 * we can replace (offset / cache->buffer_size) by
646 * reciprocal_divide(offset, cache->reciprocal_buffer_size) 646 * reciprocal_divide(offset, cache->reciprocal_buffer_size)
647 */ 647 */
648 static inline unsigned int obj_to_index(const struct kmem_cache *cache, 648 static inline unsigned int obj_to_index(const struct kmem_cache *cache,
649 const struct slab *slab, void *obj) 649 const struct slab *slab, void *obj)
650 { 650 {
651 u32 offset = (obj - slab->s_mem); 651 u32 offset = (obj - slab->s_mem);
652 return reciprocal_divide(offset, cache->reciprocal_buffer_size); 652 return reciprocal_divide(offset, cache->reciprocal_buffer_size);
653 } 653 }
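The reciprocal trick can be checked outside the kernel. The sketch below mirrors the arithmetic of the era's reciprocal_value()/reciprocal_divide() pair as I understand it (ceil(2^32/B) as the reciprocal, then a 64-bit multiply and a 32-bit shift); it is a userspace illustration with plain stdint types, not the kernel code itself. It also shows why the result is exact here: the offsets fed to obj_to_index() are always whole multiples of buffer_size and far below 2^32.

#include <stdint.h>
#include <stdio.h>

/* ceil(2^32 / k), the precomputed "reciprocal" of k */
static uint32_t reciprocal_value_demo(uint32_t k)
{
        return (uint32_t)(((1ULL << 32) + k - 1) / k);
}

/* multiply-and-shift replacement for a / k */
static uint32_t reciprocal_divide_demo(uint32_t a, uint32_t r)
{
        return (uint32_t)(((uint64_t)a * r) >> 32);
}

int main(void)
{
        uint32_t buffer_size = 104;             /* hypothetical cache object size */
        uint32_t r = reciprocal_value_demo(buffer_size);
        uint32_t idx;

        /* offsets handed to obj_to_index() are always idx * buffer_size */
        for (idx = 0; idx < 40; idx++) {
                uint32_t offset = idx * buffer_size;

                if (reciprocal_divide_demo(offset, r) != offset / buffer_size)
                        printf("mismatch at idx %u\n", (unsigned)idx);
        }
        printf("reciprocal divide matches plain divide for all tested offsets\n");
        return 0;
}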
654 654
655 /* 655 /*
656 * These are the default caches for kmalloc. Custom caches can have other sizes. 656 * These are the default caches for kmalloc. Custom caches can have other sizes.
657 */ 657 */
658 struct cache_sizes malloc_sizes[] = { 658 struct cache_sizes malloc_sizes[] = {
659 #define CACHE(x) { .cs_size = (x) }, 659 #define CACHE(x) { .cs_size = (x) },
660 #include <linux/kmalloc_sizes.h> 660 #include <linux/kmalloc_sizes.h>
661 CACHE(ULONG_MAX) 661 CACHE(ULONG_MAX)
662 #undef CACHE 662 #undef CACHE
663 }; 663 };
664 EXPORT_SYMBOL(malloc_sizes); 664 EXPORT_SYMBOL(malloc_sizes);
665 665
666 /* Must match cache_sizes above. Out of line to keep cache footprint low. */ 666 /* Must match cache_sizes above. Out of line to keep cache footprint low. */
667 struct cache_names { 667 struct cache_names {
668 char *name; 668 char *name;
669 char *name_dma; 669 char *name_dma;
670 }; 670 };
671 671
672 static struct cache_names __initdata cache_names[] = { 672 static struct cache_names __initdata cache_names[] = {
673 #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" }, 673 #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
674 #include <linux/kmalloc_sizes.h> 674 #include <linux/kmalloc_sizes.h>
675 {NULL,} 675 {NULL,}
676 #undef CACHE 676 #undef CACHE
677 }; 677 };
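For a configuration whose kmalloc_sizes.h begins at 32 bytes (the actual list is config-dependent, so these entries are only an illustration), the two CACHE() expansions above produce tables along the lines of:

struct cache_sizes malloc_sizes[] = {
        { .cs_size = 32 },
        { .cs_size = 64 },
        /* ...one entry per configured size... */
        { .cs_size = ULONG_MAX },       /* sentinel: cs_cachep/cs_dmacachep stay NULL */
};

static struct cache_names __initdata cache_names[] = {
        { .name = "size-32", .name_dma = "size-32(DMA)" },
        { .name = "size-64", .name_dma = "size-64(DMA)" },
        /* ...one entry per configured size... */
        { NULL, },
};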
678 678
679 static struct arraycache_init initarray_cache __initdata = 679 static struct arraycache_init initarray_cache __initdata =
680 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; 680 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
681 static struct arraycache_init initarray_generic = 681 static struct arraycache_init initarray_generic =
682 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; 682 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
683 683
684 /* internal cache of cache description objs */ 684 /* internal cache of cache description objs */
685 static struct kmem_cache cache_cache = { 685 static struct kmem_cache cache_cache = {
686 .batchcount = 1, 686 .batchcount = 1,
687 .limit = BOOT_CPUCACHE_ENTRIES, 687 .limit = BOOT_CPUCACHE_ENTRIES,
688 .shared = 1, 688 .shared = 1,
689 .buffer_size = sizeof(struct kmem_cache), 689 .buffer_size = sizeof(struct kmem_cache),
690 .name = "kmem_cache", 690 .name = "kmem_cache",
691 }; 691 };
692 692
693 #define BAD_ALIEN_MAGIC 0x01020304ul 693 #define BAD_ALIEN_MAGIC 0x01020304ul
694 694
695 #ifdef CONFIG_LOCKDEP 695 #ifdef CONFIG_LOCKDEP
696 696
697 /* 697 /*
698 * Slab sometimes uses the kmalloc slabs to store the slab headers 698 * Slab sometimes uses the kmalloc slabs to store the slab headers
699 * for other slabs "off slab". 699 * for other slabs "off slab".
700 * The locking for this is tricky in that it nests within the locks 700 * The locking for this is tricky in that it nests within the locks
701 * of all other slabs in a few places; to deal with this special 701 * of all other slabs in a few places; to deal with this special
702 * locking we put on-slab caches into a separate lock-class. 702 * locking we put on-slab caches into a separate lock-class.
703 * 703 *
704 * We set lock class for alien array caches which are up during init. 704 * We set lock class for alien array caches which are up during init.
705 * The lock annotation will be lost if all cpus of a node go down and 705 * The lock annotation will be lost if all cpus of a node go down and
706 * then come back up during hotplug. 706 * then come back up during hotplug.
707 */ 707 */
708 static struct lock_class_key on_slab_l3_key; 708 static struct lock_class_key on_slab_l3_key;
709 static struct lock_class_key on_slab_alc_key; 709 static struct lock_class_key on_slab_alc_key;
710 710
711 static inline void init_lock_keys(void) 711 static inline void init_lock_keys(void)
712 712
713 { 713 {
714 int q; 714 int q;
715 struct cache_sizes *s = malloc_sizes; 715 struct cache_sizes *s = malloc_sizes;
716 716
717 while (s->cs_size != ULONG_MAX) { 717 while (s->cs_size != ULONG_MAX) {
718 for_each_node(q) { 718 for_each_node(q) {
719 struct array_cache **alc; 719 struct array_cache **alc;
720 int r; 720 int r;
721 struct kmem_list3 *l3 = s->cs_cachep->nodelists[q]; 721 struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
722 if (!l3 || OFF_SLAB(s->cs_cachep)) 722 if (!l3 || OFF_SLAB(s->cs_cachep))
723 continue; 723 continue;
724 lockdep_set_class(&l3->list_lock, &on_slab_l3_key); 724 lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
725 alc = l3->alien; 725 alc = l3->alien;
726 /* 726 /*
727 * FIXME: This check for BAD_ALIEN_MAGIC 727 * FIXME: This check for BAD_ALIEN_MAGIC
728 * should go away when common slab code is taught to 728 * should go away when common slab code is taught to
729 * work even without alien caches. 729 * work even without alien caches.
730 * Currently, non-NUMA code returns BAD_ALIEN_MAGIC 730 * Currently, non-NUMA code returns BAD_ALIEN_MAGIC
731 * from alloc_alien_cache. 731 * from alloc_alien_cache.
732 */ 732 */
733 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) 733 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
734 continue; 734 continue;
735 for_each_node(r) { 735 for_each_node(r) {
736 if (alc[r]) 736 if (alc[r])
737 lockdep_set_class(&alc[r]->lock, 737 lockdep_set_class(&alc[r]->lock,
738 &on_slab_alc_key); 738 &on_slab_alc_key);
739 } 739 }
740 } 740 }
741 s++; 741 s++;
742 } 742 }
743 } 743 }
744 #else 744 #else
745 static inline void init_lock_keys(void) 745 static inline void init_lock_keys(void)
746 { 746 {
747 } 747 }
748 #endif 748 #endif
749 749
750 /* 750 /*
751 * 1. Guard access to the cache-chain. 751 * 1. Guard access to the cache-chain.
752 * 2. Protect sanity of cpu_online_map against cpu hotplug events 752 * 2. Protect sanity of cpu_online_map against cpu hotplug events
753 */ 753 */
754 static DEFINE_MUTEX(cache_chain_mutex); 754 static DEFINE_MUTEX(cache_chain_mutex);
755 static struct list_head cache_chain; 755 static struct list_head cache_chain;
756 756
757 /* 757 /*
758 * chicken and egg problem: delay the per-cpu array allocation 758 * chicken and egg problem: delay the per-cpu array allocation
759 * until the general caches are up. 759 * until the general caches are up.
760 */ 760 */
761 static enum { 761 static enum {
762 NONE, 762 NONE,
763 PARTIAL_AC, 763 PARTIAL_AC,
764 PARTIAL_L3, 764 PARTIAL_L3,
765 FULL 765 FULL
766 } g_cpucache_up; 766 } g_cpucache_up;
767 767
768 /* 768 /*
769 * used by boot code to determine if it can use slab based allocator 769 * used by boot code to determine if it can use slab based allocator
770 */ 770 */
771 int slab_is_available(void) 771 int slab_is_available(void)
772 { 772 {
773 return g_cpucache_up == FULL; 773 return g_cpucache_up == FULL;
774 } 774 }
775 775
776 static DEFINE_PER_CPU(struct delayed_work, reap_work); 776 static DEFINE_PER_CPU(struct delayed_work, reap_work);
777 777
778 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) 778 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
779 { 779 {
780 return cachep->array[smp_processor_id()]; 780 return cachep->array[smp_processor_id()];
781 } 781 }
782 782
783 static inline struct kmem_cache *__find_general_cachep(size_t size, 783 static inline struct kmem_cache *__find_general_cachep(size_t size,
784 gfp_t gfpflags) 784 gfp_t gfpflags)
785 { 785 {
786 struct cache_sizes *csizep = malloc_sizes; 786 struct cache_sizes *csizep = malloc_sizes;
787 787
788 #if DEBUG 788 #if DEBUG
789 /* This happens if someone tries to call 789 /* This happens if someone tries to call
790 * kmem_cache_create(), or __kmalloc(), before 790 * kmem_cache_create(), or __kmalloc(), before
791 * the generic caches are initialized. 791 * the generic caches are initialized.
792 */ 792 */
793 BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL); 793 BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
794 #endif 794 #endif
795 while (size > csizep->cs_size) 795 while (size > csizep->cs_size)
796 csizep++; 796 csizep++;
797 797
798 /* 798 /*
799 * Really subtle: The last entry with cs->cs_size==ULONG_MAX 799 * Really subtle: The last entry with cs->cs_size==ULONG_MAX
800 * has cs_{dma,}cachep==NULL. Thus no special case 800 * has cs_{dma,}cachep==NULL. Thus no special case
801 * for large kmalloc calls required. 801 * for large kmalloc calls required.
802 */ 802 */
803 #ifdef CONFIG_ZONE_DMA 803 #ifdef CONFIG_ZONE_DMA
804 if (unlikely(gfpflags & GFP_DMA)) 804 if (unlikely(gfpflags & GFP_DMA))
805 return csizep->cs_dmacachep; 805 return csizep->cs_dmacachep;
806 #endif 806 #endif
807 return csizep->cs_cachep; 807 return csizep->cs_cachep;
808 } 808 }
809 809
810 static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) 810 static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
811 { 811 {
812 return __find_general_cachep(size, gfpflags); 812 return __find_general_cachep(size, gfpflags);
813 } 813 }
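A sketch of the consumer of this walk; kmalloc_demo() is illustrative and omits the caller tracking and NUMA variants that the real kmalloc paths in this file wrap around the same lookup:

static void *kmalloc_demo(size_t size, gfp_t flags)
{
        struct kmem_cache *cachep;

        cachep = __find_general_cachep(size, flags);
        if (unlikely(cachep == NULL))
                return NULL;            /* size exceeded the largest general cache */
        return kmem_cache_alloc(cachep, flags);
}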
814 814
815 static size_t slab_mgmt_size(size_t nr_objs, size_t align) 815 static size_t slab_mgmt_size(size_t nr_objs, size_t align)
816 { 816 {
817 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); 817 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
818 } 818 }
819 819
820 /* 820 /*
821 * Calculate the number of objects and left-over bytes for a given buffer size. 821 * Calculate the number of objects and left-over bytes for a given buffer size.
822 */ 822 */
823 static void cache_estimate(unsigned long gfporder, size_t buffer_size, 823 static void cache_estimate(unsigned long gfporder, size_t buffer_size,
824 size_t align, int flags, size_t *left_over, 824 size_t align, int flags, size_t *left_over,
825 unsigned int *num) 825 unsigned int *num)
826 { 826 {
827 int nr_objs; 827 int nr_objs;
828 size_t mgmt_size; 828 size_t mgmt_size;
829 size_t slab_size = PAGE_SIZE << gfporder; 829 size_t slab_size = PAGE_SIZE << gfporder;
830 830
831 /* 831 /*
832 * The slab management structure can be either off the slab or 832 * The slab management structure can be either off the slab or
833 * on it. For the latter case, the memory allocated for a 833 * on it. For the latter case, the memory allocated for a
834 * slab is used for: 834 * slab is used for:
835 * 835 *
836 * - The struct slab 836 * - The struct slab
837 * - One kmem_bufctl_t for each object 837 * - One kmem_bufctl_t for each object
838 * - Padding to respect alignment of @align 838 * - Padding to respect alignment of @align
839 * - @buffer_size bytes for each object 839 * - @buffer_size bytes for each object
840 * 840 *
841 * If the slab management structure is off the slab, then the 841 * If the slab management structure is off the slab, then the
842 * alignment will already be calculated into the size. Because 842 * alignment will already be calculated into the size. Because
843 * the slabs are all page aligned, the objects will be at the 843 * the slabs are all page aligned, the objects will be at the
844 * correct alignment when allocated. 844 * correct alignment when allocated.
845 */ 845 */
846 if (flags & CFLGS_OFF_SLAB) { 846 if (flags & CFLGS_OFF_SLAB) {
847 mgmt_size = 0; 847 mgmt_size = 0;
848 nr_objs = slab_size / buffer_size; 848 nr_objs = slab_size / buffer_size;
849 849
850 if (nr_objs > SLAB_LIMIT) 850 if (nr_objs > SLAB_LIMIT)
851 nr_objs = SLAB_LIMIT; 851 nr_objs = SLAB_LIMIT;
852 } else { 852 } else {
853 /* 853 /*
854 * Ignore padding for the initial guess. The padding 854 * Ignore padding for the initial guess. The padding
855 * is at most @align-1 bytes, and @buffer_size is at 855 * is at most @align-1 bytes, and @buffer_size is at
856 * least @align. In the worst case, this result will 856 * least @align. In the worst case, this result will
857 * be one greater than the number of objects that fit 857 * be one greater than the number of objects that fit
858 * into the memory allocation when taking the padding 858 * into the memory allocation when taking the padding
859 * into account. 859 * into account.
860 */ 860 */
861 nr_objs = (slab_size - sizeof(struct slab)) / 861 nr_objs = (slab_size - sizeof(struct slab)) /
862 (buffer_size + sizeof(kmem_bufctl_t)); 862 (buffer_size + sizeof(kmem_bufctl_t));
863 863
864 /* 864 /*
865 * This calculated number will be either the right 865 * This calculated number will be either the right
866 * amount, or one greater than what we want. 866 * amount, or one greater than what we want.
867 */ 867 */
868 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size 868 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
869 > slab_size) 869 > slab_size)
870 nr_objs--; 870 nr_objs--;
871 871
872 if (nr_objs > SLAB_LIMIT) 872 if (nr_objs > SLAB_LIMIT)
873 nr_objs = SLAB_LIMIT; 873 nr_objs = SLAB_LIMIT;
874 874
875 mgmt_size = slab_mgmt_size(nr_objs, align); 875 mgmt_size = slab_mgmt_size(nr_objs, align);
876 } 876 }
877 *num = nr_objs; 877 *num = nr_objs;
878 *left_over = slab_size - nr_objs*buffer_size - mgmt_size; 878 *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
879 } 879 }
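A worked instance of the estimate, with assumed sizes (sizeof(struct slab) == 48 and sizeof(kmem_bufctl_t) == 4, plausible for a 64-bit build) for an on-slab cache of 256-byte objects, 32-byte alignment, order-0 slabs and 4 kB pages; the arithmetic below is illustrative, not output from the function:

/*
 * cache_estimate(0, 256, 32, 0, &left_over, &num):
 *
 *   slab_size = 4096
 *   first guess: nr_objs = (4096 - 48) / (256 + 4) = 15
 *   slab_mgmt_size(15, 32) = ALIGN(48 + 15 * 4, 32) = ALIGN(108, 32) = 128
 *   128 + 15 * 256 = 3968 <= 4096, so the guess of 15 stands
 *   num = 15, left_over = 4096 - 15 * 256 - 128 = 128 (later used for colouring)
 */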
880 880
881 #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) 881 #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg)
882 882
883 static void __slab_error(const char *function, struct kmem_cache *cachep, 883 static void __slab_error(const char *function, struct kmem_cache *cachep,
884 char *msg) 884 char *msg)
885 { 885 {
886 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", 886 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
887 function, cachep->name, msg); 887 function, cachep->name, msg);
888 dump_stack(); 888 dump_stack();
889 } 889 }
890 890
891 /* 891 /*
892 * By default on NUMA we use alien caches to stage the freeing of 892 * By default on NUMA we use alien caches to stage the freeing of
893 * objects allocated from other nodes. This causes massive memory 893 * objects allocated from other nodes. This causes massive memory
894 * inefficiencies when using fake NUMA setup to split memory into a 894 * inefficiencies when using fake NUMA setup to split memory into a
895 * large number of small nodes, so it can be disabled on the command 895 * large number of small nodes, so it can be disabled on the command
896 * line 896 * line
897 */ 897 */
898 898
899 static int use_alien_caches __read_mostly = 1; 899 static int use_alien_caches __read_mostly = 1;
900 static int __init noaliencache_setup(char *s) 900 static int __init noaliencache_setup(char *s)
901 { 901 {
902 use_alien_caches = 0; 902 use_alien_caches = 0;
903 return 1; 903 return 1;
904 } 904 }
905 __setup("noaliencache", noaliencache_setup); 905 __setup("noaliencache", noaliencache_setup);
906 906
907 #ifdef CONFIG_NUMA 907 #ifdef CONFIG_NUMA
908 /* 908 /*
909 * Special reaping functions for NUMA systems called from cache_reap(). 909 * Special reaping functions for NUMA systems called from cache_reap().
910 * These take care of doing round robin flushing of alien caches (containing 910 * These take care of doing round robin flushing of alien caches (containing
911 * objects freed on a node other than the one they were allocated from) and the 911 * objects freed on a node other than the one they were allocated from) and the
912 * flushing of remote pcps by calling drain_node_pages. 912 * flushing of remote pcps by calling drain_node_pages.
913 */ 913 */
914 static DEFINE_PER_CPU(unsigned long, reap_node); 914 static DEFINE_PER_CPU(unsigned long, reap_node);
915 915
916 static void init_reap_node(int cpu) 916 static void init_reap_node(int cpu)
917 { 917 {
918 int node; 918 int node;
919 919
920 node = next_node(cpu_to_node(cpu), node_online_map); 920 node = next_node(cpu_to_node(cpu), node_online_map);
921 if (node == MAX_NUMNODES) 921 if (node == MAX_NUMNODES)
922 node = first_node(node_online_map); 922 node = first_node(node_online_map);
923 923
924 per_cpu(reap_node, cpu) = node; 924 per_cpu(reap_node, cpu) = node;
925 } 925 }
926 926
927 static void next_reap_node(void) 927 static void next_reap_node(void)
928 { 928 {
929 int node = __get_cpu_var(reap_node); 929 int node = __get_cpu_var(reap_node);
930 930
931 /* 931 /*
932 * Also drain per cpu pages on remote zones 932 * Also drain per cpu pages on remote zones
933 */ 933 */
934 if (node != numa_node_id()) 934 if (node != numa_node_id())
935 drain_node_pages(node); 935 drain_node_pages(node);
936 936
937 node = next_node(node, node_online_map); 937 node = next_node(node, node_online_map);
938 if (unlikely(node >= MAX_NUMNODES)) 938 if (unlikely(node >= MAX_NUMNODES))
939 node = first_node(node_online_map); 939 node = first_node(node_online_map);
940 __get_cpu_var(reap_node) = node; 940 __get_cpu_var(reap_node) = node;
941 } 941 }
942 942
943 #else 943 #else
944 #define init_reap_node(cpu) do { } while (0) 944 #define init_reap_node(cpu) do { } while (0)
945 #define next_reap_node(void) do { } while (0) 945 #define next_reap_node(void) do { } while (0)
946 #endif 946 #endif
947 947
948 /* 948 /*
949 * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz 949 * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz
950 * via the workqueue/eventd. 950 * via the workqueue/eventd.
951 * Add the CPU number into the expiration time to minimize the possibility of 951 * Add the CPU number into the expiration time to minimize the possibility of
952 * the CPUs getting into lockstep and contending for the global cache chain 952 * the CPUs getting into lockstep and contending for the global cache chain
953 * lock. 953 * lock.
954 */ 954 */
955 static void __devinit start_cpu_timer(int cpu) 955 static void __devinit start_cpu_timer(int cpu)
956 { 956 {
957 struct delayed_work *reap_work = &per_cpu(reap_work, cpu); 957 struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
958 958
959 /* 959 /*
960 * When this gets called from do_initcalls via cpucache_init(), 960 * When this gets called from do_initcalls via cpucache_init(),
961 * init_workqueues() has already run, so keventd will be setup 961 * init_workqueues() has already run, so keventd will be setup
962 * at that time. 962 * at that time.
963 */ 963 */
964 if (keventd_up() && reap_work->work.func == NULL) { 964 if (keventd_up() && reap_work->work.func == NULL) {
965 init_reap_node(cpu); 965 init_reap_node(cpu);
966 INIT_DELAYED_WORK(reap_work, cache_reap); 966 INIT_DELAYED_WORK(reap_work, cache_reap);
967 schedule_delayed_work_on(cpu, reap_work, 967 schedule_delayed_work_on(cpu, reap_work,
968 __round_jiffies_relative(HZ, cpu)); 968 __round_jiffies_relative(HZ, cpu));
969 } 969 }
970 } 970 }
971 971
972 static struct array_cache *alloc_arraycache(int node, int entries, 972 static struct array_cache *alloc_arraycache(int node, int entries,
973 int batchcount) 973 int batchcount)
974 { 974 {
975 int memsize = sizeof(void *) * entries + sizeof(struct array_cache); 975 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
976 struct array_cache *nc = NULL; 976 struct array_cache *nc = NULL;
977 977
978 nc = kmalloc_node(memsize, GFP_KERNEL, node); 978 nc = kmalloc_node(memsize, GFP_KERNEL, node);
979 if (nc) { 979 if (nc) {
980 nc->avail = 0; 980 nc->avail = 0;
981 nc->limit = entries; 981 nc->limit = entries;
982 nc->batchcount = batchcount; 982 nc->batchcount = batchcount;
983 nc->touched = 0; 983 nc->touched = 0;
984 spin_lock_init(&nc->lock); 984 spin_lock_init(&nc->lock);
985 } 985 }
986 return nc; 986 return nc;
987 } 987 }
988 988
989 /* 989 /*
990 * Transfer objects in one arraycache to another. 990 * Transfer objects in one arraycache to another.
991 * Locking must be handled by the caller. 991 * Locking must be handled by the caller.
992 * 992 *
993 * Return the number of entries transferred. 993 * Return the number of entries transferred.
994 */ 994 */
995 static int transfer_objects(struct array_cache *to, 995 static int transfer_objects(struct array_cache *to,
996 struct array_cache *from, unsigned int max) 996 struct array_cache *from, unsigned int max)
997 { 997 {
998 /* Figure out how many entries to transfer */ 998 /* Figure out how many entries to transfer */
999 int nr = min(min(from->avail, max), to->limit - to->avail); 999 int nr = min(min(from->avail, max), to->limit - to->avail);
1000 1000
1001 if (!nr) 1001 if (!nr)
1002 return 0; 1002 return 0;
1003 1003
1004 memcpy(to->entry + to->avail, from->entry + from->avail -nr, 1004 memcpy(to->entry + to->avail, from->entry + from->avail -nr,
1005 sizeof(void *) *nr); 1005 sizeof(void *) *nr);
1006 1006
1007 from->avail -= nr; 1007 from->avail -= nr;
1008 to->avail += nr; 1008 to->avail += nr;
1009 to->touched = 1; 1009 to->touched = 1;
1010 return nr; 1010 return nr;
1011 } 1011 }
1012 1012
1013 #ifndef CONFIG_NUMA 1013 #ifndef CONFIG_NUMA
1014 1014
1015 #define drain_alien_cache(cachep, alien) do { } while (0) 1015 #define drain_alien_cache(cachep, alien) do { } while (0)
1016 #define reap_alien(cachep, l3) do { } while (0) 1016 #define reap_alien(cachep, l3) do { } while (0)
1017 1017
1018 static inline struct array_cache **alloc_alien_cache(int node, int limit) 1018 static inline struct array_cache **alloc_alien_cache(int node, int limit)
1019 { 1019 {
1020 return (struct array_cache **)BAD_ALIEN_MAGIC; 1020 return (struct array_cache **)BAD_ALIEN_MAGIC;
1021 } 1021 }
1022 1022
1023 static inline void free_alien_cache(struct array_cache **ac_ptr) 1023 static inline void free_alien_cache(struct array_cache **ac_ptr)
1024 { 1024 {
1025 } 1025 }
1026 1026
1027 static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) 1027 static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1028 { 1028 {
1029 return 0; 1029 return 0;
1030 } 1030 }
1031 1031
1032 static inline void *alternate_node_alloc(struct kmem_cache *cachep, 1032 static inline void *alternate_node_alloc(struct kmem_cache *cachep,
1033 gfp_t flags) 1033 gfp_t flags)
1034 { 1034 {
1035 return NULL; 1035 return NULL;
1036 } 1036 }
1037 1037
1038 static inline void *____cache_alloc_node(struct kmem_cache *cachep, 1038 static inline void *____cache_alloc_node(struct kmem_cache *cachep,
1039 gfp_t flags, int nodeid) 1039 gfp_t flags, int nodeid)
1040 { 1040 {
1041 return NULL; 1041 return NULL;
1042 } 1042 }
1043 1043
1044 #else /* CONFIG_NUMA */ 1044 #else /* CONFIG_NUMA */
1045 1045
1046 static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); 1046 static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
1047 static void *alternate_node_alloc(struct kmem_cache *, gfp_t); 1047 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
1048 1048
1049 static struct array_cache **alloc_alien_cache(int node, int limit) 1049 static struct array_cache **alloc_alien_cache(int node, int limit)
1050 { 1050 {
1051 struct array_cache **ac_ptr; 1051 struct array_cache **ac_ptr;
1052 int memsize = sizeof(void *) * nr_node_ids; 1052 int memsize = sizeof(void *) * nr_node_ids;
1053 int i; 1053 int i;
1054 1054
1055 if (limit > 1) 1055 if (limit > 1)
1056 limit = 12; 1056 limit = 12;
1057 ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node); 1057 ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node);
1058 if (ac_ptr) { 1058 if (ac_ptr) {
1059 for_each_node(i) { 1059 for_each_node(i) {
1060 if (i == node || !node_online(i)) { 1060 if (i == node || !node_online(i)) {
1061 ac_ptr[i] = NULL; 1061 ac_ptr[i] = NULL;
1062 continue; 1062 continue;
1063 } 1063 }
1064 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); 1064 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d);
1065 if (!ac_ptr[i]) { 1065 if (!ac_ptr[i]) {
1066 for (i--; i >= 0; i--) 1066 for (i--; i >= 0; i--)
1067 kfree(ac_ptr[i]); 1067 kfree(ac_ptr[i]);
1068 kfree(ac_ptr); 1068 kfree(ac_ptr);
1069 return NULL; 1069 return NULL;
1070 } 1070 }
1071 } 1071 }
1072 } 1072 }
1073 return ac_ptr; 1073 return ac_ptr;
1074 } 1074 }
1075 1075
1076 static void free_alien_cache(struct array_cache **ac_ptr) 1076 static void free_alien_cache(struct array_cache **ac_ptr)
1077 { 1077 {
1078 int i; 1078 int i;
1079 1079
1080 if (!ac_ptr) 1080 if (!ac_ptr)
1081 return; 1081 return;
1082 for_each_node(i) 1082 for_each_node(i)
1083 kfree(ac_ptr[i]); 1083 kfree(ac_ptr[i]);
1084 kfree(ac_ptr); 1084 kfree(ac_ptr);
1085 } 1085 }
1086 1086
1087 static void __drain_alien_cache(struct kmem_cache *cachep, 1087 static void __drain_alien_cache(struct kmem_cache *cachep,
1088 struct array_cache *ac, int node) 1088 struct array_cache *ac, int node)
1089 { 1089 {
1090 struct kmem_list3 *rl3 = cachep->nodelists[node]; 1090 struct kmem_list3 *rl3 = cachep->nodelists[node];
1091 1091
1092 if (ac->avail) { 1092 if (ac->avail) {
1093 spin_lock(&rl3->list_lock); 1093 spin_lock(&rl3->list_lock);
1094 /* 1094 /*
1095 * Stuff objects into the remote node's shared array first. 1095 * Stuff objects into the remote node's shared array first.
1096 * That way we could avoid the overhead of putting the objects 1096 * That way we could avoid the overhead of putting the objects
1097 * into the free lists and getting them back later. 1097 * into the free lists and getting them back later.
1098 */ 1098 */
1099 if (rl3->shared) 1099 if (rl3->shared)
1100 transfer_objects(rl3->shared, ac, ac->limit); 1100 transfer_objects(rl3->shared, ac, ac->limit);
1101 1101
1102 free_block(cachep, ac->entry, ac->avail, node); 1102 free_block(cachep, ac->entry, ac->avail, node);
1103 ac->avail = 0; 1103 ac->avail = 0;
1104 spin_unlock(&rl3->list_lock); 1104 spin_unlock(&rl3->list_lock);
1105 } 1105 }
1106 } 1106 }
1107 1107
1108 /* 1108 /*
1109 * Called from cache_reap() to regularly drain alien caches round robin. 1109 * Called from cache_reap() to regularly drain alien caches round robin.
1110 */ 1110 */
1111 static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) 1111 static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
1112 { 1112 {
1113 int node = __get_cpu_var(reap_node); 1113 int node = __get_cpu_var(reap_node);
1114 1114
1115 if (l3->alien) { 1115 if (l3->alien) {
1116 struct array_cache *ac = l3->alien[node]; 1116 struct array_cache *ac = l3->alien[node];
1117 1117
1118 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { 1118 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
1119 __drain_alien_cache(cachep, ac, node); 1119 __drain_alien_cache(cachep, ac, node);
1120 spin_unlock_irq(&ac->lock); 1120 spin_unlock_irq(&ac->lock);
1121 } 1121 }
1122 } 1122 }
1123 } 1123 }
1124 1124
1125 static void drain_alien_cache(struct kmem_cache *cachep, 1125 static void drain_alien_cache(struct kmem_cache *cachep,
1126 struct array_cache **alien) 1126 struct array_cache **alien)
1127 { 1127 {
1128 int i = 0; 1128 int i = 0;
1129 struct array_cache *ac; 1129 struct array_cache *ac;
1130 unsigned long flags; 1130 unsigned long flags;
1131 1131
1132 for_each_online_node(i) { 1132 for_each_online_node(i) {
1133 ac = alien[i]; 1133 ac = alien[i];
1134 if (ac) { 1134 if (ac) {
1135 spin_lock_irqsave(&ac->lock, flags); 1135 spin_lock_irqsave(&ac->lock, flags);
1136 __drain_alien_cache(cachep, ac, i); 1136 __drain_alien_cache(cachep, ac, i);
1137 spin_unlock_irqrestore(&ac->lock, flags); 1137 spin_unlock_irqrestore(&ac->lock, flags);
1138 } 1138 }
1139 } 1139 }
1140 } 1140 }
1141 1141
1142 static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) 1142 static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1143 { 1143 {
1144 struct slab *slabp = virt_to_slab(objp); 1144 struct slab *slabp = virt_to_slab(objp);
1145 int nodeid = slabp->nodeid; 1145 int nodeid = slabp->nodeid;
1146 struct kmem_list3 *l3; 1146 struct kmem_list3 *l3;
1147 struct array_cache *alien = NULL; 1147 struct array_cache *alien = NULL;
1148 int node; 1148 int node;
1149 1149
1150 node = numa_node_id(); 1150 node = numa_node_id();
1151 1151
1152 /* 1152 /*
1153 * Make sure we are not freeing an object from another node to the array 1153 * Make sure we are not freeing an object from another node to the array
1154 * cache on this cpu. 1154 * cache on this cpu.
1155 */ 1155 */
1156 if (likely(slabp->nodeid == node)) 1156 if (likely(slabp->nodeid == node))
1157 return 0; 1157 return 0;
1158 1158
1159 l3 = cachep->nodelists[node]; 1159 l3 = cachep->nodelists[node];
1160 STATS_INC_NODEFREES(cachep); 1160 STATS_INC_NODEFREES(cachep);
1161 if (l3->alien && l3->alien[nodeid]) { 1161 if (l3->alien && l3->alien[nodeid]) {
1162 alien = l3->alien[nodeid]; 1162 alien = l3->alien[nodeid];
1163 spin_lock(&alien->lock); 1163 spin_lock(&alien->lock);
1164 if (unlikely(alien->avail == alien->limit)) { 1164 if (unlikely(alien->avail == alien->limit)) {
1165 STATS_INC_ACOVERFLOW(cachep); 1165 STATS_INC_ACOVERFLOW(cachep);
1166 __drain_alien_cache(cachep, alien, nodeid); 1166 __drain_alien_cache(cachep, alien, nodeid);
1167 } 1167 }
1168 alien->entry[alien->avail++] = objp; 1168 alien->entry[alien->avail++] = objp;
1169 spin_unlock(&alien->lock); 1169 spin_unlock(&alien->lock);
1170 } else { 1170 } else {
1171 spin_lock(&(cachep->nodelists[nodeid])->list_lock); 1171 spin_lock(&(cachep->nodelists[nodeid])->list_lock);
1172 free_block(cachep, &objp, 1, nodeid); 1172 free_block(cachep, &objp, 1, nodeid);
1173 spin_unlock(&(cachep->nodelists[nodeid])->list_lock); 1173 spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
1174 } 1174 }
1175 return 1; 1175 return 1;
1176 } 1176 }
1177 #endif 1177 #endif
1178 1178
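cache_free_alien() above batches frees of objects that belong to another node: each object is pushed into the per-node alien array_cache, and only when that cache is full (avail == limit) is the whole batch drained to the remote node under its list_lock by __drain_alien_cache(). A small self-contained C sketch of the same buffer-then-flush pattern; the names and the printf() standing in for the bulk free are illustrative:

#include <stdio.h>

#define BATCH_LIMIT 4

struct batch {
        void *entry[BATCH_LIMIT];
        int avail;
};

/* stand-in for __drain_alien_cache(): hand the whole batch back at once */
static void drain(struct batch *b)
{
        printf("draining %d objects in one bulk operation\n", b->avail);
        b->avail = 0;
}

/* stand-in for cache_free_alien(): buffer the object, flush when full */
static void batched_free(struct batch *b, void *obj)
{
        if (b->avail == BATCH_LIMIT)
                drain(b);
        b->entry[b->avail++] = obj;
}

int main(void)
{
        struct batch b = { .avail = 0 };
        int objs[10], i;

        for (i = 0; i < 10; i++)
                batched_free(&b, &objs[i]);
        drain(&b);      /* final flush */
        return 0;
}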
1179 static int __cpuinit cpuup_callback(struct notifier_block *nfb, 1179 static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1180 unsigned long action, void *hcpu) 1180 unsigned long action, void *hcpu)
1181 { 1181 {
1182 long cpu = (long)hcpu; 1182 long cpu = (long)hcpu;
1183 struct kmem_cache *cachep; 1183 struct kmem_cache *cachep;
1184 struct kmem_list3 *l3 = NULL; 1184 struct kmem_list3 *l3 = NULL;
1185 int node = cpu_to_node(cpu); 1185 int node = cpu_to_node(cpu);
1186 int memsize = sizeof(struct kmem_list3); 1186 int memsize = sizeof(struct kmem_list3);
1187 1187
1188 switch (action) { 1188 switch (action) {
1189 case CPU_UP_PREPARE: 1189 case CPU_UP_PREPARE:
1190 mutex_lock(&cache_chain_mutex); 1190 mutex_lock(&cache_chain_mutex);
1191 /* 1191 /*
1192 * We need to do this right in the beginning since 1192 * We need to do this right in the beginning since
1193 * alloc_arraycache's are going to use this list. 1193 * alloc_arraycache's are going to use this list.
1194 * kmalloc_node allows us to add the slab to the right 1194 * kmalloc_node allows us to add the slab to the right
1195 * kmem_list3 and not this cpu's kmem_list3 1195 * kmem_list3 and not this cpu's kmem_list3
1196 */ 1196 */
1197 1197
1198 list_for_each_entry(cachep, &cache_chain, next) { 1198 list_for_each_entry(cachep, &cache_chain, next) {
1199 /* 1199 /*
1200 * Set up the size64 kmemlist for cpu before we can 1200 * Set up the size64 kmemlist for cpu before we can
1201 * begin anything. Make sure some other cpu on this 1201 * begin anything. Make sure some other cpu on this
1202 * node has not already allocated this 1202 * node has not already allocated this
1203 */ 1203 */
1204 if (!cachep->nodelists[node]) { 1204 if (!cachep->nodelists[node]) {
1205 l3 = kmalloc_node(memsize, GFP_KERNEL, node); 1205 l3 = kmalloc_node(memsize, GFP_KERNEL, node);
1206 if (!l3) 1206 if (!l3)
1207 goto bad; 1207 goto bad;
1208 kmem_list3_init(l3); 1208 kmem_list3_init(l3);
1209 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + 1209 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
1210 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 1210 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1211 1211
1212 /* 1212 /*
1213 * The l3s don't come and go as CPUs come and 1213 * The l3s don't come and go as CPUs come and
1214 * go. cache_chain_mutex is sufficient 1214 * go. cache_chain_mutex is sufficient
1215 * protection here. 1215 * protection here.
1216 */ 1216 */
1217 cachep->nodelists[node] = l3; 1217 cachep->nodelists[node] = l3;
1218 } 1218 }
1219 1219
1220 spin_lock_irq(&cachep->nodelists[node]->list_lock); 1220 spin_lock_irq(&cachep->nodelists[node]->list_lock);
1221 cachep->nodelists[node]->free_limit = 1221 cachep->nodelists[node]->free_limit =
1222 (1 + nr_cpus_node(node)) * 1222 (1 + nr_cpus_node(node)) *
1223 cachep->batchcount + cachep->num; 1223 cachep->batchcount + cachep->num;
1224 spin_unlock_irq(&cachep->nodelists[node]->list_lock); 1224 spin_unlock_irq(&cachep->nodelists[node]->list_lock);
1225 } 1225 }
1226 1226
1227 /* 1227 /*
1228 * Now we can go ahead with allocating the shared arrays and 1228 * Now we can go ahead with allocating the shared arrays and
1229 * array caches 1229 * array caches
1230 */ 1230 */
1231 list_for_each_entry(cachep, &cache_chain, next) { 1231 list_for_each_entry(cachep, &cache_chain, next) {
1232 struct array_cache *nc; 1232 struct array_cache *nc;
1233 struct array_cache *shared = NULL; 1233 struct array_cache *shared = NULL;
1234 struct array_cache **alien = NULL; 1234 struct array_cache **alien = NULL;
1235 1235
1236 nc = alloc_arraycache(node, cachep->limit, 1236 nc = alloc_arraycache(node, cachep->limit,
1237 cachep->batchcount); 1237 cachep->batchcount);
1238 if (!nc) 1238 if (!nc)
1239 goto bad; 1239 goto bad;
1240 if (cachep->shared) { 1240 if (cachep->shared) {
1241 shared = alloc_arraycache(node, 1241 shared = alloc_arraycache(node,
1242 cachep->shared * cachep->batchcount, 1242 cachep->shared * cachep->batchcount,
1243 0xbaadf00d); 1243 0xbaadf00d);
1244 if (!shared) 1244 if (!shared)
1245 goto bad; 1245 goto bad;
1246 } 1246 }
1247 if (use_alien_caches) { 1247 if (use_alien_caches) {
1248 alien = alloc_alien_cache(node, cachep->limit); 1248 alien = alloc_alien_cache(node, cachep->limit);
1249 if (!alien) 1249 if (!alien)
1250 goto bad; 1250 goto bad;
1251 } 1251 }
1252 cachep->array[cpu] = nc; 1252 cachep->array[cpu] = nc;
1253 l3 = cachep->nodelists[node]; 1253 l3 = cachep->nodelists[node];
1254 BUG_ON(!l3); 1254 BUG_ON(!l3);
1255 1255
1256 spin_lock_irq(&l3->list_lock); 1256 spin_lock_irq(&l3->list_lock);
1257 if (!l3->shared) { 1257 if (!l3->shared) {
1258 /* 1258 /*
1259 * We are serialised from CPU_DEAD or 1259 * We are serialised from CPU_DEAD or
1260 * CPU_UP_CANCELLED by the cpucontrol lock 1260 * CPU_UP_CANCELLED by the cpucontrol lock
1261 */ 1261 */
1262 l3->shared = shared; 1262 l3->shared = shared;
1263 shared = NULL; 1263 shared = NULL;
1264 } 1264 }
1265 #ifdef CONFIG_NUMA 1265 #ifdef CONFIG_NUMA
1266 if (!l3->alien) { 1266 if (!l3->alien) {
1267 l3->alien = alien; 1267 l3->alien = alien;
1268 alien = NULL; 1268 alien = NULL;
1269 } 1269 }
1270 #endif 1270 #endif
1271 spin_unlock_irq(&l3->list_lock); 1271 spin_unlock_irq(&l3->list_lock);
1272 kfree(shared); 1272 kfree(shared);
1273 free_alien_cache(alien); 1273 free_alien_cache(alien);
1274 } 1274 }
1275 break; 1275 break;
1276 case CPU_ONLINE: 1276 case CPU_ONLINE:
1277 mutex_unlock(&cache_chain_mutex); 1277 mutex_unlock(&cache_chain_mutex);
1278 start_cpu_timer(cpu); 1278 start_cpu_timer(cpu);
1279 break; 1279 break;
1280 #ifdef CONFIG_HOTPLUG_CPU 1280 #ifdef CONFIG_HOTPLUG_CPU
1281 case CPU_DOWN_PREPARE: 1281 case CPU_DOWN_PREPARE:
1282 mutex_lock(&cache_chain_mutex); 1282 mutex_lock(&cache_chain_mutex);
1283 break; 1283 break;
1284 case CPU_DOWN_FAILED: 1284 case CPU_DOWN_FAILED:
1285 mutex_unlock(&cache_chain_mutex); 1285 mutex_unlock(&cache_chain_mutex);
1286 break; 1286 break;
1287 case CPU_DEAD: 1287 case CPU_DEAD:
1288 /* 1288 /*
1289 * Even if all the cpus of a node are down, we don't free the 1289 * Even if all the cpus of a node are down, we don't free the
1290 * kmem_list3 of any cache. This is to avoid a race between 1290 * kmem_list3 of any cache. This is to avoid a race between
1291 * cpu_down, and a kmalloc allocation from another cpu for 1291 * cpu_down, and a kmalloc allocation from another cpu for
1292 * memory from the node of the cpu going down. The list3 1292 * memory from the node of the cpu going down. The list3
1293 * structure is usually allocated from kmem_cache_create() and 1293 * structure is usually allocated from kmem_cache_create() and
1294 * gets destroyed at kmem_cache_destroy(). 1294 * gets destroyed at kmem_cache_destroy().
1295 */ 1295 */
1296 /* fall thru */ 1296 /* fall thru */
1297 #endif 1297 #endif
1298 case CPU_UP_CANCELED: 1298 case CPU_UP_CANCELED:
1299 list_for_each_entry(cachep, &cache_chain, next) { 1299 list_for_each_entry(cachep, &cache_chain, next) {
1300 struct array_cache *nc; 1300 struct array_cache *nc;
1301 struct array_cache *shared; 1301 struct array_cache *shared;
1302 struct array_cache **alien; 1302 struct array_cache **alien;
1303 cpumask_t mask; 1303 cpumask_t mask;
1304 1304
1305 mask = node_to_cpumask(node); 1305 mask = node_to_cpumask(node);
1306 /* cpu is dead; no one can alloc from it. */ 1306 /* cpu is dead; no one can alloc from it. */
1307 nc = cachep->array[cpu]; 1307 nc = cachep->array[cpu];
1308 cachep->array[cpu] = NULL; 1308 cachep->array[cpu] = NULL;
1309 l3 = cachep->nodelists[node]; 1309 l3 = cachep->nodelists[node];
1310 1310
1311 if (!l3) 1311 if (!l3)
1312 goto free_array_cache; 1312 goto free_array_cache;
1313 1313
1314 spin_lock_irq(&l3->list_lock); 1314 spin_lock_irq(&l3->list_lock);
1315 1315
1316 /* Free limit for this kmem_list3 */ 1316 /* Free limit for this kmem_list3 */
1317 l3->free_limit -= cachep->batchcount; 1317 l3->free_limit -= cachep->batchcount;
1318 if (nc) 1318 if (nc)
1319 free_block(cachep, nc->entry, nc->avail, node); 1319 free_block(cachep, nc->entry, nc->avail, node);
1320 1320
1321 if (!cpus_empty(mask)) { 1321 if (!cpus_empty(mask)) {
1322 spin_unlock_irq(&l3->list_lock); 1322 spin_unlock_irq(&l3->list_lock);
1323 goto free_array_cache; 1323 goto free_array_cache;
1324 } 1324 }
1325 1325
1326 shared = l3->shared; 1326 shared = l3->shared;
1327 if (shared) { 1327 if (shared) {
1328 free_block(cachep, shared->entry, 1328 free_block(cachep, shared->entry,
1329 shared->avail, node); 1329 shared->avail, node);
1330 l3->shared = NULL; 1330 l3->shared = NULL;
1331 } 1331 }
1332 1332
1333 alien = l3->alien; 1333 alien = l3->alien;
1334 l3->alien = NULL; 1334 l3->alien = NULL;
1335 1335
1336 spin_unlock_irq(&l3->list_lock); 1336 spin_unlock_irq(&l3->list_lock);
1337 1337
1338 kfree(shared); 1338 kfree(shared);
1339 if (alien) { 1339 if (alien) {
1340 drain_alien_cache(cachep, alien); 1340 drain_alien_cache(cachep, alien);
1341 free_alien_cache(alien); 1341 free_alien_cache(alien);
1342 } 1342 }
1343 free_array_cache: 1343 free_array_cache:
1344 kfree(nc); 1344 kfree(nc);
1345 } 1345 }
1346 /* 1346 /*
1347 * In the previous loop, all the objects were freed to 1347 * In the previous loop, all the objects were freed to
1348 * the respective cache's slabs; now we can go ahead and 1348 * the respective cache's slabs; now we can go ahead and
1349 * shrink each nodelist to its limit. 1349 * shrink each nodelist to its limit.
1350 */ 1350 */
1351 list_for_each_entry(cachep, &cache_chain, next) { 1351 list_for_each_entry(cachep, &cache_chain, next) {
1352 l3 = cachep->nodelists[node]; 1352 l3 = cachep->nodelists[node];
1353 if (!l3) 1353 if (!l3)
1354 continue; 1354 continue;
1355 drain_freelist(cachep, l3, l3->free_objects); 1355 drain_freelist(cachep, l3, l3->free_objects);
1356 } 1356 }
1357 mutex_unlock(&cache_chain_mutex); 1357 mutex_unlock(&cache_chain_mutex);
1358 break; 1358 break;
1359 } 1359 }
1360 return NOTIFY_OK; 1360 return NOTIFY_OK;
1361 bad: 1361 bad:
1362 return NOTIFY_BAD; 1362 return NOTIFY_BAD;
1363 } 1363 }
1364 1364
1365 static struct notifier_block __cpuinitdata cpucache_notifier = { 1365 static struct notifier_block __cpuinitdata cpucache_notifier = {
1366 &cpuup_callback, NULL, 0 1366 &cpuup_callback, NULL, 0
1367 }; 1367 };
1368 1368
1369 /* 1369 /*
1370 * swap the static kmem_list3 with kmalloced memory 1370 * swap the static kmem_list3 with kmalloced memory
1371 */ 1371 */
1372 static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, 1372 static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1373 int nodeid) 1373 int nodeid)
1374 { 1374 {
1375 struct kmem_list3 *ptr; 1375 struct kmem_list3 *ptr;
1376 1376
1377 ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); 1377 ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid);
1378 BUG_ON(!ptr); 1378 BUG_ON(!ptr);
1379 1379
1380 local_irq_disable(); 1380 local_irq_disable();
1381 memcpy(ptr, list, sizeof(struct kmem_list3)); 1381 memcpy(ptr, list, sizeof(struct kmem_list3));
1382 /* 1382 /*
1383 * Do not assume that spinlocks can be initialized via memcpy: 1383 * Do not assume that spinlocks can be initialized via memcpy:
1384 */ 1384 */
1385 spin_lock_init(&ptr->list_lock); 1385 spin_lock_init(&ptr->list_lock);
1386 1386
1387 MAKE_ALL_LISTS(cachep, ptr, nodeid); 1387 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1388 cachep->nodelists[nodeid] = ptr; 1388 cachep->nodelists[nodeid] = ptr;
1389 local_irq_enable(); 1389 local_irq_enable();
1390 } 1390 }
1391 1391
1392 /* 1392 /*
1393 * Initialisation. Called after the page allocator has been initialised and 1393 * Initialisation. Called after the page allocator has been initialised and
1394 * before smp_init(). 1394 * before smp_init().
1395 */ 1395 */
1396 void __init kmem_cache_init(void) 1396 void __init kmem_cache_init(void)
1397 { 1397 {
1398 size_t left_over; 1398 size_t left_over;
1399 struct cache_sizes *sizes; 1399 struct cache_sizes *sizes;
1400 struct cache_names *names; 1400 struct cache_names *names;
1401 int i; 1401 int i;
1402 int order; 1402 int order;
1403 int node; 1403 int node;
1404 1404
1405 if (num_possible_nodes() == 1) 1405 if (num_possible_nodes() == 1)
1406 use_alien_caches = 0; 1406 use_alien_caches = 0;
1407 1407
1408 for (i = 0; i < NUM_INIT_LISTS; i++) { 1408 for (i = 0; i < NUM_INIT_LISTS; i++) {
1409 kmem_list3_init(&initkmem_list3[i]); 1409 kmem_list3_init(&initkmem_list3[i]);
1410 if (i < MAX_NUMNODES) 1410 if (i < MAX_NUMNODES)
1411 cache_cache.nodelists[i] = NULL; 1411 cache_cache.nodelists[i] = NULL;
1412 } 1412 }
1413 1413
1414 /* 1414 /*
1415 * Fragmentation resistance on low memory - only use bigger 1415 * Fragmentation resistance on low memory - only use bigger
1416 * page orders on machines with more than 32MB of memory. 1416 * page orders on machines with more than 32MB of memory.
1417 */ 1417 */
1418 if (num_physpages > (32 << 20) >> PAGE_SHIFT) 1418 if (num_physpages > (32 << 20) >> PAGE_SHIFT)
1419 slab_break_gfp_order = BREAK_GFP_ORDER_HI; 1419 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
1420 1420
1421 /* Bootstrap is tricky, because several objects are allocated 1421 /* Bootstrap is tricky, because several objects are allocated
1422 * from caches that do not exist yet: 1422 * from caches that do not exist yet:
1423 * 1) initialize the cache_cache cache: it contains the struct 1423 * 1) initialize the cache_cache cache: it contains the struct
1424 * kmem_cache structures of all caches, except cache_cache itself: 1424 * kmem_cache structures of all caches, except cache_cache itself:
1425 * cache_cache is statically allocated. 1425 * cache_cache is statically allocated.
1426 * Initially an __init data area is used for the head array and the 1426 * Initially an __init data area is used for the head array and the
1427 * kmem_list3 structures, it's replaced with a kmalloc allocated 1427 * kmem_list3 structures, it's replaced with a kmalloc allocated
1428 * array at the end of the bootstrap. 1428 * array at the end of the bootstrap.
1429 * 2) Create the first kmalloc cache. 1429 * 2) Create the first kmalloc cache.
1430 * The struct kmem_cache for the new cache is allocated normally. 1430 * The struct kmem_cache for the new cache is allocated normally.
1431 * An __init data area is used for the head array. 1431 * An __init data area is used for the head array.
1432 * 3) Create the remaining kmalloc caches, with minimally sized 1432 * 3) Create the remaining kmalloc caches, with minimally sized
1433 * head arrays. 1433 * head arrays.
1434 * 4) Replace the __init data head arrays for cache_cache and the first 1434 * 4) Replace the __init data head arrays for cache_cache and the first
1435 * kmalloc cache with kmalloc allocated arrays. 1435 * kmalloc cache with kmalloc allocated arrays.
1436 * 5) Replace the __init data for kmem_list3 for cache_cache and 1436 * 5) Replace the __init data for kmem_list3 for cache_cache and
1437 * the other caches with kmalloc allocated memory. 1437 * the other caches with kmalloc allocated memory.
1438 * 6) Resize the head arrays of the kmalloc caches to their final sizes. 1438 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
1439 */ 1439 */
1440 1440
1441 node = numa_node_id(); 1441 node = numa_node_id();
1442 1442
1443 /* 1) create the cache_cache */ 1443 /* 1) create the cache_cache */
1444 INIT_LIST_HEAD(&cache_chain); 1444 INIT_LIST_HEAD(&cache_chain);
1445 list_add(&cache_cache.next, &cache_chain); 1445 list_add(&cache_cache.next, &cache_chain);
1446 cache_cache.colour_off = cache_line_size(); 1446 cache_cache.colour_off = cache_line_size();
1447 cache_cache.array[smp_processor_id()] = &initarray_cache.cache; 1447 cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1448 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE]; 1448 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE];
1449 1449
1450 /* 1450 /*
1451 * struct kmem_cache size depends on nr_node_ids, which 1451 * struct kmem_cache size depends on nr_node_ids, which
1452 * can be less than MAX_NUMNODES. 1452 * can be less than MAX_NUMNODES.
1453 */ 1453 */
1454 cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) + 1454 cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
1455 nr_node_ids * sizeof(struct kmem_list3 *); 1455 nr_node_ids * sizeof(struct kmem_list3 *);
1456 #if DEBUG 1456 #if DEBUG
1457 cache_cache.obj_size = cache_cache.buffer_size; 1457 cache_cache.obj_size = cache_cache.buffer_size;
1458 #endif 1458 #endif
1459 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, 1459 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1460 cache_line_size()); 1460 cache_line_size());
1461 cache_cache.reciprocal_buffer_size = 1461 cache_cache.reciprocal_buffer_size =
1462 reciprocal_value(cache_cache.buffer_size); 1462 reciprocal_value(cache_cache.buffer_size);
1463 1463
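reciprocal_buffer_size caches a fixed-point inverse of buffer_size so that later offset-to-index calculations can use a multiply and a shift instead of an integer division. A standalone sketch of that trick follows; the exact rounding used by the kernel's reciprocal_value() may differ, so treat this as the general idea only:

#include <stdint.h>
#include <stdio.h>

/* Precompute ceil(2^32 / size) once per cache. */
static uint32_t reciprocal(uint32_t size)
{
        return (uint32_t)((((uint64_t)1 << 32) + size - 1) / size);
}

/* offset / size without a divide: multiply by the reciprocal, shift down. */
static uint32_t divide(uint32_t offset, uint32_t recip)
{
        return (uint32_t)(((uint64_t)offset * recip) >> 32);
}

int main(void)
{
        uint32_t size = 192;                   /* e.g. an aligned buffer_size */
        uint32_t r = reciprocal(size);
        uint32_t off;

        for (off = 0; off < 10 * size; off += size)
                printf("offset %u -> index %u\n", off, divide(off, r));
        return 0;
}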
1464 for (order = 0; order < MAX_ORDER; order++) { 1464 for (order = 0; order < MAX_ORDER; order++) {
1465 cache_estimate(order, cache_cache.buffer_size, 1465 cache_estimate(order, cache_cache.buffer_size,
1466 cache_line_size(), 0, &left_over, &cache_cache.num); 1466 cache_line_size(), 0, &left_over, &cache_cache.num);
1467 if (cache_cache.num) 1467 if (cache_cache.num)
1468 break; 1468 break;
1469 } 1469 }
1470 BUG_ON(!cache_cache.num); 1470 BUG_ON(!cache_cache.num);
1471 cache_cache.gfporder = order; 1471 cache_cache.gfporder = order;
1472 cache_cache.colour = left_over / cache_cache.colour_off; 1472 cache_cache.colour = left_over / cache_cache.colour_off;
1473 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + 1473 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1474 sizeof(struct slab), cache_line_size()); 1474 sizeof(struct slab), cache_line_size());
1475 1475
1476 /* 2+3) create the kmalloc caches */ 1476 /* 2+3) create the kmalloc caches */
1477 sizes = malloc_sizes; 1477 sizes = malloc_sizes;
1478 names = cache_names; 1478 names = cache_names;
1479 1479
1480 /* 1480 /*
1481 * Initialize the caches that provide memory for the array cache and the 1481 * Initialize the caches that provide memory for the array cache and the
1482 * kmem_list3 structures first. Without this, further allocations will 1482 * kmem_list3 structures first. Without this, further allocations will
1483 * bug. 1483 * bug.
1484 */ 1484 */
1485 1485
1486 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, 1486 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
1487 sizes[INDEX_AC].cs_size, 1487 sizes[INDEX_AC].cs_size,
1488 ARCH_KMALLOC_MINALIGN, 1488 ARCH_KMALLOC_MINALIGN,
1489 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1489 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1490 NULL, NULL); 1490 NULL, NULL);
1491 1491
1492 if (INDEX_AC != INDEX_L3) { 1492 if (INDEX_AC != INDEX_L3) {
1493 sizes[INDEX_L3].cs_cachep = 1493 sizes[INDEX_L3].cs_cachep =
1494 kmem_cache_create(names[INDEX_L3].name, 1494 kmem_cache_create(names[INDEX_L3].name,
1495 sizes[INDEX_L3].cs_size, 1495 sizes[INDEX_L3].cs_size,
1496 ARCH_KMALLOC_MINALIGN, 1496 ARCH_KMALLOC_MINALIGN,
1497 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1497 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1498 NULL, NULL); 1498 NULL, NULL);
1499 } 1499 }
1500 1500
1501 slab_early_init = 0; 1501 slab_early_init = 0;
1502 1502
1503 while (sizes->cs_size != ULONG_MAX) { 1503 while (sizes->cs_size != ULONG_MAX) {
1504 /* 1504 /*
1505 * For performance, all the general caches are L1 aligned. 1505 * For performance, all the general caches are L1 aligned.
1506 * This should be particularly beneficial on SMP boxes, as it 1506 * This should be particularly beneficial on SMP boxes, as it
1507 * eliminates "false sharing". 1507 * eliminates "false sharing".
1508 * Note for systems short on memory removing the alignment will 1508 * Note for systems short on memory removing the alignment will
1509 * allow tighter packing of the smaller caches. 1509 * allow tighter packing of the smaller caches.
1510 */ 1510 */
1511 if (!sizes->cs_cachep) { 1511 if (!sizes->cs_cachep) {
1512 sizes->cs_cachep = kmem_cache_create(names->name, 1512 sizes->cs_cachep = kmem_cache_create(names->name,
1513 sizes->cs_size, 1513 sizes->cs_size,
1514 ARCH_KMALLOC_MINALIGN, 1514 ARCH_KMALLOC_MINALIGN,
1515 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1515 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1516 NULL, NULL); 1516 NULL, NULL);
1517 } 1517 }
1518 #ifdef CONFIG_ZONE_DMA 1518 #ifdef CONFIG_ZONE_DMA
1519 sizes->cs_dmacachep = kmem_cache_create( 1519 sizes->cs_dmacachep = kmem_cache_create(
1520 names->name_dma, 1520 names->name_dma,
1521 sizes->cs_size, 1521 sizes->cs_size,
1522 ARCH_KMALLOC_MINALIGN, 1522 ARCH_KMALLOC_MINALIGN,
1523 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| 1523 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1524 SLAB_PANIC, 1524 SLAB_PANIC,
1525 NULL, NULL); 1525 NULL, NULL);
1526 #endif 1526 #endif
1527 sizes++; 1527 sizes++;
1528 names++; 1528 names++;
1529 } 1529 }
1530 /* 4) Replace the bootstrap head arrays */ 1530 /* 4) Replace the bootstrap head arrays */
1531 { 1531 {
1532 struct array_cache *ptr; 1532 struct array_cache *ptr;
1533 1533
1534 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); 1534 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1535 1535
1536 local_irq_disable(); 1536 local_irq_disable();
1537 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); 1537 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1538 memcpy(ptr, cpu_cache_get(&cache_cache), 1538 memcpy(ptr, cpu_cache_get(&cache_cache),
1539 sizeof(struct arraycache_init)); 1539 sizeof(struct arraycache_init));
1540 /* 1540 /*
1541 * Do not assume that spinlocks can be initialized via memcpy: 1541 * Do not assume that spinlocks can be initialized via memcpy:
1542 */ 1542 */
1543 spin_lock_init(&ptr->lock); 1543 spin_lock_init(&ptr->lock);
1544 1544
1545 cache_cache.array[smp_processor_id()] = ptr; 1545 cache_cache.array[smp_processor_id()] = ptr;
1546 local_irq_enable(); 1546 local_irq_enable();
1547 1547
1548 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); 1548 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1549 1549
1550 local_irq_disable(); 1550 local_irq_disable();
1551 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) 1551 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
1552 != &initarray_generic.cache); 1552 != &initarray_generic.cache);
1553 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), 1553 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1554 sizeof(struct arraycache_init)); 1554 sizeof(struct arraycache_init));
1555 /* 1555 /*
1556 * Do not assume that spinlocks can be initialized via memcpy: 1556 * Do not assume that spinlocks can be initialized via memcpy:
1557 */ 1557 */
1558 spin_lock_init(&ptr->lock); 1558 spin_lock_init(&ptr->lock);
1559 1559
1560 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = 1560 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1561 ptr; 1561 ptr;
1562 local_irq_enable(); 1562 local_irq_enable();
1563 } 1563 }
1564 /* 5) Replace the bootstrap kmem_list3's */ 1564 /* 5) Replace the bootstrap kmem_list3's */
1565 { 1565 {
1566 int nid; 1566 int nid;
1567 1567
1568 /* Replace the static kmem_list3 structures for the boot cpu */ 1568 /* Replace the static kmem_list3 structures for the boot cpu */
1569 init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node); 1569 init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node);
1570 1570
1571 for_each_online_node(nid) { 1571 for_each_online_node(nid) {
1572 init_list(malloc_sizes[INDEX_AC].cs_cachep, 1572 init_list(malloc_sizes[INDEX_AC].cs_cachep,
1573 &initkmem_list3[SIZE_AC + nid], nid); 1573 &initkmem_list3[SIZE_AC + nid], nid);
1574 1574
1575 if (INDEX_AC != INDEX_L3) { 1575 if (INDEX_AC != INDEX_L3) {
1576 init_list(malloc_sizes[INDEX_L3].cs_cachep, 1576 init_list(malloc_sizes[INDEX_L3].cs_cachep,
1577 &initkmem_list3[SIZE_L3 + nid], nid); 1577 &initkmem_list3[SIZE_L3 + nid], nid);
1578 } 1578 }
1579 } 1579 }
1580 } 1580 }
1581 1581
1582 /* 6) resize the head arrays to their final sizes */ 1582 /* 6) resize the head arrays to their final sizes */
1583 { 1583 {
1584 struct kmem_cache *cachep; 1584 struct kmem_cache *cachep;
1585 mutex_lock(&cache_chain_mutex); 1585 mutex_lock(&cache_chain_mutex);
1586 list_for_each_entry(cachep, &cache_chain, next) 1586 list_for_each_entry(cachep, &cache_chain, next)
1587 if (enable_cpucache(cachep)) 1587 if (enable_cpucache(cachep))
1588 BUG(); 1588 BUG();
1589 mutex_unlock(&cache_chain_mutex); 1589 mutex_unlock(&cache_chain_mutex);
1590 } 1590 }
1591 1591
1592 /* Annotate slab for lockdep -- annotate the malloc caches */ 1592 /* Annotate slab for lockdep -- annotate the malloc caches */
1593 init_lock_keys(); 1593 init_lock_keys();
1594 1594
1595 1595
1596 /* Done! */ 1596 /* Done! */
1597 g_cpucache_up = FULL; 1597 g_cpucache_up = FULL;
1598 1598
1599 /* 1599 /*
1600 * Register a cpu startup notifier callback that initializes 1600 * Register a cpu startup notifier callback that initializes
1601 * cpu_cache_get for all new cpus 1601 * cpu_cache_get for all new cpus
1602 */ 1602 */
1603 register_cpu_notifier(&cpucache_notifier); 1603 register_cpu_notifier(&cpucache_notifier);
1604 1604
1605 /* 1605 /*
1606 * The reap timers are started later, with a module init call: That part 1606 * The reap timers are started later, with a module init call: That part
1607 * of the kernel is not yet operational. 1607 * of the kernel is not yet operational.
1608 */ 1608 */
1609 } 1609 }
1610 1610
1611 static int __init cpucache_init(void) 1611 static int __init cpucache_init(void)
1612 { 1612 {
1613 int cpu; 1613 int cpu;
1614 1614
1615 /* 1615 /*
1616 * Register the timers that return unneeded pages to the page allocator 1616 * Register the timers that return unneeded pages to the page allocator
1617 */ 1617 */
1618 for_each_online_cpu(cpu) 1618 for_each_online_cpu(cpu)
1619 start_cpu_timer(cpu); 1619 start_cpu_timer(cpu);
1620 return 0; 1620 return 0;
1621 } 1621 }
1622 __initcall(cpucache_init); 1622 __initcall(cpucache_init);
1623 1623
1624 /* 1624 /*
1625 * Interface to system's page allocator. No need to hold the cache-lock. 1625 * Interface to system's page allocator. No need to hold the cache-lock.
1626 * 1626 *
1627 * If we requested dmaable memory, we will get it. Even if we 1627 * If we requested dmaable memory, we will get it. Even if we
1628 * did not request dmaable memory, we might get it, but that 1628 * did not request dmaable memory, we might get it, but that
1629 * would be relatively rare and ignorable. 1629 * would be relatively rare and ignorable.
1630 */ 1630 */
1631 static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) 1631 static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1632 { 1632 {
1633 struct page *page; 1633 struct page *page;
1634 int nr_pages; 1634 int nr_pages;
1635 int i; 1635 int i;
1636 1636
1637 #ifndef CONFIG_MMU 1637 #ifndef CONFIG_MMU
1638 /* 1638 /*
1639 * Nommu uses slabs for process anonymous memory allocations, and thus 1639 * Nommu uses slabs for process anonymous memory allocations, and thus
1640 * requires __GFP_COMP to properly refcount higher order allocations 1640 * requires __GFP_COMP to properly refcount higher order allocations
1641 */ 1641 */
1642 flags |= __GFP_COMP; 1642 flags |= __GFP_COMP;
1643 #endif 1643 #endif
1644 1644
1645 flags |= cachep->gfpflags; 1645 flags |= cachep->gfpflags;
1646 1646
1647 page = alloc_pages_node(nodeid, flags, cachep->gfporder); 1647 page = alloc_pages_node(nodeid, flags, cachep->gfporder);
1648 if (!page) 1648 if (!page)
1649 return NULL; 1649 return NULL;
1650 1650
1651 nr_pages = (1 << cachep->gfporder); 1651 nr_pages = (1 << cachep->gfporder);
1652 if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 1652 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1653 add_zone_page_state(page_zone(page), 1653 add_zone_page_state(page_zone(page),
1654 NR_SLAB_RECLAIMABLE, nr_pages); 1654 NR_SLAB_RECLAIMABLE, nr_pages);
1655 else 1655 else
1656 add_zone_page_state(page_zone(page), 1656 add_zone_page_state(page_zone(page),
1657 NR_SLAB_UNRECLAIMABLE, nr_pages); 1657 NR_SLAB_UNRECLAIMABLE, nr_pages);
1658 for (i = 0; i < nr_pages; i++) 1658 for (i = 0; i < nr_pages; i++)
1659 __SetPageSlab(page + i); 1659 __SetPageSlab(page + i);
1660 return page_address(page); 1660 return page_address(page);
1661 } 1661 }
1662 1662
1663 /* 1663 /*
1664 * Interface to system's page release. 1664 * Interface to system's page release.
1665 */ 1665 */
1666 static void kmem_freepages(struct kmem_cache *cachep, void *addr) 1666 static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1667 { 1667 {
1668 unsigned long i = (1 << cachep->gfporder); 1668 unsigned long i = (1 << cachep->gfporder);
1669 struct page *page = virt_to_page(addr); 1669 struct page *page = virt_to_page(addr);
1670 const unsigned long nr_freed = i; 1670 const unsigned long nr_freed = i;
1671 1671
1672 if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 1672 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1673 sub_zone_page_state(page_zone(page), 1673 sub_zone_page_state(page_zone(page),
1674 NR_SLAB_RECLAIMABLE, nr_freed); 1674 NR_SLAB_RECLAIMABLE, nr_freed);
1675 else 1675 else
1676 sub_zone_page_state(page_zone(page), 1676 sub_zone_page_state(page_zone(page),
1677 NR_SLAB_UNRECLAIMABLE, nr_freed); 1677 NR_SLAB_UNRECLAIMABLE, nr_freed);
1678 while (i--) { 1678 while (i--) {
1679 BUG_ON(!PageSlab(page)); 1679 BUG_ON(!PageSlab(page));
1680 __ClearPageSlab(page); 1680 __ClearPageSlab(page);
1681 page++; 1681 page++;
1682 } 1682 }
1683 if (current->reclaim_state) 1683 if (current->reclaim_state)
1684 current->reclaim_state->reclaimed_slab += nr_freed; 1684 current->reclaim_state->reclaimed_slab += nr_freed;
1685 free_pages((unsigned long)addr, cachep->gfporder); 1685 free_pages((unsigned long)addr, cachep->gfporder);
1686 } 1686 }
1687 1687
1688 static void kmem_rcu_free(struct rcu_head *head) 1688 static void kmem_rcu_free(struct rcu_head *head)
1689 { 1689 {
1690 struct slab_rcu *slab_rcu = (struct slab_rcu *)head; 1690 struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
1691 struct kmem_cache *cachep = slab_rcu->cachep; 1691 struct kmem_cache *cachep = slab_rcu->cachep;
1692 1692
1693 kmem_freepages(cachep, slab_rcu->addr); 1693 kmem_freepages(cachep, slab_rcu->addr);
1694 if (OFF_SLAB(cachep)) 1694 if (OFF_SLAB(cachep))
1695 kmem_cache_free(cachep->slabp_cache, slab_rcu); 1695 kmem_cache_free(cachep->slabp_cache, slab_rcu);
1696 } 1696 }
1697 1697
1698 #if DEBUG 1698 #if DEBUG
1699 1699
1700 #ifdef CONFIG_DEBUG_PAGEALLOC 1700 #ifdef CONFIG_DEBUG_PAGEALLOC
1701 static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, 1701 static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
1702 unsigned long caller) 1702 unsigned long caller)
1703 { 1703 {
1704 int size = obj_size(cachep); 1704 int size = obj_size(cachep);
1705 1705
1706 addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; 1706 addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
1707 1707
1708 if (size < 5 * sizeof(unsigned long)) 1708 if (size < 5 * sizeof(unsigned long))
1709 return; 1709 return;
1710 1710
1711 *addr++ = 0x12345678; 1711 *addr++ = 0x12345678;
1712 *addr++ = caller; 1712 *addr++ = caller;
1713 *addr++ = smp_processor_id(); 1713 *addr++ = smp_processor_id();
1714 size -= 3 * sizeof(unsigned long); 1714 size -= 3 * sizeof(unsigned long);
1715 { 1715 {
1716 unsigned long *sptr = &caller; 1716 unsigned long *sptr = &caller;
1717 unsigned long svalue; 1717 unsigned long svalue;
1718 1718
1719 while (!kstack_end(sptr)) { 1719 while (!kstack_end(sptr)) {
1720 svalue = *sptr++; 1720 svalue = *sptr++;
1721 if (kernel_text_address(svalue)) { 1721 if (kernel_text_address(svalue)) {
1722 *addr++ = svalue; 1722 *addr++ = svalue;
1723 size -= sizeof(unsigned long); 1723 size -= sizeof(unsigned long);
1724 if (size <= sizeof(unsigned long)) 1724 if (size <= sizeof(unsigned long))
1725 break; 1725 break;
1726 } 1726 }
1727 } 1727 }
1728 1728
1729 } 1729 }
1730 *addr++ = 0x87654321; 1730 *addr++ = 0x87654321;
1731 } 1731 }
1732 #endif 1732 #endif
1733 1733
1734 static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) 1734 static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1735 { 1735 {
1736 int size = obj_size(cachep); 1736 int size = obj_size(cachep);
1737 addr = &((char *)addr)[obj_offset(cachep)]; 1737 addr = &((char *)addr)[obj_offset(cachep)];
1738 1738
1739 memset(addr, val, size); 1739 memset(addr, val, size);
1740 *(unsigned char *)(addr + size - 1) = POISON_END; 1740 *(unsigned char *)(addr + size - 1) = POISON_END;
1741 } 1741 }
1742 1742
1743 static void dump_line(char *data, int offset, int limit) 1743 static void dump_line(char *data, int offset, int limit)
1744 { 1744 {
1745 int i; 1745 int i;
1746 unsigned char error = 0; 1746 unsigned char error = 0;
1747 int bad_count = 0; 1747 int bad_count = 0;
1748 1748
1749 printk(KERN_ERR "%03x:", offset); 1749 printk(KERN_ERR "%03x:", offset);
1750 for (i = 0; i < limit; i++) { 1750 for (i = 0; i < limit; i++) {
1751 if (data[offset + i] != POISON_FREE) { 1751 if (data[offset + i] != POISON_FREE) {
1752 error = data[offset + i]; 1752 error = data[offset + i];
1753 bad_count++; 1753 bad_count++;
1754 } 1754 }
1755 printk(" %02x", (unsigned char)data[offset + i]); 1755 printk(" %02x", (unsigned char)data[offset + i]);
1756 } 1756 }
1757 printk("\n"); 1757 printk("\n");
1758 1758
1759 if (bad_count == 1) { 1759 if (bad_count == 1) {
1760 error ^= POISON_FREE; 1760 error ^= POISON_FREE;
1761 if (!(error & (error - 1))) { 1761 if (!(error & (error - 1))) {
1762 printk(KERN_ERR "Single bit error detected. Probably " 1762 printk(KERN_ERR "Single bit error detected. Probably "
1763 "bad RAM.\n"); 1763 "bad RAM.\n");
1764 #ifdef CONFIG_X86 1764 #ifdef CONFIG_X86
1765 printk(KERN_ERR "Run memtest86+ or a similar memory " 1765 printk(KERN_ERR "Run memtest86+ or a similar memory "
1766 "test tool.\n"); 1766 "test tool.\n");
1767 #else 1767 #else
1768 printk(KERN_ERR "Run a memory test tool.\n"); 1768 printk(KERN_ERR "Run a memory test tool.\n");
1769 #endif 1769 #endif
1770 } 1770 }
1771 } 1771 }
1772 } 1772 }
1773 #endif 1773 #endif
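dump_line() above reports a probable single-bit RAM error when exactly one byte differs from POISON_FREE and the XOR of the two values has a single bit set, which is what !(error & (error - 1)) tests. A tiny standalone demonstration of that power-of-two check; the sample bytes are made up:

#include <stdio.h>

#define POISON_FREE 0x6b        /* byte slab uses to poison freed objects */

/* True when exactly one bit is set: the classic power-of-two test. */
static int single_bit(unsigned char x)
{
        return x && !(x & (x - 1));
}

int main(void)
{
        unsigned char samples[] = { 0x6a, 0x7b, 0x00, 0x6b };  /* made-up bytes */
        int i;

        for (i = 0; i < 4; i++) {
                unsigned char diff = samples[i] ^ POISON_FREE;
                printf("0x%02x: %s\n", samples[i],
                       single_bit(diff) ? "single-bit flip" :
                       diff ? "multi-bit corruption" : "clean");
        }
        return 0;
}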
1774 1774
1775 #if DEBUG 1775 #if DEBUG
1776 1776
1777 static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) 1777 static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1778 { 1778 {
1779 int i, size; 1779 int i, size;
1780 char *realobj; 1780 char *realobj;
1781 1781
1782 if (cachep->flags & SLAB_RED_ZONE) { 1782 if (cachep->flags & SLAB_RED_ZONE) {
1783 printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n", 1783 printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
1784 *dbg_redzone1(cachep, objp), 1784 *dbg_redzone1(cachep, objp),
1785 *dbg_redzone2(cachep, objp)); 1785 *dbg_redzone2(cachep, objp));
1786 } 1786 }
1787 1787
1788 if (cachep->flags & SLAB_STORE_USER) { 1788 if (cachep->flags & SLAB_STORE_USER) {
1789 printk(KERN_ERR "Last user: [<%p>]", 1789 printk(KERN_ERR "Last user: [<%p>]",
1790 *dbg_userword(cachep, objp)); 1790 *dbg_userword(cachep, objp));
1791 print_symbol("(%s)", 1791 print_symbol("(%s)",
1792 (unsigned long)*dbg_userword(cachep, objp)); 1792 (unsigned long)*dbg_userword(cachep, objp));
1793 printk("\n"); 1793 printk("\n");
1794 } 1794 }
1795 realobj = (char *)objp + obj_offset(cachep); 1795 realobj = (char *)objp + obj_offset(cachep);
1796 size = obj_size(cachep); 1796 size = obj_size(cachep);
1797 for (i = 0; i < size && lines; i += 16, lines--) { 1797 for (i = 0; i < size && lines; i += 16, lines--) {
1798 int limit; 1798 int limit;
1799 limit = 16; 1799 limit = 16;
1800 if (i + limit > size) 1800 if (i + limit > size)
1801 limit = size - i; 1801 limit = size - i;
1802 dump_line(realobj, i, limit); 1802 dump_line(realobj, i, limit);
1803 } 1803 }
1804 } 1804 }
1805 1805
1806 static void check_poison_obj(struct kmem_cache *cachep, void *objp) 1806 static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1807 { 1807 {
1808 char *realobj; 1808 char *realobj;
1809 int size, i; 1809 int size, i;
1810 int lines = 0; 1810 int lines = 0;
1811 1811
1812 realobj = (char *)objp + obj_offset(cachep); 1812 realobj = (char *)objp + obj_offset(cachep);
1813 size = obj_size(cachep); 1813 size = obj_size(cachep);
1814 1814
1815 for (i = 0; i < size; i++) { 1815 for (i = 0; i < size; i++) {
1816 char exp = POISON_FREE; 1816 char exp = POISON_FREE;
1817 if (i == size - 1) 1817 if (i == size - 1)
1818 exp = POISON_END; 1818 exp = POISON_END;
1819 if (realobj[i] != exp) { 1819 if (realobj[i] != exp) {
1820 int limit; 1820 int limit;
1821 /* Mismatch ! */ 1821 /* Mismatch ! */
1822 /* Print header */ 1822 /* Print header */
1823 if (lines == 0) { 1823 if (lines == 0) {
1824 printk(KERN_ERR 1824 printk(KERN_ERR
1825 "Slab corruption: %s start=%p, len=%d\n", 1825 "Slab corruption: %s start=%p, len=%d\n",
1826 cachep->name, realobj, size); 1826 cachep->name, realobj, size);
1827 print_objinfo(cachep, objp, 0); 1827 print_objinfo(cachep, objp, 0);
1828 } 1828 }
1829 /* Hexdump the affected line */ 1829 /* Hexdump the affected line */
1830 i = (i / 16) * 16; 1830 i = (i / 16) * 16;
1831 limit = 16; 1831 limit = 16;
1832 if (i + limit > size) 1832 if (i + limit > size)
1833 limit = size - i; 1833 limit = size - i;
1834 dump_line(realobj, i, limit); 1834 dump_line(realobj, i, limit);
1835 i += 16; 1835 i += 16;
1836 lines++; 1836 lines++;
1837 /* Limit to 5 lines */ 1837 /* Limit to 5 lines */
1838 if (lines > 5) 1838 if (lines > 5)
1839 break; 1839 break;
1840 } 1840 }
1841 } 1841 }
1842 if (lines != 0) { 1842 if (lines != 0) {
1843 /* Print some data about the neighboring objects, if they 1843 /* Print some data about the neighboring objects, if they
1844 * exist: 1844 * exist:
1845 */ 1845 */
1846 struct slab *slabp = virt_to_slab(objp); 1846 struct slab *slabp = virt_to_slab(objp);
1847 unsigned int objnr; 1847 unsigned int objnr;
1848 1848
1849 objnr = obj_to_index(cachep, slabp, objp); 1849 objnr = obj_to_index(cachep, slabp, objp);
1850 if (objnr) { 1850 if (objnr) {
1851 objp = index_to_obj(cachep, slabp, objnr - 1); 1851 objp = index_to_obj(cachep, slabp, objnr - 1);
1852 realobj = (char *)objp + obj_offset(cachep); 1852 realobj = (char *)objp + obj_offset(cachep);
1853 printk(KERN_ERR "Prev obj: start=%p, len=%d\n", 1853 printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
1854 realobj, size); 1854 realobj, size);
1855 print_objinfo(cachep, objp, 2); 1855 print_objinfo(cachep, objp, 2);
1856 } 1856 }
1857 if (objnr + 1 < cachep->num) { 1857 if (objnr + 1 < cachep->num) {
1858 objp = index_to_obj(cachep, slabp, objnr + 1); 1858 objp = index_to_obj(cachep, slabp, objnr + 1);
1859 realobj = (char *)objp + obj_offset(cachep); 1859 realobj = (char *)objp + obj_offset(cachep);
1860 printk(KERN_ERR "Next obj: start=%p, len=%d\n", 1860 printk(KERN_ERR "Next obj: start=%p, len=%d\n",
1861 realobj, size); 1861 realobj, size);
1862 print_objinfo(cachep, objp, 2); 1862 print_objinfo(cachep, objp, 2);
1863 } 1863 }
1864 } 1864 }
1865 } 1865 }
1866 #endif 1866 #endif
1867 1867
1868 #if DEBUG 1868 #if DEBUG
1869 /** 1869 /**
1870 * slab_destroy_objs - destroy a slab and its objects 1870 * slab_destroy_objs - destroy a slab and its objects
1871 * @cachep: cache pointer being destroyed 1871 * @cachep: cache pointer being destroyed
1872 * @slabp: slab pointer being destroyed 1872 * @slabp: slab pointer being destroyed
1873 * 1873 *
1874 * Call the registered destructor for each object in a slab that is being 1874 * Call the registered destructor for each object in a slab that is being
1875 * destroyed. 1875 * destroyed.
1876 */ 1876 */
1877 static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) 1877 static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
1878 { 1878 {
1879 int i; 1879 int i;
1880 for (i = 0; i < cachep->num; i++) { 1880 for (i = 0; i < cachep->num; i++) {
1881 void *objp = index_to_obj(cachep, slabp, i); 1881 void *objp = index_to_obj(cachep, slabp, i);
1882 1882
1883 if (cachep->flags & SLAB_POISON) { 1883 if (cachep->flags & SLAB_POISON) {
1884 #ifdef CONFIG_DEBUG_PAGEALLOC 1884 #ifdef CONFIG_DEBUG_PAGEALLOC
1885 if (cachep->buffer_size % PAGE_SIZE == 0 && 1885 if (cachep->buffer_size % PAGE_SIZE == 0 &&
1886 OFF_SLAB(cachep)) 1886 OFF_SLAB(cachep))
1887 kernel_map_pages(virt_to_page(objp), 1887 kernel_map_pages(virt_to_page(objp),
1888 cachep->buffer_size / PAGE_SIZE, 1); 1888 cachep->buffer_size / PAGE_SIZE, 1);
1889 else 1889 else
1890 check_poison_obj(cachep, objp); 1890 check_poison_obj(cachep, objp);
1891 #else 1891 #else
1892 check_poison_obj(cachep, objp); 1892 check_poison_obj(cachep, objp);
1893 #endif 1893 #endif
1894 } 1894 }
1895 if (cachep->flags & SLAB_RED_ZONE) { 1895 if (cachep->flags & SLAB_RED_ZONE) {
1896 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) 1896 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
1897 slab_error(cachep, "start of a freed object " 1897 slab_error(cachep, "start of a freed object "
1898 "was overwritten"); 1898 "was overwritten");
1899 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) 1899 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
1900 slab_error(cachep, "end of a freed object " 1900 slab_error(cachep, "end of a freed object "
1901 "was overwritten"); 1901 "was overwritten");
1902 } 1902 }
1903 if (cachep->dtor && !(cachep->flags & SLAB_POISON)) 1903 if (cachep->dtor && !(cachep->flags & SLAB_POISON))
1904 (cachep->dtor) (objp + obj_offset(cachep), cachep, 0); 1904 (cachep->dtor) (objp + obj_offset(cachep), cachep, 0);
1905 } 1905 }
1906 } 1906 }
1907 #else 1907 #else
1908 static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) 1908 static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
1909 { 1909 {
1910 if (cachep->dtor) { 1910 if (cachep->dtor) {
1911 int i; 1911 int i;
1912 for (i = 0; i < cachep->num; i++) { 1912 for (i = 0; i < cachep->num; i++) {
1913 void *objp = index_to_obj(cachep, slabp, i); 1913 void *objp = index_to_obj(cachep, slabp, i);
1914 (cachep->dtor) (objp, cachep, 0); 1914 (cachep->dtor) (objp, cachep, 0);
1915 } 1915 }
1916 } 1916 }
1917 } 1917 }
1918 #endif 1918 #endif
1919 1919
1920 /** 1920 /**
1921 * slab_destroy - destroy and release all objects in a slab 1921 * slab_destroy - destroy and release all objects in a slab
1922 * @cachep: cache pointer being destroyed 1922 * @cachep: cache pointer being destroyed
1923 * @slabp: slab pointer being destroyed 1923 * @slabp: slab pointer being destroyed
1924 * 1924 *
1925 * Destroy all the objs in a slab, and release the mem back to the system. 1925 * Destroy all the objs in a slab, and release the mem back to the system.
1926 * Before calling the slab must have been unlinked from the cache. The 1926 * Before calling the slab must have been unlinked from the cache. The
1927 * cache-lock is not held/needed. 1927 * cache-lock is not held/needed.
1928 */ 1928 */
1929 static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) 1929 static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1930 { 1930 {
1931 void *addr = slabp->s_mem - slabp->colouroff; 1931 void *addr = slabp->s_mem - slabp->colouroff;
1932 1932
1933 slab_destroy_objs(cachep, slabp); 1933 slab_destroy_objs(cachep, slabp);
1934 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { 1934 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
1935 struct slab_rcu *slab_rcu; 1935 struct slab_rcu *slab_rcu;
1936 1936
1937 slab_rcu = (struct slab_rcu *)slabp; 1937 slab_rcu = (struct slab_rcu *)slabp;
1938 slab_rcu->cachep = cachep; 1938 slab_rcu->cachep = cachep;
1939 slab_rcu->addr = addr; 1939 slab_rcu->addr = addr;
1940 call_rcu(&slab_rcu->head, kmem_rcu_free); 1940 call_rcu(&slab_rcu->head, kmem_rcu_free);
1941 } else { 1941 } else {
1942 kmem_freepages(cachep, addr); 1942 kmem_freepages(cachep, addr);
1943 if (OFF_SLAB(cachep)) 1943 if (OFF_SLAB(cachep))
1944 kmem_cache_free(cachep->slabp_cache, slabp); 1944 kmem_cache_free(cachep->slabp_cache, slabp);
1945 } 1945 }
1946 } 1946 }
1947 1947
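When SLAB_DESTROY_BY_RCU is set, slab_destroy() above hands the page release to call_rcu() via kmem_rcu_free(), so a reader still walking the slab under rcu_read_lock() never sees its memory disappear. A minimal sketch of the same deferral pattern applied to an ordinary kmalloc'd object; the structure and function names are illustrative and not part of slab:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_node {                        /* illustrative structure */
        int value;
        struct rcu_head rcu;
};

/* Runs after a grace period, once no RCU reader can still hold a pointer. */
static void my_node_rcu_free(struct rcu_head *head)
{
        struct my_node *n = container_of(head, struct my_node, rcu);

        kfree(n);
}

static void my_node_release(struct my_node *n)
{
        /* Defer the actual free, just as slab_destroy() defers kmem_freepages(). */
        call_rcu(&n->rcu, my_node_rcu_free);
}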
1948 /* 1948 /*
1949 * For setting up all the kmem_list3s for caches whose buffer_size is the same 1949 * For setting up all the kmem_list3s for caches whose buffer_size is the same
1950 * as the size of kmem_list3. 1950 * as the size of kmem_list3.
1951 */ 1951 */
1952 static void __init set_up_list3s(struct kmem_cache *cachep, int index) 1952 static void __init set_up_list3s(struct kmem_cache *cachep, int index)
1953 { 1953 {
1954 int node; 1954 int node;
1955 1955
1956 for_each_online_node(node) { 1956 for_each_online_node(node) {
1957 cachep->nodelists[node] = &initkmem_list3[index + node]; 1957 cachep->nodelists[node] = &initkmem_list3[index + node];
1958 cachep->nodelists[node]->next_reap = jiffies + 1958 cachep->nodelists[node]->next_reap = jiffies +
1959 REAPTIMEOUT_LIST3 + 1959 REAPTIMEOUT_LIST3 +
1960 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 1960 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1961 } 1961 }
1962 } 1962 }
1963 1963
1964 static void __kmem_cache_destroy(struct kmem_cache *cachep) 1964 static void __kmem_cache_destroy(struct kmem_cache *cachep)
1965 { 1965 {
1966 int i; 1966 int i;
1967 struct kmem_list3 *l3; 1967 struct kmem_list3 *l3;
1968 1968
1969 for_each_online_cpu(i) 1969 for_each_online_cpu(i)
1970 kfree(cachep->array[i]); 1970 kfree(cachep->array[i]);
1971 1971
1972 /* NUMA: free the list3 structures */ 1972 /* NUMA: free the list3 structures */
1973 for_each_online_node(i) { 1973 for_each_online_node(i) {
1974 l3 = cachep->nodelists[i]; 1974 l3 = cachep->nodelists[i];
1975 if (l3) { 1975 if (l3) {
1976 kfree(l3->shared); 1976 kfree(l3->shared);
1977 free_alien_cache(l3->alien); 1977 free_alien_cache(l3->alien);
1978 kfree(l3); 1978 kfree(l3);
1979 } 1979 }
1980 } 1980 }
1981 kmem_cache_free(&cache_cache, cachep); 1981 kmem_cache_free(&cache_cache, cachep);
1982 } 1982 }
1983 1983
1984 1984
1985 /** 1985 /**
1986 * calculate_slab_order - calculate size (page order) of slabs 1986 * calculate_slab_order - calculate size (page order) of slabs
1987 * @cachep: pointer to the cache that is being created 1987 * @cachep: pointer to the cache that is being created
1988 * @size: size of objects to be created in this cache. 1988 * @size: size of objects to be created in this cache.
1989 * @align: required alignment for the objects. 1989 * @align: required alignment for the objects.
1990 * @flags: slab allocation flags 1990 * @flags: slab allocation flags
1991 * 1991 *
1992 * Also calculates the number of objects per slab. 1992 * Also calculates the number of objects per slab.
1993 * 1993 *
1994 * This could be made much more intelligent. For now, try to avoid using 1994 * This could be made much more intelligent. For now, try to avoid using
1995 * high order pages for slabs. When the gfp() functions are more friendly 1995 * high order pages for slabs. When the gfp() functions are more friendly
1996 * towards high-order requests, this should be changed. 1996 * towards high-order requests, this should be changed.
1997 */ 1997 */
1998 static size_t calculate_slab_order(struct kmem_cache *cachep, 1998 static size_t calculate_slab_order(struct kmem_cache *cachep,
1999 size_t size, size_t align, unsigned long flags) 1999 size_t size, size_t align, unsigned long flags)
2000 { 2000 {
2001 unsigned long offslab_limit; 2001 unsigned long offslab_limit;
2002 size_t left_over = 0; 2002 size_t left_over = 0;
2003 int gfporder; 2003 int gfporder;
2004 2004
2005 for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) { 2005 for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) {
2006 unsigned int num; 2006 unsigned int num;
2007 size_t remainder; 2007 size_t remainder;
2008 2008
2009 cache_estimate(gfporder, size, align, flags, &remainder, &num); 2009 cache_estimate(gfporder, size, align, flags, &remainder, &num);
2010 if (!num) 2010 if (!num)
2011 continue; 2011 continue;
2012 2012
2013 if (flags & CFLGS_OFF_SLAB) { 2013 if (flags & CFLGS_OFF_SLAB) {
2014 /* 2014 /*
2015 * Max number of objs-per-slab for caches which 2015 * Max number of objs-per-slab for caches which
2016 * use off-slab slabs. Needed to avoid a possible 2016 * use off-slab slabs. Needed to avoid a possible
2017 * looping condition in cache_grow(). 2017 * looping condition in cache_grow().
2018 */ 2018 */
2019 offslab_limit = size - sizeof(struct slab); 2019 offslab_limit = size - sizeof(struct slab);
2020 offslab_limit /= sizeof(kmem_bufctl_t); 2020 offslab_limit /= sizeof(kmem_bufctl_t);
2021 2021
2022 if (num > offslab_limit) 2022 if (num > offslab_limit)
2023 break; 2023 break;
2024 } 2024 }
2025 2025
2026 /* Found something acceptable - save it away */ 2026 /* Found something acceptable - save it away */
2027 cachep->num = num; 2027 cachep->num = num;
2028 cachep->gfporder = gfporder; 2028 cachep->gfporder = gfporder;
2029 left_over = remainder; 2029 left_over = remainder;
2030 2030
2031 /* 2031 /*
2032 * A VFS-reclaimable slab tends to have most allocations 2032 * A VFS-reclaimable slab tends to have most allocations
2033 * as GFP_NOFS and we really don't want to have to be allocating 2033 * as GFP_NOFS and we really don't want to have to be allocating
2034 * higher-order pages when we are unable to shrink dcache. 2034 * higher-order pages when we are unable to shrink dcache.
2035 */ 2035 */
2036 if (flags & SLAB_RECLAIM_ACCOUNT) 2036 if (flags & SLAB_RECLAIM_ACCOUNT)
2037 break; 2037 break;
2038 2038
2039 /* 2039 /*
2040 * Large number of objects is good, but very large slabs are 2040 * Large number of objects is good, but very large slabs are
2041 * currently bad for the gfp()s. 2041 * currently bad for the gfp()s.
2042 */ 2042 */
2043 if (gfporder >= slab_break_gfp_order) 2043 if (gfporder >= slab_break_gfp_order)
2044 break; 2044 break;
2045 2045
2046 /* 2046 /*
2047 * Acceptable internal fragmentation? 2047 * Acceptable internal fragmentation?
2048 */ 2048 */
2049 if (left_over * 8 <= (PAGE_SIZE << gfporder)) 2049 if (left_over * 8 <= (PAGE_SIZE << gfporder))
2050 break; 2050 break;
2051 } 2051 }
2052 return left_over; 2052 return left_over;
2053 } 2053 }
2054 2054
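calculate_slab_order() above walks the page orders, asks cache_estimate() how many objects fit and how much space is left over, and settles on the first order whose waste is at most one eighth of the slab. A simplified standalone sketch of that search; it ignores the off-slab bufctl bookkeeping and colouring that cache_estimate() handles:

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define MAX_ORDER 11

int main(void)
{
        size_t size = 700;                       /* example object size */
        int order;

        for (order = 0; order < MAX_ORDER; order++) {
                size_t slab = PAGE_SIZE << order;
                size_t num = slab / size;        /* objects that fit */
                size_t left = slab - num * size; /* internal fragmentation */

                if (!num)
                        continue;
                printf("order %d: %zu objects, %zu bytes left over\n",
                       order, num, left);
                /* same acceptance test as the kernel: waste <= slab/8 */
                if (left * 8 <= slab)
                        break;
        }
        return 0;
}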
2055 static int setup_cpu_cache(struct kmem_cache *cachep) 2055 static int setup_cpu_cache(struct kmem_cache *cachep)
2056 { 2056 {
2057 if (g_cpucache_up == FULL) 2057 if (g_cpucache_up == FULL)
2058 return enable_cpucache(cachep); 2058 return enable_cpucache(cachep);
2059 2059
2060 if (g_cpucache_up == NONE) { 2060 if (g_cpucache_up == NONE) {
2061 /* 2061 /*
2062 * Note: the first kmem_cache_create must create the cache 2062 * Note: the first kmem_cache_create must create the cache
2063 * that's used by kmalloc(24), otherwise the creation of 2063 * that's used by kmalloc(24), otherwise the creation of
2064 * further caches will BUG(). 2064 * further caches will BUG().
2065 */ 2065 */
2066 cachep->array[smp_processor_id()] = &initarray_generic.cache; 2066 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2067 2067
2068 /* 2068 /*
2069 * If the cache that's used by kmalloc(sizeof(kmem_list3)) is 2069 * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
2070 * the first cache, then we need to set up all its list3s, 2070 * the first cache, then we need to set up all its list3s,
2071 * otherwise the creation of further caches will BUG(). 2071 * otherwise the creation of further caches will BUG().
2072 */ 2072 */
2073 set_up_list3s(cachep, SIZE_AC); 2073 set_up_list3s(cachep, SIZE_AC);
2074 if (INDEX_AC == INDEX_L3) 2074 if (INDEX_AC == INDEX_L3)
2075 g_cpucache_up = PARTIAL_L3; 2075 g_cpucache_up = PARTIAL_L3;
2076 else 2076 else
2077 g_cpucache_up = PARTIAL_AC; 2077 g_cpucache_up = PARTIAL_AC;
2078 } else { 2078 } else {
2079 cachep->array[smp_processor_id()] = 2079 cachep->array[smp_processor_id()] =
2080 kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); 2080 kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
2081 2081
2082 if (g_cpucache_up == PARTIAL_AC) { 2082 if (g_cpucache_up == PARTIAL_AC) {
2083 set_up_list3s(cachep, SIZE_L3); 2083 set_up_list3s(cachep, SIZE_L3);
2084 g_cpucache_up = PARTIAL_L3; 2084 g_cpucache_up = PARTIAL_L3;
2085 } else { 2085 } else {
2086 int node; 2086 int node;
2087 for_each_online_node(node) { 2087 for_each_online_node(node) {
2088 cachep->nodelists[node] = 2088 cachep->nodelists[node] =
2089 kmalloc_node(sizeof(struct kmem_list3), 2089 kmalloc_node(sizeof(struct kmem_list3),
2090 GFP_KERNEL, node); 2090 GFP_KERNEL, node);
2091 BUG_ON(!cachep->nodelists[node]); 2091 BUG_ON(!cachep->nodelists[node]);
2092 kmem_list3_init(cachep->nodelists[node]); 2092 kmem_list3_init(cachep->nodelists[node]);
2093 } 2093 }
2094 } 2094 }
2095 } 2095 }
2096 cachep->nodelists[numa_node_id()]->next_reap = 2096 cachep->nodelists[numa_node_id()]->next_reap =
2097 jiffies + REAPTIMEOUT_LIST3 + 2097 jiffies + REAPTIMEOUT_LIST3 +
2098 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 2098 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2099 2099
2100 cpu_cache_get(cachep)->avail = 0; 2100 cpu_cache_get(cachep)->avail = 0;
2101 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; 2101 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2102 cpu_cache_get(cachep)->batchcount = 1; 2102 cpu_cache_get(cachep)->batchcount = 1;
2103 cpu_cache_get(cachep)->touched = 0; 2103 cpu_cache_get(cachep)->touched = 0;
2104 cachep->batchcount = 1; 2104 cachep->batchcount = 1;
2105 cachep->limit = BOOT_CPUCACHE_ENTRIES; 2105 cachep->limit = BOOT_CPUCACHE_ENTRIES;
2106 return 0; 2106 return 0;
2107 } 2107 }
2108 2108
2109 /** 2109 /**
2110 * kmem_cache_create - Create a cache. 2110 * kmem_cache_create - Create a cache.
2111 * @name: A string which is used in /proc/slabinfo to identify this cache. 2111 * @name: A string which is used in /proc/slabinfo to identify this cache.
2112 * @size: The size of objects to be created in this cache. 2112 * @size: The size of objects to be created in this cache.
2113 * @align: The required alignment for the objects. 2113 * @align: The required alignment for the objects.
2114 * @flags: SLAB flags 2114 * @flags: SLAB flags
2115 * @ctor: A constructor for the objects. 2115 * @ctor: A constructor for the objects.
2116 * @dtor: A destructor for the objects. 2116 * @dtor: A destructor for the objects.
2117 * 2117 *
2118 * Returns a ptr to the cache on success, NULL on failure. 2118 * Returns a ptr to the cache on success, NULL on failure.
2119 * Cannot be called within an interrupt, but can be interrupted. 2119 * Cannot be called within an interrupt, but can be interrupted.
2120 * The @ctor is run when new pages are allocated by the cache 2120 * The @ctor is run when new pages are allocated by the cache
2121 * and the @dtor is run before the pages are handed back. 2121 * and the @dtor is run before the pages are handed back.
2122 * 2122 *
2123 * @name must be valid until the cache is destroyed. This implies that 2123 * @name must be valid until the cache is destroyed. This implies that
2124 * the module calling this has to destroy the cache before getting unloaded. 2124 * the module calling this has to destroy the cache before getting unloaded.
2125 * 2125 *
2126 * The flags are 2126 * The flags are
2127 * 2127 *
2128 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) 2128 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
2129 * to catch references to uninitialised memory. 2129 * to catch references to uninitialised memory.
2130 * 2130 *
2131 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check 2131 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
2132 * for buffer overruns. 2132 * for buffer overruns.
2133 * 2133 *
2134 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware 2134 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
2135 * cacheline. This can be beneficial if you're counting cycles as closely 2135 * cacheline. This can be beneficial if you're counting cycles as closely
2136 * as davem. 2136 * as davem.
2137 */ 2137 */
2138 struct kmem_cache * 2138 struct kmem_cache *
2139 kmem_cache_create (const char *name, size_t size, size_t align, 2139 kmem_cache_create (const char *name, size_t size, size_t align,
2140 unsigned long flags, 2140 unsigned long flags,
2141 void (*ctor)(void*, struct kmem_cache *, unsigned long), 2141 void (*ctor)(void*, struct kmem_cache *, unsigned long),
2142 void (*dtor)(void*, struct kmem_cache *, unsigned long)) 2142 void (*dtor)(void*, struct kmem_cache *, unsigned long))
2143 { 2143 {
2144 size_t left_over, slab_size, ralign; 2144 size_t left_over, slab_size, ralign;
2145 struct kmem_cache *cachep = NULL, *pc; 2145 struct kmem_cache *cachep = NULL, *pc;
2146 2146
2147 /* 2147 /*
2148 * Sanity checks... these are all serious usage bugs. 2148 * Sanity checks... these are all serious usage bugs.
2149 */ 2149 */
2150 if (!name || in_interrupt() || (size < BYTES_PER_WORD) || 2150 if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
2151 (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { 2151 (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) {
2152 printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__, 2152 printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__,
2153 name); 2153 name);
2154 BUG(); 2154 BUG();
2155 } 2155 }
2156 2156
2157 /* 2157 /*
2158 * We use cache_chain_mutex to ensure a consistent view of 2158 * We use cache_chain_mutex to ensure a consistent view of
2159 * cpu_online_map as well. Please see cpuup_callback 2159 * cpu_online_map as well. Please see cpuup_callback
2160 */ 2160 */
2161 mutex_lock(&cache_chain_mutex); 2161 mutex_lock(&cache_chain_mutex);
2162 2162
2163 list_for_each_entry(pc, &cache_chain, next) { 2163 list_for_each_entry(pc, &cache_chain, next) {
2164 char tmp; 2164 char tmp;
2165 int res; 2165 int res;
2166 2166
2167 /* 2167 /*
2168 * This happens when the module gets unloaded and doesn't 2168 * This happens when the module gets unloaded and doesn't
2169 * destroy its slab cache and no-one else reuses the vmalloc 2169 * destroy its slab cache and no-one else reuses the vmalloc
2170 * area of the module. Print a warning. 2170 * area of the module. Print a warning.
2171 */ 2171 */
2172 res = probe_kernel_address(pc->name, tmp); 2172 res = probe_kernel_address(pc->name, tmp);
2173 if (res) { 2173 if (res) {
2174 printk(KERN_ERR 2174 printk(KERN_ERR
2175 "SLAB: cache with size %d has lost its name\n", 2175 "SLAB: cache with size %d has lost its name\n",
2176 pc->buffer_size); 2176 pc->buffer_size);
2177 continue; 2177 continue;
2178 } 2178 }
2179 2179
2180 if (!strcmp(pc->name, name)) { 2180 if (!strcmp(pc->name, name)) {
2181 printk(KERN_ERR 2181 printk(KERN_ERR
2182 "kmem_cache_create: duplicate cache %s\n", name); 2182 "kmem_cache_create: duplicate cache %s\n", name);
2183 dump_stack(); 2183 dump_stack();
2184 goto oops; 2184 goto oops;
2185 } 2185 }
2186 } 2186 }
2187 2187
2188 #if DEBUG 2188 #if DEBUG
2189 WARN_ON(strchr(name, ' ')); /* It confuses parsers */ 2189 WARN_ON(strchr(name, ' ')); /* It confuses parsers */
2190 #if FORCED_DEBUG 2190 #if FORCED_DEBUG
2191 /* 2191 /*
2192 * Enable redzoning and last user accounting, except for caches with 2192 * Enable redzoning and last user accounting, except for caches with
2193 * large objects, if the increased size would increase the object size 2193 * large objects, if the increased size would increase the object size
2194 * above the next power of two: caches with object sizes just above a 2194 * above the next power of two: caches with object sizes just above a
2195 * power of two have a significant amount of internal fragmentation. 2195 * power of two have a significant amount of internal fragmentation.
2196 */ 2196 */
2197 if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD)) 2197 if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD))
2198 flags |= SLAB_RED_ZONE | SLAB_STORE_USER; 2198 flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
2199 if (!(flags & SLAB_DESTROY_BY_RCU)) 2199 if (!(flags & SLAB_DESTROY_BY_RCU))
2200 flags |= SLAB_POISON; 2200 flags |= SLAB_POISON;
2201 #endif 2201 #endif
2202 if (flags & SLAB_DESTROY_BY_RCU) 2202 if (flags & SLAB_DESTROY_BY_RCU)
2203 BUG_ON(flags & SLAB_POISON); 2203 BUG_ON(flags & SLAB_POISON);
2204 #endif 2204 #endif
2205 if (flags & SLAB_DESTROY_BY_RCU) 2205 if (flags & SLAB_DESTROY_BY_RCU)
2206 BUG_ON(dtor); 2206 BUG_ON(dtor);
2207 2207
2208 /* 2208 /*
2209 * Always check flags; a caller might be expecting debug support which 2209 * Always check flags; a caller might be expecting debug support which
2210 * isn't available. 2210 * isn't available.
2211 */ 2211 */
2212 BUG_ON(flags & ~CREATE_MASK); 2212 BUG_ON(flags & ~CREATE_MASK);
2213 2213
2214 /* 2214 /*
2215 * Check that size is in terms of words. This is needed to avoid 2215 * Check that size is in terms of words. This is needed to avoid
2216 * unaligned accesses for some archs when redzoning is used, and makes 2216 * unaligned accesses for some archs when redzoning is used, and makes
2217 * sure any on-slab bufctl's are also correctly aligned. 2217 * sure any on-slab bufctl's are also correctly aligned.
2218 */ 2218 */
2219 if (size & (BYTES_PER_WORD - 1)) { 2219 if (size & (BYTES_PER_WORD - 1)) {
2220 size += (BYTES_PER_WORD - 1); 2220 size += (BYTES_PER_WORD - 1);
2221 size &= ~(BYTES_PER_WORD - 1); 2221 size &= ~(BYTES_PER_WORD - 1);
2222 } 2222 }
2223 2223
2224 /* calculate the final buffer alignment: */ 2224 /* calculate the final buffer alignment: */
2225 2225
2226 /* 1) arch recommendation: can be overridden for debug */ 2226 /* 1) arch recommendation: can be overridden for debug */
2227 if (flags & SLAB_HWCACHE_ALIGN) { 2227 if (flags & SLAB_HWCACHE_ALIGN) {
2228 /* 2228 /*
2229 * Default alignment: as specified by the arch code. Except if 2229 * Default alignment: as specified by the arch code. Except if
2230 * an object is really small, then squeeze multiple objects into 2230 * an object is really small, then squeeze multiple objects into
2231 * one cacheline. 2231 * one cacheline.
2232 */ 2232 */
2233 ralign = cache_line_size(); 2233 ralign = cache_line_size();
2234 while (size <= ralign / 2) 2234 while (size <= ralign / 2)
2235 ralign /= 2; 2235 ralign /= 2;
2236 } else { 2236 } else {
2237 ralign = BYTES_PER_WORD; 2237 ralign = BYTES_PER_WORD;
2238 } 2238 }
2239 2239
2240 /* 2240 /*
2241 * Redzoning and user store require word alignment. Note this will be 2241 * Redzoning and user store require word alignment. Note this will be
2242 * overridden by architecture or caller mandated alignment if either 2242 * overridden by architecture or caller mandated alignment if either
2243 * is greater than BYTES_PER_WORD. 2243 * is greater than BYTES_PER_WORD.
2244 */ 2244 */
2245 if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER) 2245 if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
2246 ralign = __alignof__(unsigned long long); 2246 ralign = __alignof__(unsigned long long);
2247 2247
2248 /* 2) arch mandated alignment */ 2248 /* 2) arch mandated alignment */
2249 if (ralign < ARCH_SLAB_MINALIGN) { 2249 if (ralign < ARCH_SLAB_MINALIGN) {
2250 ralign = ARCH_SLAB_MINALIGN; 2250 ralign = ARCH_SLAB_MINALIGN;
2251 } 2251 }
2252 /* 3) caller mandated alignment */ 2252 /* 3) caller mandated alignment */
2253 if (ralign < align) { 2253 if (ralign < align) {
2254 ralign = align; 2254 ralign = align;
2255 } 2255 }
2256 /* disable debug if necessary */ 2256 /* disable debug if necessary */
2257 if (ralign > __alignof__(unsigned long long)) 2257 if (ralign > __alignof__(unsigned long long))
2258 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); 2258 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2259 /* 2259 /*
2260 * 4) Store it. 2260 * 4) Store it.
2261 */ 2261 */
2262 align = ralign; 2262 align = ralign;
2263 2263
2264 /* Get cache's description obj. */ 2264 /* Get cache's description obj. */
2265 cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL); 2265 cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
2266 if (!cachep) 2266 if (!cachep)
2267 goto oops; 2267 goto oops;
2268 2268
2269 #if DEBUG 2269 #if DEBUG
2270 cachep->obj_size = size; 2270 cachep->obj_size = size;
2271 2271
2272 /* 2272 /*
2273 * Both debugging options require word-alignment which is calculated 2273 * Both debugging options require word-alignment which is calculated
2274 * into align above. 2274 * into align above.
2275 */ 2275 */
2276 if (flags & SLAB_RED_ZONE) { 2276 if (flags & SLAB_RED_ZONE) {
2277 /* add space for red zone words */ 2277 /* add space for red zone words */
2278 cachep->obj_offset += sizeof(unsigned long long); 2278 cachep->obj_offset += sizeof(unsigned long long);
2279 size += 2 * sizeof(unsigned long long); 2279 size += 2 * sizeof(unsigned long long);
2280 } 2280 }
2281 if (flags & SLAB_STORE_USER) { 2281 if (flags & SLAB_STORE_USER) {
2282 /* user store requires one word storage behind the end of 2282 /* user store requires one word storage behind the end of
2283 * the real object. 2283 * the real object.
2284 */ 2284 */
2285 size += BYTES_PER_WORD; 2285 size += BYTES_PER_WORD;
2286 } 2286 }
2287 #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) 2287 #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
2288 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size 2288 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
2289 && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) { 2289 && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {
2290 cachep->obj_offset += PAGE_SIZE - size; 2290 cachep->obj_offset += PAGE_SIZE - size;
2291 size = PAGE_SIZE; 2291 size = PAGE_SIZE;
2292 } 2292 }
2293 #endif 2293 #endif
2294 #endif 2294 #endif
2295 2295
2296 /* 2296 /*
2297 * Determine if the slab management is 'on' or 'off' slab. 2297 * Determine if the slab management is 'on' or 'off' slab.
2298 * (bootstrapping cannot cope with offslab caches so don't do 2298 * (bootstrapping cannot cope with offslab caches so don't do
2299 * it too early on.) 2299 * it too early on.)
2300 */ 2300 */
2301 if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init) 2301 if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
2302 /* 2302 /*
2303 * Size is large, assume best to place the slab management obj 2303 * Size is large, assume best to place the slab management obj
2304 * off-slab (should allow better packing of objs). 2304 * off-slab (should allow better packing of objs).
2305 */ 2305 */
2306 flags |= CFLGS_OFF_SLAB; 2306 flags |= CFLGS_OFF_SLAB;
2307 2307
2308 size = ALIGN(size, align); 2308 size = ALIGN(size, align);
2309 2309
2310 left_over = calculate_slab_order(cachep, size, align, flags); 2310 left_over = calculate_slab_order(cachep, size, align, flags);
2311 2311
2312 if (!cachep->num) { 2312 if (!cachep->num) {
2313 printk(KERN_ERR 2313 printk(KERN_ERR
2314 "kmem_cache_create: couldn't create cache %s.\n", name); 2314 "kmem_cache_create: couldn't create cache %s.\n", name);
2315 kmem_cache_free(&cache_cache, cachep); 2315 kmem_cache_free(&cache_cache, cachep);
2316 cachep = NULL; 2316 cachep = NULL;
2317 goto oops; 2317 goto oops;
2318 } 2318 }
2319 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) 2319 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
2320 + sizeof(struct slab), align); 2320 + sizeof(struct slab), align);
2321 2321
2322 /* 2322 /*
2323 * If the slab has been placed off-slab, and we have enough space then 2323 * If the slab has been placed off-slab, and we have enough space then
2324 * move it on-slab. This is at the expense of any extra colouring. 2324 * move it on-slab. This is at the expense of any extra colouring.
2325 */ 2325 */
2326 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) { 2326 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
2327 flags &= ~CFLGS_OFF_SLAB; 2327 flags &= ~CFLGS_OFF_SLAB;
2328 left_over -= slab_size; 2328 left_over -= slab_size;
2329 } 2329 }
2330 2330
2331 if (flags & CFLGS_OFF_SLAB) { 2331 if (flags & CFLGS_OFF_SLAB) {
2332 /* really off slab. No need for manual alignment */ 2332 /* really off slab. No need for manual alignment */
2333 slab_size = 2333 slab_size =
2334 cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); 2334 cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
2335 } 2335 }
2336 2336
2337 cachep->colour_off = cache_line_size(); 2337 cachep->colour_off = cache_line_size();
2338 /* Offset must be a multiple of the alignment. */ 2338 /* Offset must be a multiple of the alignment. */
2339 if (cachep->colour_off < align) 2339 if (cachep->colour_off < align)
2340 cachep->colour_off = align; 2340 cachep->colour_off = align;
2341 cachep->colour = left_over / cachep->colour_off; 2341 cachep->colour = left_over / cachep->colour_off;
2342 cachep->slab_size = slab_size; 2342 cachep->slab_size = slab_size;
2343 cachep->flags = flags; 2343 cachep->flags = flags;
2344 cachep->gfpflags = 0; 2344 cachep->gfpflags = 0;
2345 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) 2345 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2346 cachep->gfpflags |= GFP_DMA; 2346 cachep->gfpflags |= GFP_DMA;
2347 cachep->buffer_size = size; 2347 cachep->buffer_size = size;
2348 cachep->reciprocal_buffer_size = reciprocal_value(size); 2348 cachep->reciprocal_buffer_size = reciprocal_value(size);
2349 2349
2350 if (flags & CFLGS_OFF_SLAB) { 2350 if (flags & CFLGS_OFF_SLAB) {
2351 cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); 2351 cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
2352 /* 2352 /*
2353 * This is a possibility for one of the malloc_sizes caches. 2353 * This is a possibility for one of the malloc_sizes caches.
2354 * But since we go off slab only for object size greater than 2354 * But since we go off slab only for object size greater than
2355 * PAGE_SIZE/8, and malloc_sizes gets created in ascending order, 2355 * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
2356 * this should not happen at all. 2356 * this should not happen at all.
2357 * But leave a BUG_ON for some lucky dude. 2357 * But leave a BUG_ON for some lucky dude.
2358 */ 2358 */
2359 BUG_ON(!cachep->slabp_cache); 2359 BUG_ON(!cachep->slabp_cache);
2360 } 2360 }
2361 cachep->ctor = ctor; 2361 cachep->ctor = ctor;
2362 cachep->dtor = dtor; 2362 cachep->dtor = dtor;
2363 cachep->name = name; 2363 cachep->name = name;
2364 2364
2365 if (setup_cpu_cache(cachep)) { 2365 if (setup_cpu_cache(cachep)) {
2366 __kmem_cache_destroy(cachep); 2366 __kmem_cache_destroy(cachep);
2367 cachep = NULL; 2367 cachep = NULL;
2368 goto oops; 2368 goto oops;
2369 } 2369 }
2370 2370
2371 /* cache setup completed, link it into the list */ 2371 /* cache setup completed, link it into the list */
2372 list_add(&cachep->next, &cache_chain); 2372 list_add(&cachep->next, &cache_chain);
2373 oops: 2373 oops:
2374 if (!cachep && (flags & SLAB_PANIC)) 2374 if (!cachep && (flags & SLAB_PANIC))
2375 panic("kmem_cache_create(): failed to create slab `%s'\n", 2375 panic("kmem_cache_create(): failed to create slab `%s'\n",
2376 name); 2376 name);
2377 mutex_unlock(&cache_chain_mutex); 2377 mutex_unlock(&cache_chain_mutex);
2378 return cachep; 2378 return cachep;
2379 } 2379 }
2380 EXPORT_SYMBOL(kmem_cache_create); 2380 EXPORT_SYMBOL(kmem_cache_create);
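For readers of the kerneldoc above, a minimal usage sketch, not part of this diff: the cache name, object type and helper names are illustrative, and the constructor uses the void (*ctor)(void *, struct kmem_cache *, unsigned long) prototype taken by kmem_cache_create() in this tree.

#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/list.h>

struct my_obj {
	struct list_head link;
	int refcount;
};

static struct kmem_cache *my_cache;

/* Runs when the cache allocates new pages, as described above. */
static void my_obj_ctor(void *obj, struct kmem_cache *cachep, unsigned long flags)
{
	struct my_obj *p = obj;

	INIT_LIST_HEAD(&p->link);
	p->refcount = 0;
}

static int __init my_module_init(void)
{
	/* @name must remain valid until kmem_cache_destroy(). */
	my_cache = kmem_cache_create("my_obj_cache", sizeof(struct my_obj),
				     0, SLAB_HWCACHE_ALIGN, my_obj_ctor, NULL);
	if (!my_cache)
		return -ENOMEM;
	return 0;
}
module_init(my_module_init);

Objects are then obtained with kmem_cache_alloc(my_cache, GFP_KERNEL) and returned with kmem_cache_free(my_cache, obj).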
2381 2381
2382 #if DEBUG 2382 #if DEBUG
2383 static void check_irq_off(void) 2383 static void check_irq_off(void)
2384 { 2384 {
2385 BUG_ON(!irqs_disabled()); 2385 BUG_ON(!irqs_disabled());
2386 } 2386 }
2387 2387
2388 static void check_irq_on(void) 2388 static void check_irq_on(void)
2389 { 2389 {
2390 BUG_ON(irqs_disabled()); 2390 BUG_ON(irqs_disabled());
2391 } 2391 }
2392 2392
2393 static void check_spinlock_acquired(struct kmem_cache *cachep) 2393 static void check_spinlock_acquired(struct kmem_cache *cachep)
2394 { 2394 {
2395 #ifdef CONFIG_SMP 2395 #ifdef CONFIG_SMP
2396 check_irq_off(); 2396 check_irq_off();
2397 assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock); 2397 assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock);
2398 #endif 2398 #endif
2399 } 2399 }
2400 2400
2401 static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) 2401 static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2402 { 2402 {
2403 #ifdef CONFIG_SMP 2403 #ifdef CONFIG_SMP
2404 check_irq_off(); 2404 check_irq_off();
2405 assert_spin_locked(&cachep->nodelists[node]->list_lock); 2405 assert_spin_locked(&cachep->nodelists[node]->list_lock);
2406 #endif 2406 #endif
2407 } 2407 }
2408 2408
2409 #else 2409 #else
2410 #define check_irq_off() do { } while(0) 2410 #define check_irq_off() do { } while(0)
2411 #define check_irq_on() do { } while(0) 2411 #define check_irq_on() do { } while(0)
2412 #define check_spinlock_acquired(x) do { } while(0) 2412 #define check_spinlock_acquired(x) do { } while(0)
2413 #define check_spinlock_acquired_node(x, y) do { } while(0) 2413 #define check_spinlock_acquired_node(x, y) do { } while(0)
2414 #endif 2414 #endif
2415 2415
2416 static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, 2416 static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
2417 struct array_cache *ac, 2417 struct array_cache *ac,
2418 int force, int node); 2418 int force, int node);
2419 2419
2420 static void do_drain(void *arg) 2420 static void do_drain(void *arg)
2421 { 2421 {
2422 struct kmem_cache *cachep = arg; 2422 struct kmem_cache *cachep = arg;
2423 struct array_cache *ac; 2423 struct array_cache *ac;
2424 int node = numa_node_id(); 2424 int node = numa_node_id();
2425 2425
2426 check_irq_off(); 2426 check_irq_off();
2427 ac = cpu_cache_get(cachep); 2427 ac = cpu_cache_get(cachep);
2428 spin_lock(&cachep->nodelists[node]->list_lock); 2428 spin_lock(&cachep->nodelists[node]->list_lock);
2429 free_block(cachep, ac->entry, ac->avail, node); 2429 free_block(cachep, ac->entry, ac->avail, node);
2430 spin_unlock(&cachep->nodelists[node]->list_lock); 2430 spin_unlock(&cachep->nodelists[node]->list_lock);
2431 ac->avail = 0; 2431 ac->avail = 0;
2432 } 2432 }
2433 2433
2434 static void drain_cpu_caches(struct kmem_cache *cachep) 2434 static void drain_cpu_caches(struct kmem_cache *cachep)
2435 { 2435 {
2436 struct kmem_list3 *l3; 2436 struct kmem_list3 *l3;
2437 int node; 2437 int node;
2438 2438
2439 on_each_cpu(do_drain, cachep, 1, 1); 2439 on_each_cpu(do_drain, cachep, 1, 1);
2440 check_irq_on(); 2440 check_irq_on();
2441 for_each_online_node(node) { 2441 for_each_online_node(node) {
2442 l3 = cachep->nodelists[node]; 2442 l3 = cachep->nodelists[node];
2443 if (l3 && l3->alien) 2443 if (l3 && l3->alien)
2444 drain_alien_cache(cachep, l3->alien); 2444 drain_alien_cache(cachep, l3->alien);
2445 } 2445 }
2446 2446
2447 for_each_online_node(node) { 2447 for_each_online_node(node) {
2448 l3 = cachep->nodelists[node]; 2448 l3 = cachep->nodelists[node];
2449 if (l3) 2449 if (l3)
2450 drain_array(cachep, l3, l3->shared, 1, node); 2450 drain_array(cachep, l3, l3->shared, 1, node);
2451 } 2451 }
2452 } 2452 }
2453 2453
2454 /* 2454 /*
2455 * Remove slabs from the list of free slabs. 2455 * Remove slabs from the list of free slabs.
2456 * Specify the number of slabs to drain in tofree. 2456 * Specify the number of slabs to drain in tofree.
2457 * 2457 *
2458 * Returns the actual number of slabs released. 2458 * Returns the actual number of slabs released.
2459 */ 2459 */
2460 static int drain_freelist(struct kmem_cache *cache, 2460 static int drain_freelist(struct kmem_cache *cache,
2461 struct kmem_list3 *l3, int tofree) 2461 struct kmem_list3 *l3, int tofree)
2462 { 2462 {
2463 struct list_head *p; 2463 struct list_head *p;
2464 int nr_freed; 2464 int nr_freed;
2465 struct slab *slabp; 2465 struct slab *slabp;
2466 2466
2467 nr_freed = 0; 2467 nr_freed = 0;
2468 while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { 2468 while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
2469 2469
2470 spin_lock_irq(&l3->list_lock); 2470 spin_lock_irq(&l3->list_lock);
2471 p = l3->slabs_free.prev; 2471 p = l3->slabs_free.prev;
2472 if (p == &l3->slabs_free) { 2472 if (p == &l3->slabs_free) {
2473 spin_unlock_irq(&l3->list_lock); 2473 spin_unlock_irq(&l3->list_lock);
2474 goto out; 2474 goto out;
2475 } 2475 }
2476 2476
2477 slabp = list_entry(p, struct slab, list); 2477 slabp = list_entry(p, struct slab, list);
2478 #if DEBUG 2478 #if DEBUG
2479 BUG_ON(slabp->inuse); 2479 BUG_ON(slabp->inuse);
2480 #endif 2480 #endif
2481 list_del(&slabp->list); 2481 list_del(&slabp->list);
2482 /* 2482 /*
2483 * Safe to drop the lock. The slab is no longer linked 2483 * Safe to drop the lock. The slab is no longer linked
2484 * to the cache. 2484 * to the cache.
2485 */ 2485 */
2486 l3->free_objects -= cache->num; 2486 l3->free_objects -= cache->num;
2487 spin_unlock_irq(&l3->list_lock); 2487 spin_unlock_irq(&l3->list_lock);
2488 slab_destroy(cache, slabp); 2488 slab_destroy(cache, slabp);
2489 nr_freed++; 2489 nr_freed++;
2490 } 2490 }
2491 out: 2491 out:
2492 return nr_freed; 2492 return nr_freed;
2493 } 2493 }
2494 2494
2495 /* Called with cache_chain_mutex held to protect against cpu hotplug */ 2495 /* Called with cache_chain_mutex held to protect against cpu hotplug */
2496 static int __cache_shrink(struct kmem_cache *cachep) 2496 static int __cache_shrink(struct kmem_cache *cachep)
2497 { 2497 {
2498 int ret = 0, i = 0; 2498 int ret = 0, i = 0;
2499 struct kmem_list3 *l3; 2499 struct kmem_list3 *l3;
2500 2500
2501 drain_cpu_caches(cachep); 2501 drain_cpu_caches(cachep);
2502 2502
2503 check_irq_on(); 2503 check_irq_on();
2504 for_each_online_node(i) { 2504 for_each_online_node(i) {
2505 l3 = cachep->nodelists[i]; 2505 l3 = cachep->nodelists[i];
2506 if (!l3) 2506 if (!l3)
2507 continue; 2507 continue;
2508 2508
2509 drain_freelist(cachep, l3, l3->free_objects); 2509 drain_freelist(cachep, l3, l3->free_objects);
2510 2510
2511 ret += !list_empty(&l3->slabs_full) || 2511 ret += !list_empty(&l3->slabs_full) ||
2512 !list_empty(&l3->slabs_partial); 2512 !list_empty(&l3->slabs_partial);
2513 } 2513 }
2514 return (ret ? 1 : 0); 2514 return (ret ? 1 : 0);
2515 } 2515 }
2516 2516
2517 /** 2517 /**
2518 * kmem_cache_shrink - Shrink a cache. 2518 * kmem_cache_shrink - Shrink a cache.
2519 * @cachep: The cache to shrink. 2519 * @cachep: The cache to shrink.
2520 * 2520 *
2521 * Releases as many slabs as possible for a cache. 2521 * Releases as many slabs as possible for a cache.
2522 * To help debugging, a zero exit status indicates all slabs were released. 2522 * To help debugging, a zero exit status indicates all slabs were released.
2523 */ 2523 */
2524 int kmem_cache_shrink(struct kmem_cache *cachep) 2524 int kmem_cache_shrink(struct kmem_cache *cachep)
2525 { 2525 {
2526 int ret; 2526 int ret;
2527 BUG_ON(!cachep || in_interrupt()); 2527 BUG_ON(!cachep || in_interrupt());
2528 2528
2529 mutex_lock(&cache_chain_mutex); 2529 mutex_lock(&cache_chain_mutex);
2530 ret = __cache_shrink(cachep); 2530 ret = __cache_shrink(cachep);
2531 mutex_unlock(&cache_chain_mutex); 2531 mutex_unlock(&cache_chain_mutex);
2532 return ret; 2532 return ret;
2533 } 2533 }
2534 EXPORT_SYMBOL(kmem_cache_shrink); 2534 EXPORT_SYMBOL(kmem_cache_shrink);
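A short illustrative use of the return value documented above, continuing the hypothetical my_cache from the sketch after kmem_cache_create(): a non-zero result means some slabs still hold live objects.

if (kmem_cache_shrink(my_cache))
	printk(KERN_DEBUG "my_obj_cache: some slabs are still in use\n");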
2535 2535
2536 /** 2536 /**
2537 * kmem_cache_destroy - delete a cache 2537 * kmem_cache_destroy - delete a cache
2538 * @cachep: the cache to destroy 2538 * @cachep: the cache to destroy
2539 * 2539 *
2540 * Remove a &struct kmem_cache object from the slab cache. 2540 * Remove a &struct kmem_cache object from the slab cache.
2541 * 2541 *
2542 * It is expected this function will be called by a module when it is 2542 * It is expected this function will be called by a module when it is
2543 * unloaded. This will remove the cache completely, and avoid a duplicate 2543 * unloaded. This will remove the cache completely, and avoid a duplicate
2544 * cache being allocated each time a module is loaded and unloaded, if the 2544 * cache being allocated each time a module is loaded and unloaded, if the
2545 * module doesn't have persistent in-kernel storage across loads and unloads. 2545 * module doesn't have persistent in-kernel storage across loads and unloads.
2546 * 2546 *
2547 * The cache must be empty before calling this function. 2547 * The cache must be empty before calling this function.
2548 * 2548 *
2549 * The caller must guarantee that no one will allocate memory from the cache 2549 * The caller must guarantee that no one will allocate memory from the cache
2550 * during the kmem_cache_destroy(). 2550 * during the kmem_cache_destroy().
2551 */ 2551 */
2552 void kmem_cache_destroy(struct kmem_cache *cachep) 2552 void kmem_cache_destroy(struct kmem_cache *cachep)
2553 { 2553 {
2554 BUG_ON(!cachep || in_interrupt()); 2554 BUG_ON(!cachep || in_interrupt());
2555 2555
2556 /* Find the cache in the chain of caches. */ 2556 /* Find the cache in the chain of caches. */
2557 mutex_lock(&cache_chain_mutex); 2557 mutex_lock(&cache_chain_mutex);
2558 /* 2558 /*
2559 * the chain is never empty, cache_cache is never destroyed 2559 * the chain is never empty, cache_cache is never destroyed
2560 */ 2560 */
2561 list_del(&cachep->next); 2561 list_del(&cachep->next);
2562 if (__cache_shrink(cachep)) { 2562 if (__cache_shrink(cachep)) {
2563 slab_error(cachep, "Can't free all objects"); 2563 slab_error(cachep, "Can't free all objects");
2564 list_add(&cachep->next, &cache_chain); 2564 list_add(&cachep->next, &cache_chain);
2565 mutex_unlock(&cache_chain_mutex); 2565 mutex_unlock(&cache_chain_mutex);
2566 return; 2566 return;
2567 } 2567 }
2568 2568
2569 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) 2569 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
2570 synchronize_rcu(); 2570 synchronize_rcu();
2571 2571
2572 __kmem_cache_destroy(cachep); 2572 __kmem_cache_destroy(cachep);
2573 mutex_unlock(&cache_chain_mutex); 2573 mutex_unlock(&cache_chain_mutex);
2574 } 2574 }
2575 EXPORT_SYMBOL(kmem_cache_destroy); 2575 EXPORT_SYMBOL(kmem_cache_destroy);
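The teardown side required by the kerneldoc above, continuing the same hypothetical module: the cache, and therefore the @name string, must be destroyed before the module is unloaded, which is exactly the stale-name situation that the probe_kernel_address() check in kmem_cache_create() warns about.

static void __exit my_module_exit(void)
{
	/* Every object must already have been returned to the cache. */
	kmem_cache_destroy(my_cache);
}
module_exit(my_module_exit);

MODULE_LICENSE("GPL");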
2576 2576
2577 /* 2577 /*
2578 * Get the memory for a slab management obj. 2578 * Get the memory for a slab management obj.
2579 * For a slab cache when the slab descriptor is off-slab, slab descriptors 2579 * For a slab cache when the slab descriptor is off-slab, slab descriptors
2580 * always come from malloc_sizes caches. The slab descriptor cannot 2580 * always come from malloc_sizes caches. The slab descriptor cannot
2581 * come from the same cache which is getting created because, 2581 * come from the same cache which is getting created because,
2582 * when we are searching for an appropriate cache for these 2582 * when we are searching for an appropriate cache for these
2583 * descriptors in kmem_cache_create, we search through the malloc_sizes array. 2583 * descriptors in kmem_cache_create, we search through the malloc_sizes array.
2584 * If we are creating a malloc_sizes cache here it would not be visible to 2584 * If we are creating a malloc_sizes cache here it would not be visible to
2585 * kmem_find_general_cachep till the initialization is complete. 2585 * kmem_find_general_cachep till the initialization is complete.
2586 * Hence we cannot have slabp_cache same as the original cache. 2586 * Hence we cannot have slabp_cache same as the original cache.
2587 */ 2587 */
2588 static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, 2588 static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2589 int colour_off, gfp_t local_flags, 2589 int colour_off, gfp_t local_flags,
2590 int nodeid) 2590 int nodeid)
2591 { 2591 {
2592 struct slab *slabp; 2592 struct slab *slabp;
2593 2593
2594 if (OFF_SLAB(cachep)) { 2594 if (OFF_SLAB(cachep)) {
2595 /* Slab management obj is off-slab. */ 2595 /* Slab management obj is off-slab. */
2596 slabp = kmem_cache_alloc_node(cachep->slabp_cache, 2596 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2597 local_flags & ~GFP_THISNODE, nodeid); 2597 local_flags & ~GFP_THISNODE, nodeid);
2598 if (!slabp) 2598 if (!slabp)
2599 return NULL; 2599 return NULL;
2600 } else { 2600 } else {
2601 slabp = objp + colour_off; 2601 slabp = objp + colour_off;
2602 colour_off += cachep->slab_size; 2602 colour_off += cachep->slab_size;
2603 } 2603 }
2604 slabp->inuse = 0; 2604 slabp->inuse = 0;
2605 slabp->colouroff = colour_off; 2605 slabp->colouroff = colour_off;
2606 slabp->s_mem = objp + colour_off; 2606 slabp->s_mem = objp + colour_off;
2607 slabp->nodeid = nodeid; 2607 slabp->nodeid = nodeid;
2608 return slabp; 2608 return slabp;
2609 } 2609 }
2610 2610
2611 static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) 2611 static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
2612 { 2612 {
2613 return (kmem_bufctl_t *) (slabp + 1); 2613 return (kmem_bufctl_t *) (slabp + 1);
2614 } 2614 }
2615 2615
2616 static void cache_init_objs(struct kmem_cache *cachep, 2616 static void cache_init_objs(struct kmem_cache *cachep,
2617 struct slab *slabp, unsigned long ctor_flags) 2617 struct slab *slabp, unsigned long ctor_flags)
2618 { 2618 {
2619 int i; 2619 int i;
2620 2620
2621 for (i = 0; i < cachep->num; i++) { 2621 for (i = 0; i < cachep->num; i++) {
2622 void *objp = index_to_obj(cachep, slabp, i); 2622 void *objp = index_to_obj(cachep, slabp, i);
2623 #if DEBUG 2623 #if DEBUG
2624 /* need to poison the objs? */ 2624 /* need to poison the objs? */
2625 if (cachep->flags & SLAB_POISON) 2625 if (cachep->flags & SLAB_POISON)
2626 poison_obj(cachep, objp, POISON_FREE); 2626 poison_obj(cachep, objp, POISON_FREE);
2627 if (cachep->flags & SLAB_STORE_USER) 2627 if (cachep->flags & SLAB_STORE_USER)
2628 *dbg_userword(cachep, objp) = NULL; 2628 *dbg_userword(cachep, objp) = NULL;
2629 2629
2630 if (cachep->flags & SLAB_RED_ZONE) { 2630 if (cachep->flags & SLAB_RED_ZONE) {
2631 *dbg_redzone1(cachep, objp) = RED_INACTIVE; 2631 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2632 *dbg_redzone2(cachep, objp) = RED_INACTIVE; 2632 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2633 } 2633 }
2634 /* 2634 /*
2635 * Constructors are not allowed to allocate memory from the same 2635 * Constructors are not allowed to allocate memory from the same
2636 * cache which they are a constructor for. Otherwise, deadlock. 2636 * cache which they are a constructor for. Otherwise, deadlock.
2637 * They must also be threaded. 2637 * They must also be threaded.
2638 */ 2638 */
2639 if (cachep->ctor && !(cachep->flags & SLAB_POISON)) 2639 if (cachep->ctor && !(cachep->flags & SLAB_POISON))
2640 cachep->ctor(objp + obj_offset(cachep), cachep, 2640 cachep->ctor(objp + obj_offset(cachep), cachep,
2641 ctor_flags); 2641 ctor_flags);
2642 2642
2643 if (cachep->flags & SLAB_RED_ZONE) { 2643 if (cachep->flags & SLAB_RED_ZONE) {
2644 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) 2644 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2645 slab_error(cachep, "constructor overwrote the" 2645 slab_error(cachep, "constructor overwrote the"
2646 " end of an object"); 2646 " end of an object");
2647 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) 2647 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2648 slab_error(cachep, "constructor overwrote the" 2648 slab_error(cachep, "constructor overwrote the"
2649 " start of an object"); 2649 " start of an object");
2650 } 2650 }
2651 if ((cachep->buffer_size % PAGE_SIZE) == 0 && 2651 if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
2652 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) 2652 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2653 kernel_map_pages(virt_to_page(objp), 2653 kernel_map_pages(virt_to_page(objp),
2654 cachep->buffer_size / PAGE_SIZE, 0); 2654 cachep->buffer_size / PAGE_SIZE, 0);
2655 #else 2655 #else
2656 if (cachep->ctor) 2656 if (cachep->ctor)
2657 cachep->ctor(objp, cachep, ctor_flags); 2657 cachep->ctor(objp, cachep, ctor_flags);
2658 #endif 2658 #endif
2659 slab_bufctl(slabp)[i] = i + 1; 2659 slab_bufctl(slabp)[i] = i + 1;
2660 } 2660 }
2661 slab_bufctl(slabp)[i - 1] = BUFCTL_END; 2661 slab_bufctl(slabp)[i - 1] = BUFCTL_END;
2662 slabp->free = 0; 2662 slabp->free = 0;
2663 } 2663 }
2664 2664
2665 static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) 2665 static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2666 { 2666 {
2667 if (CONFIG_ZONE_DMA_FLAG) { 2667 if (CONFIG_ZONE_DMA_FLAG) {
2668 if (flags & GFP_DMA) 2668 if (flags & GFP_DMA)
2669 BUG_ON(!(cachep->gfpflags & GFP_DMA)); 2669 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2670 else 2670 else
2671 BUG_ON(cachep->gfpflags & GFP_DMA); 2671 BUG_ON(cachep->gfpflags & GFP_DMA);
2672 } 2672 }
2673 } 2673 }
2674 2674
2675 static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, 2675 static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
2676 int nodeid) 2676 int nodeid)
2677 { 2677 {
2678 void *objp = index_to_obj(cachep, slabp, slabp->free); 2678 void *objp = index_to_obj(cachep, slabp, slabp->free);
2679 kmem_bufctl_t next; 2679 kmem_bufctl_t next;
2680 2680
2681 slabp->inuse++; 2681 slabp->inuse++;
2682 next = slab_bufctl(slabp)[slabp->free]; 2682 next = slab_bufctl(slabp)[slabp->free];
2683 #if DEBUG 2683 #if DEBUG
2684 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; 2684 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2685 WARN_ON(slabp->nodeid != nodeid); 2685 WARN_ON(slabp->nodeid != nodeid);
2686 #endif 2686 #endif
2687 slabp->free = next; 2687 slabp->free = next;
2688 2688
2689 return objp; 2689 return objp;
2690 } 2690 }
2691 2691
2692 static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, 2692 static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2693 void *objp, int nodeid) 2693 void *objp, int nodeid)
2694 { 2694 {
2695 unsigned int objnr = obj_to_index(cachep, slabp, objp); 2695 unsigned int objnr = obj_to_index(cachep, slabp, objp);
2696 2696
2697 #if DEBUG 2697 #if DEBUG
2698 /* Verify that the slab belongs to the intended node */ 2698 /* Verify that the slab belongs to the intended node */
2699 WARN_ON(slabp->nodeid != nodeid); 2699 WARN_ON(slabp->nodeid != nodeid);
2700 2700
2701 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) { 2701 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
2702 printk(KERN_ERR "slab: double free detected in cache " 2702 printk(KERN_ERR "slab: double free detected in cache "
2703 "'%s', objp %p\n", cachep->name, objp); 2703 "'%s', objp %p\n", cachep->name, objp);
2704 BUG(); 2704 BUG();
2705 } 2705 }
2706 #endif 2706 #endif
2707 slab_bufctl(slabp)[objnr] = slabp->free; 2707 slab_bufctl(slabp)[objnr] = slabp->free;
2708 slabp->free = objnr; 2708 slabp->free = objnr;
2709 slabp->inuse--; 2709 slabp->inuse--;
2710 } 2710 }
2711 2711
2712 /* 2712 /*
2713 * Map pages beginning at addr to the given cache and slab. This is required 2713 * Map pages beginning at addr to the given cache and slab. This is required
2714 * for the slab allocator to be able to lookup the cache and slab of a 2714 * for the slab allocator to be able to lookup the cache and slab of a
2715 * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging. 2715 * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging.
2716 */ 2716 */
2717 static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, 2717 static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2718 void *addr) 2718 void *addr)
2719 { 2719 {
2720 int nr_pages; 2720 int nr_pages;
2721 struct page *page; 2721 struct page *page;
2722 2722
2723 page = virt_to_page(addr); 2723 page = virt_to_page(addr);
2724 2724
2725 nr_pages = 1; 2725 nr_pages = 1;
2726 if (likely(!PageCompound(page))) 2726 if (likely(!PageCompound(page)))
2727 nr_pages <<= cache->gfporder; 2727 nr_pages <<= cache->gfporder;
2728 2728
2729 do { 2729 do {
2730 page_set_cache(page, cache); 2730 page_set_cache(page, cache);
2731 page_set_slab(page, slab); 2731 page_set_slab(page, slab);
2732 page++; 2732 page++;
2733 } while (--nr_pages); 2733 } while (--nr_pages);
2734 } 2734 }
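A sketch of the reverse lookup that this mapping enables; the helper names are illustrative and assume the page_get_cache()/page_get_slab() accessors pairing with the setters used above. kfree(), ksize() and the debug paths recover the owning cache and slab from an object address roughly like this:

static inline struct kmem_cache *obj_to_cache(const void *obj)
{
	return page_get_cache(virt_to_head_page(obj));
}

static inline struct slab *obj_to_slab(const void *obj)
{
	return page_get_slab(virt_to_head_page(obj));
}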
2735 2735
2736 /* 2736 /*
2737 * Grow (by 1) the number of slabs within a cache. This is called by 2737 * Grow (by 1) the number of slabs within a cache. This is called by
2738 * kmem_cache_alloc() when there are no active objs left in a cache. 2738 * kmem_cache_alloc() when there are no active objs left in a cache.
2739 */ 2739 */
2740 static int cache_grow(struct kmem_cache *cachep, 2740 static int cache_grow(struct kmem_cache *cachep,
2741 gfp_t flags, int nodeid, void *objp) 2741 gfp_t flags, int nodeid, void *objp)
2742 { 2742 {
2743 struct slab *slabp; 2743 struct slab *slabp;
2744 size_t offset; 2744 size_t offset;
2745 gfp_t local_flags; 2745 gfp_t local_flags;
2746 unsigned long ctor_flags; 2746 unsigned long ctor_flags;
2747 struct kmem_list3 *l3; 2747 struct kmem_list3 *l3;
2748 2748
2749 /* 2749 /*
2750 * Be lazy and only check for valid flags here, keeping it out of the 2750 * Be lazy and only check for valid flags here, keeping it out of the
2751 * critical path in kmem_cache_alloc(). 2751 * critical path in kmem_cache_alloc().
2752 */ 2752 */
2753 BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK)); 2753 BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK));
2754 2754
2755 ctor_flags = SLAB_CTOR_CONSTRUCTOR; 2755 ctor_flags = SLAB_CTOR_CONSTRUCTOR;
2756 local_flags = (flags & GFP_LEVEL_MASK); 2756 local_flags = (flags & GFP_LEVEL_MASK);
2757 /* Take the l3 list lock to change the colour_next on this node */ 2757 /* Take the l3 list lock to change the colour_next on this node */
2758 check_irq_off(); 2758 check_irq_off();
2759 l3 = cachep->nodelists[nodeid]; 2759 l3 = cachep->nodelists[nodeid];
2760 spin_lock(&l3->list_lock); 2760 spin_lock(&l3->list_lock);
2761 2761
2762 /* Get colour for the slab, and calculate the next value. */ 2762 /* Get colour for the slab, and calculate the next value. */
2763 offset = l3->colour_next; 2763 offset = l3->colour_next;
2764 l3->colour_next++; 2764 l3->colour_next++;
2765 if (l3->colour_next >= cachep->colour) 2765 if (l3->colour_next >= cachep->colour)
2766 l3->colour_next = 0; 2766 l3->colour_next = 0;
2767 spin_unlock(&l3->list_lock); 2767 spin_unlock(&l3->list_lock);
2768 2768
2769 offset *= cachep->colour_off; 2769 offset *= cachep->colour_off;
2770 2770
2771 if (local_flags & __GFP_WAIT) 2771 if (local_flags & __GFP_WAIT)
2772 local_irq_enable(); 2772 local_irq_enable();
2773 2773
2774 /* 2774 /*
2775 * The test for missing atomic flag is performed here, rather than 2775 * The test for missing atomic flag is performed here, rather than
2776 * the more obvious place, simply to reduce the critical path length 2776 * the more obvious place, simply to reduce the critical path length
2777 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they 2777 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
2778 * will eventually be caught here (where it matters). 2778 * will eventually be caught here (where it matters).
2779 */ 2779 */
2780 kmem_flagcheck(cachep, flags); 2780 kmem_flagcheck(cachep, flags);
2781 2781
2782 /* 2782 /*
2783 * Get mem for the objs. Attempt to allocate a physical page from 2783 * Get mem for the objs. Attempt to allocate a physical page from
2784 * 'nodeid'. 2784 * 'nodeid'.
2785 */ 2785 */
2786 if (!objp) 2786 if (!objp)
2787 objp = kmem_getpages(cachep, flags, nodeid); 2787 objp = kmem_getpages(cachep, flags, nodeid);
2788 if (!objp) 2788 if (!objp)
2789 goto failed; 2789 goto failed;
2790 2790
2791 /* Get slab management. */ 2791 /* Get slab management. */
2792 slabp = alloc_slabmgmt(cachep, objp, offset, 2792 slabp = alloc_slabmgmt(cachep, objp, offset,
2793 local_flags & ~GFP_THISNODE, nodeid); 2793 local_flags & ~GFP_THISNODE, nodeid);
2794 if (!slabp) 2794 if (!slabp)
2795 goto opps1; 2795 goto opps1;
2796 2796
2797 slabp->nodeid = nodeid; 2797 slabp->nodeid = nodeid;
2798 slab_map_pages(cachep, slabp, objp); 2798 slab_map_pages(cachep, slabp, objp);
2799 2799
2800 cache_init_objs(cachep, slabp, ctor_flags); 2800 cache_init_objs(cachep, slabp, ctor_flags);
2801 2801
2802 if (local_flags & __GFP_WAIT) 2802 if (local_flags & __GFP_WAIT)
2803 local_irq_disable(); 2803 local_irq_disable();
2804 check_irq_off(); 2804 check_irq_off();
2805 spin_lock(&l3->list_lock); 2805 spin_lock(&l3->list_lock);
2806 2806
2807 /* Make slab active. */ 2807 /* Make slab active. */
2808 list_add_tail(&slabp->list, &(l3->slabs_free)); 2808 list_add_tail(&slabp->list, &(l3->slabs_free));
2809 STATS_INC_GROWN(cachep); 2809 STATS_INC_GROWN(cachep);
2810 l3->free_objects += cachep->num; 2810 l3->free_objects += cachep->num;
2811 spin_unlock(&l3->list_lock); 2811 spin_unlock(&l3->list_lock);
2812 return 1; 2812 return 1;
2813 opps1: 2813 opps1:
2814 kmem_freepages(cachep, objp); 2814 kmem_freepages(cachep, objp);
2815 failed: 2815 failed:
2816 if (local_flags & __GFP_WAIT) 2816 if (local_flags & __GFP_WAIT)
2817 local_irq_disable(); 2817 local_irq_disable();
2818 return 0; 2818 return 0;
2819 } 2819 }
2820 2820
2821 #if DEBUG 2821 #if DEBUG
2822 2822
2823 /* 2823 /*
2824 * Perform extra freeing checks: 2824 * Perform extra freeing checks:
2825 * - detect bad pointers. 2825 * - detect bad pointers.
2826 * - POISON/RED_ZONE checking 2826 * - POISON/RED_ZONE checking
2827 * - destructor calls, for caches with POISON+dtor 2827 * - destructor calls, for caches with POISON+dtor
2828 */ 2828 */
2829 static void kfree_debugcheck(const void *objp) 2829 static void kfree_debugcheck(const void *objp)
2830 { 2830 {
2831 if (!virt_addr_valid(objp)) { 2831 if (!virt_addr_valid(objp)) {
2832 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n", 2832 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2833 (unsigned long)objp); 2833 (unsigned long)objp);
2834 BUG(); 2834 BUG();
2835 } 2835 }
2836 } 2836 }
2837 2837
2838 static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) 2838 static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2839 { 2839 {
2840 unsigned long long redzone1, redzone2; 2840 unsigned long long redzone1, redzone2;
2841 2841
2842 redzone1 = *dbg_redzone1(cache, obj); 2842 redzone1 = *dbg_redzone1(cache, obj);
2843 redzone2 = *dbg_redzone2(cache, obj); 2843 redzone2 = *dbg_redzone2(cache, obj);
2844 2844
2845 /* 2845 /*
2846 * Redzone is ok. 2846 * Redzone is ok.
2847 */ 2847 */
2848 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE) 2848 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2849 return; 2849 return;
2850 2850
2851 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE) 2851 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2852 slab_error(cache, "double free detected"); 2852 slab_error(cache, "double free detected");
2853 else 2853 else
2854 slab_error(cache, "memory outside object was overwritten"); 2854 slab_error(cache, "memory outside object was overwritten");
2855 2855
2856 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n", 2856 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
2857 obj, redzone1, redzone2); 2857 obj, redzone1, redzone2);
2858 } 2858 }
2859 2859
2860 static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, 2860 static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2861 void *caller) 2861 void *caller)
2862 { 2862 {
2863 struct page *page; 2863 struct page *page;
2864 unsigned int objnr; 2864 unsigned int objnr;
2865 struct slab *slabp; 2865 struct slab *slabp;
2866 2866
2867 objp -= obj_offset(cachep); 2867 objp -= obj_offset(cachep);
2868 kfree_debugcheck(objp); 2868 kfree_debugcheck(objp);
2869 page = virt_to_head_page(objp); 2869 page = virt_to_head_page(objp);
2870 2870
2871 slabp = page_get_slab(page); 2871 slabp = page_get_slab(page);
2872 2872
2873 if (cachep->flags & SLAB_RED_ZONE) { 2873 if (cachep->flags & SLAB_RED_ZONE) {
2874 verify_redzone_free(cachep, objp); 2874 verify_redzone_free(cachep, objp);
2875 *dbg_redzone1(cachep, objp) = RED_INACTIVE; 2875 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2876 *dbg_redzone2(cachep, objp) = RED_INACTIVE; 2876 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2877 } 2877 }
2878 if (cachep->flags & SLAB_STORE_USER) 2878 if (cachep->flags & SLAB_STORE_USER)
2879 *dbg_userword(cachep, objp) = caller; 2879 *dbg_userword(cachep, objp) = caller;
2880 2880
2881 objnr = obj_to_index(cachep, slabp, objp); 2881 objnr = obj_to_index(cachep, slabp, objp);
2882 2882
2883 BUG_ON(objnr >= cachep->num); 2883 BUG_ON(objnr >= cachep->num);
2884 BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); 2884 BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
2885 2885
2886 if (cachep->flags & SLAB_POISON && cachep->dtor) { 2886 if (cachep->flags & SLAB_POISON && cachep->dtor) {
2887 /* we want to cache-poison the object, so 2887 /* we want to cache-poison the object, so
2888 * call the destruction callback first 2888 * call the destruction callback first
2889 */ 2889 */
2890 cachep->dtor(objp + obj_offset(cachep), cachep, 0); 2890 cachep->dtor(objp + obj_offset(cachep), cachep, 0);
2891 } 2891 }
2892 #ifdef CONFIG_DEBUG_SLAB_LEAK 2892 #ifdef CONFIG_DEBUG_SLAB_LEAK
2893 slab_bufctl(slabp)[objnr] = BUFCTL_FREE; 2893 slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
2894 #endif 2894 #endif
2895 if (cachep->flags & SLAB_POISON) { 2895 if (cachep->flags & SLAB_POISON) {
2896 #ifdef CONFIG_DEBUG_PAGEALLOC 2896 #ifdef CONFIG_DEBUG_PAGEALLOC
2897 if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { 2897 if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
2898 store_stackinfo(cachep, objp, (unsigned long)caller); 2898 store_stackinfo(cachep, objp, (unsigned long)caller);
2899 kernel_map_pages(virt_to_page(objp), 2899 kernel_map_pages(virt_to_page(objp),
2900 cachep->buffer_size / PAGE_SIZE, 0); 2900 cachep->buffer_size / PAGE_SIZE, 0);
2901 } else { 2901 } else {
2902 poison_obj(cachep, objp, POISON_FREE); 2902 poison_obj(cachep, objp, POISON_FREE);
2903 } 2903 }
2904 #else 2904 #else
2905 poison_obj(cachep, objp, POISON_FREE); 2905 poison_obj(cachep, objp, POISON_FREE);
2906 #endif 2906 #endif
2907 } 2907 }
2908 return objp; 2908 return objp;
2909 } 2909 }
2910 2910
2911 static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) 2911 static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
2912 { 2912 {
2913 kmem_bufctl_t i; 2913 kmem_bufctl_t i;
2914 int entries = 0; 2914 int entries = 0;
2915 2915
2916 /* Check slab's freelist to see if this obj is there. */ 2916 /* Check slab's freelist to see if this obj is there. */
2917 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { 2917 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
2918 entries++; 2918 entries++;
2919 if (entries > cachep->num || i >= cachep->num) 2919 if (entries > cachep->num || i >= cachep->num)
2920 goto bad; 2920 goto bad;
2921 } 2921 }
2922 if (entries != cachep->num - slabp->inuse) { 2922 if (entries != cachep->num - slabp->inuse) {
2923 bad: 2923 bad:
2924 printk(KERN_ERR "slab: Internal list corruption detected in " 2924 printk(KERN_ERR "slab: Internal list corruption detected in "
2925 "cache '%s'(%d), slabp %p(%d). Hexdump:\n", 2925 "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
2926 cachep->name, cachep->num, slabp, slabp->inuse); 2926 cachep->name, cachep->num, slabp, slabp->inuse);
2927 for (i = 0; 2927 for (i = 0;
2928 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); 2928 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
2929 i++) { 2929 i++) {
2930 if (i % 16 == 0) 2930 if (i % 16 == 0)
2931 printk("\n%03x:", i); 2931 printk("\n%03x:", i);
2932 printk(" %02x", ((unsigned char *)slabp)[i]); 2932 printk(" %02x", ((unsigned char *)slabp)[i]);
2933 } 2933 }
2934 printk("\n"); 2934 printk("\n");
2935 BUG(); 2935 BUG();
2936 } 2936 }
2937 } 2937 }
2938 #else 2938 #else
2939 #define kfree_debugcheck(x) do { } while(0) 2939 #define kfree_debugcheck(x) do { } while(0)
2940 #define cache_free_debugcheck(x,objp,z) (objp) 2940 #define cache_free_debugcheck(x,objp,z) (objp)
2941 #define check_slabp(x,y) do { } while(0) 2941 #define check_slabp(x,y) do { } while(0)
2942 #endif 2942 #endif
2943 2943
2944 static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) 2944 static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
2945 { 2945 {
2946 int batchcount; 2946 int batchcount;
2947 struct kmem_list3 *l3; 2947 struct kmem_list3 *l3;
2948 struct array_cache *ac; 2948 struct array_cache *ac;
2949 int node; 2949 int node;
2950 2950
2951 node = numa_node_id(); 2951 node = numa_node_id();
2952 2952
2953 check_irq_off(); 2953 check_irq_off();
2954 ac = cpu_cache_get(cachep); 2954 ac = cpu_cache_get(cachep);
2955 retry: 2955 retry:
2956 batchcount = ac->batchcount; 2956 batchcount = ac->batchcount;
2957 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { 2957 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2958 /* 2958 /*
2959 * If there was little recent activity on this cache, then 2959 * If there was little recent activity on this cache, then
2960 * perform only a partial refill. Otherwise we could generate 2960 * perform only a partial refill. Otherwise we could generate
2961 * refill bouncing. 2961 * refill bouncing.
2962 */ 2962 */
2963 batchcount = BATCHREFILL_LIMIT; 2963 batchcount = BATCHREFILL_LIMIT;
2964 } 2964 }
2965 l3 = cachep->nodelists[node]; 2965 l3 = cachep->nodelists[node];
2966 2966
2967 BUG_ON(ac->avail > 0 || !l3); 2967 BUG_ON(ac->avail > 0 || !l3);
2968 spin_lock(&l3->list_lock); 2968 spin_lock(&l3->list_lock);
2969 2969
2970 /* See if we can refill from the shared array */ 2970 /* See if we can refill from the shared array */
2971 if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) 2971 if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
2972 goto alloc_done; 2972 goto alloc_done;
2973 2973
2974 while (batchcount > 0) { 2974 while (batchcount > 0) {
2975 struct list_head *entry; 2975 struct list_head *entry;
2976 struct slab *slabp; 2976 struct slab *slabp;
2977 /* Get the slab the allocation is to come from. */ 2977 /* Get the slab the allocation is to come from. */
2978 entry = l3->slabs_partial.next; 2978 entry = l3->slabs_partial.next;
2979 if (entry == &l3->slabs_partial) { 2979 if (entry == &l3->slabs_partial) {
2980 l3->free_touched = 1; 2980 l3->free_touched = 1;
2981 entry = l3->slabs_free.next; 2981 entry = l3->slabs_free.next;
2982 if (entry == &l3->slabs_free) 2982 if (entry == &l3->slabs_free)
2983 goto must_grow; 2983 goto must_grow;
2984 } 2984 }
2985 2985
2986 slabp = list_entry(entry, struct slab, list); 2986 slabp = list_entry(entry, struct slab, list);
2987 check_slabp(cachep, slabp); 2987 check_slabp(cachep, slabp);
2988 check_spinlock_acquired(cachep); 2988 check_spinlock_acquired(cachep);
2989 2989
2990 /* 2990 /*
2991 * The slab was either on partial or free list so 2991 * The slab was either on partial or free list so
2992 * there must be at least one object available for 2992 * there must be at least one object available for
2993 * allocation. 2993 * allocation.
2994 */ 2994 */
2995 BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num); 2995 BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num);
2996 2996
2997 while (slabp->inuse < cachep->num && batchcount--) { 2997 while (slabp->inuse < cachep->num && batchcount--) {
2998 STATS_INC_ALLOCED(cachep); 2998 STATS_INC_ALLOCED(cachep);
2999 STATS_INC_ACTIVE(cachep); 2999 STATS_INC_ACTIVE(cachep);
3000 STATS_SET_HIGH(cachep); 3000 STATS_SET_HIGH(cachep);
3001 3001
3002 ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, 3002 ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
3003 node); 3003 node);
3004 } 3004 }
3005 check_slabp(cachep, slabp); 3005 check_slabp(cachep, slabp);
3006 3006
3007 /* move slabp to correct slabp list: */ 3007 /* move slabp to correct slabp list: */
3008 list_del(&slabp->list); 3008 list_del(&slabp->list);
3009 if (slabp->free == BUFCTL_END) 3009 if (slabp->free == BUFCTL_END)
3010 list_add(&slabp->list, &l3->slabs_full); 3010 list_add(&slabp->list, &l3->slabs_full);
3011 else 3011 else
3012 list_add(&slabp->list, &l3->slabs_partial); 3012 list_add(&slabp->list, &l3->slabs_partial);
3013 } 3013 }
3014 3014
3015 must_grow: 3015 must_grow:
3016 l3->free_objects -= ac->avail; 3016 l3->free_objects -= ac->avail;
3017 alloc_done: 3017 alloc_done:
3018 spin_unlock(&l3->list_lock); 3018 spin_unlock(&l3->list_lock);
3019 3019
3020 if (unlikely(!ac->avail)) { 3020 if (unlikely(!ac->avail)) {
3021 int x; 3021 int x;
3022 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); 3022 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
3023 3023
3024 /* cache_grow can reenable interrupts, then ac could change. */ 3024 /* cache_grow can reenable interrupts, then ac could change. */
3025 ac = cpu_cache_get(cachep); 3025 ac = cpu_cache_get(cachep);
3026 if (!x && ac->avail == 0) /* no objects in sight? abort */ 3026 if (!x && ac->avail == 0) /* no objects in sight? abort */
3027 return NULL; 3027 return NULL;
3028 3028
3029 if (!ac->avail) /* objects refilled by interrupt? */ 3029 if (!ac->avail) /* objects refilled by interrupt? */
3030 goto retry; 3030 goto retry;
3031 } 3031 }
3032 ac->touched = 1; 3032 ac->touched = 1;
3033 return ac->entry[--ac->avail]; 3033 return ac->entry[--ac->avail];
3034 } 3034 }
3035 3035
3036 static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, 3036 static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
3037 gfp_t flags) 3037 gfp_t flags)
3038 { 3038 {
3039 might_sleep_if(flags & __GFP_WAIT); 3039 might_sleep_if(flags & __GFP_WAIT);
3040 #if DEBUG 3040 #if DEBUG
3041 kmem_flagcheck(cachep, flags); 3041 kmem_flagcheck(cachep, flags);
3042 #endif 3042 #endif
3043 } 3043 }
3044 3044
3045 #if DEBUG 3045 #if DEBUG
3046 static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, 3046 static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3047 gfp_t flags, void *objp, void *caller) 3047 gfp_t flags, void *objp, void *caller)
3048 { 3048 {
3049 if (!objp) 3049 if (!objp)
3050 return objp; 3050 return objp;
3051 if (cachep->flags & SLAB_POISON) { 3051 if (cachep->flags & SLAB_POISON) {
3052 #ifdef CONFIG_DEBUG_PAGEALLOC 3052 #ifdef CONFIG_DEBUG_PAGEALLOC
3053 if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) 3053 if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
3054 kernel_map_pages(virt_to_page(objp), 3054 kernel_map_pages(virt_to_page(objp),
3055 cachep->buffer_size / PAGE_SIZE, 1); 3055 cachep->buffer_size / PAGE_SIZE, 1);
3056 else 3056 else
3057 check_poison_obj(cachep, objp); 3057 check_poison_obj(cachep, objp);
3058 #else 3058 #else
3059 check_poison_obj(cachep, objp); 3059 check_poison_obj(cachep, objp);
3060 #endif 3060 #endif
3061 poison_obj(cachep, objp, POISON_INUSE); 3061 poison_obj(cachep, objp, POISON_INUSE);
3062 } 3062 }
3063 if (cachep->flags & SLAB_STORE_USER) 3063 if (cachep->flags & SLAB_STORE_USER)
3064 *dbg_userword(cachep, objp) = caller; 3064 *dbg_userword(cachep, objp) = caller;
3065 3065
3066 if (cachep->flags & SLAB_RED_ZONE) { 3066 if (cachep->flags & SLAB_RED_ZONE) {
3067 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || 3067 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
3068 *dbg_redzone2(cachep, objp) != RED_INACTIVE) { 3068 *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
3069 slab_error(cachep, "double free, or memory outside" 3069 slab_error(cachep, "double free, or memory outside"
3070 " object was overwritten"); 3070 " object was overwritten");
3071 printk(KERN_ERR 3071 printk(KERN_ERR
3072 "%p: redzone 1:0x%llx, redzone 2:0x%llx\n", 3072 "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
3073 objp, *dbg_redzone1(cachep, objp), 3073 objp, *dbg_redzone1(cachep, objp),
3074 *dbg_redzone2(cachep, objp)); 3074 *dbg_redzone2(cachep, objp));
3075 } 3075 }
3076 *dbg_redzone1(cachep, objp) = RED_ACTIVE; 3076 *dbg_redzone1(cachep, objp) = RED_ACTIVE;
3077 *dbg_redzone2(cachep, objp) = RED_ACTIVE; 3077 *dbg_redzone2(cachep, objp) = RED_ACTIVE;
3078 } 3078 }
3079 #ifdef CONFIG_DEBUG_SLAB_LEAK 3079 #ifdef CONFIG_DEBUG_SLAB_LEAK
3080 { 3080 {
3081 struct slab *slabp; 3081 struct slab *slabp;
3082 unsigned objnr; 3082 unsigned objnr;
3083 3083
3084 slabp = page_get_slab(virt_to_head_page(objp)); 3084 slabp = page_get_slab(virt_to_head_page(objp));
3085 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; 3085 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
3086 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; 3086 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
3087 } 3087 }
3088 #endif 3088 #endif
3089 objp += obj_offset(cachep); 3089 objp += obj_offset(cachep);
3090 if (cachep->ctor && cachep->flags & SLAB_POISON) 3090 if (cachep->ctor && cachep->flags & SLAB_POISON)
3091 cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR); 3091 cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR);
3092 #if ARCH_SLAB_MINALIGN 3092 #if ARCH_SLAB_MINALIGN
3093 if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { 3093 if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
3094 printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", 3094 printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
3095 objp, ARCH_SLAB_MINALIGN); 3095 objp, ARCH_SLAB_MINALIGN);
3096 } 3096 }
3097 #endif 3097 #endif
3098 return objp; 3098 return objp;
3099 } 3099 }
3100 #else 3100 #else
3101 #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) 3101 #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
3102 #endif 3102 #endif
3103 3103
3104 #ifdef CONFIG_FAILSLAB 3104 #ifdef CONFIG_FAILSLAB
3105 3105
3106 static struct failslab_attr { 3106 static struct failslab_attr {
3107 3107
3108 struct fault_attr attr; 3108 struct fault_attr attr;
3109 3109
3110 u32 ignore_gfp_wait; 3110 u32 ignore_gfp_wait;
3111 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS 3111 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
3112 struct dentry *ignore_gfp_wait_file; 3112 struct dentry *ignore_gfp_wait_file;
3113 #endif 3113 #endif
3114 3114
3115 } failslab = { 3115 } failslab = {
3116 .attr = FAULT_ATTR_INITIALIZER, 3116 .attr = FAULT_ATTR_INITIALIZER,
3117 .ignore_gfp_wait = 1, 3117 .ignore_gfp_wait = 1,
3118 }; 3118 };
3119 3119
3120 static int __init setup_failslab(char *str) 3120 static int __init setup_failslab(char *str)
3121 { 3121 {
3122 return setup_fault_attr(&failslab.attr, str); 3122 return setup_fault_attr(&failslab.attr, str);
3123 } 3123 }
3124 __setup("failslab=", setup_failslab); 3124 __setup("failslab=", setup_failslab);
3125 3125
3126 static int should_failslab(struct kmem_cache *cachep, gfp_t flags) 3126 static int should_failslab(struct kmem_cache *cachep, gfp_t flags)
3127 { 3127 {
3128 if (cachep == &cache_cache) 3128 if (cachep == &cache_cache)
3129 return 0; 3129 return 0;
3130 if (flags & __GFP_NOFAIL) 3130 if (flags & __GFP_NOFAIL)
3131 return 0; 3131 return 0;
3132 if (failslab.ignore_gfp_wait && (flags & __GFP_WAIT)) 3132 if (failslab.ignore_gfp_wait && (flags & __GFP_WAIT))
3133 return 0; 3133 return 0;
3134 3134
3135 return should_fail(&failslab.attr, obj_size(cachep)); 3135 return should_fail(&failslab.attr, obj_size(cachep));
3136 } 3136 }
3137 3137
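For context: with CONFIG_FAILSLAB enabled, the fault-injection attributes above can also be seeded at boot, e.g. failslab=100,10,0,-1 in the generic <interval>,<probability>,<space>,<times> fault-attr format (the values here are purely illustrative), and should_failslab() then exempts cache_cache, __GFP_NOFAIL allocations and, by default, __GFP_WAIT allocations from the injected failures.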
3138 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS 3138 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
3139 3139
3140 static int __init failslab_debugfs(void) 3140 static int __init failslab_debugfs(void)
3141 { 3141 {
3142 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; 3142 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
3143 struct dentry *dir; 3143 struct dentry *dir;
3144 int err; 3144 int err;
3145 3145
3146 err = init_fault_attr_dentries(&failslab.attr, "failslab"); 3146 err = init_fault_attr_dentries(&failslab.attr, "failslab");
3147 if (err) 3147 if (err)
3148 return err; 3148 return err;
3149 dir = failslab.attr.dentries.dir; 3149 dir = failslab.attr.dentries.dir;
3150 3150
3151 failslab.ignore_gfp_wait_file = 3151 failslab.ignore_gfp_wait_file =
3152 debugfs_create_bool("ignore-gfp-wait", mode, dir, 3152 debugfs_create_bool("ignore-gfp-wait", mode, dir,
3153 &failslab.ignore_gfp_wait); 3153 &failslab.ignore_gfp_wait);
3154 3154
3155 if (!failslab.ignore_gfp_wait_file) { 3155 if (!failslab.ignore_gfp_wait_file) {
3156 err = -ENOMEM; 3156 err = -ENOMEM;
3157 debugfs_remove(failslab.ignore_gfp_wait_file); 3157 debugfs_remove(failslab.ignore_gfp_wait_file);
3158 cleanup_fault_attr_dentries(&failslab.attr); 3158 cleanup_fault_attr_dentries(&failslab.attr);
3159 } 3159 }
3160 3160
3161 return err; 3161 return err;
3162 } 3162 }
3163 3163
3164 late_initcall(failslab_debugfs); 3164 late_initcall(failslab_debugfs);
3165 3165
3166 #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ 3166 #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
3167 3167
3168 #else /* CONFIG_FAILSLAB */ 3168 #else /* CONFIG_FAILSLAB */
3169 3169
3170 static inline int should_failslab(struct kmem_cache *cachep, gfp_t flags) 3170 static inline int should_failslab(struct kmem_cache *cachep, gfp_t flags)
3171 { 3171 {
3172 return 0; 3172 return 0;
3173 } 3173 }
3174 3174
3175 #endif /* CONFIG_FAILSLAB */ 3175 #endif /* CONFIG_FAILSLAB */
3176 3176
3177 static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) 3177 static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3178 { 3178 {
3179 void *objp; 3179 void *objp;
3180 struct array_cache *ac; 3180 struct array_cache *ac;
3181 3181
3182 check_irq_off(); 3182 check_irq_off();
3183 3183
3184 ac = cpu_cache_get(cachep); 3184 ac = cpu_cache_get(cachep);
3185 if (likely(ac->avail)) { 3185 if (likely(ac->avail)) {
3186 STATS_INC_ALLOCHIT(cachep); 3186 STATS_INC_ALLOCHIT(cachep);
3187 ac->touched = 1; 3187 ac->touched = 1;
3188 objp = ac->entry[--ac->avail]; 3188 objp = ac->entry[--ac->avail];
3189 } else { 3189 } else {
3190 STATS_INC_ALLOCMISS(cachep); 3190 STATS_INC_ALLOCMISS(cachep);
3191 objp = cache_alloc_refill(cachep, flags); 3191 objp = cache_alloc_refill(cachep, flags);
3192 } 3192 }
3193 return objp; 3193 return objp;
3194 } 3194 }
3195 3195
3196 #ifdef CONFIG_NUMA 3196 #ifdef CONFIG_NUMA
3197 /* 3197 /*
3198 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY. 3198 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.
3199 * 3199 *
3200 * If we are in_interrupt, then process context, including cpusets and 3200 * If we are in_interrupt, then process context, including cpusets and
3201 * mempolicy, may not apply and should not be used for allocation policy. 3201 * mempolicy, may not apply and should not be used for allocation policy.
3202 */ 3202 */
3203 static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) 3203 static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3204 { 3204 {
3205 int nid_alloc, nid_here; 3205 int nid_alloc, nid_here;
3206 3206
3207 if (in_interrupt() || (flags & __GFP_THISNODE)) 3207 if (in_interrupt() || (flags & __GFP_THISNODE))
3208 return NULL; 3208 return NULL;
3209 nid_alloc = nid_here = numa_node_id(); 3209 nid_alloc = nid_here = numa_node_id();
3210 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) 3210 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3211 nid_alloc = cpuset_mem_spread_node(); 3211 nid_alloc = cpuset_mem_spread_node();
3212 else if (current->mempolicy) 3212 else if (current->mempolicy)
3213 nid_alloc = slab_node(current->mempolicy); 3213 nid_alloc = slab_node(current->mempolicy);
3214 if (nid_alloc != nid_here) 3214 if (nid_alloc != nid_here)
3215 return ____cache_alloc_node(cachep, flags, nid_alloc); 3215 return ____cache_alloc_node(cachep, flags, nid_alloc);
3216 return NULL; 3216 return NULL;
3217 } 3217 }
3218 3218
3219 /* 3219 /*
3220 * Fallback function if there was no memory available and no objects on a 3220 * Fallback function if there was no memory available and no objects on a
3221 * certain node and fallback is permitted. First we scan all the 3221 * certain node and fallback is permitted. First we scan all the
3222 * available nodelists for available objects. If that fails then we 3222 * available nodelists for available objects. If that fails then we
3223 * perform an allocation without specifying a node. This allows the page 3223 * perform an allocation without specifying a node. This allows the page
3224 * allocator to do its reclaim / fallback magic. We then insert the 3224 * allocator to do its reclaim / fallback magic. We then insert the
3225 * slab into the proper nodelist and then allocate from it. 3225 * slab into the proper nodelist and then allocate from it.
3226 */ 3226 */
3227 static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) 3227 static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3228 { 3228 {
3229 struct zonelist *zonelist; 3229 struct zonelist *zonelist;
3230 gfp_t local_flags; 3230 gfp_t local_flags;
3231 struct zone **z; 3231 struct zone **z;
3232 void *obj = NULL; 3232 void *obj = NULL;
3233 int nid; 3233 int nid;
3234 3234
3235 if (flags & __GFP_THISNODE) 3235 if (flags & __GFP_THISNODE)
3236 return NULL; 3236 return NULL;
3237 3237
3238 zonelist = &NODE_DATA(slab_node(current->mempolicy)) 3238 zonelist = &NODE_DATA(slab_node(current->mempolicy))
3239 ->node_zonelists[gfp_zone(flags)]; 3239 ->node_zonelists[gfp_zone(flags)];
3240 local_flags = (flags & GFP_LEVEL_MASK); 3240 local_flags = (flags & GFP_LEVEL_MASK);
3241 3241
3242 retry: 3242 retry:
3243 /* 3243 /*
3244 * Look through allowed nodes for objects available 3244 * Look through allowed nodes for objects available
3245 * from existing per node queues. 3245 * from existing per node queues.
3246 */ 3246 */
3247 for (z = zonelist->zones; *z && !obj; z++) { 3247 for (z = zonelist->zones; *z && !obj; z++) {
3248 nid = zone_to_nid(*z); 3248 nid = zone_to_nid(*z);
3249 3249
3250 if (cpuset_zone_allowed_hardwall(*z, flags) && 3250 if (cpuset_zone_allowed_hardwall(*z, flags) &&
3251 cache->nodelists[nid] && 3251 cache->nodelists[nid] &&
3252 cache->nodelists[nid]->free_objects) 3252 cache->nodelists[nid]->free_objects)
3253 obj = ____cache_alloc_node(cache, 3253 obj = ____cache_alloc_node(cache,
3254 flags | GFP_THISNODE, nid); 3254 flags | GFP_THISNODE, nid);
3255 } 3255 }
3256 3256
3257 if (!obj) { 3257 if (!obj) {
3258 /* 3258 /*
3259 * This allocation will be performed within the constraints 3259 * This allocation will be performed within the constraints
3260 * of the current cpuset / memory policy requirements. 3260 * of the current cpuset / memory policy requirements.
3261 * We may trigger various forms of reclaim on the allowed 3261 * We may trigger various forms of reclaim on the allowed
3262 * set and go into memory reserves if necessary. 3262 * set and go into memory reserves if necessary.
3263 */ 3263 */
3264 if (local_flags & __GFP_WAIT) 3264 if (local_flags & __GFP_WAIT)
3265 local_irq_enable(); 3265 local_irq_enable();
3266 kmem_flagcheck(cache, flags); 3266 kmem_flagcheck(cache, flags);
3267 obj = kmem_getpages(cache, flags, -1); 3267 obj = kmem_getpages(cache, flags, -1);
3268 if (local_flags & __GFP_WAIT) 3268 if (local_flags & __GFP_WAIT)
3269 local_irq_disable(); 3269 local_irq_disable();
3270 if (obj) { 3270 if (obj) {
3271 /* 3271 /*
3272 * Insert into the appropriate per node queues 3272 * Insert into the appropriate per node queues
3273 */ 3273 */
3274 nid = page_to_nid(virt_to_page(obj)); 3274 nid = page_to_nid(virt_to_page(obj));
3275 if (cache_grow(cache, flags, nid, obj)) { 3275 if (cache_grow(cache, flags, nid, obj)) {
3276 obj = ____cache_alloc_node(cache, 3276 obj = ____cache_alloc_node(cache,
3277 flags | GFP_THISNODE, nid); 3277 flags | GFP_THISNODE, nid);
3278 if (!obj) 3278 if (!obj)
3279 /* 3279 /*
3280 * Another processor may allocate the 3280 * Another processor may allocate the
3281 * objects in the slab since we are 3281 * objects in the slab since we are
3282 * not holding any locks. 3282 * not holding any locks.
3283 */ 3283 */
3284 goto retry; 3284 goto retry;
3285 } else { 3285 } else {
3286 /* cache_grow already freed obj */ 3286 /* cache_grow already freed obj */
3287 obj = NULL; 3287 obj = NULL;
3288 } 3288 }
3289 } 3289 }
3290 } 3290 }
3291 return obj; 3291 return obj;
3292 } 3292 }
3293 3293
3294 /* 3294 /*
3295 * An interface to enable slab creation on nodeid 3295 * An interface to enable slab creation on nodeid
3296 */ 3296 */
3297 static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, 3297 static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3298 int nodeid) 3298 int nodeid)
3299 { 3299 {
3300 struct list_head *entry; 3300 struct list_head *entry;
3301 struct slab *slabp; 3301 struct slab *slabp;
3302 struct kmem_list3 *l3; 3302 struct kmem_list3 *l3;
3303 void *obj; 3303 void *obj;
3304 int x; 3304 int x;
3305 3305
3306 l3 = cachep->nodelists[nodeid]; 3306 l3 = cachep->nodelists[nodeid];
3307 BUG_ON(!l3); 3307 BUG_ON(!l3);
3308 3308
3309 retry: 3309 retry:
3310 check_irq_off(); 3310 check_irq_off();
3311 spin_lock(&l3->list_lock); 3311 spin_lock(&l3->list_lock);
3312 entry = l3->slabs_partial.next; 3312 entry = l3->slabs_partial.next;
3313 if (entry == &l3->slabs_partial) { 3313 if (entry == &l3->slabs_partial) {
3314 l3->free_touched = 1; 3314 l3->free_touched = 1;
3315 entry = l3->slabs_free.next; 3315 entry = l3->slabs_free.next;
3316 if (entry == &l3->slabs_free) 3316 if (entry == &l3->slabs_free)
3317 goto must_grow; 3317 goto must_grow;
3318 } 3318 }
3319 3319
3320 slabp = list_entry(entry, struct slab, list); 3320 slabp = list_entry(entry, struct slab, list);
3321 check_spinlock_acquired_node(cachep, nodeid); 3321 check_spinlock_acquired_node(cachep, nodeid);
3322 check_slabp(cachep, slabp); 3322 check_slabp(cachep, slabp);
3323 3323
3324 STATS_INC_NODEALLOCS(cachep); 3324 STATS_INC_NODEALLOCS(cachep);
3325 STATS_INC_ACTIVE(cachep); 3325 STATS_INC_ACTIVE(cachep);
3326 STATS_SET_HIGH(cachep); 3326 STATS_SET_HIGH(cachep);
3327 3327
3328 BUG_ON(slabp->inuse == cachep->num); 3328 BUG_ON(slabp->inuse == cachep->num);
3329 3329
3330 obj = slab_get_obj(cachep, slabp, nodeid); 3330 obj = slab_get_obj(cachep, slabp, nodeid);
3331 check_slabp(cachep, slabp); 3331 check_slabp(cachep, slabp);
3332 l3->free_objects--; 3332 l3->free_objects--;
3333 /* move slabp to correct slabp list: */ 3333 /* move slabp to correct slabp list: */
3334 list_del(&slabp->list); 3334 list_del(&slabp->list);
3335 3335
3336 if (slabp->free == BUFCTL_END) 3336 if (slabp->free == BUFCTL_END)
3337 list_add(&slabp->list, &l3->slabs_full); 3337 list_add(&slabp->list, &l3->slabs_full);
3338 else 3338 else
3339 list_add(&slabp->list, &l3->slabs_partial); 3339 list_add(&slabp->list, &l3->slabs_partial);
3340 3340
3341 spin_unlock(&l3->list_lock); 3341 spin_unlock(&l3->list_lock);
3342 goto done; 3342 goto done;
3343 3343
3344 must_grow: 3344 must_grow:
3345 spin_unlock(&l3->list_lock); 3345 spin_unlock(&l3->list_lock);
3346 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); 3346 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3347 if (x) 3347 if (x)
3348 goto retry; 3348 goto retry;
3349 3349
3350 return fallback_alloc(cachep, flags); 3350 return fallback_alloc(cachep, flags);
3351 3351
3352 done: 3352 done:
3353 return obj; 3353 return obj;
3354 } 3354 }
3355 3355
3356 /** 3356 /**
3357 * kmem_cache_alloc_node - Allocate an object on the specified node 3357 * kmem_cache_alloc_node - Allocate an object on the specified node
3358 * @cachep: The cache to allocate from. 3358 * @cachep: The cache to allocate from.
3359 * @flags: See kmalloc(). 3359 * @flags: See kmalloc().
3360 * @nodeid: node number of the target node. 3360 * @nodeid: node number of the target node.
3361 * @caller: return address of caller, used for debug information 3361 * @caller: return address of caller, used for debug information
3362 * 3362 *
3363 * Identical to kmem_cache_alloc but it will allocate memory on the given 3363 * Identical to kmem_cache_alloc but it will allocate memory on the given
3364 * node, which can improve the performance for cpu bound structures. 3364 * node, which can improve the performance for cpu bound structures.
3365 * 3365 *
3366 * Fallback to another node is possible if __GFP_THISNODE is not set. 3366 * Fallback to another node is possible if __GFP_THISNODE is not set.
3367 */ 3367 */
3368 static __always_inline void * 3368 static __always_inline void *
3369 __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, 3369 __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3370 void *caller) 3370 void *caller)
3371 { 3371 {
3372 unsigned long save_flags; 3372 unsigned long save_flags;
3373 void *ptr; 3373 void *ptr;
3374 3374
3375 if (should_failslab(cachep, flags)) 3375 if (should_failslab(cachep, flags))
3376 return NULL; 3376 return NULL;
3377 3377
3378 cache_alloc_debugcheck_before(cachep, flags); 3378 cache_alloc_debugcheck_before(cachep, flags);
3379 local_irq_save(save_flags); 3379 local_irq_save(save_flags);
3380 3380
3381 if (unlikely(nodeid == -1)) 3381 if (unlikely(nodeid == -1))
3382 nodeid = numa_node_id(); 3382 nodeid = numa_node_id();
3383 3383
3384 if (unlikely(!cachep->nodelists[nodeid])) { 3384 if (unlikely(!cachep->nodelists[nodeid])) {
3385 /* Node not bootstrapped yet */ 3385 /* Node not bootstrapped yet */
3386 ptr = fallback_alloc(cachep, flags); 3386 ptr = fallback_alloc(cachep, flags);
3387 goto out; 3387 goto out;
3388 } 3388 }
3389 3389
3390 if (nodeid == numa_node_id()) { 3390 if (nodeid == numa_node_id()) {
3391 /* 3391 /*
3392 * Use the locally cached objects if possible. 3392 * Use the locally cached objects if possible.
3393 * However ____cache_alloc does not allow fallback 3393 * However ____cache_alloc does not allow fallback
3394 * to other nodes. It may fail while we still have 3394 * to other nodes. It may fail while we still have
3395 * objects on other nodes available. 3395 * objects on other nodes available.
3396 */ 3396 */
3397 ptr = ____cache_alloc(cachep, flags); 3397 ptr = ____cache_alloc(cachep, flags);
3398 if (ptr) 3398 if (ptr)
3399 goto out; 3399 goto out;
3400 } 3400 }
3401 /* ____cache_alloc_node can fall back to other nodes */ 3401 /* ____cache_alloc_node can fall back to other nodes */
3402 ptr = ____cache_alloc_node(cachep, flags, nodeid); 3402 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3403 out: 3403 out:
3404 local_irq_restore(save_flags); 3404 local_irq_restore(save_flags);
3405 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); 3405 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3406 3406
3407 return ptr; 3407 return ptr;
3408 } 3408 }
3409 3409
3410 static __always_inline void * 3410 static __always_inline void *
3411 __do_cache_alloc(struct kmem_cache *cache, gfp_t flags) 3411 __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3412 { 3412 {
3413 void *objp; 3413 void *objp;
3414 3414
3415 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { 3415 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3416 objp = alternate_node_alloc(cache, flags); 3416 objp = alternate_node_alloc(cache, flags);
3417 if (objp) 3417 if (objp)
3418 goto out; 3418 goto out;
3419 } 3419 }
3420 objp = ____cache_alloc(cache, flags); 3420 objp = ____cache_alloc(cache, flags);
3421 3421
3422 /* 3422 /*
3423 * We may just have run out of memory on the local node. 3423 * We may just have run out of memory on the local node.
3424 * ____cache_alloc_node() knows how to locate memory on other nodes 3424 * ____cache_alloc_node() knows how to locate memory on other nodes
3425 */ 3425 */
3426 if (!objp) 3426 if (!objp)
3427 objp = ____cache_alloc_node(cache, flags, numa_node_id()); 3427 objp = ____cache_alloc_node(cache, flags, numa_node_id());
3428 3428
3429 out: 3429 out:
3430 return objp; 3430 return objp;
3431 } 3431 }
3432 #else 3432 #else
3433 3433
3434 static __always_inline void * 3434 static __always_inline void *
3435 __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags) 3435 __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3436 { 3436 {
3437 return ____cache_alloc(cachep, flags); 3437 return ____cache_alloc(cachep, flags);
3438 } 3438 }
3439 3439
3440 #endif /* CONFIG_NUMA */ 3440 #endif /* CONFIG_NUMA */
3441 3441
3442 static __always_inline void * 3442 static __always_inline void *
3443 __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) 3443 __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3444 { 3444 {
3445 unsigned long save_flags; 3445 unsigned long save_flags;
3446 void *objp; 3446 void *objp;
3447 3447
3448 if (should_failslab(cachep, flags)) 3448 if (should_failslab(cachep, flags))
3449 return NULL; 3449 return NULL;
3450 3450
3451 cache_alloc_debugcheck_before(cachep, flags); 3451 cache_alloc_debugcheck_before(cachep, flags);
3452 local_irq_save(save_flags); 3452 local_irq_save(save_flags);
3453 objp = __do_cache_alloc(cachep, flags); 3453 objp = __do_cache_alloc(cachep, flags);
3454 local_irq_restore(save_flags); 3454 local_irq_restore(save_flags);
3455 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); 3455 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3456 prefetchw(objp); 3456 prefetchw(objp);
3457 3457
3458 return objp; 3458 return objp;
3459 } 3459 }
3460 3460
3461 /* 3461 /*
3462 * Caller needs to acquire correct kmem_list's list_lock 3462 * Caller needs to acquire correct kmem_list's list_lock
3463 */ 3463 */
3464 static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, 3464 static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3465 int node) 3465 int node)
3466 { 3466 {
3467 int i; 3467 int i;
3468 struct kmem_list3 *l3; 3468 struct kmem_list3 *l3;
3469 3469
3470 for (i = 0; i < nr_objects; i++) { 3470 for (i = 0; i < nr_objects; i++) {
3471 void *objp = objpp[i]; 3471 void *objp = objpp[i];
3472 struct slab *slabp; 3472 struct slab *slabp;
3473 3473
3474 slabp = virt_to_slab(objp); 3474 slabp = virt_to_slab(objp);
3475 l3 = cachep->nodelists[node]; 3475 l3 = cachep->nodelists[node];
3476 list_del(&slabp->list); 3476 list_del(&slabp->list);
3477 check_spinlock_acquired_node(cachep, node); 3477 check_spinlock_acquired_node(cachep, node);
3478 check_slabp(cachep, slabp); 3478 check_slabp(cachep, slabp);
3479 slab_put_obj(cachep, slabp, objp, node); 3479 slab_put_obj(cachep, slabp, objp, node);
3480 STATS_DEC_ACTIVE(cachep); 3480 STATS_DEC_ACTIVE(cachep);
3481 l3->free_objects++; 3481 l3->free_objects++;
3482 check_slabp(cachep, slabp); 3482 check_slabp(cachep, slabp);
3483 3483
3484 /* fixup slab chains */ 3484 /* fixup slab chains */
3485 if (slabp->inuse == 0) { 3485 if (slabp->inuse == 0) {
3486 if (l3->free_objects > l3->free_limit) { 3486 if (l3->free_objects > l3->free_limit) {
3487 l3->free_objects -= cachep->num; 3487 l3->free_objects -= cachep->num;
3488 /* No need to drop any previously held 3488 /* No need to drop any previously held
3489 * lock here; even if we have an off-slab slab 3489 * lock here; even if we have an off-slab slab
3490 * descriptor, it is guaranteed to come from 3490 * descriptor, it is guaranteed to come from
3491 * a different cache; refer to comments before 3491 * a different cache; refer to comments before
3492 * alloc_slabmgmt. 3492 * alloc_slabmgmt.
3493 */ 3493 */
3494 slab_destroy(cachep, slabp); 3494 slab_destroy(cachep, slabp);
3495 } else { 3495 } else {
3496 list_add(&slabp->list, &l3->slabs_free); 3496 list_add(&slabp->list, &l3->slabs_free);
3497 } 3497 }
3498 } else { 3498 } else {
3499 /* Unconditionally move a slab to the end of the 3499 /* Unconditionally move a slab to the end of the
3500 * partial list on free - this gives the other 3500 * partial list on free - this gives the other
3501 * objects the maximum time to be freed, too. 3501 * objects the maximum time to be freed, too.
3502 */ 3502 */
3503 list_add_tail(&slabp->list, &l3->slabs_partial); 3503 list_add_tail(&slabp->list, &l3->slabs_partial);
3504 } 3504 }
3505 } 3505 }
3506 } 3506 }
3507 3507
3508 static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) 3508 static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3509 { 3509 {
3510 int batchcount; 3510 int batchcount;
3511 struct kmem_list3 *l3; 3511 struct kmem_list3 *l3;
3512 int node = numa_node_id(); 3512 int node = numa_node_id();
3513 3513
3514 batchcount = ac->batchcount; 3514 batchcount = ac->batchcount;
3515 #if DEBUG 3515 #if DEBUG
3516 BUG_ON(!batchcount || batchcount > ac->avail); 3516 BUG_ON(!batchcount || batchcount > ac->avail);
3517 #endif 3517 #endif
3518 check_irq_off(); 3518 check_irq_off();
3519 l3 = cachep->nodelists[node]; 3519 l3 = cachep->nodelists[node];
3520 spin_lock(&l3->list_lock); 3520 spin_lock(&l3->list_lock);
3521 if (l3->shared) { 3521 if (l3->shared) {
3522 struct array_cache *shared_array = l3->shared; 3522 struct array_cache *shared_array = l3->shared;
3523 int max = shared_array->limit - shared_array->avail; 3523 int max = shared_array->limit - shared_array->avail;
3524 if (max) { 3524 if (max) {
3525 if (batchcount > max) 3525 if (batchcount > max)
3526 batchcount = max; 3526 batchcount = max;
3527 memcpy(&(shared_array->entry[shared_array->avail]), 3527 memcpy(&(shared_array->entry[shared_array->avail]),
3528 ac->entry, sizeof(void *) * batchcount); 3528 ac->entry, sizeof(void *) * batchcount);
3529 shared_array->avail += batchcount; 3529 shared_array->avail += batchcount;
3530 goto free_done; 3530 goto free_done;
3531 } 3531 }
3532 } 3532 }
3533 3533
3534 free_block(cachep, ac->entry, batchcount, node); 3534 free_block(cachep, ac->entry, batchcount, node);
3535 free_done: 3535 free_done:
3536 #if STATS 3536 #if STATS
3537 { 3537 {
3538 int i = 0; 3538 int i = 0;
3539 struct list_head *p; 3539 struct list_head *p;
3540 3540
3541 p = l3->slabs_free.next; 3541 p = l3->slabs_free.next;
3542 while (p != &(l3->slabs_free)) { 3542 while (p != &(l3->slabs_free)) {
3543 struct slab *slabp; 3543 struct slab *slabp;
3544 3544
3545 slabp = list_entry(p, struct slab, list); 3545 slabp = list_entry(p, struct slab, list);
3546 BUG_ON(slabp->inuse); 3546 BUG_ON(slabp->inuse);
3547 3547
3548 i++; 3548 i++;
3549 p = p->next; 3549 p = p->next;
3550 } 3550 }
3551 STATS_SET_FREEABLE(cachep, i); 3551 STATS_SET_FREEABLE(cachep, i);
3552 } 3552 }
3553 #endif 3553 #endif
3554 spin_unlock(&l3->list_lock); 3554 spin_unlock(&l3->list_lock);
3555 ac->avail -= batchcount; 3555 ac->avail -= batchcount;
3556 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); 3556 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3557 } 3557 }
3558 3558
3559 /* 3559 /*
3560 * Release an obj back to its cache. If the obj has a constructed state, it must 3560 * Release an obj back to its cache. If the obj has a constructed state, it must
3561 * be in this state _before_ it is released. Called with interrupts disabled. 3561 * be in this state _before_ it is released. Called with interrupts disabled.
3562 */ 3562 */
3563 static inline void __cache_free(struct kmem_cache *cachep, void *objp) 3563 static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3564 { 3564 {
3565 struct array_cache *ac = cpu_cache_get(cachep); 3565 struct array_cache *ac = cpu_cache_get(cachep);
3566 3566
3567 check_irq_off(); 3567 check_irq_off();
3568 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); 3568 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
3569 3569
3570 if (use_alien_caches && cache_free_alien(cachep, objp)) 3570 if (use_alien_caches && cache_free_alien(cachep, objp))
3571 return; 3571 return;
3572 3572
3573 if (likely(ac->avail < ac->limit)) { 3573 if (likely(ac->avail < ac->limit)) {
3574 STATS_INC_FREEHIT(cachep); 3574 STATS_INC_FREEHIT(cachep);
3575 ac->entry[ac->avail++] = objp; 3575 ac->entry[ac->avail++] = objp;
3576 return; 3576 return;
3577 } else { 3577 } else {
3578 STATS_INC_FREEMISS(cachep); 3578 STATS_INC_FREEMISS(cachep);
3579 cache_flusharray(cachep, ac); 3579 cache_flusharray(cachep, ac);
3580 ac->entry[ac->avail++] = objp; 3580 ac->entry[ac->avail++] = objp;
3581 } 3581 }
3582 } 3582 }
3583 3583
3584 /** 3584 /**
3585 * kmem_cache_alloc - Allocate an object 3585 * kmem_cache_alloc - Allocate an object
3586 * @cachep: The cache to allocate from. 3586 * @cachep: The cache to allocate from.
3587 * @flags: See kmalloc(). 3587 * @flags: See kmalloc().
3588 * 3588 *
3589 * Allocate an object from this cache. The flags are only relevant 3589 * Allocate an object from this cache. The flags are only relevant
3590 * if the cache has no available objects. 3590 * if the cache has no available objects.
3591 */ 3591 */
3592 void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) 3592 void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3593 { 3593 {
3594 return __cache_alloc(cachep, flags, __builtin_return_address(0)); 3594 return __cache_alloc(cachep, flags, __builtin_return_address(0));
3595 } 3595 }
3596 EXPORT_SYMBOL(kmem_cache_alloc); 3596 EXPORT_SYMBOL(kmem_cache_alloc);
3597 3597
3598 /** 3598 /**
3599 * kmem_cache_zalloc - Allocate an object. The memory is set to zero. 3599 * kmem_cache_zalloc - Allocate an object. The memory is set to zero.
3600 * @cache: The cache to allocate from. 3600 * @cache: The cache to allocate from.
3601 * @flags: See kmalloc(). 3601 * @flags: See kmalloc().
3602 * 3602 *
3603 * Allocate an object from this cache and set the allocated memory to zero. 3603 * Allocate an object from this cache and set the allocated memory to zero.
3604 * The flags are only relevant if the cache has no available objects. 3604 * The flags are only relevant if the cache has no available objects.
3605 */ 3605 */
3606 void *kmem_cache_zalloc(struct kmem_cache *cache, gfp_t flags) 3606 void *kmem_cache_zalloc(struct kmem_cache *cache, gfp_t flags)
3607 { 3607 {
3608 void *ret = __cache_alloc(cache, flags, __builtin_return_address(0)); 3608 void *ret = __cache_alloc(cache, flags, __builtin_return_address(0));
3609 if (ret) 3609 if (ret)
3610 memset(ret, 0, obj_size(cache)); 3610 memset(ret, 0, obj_size(cache));
3611 return ret; 3611 return ret;
3612 } 3612 }
3613 EXPORT_SYMBOL(kmem_cache_zalloc); 3613 EXPORT_SYMBOL(kmem_cache_zalloc);
3614 3614
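A minimal usage sketch for the two allocators above, not taken from this patch: struct foo, foo_cache and foo_init() are hypothetical, and the six-argument kmem_cache_create() of this kernel era is assumed (ctor/dtor left NULL).

#include <linux/slab.h>
#include <linux/init.h>
#include <linux/errno.h>

struct foo {
	int id;
	char name[16];
};

static struct kmem_cache *foo_cache;

static int __init foo_init(void)
{
	struct foo *f;

	/* one cache per object type; objects get hardware cacheline alignment */
	foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo), 0,
				      SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!foo_cache)
		return -ENOMEM;

	/* kmem_cache_zalloc() is kmem_cache_alloc() plus a memset to zero */
	f = kmem_cache_zalloc(foo_cache, GFP_KERNEL);
	if (!f) {
		kmem_cache_destroy(foo_cache);
		return -ENOMEM;
	}
	f->id = 1;

	kmem_cache_free(foo_cache, f);		/* object returns to foo_cache */
	kmem_cache_destroy(foo_cache);
	return 0;
}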
3615 /** 3615 /**
3616 * kmem_ptr_validate - check if an untrusted pointer might 3616 * kmem_ptr_validate - check if an untrusted pointer might
3617 * be a slab entry. 3617 * be a slab entry.
3618 * @cachep: the cache we're checking against 3618 * @cachep: the cache we're checking against
3619 * @ptr: pointer to validate 3619 * @ptr: pointer to validate
3620 * 3620 *
3621 * This verifies that the untrusted pointer looks sane: 3621 * This verifies that the untrusted pointer looks sane:
3622 * it is _not_ a guarantee that the pointer is actually 3622 * it is _not_ a guarantee that the pointer is actually
3623 * part of the slab cache in question, but it at least 3623 * part of the slab cache in question, but it at least
3624 * validates that the pointer can be dereferenced and 3624 * validates that the pointer can be dereferenced and
3625 * looks half-way sane. 3625 * looks half-way sane.
3626 * 3626 *
3627 * Currently only used for dentry validation. 3627 * Currently only used for dentry validation.
3628 */ 3628 */
3629 int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr) 3629 int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
3630 { 3630 {
3631 unsigned long addr = (unsigned long)ptr; 3631 unsigned long addr = (unsigned long)ptr;
3632 unsigned long min_addr = PAGE_OFFSET; 3632 unsigned long min_addr = PAGE_OFFSET;
3633 unsigned long align_mask = BYTES_PER_WORD - 1; 3633 unsigned long align_mask = BYTES_PER_WORD - 1;
3634 unsigned long size = cachep->buffer_size; 3634 unsigned long size = cachep->buffer_size;
3635 struct page *page; 3635 struct page *page;
3636 3636
3637 if (unlikely(addr < min_addr)) 3637 if (unlikely(addr < min_addr))
3638 goto out; 3638 goto out;
3639 if (unlikely(addr > (unsigned long)high_memory - size)) 3639 if (unlikely(addr > (unsigned long)high_memory - size))
3640 goto out; 3640 goto out;
3641 if (unlikely(addr & align_mask)) 3641 if (unlikely(addr & align_mask))
3642 goto out; 3642 goto out;
3643 if (unlikely(!kern_addr_valid(addr))) 3643 if (unlikely(!kern_addr_valid(addr)))
3644 goto out; 3644 goto out;
3645 if (unlikely(!kern_addr_valid(addr + size - 1))) 3645 if (unlikely(!kern_addr_valid(addr + size - 1)))
3646 goto out; 3646 goto out;
3647 page = virt_to_page(ptr); 3647 page = virt_to_page(ptr);
3648 if (unlikely(!PageSlab(page))) 3648 if (unlikely(!PageSlab(page)))
3649 goto out; 3649 goto out;
3650 if (unlikely(page_get_cache(page) != cachep)) 3650 if (unlikely(page_get_cache(page) != cachep))
3651 goto out; 3651 goto out;
3652 return 1; 3652 return 1;
3653 out: 3653 out:
3654 return 0; 3654 return 0;
3655 } 3655 }
3656 3656
3657 #ifdef CONFIG_NUMA 3657 #ifdef CONFIG_NUMA
3658 void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) 3658 void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3659 { 3659 {
3660 return __cache_alloc_node(cachep, flags, nodeid, 3660 return __cache_alloc_node(cachep, flags, nodeid,
3661 __builtin_return_address(0)); 3661 __builtin_return_address(0));
3662 } 3662 }
3663 EXPORT_SYMBOL(kmem_cache_alloc_node); 3663 EXPORT_SYMBOL(kmem_cache_alloc_node);
3664 3664
3665 static __always_inline void * 3665 static __always_inline void *
3666 __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) 3666 __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3667 { 3667 {
3668 struct kmem_cache *cachep; 3668 struct kmem_cache *cachep;
3669 3669
3670 cachep = kmem_find_general_cachep(size, flags); 3670 cachep = kmem_find_general_cachep(size, flags);
3671 if (unlikely(cachep == NULL)) 3671 if (unlikely(cachep == NULL))
3672 return NULL; 3672 return NULL;
3673 return kmem_cache_alloc_node(cachep, flags, node); 3673 return kmem_cache_alloc_node(cachep, flags, node);
3674 } 3674 }
3675 3675
3676 #ifdef CONFIG_DEBUG_SLAB 3676 #ifdef CONFIG_DEBUG_SLAB
3677 void *__kmalloc_node(size_t size, gfp_t flags, int node) 3677 void *__kmalloc_node(size_t size, gfp_t flags, int node)
3678 { 3678 {
3679 return __do_kmalloc_node(size, flags, node, 3679 return __do_kmalloc_node(size, flags, node,
3680 __builtin_return_address(0)); 3680 __builtin_return_address(0));
3681 } 3681 }
3682 EXPORT_SYMBOL(__kmalloc_node); 3682 EXPORT_SYMBOL(__kmalloc_node);
3683 3683
3684 void *__kmalloc_node_track_caller(size_t size, gfp_t flags, 3684 void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
3685 int node, void *caller) 3685 int node, void *caller)
3686 { 3686 {
3687 return __do_kmalloc_node(size, flags, node, caller); 3687 return __do_kmalloc_node(size, flags, node, caller);
3688 } 3688 }
3689 EXPORT_SYMBOL(__kmalloc_node_track_caller); 3689 EXPORT_SYMBOL(__kmalloc_node_track_caller);
3690 #else 3690 #else
3691 void *__kmalloc_node(size_t size, gfp_t flags, int node) 3691 void *__kmalloc_node(size_t size, gfp_t flags, int node)
3692 { 3692 {
3693 return __do_kmalloc_node(size, flags, node, NULL); 3693 return __do_kmalloc_node(size, flags, node, NULL);
3694 } 3694 }
3695 EXPORT_SYMBOL(__kmalloc_node); 3695 EXPORT_SYMBOL(__kmalloc_node);
3696 #endif /* CONFIG_DEBUG_SLAB */ 3696 #endif /* CONFIG_DEBUG_SLAB */
3697 #endif /* CONFIG_NUMA */ 3697 #endif /* CONFIG_NUMA */
3698 3698
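A short sketch of the NUMA-aware entry points above (struct foo_stats and foo_stats_alloc() are hypothetical); because __GFP_THISNODE is not passed, fallback to another node remains possible, as described in the comments.

#include <linux/slab.h>
#include <linux/string.h>

/* Hypothetical per-node statistics block. */
struct foo_stats {
	unsigned long hits;
	unsigned long misses;
};

/* Place node nid's statistics in that node's own memory. */
static struct foo_stats *foo_stats_alloc(int nid)
{
	struct foo_stats *s = kmalloc_node(sizeof(*s), GFP_KERNEL, nid);

	if (s)
		memset(s, 0, sizeof(*s));
	return s;
}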
3699 /** 3699 /**
3700 * __do_kmalloc - allocate memory 3700 * __do_kmalloc - allocate memory
3701 * @size: how many bytes of memory are required. 3701 * @size: how many bytes of memory are required.
3702 * @flags: the type of memory to allocate (see kmalloc). 3702 * @flags: the type of memory to allocate (see kmalloc).
3703 * @caller: function caller for debug tracking of the caller 3703 * @caller: function caller for debug tracking of the caller
3704 */ 3704 */
3705 static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, 3705 static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3706 void *caller) 3706 void *caller)
3707 { 3707 {
3708 struct kmem_cache *cachep; 3708 struct kmem_cache *cachep;
3709 3709
3710 /* If you want to save a few bytes of .text space: replace 3710 /* If you want to save a few bytes of .text space: replace
3711 * __ with kmem_. 3711 * __ with kmem_.
3712 * Then kmalloc uses the uninlined functions instead of the inline 3712 * Then kmalloc uses the uninlined functions instead of the inline
3713 * functions. 3713 * functions.
3714 */ 3714 */
3715 cachep = __find_general_cachep(size, flags); 3715 cachep = __find_general_cachep(size, flags);
3716 if (unlikely(cachep == NULL)) 3716 if (unlikely(cachep == NULL))
3717 return NULL; 3717 return NULL;
3718 return __cache_alloc(cachep, flags, caller); 3718 return __cache_alloc(cachep, flags, caller);
3719 } 3719 }
3720 3720
3721 3721
3722 #ifdef CONFIG_DEBUG_SLAB 3722 #ifdef CONFIG_DEBUG_SLAB
3723 void *__kmalloc(size_t size, gfp_t flags) 3723 void *__kmalloc(size_t size, gfp_t flags)
3724 { 3724 {
3725 return __do_kmalloc(size, flags, __builtin_return_address(0)); 3725 return __do_kmalloc(size, flags, __builtin_return_address(0));
3726 } 3726 }
3727 EXPORT_SYMBOL(__kmalloc); 3727 EXPORT_SYMBOL(__kmalloc);
3728 3728
3729 void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) 3729 void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
3730 { 3730 {
3731 return __do_kmalloc(size, flags, caller); 3731 return __do_kmalloc(size, flags, caller);
3732 } 3732 }
3733 EXPORT_SYMBOL(__kmalloc_track_caller); 3733 EXPORT_SYMBOL(__kmalloc_track_caller);
3734 3734
3735 #else 3735 #else
3736 void *__kmalloc(size_t size, gfp_t flags) 3736 void *__kmalloc(size_t size, gfp_t flags)
3737 { 3737 {
3738 return __do_kmalloc(size, flags, NULL); 3738 return __do_kmalloc(size, flags, NULL);
3739 } 3739 }
3740 EXPORT_SYMBOL(__kmalloc); 3740 EXPORT_SYMBOL(__kmalloc);
3741 #endif 3741 #endif
3742 3742
3743 /** 3743 /**
3744 * krealloc - reallocate memory. The contents will remain unchanged. 3744 * krealloc - reallocate memory. The contents will remain unchanged.
3745 * 3745 *
3746 * @p: object to reallocate memory for. 3746 * @p: object to reallocate memory for.
3747 * @new_size: how many bytes of memory are required. 3747 * @new_size: how many bytes of memory are required.
3748 * @flags: the type of memory to allocate. 3748 * @flags: the type of memory to allocate.
3749 * 3749 *
3750 * The contents of the object pointed to are preserved up to the 3750 * The contents of the object pointed to are preserved up to the
3751 * lesser of the new and old sizes. If @p is %NULL, krealloc() 3751 * lesser of the new and old sizes. If @p is %NULL, krealloc()
3752 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a 3752 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
3753 * %NULL pointer, the object pointed to is freed. 3753 * %NULL pointer, the object pointed to is freed.
3754 */ 3754 */
3755 void *krealloc(const void *p, size_t new_size, gfp_t flags) 3755 void *krealloc(const void *p, size_t new_size, gfp_t flags)
3756 { 3756 {
3757 struct kmem_cache *cache, *new_cache; 3757 struct kmem_cache *cache, *new_cache;
3758 void *ret; 3758 void *ret;
3759 3759
3760 if (unlikely(!p)) 3760 if (unlikely(!p))
3761 return kmalloc_track_caller(new_size, flags); 3761 return kmalloc_track_caller(new_size, flags);
3762 3762
3763 if (unlikely(!new_size)) { 3763 if (unlikely(!new_size)) {
3764 kfree(p); 3764 kfree(p);
3765 return NULL; 3765 return NULL;
3766 } 3766 }
3767 3767
3768 cache = virt_to_cache(p); 3768 cache = virt_to_cache(p);
3769 new_cache = __find_general_cachep(new_size, flags); 3769 new_cache = __find_general_cachep(new_size, flags);
3770 3770
3771 /* 3771 /*
3772 * If new size fits in the current cache, bail out. 3772 * If new size fits in the current cache, bail out.
3773 */ 3773 */
3774 if (likely(cache == new_cache)) 3774 if (likely(cache == new_cache))
3775 return (void *)p; 3775 return (void *)p;
3776 3776
3777 /* 3777 /*
3778 * We are on the slow-path here so do not use __cache_alloc 3778 * We are on the slow-path here so do not use __cache_alloc
3779 * because it bloats kernel text. 3779 * because it bloats kernel text.
3780 */ 3780 */
3781 ret = kmalloc_track_caller(new_size, flags); 3781 ret = kmalloc_track_caller(new_size, flags);
3782 if (ret) { 3782 if (ret) {
3783 memcpy(ret, p, min(new_size, ksize(p))); 3783 memcpy(ret, p, min(new_size, ksize(p)));
3784 kfree(p); 3784 kfree(p);
3785 } 3785 }
3786 return ret; 3786 return ret;
3787 } 3787 }
3788 EXPORT_SYMBOL(krealloc); 3788 EXPORT_SYMBOL(krealloc);
3789 3789
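A small sketch of the documented semantics (grow_buf() is hypothetical): krealloc(NULL, n, f) behaves like kmalloc(), krealloc(p, 0, f) frees p and returns NULL, and on allocation failure the original buffer is left untouched.

#include <linux/slab.h>
#include <linux/errno.h>

static int grow_buf(char **buf, size_t new_size, gfp_t flags)
{
	char *tmp = krealloc(*buf, new_size, flags);

	if (!tmp && new_size)
		return -ENOMEM;		/* *buf is still valid and unchanged */

	*buf = tmp;			/* NULL here only when new_size == 0 */
	return 0;
}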
3790 /** 3790 /**
3791 * kmem_cache_free - Deallocate an object 3791 * kmem_cache_free - Deallocate an object
3792 * @cachep: The cache the allocation was from. 3792 * @cachep: The cache the allocation was from.
3793 * @objp: The previously allocated object. 3793 * @objp: The previously allocated object.
3794 * 3794 *
3795 * Free an object which was previously allocated from this 3795 * Free an object which was previously allocated from this
3796 * cache. 3796 * cache.
3797 */ 3797 */
3798 void kmem_cache_free(struct kmem_cache *cachep, void *objp) 3798 void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3799 { 3799 {
3800 unsigned long flags; 3800 unsigned long flags;
3801 3801
3802 BUG_ON(virt_to_cache(objp) != cachep); 3802 BUG_ON(virt_to_cache(objp) != cachep);
3803 3803
3804 local_irq_save(flags); 3804 local_irq_save(flags);
3805 debug_check_no_locks_freed(objp, obj_size(cachep)); 3805 debug_check_no_locks_freed(objp, obj_size(cachep));
3806 __cache_free(cachep, objp); 3806 __cache_free(cachep, objp);
3807 local_irq_restore(flags); 3807 local_irq_restore(flags);
3808 } 3808 }
3809 EXPORT_SYMBOL(kmem_cache_free); 3809 EXPORT_SYMBOL(kmem_cache_free);
3810 3810
3811 /** 3811 /**
3812 * kfree - free previously allocated memory 3812 * kfree - free previously allocated memory
3813 * @objp: pointer returned by kmalloc. 3813 * @objp: pointer returned by kmalloc.
3814 * 3814 *
3815 * If @objp is NULL, no operation is performed. 3815 * If @objp is NULL, no operation is performed.
3816 * 3816 *
3817 * Don't free memory not originally allocated by kmalloc() 3817 * Don't free memory not originally allocated by kmalloc()
3818 * or you will run into trouble. 3818 * or you will run into trouble.
3819 */ 3819 */
3820 void kfree(const void *objp) 3820 void kfree(const void *objp)
3821 { 3821 {
3822 struct kmem_cache *c; 3822 struct kmem_cache *c;
3823 unsigned long flags; 3823 unsigned long flags;
3824 3824
3825 if (unlikely(!objp)) 3825 if (unlikely(!objp))
3826 return; 3826 return;
3827 local_irq_save(flags); 3827 local_irq_save(flags);
3828 kfree_debugcheck(objp); 3828 kfree_debugcheck(objp);
3829 c = virt_to_cache(objp); 3829 c = virt_to_cache(objp);
3830 debug_check_no_locks_freed(objp, obj_size(c)); 3830 debug_check_no_locks_freed(objp, obj_size(c));
3831 __cache_free(c, (void *)objp); 3831 __cache_free(c, (void *)objp);
3832 local_irq_restore(flags); 3832 local_irq_restore(flags);
3833 } 3833 }
3834 EXPORT_SYMBOL(kfree); 3834 EXPORT_SYMBOL(kfree);
3835 3835
3836 unsigned int kmem_cache_size(struct kmem_cache *cachep) 3836 unsigned int kmem_cache_size(struct kmem_cache *cachep)
3837 { 3837 {
3838 return obj_size(cachep); 3838 return obj_size(cachep);
3839 } 3839 }
3840 EXPORT_SYMBOL(kmem_cache_size); 3840 EXPORT_SYMBOL(kmem_cache_size);
3841 3841
3842 const char *kmem_cache_name(struct kmem_cache *cachep) 3842 const char *kmem_cache_name(struct kmem_cache *cachep)
3843 { 3843 {
3844 return cachep->name; 3844 return cachep->name;
3845 } 3845 }
3846 EXPORT_SYMBOL_GPL(kmem_cache_name); 3846 EXPORT_SYMBOL_GPL(kmem_cache_name);
3847 3847
3848 /* 3848 /*
3849 * This initializes kmem_list3 or resizes various caches for all nodes. 3849 * This initializes kmem_list3 or resizes various caches for all nodes.
3850 */ 3850 */
3851 static int alloc_kmemlist(struct kmem_cache *cachep) 3851 static int alloc_kmemlist(struct kmem_cache *cachep)
3852 { 3852 {
3853 int node; 3853 int node;
3854 struct kmem_list3 *l3; 3854 struct kmem_list3 *l3;
3855 struct array_cache *new_shared; 3855 struct array_cache *new_shared;
3856 struct array_cache **new_alien = NULL; 3856 struct array_cache **new_alien = NULL;
3857 3857
3858 for_each_online_node(node) { 3858 for_each_online_node(node) {
3859 3859
3860 if (use_alien_caches) { 3860 if (use_alien_caches) {
3861 new_alien = alloc_alien_cache(node, cachep->limit); 3861 new_alien = alloc_alien_cache(node, cachep->limit);
3862 if (!new_alien) 3862 if (!new_alien)
3863 goto fail; 3863 goto fail;
3864 } 3864 }
3865 3865
3866 new_shared = NULL; 3866 new_shared = NULL;
3867 if (cachep->shared) { 3867 if (cachep->shared) {
3868 new_shared = alloc_arraycache(node, 3868 new_shared = alloc_arraycache(node,
3869 cachep->shared*cachep->batchcount, 3869 cachep->shared*cachep->batchcount,
3870 0xbaadf00d); 3870 0xbaadf00d);
3871 if (!new_shared) { 3871 if (!new_shared) {
3872 free_alien_cache(new_alien); 3872 free_alien_cache(new_alien);
3873 goto fail; 3873 goto fail;
3874 } 3874 }
3875 } 3875 }
3876 3876
3877 l3 = cachep->nodelists[node]; 3877 l3 = cachep->nodelists[node];
3878 if (l3) { 3878 if (l3) {
3879 struct array_cache *shared = l3->shared; 3879 struct array_cache *shared = l3->shared;
3880 3880
3881 spin_lock_irq(&l3->list_lock); 3881 spin_lock_irq(&l3->list_lock);
3882 3882
3883 if (shared) 3883 if (shared)
3884 free_block(cachep, shared->entry, 3884 free_block(cachep, shared->entry,
3885 shared->avail, node); 3885 shared->avail, node);
3886 3886
3887 l3->shared = new_shared; 3887 l3->shared = new_shared;
3888 if (!l3->alien) { 3888 if (!l3->alien) {
3889 l3->alien = new_alien; 3889 l3->alien = new_alien;
3890 new_alien = NULL; 3890 new_alien = NULL;
3891 } 3891 }
3892 l3->free_limit = (1 + nr_cpus_node(node)) * 3892 l3->free_limit = (1 + nr_cpus_node(node)) *
3893 cachep->batchcount + cachep->num; 3893 cachep->batchcount + cachep->num;
3894 spin_unlock_irq(&l3->list_lock); 3894 spin_unlock_irq(&l3->list_lock);
3895 kfree(shared); 3895 kfree(shared);
3896 free_alien_cache(new_alien); 3896 free_alien_cache(new_alien);
3897 continue; 3897 continue;
3898 } 3898 }
3899 l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); 3899 l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
3900 if (!l3) { 3900 if (!l3) {
3901 free_alien_cache(new_alien); 3901 free_alien_cache(new_alien);
3902 kfree(new_shared); 3902 kfree(new_shared);
3903 goto fail; 3903 goto fail;
3904 } 3904 }
3905 3905
3906 kmem_list3_init(l3); 3906 kmem_list3_init(l3);
3907 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + 3907 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
3908 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 3908 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
3909 l3->shared = new_shared; 3909 l3->shared = new_shared;
3910 l3->alien = new_alien; 3910 l3->alien = new_alien;
3911 l3->free_limit = (1 + nr_cpus_node(node)) * 3911 l3->free_limit = (1 + nr_cpus_node(node)) *
3912 cachep->batchcount + cachep->num; 3912 cachep->batchcount + cachep->num;
3913 cachep->nodelists[node] = l3; 3913 cachep->nodelists[node] = l3;
3914 } 3914 }
3915 return 0; 3915 return 0;
3916 3916
3917 fail: 3917 fail:
3918 if (!cachep->next.next) { 3918 if (!cachep->next.next) {
3919 /* Cache is not active yet. Roll back what we did */ 3919 /* Cache is not active yet. Roll back what we did */
3920 node--; 3920 node--;
3921 while (node >= 0) { 3921 while (node >= 0) {
3922 if (cachep->nodelists[node]) { 3922 if (cachep->nodelists[node]) {
3923 l3 = cachep->nodelists[node]; 3923 l3 = cachep->nodelists[node];
3924 3924
3925 kfree(l3->shared); 3925 kfree(l3->shared);
3926 free_alien_cache(l3->alien); 3926 free_alien_cache(l3->alien);
3927 kfree(l3); 3927 kfree(l3);
3928 cachep->nodelists[node] = NULL; 3928 cachep->nodelists[node] = NULL;
3929 } 3929 }
3930 node--; 3930 node--;
3931 } 3931 }
3932 } 3932 }
3933 return -ENOMEM; 3933 return -ENOMEM;
3934 } 3934 }
3935 3935
3936 struct ccupdate_struct { 3936 struct ccupdate_struct {
3937 struct kmem_cache *cachep; 3937 struct kmem_cache *cachep;
3938 struct array_cache *new[NR_CPUS]; 3938 struct array_cache *new[NR_CPUS];
3939 }; 3939 };
3940 3940
3941 static void do_ccupdate_local(void *info) 3941 static void do_ccupdate_local(void *info)
3942 { 3942 {
3943 struct ccupdate_struct *new = info; 3943 struct ccupdate_struct *new = info;
3944 struct array_cache *old; 3944 struct array_cache *old;
3945 3945
3946 check_irq_off(); 3946 check_irq_off();
3947 old = cpu_cache_get(new->cachep); 3947 old = cpu_cache_get(new->cachep);
3948 3948
3949 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()]; 3949 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
3950 new->new[smp_processor_id()] = old; 3950 new->new[smp_processor_id()] = old;
3951 } 3951 }
3952 3952
3953 /* Always called with the cache_chain_mutex held */ 3953 /* Always called with the cache_chain_mutex held */
3954 static int do_tune_cpucache(struct kmem_cache *cachep, int limit, 3954 static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3955 int batchcount, int shared) 3955 int batchcount, int shared)
3956 { 3956 {
3957 struct ccupdate_struct *new; 3957 struct ccupdate_struct *new;
3958 int i; 3958 int i;
3959 3959
3960 new = kzalloc(sizeof(*new), GFP_KERNEL); 3960 new = kzalloc(sizeof(*new), GFP_KERNEL);
3961 if (!new) 3961 if (!new)
3962 return -ENOMEM; 3962 return -ENOMEM;
3963 3963
3964 for_each_online_cpu(i) { 3964 for_each_online_cpu(i) {
3965 new->new[i] = alloc_arraycache(cpu_to_node(i), limit, 3965 new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
3966 batchcount); 3966 batchcount);
3967 if (!new->new[i]) { 3967 if (!new->new[i]) {
3968 for (i--; i >= 0; i--) 3968 for (i--; i >= 0; i--)
3969 kfree(new->new[i]); 3969 kfree(new->new[i]);
3970 kfree(new); 3970 kfree(new);
3971 return -ENOMEM; 3971 return -ENOMEM;
3972 } 3972 }
3973 } 3973 }
3974 new->cachep = cachep; 3974 new->cachep = cachep;
3975 3975
3976 on_each_cpu(do_ccupdate_local, (void *)new, 1, 1); 3976 on_each_cpu(do_ccupdate_local, (void *)new, 1, 1);
3977 3977
3978 check_irq_on(); 3978 check_irq_on();
3979 cachep->batchcount = batchcount; 3979 cachep->batchcount = batchcount;
3980 cachep->limit = limit; 3980 cachep->limit = limit;
3981 cachep->shared = shared; 3981 cachep->shared = shared;
3982 3982
3983 for_each_online_cpu(i) { 3983 for_each_online_cpu(i) {
3984 struct array_cache *ccold = new->new[i]; 3984 struct array_cache *ccold = new->new[i];
3985 if (!ccold) 3985 if (!ccold)
3986 continue; 3986 continue;
3987 spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); 3987 spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
3988 free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); 3988 free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
3989 spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); 3989 spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
3990 kfree(ccold); 3990 kfree(ccold);
3991 } 3991 }
3992 kfree(new); 3992 kfree(new);
3993 return alloc_kmemlist(cachep); 3993 return alloc_kmemlist(cachep);
3994 } 3994 }
3995 3995
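For reference, do_tune_cpucache() is also reachable at run time: assuming the usual writable /proc/slabinfo interface of this era, writing a line of the form "<cache name> <limit> <batchcount> <shared>" to it ends up here under cache_chain_mutex, the new per-CPU arrays are swapped in via on_each_cpu(do_ccupdate_local, ...), and the old ones are drained with free_block() and freed.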
3996 /* Called with cache_chain_mutex held always */ 3996 /* Called with cache_chain_mutex held always */
3997 static int enable_cpucache(struct kmem_cache *cachep) 3997 static int enable_cpucache(struct kmem_cache *cachep)
3998 { 3998 {
3999 int err; 3999 int err;
4000 int limit, shared; 4000 int limit, shared;
4001 4001
4002 /* 4002 /*
4003 * The head array serves three purposes: 4003 * The head array serves three purposes:
4004 * - create a LIFO ordering, i.e. return objects that are cache-warm 4004 * - create a LIFO ordering, i.e. return objects that are cache-warm
4005 * - reduce the number of spinlock operations. 4005 * - reduce the number of spinlock operations.
4006 * - reduce the number of linked list operations on the slab and 4006 * - reduce the number of linked list operations on the slab and
4007 * bufctl chains: array operations are cheaper. 4007 * bufctl chains: array operations are cheaper.
4008 * The numbers are guesses; we should auto-tune them as described by 4008 * The numbers are guesses; we should auto-tune them as described by
4009 * Bonwick. 4009 * Bonwick.
4010 */ 4010 */
4011 if (cachep->buffer_size > 131072) 4011 if (cachep->buffer_size > 131072)
4012 limit = 1; 4012 limit = 1;
4013 else if (cachep->buffer_size > PAGE_SIZE) 4013 else if (cachep->buffer_size > PAGE_SIZE)
4014 limit = 8; 4014 limit = 8;
4015 else if (cachep->buffer_size > 1024) 4015 else if (cachep->buffer_size > 1024)
4016 limit = 24; 4016 limit = 24;
4017 else if (cachep->buffer_size > 256) 4017 else if (cachep->buffer_size > 256)
4018 limit = 54; 4018 limit = 54;
4019 else 4019 else
4020 limit = 120; 4020 limit = 120;
4021 4021
4022 /* 4022 /*
4023 * CPU bound tasks (e.g. network routing) can exhibit cpu bound 4023 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
4024 * allocation behaviour: Most allocs on one cpu, most free operations 4024 * allocation behaviour: Most allocs on one cpu, most free operations
4025 * on another cpu. For these cases, an efficient object passing between 4025 * on another cpu. For these cases, an efficient object passing between
4026 * cpus is necessary. This is provided by a shared array. The array 4026 * cpus is necessary. This is provided by a shared array. The array
4027 * replaces Bonwick's magazine layer. 4027 * replaces Bonwick's magazine layer.
4028 * On uniprocessor, it's functionally equivalent (but less efficient) 4028 * On uniprocessor, it's functionally equivalent (but less efficient)
4029 * to a larger limit. Thus disabled by default. 4029 * to a larger limit. Thus disabled by default.
4030 */ 4030 */
4031 shared = 0; 4031 shared = 0;
4032 if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1) 4032 if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
4033 shared = 8; 4033 shared = 8;
4034 4034
4035 #if DEBUG 4035 #if DEBUG
4036 /* 4036 /*
4037 * With debugging enabled, a large batchcount leads to excessively long 4037 * With debugging enabled, a large batchcount leads to excessively long
4038 * periods with local interrupts disabled. Limit the batchcount. 4038 * periods with local interrupts disabled. Limit the batchcount.
4039 */ 4039 */
4040 if (limit > 32) 4040 if (limit > 32)
4041 limit = 32; 4041 limit = 32;
4042 #endif 4042 #endif
4043 err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared); 4043 err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared);
4044 if (err) 4044 if (err)
4045 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", 4045 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
4046 cachep->name, -err); 4046 cachep->name, -err);
4047 return err; 4047 return err;
4048 } 4048 }
4049 4049
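The size ladder above maps an object size to a per-CPU array depth, and the batchcount handed to do_tune_cpucache() is roughly half of that limit. Below is a minimal user-space sketch of the same heuristic for a few example sizes; pick_limit() and the 4K PAGE_SIZE are illustrative assumptions, not kernel interfaces.

#include <stdio.h>

#define PAGE_SIZE 4096UL        /* assumption: 4K pages */

/* Mirrors the ladder in enable_cpucache(): large objects get shallow
 * per-CPU arrays, small objects get deep ones. */
static int pick_limit(unsigned long buffer_size)
{
        if (buffer_size > 131072)
                return 1;
        if (buffer_size > PAGE_SIZE)
                return 8;
        if (buffer_size > 1024)
                return 24;
        if (buffer_size > 256)
                return 54;
        return 120;
}

int main(void)
{
        unsigned long sizes[] = { 32, 192, 512, 2048, 8192, 262144 };
        int i;

        for (i = 0; i < 6; i++) {
                int limit = pick_limit(sizes[i]);

                /* batchcount is (limit + 1) / 2, as passed to do_tune_cpucache() */
                printf("size %6lu -> limit %3d batch %3d\n",
                       sizes[i], limit, (limit + 1) / 2);
        }
        return 0;
}
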
4050 /* 4050 /*
4051 * Drain an array if it contains any elements, taking the l3 lock only if 4051 * Drain an array if it contains any elements, taking the l3 lock only if
4052 * necessary. Note that the l3 listlock also protects the array_cache 4052 * necessary. Note that the l3 listlock also protects the array_cache
4053 * if drain_array() is used on the shared array. 4053 * if drain_array() is used on the shared array.
4054 */ 4054 */
4055 void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, 4055 void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4056 struct array_cache *ac, int force, int node) 4056 struct array_cache *ac, int force, int node)
4057 { 4057 {
4058 int tofree; 4058 int tofree;
4059 4059
4060 if (!ac || !ac->avail) 4060 if (!ac || !ac->avail)
4061 return; 4061 return;
4062 if (ac->touched && !force) { 4062 if (ac->touched && !force) {
4063 ac->touched = 0; 4063 ac->touched = 0;
4064 } else { 4064 } else {
4065 spin_lock_irq(&l3->list_lock); 4065 spin_lock_irq(&l3->list_lock);
4066 if (ac->avail) { 4066 if (ac->avail) {
4067 tofree = force ? ac->avail : (ac->limit + 4) / 5; 4067 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4068 if (tofree > ac->avail) 4068 if (tofree > ac->avail)
4069 tofree = (ac->avail + 1) / 2; 4069 tofree = (ac->avail + 1) / 2;
4070 free_block(cachep, ac->entry, tofree, node); 4070 free_block(cachep, ac->entry, tofree, node);
4071 ac->avail -= tofree; 4071 ac->avail -= tofree;
4072 memmove(ac->entry, &(ac->entry[tofree]), 4072 memmove(ac->entry, &(ac->entry[tofree]),
4073 sizeof(void *) * ac->avail); 4073 sizeof(void *) * ac->avail);
4074 } 4074 }
4075 spin_unlock_irq(&l3->list_lock); 4075 spin_unlock_irq(&l3->list_lock);
4076 } 4076 }
4077 } 4077 }
4078 4078
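On a periodic (force == 0) pass, drain_array() frees only a slice of the cached objects: roughly one fifth of ac->limit, capped at half of what is currently available, so a busy array is never emptied in one go. A small sketch of just that arithmetic follows; partial_drain_count() is a hypothetical name used for illustration.

/* How many entries a drain_array() pass would free: everything when
 * forced, otherwise about limit/5 but never more than half of avail. */
static int partial_drain_count(int avail, int limit, int force)
{
        int tofree = force ? avail : (limit + 4) / 5;

        if (tofree > avail)
                tofree = (avail + 1) / 2;
        return tofree;
}
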
4079 /** 4079 /**
4080 * cache_reap - Reclaim memory from caches. 4080 * cache_reap - Reclaim memory from caches.
4081 * @w: work descriptor 4081 * @w: work descriptor
4082 * 4082 *
4083 * Called from workqueue/eventd every few seconds. 4083 * Called from workqueue/eventd every few seconds.
4084 * Purpose: 4084 * Purpose:
4085 * - clear the per-cpu caches for this CPU. 4085 * - clear the per-cpu caches for this CPU.
4086 * - return freeable pages to the main free memory pool. 4086 * - return freeable pages to the main free memory pool.
4087 * 4087 *
4088 * If we cannot acquire the cache chain mutex then just give up - we'll try 4088 * If we cannot acquire the cache chain mutex then just give up - we'll try
4089 * again on the next iteration. 4089 * again on the next iteration.
4090 */ 4090 */
4091 static void cache_reap(struct work_struct *w) 4091 static void cache_reap(struct work_struct *w)
4092 { 4092 {
4093 struct kmem_cache *searchp; 4093 struct kmem_cache *searchp;
4094 struct kmem_list3 *l3; 4094 struct kmem_list3 *l3;
4095 int node = numa_node_id(); 4095 int node = numa_node_id();
4096 struct delayed_work *work = 4096 struct delayed_work *work =
4097 container_of(w, struct delayed_work, work); 4097 container_of(w, struct delayed_work, work);
4098 4098
4099 if (!mutex_trylock(&cache_chain_mutex)) 4099 if (!mutex_trylock(&cache_chain_mutex))
4100 /* Give up. Set up the next iteration. */ 4100 /* Give up. Set up the next iteration. */
4101 goto out; 4101 goto out;
4102 4102
4103 list_for_each_entry(searchp, &cache_chain, next) { 4103 list_for_each_entry(searchp, &cache_chain, next) {
4104 check_irq_on(); 4104 check_irq_on();
4105 4105
4106 /* 4106 /*
4107 * We only take the l3 lock if absolutely necessary and we 4107 * We only take the l3 lock if absolutely necessary and we
4108 * have established with reasonable certainty that 4108 * have established with reasonable certainty that
4109 * we can do some work if the lock was obtained. 4109 * we can do some work if the lock was obtained.
4110 */ 4110 */
4111 l3 = searchp->nodelists[node]; 4111 l3 = searchp->nodelists[node];
4112 4112
4113 reap_alien(searchp, l3); 4113 reap_alien(searchp, l3);
4114 4114
4115 drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); 4115 drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
4116 4116
4117 /* 4117 /*
4118 * These are racy checks but it does not matter 4118 * These are racy checks but it does not matter
4119 * if we skip one check or scan twice. 4119 * if we skip one check or scan twice.
4120 */ 4120 */
4121 if (time_after(l3->next_reap, jiffies)) 4121 if (time_after(l3->next_reap, jiffies))
4122 goto next; 4122 goto next;
4123 4123
4124 l3->next_reap = jiffies + REAPTIMEOUT_LIST3; 4124 l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
4125 4125
4126 drain_array(searchp, l3, l3->shared, 0, node); 4126 drain_array(searchp, l3, l3->shared, 0, node);
4127 4127
4128 if (l3->free_touched) 4128 if (l3->free_touched)
4129 l3->free_touched = 0; 4129 l3->free_touched = 0;
4130 else { 4130 else {
4131 int freed; 4131 int freed;
4132 4132
4133 freed = drain_freelist(searchp, l3, (l3->free_limit + 4133 freed = drain_freelist(searchp, l3, (l3->free_limit +
4134 5 * searchp->num - 1) / (5 * searchp->num)); 4134 5 * searchp->num - 1) / (5 * searchp->num));
4135 STATS_ADD_REAPED(searchp, freed); 4135 STATS_ADD_REAPED(searchp, freed);
4136 } 4136 }
4137 next: 4137 next:
4138 cond_resched(); 4138 cond_resched();
4139 } 4139 }
4140 check_irq_on(); 4140 check_irq_on();
4141 mutex_unlock(&cache_chain_mutex); 4141 mutex_unlock(&cache_chain_mutex);
4142 next_reap_node(); 4142 next_reap_node();
4143 refresh_cpu_vm_stats(smp_processor_id()); 4143 refresh_cpu_vm_stats(smp_processor_id());
4144 out: 4144 out:
4145 /* Set up the next iteration */ 4145 /* Set up the next iteration */
4146 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); 4146 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4147 } 4147 }
4148 4148
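cache_reap() shows a common pattern for periodic kernel housekeeping: a delayed work item that backs off with mutex_trylock() instead of blocking, and re-arms itself on every exit through round_jiffies_relative() so timer expiries round to whole seconds and can coalesce with other timers. A minimal sketch of that pattern with a hypothetical work item and period (my_work, my_mutex, my_reap() and MY_REAP_TIMEOUT are assumptions, not slab code):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/jiffies.h>
#include <linux/timer.h>        /* round_jiffies_relative() */

#define MY_REAP_TIMEOUT (2 * HZ)        /* assumption: arbitrary 2s period */

static DEFINE_MUTEX(my_mutex);
static void my_reap(struct work_struct *w);
static DECLARE_DELAYED_WORK(my_work, my_reap);

static void my_reap(struct work_struct *w)
{
        struct delayed_work *work = container_of(w, struct delayed_work, work);

        if (!mutex_trylock(&my_mutex))
                goto out;       /* contended: skip this round rather than block */

        /* ... periodic scan goes here ... */

        mutex_unlock(&my_mutex);
out:
        /* Re-arm; rounding lets expiries batch on second boundaries. */
        schedule_delayed_work(work, round_jiffies_relative(MY_REAP_TIMEOUT));
}

static int __init my_reap_init(void)
{
        /* Arm the first run; every run re-arms the next one. */
        schedule_delayed_work(&my_work, round_jiffies_relative(MY_REAP_TIMEOUT));
        return 0;
}
module_init(my_reap_init);
MODULE_LICENSE("GPL");
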
4149 #ifdef CONFIG_PROC_FS 4149 #ifdef CONFIG_PROC_FS
4150 4150
4151 static void print_slabinfo_header(struct seq_file *m) 4151 static void print_slabinfo_header(struct seq_file *m)
4152 { 4152 {
4153 /* 4153 /*
4154 * Output format version, so at least we can change it 4154 * Output format version, so at least we can change it
4155 * without _too_ many complaints. 4155 * without _too_ many complaints.
4156 */ 4156 */
4157 #if STATS 4157 #if STATS
4158 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); 4158 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
4159 #else 4159 #else
4160 seq_puts(m, "slabinfo - version: 2.1\n"); 4160 seq_puts(m, "slabinfo - version: 2.1\n");
4161 #endif 4161 #endif
4162 seq_puts(m, "# name <active_objs> <num_objs> <objsize> " 4162 seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
4163 "<objperslab> <pagesperslab>"); 4163 "<objperslab> <pagesperslab>");
4164 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); 4164 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
4165 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); 4165 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
4166 #if STATS 4166 #if STATS
4167 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> " 4167 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
4168 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>"); 4168 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
4169 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); 4169 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
4170 #endif 4170 #endif
4171 seq_putc(m, '\n'); 4171 seq_putc(m, '\n');
4172 } 4172 }
4173 4173
4174 static void *s_start(struct seq_file *m, loff_t *pos) 4174 static void *s_start(struct seq_file *m, loff_t *pos)
4175 { 4175 {
4176 loff_t n = *pos; 4176 loff_t n = *pos;
4177 struct list_head *p; 4177 struct list_head *p;
4178 4178
4179 mutex_lock(&cache_chain_mutex); 4179 mutex_lock(&cache_chain_mutex);
4180 if (!n) 4180 if (!n)
4181 print_slabinfo_header(m); 4181 print_slabinfo_header(m);
4182 p = cache_chain.next; 4182 p = cache_chain.next;
4183 while (n--) { 4183 while (n--) {
4184 p = p->next; 4184 p = p->next;
4185 if (p == &cache_chain) 4185 if (p == &cache_chain)
4186 return NULL; 4186 return NULL;
4187 } 4187 }
4188 return list_entry(p, struct kmem_cache, next); 4188 return list_entry(p, struct kmem_cache, next);
4189 } 4189 }
4190 4190
4191 static void *s_next(struct seq_file *m, void *p, loff_t *pos) 4191 static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4192 { 4192 {
4193 struct kmem_cache *cachep = p; 4193 struct kmem_cache *cachep = p;
4194 ++*pos; 4194 ++*pos;
4195 return cachep->next.next == &cache_chain ? 4195 return cachep->next.next == &cache_chain ?
4196 NULL : list_entry(cachep->next.next, struct kmem_cache, next); 4196 NULL : list_entry(cachep->next.next, struct kmem_cache, next);
4197 } 4197 }
4198 4198
4199 static void s_stop(struct seq_file *m, void *p) 4199 static void s_stop(struct seq_file *m, void *p)
4200 { 4200 {
4201 mutex_unlock(&cache_chain_mutex); 4201 mutex_unlock(&cache_chain_mutex);
4202 } 4202 }
4203 4203
4204 static int s_show(struct seq_file *m, void *p) 4204 static int s_show(struct seq_file *m, void *p)
4205 { 4205 {
4206 struct kmem_cache *cachep = p; 4206 struct kmem_cache *cachep = p;
4207 struct slab *slabp; 4207 struct slab *slabp;
4208 unsigned long active_objs; 4208 unsigned long active_objs;
4209 unsigned long num_objs; 4209 unsigned long num_objs;
4210 unsigned long active_slabs = 0; 4210 unsigned long active_slabs = 0;
4211 unsigned long num_slabs, free_objects = 0, shared_avail = 0; 4211 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
4212 const char *name; 4212 const char *name;
4213 char *error = NULL; 4213 char *error = NULL;
4214 int node; 4214 int node;
4215 struct kmem_list3 *l3; 4215 struct kmem_list3 *l3;
4216 4216
4217 active_objs = 0; 4217 active_objs = 0;
4218 num_slabs = 0; 4218 num_slabs = 0;
4219 for_each_online_node(node) { 4219 for_each_online_node(node) {
4220 l3 = cachep->nodelists[node]; 4220 l3 = cachep->nodelists[node];
4221 if (!l3) 4221 if (!l3)
4222 continue; 4222 continue;
4223 4223
4224 check_irq_on(); 4224 check_irq_on();
4225 spin_lock_irq(&l3->list_lock); 4225 spin_lock_irq(&l3->list_lock);
4226 4226
4227 list_for_each_entry(slabp, &l3->slabs_full, list) { 4227 list_for_each_entry(slabp, &l3->slabs_full, list) {
4228 if (slabp->inuse != cachep->num && !error) 4228 if (slabp->inuse != cachep->num && !error)
4229 error = "slabs_full accounting error"; 4229 error = "slabs_full accounting error";
4230 active_objs += cachep->num; 4230 active_objs += cachep->num;
4231 active_slabs++; 4231 active_slabs++;
4232 } 4232 }
4233 list_for_each_entry(slabp, &l3->slabs_partial, list) { 4233 list_for_each_entry(slabp, &l3->slabs_partial, list) {
4234 if (slabp->inuse == cachep->num && !error) 4234 if (slabp->inuse == cachep->num && !error)
4235 error = "slabs_partial inuse accounting error"; 4235 error = "slabs_partial inuse accounting error";
4236 if (!slabp->inuse && !error) 4236 if (!slabp->inuse && !error)
4237 error = "slabs_partial/inuse accounting error"; 4237 error = "slabs_partial/inuse accounting error";
4238 active_objs += slabp->inuse; 4238 active_objs += slabp->inuse;
4239 active_slabs++; 4239 active_slabs++;
4240 } 4240 }
4241 list_for_each_entry(slabp, &l3->slabs_free, list) { 4241 list_for_each_entry(slabp, &l3->slabs_free, list) {
4242 if (slabp->inuse && !error) 4242 if (slabp->inuse && !error)
4243 error = "slabs_free/inuse accounting error"; 4243 error = "slabs_free/inuse accounting error";
4244 num_slabs++; 4244 num_slabs++;
4245 } 4245 }
4246 free_objects += l3->free_objects; 4246 free_objects += l3->free_objects;
4247 if (l3->shared) 4247 if (l3->shared)
4248 shared_avail += l3->shared->avail; 4248 shared_avail += l3->shared->avail;
4249 4249
4250 spin_unlock_irq(&l3->list_lock); 4250 spin_unlock_irq(&l3->list_lock);
4251 } 4251 }
4252 num_slabs += active_slabs; 4252 num_slabs += active_slabs;
4253 num_objs = num_slabs * cachep->num; 4253 num_objs = num_slabs * cachep->num;
4254 if (num_objs - active_objs != free_objects && !error) 4254 if (num_objs - active_objs != free_objects && !error)
4255 error = "free_objects accounting error"; 4255 error = "free_objects accounting error";
4256 4256
4257 name = cachep->name; 4257 name = cachep->name;
4258 if (error) 4258 if (error)
4259 printk(KERN_ERR "slab: cache %s error: %s\n", name, error); 4259 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4260 4260
4261 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", 4261 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4262 name, active_objs, num_objs, cachep->buffer_size, 4262 name, active_objs, num_objs, cachep->buffer_size,
4263 cachep->num, (1 << cachep->gfporder)); 4263 cachep->num, (1 << cachep->gfporder));
4264 seq_printf(m, " : tunables %4u %4u %4u", 4264 seq_printf(m, " : tunables %4u %4u %4u",
4265 cachep->limit, cachep->batchcount, cachep->shared); 4265 cachep->limit, cachep->batchcount, cachep->shared);
4266 seq_printf(m, " : slabdata %6lu %6lu %6lu", 4266 seq_printf(m, " : slabdata %6lu %6lu %6lu",
4267 active_slabs, num_slabs, shared_avail); 4267 active_slabs, num_slabs, shared_avail);
4268 #if STATS 4268 #if STATS
4269 { /* list3 stats */ 4269 { /* list3 stats */
4270 unsigned long high = cachep->high_mark; 4270 unsigned long high = cachep->high_mark;
4271 unsigned long allocs = cachep->num_allocations; 4271 unsigned long allocs = cachep->num_allocations;
4272 unsigned long grown = cachep->grown; 4272 unsigned long grown = cachep->grown;
4273 unsigned long reaped = cachep->reaped; 4273 unsigned long reaped = cachep->reaped;
4274 unsigned long errors = cachep->errors; 4274 unsigned long errors = cachep->errors;
4275 unsigned long max_freeable = cachep->max_freeable; 4275 unsigned long max_freeable = cachep->max_freeable;
4276 unsigned long node_allocs = cachep->node_allocs; 4276 unsigned long node_allocs = cachep->node_allocs;
4277 unsigned long node_frees = cachep->node_frees; 4277 unsigned long node_frees = cachep->node_frees;
4278 unsigned long overflows = cachep->node_overflow; 4278 unsigned long overflows = cachep->node_overflow;
4279 4279
4280 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ 4280 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
4281 %4lu %4lu %4lu %4lu %4lu", allocs, high, grown, 4281 %4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
4282 reaped, errors, max_freeable, node_allocs, 4282 reaped, errors, max_freeable, node_allocs,
4283 node_frees, overflows); 4283 node_frees, overflows);
4284 } 4284 }
4285 /* cpu stats */ 4285 /* cpu stats */
4286 { 4286 {
4287 unsigned long allochit = atomic_read(&cachep->allochit); 4287 unsigned long allochit = atomic_read(&cachep->allochit);
4288 unsigned long allocmiss = atomic_read(&cachep->allocmiss); 4288 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4289 unsigned long freehit = atomic_read(&cachep->freehit); 4289 unsigned long freehit = atomic_read(&cachep->freehit);
4290 unsigned long freemiss = atomic_read(&cachep->freemiss); 4290 unsigned long freemiss = atomic_read(&cachep->freemiss);
4291 4291
4292 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu", 4292 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4293 allochit, allocmiss, freehit, freemiss); 4293 allochit, allocmiss, freehit, freemiss);
4294 } 4294 }
4295 #endif 4295 #endif
4296 seq_putc(m, '\n'); 4296 seq_putc(m, '\n');
4297 return 0; 4297 return 0;
4298 } 4298 }
4299 4299
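Each s_show() call emits one record in slabinfo 2.1 format: name, active objects, total objects, object size, objects per slab and pages per slab, followed by the tunables and slabdata groups. A small user-space sketch that pulls the first six fields out of /proc/slabinfo; the buffer and field widths are assumptions chosen for safety, not part of the format:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char line[512];
        FILE *f = fopen("/proc/slabinfo", "r");

        if (!f) {
                perror("/proc/slabinfo");
                return 1;
        }
        while (fgets(line, sizeof(line), f)) {
                char name[64];
                unsigned long active, total;
                unsigned int objsize, objperslab, pagesperslab;

                if (line[0] == '#' || !strncmp(line, "slabinfo", 8))
                        continue;       /* header lines */
                if (sscanf(line, "%63s %lu %lu %u %u %u", name, &active,
                           &total, &objsize, &objperslab, &pagesperslab) == 6)
                        printf("%-20s %6lu/%6lu objs of %u bytes\n",
                               name, active, total, objsize);
        }
        fclose(f);
        return 0;
}
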
4300 /* 4300 /*
4301 * slabinfo_op - iterator that generates /proc/slabinfo 4301 * slabinfo_op - iterator that generates /proc/slabinfo
4302 * 4302 *
4303 * Output layout: 4303 * Output layout:
4304 * cache-name 4304 * cache-name
4305 * num-active-objs 4305 * num-active-objs
4306 * total-objs 4306 * total-objs
4307 * object size 4307 * object size
4308 * num-active-slabs 4308 * num-active-slabs
4309 * total-slabs 4309 * total-slabs
4310 * num-pages-per-slab 4310 * num-pages-per-slab
4311 * + further values on SMP and with statistics enabled 4311 * + further values on SMP and with statistics enabled
4312 */ 4312 */
4313 4313
4314 const struct seq_operations slabinfo_op = { 4314 const struct seq_operations slabinfo_op = {
4315 .start = s_start, 4315 .start = s_start,
4316 .next = s_next, 4316 .next = s_next,
4317 .stop = s_stop, 4317 .stop = s_stop,
4318 .show = s_show, 4318 .show = s_show,
4319 }; 4319 };
4320 4320
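slabinfo_op is a plain seq_file iterator: s_start() takes cache_chain_mutex and seeks to *pos, s_next() walks the chain, s_stop() drops the mutex, and s_show() formats one cache per call. Below is a hedged sketch of how such a table is typically exposed through seq_open(); the actual /proc/slabinfo registration happens elsewhere (in fs/proc), so the open helper and fops here are illustrative only.

#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/fs.h>

/* Assumes slabinfo_op and slabinfo_write() are visible here. */
static int slabinfo_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &slabinfo_op);
}

static const struct file_operations slabinfo_fops = {
        .open    = slabinfo_open,
        .read    = seq_read,            /* seq_file handles buffering/resume */
        .write   = slabinfo_write,      /* the tuning interface defined below */
        .llseek  = seq_lseek,
        .release = seq_release,
};
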
4321 #define MAX_SLABINFO_WRITE 128 4321 #define MAX_SLABINFO_WRITE 128
4322 /** 4322 /**
4323 * slabinfo_write - Tuning for the slab allocator 4323 * slabinfo_write - Tuning for the slab allocator
4324 * @file: unused 4324 * @file: unused
4325 * @buffer: user buffer 4325 * @buffer: user buffer
4326 * @count: data length 4326 * @count: data length
4327 * @ppos: unused 4327 * @ppos: unused
4328 */ 4328 */
4329 ssize_t slabinfo_write(struct file *file, const char __user * buffer, 4329 ssize_t slabinfo_write(struct file *file, const char __user * buffer,
4330 size_t count, loff_t *ppos) 4330 size_t count, loff_t *ppos)
4331 { 4331 {
4332 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp; 4332 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4333 int limit, batchcount, shared, res; 4333 int limit, batchcount, shared, res;
4334 struct kmem_cache *cachep; 4334 struct kmem_cache *cachep;
4335 4335
4336 if (count > MAX_SLABINFO_WRITE) 4336 if (count > MAX_SLABINFO_WRITE)
4337 return -EINVAL; 4337 return -EINVAL;
4338 if (copy_from_user(&kbuf, buffer, count)) 4338 if (copy_from_user(&kbuf, buffer, count))
4339 return -EFAULT; 4339 return -EFAULT;
4340 kbuf[MAX_SLABINFO_WRITE] = '\0'; 4340 kbuf[MAX_SLABINFO_WRITE] = '\0';
4341 4341
4342 tmp = strchr(kbuf, ' '); 4342 tmp = strchr(kbuf, ' ');
4343 if (!tmp) 4343 if (!tmp)
4344 return -EINVAL; 4344 return -EINVAL;
4345 *tmp = '\0'; 4345 *tmp = '\0';
4346 tmp++; 4346 tmp++;
4347 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3) 4347 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4348 return -EINVAL; 4348 return -EINVAL;
4349 4349
4350 /* Find the cache in the chain of caches. */ 4350 /* Find the cache in the chain of caches. */
4351 mutex_lock(&cache_chain_mutex); 4351 mutex_lock(&cache_chain_mutex);
4352 res = -EINVAL; 4352 res = -EINVAL;
4353 list_for_each_entry(cachep, &cache_chain, next) { 4353 list_for_each_entry(cachep, &cache_chain, next) {
4354 if (!strcmp(cachep->name, kbuf)) { 4354 if (!strcmp(cachep->name, kbuf)) {
4355 if (limit < 1 || batchcount < 1 || 4355 if (limit < 1 || batchcount < 1 ||
4356 batchcount > limit || shared < 0) { 4356 batchcount > limit || shared < 0) {
4357 res = 0; 4357 res = 0;
4358 } else { 4358 } else {
4359 res = do_tune_cpucache(cachep, limit, 4359 res = do_tune_cpucache(cachep, limit,
4360 batchcount, shared); 4360 batchcount, shared);
4361 } 4361 }
4362 break; 4362 break;
4363 } 4363 }
4364 } 4364 }
4365 mutex_unlock(&cache_chain_mutex); 4365 mutex_unlock(&cache_chain_mutex);
4366 if (res >= 0) 4366 if (res >= 0)
4367 res = count; 4367 res = count;
4368 return res; 4368 return res;
4369 } 4369 }
4370 4370
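The write side expects one line of the form "cache-name limit batchcount shared"; a batchcount outside 1..limit or a negative shared value is silently accepted (res = 0) without retuning anything. A small user-space sketch that retunes a cache through this interface; the cache name and numbers are examples only, and writing /proc/slabinfo normally requires root.

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/slabinfo", "w");

        if (!f) {
                perror("/proc/slabinfo");
                return 1;
        }
        /* "<name> <limit> <batchcount> <shared>": batchcount must be at
         * least 1 and no larger than limit, shared must be >= 0. */
        fprintf(f, "dentry 120 60 8\n");
        if (fclose(f) != 0) {
                perror("write");
                return 1;
        }
        return 0;
}
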
4371 #ifdef CONFIG_DEBUG_SLAB_LEAK 4371 #ifdef CONFIG_DEBUG_SLAB_LEAK
4372 4372
4373 static void *leaks_start(struct seq_file *m, loff_t *pos) 4373 static void *leaks_start(struct seq_file *m, loff_t *pos)
4374 { 4374 {
4375 loff_t n = *pos; 4375 loff_t n = *pos;
4376 struct list_head *p; 4376 struct list_head *p;
4377 4377
4378 mutex_lock(&cache_chain_mutex); 4378 mutex_lock(&cache_chain_mutex);
4379 p = cache_chain.next; 4379 p = cache_chain.next;
4380 while (n--) { 4380 while (n--) {
4381 p = p->next; 4381 p = p->next;
4382 if (p == &cache_chain) 4382 if (p == &cache_chain)
4383 return NULL; 4383 return NULL;
4384 } 4384 }
4385 return list_entry(p, struct kmem_cache, next); 4385 return list_entry(p, struct kmem_cache, next);
4386 } 4386 }
4387 4387
4388 static inline int add_caller(unsigned long *n, unsigned long v) 4388 static inline int add_caller(unsigned long *n, unsigned long v)
4389 { 4389 {
4390 unsigned long *p; 4390 unsigned long *p;
4391 int l; 4391 int l;
4392 if (!v) 4392 if (!v)
4393 return 1; 4393 return 1;
4394 l = n[1]; 4394 l = n[1];
4395 p = n + 2; 4395 p = n + 2;
4396 while (l) { 4396 while (l) {
4397 int i = l/2; 4397 int i = l/2;
4398 unsigned long *q = p + 2 * i; 4398 unsigned long *q = p + 2 * i;
4399 if (*q == v) { 4399 if (*q == v) {
4400 q[1]++; 4400 q[1]++;
4401 return 1; 4401 return 1;
4402 } 4402 }
4403 if (*q > v) { 4403 if (*q > v) {
4404 l = i; 4404 l = i;
4405 } else { 4405 } else {
4406 p = q + 2; 4406 p = q + 2;
4407 l -= i + 1; 4407 l -= i + 1;
4408 } 4408 }
4409 } 4409 }
4410 if (++n[1] == n[0]) 4410 if (++n[1] == n[0])
4411 return 0; 4411 return 0;
4412 memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n)); 4412 memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n));
4413 p[0] = v; 4413 p[0] = v;
4414 p[1] = 1; 4414 p[1] = 1;
4415 return 1; 4415 return 1;
4416 } 4416 }
4417 4417
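add_caller() treats the scratch buffer as a flat table: n[0] is the pair limit (the function bails out when n[1] reaches it), n[1] is the number of pairs in use, and (caller address, hit count) pairs start at n[2], kept sorted by address so lookups are a binary search and inserts are a memmove. The user-space harness below exercises the same routine on a few fake return addresses; the table size and the addresses are arbitrary assumptions.

#include <stdio.h>
#include <string.h>

/* Same layout as m->private in leaks_show(): n[0] = pair limit,
 * n[1] = pairs used, sorted (address, count) pairs from n[2] on. */
static int add_caller(unsigned long *n, unsigned long v)
{
        unsigned long *p;
        int l;

        if (!v)
                return 1;
        l = n[1];
        p = n + 2;
        while (l) {
                int i = l / 2;
                unsigned long *q = p + 2 * i;

                if (*q == v) {          /* already recorded: bump the count */
                        q[1]++;
                        return 1;
                }
                if (*q > v) {
                        l = i;
                } else {
                        p = q + 2;
                        l -= i + 1;
                }
        }
        if (++n[1] == n[0])             /* table full: caller must grow it */
                return 0;
        memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n));
        p[0] = v;
        p[1] = 1;
        return 1;
}

int main(void)
{
        unsigned long n[2 + 2 * 8] = { 8, 0 };  /* room for up to 7 callers */
        unsigned long callers[] = { 0xc0100000, 0xc0200000, 0xc0100000, 0xc0180000 };
        unsigned long i;

        for (i = 0; i < 4; i++)
                add_caller(n, callers[i]);
        for (i = 0; i < n[1]; i++)
                printf("caller %#lx seen %lu time(s)\n", n[2 + 2 * i], n[3 + 2 * i]);
        return 0;
}
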
4418 static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) 4418 static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4419 { 4419 {
4420 void *p; 4420 void *p;
4421 int i; 4421 int i;
4422 if (n[0] == n[1]) 4422 if (n[0] == n[1])
4423 return; 4423 return;
4424 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) { 4424 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
4425 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) 4425 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4426 continue; 4426 continue;
4427 if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) 4427 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4428 return; 4428 return;
4429 } 4429 }
4430 } 4430 }
4431 4431
4432 static void show_symbol(struct seq_file *m, unsigned long address) 4432 static void show_symbol(struct seq_file *m, unsigned long address)
4433 { 4433 {
4434 #ifdef CONFIG_KALLSYMS 4434 #ifdef CONFIG_KALLSYMS
4435 char *modname;
4436 const char *name;
4437 unsigned long offset, size; 4435 unsigned long offset, size;
4438 char namebuf[KSYM_NAME_LEN+1]; 4436 char modname[MODULE_NAME_LEN + 1], name[KSYM_NAME_LEN + 1];
4439 4437
4440 name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); 4438 if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
4441
4442 if (name) {
4443 seq_printf(m, "%s+%#lx/%#lx", name, offset, size); 4439 seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
4444 if (modname) 4440 if (modname[0])
4445 seq_printf(m, " [%s]", modname); 4441 seq_printf(m, " [%s]", modname);
4446 return; 4442 return;
4447 } 4443 }
4448 #endif 4444 #endif
4449 seq_printf(m, "%p", (void *)address); 4445 seq_printf(m, "%p", (void *)address);
4450 } 4446 }
4451 4447
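This hunk is the heart of the fix: kallsyms_lookup() hands back pointers into a module's own symbol and name tables, so a concurrent rmmod could free the strings before seq_printf() ran. The new lookup_symbol_attrs(), whose prototype this patch adds to kallsyms.h, instead copies the names into buffers the caller owns. A hedged sketch of the same pattern used to format an address into a caller-supplied string (it assumes CONFIG_KALLSYMS, as show_symbol() does; format_caller() is a hypothetical helper, not kernel API):

#include <linux/kallsyms.h>
#include <linux/module.h>       /* MODULE_NAME_LEN */
#include <linux/kernel.h>       /* snprintf() */

/* Format "symbol+off/size [module]" into buf; buf should be at least
 * KSYM_SYMBOL_LEN bytes. No pointers into module memory are kept. */
static void format_caller(char *buf, size_t buflen, unsigned long addr)
{
        unsigned long size, offset;
        char modname[MODULE_NAME_LEN + 1], name[KSYM_NAME_LEN + 1];

        if (lookup_symbol_attrs(addr, &size, &offset, modname, name)) {
                snprintf(buf, buflen, "0x%lx", addr);   /* lookup failed */
                return;
        }
        if (modname[0])
                snprintf(buf, buflen, "%s+%#lx/%#lx [%s]", name, offset, size, modname);
        else
                snprintf(buf, buflen, "%s+%#lx/%#lx", name, offset, size);
}
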
4452 static int leaks_show(struct seq_file *m, void *p) 4448 static int leaks_show(struct seq_file *m, void *p)
4453 { 4449 {
4454 struct kmem_cache *cachep = p; 4450 struct kmem_cache *cachep = p;
4455 struct slab *slabp; 4451 struct slab *slabp;
4456 struct kmem_list3 *l3; 4452 struct kmem_list3 *l3;
4457 const char *name; 4453 const char *name;
4458 unsigned long *n = m->private; 4454 unsigned long *n = m->private;
4459 int node; 4455 int node;
4460 int i; 4456 int i;
4461 4457
4462 if (!(cachep->flags & SLAB_STORE_USER)) 4458 if (!(cachep->flags & SLAB_STORE_USER))
4463 return 0; 4459 return 0;
4464 if (!(cachep->flags & SLAB_RED_ZONE)) 4460 if (!(cachep->flags & SLAB_RED_ZONE))
4465 return 0; 4461 return 0;
4466 4462
4467 /* OK, we can do it */ 4463 /* OK, we can do it */
4468 4464
4469 n[1] = 0; 4465 n[1] = 0;
4470 4466
4471 for_each_online_node(node) { 4467 for_each_online_node(node) {
4472 l3 = cachep->nodelists[node]; 4468 l3 = cachep->nodelists[node];
4473 if (!l3) 4469 if (!l3)
4474 continue; 4470 continue;
4475 4471
4476 check_irq_on(); 4472 check_irq_on();
4477 spin_lock_irq(&l3->list_lock); 4473 spin_lock_irq(&l3->list_lock);
4478 4474
4479 list_for_each_entry(slabp, &l3->slabs_full, list) 4475 list_for_each_entry(slabp, &l3->slabs_full, list)
4480 handle_slab(n, cachep, slabp); 4476 handle_slab(n, cachep, slabp);
4481 list_for_each_entry(slabp, &l3->slabs_partial, list) 4477 list_for_each_entry(slabp, &l3->slabs_partial, list)
4482 handle_slab(n, cachep, slabp); 4478 handle_slab(n, cachep, slabp);
4483 spin_unlock_irq(&l3->list_lock); 4479 spin_unlock_irq(&l3->list_lock);
4484 } 4480 }
4485 name = cachep->name; 4481 name = cachep->name;
4486 if (n[0] == n[1]) { 4482 if (n[0] == n[1]) {
4487 /* Increase the buffer size */ 4483 /* Increase the buffer size */
4488 mutex_unlock(&cache_chain_mutex); 4484 mutex_unlock(&cache_chain_mutex);
4489 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL); 4485 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
4490 if (!m->private) { 4486 if (!m->private) {
4491 /* Too bad, we are really out */ 4487 /* Too bad, we are really out */
4492 m->private = n; 4488 m->private = n;
4493 mutex_lock(&cache_chain_mutex); 4489 mutex_lock(&cache_chain_mutex);
4494 return -ENOMEM; 4490 return -ENOMEM;
4495 } 4491 }
4496 *(unsigned long *)m->private = n[0] * 2; 4492 *(unsigned long *)m->private = n[0] * 2;
4497 kfree(n); 4493 kfree(n);
4498 mutex_lock(&cache_chain_mutex); 4494 mutex_lock(&cache_chain_mutex);
4499 /* Now make sure this entry will be retried */ 4495 /* Now make sure this entry will be retried */
4500 m->count = m->size; 4496 m->count = m->size;
4501 return 0; 4497 return 0;
4502 } 4498 }
4503 for (i = 0; i < n[1]; i++) { 4499 for (i = 0; i < n[1]; i++) {
4504 seq_printf(m, "%s: %lu ", name, n[2*i+3]); 4500 seq_printf(m, "%s: %lu ", name, n[2*i+3]);
4505 show_symbol(m, n[2*i+2]); 4501 show_symbol(m, n[2*i+2]);
4506 seq_putc(m, '\n'); 4502 seq_putc(m, '\n');
4507 } 4503 }
4508 4504
4509 return 0; 4505 return 0;
4510 } 4506 }
4511 4507
4512 const struct seq_operations slabstats_op = { 4508 const struct seq_operations slabstats_op = {
4513 .start = leaks_start, 4509 .start = leaks_start,
4514 .next = s_next, 4510 .next = s_next,
4515 .stop = s_stop, 4511 .stop = s_stop,
4516 .show = leaks_show, 4512 .show = leaks_show,
4517 }; 4513 };
4518 #endif 4514 #endif
4519 #endif 4515 #endif
4520 4516
4521 /** 4517 /**
4522 * ksize - get the actual amount of memory allocated for a given object 4518 * ksize - get the actual amount of memory allocated for a given object
4523 * @objp: Pointer to the object 4519 * @objp: Pointer to the object
4524 * 4520 *
4525 * kmalloc may internally round up allocations and return more memory 4521 * kmalloc may internally round up allocations and return more memory
4526 * than requested. ksize() can be used to determine the actual amount of 4522 * than requested. ksize() can be used to determine the actual amount of
4527 * memory allocated. The caller may use this additional memory, even though 4523 * memory allocated. The caller may use this additional memory, even though
4528 * a smaller amount of memory was initially specified with the kmalloc call. 4524 * a smaller amount of memory was initially specified with the kmalloc call.
4529 * The caller must guarantee that objp points to a valid object previously 4525 * The caller must guarantee that objp points to a valid object previously
4530 * allocated with either kmalloc() or kmem_cache_alloc(). The object 4526 * allocated with either kmalloc() or kmem_cache_alloc(). The object
4531 * must not be freed during the duration of the call. 4527 * must not be freed during the duration of the call.
4532 */ 4528 */
4533 size_t ksize(const void *objp) 4529 size_t ksize(const void *objp)
4534 { 4530 {
4535 if (unlikely(objp == NULL)) 4531 if (unlikely(objp == NULL))
4536 return 0; 4532 return 0;
4537 4533
4538 return obj_size(virt_to_cache(objp)); 4534 return obj_size(virt_to_cache(objp));
4539 } 4535 }
4540 4536
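Since kmalloc() rounds a request up to the size of the backing cache, ksize() tells the caller how much of that rounded allocation is actually usable. A minimal sketch of that idiom; alloc_with_slack() is a hypothetical helper.

#include <linux/slab.h>

/* Allocate at least min_len bytes and report the usable size. */
static void *alloc_with_slack(size_t min_len, size_t *usable)
{
        void *buf = kmalloc(min_len, GFP_KERNEL);

        if (!buf)
                return NULL;
        *usable = ksize(buf);   /* >= min_len: rounded up to the cache's object size */
        return buf;
}
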