Commit a5c43dae7ae38c2a6b3e9a819bcf45f010bf6a4a
Committed by Linus Torvalds
1 parent 9d65cb4a17
Fix race between cat /proc/slab_allocators and rmmod
Same story as with the cat /proc/*/wchan vs rmmod race, only /proc/slab_allocators wants more info than just the symbol name.

Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 5 changed files with 61 additions and 7 deletions
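Why this race exists: /proc/slab_allocators resolves the caller address stored with each slab object into a human-readable symbol. kallsyms_lookup() does that by handing back pointers (the returned name may live in a module's string table, and modname always does), and rmmod can free that memory between the lookup and the print. As in the parent wchan fix (9d65cb4a17), the cure is a copying lookup: the new lookup_symbol_attrs() fills caller-owned buffers, with the module side expected to serialize against unload. A minimal sketch of the two patterns; both show_caller_*() wrappers are hypothetical, not part of the commit:

```c
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/seq_file.h>

/* Racy pattern: kallsyms_lookup() may return `name` pointing into the
 * module's strtab, and `modname` always points at module-owned memory,
 * so both can dangle if the module is unloaded before seq_printf().
 */
static void show_caller_racy(struct seq_file *m, unsigned long addr)
{
	unsigned long size, offset;
	char *modname;
	char namebuf[KSYM_NAME_LEN + 1];
	const char *name;

	name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf);
	if (!name)
		return;
	seq_printf(m, "%s+%#lx/%#lx [%s]\n",
		   name, offset, size, modname ? modname : "kernel");
}

/* Safe pattern with the new API: everything is copied into caller-owned
 * buffers during the lookup (presumably returning 0 on success; the
 * !CONFIG_KALLSYMS stub below returns -ERANGE), so nothing points into
 * module memory afterwards.
 */
static void show_caller_safe(struct seq_file *m, unsigned long addr)
{
	unsigned long size, offset;
	char modname[MODULE_NAME_LEN + 1];
	char name[KSYM_NAME_LEN + 1];

	if (lookup_symbol_attrs(addr, &size, &offset, modname, name) == 0)
		seq_printf(m, "%s+%#lx/%#lx [%s]\n",
			   name, offset, size, modname);
}
```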
include/linux/kallsyms.h
1 | /* Rewritten and vastly simplified by Rusty Russell for in-kernel | 1 | /* Rewritten and vastly simplified by Rusty Russell for in-kernel |
2 | * module loader: | 2 | * module loader: |
3 | * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation | 3 | * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation |
4 | */ | 4 | */ |
5 | #ifndef _LINUX_KALLSYMS_H | 5 | #ifndef _LINUX_KALLSYMS_H |
6 | #define _LINUX_KALLSYMS_H | 6 | #define _LINUX_KALLSYMS_H |
7 | 7 | ||
8 | 8 | ||
9 | #define KSYM_NAME_LEN 127 | 9 | #define KSYM_NAME_LEN 127 |
10 | #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN + \ | 10 | #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN + \ |
11 | 2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1) | 11 | 2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1) |
12 | 12 | ||
13 | #ifdef CONFIG_KALLSYMS | 13 | #ifdef CONFIG_KALLSYMS |
14 | /* Lookup the address for a symbol. Returns 0 if not found. */ | 14 | /* Lookup the address for a symbol. Returns 0 if not found. */ |
15 | unsigned long kallsyms_lookup_name(const char *name); | 15 | unsigned long kallsyms_lookup_name(const char *name); |
16 | 16 | ||
17 | extern int kallsyms_lookup_size_offset(unsigned long addr, | 17 | extern int kallsyms_lookup_size_offset(unsigned long addr, |
18 | unsigned long *symbolsize, | 18 | unsigned long *symbolsize, |
19 | unsigned long *offset); | 19 | unsigned long *offset); |
20 | 20 | ||
21 | /* Lookup an address. modname is set to NULL if it's in the kernel. */ | 21 | /* Lookup an address. modname is set to NULL if it's in the kernel. */ |
22 | const char *kallsyms_lookup(unsigned long addr, | 22 | const char *kallsyms_lookup(unsigned long addr, |
23 | unsigned long *symbolsize, | 23 | unsigned long *symbolsize, |
24 | unsigned long *offset, | 24 | unsigned long *offset, |
25 | char **modname, char *namebuf); | 25 | char **modname, char *namebuf); |
26 | 26 | ||
27 | /* Look up a kernel symbol and return it in a text buffer. */ | 27 | /* Look up a kernel symbol and return it in a text buffer. */ |
28 | extern int sprint_symbol(char *buffer, unsigned long address); | 28 | extern int sprint_symbol(char *buffer, unsigned long address); |
29 | 29 | ||
30 | /* Look up a kernel symbol and print it to the kernel messages. */ | 30 | /* Look up a kernel symbol and print it to the kernel messages. */ |
31 | extern void __print_symbol(const char *fmt, unsigned long address); | 31 | extern void __print_symbol(const char *fmt, unsigned long address); |
32 | 32 | ||
33 | int lookup_symbol_name(unsigned long addr, char *symname); | 33 | int lookup_symbol_name(unsigned long addr, char *symname); |
34 | int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); | ||
34 | 35 | ||
35 | #else /* !CONFIG_KALLSYMS */ | 36 | #else /* !CONFIG_KALLSYMS */ |
36 | 37 | ||
37 | static inline unsigned long kallsyms_lookup_name(const char *name) | 38 | static inline unsigned long kallsyms_lookup_name(const char *name) |
38 | { | 39 | { |
39 | return 0; | 40 | return 0; |
40 | } | 41 | } |
41 | 42 | ||
42 | static inline int kallsyms_lookup_size_offset(unsigned long addr, | 43 | static inline int kallsyms_lookup_size_offset(unsigned long addr, |
43 | unsigned long *symbolsize, | 44 | unsigned long *symbolsize, |
44 | unsigned long *offset) | 45 | unsigned long *offset) |
45 | { | 46 | { |
46 | return 0; | 47 | return 0; |
47 | } | 48 | } |
48 | 49 | ||
49 | static inline const char *kallsyms_lookup(unsigned long addr, | 50 | static inline const char *kallsyms_lookup(unsigned long addr, |
50 | unsigned long *symbolsize, | 51 | unsigned long *symbolsize, |
51 | unsigned long *offset, | 52 | unsigned long *offset, |
52 | char **modname, char *namebuf) | 53 | char **modname, char *namebuf) |
53 | { | 54 | { |
54 | return NULL; | 55 | return NULL; |
55 | } | 56 | } |
56 | 57 | ||
57 | static inline int sprint_symbol(char *buffer, unsigned long addr) | 58 | static inline int sprint_symbol(char *buffer, unsigned long addr) |
58 | { | 59 | { |
59 | *buffer = '\0'; | 60 | *buffer = '\0'; |
60 | return 0; | 61 | return 0; |
61 | } | 62 | } |
62 | 63 | ||
63 | static inline int lookup_symbol_name(unsigned long addr, char *symname) | 64 | static inline int lookup_symbol_name(unsigned long addr, char *symname) |
65 | { | ||
66 | return -ERANGE; | ||
67 | } | ||
68 | |||
69 | static inline int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name) | ||
64 | { | 70 | { |
65 | return -ERANGE; | 71 | return -ERANGE; |
66 | } | 72 | } |
67 | 73 | ||
68 | /* Stupid that this does nothing, but I didn't create this mess. */ | 74 | /* Stupid that this does nothing, but I didn't create this mess. */ |
69 | #define __print_symbol(fmt, addr) | 75 | #define __print_symbol(fmt, addr) |
70 | #endif /*CONFIG_KALLSYMS*/ | 76 | #endif /*CONFIG_KALLSYMS*/ |
71 | 77 | ||
72 | /* This macro allows us to keep printk typechecking */ | 78 | /* This macro allows us to keep printk typechecking */ |
73 | static void __check_printsym_format(const char *fmt, ...) | 79 | static void __check_printsym_format(const char *fmt, ...) |
74 | __attribute__((format(printf,1,2))); | 80 | __attribute__((format(printf,1,2))); |
75 | static inline void __check_printsym_format(const char *fmt, ...) | 81 | static inline void __check_printsym_format(const char *fmt, ...) |
76 | { | 82 | { |
77 | } | 83 | } |
78 | /* ia64 and ppc64 use function descriptors, which contain the real address */ | 84 | /* ia64 and ppc64 use function descriptors, which contain the real address */ |
79 | #if defined(CONFIG_IA64) || defined(CONFIG_PPC64) | 85 | #if defined(CONFIG_IA64) || defined(CONFIG_PPC64) |
80 | #define print_fn_descriptor_symbol(fmt, addr) \ | 86 | #define print_fn_descriptor_symbol(fmt, addr) \ |
81 | do { \ | 87 | do { \ |
82 | unsigned long *__faddr = (unsigned long*) addr; \ | 88 | unsigned long *__faddr = (unsigned long*) addr; \ |
83 | print_symbol(fmt, __faddr[0]); \ | 89 | print_symbol(fmt, __faddr[0]); \ |
84 | } while (0) | 90 | } while (0) |
85 | #else | 91 | #else |
86 | #define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr) | 92 | #define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr) |
87 | #endif | 93 | #endif |
88 | 94 | ||
89 | static inline void print_symbol(const char *fmt, unsigned long addr) | 95 | static inline void print_symbol(const char *fmt, unsigned long addr) |
90 | { | 96 | { |
91 | __check_printsym_format(fmt, ""); | 97 | __check_printsym_format(fmt, ""); |
92 | __print_symbol(fmt, (unsigned long) | 98 | __print_symbol(fmt, (unsigned long) |
93 | __builtin_extract_return_addr((void *)addr)); | 99 | __builtin_extract_return_addr((void *)addr)); |
94 | } | 100 | } |
95 | 101 | ||
96 | #ifndef CONFIG_64BIT | 102 | #ifndef CONFIG_64BIT |
97 | #define print_ip_sym(ip) \ | 103 | #define print_ip_sym(ip) \ |
98 | do { \ | 104 | do { \ |
99 | printk("[<%08lx>]", ip); \ | 105 | printk("[<%08lx>]", ip); \ |
100 | print_symbol(" %s\n", ip); \ | 106 | print_symbol(" %s\n", ip); \ |
101 | } while(0) | 107 | } while(0) |
102 | #else | 108 | #else |
103 | #define print_ip_sym(ip) \ | 109 | #define print_ip_sym(ip) \ |
104 | do { \ | 110 | do { \ |
105 | printk("[<%016lx>]", ip); \ | 111 | printk("[<%016lx>]", ip); \ |
106 | print_symbol(" %s\n", ip); \ | 112 | print_symbol(" %s\n", ip); \ |
107 | } while(0) | 113 | } while(0) |
108 | #endif | 114 | #endif |
109 | 115 | ||
110 | #endif /*_LINUX_KALLSYMS_H*/ | 116 | #endif /*_LINUX_KALLSYMS_H*/ |
111 | 117 |
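A sizing note on KSYM_SYMBOL_LEN above: it budgets the worst case of sprint_symbol()'s "%s+%#lx/%#lx [%s]" output, i.e. a KSYM_NAME_LEN name, a MODULE_NAME_LEN module name, and two unsigned longs, where BITS_PER_LONG*3/10 over-approximates the printed digits of a long (3/10 ≈ log10 2). A minimal sketch of the intended usage; dump_one_frame() is hypothetical, not from the commit:

```c
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/module.h>

/* Hypothetical helper: format one return address for a trace. With
 * CONFIG_KALLSYMS=n the sprint_symbol() stub stores an empty string,
 * so only the raw address is printed.
 */
static void dump_one_frame(unsigned long addr)
{
	char buf[KSYM_SYMBOL_LEN];

	sprint_symbol(buf, addr);
	printk(KERN_DEBUG "  [<%08lx>] %s\n", addr, buf);
}
```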
include/linux/module.h
1 | #ifndef _LINUX_MODULE_H | 1 | #ifndef _LINUX_MODULE_H |
2 | #define _LINUX_MODULE_H | 2 | #define _LINUX_MODULE_H |
3 | /* | 3 | /* |
4 | * Dynamic loading of modules into the kernel. | 4 | * Dynamic loading of modules into the kernel. |
5 | * | 5 | * |
6 | * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996 | 6 | * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996 |
7 | * Rewritten again by Rusty Russell, 2002 | 7 | * Rewritten again by Rusty Russell, 2002 |
8 | */ | 8 | */ |
9 | #include <linux/spinlock.h> | 9 | #include <linux/spinlock.h> |
10 | #include <linux/list.h> | 10 | #include <linux/list.h> |
11 | #include <linux/stat.h> | 11 | #include <linux/stat.h> |
12 | #include <linux/compiler.h> | 12 | #include <linux/compiler.h> |
13 | #include <linux/cache.h> | 13 | #include <linux/cache.h> |
14 | #include <linux/kmod.h> | 14 | #include <linux/kmod.h> |
15 | #include <linux/elf.h> | 15 | #include <linux/elf.h> |
16 | #include <linux/stringify.h> | 16 | #include <linux/stringify.h> |
17 | #include <linux/kobject.h> | 17 | #include <linux/kobject.h> |
18 | #include <linux/moduleparam.h> | 18 | #include <linux/moduleparam.h> |
19 | #include <asm/local.h> | 19 | #include <asm/local.h> |
20 | 20 | ||
21 | #include <asm/module.h> | 21 | #include <asm/module.h> |
22 | 22 | ||
23 | /* Not Yet Implemented */ | 23 | /* Not Yet Implemented */ |
24 | #define MODULE_SUPPORTED_DEVICE(name) | 24 | #define MODULE_SUPPORTED_DEVICE(name) |
25 | 25 | ||
26 | /* v850 toolchain uses a `_' prefix for all user symbols */ | 26 | /* v850 toolchain uses a `_' prefix for all user symbols */ |
27 | #ifndef MODULE_SYMBOL_PREFIX | 27 | #ifndef MODULE_SYMBOL_PREFIX |
28 | #define MODULE_SYMBOL_PREFIX "" | 28 | #define MODULE_SYMBOL_PREFIX "" |
29 | #endif | 29 | #endif |
30 | 30 | ||
31 | #define MODULE_NAME_LEN (64 - sizeof(unsigned long)) | 31 | #define MODULE_NAME_LEN (64 - sizeof(unsigned long)) |
32 | 32 | ||
33 | struct kernel_symbol | 33 | struct kernel_symbol |
34 | { | 34 | { |
35 | unsigned long value; | 35 | unsigned long value; |
36 | const char *name; | 36 | const char *name; |
37 | }; | 37 | }; |
38 | 38 | ||
39 | struct modversion_info | 39 | struct modversion_info |
40 | { | 40 | { |
41 | unsigned long crc; | 41 | unsigned long crc; |
42 | char name[MODULE_NAME_LEN]; | 42 | char name[MODULE_NAME_LEN]; |
43 | }; | 43 | }; |
44 | 44 | ||
45 | struct module; | 45 | struct module; |
46 | 46 | ||
47 | struct module_attribute { | 47 | struct module_attribute { |
48 | struct attribute attr; | 48 | struct attribute attr; |
49 | ssize_t (*show)(struct module_attribute *, struct module *, char *); | 49 | ssize_t (*show)(struct module_attribute *, struct module *, char *); |
50 | ssize_t (*store)(struct module_attribute *, struct module *, | 50 | ssize_t (*store)(struct module_attribute *, struct module *, |
51 | const char *, size_t count); | 51 | const char *, size_t count); |
52 | void (*setup)(struct module *, const char *); | 52 | void (*setup)(struct module *, const char *); |
53 | int (*test)(struct module *); | 53 | int (*test)(struct module *); |
54 | void (*free)(struct module *); | 54 | void (*free)(struct module *); |
55 | }; | 55 | }; |
56 | 56 | ||
57 | struct module_kobject | 57 | struct module_kobject |
58 | { | 58 | { |
59 | struct kobject kobj; | 59 | struct kobject kobj; |
60 | struct module *mod; | 60 | struct module *mod; |
61 | struct kobject *drivers_dir; | 61 | struct kobject *drivers_dir; |
62 | }; | 62 | }; |
63 | 63 | ||
64 | /* These are either module local, or the kernel's dummy ones. */ | 64 | /* These are either module local, or the kernel's dummy ones. */ |
65 | extern int init_module(void); | 65 | extern int init_module(void); |
66 | extern void cleanup_module(void); | 66 | extern void cleanup_module(void); |
67 | 67 | ||
68 | /* Archs provide a method of finding the correct exception table. */ | 68 | /* Archs provide a method of finding the correct exception table. */ |
69 | struct exception_table_entry; | 69 | struct exception_table_entry; |
70 | 70 | ||
71 | const struct exception_table_entry * | 71 | const struct exception_table_entry * |
72 | search_extable(const struct exception_table_entry *first, | 72 | search_extable(const struct exception_table_entry *first, |
73 | const struct exception_table_entry *last, | 73 | const struct exception_table_entry *last, |
74 | unsigned long value); | 74 | unsigned long value); |
75 | void sort_extable(struct exception_table_entry *start, | 75 | void sort_extable(struct exception_table_entry *start, |
76 | struct exception_table_entry *finish); | 76 | struct exception_table_entry *finish); |
77 | void sort_main_extable(void); | 77 | void sort_main_extable(void); |
78 | 78 | ||
79 | #ifdef MODULE | 79 | #ifdef MODULE |
80 | #define MODULE_GENERIC_TABLE(gtype,name) \ | 80 | #define MODULE_GENERIC_TABLE(gtype,name) \ |
81 | extern const struct gtype##_id __mod_##gtype##_table \ | 81 | extern const struct gtype##_id __mod_##gtype##_table \ |
82 | __attribute__ ((unused, alias(__stringify(name)))) | 82 | __attribute__ ((unused, alias(__stringify(name)))) |
83 | 83 | ||
84 | extern struct module __this_module; | 84 | extern struct module __this_module; |
85 | #define THIS_MODULE (&__this_module) | 85 | #define THIS_MODULE (&__this_module) |
86 | #else /* !MODULE */ | 86 | #else /* !MODULE */ |
87 | #define MODULE_GENERIC_TABLE(gtype,name) | 87 | #define MODULE_GENERIC_TABLE(gtype,name) |
88 | #define THIS_MODULE ((struct module *)0) | 88 | #define THIS_MODULE ((struct module *)0) |
89 | #endif | 89 | #endif |
90 | 90 | ||
91 | /* Generic info of form tag = "info" */ | 91 | /* Generic info of form tag = "info" */ |
92 | #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) | 92 | #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) |
93 | 93 | ||
94 | /* For userspace: you can also call me... */ | 94 | /* For userspace: you can also call me... */ |
95 | #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias) | 95 | #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias) |
96 | 96 | ||
97 | /* | 97 | /* |
98 | * The following license idents are currently accepted as indicating free | 98 | * The following license idents are currently accepted as indicating free |
99 | * software modules | 99 | * software modules |
100 | * | 100 | * |
101 | * "GPL" [GNU Public License v2 or later] | 101 | * "GPL" [GNU Public License v2 or later] |
102 | * "GPL v2" [GNU Public License v2] | 102 | * "GPL v2" [GNU Public License v2] |
103 | * "GPL and additional rights" [GNU Public License v2 rights and more] | 103 | * "GPL and additional rights" [GNU Public License v2 rights and more] |
104 | * "Dual BSD/GPL" [GNU Public License v2 | 104 | * "Dual BSD/GPL" [GNU Public License v2 |
105 | * or BSD license choice] | 105 | * or BSD license choice] |
106 | * "Dual MIT/GPL" [GNU Public License v2 | 106 | * "Dual MIT/GPL" [GNU Public License v2 |
107 | * or MIT license choice] | 107 | * or MIT license choice] |
108 | * "Dual MPL/GPL" [GNU Public License v2 | 108 | * "Dual MPL/GPL" [GNU Public License v2 |
109 | * or Mozilla license choice] | 109 | * or Mozilla license choice] |
110 | * | 110 | * |
111 | * The following other idents are available | 111 | * The following other idents are available |
112 | * | 112 | * |
113 | * "Proprietary" [Non free products] | 113 | * "Proprietary" [Non free products] |
114 | * | 114 | * |
115 | * There are dual licensed components, but when running with Linux it is the | 115 | * There are dual licensed components, but when running with Linux it is the |
116 | * GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL | 116 | * GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL |
117 | * is a GPL combined work. | 117 | * is a GPL combined work. |
118 | * | 118 | * |
119 | * This exists for several reasons | 119 | * This exists for several reasons |
120 | * 1. So modinfo can show license info for users wanting to vet their setup | 120 | * 1. So modinfo can show license info for users wanting to vet their setup |
121 | * is free | 121 | * is free |
122 | * 2. So the community can ignore bug reports including proprietary modules | 122 | * 2. So the community can ignore bug reports including proprietary modules |
123 | * 3. So vendors can do likewise based on their own policies | 123 | * 3. So vendors can do likewise based on their own policies |
124 | */ | 124 | */ |
125 | #define MODULE_LICENSE(_license) MODULE_INFO(license, _license) | 125 | #define MODULE_LICENSE(_license) MODULE_INFO(license, _license) |
126 | 126 | ||
127 | /* Author, ideally of form NAME <EMAIL>[, NAME <EMAIL>]*[ and NAME <EMAIL>] */ | 127 | /* Author, ideally of form NAME <EMAIL>[, NAME <EMAIL>]*[ and NAME <EMAIL>] */ |
128 | #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) | 128 | #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) |
129 | 129 | ||
130 | /* What your module does. */ | 130 | /* What your module does. */ |
131 | #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) | 131 | #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) |
132 | 132 | ||
133 | /* One for each parameter, describing how to use it. Some files do | 133 | /* One for each parameter, describing how to use it. Some files do |
134 | multiple of these per line, so can't just use MODULE_INFO. */ | 134 | multiple of these per line, so can't just use MODULE_INFO. */ |
135 | #define MODULE_PARM_DESC(_parm, desc) \ | 135 | #define MODULE_PARM_DESC(_parm, desc) \ |
136 | __MODULE_INFO(parm, _parm, #_parm ":" desc) | 136 | __MODULE_INFO(parm, _parm, #_parm ":" desc) |
137 | 137 | ||
138 | #define MODULE_DEVICE_TABLE(type,name) \ | 138 | #define MODULE_DEVICE_TABLE(type,name) \ |
139 | MODULE_GENERIC_TABLE(type##_device,name) | 139 | MODULE_GENERIC_TABLE(type##_device,name) |
140 | 140 | ||
141 | /* Version of form [<epoch>:]<version>[-<extra-version>]. | 141 | /* Version of form [<epoch>:]<version>[-<extra-version>]. |
142 | Or for CVS/RCS ID version, everything but the number is stripped. | 142 | Or for CVS/RCS ID version, everything but the number is stripped. |
143 | <epoch>: A (small) unsigned integer which allows you to start versions | 143 | <epoch>: A (small) unsigned integer which allows you to start versions |
144 | anew. If not mentioned, it's zero. eg. "2:1.0" is after | 144 | anew. If not mentioned, it's zero. eg. "2:1.0" is after |
145 | "1:2.0". | 145 | "1:2.0". |
146 | <version>: The <version> may contain only alphanumerics and the | 146 | <version>: The <version> may contain only alphanumerics and the |
147 | character `.'. Ordered by numeric sort for numeric parts, | 147 | character `.'. Ordered by numeric sort for numeric parts, |
148 | ascii sort for ascii parts (as per RPM or DEB algorithm). | 148 | ascii sort for ascii parts (as per RPM or DEB algorithm). |
149 | <extraversion>: Like <version>, but inserted for local | 149 | <extraversion>: Like <version>, but inserted for local |
150 | customizations, eg "rh3" or "rusty1". | 150 | customizations, eg "rh3" or "rusty1". |
151 | 151 | ||
152 | Using this automatically adds a checksum of the .c files and the | 152 | Using this automatically adds a checksum of the .c files and the |
153 | local headers in "srcversion". | 153 | local headers in "srcversion". |
154 | */ | 154 | */ |
155 | #define MODULE_VERSION(_version) MODULE_INFO(version, _version) | 155 | #define MODULE_VERSION(_version) MODULE_INFO(version, _version) |
156 | 156 | ||
157 | /* Optional firmware file (or files) needed by the module | 157 | /* Optional firmware file (or files) needed by the module |
158 | * format is simply firmware file name. Multiple firmware | 158 | * format is simply firmware file name. Multiple firmware |
159 | * files require multiple MODULE_FIRMWARE() specifiers */ | 159 | * files require multiple MODULE_FIRMWARE() specifiers */ |
160 | #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) | 160 | #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) |
161 | 161 | ||
162 | /* Given an address, look for it in the exception tables */ | 162 | /* Given an address, look for it in the exception tables */ |
163 | const struct exception_table_entry *search_exception_tables(unsigned long add); | 163 | const struct exception_table_entry *search_exception_tables(unsigned long add); |
164 | 164 | ||
165 | struct notifier_block; | 165 | struct notifier_block; |
166 | 166 | ||
167 | #ifdef CONFIG_MODULES | 167 | #ifdef CONFIG_MODULES |
168 | 168 | ||
169 | /* Get/put a kernel symbol (calls must be symmetric) */ | 169 | /* Get/put a kernel symbol (calls must be symmetric) */ |
170 | void *__symbol_get(const char *symbol); | 170 | void *__symbol_get(const char *symbol); |
171 | void *__symbol_get_gpl(const char *symbol); | 171 | void *__symbol_get_gpl(const char *symbol); |
172 | #define symbol_get(x) ((typeof(&x))(__symbol_get(MODULE_SYMBOL_PREFIX #x))) | 172 | #define symbol_get(x) ((typeof(&x))(__symbol_get(MODULE_SYMBOL_PREFIX #x))) |
173 | 173 | ||
174 | #ifndef __GENKSYMS__ | 174 | #ifndef __GENKSYMS__ |
175 | #ifdef CONFIG_MODVERSIONS | 175 | #ifdef CONFIG_MODVERSIONS |
176 | /* Mark the CRC weak since genksyms apparently decides not to | 176 | /* Mark the CRC weak since genksyms apparently decides not to |
177 | * generate a checksums for some symbols */ | 177 | * generate a checksums for some symbols */ |
178 | #define __CRC_SYMBOL(sym, sec) \ | 178 | #define __CRC_SYMBOL(sym, sec) \ |
179 | extern void *__crc_##sym __attribute__((weak)); \ | 179 | extern void *__crc_##sym __attribute__((weak)); \ |
180 | static const unsigned long __kcrctab_##sym \ | 180 | static const unsigned long __kcrctab_##sym \ |
181 | __attribute_used__ \ | 181 | __attribute_used__ \ |
182 | __attribute__((section("__kcrctab" sec), unused)) \ | 182 | __attribute__((section("__kcrctab" sec), unused)) \ |
183 | = (unsigned long) &__crc_##sym; | 183 | = (unsigned long) &__crc_##sym; |
184 | #else | 184 | #else |
185 | #define __CRC_SYMBOL(sym, sec) | 185 | #define __CRC_SYMBOL(sym, sec) |
186 | #endif | 186 | #endif |
187 | 187 | ||
188 | /* For every exported symbol, place a struct in the __ksymtab section */ | 188 | /* For every exported symbol, place a struct in the __ksymtab section */ |
189 | #define __EXPORT_SYMBOL(sym, sec) \ | 189 | #define __EXPORT_SYMBOL(sym, sec) \ |
190 | extern typeof(sym) sym; \ | 190 | extern typeof(sym) sym; \ |
191 | __CRC_SYMBOL(sym, sec) \ | 191 | __CRC_SYMBOL(sym, sec) \ |
192 | static const char __kstrtab_##sym[] \ | 192 | static const char __kstrtab_##sym[] \ |
193 | __attribute__((section("__ksymtab_strings"))) \ | 193 | __attribute__((section("__ksymtab_strings"))) \ |
194 | = MODULE_SYMBOL_PREFIX #sym; \ | 194 | = MODULE_SYMBOL_PREFIX #sym; \ |
195 | static const struct kernel_symbol __ksymtab_##sym \ | 195 | static const struct kernel_symbol __ksymtab_##sym \ |
196 | __attribute_used__ \ | 196 | __attribute_used__ \ |
197 | __attribute__((section("__ksymtab" sec), unused)) \ | 197 | __attribute__((section("__ksymtab" sec), unused)) \ |
198 | = { (unsigned long)&sym, __kstrtab_##sym } | 198 | = { (unsigned long)&sym, __kstrtab_##sym } |
199 | 199 | ||
200 | #define EXPORT_SYMBOL(sym) \ | 200 | #define EXPORT_SYMBOL(sym) \ |
201 | __EXPORT_SYMBOL(sym, "") | 201 | __EXPORT_SYMBOL(sym, "") |
202 | 202 | ||
203 | #define EXPORT_SYMBOL_GPL(sym) \ | 203 | #define EXPORT_SYMBOL_GPL(sym) \ |
204 | __EXPORT_SYMBOL(sym, "_gpl") | 204 | __EXPORT_SYMBOL(sym, "_gpl") |
205 | 205 | ||
206 | #define EXPORT_SYMBOL_GPL_FUTURE(sym) \ | 206 | #define EXPORT_SYMBOL_GPL_FUTURE(sym) \ |
207 | __EXPORT_SYMBOL(sym, "_gpl_future") | 207 | __EXPORT_SYMBOL(sym, "_gpl_future") |
208 | 208 | ||
209 | 209 | ||
210 | #ifdef CONFIG_UNUSED_SYMBOLS | 210 | #ifdef CONFIG_UNUSED_SYMBOLS |
211 | #define EXPORT_UNUSED_SYMBOL(sym) __EXPORT_SYMBOL(sym, "_unused") | 211 | #define EXPORT_UNUSED_SYMBOL(sym) __EXPORT_SYMBOL(sym, "_unused") |
212 | #define EXPORT_UNUSED_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_unused_gpl") | 212 | #define EXPORT_UNUSED_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_unused_gpl") |
213 | #else | 213 | #else |
214 | #define EXPORT_UNUSED_SYMBOL(sym) | 214 | #define EXPORT_UNUSED_SYMBOL(sym) |
215 | #define EXPORT_UNUSED_SYMBOL_GPL(sym) | 215 | #define EXPORT_UNUSED_SYMBOL_GPL(sym) |
216 | #endif | 216 | #endif |
217 | 217 | ||
218 | #endif | 218 | #endif |
219 | 219 | ||
220 | struct module_ref | 220 | struct module_ref |
221 | { | 221 | { |
222 | local_t count; | 222 | local_t count; |
223 | } ____cacheline_aligned; | 223 | } ____cacheline_aligned; |
224 | 224 | ||
225 | enum module_state | 225 | enum module_state |
226 | { | 226 | { |
227 | MODULE_STATE_LIVE, | 227 | MODULE_STATE_LIVE, |
228 | MODULE_STATE_COMING, | 228 | MODULE_STATE_COMING, |
229 | MODULE_STATE_GOING, | 229 | MODULE_STATE_GOING, |
230 | }; | 230 | }; |
231 | 231 | ||
232 | /* Similar stuff for section attributes. */ | 232 | /* Similar stuff for section attributes. */ |
233 | struct module_sect_attr | 233 | struct module_sect_attr |
234 | { | 234 | { |
235 | struct module_attribute mattr; | 235 | struct module_attribute mattr; |
236 | char *name; | 236 | char *name; |
237 | unsigned long address; | 237 | unsigned long address; |
238 | }; | 238 | }; |
239 | 239 | ||
240 | struct module_sect_attrs | 240 | struct module_sect_attrs |
241 | { | 241 | { |
242 | struct attribute_group grp; | 242 | struct attribute_group grp; |
243 | int nsections; | 243 | int nsections; |
244 | struct module_sect_attr attrs[0]; | 244 | struct module_sect_attr attrs[0]; |
245 | }; | 245 | }; |
246 | 246 | ||
247 | struct module_param_attrs; | 247 | struct module_param_attrs; |
248 | 248 | ||
249 | struct module | 249 | struct module |
250 | { | 250 | { |
251 | enum module_state state; | 251 | enum module_state state; |
252 | 252 | ||
253 | /* Member of list of modules */ | 253 | /* Member of list of modules */ |
254 | struct list_head list; | 254 | struct list_head list; |
255 | 255 | ||
256 | /* Unique handle for this module */ | 256 | /* Unique handle for this module */ |
257 | char name[MODULE_NAME_LEN]; | 257 | char name[MODULE_NAME_LEN]; |
258 | 258 | ||
259 | /* Sysfs stuff. */ | 259 | /* Sysfs stuff. */ |
260 | struct module_kobject mkobj; | 260 | struct module_kobject mkobj; |
261 | struct module_param_attrs *param_attrs; | 261 | struct module_param_attrs *param_attrs; |
262 | struct module_attribute *modinfo_attrs; | 262 | struct module_attribute *modinfo_attrs; |
263 | const char *version; | 263 | const char *version; |
264 | const char *srcversion; | 264 | const char *srcversion; |
265 | struct kobject *holders_dir; | 265 | struct kobject *holders_dir; |
266 | 266 | ||
267 | /* Exported symbols */ | 267 | /* Exported symbols */ |
268 | const struct kernel_symbol *syms; | 268 | const struct kernel_symbol *syms; |
269 | unsigned int num_syms; | 269 | unsigned int num_syms; |
270 | const unsigned long *crcs; | 270 | const unsigned long *crcs; |
271 | 271 | ||
272 | /* GPL-only exported symbols. */ | 272 | /* GPL-only exported symbols. */ |
273 | const struct kernel_symbol *gpl_syms; | 273 | const struct kernel_symbol *gpl_syms; |
274 | unsigned int num_gpl_syms; | 274 | unsigned int num_gpl_syms; |
275 | const unsigned long *gpl_crcs; | 275 | const unsigned long *gpl_crcs; |
276 | 276 | ||
277 | /* unused exported symbols. */ | 277 | /* unused exported symbols. */ |
278 | const struct kernel_symbol *unused_syms; | 278 | const struct kernel_symbol *unused_syms; |
279 | unsigned int num_unused_syms; | 279 | unsigned int num_unused_syms; |
280 | const unsigned long *unused_crcs; | 280 | const unsigned long *unused_crcs; |
281 | /* GPL-only, unused exported symbols. */ | 281 | /* GPL-only, unused exported symbols. */ |
282 | const struct kernel_symbol *unused_gpl_syms; | 282 | const struct kernel_symbol *unused_gpl_syms; |
283 | unsigned int num_unused_gpl_syms; | 283 | unsigned int num_unused_gpl_syms; |
284 | const unsigned long *unused_gpl_crcs; | 284 | const unsigned long *unused_gpl_crcs; |
285 | 285 | ||
286 | /* symbols that will be GPL-only in the near future. */ | 286 | /* symbols that will be GPL-only in the near future. */ |
287 | const struct kernel_symbol *gpl_future_syms; | 287 | const struct kernel_symbol *gpl_future_syms; |
288 | unsigned int num_gpl_future_syms; | 288 | unsigned int num_gpl_future_syms; |
289 | const unsigned long *gpl_future_crcs; | 289 | const unsigned long *gpl_future_crcs; |
290 | 290 | ||
291 | /* Exception table */ | 291 | /* Exception table */ |
292 | unsigned int num_exentries; | 292 | unsigned int num_exentries; |
293 | const struct exception_table_entry *extable; | 293 | const struct exception_table_entry *extable; |
294 | 294 | ||
295 | /* Startup function. */ | 295 | /* Startup function. */ |
296 | int (*init)(void); | 296 | int (*init)(void); |
297 | 297 | ||
298 | /* If this is non-NULL, vfree after init() returns */ | 298 | /* If this is non-NULL, vfree after init() returns */ |
299 | void *module_init; | 299 | void *module_init; |
300 | 300 | ||
301 | /* Here is the actual code + data, vfree'd on unload. */ | 301 | /* Here is the actual code + data, vfree'd on unload. */ |
302 | void *module_core; | 302 | void *module_core; |
303 | 303 | ||
304 | /* Here are the sizes of the init and core sections */ | 304 | /* Here are the sizes of the init and core sections */ |
305 | unsigned long init_size, core_size; | 305 | unsigned long init_size, core_size; |
306 | 306 | ||
307 | /* The size of the executable code in each section. */ | 307 | /* The size of the executable code in each section. */ |
308 | unsigned long init_text_size, core_text_size; | 308 | unsigned long init_text_size, core_text_size; |
309 | 309 | ||
310 | /* The handle returned from unwind_add_table. */ | 310 | /* The handle returned from unwind_add_table. */ |
311 | void *unwind_info; | 311 | void *unwind_info; |
312 | 312 | ||
313 | /* Arch-specific module values */ | 313 | /* Arch-specific module values */ |
314 | struct mod_arch_specific arch; | 314 | struct mod_arch_specific arch; |
315 | 315 | ||
316 | /* Am I unsafe to unload? */ | 316 | /* Am I unsafe to unload? */ |
317 | int unsafe; | 317 | int unsafe; |
318 | 318 | ||
319 | unsigned int taints; /* same bits as kernel:tainted */ | 319 | unsigned int taints; /* same bits as kernel:tainted */ |
320 | 320 | ||
321 | #ifdef CONFIG_GENERIC_BUG | 321 | #ifdef CONFIG_GENERIC_BUG |
322 | /* Support for BUG */ | 322 | /* Support for BUG */ |
323 | struct list_head bug_list; | 323 | struct list_head bug_list; |
324 | struct bug_entry *bug_table; | 324 | struct bug_entry *bug_table; |
325 | unsigned num_bugs; | 325 | unsigned num_bugs; |
326 | #endif | 326 | #endif |
327 | 327 | ||
328 | #ifdef CONFIG_MODULE_UNLOAD | 328 | #ifdef CONFIG_MODULE_UNLOAD |
329 | /* Reference counts */ | 329 | /* Reference counts */ |
330 | struct module_ref ref[NR_CPUS]; | 330 | struct module_ref ref[NR_CPUS]; |
331 | 331 | ||
332 | /* What modules depend on me? */ | 332 | /* What modules depend on me? */ |
333 | struct list_head modules_which_use_me; | 333 | struct list_head modules_which_use_me; |
334 | 334 | ||
335 | /* Who is waiting for us to be unloaded */ | 335 | /* Who is waiting for us to be unloaded */ |
336 | struct task_struct *waiter; | 336 | struct task_struct *waiter; |
337 | 337 | ||
338 | /* Destruction function. */ | 338 | /* Destruction function. */ |
339 | void (*exit)(void); | 339 | void (*exit)(void); |
340 | #endif | 340 | #endif |
341 | 341 | ||
342 | #ifdef CONFIG_KALLSYMS | 342 | #ifdef CONFIG_KALLSYMS |
343 | /* We keep the symbol and string tables for kallsyms. */ | 343 | /* We keep the symbol and string tables for kallsyms. */ |
344 | Elf_Sym *symtab; | 344 | Elf_Sym *symtab; |
345 | unsigned long num_symtab; | 345 | unsigned long num_symtab; |
346 | char *strtab; | 346 | char *strtab; |
347 | 347 | ||
348 | /* Section attributes */ | 348 | /* Section attributes */ |
349 | struct module_sect_attrs *sect_attrs; | 349 | struct module_sect_attrs *sect_attrs; |
350 | #endif | 350 | #endif |
351 | 351 | ||
352 | /* Per-cpu data. */ | 352 | /* Per-cpu data. */ |
353 | void *percpu; | 353 | void *percpu; |
354 | 354 | ||
355 | /* The command line arguments (may be mangled). People like | 355 | /* The command line arguments (may be mangled). People like |
356 | keeping pointers to this stuff */ | 356 | keeping pointers to this stuff */ |
357 | char *args; | 357 | char *args; |
358 | }; | 358 | }; |
359 | 359 | ||
360 | /* FIXME: It'd be nice to isolate modules during init, too, so they | 360 | /* FIXME: It'd be nice to isolate modules during init, too, so they |
361 | aren't used before they (may) fail. But presently too much code | 361 | aren't used before they (may) fail. But presently too much code |
362 | (IDE & SCSI) require entry into the module during init.*/ | 362 | (IDE & SCSI) require entry into the module during init.*/ |
363 | static inline int module_is_live(struct module *mod) | 363 | static inline int module_is_live(struct module *mod) |
364 | { | 364 | { |
365 | return mod->state != MODULE_STATE_GOING; | 365 | return mod->state != MODULE_STATE_GOING; |
366 | } | 366 | } |
367 | 367 | ||
368 | /* Is this address in a module? (second is with no locks, for oops) */ | 368 | /* Is this address in a module? (second is with no locks, for oops) */ |
369 | struct module *module_text_address(unsigned long addr); | 369 | struct module *module_text_address(unsigned long addr); |
370 | struct module *__module_text_address(unsigned long addr); | 370 | struct module *__module_text_address(unsigned long addr); |
371 | int is_module_address(unsigned long addr); | 371 | int is_module_address(unsigned long addr); |
372 | 372 | ||
373 | /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if | 373 | /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if |
374 | symnum out of range. */ | 374 | symnum out of range. */ |
375 | int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, | 375 | int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, |
376 | char *name, char *module_name, int *exported); | 376 | char *name, char *module_name, int *exported); |
377 | 377 | ||
378 | /* Look for this name: can be of form module:name. */ | 378 | /* Look for this name: can be of form module:name. */ |
379 | unsigned long module_kallsyms_lookup_name(const char *name); | 379 | unsigned long module_kallsyms_lookup_name(const char *name); |
380 | 380 | ||
381 | extern void __module_put_and_exit(struct module *mod, long code) | 381 | extern void __module_put_and_exit(struct module *mod, long code) |
382 | __attribute__((noreturn)); | 382 | __attribute__((noreturn)); |
383 | #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code); | 383 | #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code); |
384 | 384 | ||
385 | #ifdef CONFIG_MODULE_UNLOAD | 385 | #ifdef CONFIG_MODULE_UNLOAD |
386 | unsigned int module_refcount(struct module *mod); | 386 | unsigned int module_refcount(struct module *mod); |
387 | void __symbol_put(const char *symbol); | 387 | void __symbol_put(const char *symbol); |
388 | #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) | 388 | #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) |
389 | void symbol_put_addr(void *addr); | 389 | void symbol_put_addr(void *addr); |
390 | 390 | ||
391 | /* Sometimes we know we already have a refcount, and it's easier not | 391 | /* Sometimes we know we already have a refcount, and it's easier not |
392 | to handle the error case (which only happens with rmmod --wait). */ | 392 | to handle the error case (which only happens with rmmod --wait). */ |
393 | static inline void __module_get(struct module *module) | 393 | static inline void __module_get(struct module *module) |
394 | { | 394 | { |
395 | if (module) { | 395 | if (module) { |
396 | BUG_ON(module_refcount(module) == 0); | 396 | BUG_ON(module_refcount(module) == 0); |
397 | local_inc(&module->ref[get_cpu()].count); | 397 | local_inc(&module->ref[get_cpu()].count); |
398 | put_cpu(); | 398 | put_cpu(); |
399 | } | 399 | } |
400 | } | 400 | } |
401 | 401 | ||
402 | static inline int try_module_get(struct module *module) | 402 | static inline int try_module_get(struct module *module) |
403 | { | 403 | { |
404 | int ret = 1; | 404 | int ret = 1; |
405 | 405 | ||
406 | if (module) { | 406 | if (module) { |
407 | unsigned int cpu = get_cpu(); | 407 | unsigned int cpu = get_cpu(); |
408 | if (likely(module_is_live(module))) | 408 | if (likely(module_is_live(module))) |
409 | local_inc(&module->ref[cpu].count); | 409 | local_inc(&module->ref[cpu].count); |
410 | else | 410 | else |
411 | ret = 0; | 411 | ret = 0; |
412 | put_cpu(); | 412 | put_cpu(); |
413 | } | 413 | } |
414 | return ret; | 414 | return ret; |
415 | } | 415 | } |
416 | 416 | ||
417 | extern void module_put(struct module *module); | 417 | extern void module_put(struct module *module); |
418 | 418 | ||
419 | #else /*!CONFIG_MODULE_UNLOAD*/ | 419 | #else /*!CONFIG_MODULE_UNLOAD*/ |
420 | static inline int try_module_get(struct module *module) | 420 | static inline int try_module_get(struct module *module) |
421 | { | 421 | { |
422 | return !module || module_is_live(module); | 422 | return !module || module_is_live(module); |
423 | } | 423 | } |
424 | static inline void module_put(struct module *module) | 424 | static inline void module_put(struct module *module) |
425 | { | 425 | { |
426 | } | 426 | } |
427 | static inline void __module_get(struct module *module) | 427 | static inline void __module_get(struct module *module) |
428 | { | 428 | { |
429 | } | 429 | } |
430 | #define symbol_put(x) do { } while(0) | 430 | #define symbol_put(x) do { } while(0) |
431 | #define symbol_put_addr(p) do { } while(0) | 431 | #define symbol_put_addr(p) do { } while(0) |
432 | 432 | ||
433 | #endif /* CONFIG_MODULE_UNLOAD */ | 433 | #endif /* CONFIG_MODULE_UNLOAD */ |
434 | 434 | ||
435 | /* This is a #define so the string doesn't get put in every .o file */ | 435 | /* This is a #define so the string doesn't get put in every .o file */ |
436 | #define module_name(mod) \ | 436 | #define module_name(mod) \ |
437 | ({ \ | 437 | ({ \ |
438 | struct module *__mod = (mod); \ | 438 | struct module *__mod = (mod); \ |
439 | __mod ? __mod->name : "kernel"; \ | 439 | __mod ? __mod->name : "kernel"; \ |
440 | }) | 440 | }) |
441 | 441 | ||
442 | #define __unsafe(mod) \ | 442 | #define __unsafe(mod) \ |
443 | do { \ | 443 | do { \ |
444 | if (mod && !(mod)->unsafe) { \ | 444 | if (mod && !(mod)->unsafe) { \ |
445 | printk(KERN_WARNING \ | 445 | printk(KERN_WARNING \ |
446 | "Module %s cannot be unloaded due to unsafe usage in" \ | 446 | "Module %s cannot be unloaded due to unsafe usage in" \ |
447 | " %s:%u\n", (mod)->name, __FILE__, __LINE__); \ | 447 | " %s:%u\n", (mod)->name, __FILE__, __LINE__); \ |
448 | (mod)->unsafe = 1; \ | 448 | (mod)->unsafe = 1; \ |
449 | } \ | 449 | } \ |
450 | } while(0) | 450 | } while(0) |
451 | 451 | ||
452 | /* For kallsyms to ask for address resolution. NULL means not found. */ | 452 | /* For kallsyms to ask for address resolution. NULL means not found. */ |
453 | const char *module_address_lookup(unsigned long addr, | 453 | const char *module_address_lookup(unsigned long addr, |
454 | unsigned long *symbolsize, | 454 | unsigned long *symbolsize, |
455 | unsigned long *offset, | 455 | unsigned long *offset, |
456 | char **modname); | 456 | char **modname); |
457 | int lookup_module_symbol_name(unsigned long addr, char *symname); | 457 | int lookup_module_symbol_name(unsigned long addr, char *symname); |
458 | int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); | ||
458 | 459 | ||
459 | /* For extable.c to search modules' exception tables. */ | 460 | /* For extable.c to search modules' exception tables. */ |
460 | const struct exception_table_entry *search_module_extables(unsigned long addr); | 461 | const struct exception_table_entry *search_module_extables(unsigned long addr); |
461 | 462 | ||
462 | int register_module_notifier(struct notifier_block * nb); | 463 | int register_module_notifier(struct notifier_block * nb); |
463 | int unregister_module_notifier(struct notifier_block * nb); | 464 | int unregister_module_notifier(struct notifier_block * nb); |
464 | 465 | ||
465 | extern void print_modules(void); | 466 | extern void print_modules(void); |
466 | 467 | ||
467 | #else /* !CONFIG_MODULES... */ | 468 | #else /* !CONFIG_MODULES... */ |
468 | #define EXPORT_SYMBOL(sym) | 469 | #define EXPORT_SYMBOL(sym) |
469 | #define EXPORT_SYMBOL_GPL(sym) | 470 | #define EXPORT_SYMBOL_GPL(sym) |
470 | #define EXPORT_SYMBOL_GPL_FUTURE(sym) | 471 | #define EXPORT_SYMBOL_GPL_FUTURE(sym) |
471 | #define EXPORT_UNUSED_SYMBOL(sym) | 472 | #define EXPORT_UNUSED_SYMBOL(sym) |
472 | #define EXPORT_UNUSED_SYMBOL_GPL(sym) | 473 | #define EXPORT_UNUSED_SYMBOL_GPL(sym) |
473 | 474 | ||
474 | /* Given an address, look for it in the exception tables. */ | 475 | /* Given an address, look for it in the exception tables. */ |
475 | static inline const struct exception_table_entry * | 476 | static inline const struct exception_table_entry * |
476 | search_module_extables(unsigned long addr) | 477 | search_module_extables(unsigned long addr) |
477 | { | 478 | { |
478 | return NULL; | 479 | return NULL; |
479 | } | 480 | } |
480 | 481 | ||
481 | /* Is this address in a module? */ | 482 | /* Is this address in a module? */ |
482 | static inline struct module *module_text_address(unsigned long addr) | 483 | static inline struct module *module_text_address(unsigned long addr) |
483 | { | 484 | { |
484 | return NULL; | 485 | return NULL; |
485 | } | 486 | } |
486 | 487 | ||
487 | /* Is this address in a module? (don't take a lock, we're oopsing) */ | 488 | /* Is this address in a module? (don't take a lock, we're oopsing) */ |
488 | static inline struct module *__module_text_address(unsigned long addr) | 489 | static inline struct module *__module_text_address(unsigned long addr) |
489 | { | 490 | { |
490 | return NULL; | 491 | return NULL; |
491 | } | 492 | } |
492 | 493 | ||
493 | static inline int is_module_address(unsigned long addr) | 494 | static inline int is_module_address(unsigned long addr) |
494 | { | 495 | { |
495 | return 0; | 496 | return 0; |
496 | } | 497 | } |
497 | 498 | ||
498 | /* Get/put a kernel symbol (calls should be symmetric) */ | 499 | /* Get/put a kernel symbol (calls should be symmetric) */ |
499 | #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) | 500 | #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) |
500 | #define symbol_put(x) do { } while(0) | 501 | #define symbol_put(x) do { } while(0) |
501 | #define symbol_put_addr(x) do { } while(0) | 502 | #define symbol_put_addr(x) do { } while(0) |
502 | 503 | ||
503 | static inline void __module_get(struct module *module) | 504 | static inline void __module_get(struct module *module) |
504 | { | 505 | { |
505 | } | 506 | } |
506 | 507 | ||
507 | static inline int try_module_get(struct module *module) | 508 | static inline int try_module_get(struct module *module) |
508 | { | 509 | { |
509 | return 1; | 510 | return 1; |
510 | } | 511 | } |
511 | 512 | ||
512 | static inline void module_put(struct module *module) | 513 | static inline void module_put(struct module *module) |
513 | { | 514 | { |
514 | } | 515 | } |
515 | 516 | ||
516 | #define module_name(mod) "kernel" | 517 | #define module_name(mod) "kernel" |
517 | 518 | ||
518 | #define __unsafe(mod) | 519 | #define __unsafe(mod) |
519 | 520 | ||
520 | /* For kallsyms to ask for address resolution. NULL means not found. */ | 521 | /* For kallsyms to ask for address resolution. NULL means not found. */ |
521 | static inline const char *module_address_lookup(unsigned long addr, | 522 | static inline const char *module_address_lookup(unsigned long addr, |
522 | unsigned long *symbolsize, | 523 | unsigned long *symbolsize, |
523 | unsigned long *offset, | 524 | unsigned long *offset, |
524 | char **modname) | 525 | char **modname) |
525 | { | 526 | { |
526 | return NULL; | 527 | return NULL; |
527 | } | 528 | } |
528 | 529 | ||
529 | static inline int lookup_module_symbol_name(unsigned long addr, char *symname) | 530 | static inline int lookup_module_symbol_name(unsigned long addr, char *symname) |
531 | { | ||
532 | return -ERANGE; | ||
533 | } | ||
534 | |||
535 | static inline int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name) | ||
530 | { | 536 | { |
531 | return -ERANGE; | 537 | return -ERANGE; |
532 | } | 538 | } |
533 | 539 | ||
534 | static inline int module_get_kallsym(unsigned int symnum, unsigned long *value, | 540 | static inline int module_get_kallsym(unsigned int symnum, unsigned long *value, |
535 | char *type, char *name, | 541 | char *type, char *name, |
536 | char *module_name, int *exported) | 542 | char *module_name, int *exported) |
537 | { | 543 | { |
538 | return -ERANGE; | 544 | return -ERANGE; |
539 | } | 545 | } |
540 | 546 | ||
541 | static inline unsigned long module_kallsyms_lookup_name(const char *name) | 547 | static inline unsigned long module_kallsyms_lookup_name(const char *name) |
542 | { | 548 | { |
543 | return 0; | 549 | return 0; |
544 | } | 550 | } |
545 | 551 | ||
546 | static inline int register_module_notifier(struct notifier_block * nb) | 552 | static inline int register_module_notifier(struct notifier_block * nb) |
547 | { | 553 | { |
548 | /* no events will happen anyway, so this can always succeed */ | 554 | /* no events will happen anyway, so this can always succeed */ |
549 | return 0; | 555 | return 0; |
550 | } | 556 | } |
551 | 557 | ||
552 | static inline int unregister_module_notifier(struct notifier_block * nb) | 558 | static inline int unregister_module_notifier(struct notifier_block * nb) |
553 | { | 559 | { |
554 | return 0; | 560 | return 0; |
555 | } | 561 | } |
556 | 562 | ||
557 | #define module_put_and_exit(code) do_exit(code) | 563 | #define module_put_and_exit(code) do_exit(code) |
558 | 564 | ||
559 | static inline void print_modules(void) | 565 | static inline void print_modules(void) |
560 | { | 566 | { |
561 | } | 567 | } |
562 | 568 | ||
563 | #endif /* CONFIG_MODULES */ | 569 | #endif /* CONFIG_MODULES */ |
564 | 570 | ||
565 | struct device_driver; | 571 | struct device_driver; |
566 | #ifdef CONFIG_SYSFS | 572 | #ifdef CONFIG_SYSFS |
567 | struct module; | 573 | struct module; |
568 | 574 | ||
569 | extern struct kset module_subsys; | 575 | extern struct kset module_subsys; |
570 | 576 | ||
571 | int mod_sysfs_init(struct module *mod); | 577 | int mod_sysfs_init(struct module *mod); |
572 | int mod_sysfs_setup(struct module *mod, | 578 | int mod_sysfs_setup(struct module *mod, |
573 | struct kernel_param *kparam, | 579 | struct kernel_param *kparam, |
574 | unsigned int num_params); | 580 | unsigned int num_params); |
575 | int module_add_modinfo_attrs(struct module *mod); | 581 | int module_add_modinfo_attrs(struct module *mod); |
576 | void module_remove_modinfo_attrs(struct module *mod); | 582 | void module_remove_modinfo_attrs(struct module *mod); |
577 | 583 | ||
578 | #else /* !CONFIG_SYSFS */ | 584 | #else /* !CONFIG_SYSFS */ |
579 | 585 | ||
580 | static inline int mod_sysfs_init(struct module *mod) | 586 | static inline int mod_sysfs_init(struct module *mod) |
581 | { | 587 | { |
582 | return 0; | 588 | return 0; |
583 | } | 589 | } |
584 | 590 | ||
585 | static inline int mod_sysfs_setup(struct module *mod, | 591 | static inline int mod_sysfs_setup(struct module *mod, |
586 | struct kernel_param *kparam, | 592 | struct kernel_param *kparam, |
587 | unsigned int num_params) | 593 | unsigned int num_params) |
588 | { | 594 | { |
589 | return 0; | 595 | return 0; |
590 | } | 596 | } |
591 | 597 | ||
592 | static inline int module_add_modinfo_attrs(struct module *mod) | 598 | static inline int module_add_modinfo_attrs(struct module *mod) |
593 | { | 599 | { |
594 | return 0; | 600 | return 0; |
595 | } | 601 | } |
596 | 602 | ||
597 | static inline void module_remove_modinfo_attrs(struct module *mod) | 603 | static inline void module_remove_modinfo_attrs(struct module *mod) |
598 | { } | 604 | { } |
599 | 605 | ||
600 | #endif /* CONFIG_SYSFS */ | 606 | #endif /* CONFIG_SYSFS */ |
601 | 607 | ||
602 | #if defined(CONFIG_SYSFS) && defined(CONFIG_MODULES) | 608 | #if defined(CONFIG_SYSFS) && defined(CONFIG_MODULES) |
603 | 609 | ||
604 | void module_add_driver(struct module *mod, struct device_driver *drv); | 610 | void module_add_driver(struct module *mod, struct device_driver *drv); |
605 | void module_remove_driver(struct device_driver *drv); | 611 | void module_remove_driver(struct device_driver *drv); |
606 | 612 | ||
607 | #else /* not both CONFIG_SYSFS && CONFIG_MODULES */ | 613 | #else /* not both CONFIG_SYSFS && CONFIG_MODULES */ |
608 | 614 | ||
609 | static inline void module_add_driver(struct module *mod, struct device_driver *drv) | 615 | static inline void module_add_driver(struct module *mod, struct device_driver *drv) |
610 | { } | 616 | { } |
611 | 617 | ||
612 | static inline void module_remove_driver(struct device_driver *drv) | 618 | static inline void module_remove_driver(struct device_driver *drv) |
613 | { } | 619 | { } |
614 | 620 | ||
615 | #endif | 621 | #endif |
616 | 622 | ||
617 | #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) | 623 | #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) |
618 | 624 | ||
619 | /* BELOW HERE ALL THESE ARE OBSOLETE AND WILL VANISH */ | 625 | /* BELOW HERE ALL THESE ARE OBSOLETE AND WILL VANISH */ |
620 | 626 | ||
621 | #define __MODULE_STRING(x) __stringify(x) | 627 | #define __MODULE_STRING(x) __stringify(x) |
622 | 628 | ||
623 | #endif /* _LINUX_MODULE_H */ | 629 | #endif /* _LINUX_MODULE_H */ |
624 | 630 |
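The kernel/module.c half of the commit (one of the five changed files, not shown on this page) supplies the body for the lookup_module_symbol_attrs() declaration added above. Going by the lookup_module_symbol_name() pattern introduced by the parent wchan fix, a plausible reconstruction walks the module list under module_mutex and copies both strings out before unlocking. Treat the body below as a sketch, not the commit's code; within(), get_ksymbol(), `modules` and module_mutex are kernel/module.c internals assumed from this era:

```c
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/string.h>

/* Sketch: resolve attributes under module_mutex so rmmod cannot free
 * the module's symtab/strtab mid-lookup; copy the results into the
 * caller's buffers before the lock is dropped.
 */
int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
			       unsigned long *offset, char *modname,
			       char *name)
{
	struct module *mod;

	mutex_lock(&module_mutex);
	list_for_each_entry(mod, &modules, list) {
		if (within(addr, mod->module_init, mod->init_size) ||
		    within(addr, mod->module_core, mod->core_size)) {
			const char *sym;

			sym = get_ksymbol(mod, addr, size, offset);
			if (!sym)
				goto out;
			/* copy while the module still exists */
			strlcpy(modname, mod->name, MODULE_NAME_LEN);
			strlcpy(name, sym, KSYM_NAME_LEN + 1);
			mutex_unlock(&module_mutex);
			return 0;
		}
	}
out:
	mutex_unlock(&module_mutex);
	return -ERANGE;
}
```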
kernel/kallsyms.c
1 | /* | 1 | /* |
2 | * kallsyms.c: in-kernel printing of symbolic oopses and stack traces. | 2 | * kallsyms.c: in-kernel printing of symbolic oopses and stack traces. |
3 | * | 3 | * |
4 | * Rewritten and vastly simplified by Rusty Russell for in-kernel | 4 | * Rewritten and vastly simplified by Rusty Russell for in-kernel |
5 | * module loader: | 5 | * module loader: |
6 | * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation | 6 | * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation |
7 | * | 7 | * |
8 | * ChangeLog: | 8 | * ChangeLog: |
9 | * | 9 | * |
10 | * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com> | 10 | * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com> |
11 | * Changed the compression method from stem compression to "table lookup" | 11 | * Changed the compression method from stem compression to "table lookup" |
12 | * compression (see scripts/kallsyms.c for a more complete description) | 12 | * compression (see scripts/kallsyms.c for a more complete description) |
13 | */ | 13 | */ |
14 | #include <linux/kallsyms.h> | 14 | #include <linux/kallsyms.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/seq_file.h> | 17 | #include <linux/seq_file.h> |
18 | #include <linux/fs.h> | 18 | #include <linux/fs.h> |
19 | #include <linux/err.h> | 19 | #include <linux/err.h> |
20 | #include <linux/proc_fs.h> | 20 | #include <linux/proc_fs.h> |
21 | #include <linux/sched.h> /* for cond_resched */ | 21 | #include <linux/sched.h> /* for cond_resched */ |
22 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
23 | #include <linux/ctype.h> | 23 | #include <linux/ctype.h> |
24 | 24 | ||
25 | #include <asm/sections.h> | 25 | #include <asm/sections.h> |
26 | 26 | ||
27 | #ifdef CONFIG_KALLSYMS_ALL | 27 | #ifdef CONFIG_KALLSYMS_ALL |
28 | #define all_var 1 | 28 | #define all_var 1 |
29 | #else | 29 | #else |
30 | #define all_var 0 | 30 | #define all_var 0 |
31 | #endif | 31 | #endif |
32 | 32 | ||
33 | /* These will be re-linked against their real values during the second link stage */ | 33 | /* These will be re-linked against their real values during the second link stage */ |
34 | extern const unsigned long kallsyms_addresses[] __attribute__((weak)); | 34 | extern const unsigned long kallsyms_addresses[] __attribute__((weak)); |
35 | extern const unsigned long kallsyms_num_syms __attribute__((weak)); | 35 | extern const unsigned long kallsyms_num_syms __attribute__((weak)); |
36 | extern const u8 kallsyms_names[] __attribute__((weak)); | 36 | extern const u8 kallsyms_names[] __attribute__((weak)); |
37 | 37 | ||
38 | extern const u8 kallsyms_token_table[] __attribute__((weak)); | 38 | extern const u8 kallsyms_token_table[] __attribute__((weak)); |
39 | extern const u16 kallsyms_token_index[] __attribute__((weak)); | 39 | extern const u16 kallsyms_token_index[] __attribute__((weak)); |
40 | 40 | ||
41 | extern const unsigned long kallsyms_markers[] __attribute__((weak)); | 41 | extern const unsigned long kallsyms_markers[] __attribute__((weak)); |
42 | 42 | ||
43 | static inline int is_kernel_inittext(unsigned long addr) | 43 | static inline int is_kernel_inittext(unsigned long addr) |
44 | { | 44 | { |
45 | if (addr >= (unsigned long)_sinittext | 45 | if (addr >= (unsigned long)_sinittext |
46 | && addr <= (unsigned long)_einittext) | 46 | && addr <= (unsigned long)_einittext) |
47 | return 1; | 47 | return 1; |
48 | return 0; | 48 | return 0; |
49 | } | 49 | } |
50 | 50 | ||
51 | static inline int is_kernel_extratext(unsigned long addr) | 51 | static inline int is_kernel_extratext(unsigned long addr) |
52 | { | 52 | { |
53 | if (addr >= (unsigned long)_sextratext | 53 | if (addr >= (unsigned long)_sextratext |
54 | && addr <= (unsigned long)_eextratext) | 54 | && addr <= (unsigned long)_eextratext) |
55 | return 1; | 55 | return 1; |
56 | return 0; | 56 | return 0; |
57 | } | 57 | } |
58 | 58 | ||
59 | static inline int is_kernel_text(unsigned long addr) | 59 | static inline int is_kernel_text(unsigned long addr) |
60 | { | 60 | { |
61 | if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) | 61 | if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) |
62 | return 1; | 62 | return 1; |
63 | return in_gate_area_no_task(addr); | 63 | return in_gate_area_no_task(addr); |
64 | } | 64 | } |
65 | 65 | ||
66 | static inline int is_kernel(unsigned long addr) | 66 | static inline int is_kernel(unsigned long addr) |
67 | { | 67 | { |
68 | if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) | 68 | if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) |
69 | return 1; | 69 | return 1; |
70 | return in_gate_area_no_task(addr); | 70 | return in_gate_area_no_task(addr); |
71 | } | 71 | } |
72 | 72 | ||
73 | static int is_ksym_addr(unsigned long addr) | 73 | static int is_ksym_addr(unsigned long addr) |
74 | { | 74 | { |
75 | if (all_var) | 75 | if (all_var) |
76 | return is_kernel(addr); | 76 | return is_kernel(addr); |
77 | 77 | ||
78 | return is_kernel_text(addr) || is_kernel_inittext(addr) || | 78 | return is_kernel_text(addr) || is_kernel_inittext(addr) || |
79 | is_kernel_extratext(addr); | 79 | is_kernel_extratext(addr); |
80 | } | 80 | } |
81 | 81 | ||
82 | /* expand a compressed symbol data into the resulting uncompressed string, | 82 | /* expand a compressed symbol data into the resulting uncompressed string, |
83 | given the offset to where the symbol is in the compressed stream */ | 83 | given the offset to where the symbol is in the compressed stream */ |
84 | static unsigned int kallsyms_expand_symbol(unsigned int off, char *result) | 84 | static unsigned int kallsyms_expand_symbol(unsigned int off, char *result) |
85 | { | 85 | { |
86 | int len, skipped_first = 0; | 86 | int len, skipped_first = 0; |
87 | const u8 *tptr, *data; | 87 | const u8 *tptr, *data; |
88 | 88 | ||
89 | /* get the compressed symbol length from the first symbol byte */ | 89 | /* get the compressed symbol length from the first symbol byte */ |
90 | data = &kallsyms_names[off]; | 90 | data = &kallsyms_names[off]; |
91 | len = *data; | 91 | len = *data; |
92 | data++; | 92 | data++; |
93 | 93 | ||
94 | /* update the offset to return the offset for the next symbol on | 94 | /* update the offset to return the offset for the next symbol on |
95 | * the compressed stream */ | 95 | * the compressed stream */ |
96 | off += len + 1; | 96 | off += len + 1; |
97 | 97 | ||
98 | /* for every byte in the compressed symbol data, copy the table | 98 | /* for every byte in the compressed symbol data, copy the table |
99 | entry for that byte */ | 99 | entry for that byte */ |
100 | while(len) { | 100 | while(len) { |
101 | tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ]; | 101 | tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ]; |
102 | data++; | 102 | data++; |
103 | len--; | 103 | len--; |
104 | 104 | ||
105 | while (*tptr) { | 105 | while (*tptr) { |
106 | if(skipped_first) { | 106 | if(skipped_first) { |
107 | *result = *tptr; | 107 | *result = *tptr; |
108 | result++; | 108 | result++; |
109 | } else | 109 | } else |
110 | skipped_first = 1; | 110 | skipped_first = 1; |
111 | tptr++; | 111 | tptr++; |
112 | } | 112 | } |
113 | } | 113 | } |
114 | 114 | ||
115 | *result = '\0'; | 115 | *result = '\0'; |
116 | 116 | ||
117 | /* return the offset to the next symbol */ | 117 | /* return the offset to the next symbol */ |
118 | return off; | 118 | return off; |
119 | } | 119 | } |
120 | 120 | ||
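The decoder above can be modelled in userspace in a few lines. The sketch below mirrors the same [len][token indices] layout, with each index mapping through a token_index array into a token_table of NUL-separated strings; the table contents here are invented purely for illustration. The first decoded character is skipped exactly as in the kernel routine, because it encodes the symbol type rather than the name.

    #include <stdio.h>

    /* Toy tables mirroring the kernel's layout; contents are made up. */
    static const unsigned char names[] = {
            3, 0, 1, 2,     /* one symbol: [len=3][type token][2 name tokens] */
    };
    static const char token_table[] = "t\0prin\0tk\0";  /* tokens at 0, 2, 7 */
    static const unsigned short token_index[] = { 0, 2, 7 };

    /* Decode one compressed symbol; returns the offset of the next one. */
    static unsigned int expand_symbol(unsigned int off, char *result)
    {
            int len = names[off++];
            int skipped_first = 0;

            while (len--) {
                    const char *tptr = &token_table[token_index[names[off++]]];
                    while (*tptr) {
                            if (skipped_first)      /* first char is the type code */
                                    *result++ = *tptr;
                            else
                                    skipped_first = 1;
                            tptr++;
                    }
            }
            *result = '\0';
            return off;
    }

    int main(void)
    {
            char buf[64];

            expand_symbol(0, buf);
            printf("%s\n", buf);    /* prints "printk" */
            return 0;
    }

Running it prints "printk": the leading 't' (the type code) never reaches the result buffer, which is also why kallsyms_get_symbol_type() below only needs to decode the first token.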
121 | /* get symbol type information. This is encoded as a single char at the | 121 | /* get symbol type information. This is encoded as a single char at the |
122 | * beginning of the symbol name */ | 122 | * beginning of the symbol name */ |
123 | static char kallsyms_get_symbol_type(unsigned int off) | 123 | static char kallsyms_get_symbol_type(unsigned int off) |
124 | { | 124 | { |
125 | /* get just the first code, look it up in the token table, and return the | 125 | /* get just the first code, look it up in the token table, and return the |
126 | * first char from this token */ | 126 | * first char from this token */ |
127 | return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ]; | 127 | return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ]; |
128 | } | 128 | } |
129 | 129 | ||
130 | 130 | ||
131 | /* find the offset in the compressed stream given an index in the | 131 | /* find the offset in the compressed stream given an index in the |
132 | * kallsyms array */ | 132 | * kallsyms array */ |
133 | static unsigned int get_symbol_offset(unsigned long pos) | 133 | static unsigned int get_symbol_offset(unsigned long pos) |
134 | { | 134 | { |
135 | const u8 *name; | 135 | const u8 *name; |
136 | int i; | 136 | int i; |
137 | 137 | ||
138 | /* use the closest marker we have. We have markers every 256 positions, | 138 | /* use the closest marker we have. We have markers every 256 positions, |
139 | * so that should be close enough */ | 139 | * so that should be close enough */ |
140 | name = &kallsyms_names[ kallsyms_markers[pos>>8] ]; | 140 | name = &kallsyms_names[ kallsyms_markers[pos>>8] ]; |
141 | 141 | ||
142 | /* sequentially scan all the symbols up to the point we're searching for. | 142 | /* sequentially scan all the symbols up to the point we're searching for. |
143 | * Every symbol is stored in a [<len>][<len> bytes of data] format, so we | 143 | * Every symbol is stored in a [<len>][<len> bytes of data] format, so we |
144 | * just need to add the len to the current pointer for every symbol we | 144 | * just need to add the len to the current pointer for every symbol we |
145 | * wish to skip */ | 145 | * wish to skip */ |
146 | for(i = 0; i < (pos&0xFF); i++) | 146 | for(i = 0; i < (pos&0xFF); i++) |
147 | name = name + (*name) + 1; | 147 | name = name + (*name) + 1; |
148 | 148 | ||
149 | return name - kallsyms_names; | 149 | return name - kallsyms_names; |
150 | } | 150 | } |
151 | 151 | ||
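The marker table trades a little memory for seek time: kallsyms_markers[i] records the byte offset of symbol number i*256 in the compressed stream, so locating symbol pos costs one array read plus at most 255 length-prefixed skips. A standalone sketch of the arithmetic, with invented marker values:

    #include <stdio.h>

    int main(void)
    {
            /* Invented offsets of symbols 0, 256, 512, 768 in the stream. */
            unsigned int markers[] = { 0, 2311, 4755, 7102 };
            unsigned long pos = 770;

            unsigned int off = markers[pos >> 8];   /* jump to symbol 768 */
            unsigned int to_skip = pos & 0xFF;      /* then walk 2 entries */

            printf("start at byte %u, skip %u symbols\n", off, to_skip);
            return 0;
    }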
152 | /* Lookup the address for this symbol. Returns 0 if not found. */ | 152 | /* Lookup the address for this symbol. Returns 0 if not found. */ |
153 | unsigned long kallsyms_lookup_name(const char *name) | 153 | unsigned long kallsyms_lookup_name(const char *name) |
154 | { | 154 | { |
155 | char namebuf[KSYM_NAME_LEN+1]; | 155 | char namebuf[KSYM_NAME_LEN+1]; |
156 | unsigned long i; | 156 | unsigned long i; |
157 | unsigned int off; | 157 | unsigned int off; |
158 | 158 | ||
159 | for (i = 0, off = 0; i < kallsyms_num_syms; i++) { | 159 | for (i = 0, off = 0; i < kallsyms_num_syms; i++) { |
160 | off = kallsyms_expand_symbol(off, namebuf); | 160 | off = kallsyms_expand_symbol(off, namebuf); |
161 | 161 | ||
162 | if (strcmp(namebuf, name) == 0) | 162 | if (strcmp(namebuf, name) == 0) |
163 | return kallsyms_addresses[i]; | 163 | return kallsyms_addresses[i]; |
164 | } | 164 | } |
165 | return module_kallsyms_lookup_name(name); | 165 | return module_kallsyms_lookup_name(name); |
166 | } | 166 | } |
167 | 167 | ||
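Note that only __print_symbol and sprint_symbol are exported at the bottom of this file, so kallsyms_lookup_name() is callable only from built-in code in this tree. A minimal built-in caller might look like the following sketch (the probed symbol name is arbitrary):

    #include <linux/kernel.h>
    #include <linux/kallsyms.h>
    #include <linux/init.h>

    static int __init kallsyms_demo_init(void)
    {
            /* 0 means the name is in neither the core image nor any module */
            unsigned long addr = kallsyms_lookup_name("printk");

            if (addr)
                    printk(KERN_INFO "printk lives at 0x%lx\n", addr);
            return 0;
    }
    __initcall(kallsyms_demo_init);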
168 | static unsigned long get_symbol_pos(unsigned long addr, | 168 | static unsigned long get_symbol_pos(unsigned long addr, |
169 | unsigned long *symbolsize, | 169 | unsigned long *symbolsize, |
170 | unsigned long *offset) | 170 | unsigned long *offset) |
171 | { | 171 | { |
172 | unsigned long symbol_start = 0, symbol_end = 0; | 172 | unsigned long symbol_start = 0, symbol_end = 0; |
173 | unsigned long i, low, high, mid; | 173 | unsigned long i, low, high, mid; |
174 | 174 | ||
175 | /* This kernel should never have been booted. */ | 175 | /* This kernel should never have been booted. */ |
176 | BUG_ON(!kallsyms_addresses); | 176 | BUG_ON(!kallsyms_addresses); |
177 | 177 | ||
178 | /* do a binary search on the sorted kallsyms_addresses array */ | 178 | /* do a binary search on the sorted kallsyms_addresses array */ |
179 | low = 0; | 179 | low = 0; |
180 | high = kallsyms_num_syms; | 180 | high = kallsyms_num_syms; |
181 | 181 | ||
182 | while (high - low > 1) { | 182 | while (high - low > 1) { |
183 | mid = (low + high) / 2; | 183 | mid = (low + high) / 2; |
184 | if (kallsyms_addresses[mid] <= addr) | 184 | if (kallsyms_addresses[mid] <= addr) |
185 | low = mid; | 185 | low = mid; |
186 | else | 186 | else |
187 | high = mid; | 187 | high = mid; |
188 | } | 188 | } |
189 | 189 | ||
190 | /* | 190 | /* |
191 | * search for the first aliased symbol. Aliased | 191 | * search for the first aliased symbol. Aliased |
192 | * symbols are symbols with the same address | 192 | * symbols are symbols with the same address |
193 | */ | 193 | */ |
194 | while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low]) | 194 | while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low]) |
195 | --low; | 195 | --low; |
196 | 196 | ||
197 | symbol_start = kallsyms_addresses[low]; | 197 | symbol_start = kallsyms_addresses[low]; |
198 | 198 | ||
199 | /* Search for next non-aliased symbol */ | 199 | /* Search for next non-aliased symbol */ |
200 | for (i = low + 1; i < kallsyms_num_syms; i++) { | 200 | for (i = low + 1; i < kallsyms_num_syms; i++) { |
201 | if (kallsyms_addresses[i] > symbol_start) { | 201 | if (kallsyms_addresses[i] > symbol_start) { |
202 | symbol_end = kallsyms_addresses[i]; | 202 | symbol_end = kallsyms_addresses[i]; |
203 | break; | 203 | break; |
204 | } | 204 | } |
205 | } | 205 | } |
206 | 206 | ||
207 | /* if we found no next symbol, we use the end of the section */ | 207 | /* if we found no next symbol, we use the end of the section */ |
208 | if (!symbol_end) { | 208 | if (!symbol_end) { |
209 | if (is_kernel_inittext(addr)) | 209 | if (is_kernel_inittext(addr)) |
210 | symbol_end = (unsigned long)_einittext; | 210 | symbol_end = (unsigned long)_einittext; |
211 | else if (all_var) | 211 | else if (all_var) |
212 | symbol_end = (unsigned long)_end; | 212 | symbol_end = (unsigned long)_end; |
213 | else | 213 | else |
214 | symbol_end = (unsigned long)_etext; | 214 | symbol_end = (unsigned long)_etext; |
215 | } | 215 | } |
216 | 216 | ||
217 | if (symbolsize) | 217 | if (symbolsize) |
218 | *symbolsize = symbol_end - symbol_start; | 218 | *symbolsize = symbol_end - symbol_start; |
219 | if (offset) | 219 | if (offset) |
220 | *offset = addr - symbol_start; | 220 | *offset = addr - symbol_start; |
221 | 221 | ||
222 | return low; | 222 | return low; |
223 | } | 223 | } |
224 | 224 | ||
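get_symbol_pos() is the classic "greatest element <= key" binary search, followed by a backward walk so that aliased symbols (distinct names sharing one address) all resolve to the first table entry. A standalone model of just that search, over invented addresses:

    #include <stdio.h>

    static unsigned long addrs[] = { 0x100, 0x180, 0x180, 0x200, 0x2c0 };
    #define NSYMS (sizeof(addrs) / sizeof(addrs[0]))

    static unsigned long symbol_pos(unsigned long addr)
    {
            unsigned long low = 0, high = NSYMS, mid;

            /* invariant: addrs[low] <= addr < addrs[high] (high is one past end) */
            while (high - low > 1) {
                    mid = (low + high) / 2;
                    if (addrs[mid] <= addr)
                            low = mid;
                    else
                            high = mid;
            }
            /* step back over aliases sharing the same address */
            while (low && addrs[low - 1] == addrs[low])
                    --low;
            return low;
    }

    int main(void)
    {
            printf("%lu\n", symbol_pos(0x1a0));  /* prints 1: first 0x180 alias */
            return 0;
    }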
225 | /* | 225 | /* |
226 | * Lookup an address but don't bother to find any names. | 226 | * Lookup an address but don't bother to find any names. |
227 | */ | 227 | */ |
228 | int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, | 228 | int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, |
229 | unsigned long *offset) | 229 | unsigned long *offset) |
230 | { | 230 | { |
231 | if (is_ksym_addr(addr)) | 231 | if (is_ksym_addr(addr)) |
232 | return !!get_symbol_pos(addr, symbolsize, offset); | 232 | return !!get_symbol_pos(addr, symbolsize, offset); |
233 | 233 | ||
234 | return !!module_address_lookup(addr, symbolsize, offset, NULL); | 234 | return !!module_address_lookup(addr, symbolsize, offset, NULL); |
235 | } | 235 | } |
236 | 236 | ||
237 | /* | 237 | /* |
238 | * Lookup an address | 238 | * Lookup an address |
239 | * - modname is set to NULL if it's in the kernel | 239 | * - modname is set to NULL if it's in the kernel |
240 | * - we guarantee that the returned name is valid until we reschedule even if | 240 | * - we guarantee that the returned name is valid until we reschedule even if |
241 | * it resides in a module | 241 | * it resides in a module |
242 | * - we also guarantee that modname will be valid until rescheduled | 242 | * - we also guarantee that modname will be valid until rescheduled |
243 | */ | 243 | */ |
244 | const char *kallsyms_lookup(unsigned long addr, | 244 | const char *kallsyms_lookup(unsigned long addr, |
245 | unsigned long *symbolsize, | 245 | unsigned long *symbolsize, |
246 | unsigned long *offset, | 246 | unsigned long *offset, |
247 | char **modname, char *namebuf) | 247 | char **modname, char *namebuf) |
248 | { | 248 | { |
249 | const char *msym; | 249 | const char *msym; |
250 | 250 | ||
251 | namebuf[KSYM_NAME_LEN] = 0; | 251 | namebuf[KSYM_NAME_LEN] = 0; |
252 | namebuf[0] = 0; | 252 | namebuf[0] = 0; |
253 | 253 | ||
254 | if (is_ksym_addr(addr)) { | 254 | if (is_ksym_addr(addr)) { |
255 | unsigned long pos; | 255 | unsigned long pos; |
256 | 256 | ||
257 | pos = get_symbol_pos(addr, symbolsize, offset); | 257 | pos = get_symbol_pos(addr, symbolsize, offset); |
258 | /* Grab name */ | 258 | /* Grab name */ |
259 | kallsyms_expand_symbol(get_symbol_offset(pos), namebuf); | 259 | kallsyms_expand_symbol(get_symbol_offset(pos), namebuf); |
260 | *modname = NULL; | 260 | *modname = NULL; |
261 | return namebuf; | 261 | return namebuf; |
262 | } | 262 | } |
263 | 263 | ||
264 | /* see if it's in a module */ | 264 | /* see if it's in a module */ |
265 | msym = module_address_lookup(addr, symbolsize, offset, modname); | 265 | msym = module_address_lookup(addr, symbolsize, offset, modname); |
266 | if (msym) | 266 | if (msym) |
267 | return strncpy(namebuf, msym, KSYM_NAME_LEN); | 267 | return strncpy(namebuf, msym, KSYM_NAME_LEN); |
268 | 268 | ||
269 | return NULL; | 269 | return NULL; |
270 | } | 270 | } |
271 | 271 | ||
272 | int lookup_symbol_name(unsigned long addr, char *symname) | 272 | int lookup_symbol_name(unsigned long addr, char *symname) |
273 | { | 273 | { |
274 | symname[0] = '\0'; | 274 | symname[0] = '\0'; |
275 | symname[KSYM_NAME_LEN] = '\0'; | 275 | symname[KSYM_NAME_LEN] = '\0'; |
276 | 276 | ||
277 | if (is_ksym_addr(addr)) { | 277 | if (is_ksym_addr(addr)) { |
278 | unsigned long pos; | 278 | unsigned long pos; |
279 | 279 | ||
280 | pos = get_symbol_pos(addr, NULL, NULL); | 280 | pos = get_symbol_pos(addr, NULL, NULL); |
281 | /* Grab name */ | 281 | /* Grab name */ |
282 | kallsyms_expand_symbol(get_symbol_offset(pos), symname); | 282 | kallsyms_expand_symbol(get_symbol_offset(pos), symname); |
283 | return 0; | 283 | return 0; |
284 | } | 284 | } |
285 | /* see if it's in a module */ | 285 | /* see if it's in a module */ |
286 | return lookup_module_symbol_name(addr, symname); | 286 | return lookup_module_symbol_name(addr, symname); |
287 | } | 287 | } |
288 | 288 | ||
289 | int lookup_symbol_attrs(unsigned long addr, unsigned long *size, | ||
290 | unsigned long *offset, char *modname, char *name) | ||
291 | { | ||
292 | name[0] = '\0'; | ||
293 | name[KSYM_NAME_LEN] = '\0'; | ||
294 | |||
295 | if (is_ksym_addr(addr)) { | ||
296 | unsigned long pos; | ||
297 | |||
298 | pos = get_symbol_pos(addr, size, offset); | ||
299 | /* Grab name */ | ||
300 | kallsyms_expand_symbol(get_symbol_offset(pos), name); | ||
301 | modname[0] = '\0'; | ||
302 | return 0; | ||
303 | } | ||
304 | /* see if it's in a module */ | ||
305 | return lookup_module_symbol_attrs(addr, size, offset, modname, name); | ||
306 | } | ||
307 | |||
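This new helper is what the /proc/slab_allocators path touched elsewhere in this commit can call: unlike kallsyms_lookup(), it copies the module name into a caller-supplied buffer instead of handing back a pointer into the module itself, which is what closes the rmmod race. A hedged sketch of a caller, assuming addr and the seq_file m come from the surrounding show routine, and with the output format borrowed from sprint_symbol() below (the real slab output may differ):

    char name[KSYM_NAME_LEN + 1];
    char modname[MODULE_NAME_LEN + 1];
    unsigned long size, offset;

    if (!lookup_symbol_attrs(addr, &size, &offset, modname, name)) {
            /* both buffers are private copies, so nothing here can be
             * yanked away by rmmod between the lookup and the printing */
            seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
            if (modname[0])
                    seq_printf(m, " [%s]", modname);
    }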
289 | /* Look up a kernel symbol and return it in a text buffer. */ | 308 | /* Look up a kernel symbol and return it in a text buffer. */ |
290 | int sprint_symbol(char *buffer, unsigned long address) | 309 | int sprint_symbol(char *buffer, unsigned long address) |
291 | { | 310 | { |
292 | char *modname; | 311 | char *modname; |
293 | const char *name; | 312 | const char *name; |
294 | unsigned long offset, size; | 313 | unsigned long offset, size; |
295 | char namebuf[KSYM_NAME_LEN+1]; | 314 | char namebuf[KSYM_NAME_LEN+1]; |
296 | 315 | ||
297 | name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); | 316 | name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); |
298 | if (!name) | 317 | if (!name) |
299 | return sprintf(buffer, "0x%lx", address); | 318 | return sprintf(buffer, "0x%lx", address); |
300 | else { | 319 | else { |
301 | if (modname) | 320 | if (modname) |
302 | return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, | 321 | return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, |
303 | size, modname); | 322 | size, modname); |
304 | else | 323 | else |
305 | return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); | 324 | return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); |
306 | } | 325 | } |
307 | } | 326 | } |
308 | 327 | ||
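For orientation, these are the three shapes sprint_symbol() can produce; the names and numbers here are invented:

    char buf[KSYM_SYMBOL_LEN];

    sprint_symbol(buf, addr);
    /* core symbol:      "schedule+0x5a/0x4e0"           */
    /* module symbol:    "nfs_readpage+0x30/0x110 [nfs]" */
    /* nothing resolved: "0xdeadbeef"                    */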
309 | /* Look up a kernel symbol and print it to the kernel messages. */ | 328 | /* Look up a kernel symbol and print it to the kernel messages. */ |
310 | void __print_symbol(const char *fmt, unsigned long address) | 329 | void __print_symbol(const char *fmt, unsigned long address) |
311 | { | 330 | { |
312 | char buffer[KSYM_SYMBOL_LEN]; | 331 | char buffer[KSYM_SYMBOL_LEN]; |
313 | 332 | ||
314 | sprint_symbol(buffer, address); | 333 | sprint_symbol(buffer, address); |
315 | 334 | ||
316 | printk(fmt, buffer); | 335 | printk(fmt, buffer); |
317 | } | 336 | } |
318 | 337 | ||
319 | /* To avoid using get_symbol_offset for every symbol, we carry the prefix along. */ | 338 | /* To avoid using get_symbol_offset for every symbol, we carry the prefix along. */ |
320 | struct kallsym_iter | 339 | struct kallsym_iter |
321 | { | 340 | { |
322 | loff_t pos; | 341 | loff_t pos; |
323 | unsigned long value; | 342 | unsigned long value; |
324 | unsigned int nameoff; /* If iterating in core kernel symbols */ | 343 | unsigned int nameoff; /* If iterating in core kernel symbols */ |
325 | char type; | 344 | char type; |
326 | char name[KSYM_NAME_LEN+1]; | 345 | char name[KSYM_NAME_LEN+1]; |
327 | char module_name[MODULE_NAME_LEN + 1]; | 346 | char module_name[MODULE_NAME_LEN + 1]; |
328 | int exported; | 347 | int exported; |
329 | }; | 348 | }; |
330 | 349 | ||
331 | static int get_ksymbol_mod(struct kallsym_iter *iter) | 350 | static int get_ksymbol_mod(struct kallsym_iter *iter) |
332 | { | 351 | { |
333 | if (module_get_kallsym(iter->pos - kallsyms_num_syms, &iter->value, | 352 | if (module_get_kallsym(iter->pos - kallsyms_num_syms, &iter->value, |
334 | &iter->type, iter->name, iter->module_name, | 353 | &iter->type, iter->name, iter->module_name, |
335 | &iter->exported) < 0) | 354 | &iter->exported) < 0) |
336 | return 0; | 355 | return 0; |
337 | return 1; | 356 | return 1; |
338 | } | 357 | } |
339 | 358 | ||
340 | /* Returns space to next name. */ | 359 | /* Returns space to next name. */ |
341 | static unsigned long get_ksymbol_core(struct kallsym_iter *iter) | 360 | static unsigned long get_ksymbol_core(struct kallsym_iter *iter) |
342 | { | 361 | { |
343 | unsigned off = iter->nameoff; | 362 | unsigned off = iter->nameoff; |
344 | 363 | ||
345 | iter->module_name[0] = '\0'; | 364 | iter->module_name[0] = '\0'; |
346 | iter->value = kallsyms_addresses[iter->pos]; | 365 | iter->value = kallsyms_addresses[iter->pos]; |
347 | 366 | ||
348 | iter->type = kallsyms_get_symbol_type(off); | 367 | iter->type = kallsyms_get_symbol_type(off); |
349 | 368 | ||
350 | off = kallsyms_expand_symbol(off, iter->name); | 369 | off = kallsyms_expand_symbol(off, iter->name); |
351 | 370 | ||
352 | return off - iter->nameoff; | 371 | return off - iter->nameoff; |
353 | } | 372 | } |
354 | 373 | ||
355 | static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) | 374 | static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) |
356 | { | 375 | { |
357 | iter->name[0] = '\0'; | 376 | iter->name[0] = '\0'; |
358 | iter->nameoff = get_symbol_offset(new_pos); | 377 | iter->nameoff = get_symbol_offset(new_pos); |
359 | iter->pos = new_pos; | 378 | iter->pos = new_pos; |
360 | } | 379 | } |
361 | 380 | ||
362 | /* Returns false if pos is at or past end of file. */ | 381 | /* Returns false if pos is at or past end of file. */ |
363 | static int update_iter(struct kallsym_iter *iter, loff_t pos) | 382 | static int update_iter(struct kallsym_iter *iter, loff_t pos) |
364 | { | 383 | { |
365 | /* Module symbols can be accessed randomly. */ | 384 | /* Module symbols can be accessed randomly. */ |
366 | if (pos >= kallsyms_num_syms) { | 385 | if (pos >= kallsyms_num_syms) { |
367 | iter->pos = pos; | 386 | iter->pos = pos; |
368 | return get_ksymbol_mod(iter); | 387 | return get_ksymbol_mod(iter); |
369 | } | 388 | } |
370 | 389 | ||
371 | /* If we're not on the desired position, reset to new position. */ | 390 | /* If we're not on the desired position, reset to new position. */ |
372 | if (pos != iter->pos) | 391 | if (pos != iter->pos) |
373 | reset_iter(iter, pos); | 392 | reset_iter(iter, pos); |
374 | 393 | ||
375 | iter->nameoff += get_ksymbol_core(iter); | 394 | iter->nameoff += get_ksymbol_core(iter); |
376 | iter->pos++; | 395 | iter->pos++; |
377 | 396 | ||
378 | return 1; | 397 | return 1; |
379 | } | 398 | } |
380 | 399 | ||
381 | static void *s_next(struct seq_file *m, void *p, loff_t *pos) | 400 | static void *s_next(struct seq_file *m, void *p, loff_t *pos) |
382 | { | 401 | { |
383 | (*pos)++; | 402 | (*pos)++; |
384 | 403 | ||
385 | if (!update_iter(m->private, *pos)) | 404 | if (!update_iter(m->private, *pos)) |
386 | return NULL; | 405 | return NULL; |
387 | return p; | 406 | return p; |
388 | } | 407 | } |
389 | 408 | ||
390 | static void *s_start(struct seq_file *m, loff_t *pos) | 409 | static void *s_start(struct seq_file *m, loff_t *pos) |
391 | { | 410 | { |
392 | if (!update_iter(m->private, *pos)) | 411 | if (!update_iter(m->private, *pos)) |
393 | return NULL; | 412 | return NULL; |
394 | return m->private; | 413 | return m->private; |
395 | } | 414 | } |
396 | 415 | ||
397 | static void s_stop(struct seq_file *m, void *p) | 416 | static void s_stop(struct seq_file *m, void *p) |
398 | { | 417 | { |
399 | } | 418 | } |
400 | 419 | ||
401 | static int s_show(struct seq_file *m, void *p) | 420 | static int s_show(struct seq_file *m, void *p) |
402 | { | 421 | { |
403 | struct kallsym_iter *iter = m->private; | 422 | struct kallsym_iter *iter = m->private; |
404 | 423 | ||
405 | /* Some debugging symbols have no name. Ignore them. */ | 424 | /* Some debugging symbols have no name. Ignore them. */ |
406 | if (!iter->name[0]) | 425 | if (!iter->name[0]) |
407 | return 0; | 426 | return 0; |
408 | 427 | ||
409 | if (iter->module_name[0]) { | 428 | if (iter->module_name[0]) { |
410 | char type; | 429 | char type; |
411 | 430 | ||
412 | /* Label it "global" if it is exported, | 431 | /* Label it "global" if it is exported, |
413 | * "local" if not exported. */ | 432 | * "local" if not exported. */ |
414 | type = iter->exported ? toupper(iter->type) : | 433 | type = iter->exported ? toupper(iter->type) : |
415 | tolower(iter->type); | 434 | tolower(iter->type); |
416 | seq_printf(m, "%0*lx %c %s\t[%s]\n", | 435 | seq_printf(m, "%0*lx %c %s\t[%s]\n", |
417 | (int)(2*sizeof(void*)), | 436 | (int)(2*sizeof(void*)), |
418 | iter->value, type, iter->name, iter->module_name); | 437 | iter->value, type, iter->name, iter->module_name); |
419 | } else | 438 | } else |
420 | seq_printf(m, "%0*lx %c %s\n", | 439 | seq_printf(m, "%0*lx %c %s\n", |
421 | (int)(2*sizeof(void*)), | 440 | (int)(2*sizeof(void*)), |
422 | iter->value, iter->type, iter->name); | 441 | iter->value, iter->type, iter->name); |
423 | return 0; | 442 | return 0; |
424 | } | 443 | } |
425 | 444 | ||
426 | static const struct seq_operations kallsyms_op = { | 445 | static const struct seq_operations kallsyms_op = { |
427 | .start = s_start, | 446 | .start = s_start, |
428 | .next = s_next, | 447 | .next = s_next, |
429 | .stop = s_stop, | 448 | .stop = s_stop, |
430 | .show = s_show | 449 | .show = s_show |
431 | }; | 450 | }; |
432 | 451 | ||
433 | static int kallsyms_open(struct inode *inode, struct file *file) | 452 | static int kallsyms_open(struct inode *inode, struct file *file) |
434 | { | 453 | { |
435 | /* We keep the iterator in m->private, since the normal case is to | 454 | /* We keep the iterator in m->private, since the normal case is to |
436 | * s_start from where we left off, so we avoid | 455 | * s_start from where we left off, so we avoid |
437 | * using get_symbol_offset for every symbol */ | 456 | * using get_symbol_offset for every symbol */ |
438 | struct kallsym_iter *iter; | 457 | struct kallsym_iter *iter; |
439 | int ret; | 458 | int ret; |
440 | 459 | ||
441 | iter = kmalloc(sizeof(*iter), GFP_KERNEL); | 460 | iter = kmalloc(sizeof(*iter), GFP_KERNEL); |
442 | if (!iter) | 461 | if (!iter) |
443 | return -ENOMEM; | 462 | return -ENOMEM; |
444 | reset_iter(iter, 0); | 463 | reset_iter(iter, 0); |
445 | 464 | ||
446 | ret = seq_open(file, &kallsyms_op); | 465 | ret = seq_open(file, &kallsyms_op); |
447 | if (ret == 0) | 466 | if (ret == 0) |
448 | ((struct seq_file *)file->private_data)->private = iter; | 467 | ((struct seq_file *)file->private_data)->private = iter; |
449 | else | 468 | else |
450 | kfree(iter); | 469 | kfree(iter); |
451 | return ret; | 470 | return ret; |
452 | } | 471 | } |
453 | 472 | ||
454 | static int kallsyms_release(struct inode *inode, struct file *file) | 473 | static int kallsyms_release(struct inode *inode, struct file *file) |
455 | { | 474 | { |
456 | struct seq_file *m = (struct seq_file *)file->private_data; | 475 | struct seq_file *m = (struct seq_file *)file->private_data; |
457 | kfree(m->private); | 476 | kfree(m->private); |
458 | return seq_release(inode, file); | 477 | return seq_release(inode, file); |
459 | } | 478 | } |
460 | 479 | ||
461 | static const struct file_operations kallsyms_operations = { | 480 | static const struct file_operations kallsyms_operations = { |
462 | .open = kallsyms_open, | 481 | .open = kallsyms_open, |
463 | .read = seq_read, | 482 | .read = seq_read, |
464 | .llseek = seq_lseek, | 483 | .llseek = seq_lseek, |
465 | .release = kallsyms_release, | 484 | .release = kallsyms_release, |
466 | }; | 485 | }; |
467 | 486 | ||
468 | static int __init kallsyms_init(void) | 487 | static int __init kallsyms_init(void) |
469 | { | 488 | { |
470 | struct proc_dir_entry *entry; | 489 | struct proc_dir_entry *entry; |
471 | 490 | ||
472 | entry = create_proc_entry("kallsyms", 0444, NULL); | 491 | entry = create_proc_entry("kallsyms", 0444, NULL); |
473 | if (entry) | 492 | if (entry) |
474 | entry->proc_fops = &kallsyms_operations; | 493 | entry->proc_fops = &kallsyms_operations; |
475 | return 0; | 494 | return 0; |
476 | } | 495 | } |
477 | __initcall(kallsyms_init); | 496 | __initcall(kallsyms_init); |
478 | 497 | ||
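The open/read/release plumbing above is the standard seq_file pattern with a heap-allocated iterator stashed in ->private. Stripped of the kallsyms specifics, the skeleton looks like the following sketch, written against the APIs of this era; the proc entry name and record count are invented:

    #include <linux/kernel.h>
    #include <linux/init.h>
    #include <linux/slab.h>
    #include <linux/fs.h>
    #include <linux/seq_file.h>
    #include <linux/proc_fs.h>

    struct demo_iter {
            loff_t pos;
    };

    static void *demo_start(struct seq_file *m, loff_t *pos)
    {
            struct demo_iter *iter = m->private;

            if (*pos >= 4)          /* four records, then EOF */
                    return NULL;
            iter->pos = *pos;
            return iter;
    }

    static void *demo_next(struct seq_file *m, void *p, loff_t *pos)
    {
            (*pos)++;
            return demo_start(m, pos);
    }

    static void demo_stop(struct seq_file *m, void *p)
    {
    }

    static int demo_show(struct seq_file *m, void *p)
    {
            struct demo_iter *iter = p;

            seq_printf(m, "record %lld\n", (long long)iter->pos);
            return 0;
    }

    static const struct seq_operations demo_op = {
            .start = demo_start,
            .next  = demo_next,
            .stop  = demo_stop,
            .show  = demo_show,
    };

    static int demo_open(struct inode *inode, struct file *file)
    {
            struct demo_iter *iter = kmalloc(sizeof(*iter), GFP_KERNEL);
            int ret;

            if (!iter)
                    return -ENOMEM;
            ret = seq_open(file, &demo_op);
            if (ret == 0)
                    ((struct seq_file *)file->private_data)->private = iter;
            else
                    kfree(iter);
            return ret;
    }

    static int demo_release(struct inode *inode, struct file *file)
    {
            kfree(((struct seq_file *)file->private_data)->private);
            return seq_release(inode, file);
    }

    static const struct file_operations demo_fops = {
            .open    = demo_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = demo_release,
    };

    static int __init demo_proc_init(void)
    {
            struct proc_dir_entry *entry = create_proc_entry("seqdemo", 0444, NULL);

            if (entry)
                    entry->proc_fops = &demo_fops;
            return 0;
    }
    __initcall(demo_proc_init);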
479 | EXPORT_SYMBOL(__print_symbol); | 498 | EXPORT_SYMBOL(__print_symbol); |
480 | EXPORT_SYMBOL_GPL(sprint_symbol); | 499 | EXPORT_SYMBOL_GPL(sprint_symbol); |
481 | 500 |
kernel/module.c
1 | /* | 1 | /* |
2 | Copyright (C) 2002 Richard Henderson | 2 | Copyright (C) 2002 Richard Henderson |
3 | Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. | 3 | Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. |
4 | 4 | ||
5 | This program is free software; you can redistribute it and/or modify | 5 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published by | 6 | it under the terms of the GNU General Public License as published by |
7 | the Free Software Foundation; either version 2 of the License, or | 7 | the Free Software Foundation; either version 2 of the License, or |
8 | (at your option) any later version. | 8 | (at your option) any later version. |
9 | 9 | ||
10 | This program is distributed in the hope that it will be useful, | 10 | This program is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | GNU General Public License for more details. | 13 | GNU General Public License for more details. |
14 | 14 | ||
15 | You should have received a copy of the GNU General Public License | 15 | You should have received a copy of the GNU General Public License |
16 | along with this program; if not, write to the Free Software | 16 | along with this program; if not, write to the Free Software |
17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/moduleloader.h> | 20 | #include <linux/moduleloader.h> |
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | #include <linux/kallsyms.h> | 22 | #include <linux/kallsyms.h> |
23 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/vmalloc.h> | 25 | #include <linux/vmalloc.h> |
26 | #include <linux/elf.h> | 26 | #include <linux/elf.h> |
27 | #include <linux/seq_file.h> | 27 | #include <linux/seq_file.h> |
28 | #include <linux/syscalls.h> | 28 | #include <linux/syscalls.h> |
29 | #include <linux/fcntl.h> | 29 | #include <linux/fcntl.h> |
30 | #include <linux/rcupdate.h> | 30 | #include <linux/rcupdate.h> |
31 | #include <linux/capability.h> | 31 | #include <linux/capability.h> |
32 | #include <linux/cpu.h> | 32 | #include <linux/cpu.h> |
33 | #include <linux/moduleparam.h> | 33 | #include <linux/moduleparam.h> |
34 | #include <linux/errno.h> | 34 | #include <linux/errno.h> |
35 | #include <linux/err.h> | 35 | #include <linux/err.h> |
36 | #include <linux/vermagic.h> | 36 | #include <linux/vermagic.h> |
37 | #include <linux/notifier.h> | 37 | #include <linux/notifier.h> |
38 | #include <linux/sched.h> | 38 | #include <linux/sched.h> |
39 | #include <linux/stop_machine.h> | 39 | #include <linux/stop_machine.h> |
40 | #include <linux/device.h> | 40 | #include <linux/device.h> |
41 | #include <linux/string.h> | 41 | #include <linux/string.h> |
42 | #include <linux/mutex.h> | 42 | #include <linux/mutex.h> |
43 | #include <linux/unwind.h> | 43 | #include <linux/unwind.h> |
44 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
45 | #include <asm/semaphore.h> | 45 | #include <asm/semaphore.h> |
46 | #include <asm/cacheflush.h> | 46 | #include <asm/cacheflush.h> |
47 | #include <linux/license.h> | 47 | #include <linux/license.h> |
48 | 48 | ||
49 | extern int module_sysfs_initialized; | 49 | extern int module_sysfs_initialized; |
50 | 50 | ||
51 | #if 0 | 51 | #if 0 |
52 | #define DEBUGP printk | 52 | #define DEBUGP printk |
53 | #else | 53 | #else |
54 | #define DEBUGP(fmt , a...) | 54 | #define DEBUGP(fmt , a...) |
55 | #endif | 55 | #endif |
56 | 56 | ||
57 | #ifndef ARCH_SHF_SMALL | 57 | #ifndef ARCH_SHF_SMALL |
58 | #define ARCH_SHF_SMALL 0 | 58 | #define ARCH_SHF_SMALL 0 |
59 | #endif | 59 | #endif |
60 | 60 | ||
61 | /* If this is set, the section belongs in the init part of the module */ | 61 | /* If this is set, the section belongs in the init part of the module */ |
62 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) | 62 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) |
63 | 63 | ||
64 | /* Protects module list */ | 64 | /* Protects module list */ |
65 | static DEFINE_SPINLOCK(modlist_lock); | 65 | static DEFINE_SPINLOCK(modlist_lock); |
66 | 66 | ||
67 | /* List of modules, protected by module_mutex AND modlist_lock */ | 67 | /* List of modules, protected by module_mutex AND modlist_lock */ |
68 | static DEFINE_MUTEX(module_mutex); | 68 | static DEFINE_MUTEX(module_mutex); |
69 | static LIST_HEAD(modules); | 69 | static LIST_HEAD(modules); |
70 | 70 | ||
71 | static BLOCKING_NOTIFIER_HEAD(module_notify_list); | 71 | static BLOCKING_NOTIFIER_HEAD(module_notify_list); |
72 | 72 | ||
73 | int register_module_notifier(struct notifier_block * nb) | 73 | int register_module_notifier(struct notifier_block * nb) |
74 | { | 74 | { |
75 | return blocking_notifier_chain_register(&module_notify_list, nb); | 75 | return blocking_notifier_chain_register(&module_notify_list, nb); |
76 | } | 76 | } |
77 | EXPORT_SYMBOL(register_module_notifier); | 77 | EXPORT_SYMBOL(register_module_notifier); |
78 | 78 | ||
79 | int unregister_module_notifier(struct notifier_block * nb) | 79 | int unregister_module_notifier(struct notifier_block * nb) |
80 | { | 80 | { |
81 | return blocking_notifier_chain_unregister(&module_notify_list, nb); | 81 | return blocking_notifier_chain_unregister(&module_notify_list, nb); |
82 | } | 82 | } |
83 | EXPORT_SYMBOL(unregister_module_notifier); | 83 | EXPORT_SYMBOL(unregister_module_notifier); |
84 | 84 | ||
85 | /* We require a truly strong try_module_get() */ | 85 | /* We require a truly strong try_module_get() */ |
86 | static inline int strong_try_module_get(struct module *mod) | 86 | static inline int strong_try_module_get(struct module *mod) |
87 | { | 87 | { |
88 | if (mod && mod->state == MODULE_STATE_COMING) | 88 | if (mod && mod->state == MODULE_STATE_COMING) |
89 | return 0; | 89 | return 0; |
90 | return try_module_get(mod); | 90 | return try_module_get(mod); |
91 | } | 91 | } |
92 | 92 | ||
93 | static inline void add_taint_module(struct module *mod, unsigned flag) | 93 | static inline void add_taint_module(struct module *mod, unsigned flag) |
94 | { | 94 | { |
95 | add_taint(flag); | 95 | add_taint(flag); |
96 | mod->taints |= flag; | 96 | mod->taints |= flag; |
97 | } | 97 | } |
98 | 98 | ||
99 | /* A thread that wants to hold a reference to a module only while it | 99 | /* A thread that wants to hold a reference to a module only while it |
100 | * is running can call this to safely exit. | 100 | * is running can call this to safely exit. |
101 | * nfsd and lockd use this. | 101 | * nfsd and lockd use this. |
102 | */ | 102 | */ |
103 | void __module_put_and_exit(struct module *mod, long code) | 103 | void __module_put_and_exit(struct module *mod, long code) |
104 | { | 104 | { |
105 | module_put(mod); | 105 | module_put(mod); |
106 | do_exit(code); | 106 | do_exit(code); |
107 | } | 107 | } |
108 | EXPORT_SYMBOL(__module_put_and_exit); | 108 | EXPORT_SYMBOL(__module_put_and_exit); |
109 | 109 | ||
110 | /* Find a module section: 0 means not found. */ | 110 | /* Find a module section: 0 means not found. */ |
111 | static unsigned int find_sec(Elf_Ehdr *hdr, | 111 | static unsigned int find_sec(Elf_Ehdr *hdr, |
112 | Elf_Shdr *sechdrs, | 112 | Elf_Shdr *sechdrs, |
113 | const char *secstrings, | 113 | const char *secstrings, |
114 | const char *name) | 114 | const char *name) |
115 | { | 115 | { |
116 | unsigned int i; | 116 | unsigned int i; |
117 | 117 | ||
118 | for (i = 1; i < hdr->e_shnum; i++) | 118 | for (i = 1; i < hdr->e_shnum; i++) |
119 | /* Alloc bit cleared means "ignore it." */ | 119 | /* Alloc bit cleared means "ignore it." */ |
120 | if ((sechdrs[i].sh_flags & SHF_ALLOC) | 120 | if ((sechdrs[i].sh_flags & SHF_ALLOC) |
121 | && strcmp(secstrings+sechdrs[i].sh_name, name) == 0) | 121 | && strcmp(secstrings+sechdrs[i].sh_name, name) == 0) |
122 | return i; | 122 | return i; |
123 | return 0; | 123 | return 0; |
124 | } | 124 | } |
125 | 125 | ||
126 | /* Provided by the linker */ | 126 | /* Provided by the linker */ |
127 | extern const struct kernel_symbol __start___ksymtab[]; | 127 | extern const struct kernel_symbol __start___ksymtab[]; |
128 | extern const struct kernel_symbol __stop___ksymtab[]; | 128 | extern const struct kernel_symbol __stop___ksymtab[]; |
129 | extern const struct kernel_symbol __start___ksymtab_gpl[]; | 129 | extern const struct kernel_symbol __start___ksymtab_gpl[]; |
130 | extern const struct kernel_symbol __stop___ksymtab_gpl[]; | 130 | extern const struct kernel_symbol __stop___ksymtab_gpl[]; |
131 | extern const struct kernel_symbol __start___ksymtab_gpl_future[]; | 131 | extern const struct kernel_symbol __start___ksymtab_gpl_future[]; |
132 | extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; | 132 | extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; |
133 | extern const struct kernel_symbol __start___ksymtab_unused[]; | 133 | extern const struct kernel_symbol __start___ksymtab_unused[]; |
134 | extern const struct kernel_symbol __stop___ksymtab_unused[]; | 134 | extern const struct kernel_symbol __stop___ksymtab_unused[]; |
135 | extern const struct kernel_symbol __start___ksymtab_unused_gpl[]; | 135 | extern const struct kernel_symbol __start___ksymtab_unused_gpl[]; |
136 | extern const struct kernel_symbol __stop___ksymtab_unused_gpl[]; | 136 | extern const struct kernel_symbol __stop___ksymtab_unused_gpl[]; |
137 | extern const struct kernel_symbol __start___ksymtab_gpl_future[]; | 137 | extern const struct kernel_symbol __start___ksymtab_gpl_future[]; |
138 | extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; | 138 | extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; |
139 | extern const unsigned long __start___kcrctab[]; | 139 | extern const unsigned long __start___kcrctab[]; |
140 | extern const unsigned long __start___kcrctab_gpl[]; | 140 | extern const unsigned long __start___kcrctab_gpl[]; |
141 | extern const unsigned long __start___kcrctab_gpl_future[]; | 141 | extern const unsigned long __start___kcrctab_gpl_future[]; |
142 | extern const unsigned long __start___kcrctab_unused[]; | 142 | extern const unsigned long __start___kcrctab_unused[]; |
143 | extern const unsigned long __start___kcrctab_unused_gpl[]; | 143 | extern const unsigned long __start___kcrctab_unused_gpl[]; |
144 | 144 | ||
145 | #ifndef CONFIG_MODVERSIONS | 145 | #ifndef CONFIG_MODVERSIONS |
146 | #define symversion(base, idx) NULL | 146 | #define symversion(base, idx) NULL |
147 | #else | 147 | #else |
148 | #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL) | 148 | #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL) |
149 | #endif | 149 | #endif |
150 | 150 | ||
151 | /* lookup symbol in given range of kernel_symbols */ | 151 | /* lookup symbol in given range of kernel_symbols */ |
152 | static const struct kernel_symbol *lookup_symbol(const char *name, | 152 | static const struct kernel_symbol *lookup_symbol(const char *name, |
153 | const struct kernel_symbol *start, | 153 | const struct kernel_symbol *start, |
154 | const struct kernel_symbol *stop) | 154 | const struct kernel_symbol *stop) |
155 | { | 155 | { |
156 | const struct kernel_symbol *ks = start; | 156 | const struct kernel_symbol *ks = start; |
157 | for (; ks < stop; ks++) | 157 | for (; ks < stop; ks++) |
158 | if (strcmp(ks->name, name) == 0) | 158 | if (strcmp(ks->name, name) == 0) |
159 | return ks; | 159 | return ks; |
160 | return NULL; | 160 | return NULL; |
161 | } | 161 | } |
162 | 162 | ||
163 | static void printk_unused_warning(const char *name) | 163 | static void printk_unused_warning(const char *name) |
164 | { | 164 | { |
165 | printk(KERN_WARNING "Symbol %s is marked as UNUSED, " | 165 | printk(KERN_WARNING "Symbol %s is marked as UNUSED, " |
166 | "however this module is using it.\n", name); | 166 | "however this module is using it.\n", name); |
167 | printk(KERN_WARNING "This symbol will go away in the future.\n"); | 167 | printk(KERN_WARNING "This symbol will go away in the future.\n"); |
168 | printk(KERN_WARNING "Please evalute if this is the right api to use, " | 168 | printk(KERN_WARNING "Please evalute if this is the right api to use, " |
169 | "and if it really is, submit a report the linux kernel " | 169 | "and if it really is, submit a report the linux kernel " |
170 | "mailinglist together with submitting your code for " | 170 | "mailinglist together with submitting your code for " |
171 | "inclusion.\n"); | 171 | "inclusion.\n"); |
172 | } | 172 | } |
173 | 173 | ||
174 | /* Find a symbol, return value, crc and module which owns it */ | 174 | /* Find a symbol, return value, crc and module which owns it */ |
175 | static unsigned long __find_symbol(const char *name, | 175 | static unsigned long __find_symbol(const char *name, |
176 | struct module **owner, | 176 | struct module **owner, |
177 | const unsigned long **crc, | 177 | const unsigned long **crc, |
178 | int gplok) | 178 | int gplok) |
179 | { | 179 | { |
180 | struct module *mod; | 180 | struct module *mod; |
181 | const struct kernel_symbol *ks; | 181 | const struct kernel_symbol *ks; |
182 | 182 | ||
183 | /* Core kernel first. */ | 183 | /* Core kernel first. */ |
184 | *owner = NULL; | 184 | *owner = NULL; |
185 | ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); | 185 | ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); |
186 | if (ks) { | 186 | if (ks) { |
187 | *crc = symversion(__start___kcrctab, (ks - __start___ksymtab)); | 187 | *crc = symversion(__start___kcrctab, (ks - __start___ksymtab)); |
188 | return ks->value; | 188 | return ks->value; |
189 | } | 189 | } |
190 | if (gplok) { | 190 | if (gplok) { |
191 | ks = lookup_symbol(name, __start___ksymtab_gpl, | 191 | ks = lookup_symbol(name, __start___ksymtab_gpl, |
192 | __stop___ksymtab_gpl); | 192 | __stop___ksymtab_gpl); |
193 | if (ks) { | 193 | if (ks) { |
194 | *crc = symversion(__start___kcrctab_gpl, | 194 | *crc = symversion(__start___kcrctab_gpl, |
195 | (ks - __start___ksymtab_gpl)); | 195 | (ks - __start___ksymtab_gpl)); |
196 | return ks->value; | 196 | return ks->value; |
197 | } | 197 | } |
198 | } | 198 | } |
199 | ks = lookup_symbol(name, __start___ksymtab_gpl_future, | 199 | ks = lookup_symbol(name, __start___ksymtab_gpl_future, |
200 | __stop___ksymtab_gpl_future); | 200 | __stop___ksymtab_gpl_future); |
201 | if (ks) { | 201 | if (ks) { |
202 | if (!gplok) { | 202 | if (!gplok) { |
203 | printk(KERN_WARNING "Symbol %s is being used " | 203 | printk(KERN_WARNING "Symbol %s is being used " |
204 | "by a non-GPL module, which will not " | 204 | "by a non-GPL module, which will not " |
205 | "be allowed in the future\n", name); | 205 | "be allowed in the future\n", name); |
206 | printk(KERN_WARNING "Please see the file " | 206 | printk(KERN_WARNING "Please see the file " |
207 | "Documentation/feature-removal-schedule.txt " | 207 | "Documentation/feature-removal-schedule.txt " |
208 | "in the kernel source tree for more " | 208 | "in the kernel source tree for more " |
209 | "details.\n"); | 209 | "details.\n"); |
210 | } | 210 | } |
211 | *crc = symversion(__start___kcrctab_gpl_future, | 211 | *crc = symversion(__start___kcrctab_gpl_future, |
212 | (ks - __start___ksymtab_gpl_future)); | 212 | (ks - __start___ksymtab_gpl_future)); |
213 | return ks->value; | 213 | return ks->value; |
214 | } | 214 | } |
215 | 215 | ||
216 | ks = lookup_symbol(name, __start___ksymtab_unused, | 216 | ks = lookup_symbol(name, __start___ksymtab_unused, |
217 | __stop___ksymtab_unused); | 217 | __stop___ksymtab_unused); |
218 | if (ks) { | 218 | if (ks) { |
219 | printk_unused_warning(name); | 219 | printk_unused_warning(name); |
220 | *crc = symversion(__start___kcrctab_unused, | 220 | *crc = symversion(__start___kcrctab_unused, |
221 | (ks - __start___ksymtab_unused)); | 221 | (ks - __start___ksymtab_unused)); |
222 | return ks->value; | 222 | return ks->value; |
223 | } | 223 | } |
224 | 224 | ||
225 | if (gplok) | 225 | if (gplok) |
226 | ks = lookup_symbol(name, __start___ksymtab_unused_gpl, | 226 | ks = lookup_symbol(name, __start___ksymtab_unused_gpl, |
227 | __stop___ksymtab_unused_gpl); | 227 | __stop___ksymtab_unused_gpl); |
228 | if (ks) { | 228 | if (ks) { |
229 | printk_unused_warning(name); | 229 | printk_unused_warning(name); |
230 | *crc = symversion(__start___kcrctab_unused_gpl, | 230 | *crc = symversion(__start___kcrctab_unused_gpl, |
231 | (ks - __start___ksymtab_unused_gpl)); | 231 | (ks - __start___ksymtab_unused_gpl)); |
232 | return ks->value; | 232 | return ks->value; |
233 | } | 233 | } |
234 | 234 | ||
235 | /* Now try modules. */ | 235 | /* Now try modules. */ |
236 | list_for_each_entry(mod, &modules, list) { | 236 | list_for_each_entry(mod, &modules, list) { |
237 | *owner = mod; | 237 | *owner = mod; |
238 | ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); | 238 | ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); |
239 | if (ks) { | 239 | if (ks) { |
240 | *crc = symversion(mod->crcs, (ks - mod->syms)); | 240 | *crc = symversion(mod->crcs, (ks - mod->syms)); |
241 | return ks->value; | 241 | return ks->value; |
242 | } | 242 | } |
243 | 243 | ||
244 | if (gplok) { | 244 | if (gplok) { |
245 | ks = lookup_symbol(name, mod->gpl_syms, | 245 | ks = lookup_symbol(name, mod->gpl_syms, |
246 | mod->gpl_syms + mod->num_gpl_syms); | 246 | mod->gpl_syms + mod->num_gpl_syms); |
247 | if (ks) { | 247 | if (ks) { |
248 | *crc = symversion(mod->gpl_crcs, | 248 | *crc = symversion(mod->gpl_crcs, |
249 | (ks - mod->gpl_syms)); | 249 | (ks - mod->gpl_syms)); |
250 | return ks->value; | 250 | return ks->value; |
251 | } | 251 | } |
252 | } | 252 | } |
253 | ks = lookup_symbol(name, mod->unused_syms, mod->unused_syms + mod->num_unused_syms); | 253 | ks = lookup_symbol(name, mod->unused_syms, mod->unused_syms + mod->num_unused_syms); |
254 | if (ks) { | 254 | if (ks) { |
255 | printk_unused_warning(name); | 255 | printk_unused_warning(name); |
256 | *crc = symversion(mod->unused_crcs, (ks - mod->unused_syms)); | 256 | *crc = symversion(mod->unused_crcs, (ks - mod->unused_syms)); |
257 | return ks->value; | 257 | return ks->value; |
258 | } | 258 | } |
259 | 259 | ||
260 | if (gplok) { | 260 | if (gplok) { |
261 | ks = lookup_symbol(name, mod->unused_gpl_syms, | 261 | ks = lookup_symbol(name, mod->unused_gpl_syms, |
262 | mod->unused_gpl_syms + mod->num_unused_gpl_syms); | 262 | mod->unused_gpl_syms + mod->num_unused_gpl_syms); |
263 | if (ks) { | 263 | if (ks) { |
264 | printk_unused_warning(name); | 264 | printk_unused_warning(name); |
265 | *crc = symversion(mod->unused_gpl_crcs, | 265 | *crc = symversion(mod->unused_gpl_crcs, |
266 | (ks - mod->unused_gpl_syms)); | 266 | (ks - mod->unused_gpl_syms)); |
267 | return ks->value; | 267 | return ks->value; |
268 | } | 268 | } |
269 | } | 269 | } |
270 | ks = lookup_symbol(name, mod->gpl_future_syms, | 270 | ks = lookup_symbol(name, mod->gpl_future_syms, |
271 | (mod->gpl_future_syms + | 271 | (mod->gpl_future_syms + |
272 | mod->num_gpl_future_syms)); | 272 | mod->num_gpl_future_syms)); |
273 | if (ks) { | 273 | if (ks) { |
274 | if (!gplok) { | 274 | if (!gplok) { |
275 | printk(KERN_WARNING "Symbol %s is being used " | 275 | printk(KERN_WARNING "Symbol %s is being used " |
276 | "by a non-GPL module, which will not " | 276 | "by a non-GPL module, which will not " |
277 | "be allowed in the future\n", name); | 277 | "be allowed in the future\n", name); |
278 | printk(KERN_WARNING "Please see the file " | 278 | printk(KERN_WARNING "Please see the file " |
279 | "Documentation/feature-removal-schedule.txt " | 279 | "Documentation/feature-removal-schedule.txt " |
280 | "in the kernel source tree for more " | 280 | "in the kernel source tree for more " |
281 | "details.\n"); | 281 | "details.\n"); |
282 | } | 282 | } |
283 | *crc = symversion(mod->gpl_future_crcs, | 283 | *crc = symversion(mod->gpl_future_crcs, |
284 | (ks - mod->gpl_future_syms)); | 284 | (ks - mod->gpl_future_syms)); |
285 | return ks->value; | 285 | return ks->value; |
286 | } | 286 | } |
287 | } | 287 | } |
288 | DEBUGP("Failed to find symbol %s\n", name); | 288 | DEBUGP("Failed to find symbol %s\n", name); |
289 | return 0; | 289 | return 0; |
290 | } | 290 | } |
291 | 291 | ||
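A detail worth calling out in __find_symbol(): each export table keeps its CRCs in a parallel array, so once lookup_symbol() returns a kernel_symbol pointer, the matching CRC index is just (ks - table_start), which is exactly what the symversion() calls compute. In miniature, with invented symbols and CRCs:

    #include <stdio.h>
    #include <string.h>

    struct kernel_symbol {
            unsigned long value;
            const char *name;
    };

    static const struct kernel_symbol ksymtab[] = {
            { 0xc0100000, "foo" },
            { 0xc0100040, "bar" },
    };
    static const unsigned long kcrctab[] = { 0x11111111, 0x22222222 };

    int main(void)
    {
            const struct kernel_symbol *ks;

            /* "bar" is known to be present, so the scan always stops early */
            for (ks = ksymtab; ks < ksymtab + 2; ks++)
                    if (strcmp(ks->name, "bar") == 0)
                            break;
            /* the same index selects the entry in the parallel CRC array */
            printf("bar: value 0x%lx crc 0x%lx\n",
                   ks->value, kcrctab[ks - ksymtab]);
            return 0;
    }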
292 | /* Search for module by name: must hold module_mutex. */ | 292 | /* Search for module by name: must hold module_mutex. */ |
293 | static struct module *find_module(const char *name) | 293 | static struct module *find_module(const char *name) |
294 | { | 294 | { |
295 | struct module *mod; | 295 | struct module *mod; |
296 | 296 | ||
297 | list_for_each_entry(mod, &modules, list) { | 297 | list_for_each_entry(mod, &modules, list) { |
298 | if (strcmp(mod->name, name) == 0) | 298 | if (strcmp(mod->name, name) == 0) |
299 | return mod; | 299 | return mod; |
300 | } | 300 | } |
301 | return NULL; | 301 | return NULL; |
302 | } | 302 | } |
303 | 303 | ||
304 | #ifdef CONFIG_SMP | 304 | #ifdef CONFIG_SMP |
305 | /* Number of blocks used and allocated. */ | 305 | /* Number of blocks used and allocated. */ |
306 | static unsigned int pcpu_num_used, pcpu_num_allocated; | 306 | static unsigned int pcpu_num_used, pcpu_num_allocated; |
307 | /* Size of each block. -ve means used. */ | 307 | /* Size of each block. -ve means used. */ |
308 | static int *pcpu_size; | 308 | static int *pcpu_size; |
309 | 309 | ||
310 | static int split_block(unsigned int i, unsigned short size) | 310 | static int split_block(unsigned int i, unsigned short size) |
311 | { | 311 | { |
312 | /* Reallocation required? */ | 312 | /* Reallocation required? */ |
313 | if (pcpu_num_used + 1 > pcpu_num_allocated) { | 313 | if (pcpu_num_used + 1 > pcpu_num_allocated) { |
314 | int *new; | 314 | int *new; |
315 | 315 | ||
316 | new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2, | 316 | new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2, |
317 | GFP_KERNEL); | 317 | GFP_KERNEL); |
318 | if (!new) | 318 | if (!new) |
319 | return 0; | 319 | return 0; |
320 | 320 | ||
321 | pcpu_num_allocated *= 2; | 321 | pcpu_num_allocated *= 2; |
322 | pcpu_size = new; | 322 | pcpu_size = new; |
323 | } | 323 | } |
324 | 324 | ||
325 | /* Insert a new subblock */ | 325 | /* Insert a new subblock */ |
326 | memmove(&pcpu_size[i+1], &pcpu_size[i], | 326 | memmove(&pcpu_size[i+1], &pcpu_size[i], |
327 | sizeof(pcpu_size[0]) * (pcpu_num_used - i)); | 327 | sizeof(pcpu_size[0]) * (pcpu_num_used - i)); |
328 | pcpu_num_used++; | 328 | pcpu_num_used++; |
329 | 329 | ||
330 | pcpu_size[i+1] -= size; | 330 | pcpu_size[i+1] -= size; |
331 | pcpu_size[i] = size; | 331 | pcpu_size[i] = size; |
332 | return 1; | 332 | return 1; |
333 | } | 333 | } |
334 | 334 | ||
335 | static inline unsigned int block_size(int val) | 335 | static inline unsigned int block_size(int val) |
336 | { | 336 | { |
337 | if (val < 0) | 337 | if (val < 0) |
338 | return -val; | 338 | return -val; |
339 | return val; | 339 | return val; |
340 | } | 340 | } |
341 | 341 | ||
342 | /* Created by linker magic */ | 342 | /* Created by linker magic */ |
343 | extern char __per_cpu_start[], __per_cpu_end[]; | 343 | extern char __per_cpu_start[], __per_cpu_end[]; |
344 | 344 | ||
345 | static void *percpu_modalloc(unsigned long size, unsigned long align, | 345 | static void *percpu_modalloc(unsigned long size, unsigned long align, |
346 | const char *name) | 346 | const char *name) |
347 | { | 347 | { |
348 | unsigned long extra; | 348 | unsigned long extra; |
349 | unsigned int i; | 349 | unsigned int i; |
350 | void *ptr; | 350 | void *ptr; |
351 | 351 | ||
352 | if (align > PAGE_SIZE) { | 352 | if (align > PAGE_SIZE) { |
353 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", | 353 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", |
354 | name, align, PAGE_SIZE); | 354 | name, align, PAGE_SIZE); |
355 | align = PAGE_SIZE; | 355 | align = PAGE_SIZE; |
356 | } | 356 | } |
357 | 357 | ||
358 | ptr = __per_cpu_start; | 358 | ptr = __per_cpu_start; |
359 | for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { | 359 | for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { |
360 | /* Extra for alignment requirement. */ | 360 | /* Extra for alignment requirement. */ |
361 | extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; | 361 | extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; |
362 | BUG_ON(i == 0 && extra != 0); | 362 | BUG_ON(i == 0 && extra != 0); |
363 | 363 | ||
364 | if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size) | 364 | if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size) |
365 | continue; | 365 | continue; |
366 | 366 | ||
367 | /* Transfer extra to previous block. */ | 367 | /* Transfer extra to previous block. */ |
368 | if (pcpu_size[i-1] < 0) | 368 | if (pcpu_size[i-1] < 0) |
369 | pcpu_size[i-1] -= extra; | 369 | pcpu_size[i-1] -= extra; |
370 | else | 370 | else |
371 | pcpu_size[i-1] += extra; | 371 | pcpu_size[i-1] += extra; |
372 | pcpu_size[i] -= extra; | 372 | pcpu_size[i] -= extra; |
373 | ptr += extra; | 373 | ptr += extra; |
374 | 374 | ||
375 | /* Split block if warranted */ | 375 | /* Split block if warranted */ |
376 | if (pcpu_size[i] - size > sizeof(unsigned long)) | 376 | if (pcpu_size[i] - size > sizeof(unsigned long)) |
377 | if (!split_block(i, size)) | 377 | if (!split_block(i, size)) |
378 | return NULL; | 378 | return NULL; |
379 | 379 | ||
380 | /* Mark allocated */ | 380 | /* Mark allocated */ |
381 | pcpu_size[i] = -pcpu_size[i]; | 381 | pcpu_size[i] = -pcpu_size[i]; |
382 | return ptr; | 382 | return ptr; |
383 | } | 383 | } |
384 | 384 | ||
385 | printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", | 385 | printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", |
386 | size); | 386 | size); |
387 | return NULL; | 387 | return NULL; |
388 | } | 388 | } |
389 | 389 | ||
390 | static void percpu_modfree(void *freeme) | 390 | static void percpu_modfree(void *freeme) |
391 | { | 391 | { |
392 | unsigned int i; | 392 | unsigned int i; |
393 | void *ptr = __per_cpu_start + block_size(pcpu_size[0]); | 393 | void *ptr = __per_cpu_start + block_size(pcpu_size[0]); |
394 | 394 | ||
395 | /* First entry is core kernel percpu data. */ | 395 | /* First entry is core kernel percpu data. */ |
396 | for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { | 396 | for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { |
397 | if (ptr == freeme) { | 397 | if (ptr == freeme) { |
398 | pcpu_size[i] = -pcpu_size[i]; | 398 | pcpu_size[i] = -pcpu_size[i]; |
399 | goto free; | 399 | goto free; |
400 | } | 400 | } |
401 | } | 401 | } |
402 | BUG(); | 402 | BUG(); |
403 | 403 | ||
404 | free: | 404 | free: |
405 | /* Merge with previous? */ | 405 | /* Merge with previous? */ |
406 | if (pcpu_size[i-1] >= 0) { | 406 | if (pcpu_size[i-1] >= 0) { |
407 | pcpu_size[i-1] += pcpu_size[i]; | 407 | pcpu_size[i-1] += pcpu_size[i]; |
408 | pcpu_num_used--; | 408 | pcpu_num_used--; |
409 | memmove(&pcpu_size[i], &pcpu_size[i+1], | 409 | memmove(&pcpu_size[i], &pcpu_size[i+1], |
410 | (pcpu_num_used - i) * sizeof(pcpu_size[0])); | 410 | (pcpu_num_used - i) * sizeof(pcpu_size[0])); |
411 | i--; | 411 | i--; |
412 | } | 412 | } |
413 | /* Merge with next? */ | 413 | /* Merge with next? */ |
414 | if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) { | 414 | if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) { |
415 | pcpu_size[i] += pcpu_size[i+1]; | 415 | pcpu_size[i] += pcpu_size[i+1]; |
416 | pcpu_num_used--; | 416 | pcpu_num_used--; |
417 | memmove(&pcpu_size[i+1], &pcpu_size[i+2], | 417 | memmove(&pcpu_size[i+1], &pcpu_size[i+2], |
418 | (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0])); | 418 | (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0])); |
419 | } | 419 | } |
420 | } | 420 | } |
421 | 421 | ||
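The per-cpu reservation code above is a tiny first-fit allocator over one contiguous region: pcpu_size[] holds block lengths, a negated entry means "in use", allocation may split a block, and freeing negates the entry back and merges it with free neighbours. The following standalone model keeps just that bookkeeping; the sizes are invented, and alignment handling and the kernel's split threshold are deliberately simplified:

    #include <stdio.h>
    #include <string.h>

    static int blocks[16] = { -64, 192 };   /* entry 0: core data, already used */
    static int nblocks = 2;

    /* first-fit allocate; returns a block index, or -1 if nothing fits */
    static int toy_alloc(int size)
    {
            int i;

            for (i = 0; i < nblocks; i++) {
                    if (blocks[i] < size)   /* used (negative) or too small */
                            continue;
                    if (blocks[i] > size) { /* split off the free remainder */
                            memmove(&blocks[i + 1], &blocks[i],
                                    (nblocks - i) * sizeof(blocks[0]));
                            nblocks++;
                            blocks[i + 1] -= size;
                            blocks[i] = size;
                    }
                    blocks[i] = -blocks[i]; /* negative length == in use */
                    return i;
            }
            return -1;
    }

    static void toy_free(int i)
    {
            blocks[i] = -blocks[i];         /* positive again == free */
            if (i + 1 < nblocks && blocks[i + 1] >= 0) {    /* merge right */
                    blocks[i] += blocks[i + 1];
                    memmove(&blocks[i + 1], &blocks[i + 2],
                            (nblocks - (i + 2)) * sizeof(blocks[0]));
                    nblocks--;
            }
            if (i > 0 && blocks[i - 1] >= 0) {              /* merge left */
                    blocks[i - 1] += blocks[i];
                    memmove(&blocks[i], &blocks[i + 1],
                            (nblocks - (i + 1)) * sizeof(blocks[0]));
                    nblocks--;
            }
    }

    int main(void)
    {
            int a = toy_alloc(80);
            int b = toy_alloc(40);

            toy_free(a);
            printf("a=%d b=%d blocks now: %d %d %d %d\n",
                   a, b, blocks[0], blocks[1], blocks[2], blocks[3]);
            return 0;
    }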
422 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, | 422 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, |
423 | Elf_Shdr *sechdrs, | 423 | Elf_Shdr *sechdrs, |
424 | const char *secstrings) | 424 | const char *secstrings) |
425 | { | 425 | { |
426 | return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); | 426 | return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); |
427 | } | 427 | } |
428 | 428 | ||
429 | static int percpu_modinit(void) | 429 | static int percpu_modinit(void) |
430 | { | 430 | { |
431 | pcpu_num_used = 2; | 431 | pcpu_num_used = 2; |
432 | pcpu_num_allocated = 2; | 432 | pcpu_num_allocated = 2; |
433 | pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, | 433 | pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, |
434 | GFP_KERNEL); | 434 | GFP_KERNEL); |
435 | /* Static in-kernel percpu data (used). */ | 435 | /* Static in-kernel percpu data (used). */ |
436 | pcpu_size[0] = -(__per_cpu_end-__per_cpu_start); | 436 | pcpu_size[0] = -(__per_cpu_end-__per_cpu_start); |
437 | /* Free room. */ | 437 | /* Free room. */ |
438 | pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; | 438 | pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; |
439 | if (pcpu_size[1] < 0) { | 439 | if (pcpu_size[1] < 0) { |
440 | printk(KERN_ERR "No per-cpu room for modules.\n"); | 440 | printk(KERN_ERR "No per-cpu room for modules.\n"); |
441 | pcpu_num_used = 1; | 441 | pcpu_num_used = 1; |
442 | } | 442 | } |
443 | 443 | ||
444 | return 0; | 444 | return 0; |
445 | } | 445 | } |
446 | __initcall(percpu_modinit); | 446 | __initcall(percpu_modinit); |
447 | #else /* ... !CONFIG_SMP */ | 447 | #else /* ... !CONFIG_SMP */ |
448 | static inline void *percpu_modalloc(unsigned long size, unsigned long align, | 448 | static inline void *percpu_modalloc(unsigned long size, unsigned long align, |
449 | const char *name) | 449 | const char *name) |
450 | { | 450 | { |
451 | return NULL; | 451 | return NULL; |
452 | } | 452 | } |
453 | static inline void percpu_modfree(void *pcpuptr) | 453 | static inline void percpu_modfree(void *pcpuptr) |
454 | { | 454 | { |
455 | BUG(); | 455 | BUG(); |
456 | } | 456 | } |
457 | static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, | 457 | static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, |
458 | Elf_Shdr *sechdrs, | 458 | Elf_Shdr *sechdrs, |
459 | const char *secstrings) | 459 | const char *secstrings) |
460 | { | 460 | { |
461 | return 0; | 461 | return 0; |
462 | } | 462 | } |
463 | static inline void percpu_modcopy(void *pcpudst, const void *src, | 463 | static inline void percpu_modcopy(void *pcpudst, const void *src, |
464 | unsigned long size) | 464 | unsigned long size) |
465 | { | 465 | { |
466 | /* pcpusec should be 0, and size of that section should be 0. */ | 466 | /* pcpusec should be 0, and size of that section should be 0. */ |
467 | BUG_ON(size != 0); | 467 | BUG_ON(size != 0); |
468 | } | 468 | } |
469 | #endif /* CONFIG_SMP */ | 469 | #endif /* CONFIG_SMP */ |
470 | 470 | ||
471 | #define MODINFO_ATTR(field) \ | 471 | #define MODINFO_ATTR(field) \ |
472 | static void setup_modinfo_##field(struct module *mod, const char *s) \ | 472 | static void setup_modinfo_##field(struct module *mod, const char *s) \ |
473 | { \ | 473 | { \ |
474 | mod->field = kstrdup(s, GFP_KERNEL); \ | 474 | mod->field = kstrdup(s, GFP_KERNEL); \ |
475 | } \ | 475 | } \ |
476 | static ssize_t show_modinfo_##field(struct module_attribute *mattr, \ | 476 | static ssize_t show_modinfo_##field(struct module_attribute *mattr, \ |
477 | struct module *mod, char *buffer) \ | 477 | struct module *mod, char *buffer) \ |
478 | { \ | 478 | { \ |
479 | return sprintf(buffer, "%s\n", mod->field); \ | 479 | return sprintf(buffer, "%s\n", mod->field); \ |
480 | } \ | 480 | } \ |
481 | static int modinfo_##field##_exists(struct module *mod) \ | 481 | static int modinfo_##field##_exists(struct module *mod) \ |
482 | { \ | 482 | { \ |
483 | return mod->field != NULL; \ | 483 | return mod->field != NULL; \ |
484 | } \ | 484 | } \ |
485 | static void free_modinfo_##field(struct module *mod) \ | 485 | static void free_modinfo_##field(struct module *mod) \ |
486 | { \ | 486 | { \ |
487 | kfree(mod->field); \ | 487 | kfree(mod->field); \ |
488 | mod->field = NULL; \ | 488 | mod->field = NULL; \ |
489 | } \ | 489 | } \ |
490 | static struct module_attribute modinfo_##field = { \ | 490 | static struct module_attribute modinfo_##field = { \ |
491 | .attr = { .name = __stringify(field), .mode = 0444, \ | 491 | .attr = { .name = __stringify(field), .mode = 0444, \ |
492 | .owner = THIS_MODULE }, \ | 492 | .owner = THIS_MODULE }, \ |
493 | .show = show_modinfo_##field, \ | 493 | .show = show_modinfo_##field, \ |
494 | .setup = setup_modinfo_##field, \ | 494 | .setup = setup_modinfo_##field, \ |
495 | .test = modinfo_##field##_exists, \ | 495 | .test = modinfo_##field##_exists, \ |
496 | .free = free_modinfo_##field, \ | 496 | .free = free_modinfo_##field, \ |
497 | }; | 497 | }; |
498 | 498 | ||
499 | MODINFO_ATTR(version); | 499 | MODINFO_ATTR(version); |
500 | MODINFO_ATTR(srcversion); | 500 | MODINFO_ATTR(srcversion); |
501 | 501 | ||
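For readers skimming the macro above, this is roughly what MODINFO_ATTR(version) expands to (an abridged reading aid, not standalone code; struct module and struct module_attribute come from linux/module.h):

static void setup_modinfo_version(struct module *mod, const char *s)
{
        mod->version = kstrdup(s, GFP_KERNEL);
}
static ssize_t show_modinfo_version(struct module_attribute *mattr,
                                    struct module *mod, char *buffer)
{
        return sprintf(buffer, "%s\n", mod->version);
}
/* modinfo_version_exists() and free_modinfo_version() follow the same
 * pattern; the callbacks are tied together in the static struct
 * module_attribute modinfo_version, exposed as /sys/module/<name>/version. */
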
502 | #ifdef CONFIG_MODULE_UNLOAD | 502 | #ifdef CONFIG_MODULE_UNLOAD |
503 | /* Init the unload section of the module. */ | 503 | /* Init the unload section of the module. */ |
504 | static void module_unload_init(struct module *mod) | 504 | static void module_unload_init(struct module *mod) |
505 | { | 505 | { |
506 | unsigned int i; | 506 | unsigned int i; |
507 | 507 | ||
508 | INIT_LIST_HEAD(&mod->modules_which_use_me); | 508 | INIT_LIST_HEAD(&mod->modules_which_use_me); |
509 | for (i = 0; i < NR_CPUS; i++) | 509 | for (i = 0; i < NR_CPUS; i++) |
510 | local_set(&mod->ref[i].count, 0); | 510 | local_set(&mod->ref[i].count, 0); |
511 | /* Hold reference count during initialization. */ | 511 | /* Hold reference count during initialization. */ |
512 | local_set(&mod->ref[raw_smp_processor_id()].count, 1); | 512 | local_set(&mod->ref[raw_smp_processor_id()].count, 1); |
513 | /* Backwards compatibility macros put refcount during init. */ | 513 | /* Backwards compatibility macros put refcount during init. */ |
514 | mod->waiter = current; | 514 | mod->waiter = current; |
515 | } | 515 | } |
516 | 516 | ||
517 | /* modules using other modules */ | 517 | /* modules using other modules */ |
518 | struct module_use | 518 | struct module_use |
519 | { | 519 | { |
520 | struct list_head list; | 520 | struct list_head list; |
521 | struct module *module_which_uses; | 521 | struct module *module_which_uses; |
522 | }; | 522 | }; |
523 | 523 | ||
524 | /* Does a already use b? */ | 524 | /* Does a already use b? */ |
525 | static int already_uses(struct module *a, struct module *b) | 525 | static int already_uses(struct module *a, struct module *b) |
526 | { | 526 | { |
527 | struct module_use *use; | 527 | struct module_use *use; |
528 | 528 | ||
529 | list_for_each_entry(use, &b->modules_which_use_me, list) { | 529 | list_for_each_entry(use, &b->modules_which_use_me, list) { |
530 | if (use->module_which_uses == a) { | 530 | if (use->module_which_uses == a) { |
531 | DEBUGP("%s uses %s!\n", a->name, b->name); | 531 | DEBUGP("%s uses %s!\n", a->name, b->name); |
532 | return 1; | 532 | return 1; |
533 | } | 533 | } |
534 | } | 534 | } |
535 | DEBUGP("%s does not use %s!\n", a->name, b->name); | 535 | DEBUGP("%s does not use %s!\n", a->name, b->name); |
536 | return 0; | 536 | return 0; |
537 | } | 537 | } |
538 | 538 | ||
539 | /* Module a uses b */ | 539 | /* Module a uses b */ |
540 | static int use_module(struct module *a, struct module *b) | 540 | static int use_module(struct module *a, struct module *b) |
541 | { | 541 | { |
542 | struct module_use *use; | 542 | struct module_use *use; |
543 | int no_warn; | 543 | int no_warn; |
544 | 544 | ||
545 | if (b == NULL || already_uses(a, b)) return 1; | 545 | if (b == NULL || already_uses(a, b)) return 1; |
546 | 546 | ||
547 | if (!strong_try_module_get(b)) | 547 | if (!strong_try_module_get(b)) |
548 | return 0; | 548 | return 0; |
549 | 549 | ||
550 | DEBUGP("Allocating new usage for %s.\n", a->name); | 550 | DEBUGP("Allocating new usage for %s.\n", a->name); |
551 | use = kmalloc(sizeof(*use), GFP_ATOMIC); | 551 | use = kmalloc(sizeof(*use), GFP_ATOMIC); |
552 | if (!use) { | 552 | if (!use) { |
553 | printk("%s: out of memory loading\n", a->name); | 553 | printk("%s: out of memory loading\n", a->name); |
554 | module_put(b); | 554 | module_put(b); |
555 | return 0; | 555 | return 0; |
556 | } | 556 | } |
557 | 557 | ||
558 | use->module_which_uses = a; | 558 | use->module_which_uses = a; |
559 | list_add(&use->list, &b->modules_which_use_me); | 559 | list_add(&use->list, &b->modules_which_use_me); |
560 | no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); | 560 | no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); |
561 | return 1; | 561 | return 1; |
562 | } | 562 | } |
563 | 563 | ||
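Since use_module() records each dependency both in the modules_which_use_me list and as a sysfs symlink, the relationship is visible from userspace. A minimal program listing a module's holders on kernels of this vintage (path layout assumed from the sysfs_create_link() call above):

#include <dirent.h>
#include <stdio.h>

int main(int argc, char **argv)
{
        char path[256];
        struct dirent *d;
        DIR *dir;

        if (argc != 2) {
                fprintf(stderr, "usage: %s <module>\n", argv[0]);
                return 1;
        }
        /* one link per module that took a reference via use_module() */
        snprintf(path, sizeof(path), "/sys/module/%s/holders", argv[1]);
        dir = opendir(path);
        if (!dir) {
                perror(path);
                return 1;
        }
        while ((d = readdir(dir)) != NULL)
                if (d->d_name[0] != '.')
                        printf("%s\n", d->d_name);
        closedir(dir);
        return 0;
}
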
564 | /* Clear the unload stuff of the module. */ | 564 | /* Clear the unload stuff of the module. */ |
565 | static void module_unload_free(struct module *mod) | 565 | static void module_unload_free(struct module *mod) |
566 | { | 566 | { |
567 | struct module *i; | 567 | struct module *i; |
568 | 568 | ||
569 | list_for_each_entry(i, &modules, list) { | 569 | list_for_each_entry(i, &modules, list) { |
570 | struct module_use *use; | 570 | struct module_use *use; |
571 | 571 | ||
572 | list_for_each_entry(use, &i->modules_which_use_me, list) { | 572 | list_for_each_entry(use, &i->modules_which_use_me, list) { |
573 | if (use->module_which_uses == mod) { | 573 | if (use->module_which_uses == mod) { |
574 | DEBUGP("%s unusing %s\n", mod->name, i->name); | 574 | DEBUGP("%s unusing %s\n", mod->name, i->name); |
575 | module_put(i); | 575 | module_put(i); |
576 | list_del(&use->list); | 576 | list_del(&use->list); |
577 | kfree(use); | 577 | kfree(use); |
578 | sysfs_remove_link(i->holders_dir, mod->name); | 578 | sysfs_remove_link(i->holders_dir, mod->name); |
579 | /* There can be at most one match. */ | 579 | /* There can be at most one match. */ |
580 | break; | 580 | break; |
581 | } | 581 | } |
582 | } | 582 | } |
583 | } | 583 | } |
584 | } | 584 | } |
585 | 585 | ||
586 | #ifdef CONFIG_MODULE_FORCE_UNLOAD | 586 | #ifdef CONFIG_MODULE_FORCE_UNLOAD |
587 | static inline int try_force_unload(unsigned int flags) | 587 | static inline int try_force_unload(unsigned int flags) |
588 | { | 588 | { |
589 | int ret = (flags & O_TRUNC); | 589 | int ret = (flags & O_TRUNC); |
590 | if (ret) | 590 | if (ret) |
591 | add_taint(TAINT_FORCED_RMMOD); | 591 | add_taint(TAINT_FORCED_RMMOD); |
592 | return ret; | 592 | return ret; |
593 | } | 593 | } |
594 | #else | 594 | #else |
595 | static inline int try_force_unload(unsigned int flags) | 595 | static inline int try_force_unload(unsigned int flags) |
596 | { | 596 | { |
597 | return 0; | 597 | return 0; |
598 | } | 598 | } |
599 | #endif /* CONFIG_MODULE_FORCE_UNLOAD */ | 599 | #endif /* CONFIG_MODULE_FORCE_UNLOAD */ |
600 | 600 | ||
601 | struct stopref | 601 | struct stopref |
602 | { | 602 | { |
603 | struct module *mod; | 603 | struct module *mod; |
604 | int flags; | 604 | int flags; |
605 | int *forced; | 605 | int *forced; |
606 | }; | 606 | }; |
607 | 607 | ||
608 | /* Whole machine is stopped with interrupts off when this runs. */ | 608 | /* Whole machine is stopped with interrupts off when this runs. */ |
609 | static int __try_stop_module(void *_sref) | 609 | static int __try_stop_module(void *_sref) |
610 | { | 610 | { |
611 | struct stopref *sref = _sref; | 611 | struct stopref *sref = _sref; |
612 | 612 | ||
613 | /* If it's not unused, quit unless we are told to block. */ | 613 | /* If it's not unused, quit unless we are told to block. */ |
614 | if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) { | 614 | if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) { |
615 | if (!(*sref->forced = try_force_unload(sref->flags))) | 615 | if (!(*sref->forced = try_force_unload(sref->flags))) |
616 | return -EWOULDBLOCK; | 616 | return -EWOULDBLOCK; |
617 | } | 617 | } |
618 | 618 | ||
619 | /* Mark it as dying. */ | 619 | /* Mark it as dying. */ |
620 | sref->mod->state = MODULE_STATE_GOING; | 620 | sref->mod->state = MODULE_STATE_GOING; |
621 | return 0; | 621 | return 0; |
622 | } | 622 | } |
623 | 623 | ||
624 | static int try_stop_module(struct module *mod, int flags, int *forced) | 624 | static int try_stop_module(struct module *mod, int flags, int *forced) |
625 | { | 625 | { |
626 | struct stopref sref = { mod, flags, forced }; | 626 | struct stopref sref = { mod, flags, forced }; |
627 | 627 | ||
628 | return stop_machine_run(__try_stop_module, &sref, NR_CPUS); | 628 | return stop_machine_run(__try_stop_module, &sref, NR_CPUS); |
629 | } | 629 | } |
630 | 630 | ||
631 | unsigned int module_refcount(struct module *mod) | 631 | unsigned int module_refcount(struct module *mod) |
632 | { | 632 | { |
633 | unsigned int i, total = 0; | 633 | unsigned int i, total = 0; |
634 | 634 | ||
635 | for (i = 0; i < NR_CPUS; i++) | 635 | for (i = 0; i < NR_CPUS; i++) |
636 | total += local_read(&mod->ref[i].count); | 636 | total += local_read(&mod->ref[i].count); |
637 | return total; | 637 | return total; |
638 | } | 638 | } |
639 | EXPORT_SYMBOL(module_refcount); | 639 | EXPORT_SYMBOL(module_refcount); |
640 | 640 | ||
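module_refcount() works because the get and put paths only touch the counter slot of the CPU they run on, so no shared cache line bounces; the sum over all slots is the true count, even though an individual slot can go negative when a reference taken on one CPU is dropped on another. A userspace sketch of the idea (NSLOTS and the helper names are illustrative, not kernel API):

#include <stdio.h>

#define NSLOTS 4                            /* stands in for NR_CPUS */

static long ref[NSLOTS];

static void get(int slot) { ref[slot]++; }  /* local_inc() analogue */
static void put(int slot) { ref[slot]--; }  /* local_dec() analogue */

static long total(void)                     /* module_refcount() analogue */
{
        long sum = 0;
        int i;

        for (i = 0; i < NSLOTS; i++)
                sum += ref[i];
        return sum;
}

int main(void)
{
        get(0);                              /* reference taken on CPU 0 */
        get(1);                              /* another taken on CPU 1 */
        put(0);                              /* one dropped on CPU 0 */
        printf("refcount=%ld\n", total());   /* prints 1 */
        return 0;
}
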
641 | /* This exists whether we can unload or not */ | 641 | /* This exists whether we can unload or not */ |
642 | static void free_module(struct module *mod); | 642 | static void free_module(struct module *mod); |
643 | 643 | ||
644 | static void wait_for_zero_refcount(struct module *mod) | 644 | static void wait_for_zero_refcount(struct module *mod) |
645 | { | 645 | { |
646 | /* Since we might sleep for some time, drop the semaphore first */ | 646 | /* Since we might sleep for some time, drop the semaphore first */ |
647 | mutex_unlock(&module_mutex); | 647 | mutex_unlock(&module_mutex); |
648 | for (;;) { | 648 | for (;;) { |
649 | DEBUGP("Looking at refcount...\n"); | 649 | DEBUGP("Looking at refcount...\n"); |
650 | set_current_state(TASK_UNINTERRUPTIBLE); | 650 | set_current_state(TASK_UNINTERRUPTIBLE); |
651 | if (module_refcount(mod) == 0) | 651 | if (module_refcount(mod) == 0) |
652 | break; | 652 | break; |
653 | schedule(); | 653 | schedule(); |
654 | } | 654 | } |
655 | current->state = TASK_RUNNING; | 655 | current->state = TASK_RUNNING; |
656 | mutex_lock(&module_mutex); | 656 | mutex_lock(&module_mutex); |
657 | } | 657 | } |
658 | 658 | ||
659 | asmlinkage long | 659 | asmlinkage long |
660 | sys_delete_module(const char __user *name_user, unsigned int flags) | 660 | sys_delete_module(const char __user *name_user, unsigned int flags) |
661 | { | 661 | { |
662 | struct module *mod; | 662 | struct module *mod; |
663 | char name[MODULE_NAME_LEN]; | 663 | char name[MODULE_NAME_LEN]; |
664 | int ret, forced = 0; | 664 | int ret, forced = 0; |
665 | 665 | ||
666 | if (!capable(CAP_SYS_MODULE)) | 666 | if (!capable(CAP_SYS_MODULE)) |
667 | return -EPERM; | 667 | return -EPERM; |
668 | 668 | ||
669 | if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) | 669 | if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) |
670 | return -EFAULT; | 670 | return -EFAULT; |
671 | name[MODULE_NAME_LEN-1] = '\0'; | 671 | name[MODULE_NAME_LEN-1] = '\0'; |
672 | 672 | ||
673 | if (mutex_lock_interruptible(&module_mutex) != 0) | 673 | if (mutex_lock_interruptible(&module_mutex) != 0) |
674 | return -EINTR; | 674 | return -EINTR; |
675 | 675 | ||
676 | mod = find_module(name); | 676 | mod = find_module(name); |
677 | if (!mod) { | 677 | if (!mod) { |
678 | ret = -ENOENT; | 678 | ret = -ENOENT; |
679 | goto out; | 679 | goto out; |
680 | } | 680 | } |
681 | 681 | ||
682 | if (!list_empty(&mod->modules_which_use_me)) { | 682 | if (!list_empty(&mod->modules_which_use_me)) { |
683 | /* Other modules depend on us: get rid of them first. */ | 683 | /* Other modules depend on us: get rid of them first. */ |
684 | ret = -EWOULDBLOCK; | 684 | ret = -EWOULDBLOCK; |
685 | goto out; | 685 | goto out; |
686 | } | 686 | } |
687 | 687 | ||
688 | /* Doing init or already dying? */ | 688 | /* Doing init or already dying? */ |
689 | if (mod->state != MODULE_STATE_LIVE) { | 689 | if (mod->state != MODULE_STATE_LIVE) { |
690 | /* FIXME: if (force), slam module count and wake up | 690 | /* FIXME: if (force), slam module count and wake up |
691 | waiter --RR */ | 691 | waiter --RR */ |
692 | DEBUGP("%s already dying\n", mod->name); | 692 | DEBUGP("%s already dying\n", mod->name); |
693 | ret = -EBUSY; | 693 | ret = -EBUSY; |
694 | goto out; | 694 | goto out; |
695 | } | 695 | } |
696 | 696 | ||
697 | /* If it has an init func, it must have an exit func to unload */ | 697 | /* If it has an init func, it must have an exit func to unload */ |
698 | if ((mod->init != NULL && mod->exit == NULL) | 698 | if ((mod->init != NULL && mod->exit == NULL) |
699 | || mod->unsafe) { | 699 | || mod->unsafe) { |
700 | forced = try_force_unload(flags); | 700 | forced = try_force_unload(flags); |
701 | if (!forced) { | 701 | if (!forced) { |
702 | /* This module can't be removed */ | 702 | /* This module can't be removed */ |
703 | ret = -EBUSY; | 703 | ret = -EBUSY; |
704 | goto out; | 704 | goto out; |
705 | } | 705 | } |
706 | } | 706 | } |
707 | 707 | ||
708 | /* Set this up before setting mod->state */ | 708 | /* Set this up before setting mod->state */ |
709 | mod->waiter = current; | 709 | mod->waiter = current; |
710 | 710 | ||
711 | /* Stop the machine so refcounts can't move and disable module. */ | 711 | /* Stop the machine so refcounts can't move and disable module. */ |
712 | ret = try_stop_module(mod, flags, &forced); | 712 | ret = try_stop_module(mod, flags, &forced); |
713 | if (ret != 0) | 713 | if (ret != 0) |
714 | goto out; | 714 | goto out; |
715 | 715 | ||
716 | /* Never wait if forced. */ | 716 | /* Never wait if forced. */ |
717 | if (!forced && module_refcount(mod) != 0) | 717 | if (!forced && module_refcount(mod) != 0) |
718 | wait_for_zero_refcount(mod); | 718 | wait_for_zero_refcount(mod); |
719 | 719 | ||
720 | /* Final destruction, now no one is using it. */ | 720 | /* Final destruction, now no one is using it. */ |
721 | if (mod->exit != NULL) { | 721 | if (mod->exit != NULL) { |
722 | mutex_unlock(&module_mutex); | 722 | mutex_unlock(&module_mutex); |
723 | mod->exit(); | 723 | mod->exit(); |
724 | mutex_lock(&module_mutex); | 724 | mutex_lock(&module_mutex); |
725 | } | 725 | } |
726 | free_module(mod); | 726 | free_module(mod); |
727 | 727 | ||
728 | out: | 728 | out: |
729 | mutex_unlock(&module_mutex); | 729 | mutex_unlock(&module_mutex); |
730 | return ret; | 730 | return ret; |
731 | } | 731 | } |
732 | 732 | ||
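sys_delete_module() is what rmmod ends up calling. A minimal caller (requires CAP_SYS_MODULE; O_NONBLOCK requests a non-blocking unload, and O_TRUNC would additionally force it on CONFIG_MODULE_FORCE_UNLOAD kernels, per try_force_unload() above):

#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        if (argc != 2) {
                fprintf(stderr, "usage: %s <module>\n", argv[0]);
                return 1;
        }
        /* flags are O_* values, as tested in sys_delete_module() */
        if (syscall(SYS_delete_module, argv[1], O_NONBLOCK) != 0) {
                perror("delete_module");
                return 1;
        }
        return 0;
}
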
733 | static void print_unload_info(struct seq_file *m, struct module *mod) | 733 | static void print_unload_info(struct seq_file *m, struct module *mod) |
734 | { | 734 | { |
735 | struct module_use *use; | 735 | struct module_use *use; |
736 | int printed_something = 0; | 736 | int printed_something = 0; |
737 | 737 | ||
738 | seq_printf(m, " %u ", module_refcount(mod)); | 738 | seq_printf(m, " %u ", module_refcount(mod)); |
739 | 739 | ||
740 | /* Always include a trailing , so userspace can differentiate | 740 | /* Always include a trailing , so userspace can differentiate |
741 | between this and the old multi-field proc format. */ | 741 | between this and the old multi-field proc format. */ |
742 | list_for_each_entry(use, &mod->modules_which_use_me, list) { | 742 | list_for_each_entry(use, &mod->modules_which_use_me, list) { |
743 | printed_something = 1; | 743 | printed_something = 1; |
744 | seq_printf(m, "%s,", use->module_which_uses->name); | 744 | seq_printf(m, "%s,", use->module_which_uses->name); |
745 | } | 745 | } |
746 | 746 | ||
747 | if (mod->unsafe) { | 747 | if (mod->unsafe) { |
748 | printed_something = 1; | 748 | printed_something = 1; |
749 | seq_printf(m, "[unsafe],"); | 749 | seq_printf(m, "[unsafe],"); |
750 | } | 750 | } |
751 | 751 | ||
752 | if (mod->init != NULL && mod->exit == NULL) { | 752 | if (mod->init != NULL && mod->exit == NULL) { |
753 | printed_something = 1; | 753 | printed_something = 1; |
754 | seq_printf(m, "[permanent],"); | 754 | seq_printf(m, "[permanent],"); |
755 | } | 755 | } |
756 | 756 | ||
757 | if (!printed_something) | 757 | if (!printed_something) |
758 | seq_printf(m, "-"); | 758 | seq_printf(m, "-"); |
759 | } | 759 | } |
760 | 760 | ||
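print_unload_info() emits the reference count and users fields of a /proc/modules line; the name/size prefix and the state/address suffix come from the seq_file handler elsewhere in this file. Illustrative output (module names and numbers invented):

usb_storage 65536 0 - Live 0xf8a4e000
scsi_mod 147456 2 usb_storage,sd_mod, Live 0xf89d0000

Note the deliberate trailing comma after the last user and the bare "-" for a module with no users, exactly as the code above writes them.
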
761 | void __symbol_put(const char *symbol) | 761 | void __symbol_put(const char *symbol) |
762 | { | 762 | { |
763 | struct module *owner; | 763 | struct module *owner; |
764 | unsigned long flags; | 764 | unsigned long flags; |
765 | const unsigned long *crc; | 765 | const unsigned long *crc; |
766 | 766 | ||
767 | spin_lock_irqsave(&modlist_lock, flags); | 767 | spin_lock_irqsave(&modlist_lock, flags); |
768 | if (!__find_symbol(symbol, &owner, &crc, 1)) | 768 | if (!__find_symbol(symbol, &owner, &crc, 1)) |
769 | BUG(); | 769 | BUG(); |
770 | module_put(owner); | 770 | module_put(owner); |
771 | spin_unlock_irqrestore(&modlist_lock, flags); | 771 | spin_unlock_irqrestore(&modlist_lock, flags); |
772 | } | 772 | } |
773 | EXPORT_SYMBOL(__symbol_put); | 773 | EXPORT_SYMBOL(__symbol_put); |
774 | 774 | ||
775 | void symbol_put_addr(void *addr) | 775 | void symbol_put_addr(void *addr) |
776 | { | 776 | { |
777 | struct module *modaddr; | 777 | struct module *modaddr; |
778 | 778 | ||
779 | if (core_kernel_text((unsigned long)addr)) | 779 | if (core_kernel_text((unsigned long)addr)) |
780 | return; | 780 | return; |
781 | 781 | ||
782 | if (!(modaddr = module_text_address((unsigned long)addr))) | 782 | if (!(modaddr = module_text_address((unsigned long)addr))) |
783 | BUG(); | 783 | BUG(); |
784 | module_put(modaddr); | 784 | module_put(modaddr); |
785 | } | 785 | } |
786 | EXPORT_SYMBOL_GPL(symbol_put_addr); | 786 | EXPORT_SYMBOL_GPL(symbol_put_addr); |
787 | 787 | ||
788 | static ssize_t show_refcnt(struct module_attribute *mattr, | 788 | static ssize_t show_refcnt(struct module_attribute *mattr, |
789 | struct module *mod, char *buffer) | 789 | struct module *mod, char *buffer) |
790 | { | 790 | { |
791 | /* sysfs holds a reference */ | 791 | /* sysfs holds a reference */ |
792 | return sprintf(buffer, "%u\n", module_refcount(mod)-1); | 792 | return sprintf(buffer, "%u\n", module_refcount(mod)-1); |
793 | } | 793 | } |
794 | 794 | ||
795 | static struct module_attribute refcnt = { | 795 | static struct module_attribute refcnt = { |
796 | .attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE }, | 796 | .attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE }, |
797 | .show = show_refcnt, | 797 | .show = show_refcnt, |
798 | }; | 798 | }; |
799 | 799 | ||
800 | void module_put(struct module *module) | 800 | void module_put(struct module *module) |
801 | { | 801 | { |
802 | if (module) { | 802 | if (module) { |
803 | unsigned int cpu = get_cpu(); | 803 | unsigned int cpu = get_cpu(); |
804 | local_dec(&module->ref[cpu].count); | 804 | local_dec(&module->ref[cpu].count); |
805 | /* Maybe they're waiting for us to drop reference? */ | 805 | /* Maybe they're waiting for us to drop reference? */ |
806 | if (unlikely(!module_is_live(module))) | 806 | if (unlikely(!module_is_live(module))) |
807 | wake_up_process(module->waiter); | 807 | wake_up_process(module->waiter); |
808 | put_cpu(); | 808 | put_cpu(); |
809 | } | 809 | } |
810 | } | 810 | } |
811 | EXPORT_SYMBOL(module_put); | 811 | EXPORT_SYMBOL(module_put); |
812 | 812 | ||
813 | #else /* !CONFIG_MODULE_UNLOAD */ | 813 | #else /* !CONFIG_MODULE_UNLOAD */ |
814 | static void print_unload_info(struct seq_file *m, struct module *mod) | 814 | static void print_unload_info(struct seq_file *m, struct module *mod) |
815 | { | 815 | { |
816 | /* We don't know the usage count, or what modules are using it. */ | 816 | /* We don't know the usage count, or what modules are using it. */ |
817 | seq_printf(m, " - -"); | 817 | seq_printf(m, " - -"); |
818 | } | 818 | } |
819 | 819 | ||
820 | static inline void module_unload_free(struct module *mod) | 820 | static inline void module_unload_free(struct module *mod) |
821 | { | 821 | { |
822 | } | 822 | } |
823 | 823 | ||
824 | static inline int use_module(struct module *a, struct module *b) | 824 | static inline int use_module(struct module *a, struct module *b) |
825 | { | 825 | { |
826 | return strong_try_module_get(b); | 826 | return strong_try_module_get(b); |
827 | } | 827 | } |
828 | 828 | ||
829 | static inline void module_unload_init(struct module *mod) | 829 | static inline void module_unload_init(struct module *mod) |
830 | { | 830 | { |
831 | } | 831 | } |
832 | #endif /* CONFIG_MODULE_UNLOAD */ | 832 | #endif /* CONFIG_MODULE_UNLOAD */ |
833 | 833 | ||
834 | static ssize_t show_initstate(struct module_attribute *mattr, | 834 | static ssize_t show_initstate(struct module_attribute *mattr, |
835 | struct module *mod, char *buffer) | 835 | struct module *mod, char *buffer) |
836 | { | 836 | { |
837 | const char *state = "unknown"; | 837 | const char *state = "unknown"; |
838 | 838 | ||
839 | switch (mod->state) { | 839 | switch (mod->state) { |
840 | case MODULE_STATE_LIVE: | 840 | case MODULE_STATE_LIVE: |
841 | state = "live"; | 841 | state = "live"; |
842 | break; | 842 | break; |
843 | case MODULE_STATE_COMING: | 843 | case MODULE_STATE_COMING: |
844 | state = "coming"; | 844 | state = "coming"; |
845 | break; | 845 | break; |
846 | case MODULE_STATE_GOING: | 846 | case MODULE_STATE_GOING: |
847 | state = "going"; | 847 | state = "going"; |
848 | break; | 848 | break; |
849 | } | 849 | } |
850 | return sprintf(buffer, "%s\n", state); | 850 | return sprintf(buffer, "%s\n", state); |
851 | } | 851 | } |
852 | 852 | ||
853 | static struct module_attribute initstate = { | 853 | static struct module_attribute initstate = { |
854 | .attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE }, | 854 | .attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE }, |
855 | .show = show_initstate, | 855 | .show = show_initstate, |
856 | }; | 856 | }; |
857 | 857 | ||
858 | static struct module_attribute *modinfo_attrs[] = { | 858 | static struct module_attribute *modinfo_attrs[] = { |
859 | &modinfo_version, | 859 | &modinfo_version, |
860 | &modinfo_srcversion, | 860 | &modinfo_srcversion, |
861 | &initstate, | 861 | &initstate, |
862 | #ifdef CONFIG_MODULE_UNLOAD | 862 | #ifdef CONFIG_MODULE_UNLOAD |
863 | &refcnt, | 863 | &refcnt, |
864 | #endif | 864 | #endif |
865 | NULL, | 865 | NULL, |
866 | }; | 866 | }; |
867 | 867 | ||
868 | static const char vermagic[] = VERMAGIC_STRING; | 868 | static const char vermagic[] = VERMAGIC_STRING; |
869 | 869 | ||
870 | #ifdef CONFIG_MODVERSIONS | 870 | #ifdef CONFIG_MODVERSIONS |
871 | static int check_version(Elf_Shdr *sechdrs, | 871 | static int check_version(Elf_Shdr *sechdrs, |
872 | unsigned int versindex, | 872 | unsigned int versindex, |
873 | const char *symname, | 873 | const char *symname, |
874 | struct module *mod, | 874 | struct module *mod, |
875 | const unsigned long *crc) | 875 | const unsigned long *crc) |
876 | { | 876 | { |
877 | unsigned int i, num_versions; | 877 | unsigned int i, num_versions; |
878 | struct modversion_info *versions; | 878 | struct modversion_info *versions; |
879 | 879 | ||
880 | /* Exporting module didn't supply crcs? OK, we're already tainted. */ | 880 | /* Exporting module didn't supply crcs? OK, we're already tainted. */ |
881 | if (!crc) | 881 | if (!crc) |
882 | return 1; | 882 | return 1; |
883 | 883 | ||
884 | versions = (void *) sechdrs[versindex].sh_addr; | 884 | versions = (void *) sechdrs[versindex].sh_addr; |
885 | num_versions = sechdrs[versindex].sh_size | 885 | num_versions = sechdrs[versindex].sh_size |
886 | / sizeof(struct modversion_info); | 886 | / sizeof(struct modversion_info); |
887 | 887 | ||
888 | for (i = 0; i < num_versions; i++) { | 888 | for (i = 0; i < num_versions; i++) { |
889 | if (strcmp(versions[i].name, symname) != 0) | 889 | if (strcmp(versions[i].name, symname) != 0) |
890 | continue; | 890 | continue; |
891 | 891 | ||
892 | if (versions[i].crc == *crc) | 892 | if (versions[i].crc == *crc) |
893 | return 1; | 893 | return 1; |
894 | printk("%s: disagrees about version of symbol %s\n", | 894 | printk("%s: disagrees about version of symbol %s\n", |
895 | mod->name, symname); | 895 | mod->name, symname); |
896 | DEBUGP("Found checksum %lX vs module %lX\n", | 896 | DEBUGP("Found checksum %lX vs module %lX\n", |
897 | *crc, versions[i].crc); | 897 | *crc, versions[i].crc); |
898 | return 0; | 898 | return 0; |
899 | } | 899 | } |
900 | /* Not in module's version table. OK, but that taints the kernel. */ | 900 | /* Not in module's version table. OK, but that taints the kernel. */ |
901 | if (!(tainted & TAINT_FORCED_MODULE)) | 901 | if (!(tainted & TAINT_FORCED_MODULE)) |
902 | printk("%s: no version for \"%s\" found: kernel tainted.\n", | 902 | printk("%s: no version for \"%s\" found: kernel tainted.\n", |
903 | mod->name, symname); | 903 | mod->name, symname); |
904 | add_taint_module(mod, TAINT_FORCED_MODULE); | 904 | add_taint_module(mod, TAINT_FORCED_MODULE); |
905 | return 1; | 905 | return 1; |
906 | } | 906 | } |
907 | 907 | ||
908 | static inline int check_modstruct_version(Elf_Shdr *sechdrs, | 908 | static inline int check_modstruct_version(Elf_Shdr *sechdrs, |
909 | unsigned int versindex, | 909 | unsigned int versindex, |
910 | struct module *mod) | 910 | struct module *mod) |
911 | { | 911 | { |
912 | const unsigned long *crc; | 912 | const unsigned long *crc; |
913 | struct module *owner; | 913 | struct module *owner; |
914 | 914 | ||
915 | if (!__find_symbol("struct_module", &owner, &crc, 1)) | 915 | if (!__find_symbol("struct_module", &owner, &crc, 1)) |
916 | BUG(); | 916 | BUG(); |
917 | return check_version(sechdrs, versindex, "struct_module", mod, | 917 | return check_version(sechdrs, versindex, "struct_module", mod, |
918 | crc); | 918 | crc); |
919 | } | 919 | } |
920 | 920 | ||
921 | /* First part is kernel version, which we ignore. */ | 921 | /* First part is kernel version, which we ignore. */ |
922 | static inline int same_magic(const char *amagic, const char *bmagic) | 922 | static inline int same_magic(const char *amagic, const char *bmagic) |
923 | { | 923 | { |
924 | amagic += strcspn(amagic, " "); | 924 | amagic += strcspn(amagic, " "); |
925 | bmagic += strcspn(bmagic, " "); | 925 | bmagic += strcspn(bmagic, " "); |
926 | return strcmp(amagic, bmagic) == 0; | 926 | return strcmp(amagic, bmagic) == 0; |
927 | } | 927 | } |
928 | #else | 928 | #else |
929 | static inline int check_version(Elf_Shdr *sechdrs, | 929 | static inline int check_version(Elf_Shdr *sechdrs, |
930 | unsigned int versindex, | 930 | unsigned int versindex, |
931 | const char *symname, | 931 | const char *symname, |
932 | struct module *mod, | 932 | struct module *mod, |
933 | const unsigned long *crc) | 933 | const unsigned long *crc) |
934 | { | 934 | { |
935 | return 1; | 935 | return 1; |
936 | } | 936 | } |
937 | 937 | ||
938 | static inline int check_modstruct_version(Elf_Shdr *sechdrs, | 938 | static inline int check_modstruct_version(Elf_Shdr *sechdrs, |
939 | unsigned int versindex, | 939 | unsigned int versindex, |
940 | struct module *mod) | 940 | struct module *mod) |
941 | { | 941 | { |
942 | return 1; | 942 | return 1; |
943 | } | 943 | } |
944 | 944 | ||
945 | static inline int same_magic(const char *amagic, const char *bmagic) | 945 | static inline int same_magic(const char *amagic, const char *bmagic) |
946 | { | 946 | { |
947 | return strcmp(amagic, bmagic) == 0; | 947 | return strcmp(amagic, bmagic) == 0; |
948 | } | 948 | } |
949 | #endif /* CONFIG_MODVERSIONS */ | 949 | #endif /* CONFIG_MODVERSIONS */ |
950 | 950 | ||
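Under CONFIG_MODVERSIONS the leading kernel-release token of the vermagic string is skipped, since per-symbol CRCs already guard ABI compatibility. A userspace check of same_magic() showing two differently versioned but otherwise matching strings comparing equal (the sample strings are invented):

#include <stdio.h>
#include <string.h>

static int same_magic(const char *amagic, const char *bmagic)
{
        /* body copied from the CONFIG_MODVERSIONS variant above */
        amagic += strcspn(amagic, " ");
        bmagic += strcspn(bmagic, " ");
        return strcmp(amagic, bmagic) == 0;
}

int main(void)
{
        const char *a = "2.6.21 SMP mod_unload 686";
        const char *b = "2.6.21.1 SMP mod_unload 686";

        printf("%d\n", same_magic(a, b));   /* prints 1 */
        return 0;
}
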
951 | /* Resolve a symbol for this module. I.e. if we find one, record usage. | 951 | /* Resolve a symbol for this module. I.e. if we find one, record usage. |
952 | Must be holding module_mutex. */ | 952 | Must be holding module_mutex. */ |
953 | static unsigned long resolve_symbol(Elf_Shdr *sechdrs, | 953 | static unsigned long resolve_symbol(Elf_Shdr *sechdrs, |
954 | unsigned int versindex, | 954 | unsigned int versindex, |
955 | const char *name, | 955 | const char *name, |
956 | struct module *mod) | 956 | struct module *mod) |
957 | { | 957 | { |
958 | struct module *owner; | 958 | struct module *owner; |
959 | unsigned long ret; | 959 | unsigned long ret; |
960 | const unsigned long *crc; | 960 | const unsigned long *crc; |
961 | 961 | ||
962 | ret = __find_symbol(name, &owner, &crc, | 962 | ret = __find_symbol(name, &owner, &crc, |
963 | !(mod->taints & TAINT_PROPRIETARY_MODULE)); | 963 | !(mod->taints & TAINT_PROPRIETARY_MODULE)); |
964 | if (ret) { | 964 | if (ret) { |
965 | /* use_module can fail due to OOM, or module unloading */ | 965 | /* use_module can fail due to OOM, or module unloading */ |
966 | if (!check_version(sechdrs, versindex, name, mod, crc) || | 966 | if (!check_version(sechdrs, versindex, name, mod, crc) || |
967 | !use_module(mod, owner)) | 967 | !use_module(mod, owner)) |
968 | ret = 0; | 968 | ret = 0; |
969 | } | 969 | } |
970 | return ret; | 970 | return ret; |
971 | } | 971 | } |
972 | 972 | ||
973 | 973 | ||
974 | /* | 974 | /* |
975 | * /sys/module/foo/sections stuff | 975 | * /sys/module/foo/sections stuff |
976 | * J. Corbet <corbet@lwn.net> | 976 | * J. Corbet <corbet@lwn.net> |
977 | */ | 977 | */ |
978 | #ifdef CONFIG_KALLSYMS | 978 | #ifdef CONFIG_KALLSYMS |
979 | static ssize_t module_sect_show(struct module_attribute *mattr, | 979 | static ssize_t module_sect_show(struct module_attribute *mattr, |
980 | struct module *mod, char *buf) | 980 | struct module *mod, char *buf) |
981 | { | 981 | { |
982 | struct module_sect_attr *sattr = | 982 | struct module_sect_attr *sattr = |
983 | container_of(mattr, struct module_sect_attr, mattr); | 983 | container_of(mattr, struct module_sect_attr, mattr); |
984 | return sprintf(buf, "0x%lx\n", sattr->address); | 984 | return sprintf(buf, "0x%lx\n", sattr->address); |
985 | } | 985 | } |
986 | 986 | ||
987 | static void free_sect_attrs(struct module_sect_attrs *sect_attrs) | 987 | static void free_sect_attrs(struct module_sect_attrs *sect_attrs) |
988 | { | 988 | { |
989 | int section; | 989 | int section; |
990 | 990 | ||
991 | for (section = 0; section < sect_attrs->nsections; section++) | 991 | for (section = 0; section < sect_attrs->nsections; section++) |
992 | kfree(sect_attrs->attrs[section].name); | 992 | kfree(sect_attrs->attrs[section].name); |
993 | kfree(sect_attrs); | 993 | kfree(sect_attrs); |
994 | } | 994 | } |
995 | 995 | ||
996 | static void add_sect_attrs(struct module *mod, unsigned int nsect, | 996 | static void add_sect_attrs(struct module *mod, unsigned int nsect, |
997 | char *secstrings, Elf_Shdr *sechdrs) | 997 | char *secstrings, Elf_Shdr *sechdrs) |
998 | { | 998 | { |
999 | unsigned int nloaded = 0, i, size[2]; | 999 | unsigned int nloaded = 0, i, size[2]; |
1000 | struct module_sect_attrs *sect_attrs; | 1000 | struct module_sect_attrs *sect_attrs; |
1001 | struct module_sect_attr *sattr; | 1001 | struct module_sect_attr *sattr; |
1002 | struct attribute **gattr; | 1002 | struct attribute **gattr; |
1003 | 1003 | ||
1004 | /* Count loaded sections and allocate structures */ | 1004 | /* Count loaded sections and allocate structures */ |
1005 | for (i = 0; i < nsect; i++) | 1005 | for (i = 0; i < nsect; i++) |
1006 | if (sechdrs[i].sh_flags & SHF_ALLOC) | 1006 | if (sechdrs[i].sh_flags & SHF_ALLOC) |
1007 | nloaded++; | 1007 | nloaded++; |
1008 | size[0] = ALIGN(sizeof(*sect_attrs) | 1008 | size[0] = ALIGN(sizeof(*sect_attrs) |
1009 | + nloaded * sizeof(sect_attrs->attrs[0]), | 1009 | + nloaded * sizeof(sect_attrs->attrs[0]), |
1010 | sizeof(sect_attrs->grp.attrs[0])); | 1010 | sizeof(sect_attrs->grp.attrs[0])); |
1011 | size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]); | 1011 | size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]); |
1012 | sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL); | 1012 | sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL); |
1013 | if (sect_attrs == NULL) | 1013 | if (sect_attrs == NULL) |
1014 | return; | 1014 | return; |
1015 | 1015 | ||
1016 | /* Setup section attributes. */ | 1016 | /* Setup section attributes. */ |
1017 | sect_attrs->grp.name = "sections"; | 1017 | sect_attrs->grp.name = "sections"; |
1018 | sect_attrs->grp.attrs = (void *)sect_attrs + size[0]; | 1018 | sect_attrs->grp.attrs = (void *)sect_attrs + size[0]; |
1019 | 1019 | ||
1020 | sect_attrs->nsections = 0; | 1020 | sect_attrs->nsections = 0; |
1021 | sattr = &sect_attrs->attrs[0]; | 1021 | sattr = &sect_attrs->attrs[0]; |
1022 | gattr = &sect_attrs->grp.attrs[0]; | 1022 | gattr = &sect_attrs->grp.attrs[0]; |
1023 | for (i = 0; i < nsect; i++) { | 1023 | for (i = 0; i < nsect; i++) { |
1024 | if (! (sechdrs[i].sh_flags & SHF_ALLOC)) | 1024 | if (! (sechdrs[i].sh_flags & SHF_ALLOC)) |
1025 | continue; | 1025 | continue; |
1026 | sattr->address = sechdrs[i].sh_addr; | 1026 | sattr->address = sechdrs[i].sh_addr; |
1027 | sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, | 1027 | sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, |
1028 | GFP_KERNEL); | 1028 | GFP_KERNEL); |
1029 | if (sattr->name == NULL) | 1029 | if (sattr->name == NULL) |
1030 | goto out; | 1030 | goto out; |
1031 | sect_attrs->nsections++; | 1031 | sect_attrs->nsections++; |
1032 | sattr->mattr.show = module_sect_show; | 1032 | sattr->mattr.show = module_sect_show; |
1033 | sattr->mattr.store = NULL; | 1033 | sattr->mattr.store = NULL; |
1034 | sattr->mattr.attr.name = sattr->name; | 1034 | sattr->mattr.attr.name = sattr->name; |
1035 | sattr->mattr.attr.owner = mod; | 1035 | sattr->mattr.attr.owner = mod; |
1036 | sattr->mattr.attr.mode = S_IRUGO; | 1036 | sattr->mattr.attr.mode = S_IRUGO; |
1037 | *(gattr++) = &(sattr++)->mattr.attr; | 1037 | *(gattr++) = &(sattr++)->mattr.attr; |
1038 | } | 1038 | } |
1039 | *gattr = NULL; | 1039 | *gattr = NULL; |
1040 | 1040 | ||
1041 | if (sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp)) | 1041 | if (sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp)) |
1042 | goto out; | 1042 | goto out; |
1043 | 1043 | ||
1044 | mod->sect_attrs = sect_attrs; | 1044 | mod->sect_attrs = sect_attrs; |
1045 | return; | 1045 | return; |
1046 | out: | 1046 | out: |
1047 | free_sect_attrs(sect_attrs); | 1047 | free_sect_attrs(sect_attrs); |
1048 | } | 1048 | } |
1049 | 1049 | ||
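add_sect_attrs() carves two regions out of a single kzalloc(): the struct with its attrs[] tail (size[0], rounded up so what follows stays aligned) and the NULL-terminated grp.attrs pointer array (size[1]). A userspace sketch of the same single-allocation trick (struct names invented; the void-pointer arithmetic is the GCC extension the kernel code itself uses):

#include <stdio.h>
#include <stdlib.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

struct entry { char name[16]; };

struct table {                  /* plays the role of module_sect_attrs */
        char **ptrs;            /* plays the role of grp.attrs */
        unsigned int n;
        struct entry entries[]; /* flexible tail, like attrs[0] */
};

int main(void)
{
        unsigned int n = 3, i;
        unsigned long size0 = ALIGN(sizeof(struct table)
                                    + n * sizeof(struct entry),
                                    sizeof(char *));
        unsigned long size1 = (n + 1) * sizeof(char *); /* +1 for NULL */
        struct table *t = calloc(1, size0 + size1);

        if (!t)
                return 1;
        t->ptrs = (void *)t + size0;  /* second region follows the first */
        for (i = 0; i < n; i++) {
                snprintf(t->entries[i].name, sizeof(t->entries[i].name),
                         "section%u", i);
                t->ptrs[i] = t->entries[i].name;
        }
        t->ptrs[n] = NULL;            /* terminator, as *gattr = NULL above */

        for (i = 0; t->ptrs[i]; i++)
                printf("%s\n", t->ptrs[i]);
        free(t);                      /* one free releases both regions */
        return 0;
}
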
1050 | static void remove_sect_attrs(struct module *mod) | 1050 | static void remove_sect_attrs(struct module *mod) |
1051 | { | 1051 | { |
1052 | if (mod->sect_attrs) { | 1052 | if (mod->sect_attrs) { |
1053 | sysfs_remove_group(&mod->mkobj.kobj, | 1053 | sysfs_remove_group(&mod->mkobj.kobj, |
1054 | &mod->sect_attrs->grp); | 1054 | &mod->sect_attrs->grp); |
1055 | /* We are positive that no one is using any sect attrs | 1055 | /* We are positive that no one is using any sect attrs |
1056 | * at this point. Deallocate immediately. */ | 1056 | * at this point. Deallocate immediately. */ |
1057 | free_sect_attrs(mod->sect_attrs); | 1057 | free_sect_attrs(mod->sect_attrs); |
1058 | mod->sect_attrs = NULL; | 1058 | mod->sect_attrs = NULL; |
1059 | } | 1059 | } |
1060 | } | 1060 | } |
1061 | 1061 | ||
1062 | #else | 1062 | #else |
1063 | 1063 | ||
1064 | static inline void add_sect_attrs(struct module *mod, unsigned int nsect, | 1064 | static inline void add_sect_attrs(struct module *mod, unsigned int nsect, |
1065 | char *sectstrings, Elf_Shdr *sechdrs) | 1065 | char *sectstrings, Elf_Shdr *sechdrs) |
1066 | { | 1066 | { |
1067 | } | 1067 | } |
1068 | 1068 | ||
1069 | static inline void remove_sect_attrs(struct module *mod) | 1069 | static inline void remove_sect_attrs(struct module *mod) |
1070 | { | 1070 | { |
1071 | } | 1071 | } |
1072 | #endif /* CONFIG_KALLSYMS */ | 1072 | #endif /* CONFIG_KALLSYMS */ |
1073 | 1073 | ||
1074 | #ifdef CONFIG_SYSFS | 1074 | #ifdef CONFIG_SYSFS |
1075 | int module_add_modinfo_attrs(struct module *mod) | 1075 | int module_add_modinfo_attrs(struct module *mod) |
1076 | { | 1076 | { |
1077 | struct module_attribute *attr; | 1077 | struct module_attribute *attr; |
1078 | struct module_attribute *temp_attr; | 1078 | struct module_attribute *temp_attr; |
1079 | int error = 0; | 1079 | int error = 0; |
1080 | int i; | 1080 | int i; |
1081 | 1081 | ||
1082 | mod->modinfo_attrs = kzalloc((sizeof(struct module_attribute) * | 1082 | mod->modinfo_attrs = kzalloc((sizeof(struct module_attribute) * |
1083 | (ARRAY_SIZE(modinfo_attrs) + 1)), | 1083 | (ARRAY_SIZE(modinfo_attrs) + 1)), |
1084 | GFP_KERNEL); | 1084 | GFP_KERNEL); |
1085 | if (!mod->modinfo_attrs) | 1085 | if (!mod->modinfo_attrs) |
1086 | return -ENOMEM; | 1086 | return -ENOMEM; |
1087 | 1087 | ||
1088 | temp_attr = mod->modinfo_attrs; | 1088 | temp_attr = mod->modinfo_attrs; |
1089 | for (i = 0; (attr = modinfo_attrs[i]) && !error; i++) { | 1089 | for (i = 0; (attr = modinfo_attrs[i]) && !error; i++) { |
1090 | if (!attr->test || | 1090 | if (!attr->test || |
1091 | (attr->test && attr->test(mod))) { | 1091 | (attr->test && attr->test(mod))) { |
1092 | memcpy(temp_attr, attr, sizeof(*temp_attr)); | 1092 | memcpy(temp_attr, attr, sizeof(*temp_attr)); |
1093 | temp_attr->attr.owner = mod; | 1093 | temp_attr->attr.owner = mod; |
1094 | error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); | 1094 | error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); |
1095 | ++temp_attr; | 1095 | ++temp_attr; |
1096 | } | 1096 | } |
1097 | } | 1097 | } |
1098 | return error; | 1098 | return error; |
1099 | } | 1099 | } |
1100 | 1100 | ||
1101 | void module_remove_modinfo_attrs(struct module *mod) | 1101 | void module_remove_modinfo_attrs(struct module *mod) |
1102 | { | 1102 | { |
1103 | struct module_attribute *attr; | 1103 | struct module_attribute *attr; |
1104 | int i; | 1104 | int i; |
1105 | 1105 | ||
1106 | for (i = 0; (attr = &mod->modinfo_attrs[i]); i++) { | 1106 | for (i = 0; (attr = &mod->modinfo_attrs[i]); i++) { |
1107 | /* pick a field to test for end of list */ | 1107 | /* pick a field to test for end of list */ |
1108 | if (!attr->attr.name) | 1108 | if (!attr->attr.name) |
1109 | break; | 1109 | break; |
1110 | sysfs_remove_file(&mod->mkobj.kobj,&attr->attr); | 1110 | sysfs_remove_file(&mod->mkobj.kobj,&attr->attr); |
1111 | if (attr->free) | 1111 | if (attr->free) |
1112 | attr->free(mod); | 1112 | attr->free(mod); |
1113 | } | 1113 | } |
1114 | kfree(mod->modinfo_attrs); | 1114 | kfree(mod->modinfo_attrs); |
1115 | } | 1115 | } |
1116 | #endif | 1116 | #endif |
1117 | 1117 | ||
1118 | #ifdef CONFIG_SYSFS | 1118 | #ifdef CONFIG_SYSFS |
1119 | int mod_sysfs_init(struct module *mod) | 1119 | int mod_sysfs_init(struct module *mod) |
1120 | { | 1120 | { |
1121 | int err; | 1121 | int err; |
1122 | 1122 | ||
1123 | if (!module_sysfs_initialized) { | 1123 | if (!module_sysfs_initialized) { |
1124 | printk(KERN_ERR "%s: module sysfs not initialized\n", | 1124 | printk(KERN_ERR "%s: module sysfs not initialized\n", |
1125 | mod->name); | 1125 | mod->name); |
1126 | err = -EINVAL; | 1126 | err = -EINVAL; |
1127 | goto out; | 1127 | goto out; |
1128 | } | 1128 | } |
1129 | memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj)); | 1129 | memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj)); |
1130 | err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name); | 1130 | err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name); |
1131 | if (err) | 1131 | if (err) |
1132 | goto out; | 1132 | goto out; |
1133 | kobj_set_kset_s(&mod->mkobj, module_subsys); | 1133 | kobj_set_kset_s(&mod->mkobj, module_subsys); |
1134 | mod->mkobj.mod = mod; | 1134 | mod->mkobj.mod = mod; |
1135 | 1135 | ||
1136 | kobject_init(&mod->mkobj.kobj); | 1136 | kobject_init(&mod->mkobj.kobj); |
1137 | 1137 | ||
1138 | out: | 1138 | out: |
1139 | return err; | 1139 | return err; |
1140 | } | 1140 | } |
1141 | 1141 | ||
1142 | int mod_sysfs_setup(struct module *mod, | 1142 | int mod_sysfs_setup(struct module *mod, |
1143 | struct kernel_param *kparam, | 1143 | struct kernel_param *kparam, |
1144 | unsigned int num_params) | 1144 | unsigned int num_params) |
1145 | { | 1145 | { |
1146 | int err; | 1146 | int err; |
1147 | 1147 | ||
1148 | /* delay uevent until full sysfs population */ | 1148 | /* delay uevent until full sysfs population */ |
1149 | err = kobject_add(&mod->mkobj.kobj); | 1149 | err = kobject_add(&mod->mkobj.kobj); |
1150 | if (err) | 1150 | if (err) |
1151 | goto out; | 1151 | goto out; |
1152 | 1152 | ||
1153 | mod->holders_dir = kobject_add_dir(&mod->mkobj.kobj, "holders"); | 1153 | mod->holders_dir = kobject_add_dir(&mod->mkobj.kobj, "holders"); |
1154 | if (!mod->holders_dir) { | 1154 | if (!mod->holders_dir) { |
1155 | err = -ENOMEM; | 1155 | err = -ENOMEM; |
1156 | goto out_unreg; | 1156 | goto out_unreg; |
1157 | } | 1157 | } |
1158 | 1158 | ||
1159 | err = module_param_sysfs_setup(mod, kparam, num_params); | 1159 | err = module_param_sysfs_setup(mod, kparam, num_params); |
1160 | if (err) | 1160 | if (err) |
1161 | goto out_unreg_holders; | 1161 | goto out_unreg_holders; |
1162 | 1162 | ||
1163 | err = module_add_modinfo_attrs(mod); | 1163 | err = module_add_modinfo_attrs(mod); |
1164 | if (err) | 1164 | if (err) |
1165 | goto out_unreg_param; | 1165 | goto out_unreg_param; |
1166 | 1166 | ||
1167 | kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); | 1167 | kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); |
1168 | return 0; | 1168 | return 0; |
1169 | 1169 | ||
1170 | out_unreg_param: | 1170 | out_unreg_param: |
1171 | module_param_sysfs_remove(mod); | 1171 | module_param_sysfs_remove(mod); |
1172 | out_unreg_holders: | 1172 | out_unreg_holders: |
1173 | kobject_unregister(mod->holders_dir); | 1173 | kobject_unregister(mod->holders_dir); |
1174 | out_unreg: | 1174 | out_unreg: |
1175 | kobject_del(&mod->mkobj.kobj); | 1175 | kobject_del(&mod->mkobj.kobj); |
1176 | kobject_put(&mod->mkobj.kobj); | 1176 | kobject_put(&mod->mkobj.kobj); |
1177 | out: | 1177 | out: |
1178 | return err; | 1178 | return err; |
1179 | } | 1179 | } |
1180 | #endif | 1180 | #endif |
1181 | 1181 | ||
1182 | static void mod_kobject_remove(struct module *mod) | 1182 | static void mod_kobject_remove(struct module *mod) |
1183 | { | 1183 | { |
1184 | module_remove_modinfo_attrs(mod); | 1184 | module_remove_modinfo_attrs(mod); |
1185 | module_param_sysfs_remove(mod); | 1185 | module_param_sysfs_remove(mod); |
1186 | kobject_unregister(mod->mkobj.drivers_dir); | 1186 | kobject_unregister(mod->mkobj.drivers_dir); |
1187 | kobject_unregister(mod->holders_dir); | 1187 | kobject_unregister(mod->holders_dir); |
1188 | kobject_unregister(&mod->mkobj.kobj); | 1188 | kobject_unregister(&mod->mkobj.kobj); |
1189 | } | 1189 | } |
1190 | 1190 | ||
1191 | /* | 1191 | /* |
1192 | * unlink the module while the whole machine is stopped with interrupts off | 1192 | * unlink the module while the whole machine is stopped with interrupts off |
1193 | * - this defends against kallsyms not taking locks | 1193 | * - this defends against kallsyms not taking locks |
1194 | */ | 1194 | */ |
1195 | static int __unlink_module(void *_mod) | 1195 | static int __unlink_module(void *_mod) |
1196 | { | 1196 | { |
1197 | struct module *mod = _mod; | 1197 | struct module *mod = _mod; |
1198 | list_del(&mod->list); | 1198 | list_del(&mod->list); |
1199 | return 0; | 1199 | return 0; |
1200 | } | 1200 | } |
1201 | 1201 | ||
1202 | /* Free a module, remove from lists, etc (must hold module mutex). */ | 1202 | /* Free a module, remove from lists, etc (must hold module mutex). */ |
1203 | static void free_module(struct module *mod) | 1203 | static void free_module(struct module *mod) |
1204 | { | 1204 | { |
1205 | /* Delete from various lists */ | 1205 | /* Delete from various lists */ |
1206 | stop_machine_run(__unlink_module, mod, NR_CPUS); | 1206 | stop_machine_run(__unlink_module, mod, NR_CPUS); |
1207 | remove_sect_attrs(mod); | 1207 | remove_sect_attrs(mod); |
1208 | mod_kobject_remove(mod); | 1208 | mod_kobject_remove(mod); |
1209 | 1209 | ||
1210 | unwind_remove_table(mod->unwind_info, 0); | 1210 | unwind_remove_table(mod->unwind_info, 0); |
1211 | 1211 | ||
1212 | /* Arch-specific cleanup. */ | 1212 | /* Arch-specific cleanup. */ |
1213 | module_arch_cleanup(mod); | 1213 | module_arch_cleanup(mod); |
1214 | 1214 | ||
1215 | /* Module unload stuff */ | 1215 | /* Module unload stuff */ |
1216 | module_unload_free(mod); | 1216 | module_unload_free(mod); |
1217 | 1217 | ||
1218 | /* This may be NULL, but that's OK */ | 1218 | /* This may be NULL, but that's OK */ |
1219 | module_free(mod, mod->module_init); | 1219 | module_free(mod, mod->module_init); |
1220 | kfree(mod->args); | 1220 | kfree(mod->args); |
1221 | if (mod->percpu) | 1221 | if (mod->percpu) |
1222 | percpu_modfree(mod->percpu); | 1222 | percpu_modfree(mod->percpu); |
1223 | 1223 | ||
1224 | /* Free lock-classes: */ | 1224 | /* Free lock-classes: */ |
1225 | lockdep_free_key_range(mod->module_core, mod->core_size); | 1225 | lockdep_free_key_range(mod->module_core, mod->core_size); |
1226 | 1226 | ||
1227 | /* Finally, free the core (containing the module structure) */ | 1227 | /* Finally, free the core (containing the module structure) */ |
1228 | module_free(mod, mod->module_core); | 1228 | module_free(mod, mod->module_core); |
1229 | } | 1229 | } |
1230 | 1230 | ||
1231 | void *__symbol_get(const char *symbol) | 1231 | void *__symbol_get(const char *symbol) |
1232 | { | 1232 | { |
1233 | struct module *owner; | 1233 | struct module *owner; |
1234 | unsigned long value, flags; | 1234 | unsigned long value, flags; |
1235 | const unsigned long *crc; | 1235 | const unsigned long *crc; |
1236 | 1236 | ||
1237 | spin_lock_irqsave(&modlist_lock, flags); | 1237 | spin_lock_irqsave(&modlist_lock, flags); |
1238 | value = __find_symbol(symbol, &owner, &crc, 1); | 1238 | value = __find_symbol(symbol, &owner, &crc, 1); |
1239 | if (value && !strong_try_module_get(owner)) | 1239 | if (value && !strong_try_module_get(owner)) |
1240 | value = 0; | 1240 | value = 0; |
1241 | spin_unlock_irqrestore(&modlist_lock, flags); | 1241 | spin_unlock_irqrestore(&modlist_lock, flags); |
1242 | 1242 | ||
1243 | return (void *)value; | 1243 | return (void *)value; |
1244 | } | 1244 | } |
1245 | EXPORT_SYMBOL_GPL(__symbol_get); | 1245 | EXPORT_SYMBOL_GPL(__symbol_get); |
1246 | 1246 | ||
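__symbol_get() above pairs with __symbol_put() from earlier in this file: the get pins the owning module (or is a no-op for core kernel symbols), the put releases it. A sketch of a client module using the pair (an illustrative out-of-tree module for kernels of this era, not part of this commit; "jiffies" is just a convenient exported symbol):

#include <linux/module.h>
#include <linux/kernel.h>

static int __init pin_init(void)
{
        void *sym = __symbol_get("jiffies");  /* takes a reference */

        if (!sym)
                return -ENOENT;
        printk(KERN_INFO "jiffies lives at %p\n", sym);
        __symbol_put("jiffies");              /* drops it again */
        return 0;
}

static void __exit pin_exit(void)
{
}

module_init(pin_init);
module_exit(pin_exit);
MODULE_LICENSE("GPL");
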
1247 | /* | 1247 | /* |
1248 | * Ensure that an exported symbol [global namespace] does not already exist | 1248 | * Ensure that an exported symbol [global namespace] does not already exist |
1249 | * in the kernel or in some other module's exported symbol table. | 1249 | * in the kernel or in some other module's exported symbol table. |
1250 | */ | 1250 | */ |
1251 | static int verify_export_symbols(struct module *mod) | 1251 | static int verify_export_symbols(struct module *mod) |
1252 | { | 1252 | { |
1253 | const char *name = NULL; | 1253 | const char *name = NULL; |
1254 | unsigned long i, ret = 0; | 1254 | unsigned long i, ret = 0; |
1255 | struct module *owner; | 1255 | struct module *owner; |
1256 | const unsigned long *crc; | 1256 | const unsigned long *crc; |
1257 | 1257 | ||
1258 | for (i = 0; i < mod->num_syms; i++) | 1258 | for (i = 0; i < mod->num_syms; i++) |
1259 | if (__find_symbol(mod->syms[i].name, &owner, &crc, 1)) { | 1259 | if (__find_symbol(mod->syms[i].name, &owner, &crc, 1)) { |
1260 | name = mod->syms[i].name; | 1260 | name = mod->syms[i].name; |
1261 | ret = -ENOEXEC; | 1261 | ret = -ENOEXEC; |
1262 | goto dup; | 1262 | goto dup; |
1263 | } | 1263 | } |
1264 | 1264 | ||
1265 | for (i = 0; i < mod->num_gpl_syms; i++) | 1265 | for (i = 0; i < mod->num_gpl_syms; i++) |
1266 | if (__find_symbol(mod->gpl_syms[i].name, &owner, &crc, 1)) { | 1266 | if (__find_symbol(mod->gpl_syms[i].name, &owner, &crc, 1)) { |
1267 | name = mod->gpl_syms[i].name; | 1267 | name = mod->gpl_syms[i].name; |
1268 | ret = -ENOEXEC; | 1268 | ret = -ENOEXEC; |
1269 | goto dup; | 1269 | goto dup; |
1270 | } | 1270 | } |
1271 | 1271 | ||
1272 | dup: | 1272 | dup: |
1273 | if (ret) | 1273 | if (ret) |
1274 | printk(KERN_ERR "%s: exports duplicate symbol %s (owned by %s)\n", | 1274 | printk(KERN_ERR "%s: exports duplicate symbol %s (owned by %s)\n", |
1275 | mod->name, name, module_name(owner)); | 1275 | mod->name, name, module_name(owner)); |
1276 | 1276 | ||
1277 | return ret; | 1277 | return ret; |
1278 | } | 1278 | } |
1279 | 1279 | ||
1280 | /* Change all symbols so that sh_value encodes the pointer directly. */ | 1280 | /* Change all symbols so that sh_value encodes the pointer directly. */ |
1281 | static int simplify_symbols(Elf_Shdr *sechdrs, | 1281 | static int simplify_symbols(Elf_Shdr *sechdrs, |
1282 | unsigned int symindex, | 1282 | unsigned int symindex, |
1283 | const char *strtab, | 1283 | const char *strtab, |
1284 | unsigned int versindex, | 1284 | unsigned int versindex, |
1285 | unsigned int pcpuindex, | 1285 | unsigned int pcpuindex, |
1286 | struct module *mod) | 1286 | struct module *mod) |
1287 | { | 1287 | { |
1288 | Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; | 1288 | Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; |
1289 | unsigned long secbase; | 1289 | unsigned long secbase; |
1290 | unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym); | 1290 | unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym); |
1291 | int ret = 0; | 1291 | int ret = 0; |
1292 | 1292 | ||
1293 | for (i = 1; i < n; i++) { | 1293 | for (i = 1; i < n; i++) { |
1294 | switch (sym[i].st_shndx) { | 1294 | switch (sym[i].st_shndx) { |
1295 | case SHN_COMMON: | 1295 | case SHN_COMMON: |
1296 | /* We compiled with -fno-common. These are not | 1296 | /* We compiled with -fno-common. These are not |
1297 | supposed to happen. */ | 1297 | supposed to happen. */ |
1298 | DEBUGP("Common symbol: %s\n", strtab + sym[i].st_name); | 1298 | DEBUGP("Common symbol: %s\n", strtab + sym[i].st_name); |
1299 | printk("%s: please compile with -fno-common\n", | 1299 | printk("%s: please compile with -fno-common\n", |
1300 | mod->name); | 1300 | mod->name); |
1301 | ret = -ENOEXEC; | 1301 | ret = -ENOEXEC; |
1302 | break; | 1302 | break; |
1303 | 1303 | ||
1304 | case SHN_ABS: | 1304 | case SHN_ABS: |
1305 | /* Don't need to do anything */ | 1305 | /* Don't need to do anything */ |
1306 | DEBUGP("Absolute symbol: 0x%08lx\n", | 1306 | DEBUGP("Absolute symbol: 0x%08lx\n", |
1307 | (long)sym[i].st_value); | 1307 | (long)sym[i].st_value); |
1308 | break; | 1308 | break; |
1309 | 1309 | ||
1310 | case SHN_UNDEF: | 1310 | case SHN_UNDEF: |
1311 | sym[i].st_value | 1311 | sym[i].st_value |
1312 | = resolve_symbol(sechdrs, versindex, | 1312 | = resolve_symbol(sechdrs, versindex, |
1313 | strtab + sym[i].st_name, mod); | 1313 | strtab + sym[i].st_name, mod); |
1314 | 1314 | ||
1315 | /* Ok if resolved. */ | 1315 | /* Ok if resolved. */ |
1316 | if (sym[i].st_value != 0) | 1316 | if (sym[i].st_value != 0) |
1317 | break; | 1317 | break; |
1318 | /* Ok if weak. */ | 1318 | /* Ok if weak. */ |
1319 | if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) | 1319 | if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) |
1320 | break; | 1320 | break; |
1321 | 1321 | ||
1322 | printk(KERN_WARNING "%s: Unknown symbol %s\n", | 1322 | printk(KERN_WARNING "%s: Unknown symbol %s\n", |
1323 | mod->name, strtab + sym[i].st_name); | 1323 | mod->name, strtab + sym[i].st_name); |
1324 | ret = -ENOENT; | 1324 | ret = -ENOENT; |
1325 | break; | 1325 | break; |
1326 | 1326 | ||
1327 | default: | 1327 | default: |
1328 | /* Divert to percpu allocation if a percpu var. */ | 1328 | /* Divert to percpu allocation if a percpu var. */ |
1329 | if (sym[i].st_shndx == pcpuindex) | 1329 | if (sym[i].st_shndx == pcpuindex) |
1330 | secbase = (unsigned long)mod->percpu; | 1330 | secbase = (unsigned long)mod->percpu; |
1331 | else | 1331 | else |
1332 | secbase = sechdrs[sym[i].st_shndx].sh_addr; | 1332 | secbase = sechdrs[sym[i].st_shndx].sh_addr; |
1333 | sym[i].st_value += secbase; | 1333 | sym[i].st_value += secbase; |
1334 | break; | 1334 | break; |
1335 | } | 1335 | } |
1336 | } | 1336 | } |
1337 | 1337 | ||
1338 | return ret; | 1338 | return ret; |
1339 | } | 1339 | } |
1340 | 1340 | ||
1341 | /* Update size with this section: return offset. */ | 1341 | /* Update size with this section: return offset. */ |
1342 | static long get_offset(unsigned long *size, Elf_Shdr *sechdr) | 1342 | static long get_offset(unsigned long *size, Elf_Shdr *sechdr) |
1343 | { | 1343 | { |
1344 | long ret; | 1344 | long ret; |
1345 | 1345 | ||
1346 | ret = ALIGN(*size, sechdr->sh_addralign ?: 1); | 1346 | ret = ALIGN(*size, sechdr->sh_addralign ?: 1); |
1347 | *size = ret + sechdr->sh_size; | 1347 | *size = ret + sechdr->sh_size; |
1348 | return ret; | 1348 | return ret; |
1349 | } | 1349 | } |
1350 | 1350 | ||
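get_offset() rounds the running size up to the section's alignment, hands back that offset, and advances the total. A userspace check of the arithmetic (simplified ALIGN, equivalent to the kernel macro for the power-of-two alignments ELF uses; sh_addralign/sh_size are passed directly instead of an Elf_Shdr):

#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

static long get_offset(unsigned long *size, unsigned long sh_addralign,
                       unsigned long sh_size)
{
        long ret = ALIGN(*size, sh_addralign ? sh_addralign : 1);

        *size = ret + sh_size;
        return ret;
}

int main(void)
{
        unsigned long size = 0;

        /* 3 bytes of byte-aligned data, then 16 bytes aligned to 8 */
        printf("off0=%ld\n", get_offset(&size, 1, 3));  /* 0 */
        printf("off1=%ld\n", get_offset(&size, 8, 16)); /* 8: 3 rounds up */
        printf("total=%lu\n", size);                    /* 24 */
        return 0;
}
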
1351 | /* Lay out the SHF_ALLOC sections in a way not dissimilar to how ld | 1351 | /* Lay out the SHF_ALLOC sections in a way not dissimilar to how ld |
1352 | might -- code, read-only data, read-write data, small data. Tally | 1352 | might -- code, read-only data, read-write data, small data. Tally |
1353 | sizes, and place the offsets into sh_entsize fields: high bit means it | 1353 | sizes, and place the offsets into sh_entsize fields: high bit means it |
1354 | belongs in init. */ | 1354 | belongs in init. */ |
1355 | static void layout_sections(struct module *mod, | 1355 | static void layout_sections(struct module *mod, |
1356 | const Elf_Ehdr *hdr, | 1356 | const Elf_Ehdr *hdr, |
1357 | Elf_Shdr *sechdrs, | 1357 | Elf_Shdr *sechdrs, |
1358 | const char *secstrings) | 1358 | const char *secstrings) |
1359 | { | 1359 | { |
1360 | static unsigned long const masks[][2] = { | 1360 | static unsigned long const masks[][2] = { |
1361 | /* NOTE: all executable code must be the first section | 1361 | /* NOTE: all executable code must be the first section |
1362 | * in this array; otherwise modify the text_size | 1362 | * in this array; otherwise modify the text_size |
1363 | * finder in the two loops below */ | 1363 | * finder in the two loops below */ |
1364 | { SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL }, | 1364 | { SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL }, |
1365 | { SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL }, | 1365 | { SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL }, |
1366 | { SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL }, | 1366 | { SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL }, |
1367 | { ARCH_SHF_SMALL | SHF_ALLOC, 0 } | 1367 | { ARCH_SHF_SMALL | SHF_ALLOC, 0 } |
1368 | }; | 1368 | }; |
1369 | unsigned int m, i; | 1369 | unsigned int m, i; |
1370 | 1370 | ||
1371 | for (i = 0; i < hdr->e_shnum; i++) | 1371 | for (i = 0; i < hdr->e_shnum; i++) |
1372 | sechdrs[i].sh_entsize = ~0UL; | 1372 | sechdrs[i].sh_entsize = ~0UL; |
1373 | 1373 | ||
1374 | DEBUGP("Core section allocation order:\n"); | 1374 | DEBUGP("Core section allocation order:\n"); |
1375 | for (m = 0; m < ARRAY_SIZE(masks); ++m) { | 1375 | for (m = 0; m < ARRAY_SIZE(masks); ++m) { |
1376 | for (i = 0; i < hdr->e_shnum; ++i) { | 1376 | for (i = 0; i < hdr->e_shnum; ++i) { |
1377 | Elf_Shdr *s = &sechdrs[i]; | 1377 | Elf_Shdr *s = &sechdrs[i]; |
1378 | 1378 | ||
1379 | if ((s->sh_flags & masks[m][0]) != masks[m][0] | 1379 | if ((s->sh_flags & masks[m][0]) != masks[m][0] |
1380 | || (s->sh_flags & masks[m][1]) | 1380 | || (s->sh_flags & masks[m][1]) |
1381 | || s->sh_entsize != ~0UL | 1381 | || s->sh_entsize != ~0UL |
1382 | || strncmp(secstrings + s->sh_name, | 1382 | || strncmp(secstrings + s->sh_name, |
1383 | ".init", 5) == 0) | 1383 | ".init", 5) == 0) |
1384 | continue; | 1384 | continue; |
1385 | s->sh_entsize = get_offset(&mod->core_size, s); | 1385 | s->sh_entsize = get_offset(&mod->core_size, s); |
1386 | DEBUGP("\t%s\n", secstrings + s->sh_name); | 1386 | DEBUGP("\t%s\n", secstrings + s->sh_name); |
1387 | } | 1387 | } |
1388 | if (m == 0) | 1388 | if (m == 0) |
1389 | mod->core_text_size = mod->core_size; | 1389 | mod->core_text_size = mod->core_size; |
1390 | } | 1390 | } |
1391 | 1391 | ||
1392 | DEBUGP("Init section allocation order:\n"); | 1392 | DEBUGP("Init section allocation order:\n"); |
1393 | for (m = 0; m < ARRAY_SIZE(masks); ++m) { | 1393 | for (m = 0; m < ARRAY_SIZE(masks); ++m) { |
1394 | for (i = 0; i < hdr->e_shnum; ++i) { | 1394 | for (i = 0; i < hdr->e_shnum; ++i) { |
1395 | Elf_Shdr *s = &sechdrs[i]; | 1395 | Elf_Shdr *s = &sechdrs[i]; |
1396 | 1396 | ||
1397 | if ((s->sh_flags & masks[m][0]) != masks[m][0] | 1397 | if ((s->sh_flags & masks[m][0]) != masks[m][0] |
1398 | || (s->sh_flags & masks[m][1]) | 1398 | || (s->sh_flags & masks[m][1]) |
1399 | || s->sh_entsize != ~0UL | 1399 | || s->sh_entsize != ~0UL |
1400 | || strncmp(secstrings + s->sh_name, | 1400 | || strncmp(secstrings + s->sh_name, |
1401 | ".init", 5) != 0) | 1401 | ".init", 5) != 0) |
1402 | continue; | 1402 | continue; |
1403 | s->sh_entsize = (get_offset(&mod->init_size, s) | 1403 | s->sh_entsize = (get_offset(&mod->init_size, s) |
1404 | | INIT_OFFSET_MASK); | 1404 | | INIT_OFFSET_MASK); |
1405 | DEBUGP("\t%s\n", secstrings + s->sh_name); | 1405 | DEBUGP("\t%s\n", secstrings + s->sh_name); |
1406 | } | 1406 | } |
1407 | if (m == 0) | 1407 | if (m == 0) |
1408 | mod->init_text_size = mod->init_size; | 1408 | mod->init_text_size = mod->init_size; |
1409 | } | 1409 | } |
1410 | } | 1410 | } |
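
layout_sections() only decides ordering; the packing arithmetic lives in get_offset(), defined earlier in this file. A rough sketch of it, inferred from the surrounding usage:

        /* Rough sketch of get_offset(): return the aligned offset for
         * this section inside the module image and grow the running
         * total.  A sh_addralign of 0 is treated as 1. */
        static long get_offset(unsigned long *size, Elf_Shdr *sechdr)
        {
                long ret;

                ret = ALIGN(*size, sechdr->sh_addralign ?: 1);
                *size = ret + sechdr->sh_size;
                return ret;
        }

Init-section offsets are additionally tagged with INIT_OFFSET_MASK (a high bit of sh_entsize), which is how the transfer loop in load_module() below tells init sections from core sections.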
1411 | 1411 | ||
1412 | static void set_license(struct module *mod, const char *license) | 1412 | static void set_license(struct module *mod, const char *license) |
1413 | { | 1413 | { |
1414 | if (!license) | 1414 | if (!license) |
1415 | license = "unspecified"; | 1415 | license = "unspecified"; |
1416 | 1416 | ||
1417 | if (!license_is_gpl_compatible(license)) { | 1417 | if (!license_is_gpl_compatible(license)) { |
1418 | if (!(tainted & TAINT_PROPRIETARY_MODULE)) | 1418 | if (!(tainted & TAINT_PROPRIETARY_MODULE)) |
1419 | printk(KERN_WARNING "%s: module license '%s' taints " | 1419 | printk(KERN_WARNING "%s: module license '%s' taints " |
1420 | "kernel.\n", mod->name, license); | 1420 | "kernel.\n", mod->name, license); |
1421 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | 1421 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); |
1422 | } | 1422 | } |
1423 | } | 1423 | } |
1424 | 1424 | ||
1425 | /* Parse tag=value strings from .modinfo section */ | 1425 | /* Parse tag=value strings from .modinfo section */ |
1426 | static char *next_string(char *string, unsigned long *secsize) | 1426 | static char *next_string(char *string, unsigned long *secsize) |
1427 | { | 1427 | { |
1428 | /* Skip non-zero chars */ | 1428 | /* Skip non-zero chars */ |
1429 | while (string[0]) { | 1429 | while (string[0]) { |
1430 | string++; | 1430 | string++; |
1431 | if ((*secsize)-- <= 1) | 1431 | if ((*secsize)-- <= 1) |
1432 | return NULL; | 1432 | return NULL; |
1433 | } | 1433 | } |
1434 | 1434 | ||
1435 | /* Skip any zero padding. */ | 1435 | /* Skip any zero padding. */ |
1436 | while (!string[0]) { | 1436 | while (!string[0]) { |
1437 | string++; | 1437 | string++; |
1438 | if ((*secsize)-- <= 1) | 1438 | if ((*secsize)-- <= 1) |
1439 | return NULL; | 1439 | return NULL; |
1440 | } | 1440 | } |
1441 | return string; | 1441 | return string; |
1442 | } | 1442 | } |
1443 | 1443 | ||
1444 | static char *get_modinfo(Elf_Shdr *sechdrs, | 1444 | static char *get_modinfo(Elf_Shdr *sechdrs, |
1445 | unsigned int info, | 1445 | unsigned int info, |
1446 | const char *tag) | 1446 | const char *tag) |
1447 | { | 1447 | { |
1448 | char *p; | 1448 | char *p; |
1449 | unsigned int taglen = strlen(tag); | 1449 | unsigned int taglen = strlen(tag); |
1450 | unsigned long size = sechdrs[info].sh_size; | 1450 | unsigned long size = sechdrs[info].sh_size; |
1451 | 1451 | ||
1452 | for (p = (char *)sechdrs[info].sh_addr; p; p = next_string(p, &size)) { | 1452 | for (p = (char *)sechdrs[info].sh_addr; p; p = next_string(p, &size)) { |
1453 | if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=') | 1453 | if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=') |
1454 | return p + taglen + 1; | 1454 | return p + taglen + 1; |
1455 | } | 1455 | } |
1456 | return NULL; | 1456 | return NULL; |
1457 | } | 1457 | } |
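
The .modinfo section is nothing more than a run of NUL-terminated "tag=value" strings, possibly with NUL padding between them; get_modinfo() returns a pointer just past the '='. A userspace illustration (hypothetical harness; assumes next_string() above is compiled in):

        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                char info[] = "license=GPL\0author=Jane Hacker\0vermagic=2.6.21";
                unsigned long size = sizeof(info);
                char *p;

                for (p = info; p; p = next_string(p, &size))
                        if (strncmp(p, "license", 7) == 0 && p[7] == '=')
                                printf("%s\n", p + 8);  /* prints "GPL" */
                return 0;
        }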
1458 | 1458 | ||
1459 | static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, | 1459 | static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, |
1460 | unsigned int infoindex) | 1460 | unsigned int infoindex) |
1461 | { | 1461 | { |
1462 | struct module_attribute *attr; | 1462 | struct module_attribute *attr; |
1463 | int i; | 1463 | int i; |
1464 | 1464 | ||
1465 | for (i = 0; (attr = modinfo_attrs[i]); i++) { | 1465 | for (i = 0; (attr = modinfo_attrs[i]); i++) { |
1466 | if (attr->setup) | 1466 | if (attr->setup) |
1467 | attr->setup(mod, | 1467 | attr->setup(mod, |
1468 | get_modinfo(sechdrs, | 1468 | get_modinfo(sechdrs, |
1469 | infoindex, | 1469 | infoindex, |
1470 | attr->attr.name)); | 1470 | attr->attr.name)); |
1471 | } | 1471 | } |
1472 | } | 1472 | } |
1473 | 1473 | ||
1474 | #ifdef CONFIG_KALLSYMS | 1474 | #ifdef CONFIG_KALLSYMS |
1475 | static int is_exported(const char *name, const struct module *mod) | 1475 | static int is_exported(const char *name, const struct module *mod) |
1476 | { | 1476 | { |
1477 | if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab)) | 1477 | if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab)) |
1478 | return 1; | 1478 | return 1; |
1479 | else | 1479 | else |
1480 | if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms)) | 1480 | if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms)) |
1481 | return 1; | 1481 | return 1; |
1482 | else | 1482 | else |
1483 | return 0; | 1483 | return 0; |
1484 | } | 1484 | } |
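
lookup_symbol() is defined earlier in this file; it is a plain linear scan over a kernel_symbol table, roughly:

        /* Roughly what the lookup_symbol() calls above do: scan the
         * export table [start, stop) for a name match. */
        static const struct kernel_symbol *lookup_symbol(const char *name,
                const struct kernel_symbol *start,
                const struct kernel_symbol *stop)
        {
                const struct kernel_symbol *ks;

                for (ks = start; ks < stop; ks++)
                        if (strcmp(ks->name, name) == 0)
                                return ks;
                return NULL;
        }

With mod == NULL it runs against the kernel's own __ksymtab; otherwise against the module's export table.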
1485 | 1485 | ||
1486 | /* As per nm */ | 1486 | /* As per nm */ |
1487 | static char elf_type(const Elf_Sym *sym, | 1487 | static char elf_type(const Elf_Sym *sym, |
1488 | Elf_Shdr *sechdrs, | 1488 | Elf_Shdr *sechdrs, |
1489 | const char *secstrings, | 1489 | const char *secstrings, |
1490 | struct module *mod) | 1490 | struct module *mod) |
1491 | { | 1491 | { |
1492 | if (ELF_ST_BIND(sym->st_info) == STB_WEAK) { | 1492 | if (ELF_ST_BIND(sym->st_info) == STB_WEAK) { |
1493 | if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT) | 1493 | if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT) |
1494 | return 'v'; | 1494 | return 'v'; |
1495 | else | 1495 | else |
1496 | return 'w'; | 1496 | return 'w'; |
1497 | } | 1497 | } |
1498 | if (sym->st_shndx == SHN_UNDEF) | 1498 | if (sym->st_shndx == SHN_UNDEF) |
1499 | return 'U'; | 1499 | return 'U'; |
1500 | if (sym->st_shndx == SHN_ABS) | 1500 | if (sym->st_shndx == SHN_ABS) |
1501 | return 'a'; | 1501 | return 'a'; |
1502 | if (sym->st_shndx >= SHN_LORESERVE) | 1502 | if (sym->st_shndx >= SHN_LORESERVE) |
1503 | return '?'; | 1503 | return '?'; |
1504 | if (sechdrs[sym->st_shndx].sh_flags & SHF_EXECINSTR) | 1504 | if (sechdrs[sym->st_shndx].sh_flags & SHF_EXECINSTR) |
1505 | return 't'; | 1505 | return 't'; |
1506 | if (sechdrs[sym->st_shndx].sh_flags & SHF_ALLOC | 1506 | if (sechdrs[sym->st_shndx].sh_flags & SHF_ALLOC |
1507 | && sechdrs[sym->st_shndx].sh_type != SHT_NOBITS) { | 1507 | && sechdrs[sym->st_shndx].sh_type != SHT_NOBITS) { |
1508 | if (!(sechdrs[sym->st_shndx].sh_flags & SHF_WRITE)) | 1508 | if (!(sechdrs[sym->st_shndx].sh_flags & SHF_WRITE)) |
1509 | return 'r'; | 1509 | return 'r'; |
1510 | else if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) | 1510 | else if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) |
1511 | return 'g'; | 1511 | return 'g'; |
1512 | else | 1512 | else |
1513 | return 'd'; | 1513 | return 'd'; |
1514 | } | 1514 | } |
1515 | if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) { | 1515 | if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) { |
1516 | if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) | 1516 | if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) |
1517 | return 's'; | 1517 | return 's'; |
1518 | else | 1518 | else |
1519 | return 'b'; | 1519 | return 'b'; |
1520 | } | 1520 | } |
1521 | if (strncmp(secstrings + sechdrs[sym->st_shndx].sh_name, | 1521 | if (strncmp(secstrings + sechdrs[sym->st_shndx].sh_name, |
1522 | ".debug", strlen(".debug")) == 0) | 1522 | ".debug", strlen(".debug")) == 0) |
1523 | return 'n'; | 1523 | return 'n'; |
1524 | return '?'; | 1524 | return '?'; |
1525 | } | 1525 | } |
1526 | 1526 | ||
1527 | static void add_kallsyms(struct module *mod, | 1527 | static void add_kallsyms(struct module *mod, |
1528 | Elf_Shdr *sechdrs, | 1528 | Elf_Shdr *sechdrs, |
1529 | unsigned int symindex, | 1529 | unsigned int symindex, |
1530 | unsigned int strindex, | 1530 | unsigned int strindex, |
1531 | const char *secstrings) | 1531 | const char *secstrings) |
1532 | { | 1532 | { |
1533 | unsigned int i; | 1533 | unsigned int i; |
1534 | 1534 | ||
1535 | mod->symtab = (void *)sechdrs[symindex].sh_addr; | 1535 | mod->symtab = (void *)sechdrs[symindex].sh_addr; |
1536 | mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); | 1536 | mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); |
1537 | mod->strtab = (void *)sechdrs[strindex].sh_addr; | 1537 | mod->strtab = (void *)sechdrs[strindex].sh_addr; |
1538 | 1538 | ||
1539 | /* Set types up while we still have access to sections. */ | 1539 | /* Set types up while we still have access to sections. */ |
1540 | for (i = 0; i < mod->num_symtab; i++) | 1540 | for (i = 0; i < mod->num_symtab; i++) |
1541 | mod->symtab[i].st_info | 1541 | mod->symtab[i].st_info |
1542 | = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); | 1542 | = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); |
1543 | } | 1543 | } |
1544 | #else | 1544 | #else |
1545 | static inline void add_kallsyms(struct module *mod, | 1545 | static inline void add_kallsyms(struct module *mod, |
1546 | Elf_Shdr *sechdrs, | 1546 | Elf_Shdr *sechdrs, |
1547 | unsigned int symindex, | 1547 | unsigned int symindex, |
1548 | unsigned int strindex, | 1548 | unsigned int strindex, |
1549 | const char *secstrings) | 1549 | const char *secstrings) |
1550 | { | 1550 | { |
1551 | } | 1551 | } |
1552 | #endif /* CONFIG_KALLSYMS */ | 1552 | #endif /* CONFIG_KALLSYMS */ |
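
Note what add_kallsyms() just did: it overwrote each symbol's st_info with the nm-style letter from elf_type() while the section headers were still mapped. Nothing downstream needs the raw ELF binding/type byte, and the swap is what lets module_get_kallsym() below hand back a type, and mod_find_symname() skip undefined symbols by comparing against 'U', long after the temporary image is gone:

        /* After add_kallsyms(), st_info holds a character, not ELF bits: */
        char type = mod->symtab[i].st_info;     /* 't', 'd', 'b', 'U', ... */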
1553 | 1553 | ||
1554 | /* Allocate and load the module: note that size of section 0 is always | 1554 | /* Allocate and load the module: note that size of section 0 is always |
1555 | zero, and we rely on this for optional sections. */ | 1555 | zero, and we rely on this for optional sections. */ |
1556 | static struct module *load_module(void __user *umod, | 1556 | static struct module *load_module(void __user *umod, |
1557 | unsigned long len, | 1557 | unsigned long len, |
1558 | const char __user *uargs) | 1558 | const char __user *uargs) |
1559 | { | 1559 | { |
1560 | Elf_Ehdr *hdr; | 1560 | Elf_Ehdr *hdr; |
1561 | Elf_Shdr *sechdrs; | 1561 | Elf_Shdr *sechdrs; |
1562 | char *secstrings, *args, *modmagic, *strtab = NULL; | 1562 | char *secstrings, *args, *modmagic, *strtab = NULL; |
1563 | unsigned int i; | 1563 | unsigned int i; |
1564 | unsigned int symindex = 0; | 1564 | unsigned int symindex = 0; |
1565 | unsigned int strindex = 0; | 1565 | unsigned int strindex = 0; |
1566 | unsigned int setupindex; | 1566 | unsigned int setupindex; |
1567 | unsigned int exindex; | 1567 | unsigned int exindex; |
1568 | unsigned int exportindex; | 1568 | unsigned int exportindex; |
1569 | unsigned int modindex; | 1569 | unsigned int modindex; |
1570 | unsigned int obsparmindex; | 1570 | unsigned int obsparmindex; |
1571 | unsigned int infoindex; | 1571 | unsigned int infoindex; |
1572 | unsigned int gplindex; | 1572 | unsigned int gplindex; |
1573 | unsigned int crcindex; | 1573 | unsigned int crcindex; |
1574 | unsigned int gplcrcindex; | 1574 | unsigned int gplcrcindex; |
1575 | unsigned int versindex; | 1575 | unsigned int versindex; |
1576 | unsigned int pcpuindex; | 1576 | unsigned int pcpuindex; |
1577 | unsigned int gplfutureindex; | 1577 | unsigned int gplfutureindex; |
1578 | unsigned int gplfuturecrcindex; | 1578 | unsigned int gplfuturecrcindex; |
1579 | unsigned int unwindex = 0; | 1579 | unsigned int unwindex = 0; |
1580 | unsigned int unusedindex; | 1580 | unsigned int unusedindex; |
1581 | unsigned int unusedcrcindex; | 1581 | unsigned int unusedcrcindex; |
1582 | unsigned int unusedgplindex; | 1582 | unsigned int unusedgplindex; |
1583 | unsigned int unusedgplcrcindex; | 1583 | unsigned int unusedgplcrcindex; |
1584 | struct module *mod; | 1584 | struct module *mod; |
1585 | long err = 0; | 1585 | long err = 0; |
1586 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ | 1586 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ |
1587 | struct exception_table_entry *extable; | 1587 | struct exception_table_entry *extable; |
1588 | mm_segment_t old_fs; | 1588 | mm_segment_t old_fs; |
1589 | 1589 | ||
1590 | DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", | 1590 | DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", |
1591 | umod, len, uargs); | 1591 | umod, len, uargs); |
1592 | if (len < sizeof(*hdr)) | 1592 | if (len < sizeof(*hdr)) |
1593 | return ERR_PTR(-ENOEXEC); | 1593 | return ERR_PTR(-ENOEXEC); |
1594 | 1594 | ||
1595 | /* Suck in entire file: we'll want most of it. */ | 1595 | /* Suck in entire file: we'll want most of it. */ |
1596 | /* vmalloc barfs on "unusual" numbers. Check here */ | 1596 | /* vmalloc barfs on "unusual" numbers. Check here */ |
1597 | if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) | 1597 | if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) |
1598 | return ERR_PTR(-ENOMEM); | 1598 | return ERR_PTR(-ENOMEM); |
1599 | if (copy_from_user(hdr, umod, len) != 0) { | 1599 | if (copy_from_user(hdr, umod, len) != 0) { |
1600 | err = -EFAULT; | 1600 | err = -EFAULT; |
1601 | goto free_hdr; | 1601 | goto free_hdr; |
1602 | } | 1602 | } |
1603 | 1603 | ||
1604 | /* Sanity checks against insmoding binaries or wrong arch, | 1604 | /* Sanity checks against insmoding binaries or wrong arch, |
1605 | weird elf version */ | 1605 | weird elf version */ |
1606 | if (memcmp(hdr->e_ident, ELFMAG, 4) != 0 | 1606 | if (memcmp(hdr->e_ident, ELFMAG, 4) != 0 |
1607 | || hdr->e_type != ET_REL | 1607 | || hdr->e_type != ET_REL |
1608 | || !elf_check_arch(hdr) | 1608 | || !elf_check_arch(hdr) |
1609 | || hdr->e_shentsize != sizeof(*sechdrs)) { | 1609 | || hdr->e_shentsize != sizeof(*sechdrs)) { |
1610 | err = -ENOEXEC; | 1610 | err = -ENOEXEC; |
1611 | goto free_hdr; | 1611 | goto free_hdr; |
1612 | } | 1612 | } |
1613 | 1613 | ||
1614 | if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) | 1614 | if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) |
1615 | goto truncated; | 1615 | goto truncated; |
1616 | 1616 | ||
1617 | /* Convenience variables */ | 1617 | /* Convenience variables */ |
1618 | sechdrs = (void *)hdr + hdr->e_shoff; | 1618 | sechdrs = (void *)hdr + hdr->e_shoff; |
1619 | secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | 1619 | secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; |
1620 | sechdrs[0].sh_addr = 0; | 1620 | sechdrs[0].sh_addr = 0; |
1621 | 1621 | ||
1622 | for (i = 1; i < hdr->e_shnum; i++) { | 1622 | for (i = 1; i < hdr->e_shnum; i++) { |
1623 | if (sechdrs[i].sh_type != SHT_NOBITS | 1623 | if (sechdrs[i].sh_type != SHT_NOBITS |
1624 | && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) | 1624 | && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) |
1625 | goto truncated; | 1625 | goto truncated; |
1626 | 1626 | ||
1627 | /* Mark each section's sh_addr with its address in the | 1627 | /* Mark each section's sh_addr with its address in the |
1628 | temporary image. */ | 1628 | temporary image. */ |
1629 | sechdrs[i].sh_addr = (size_t)hdr + sechdrs[i].sh_offset; | 1629 | sechdrs[i].sh_addr = (size_t)hdr + sechdrs[i].sh_offset; |
1630 | 1630 | ||
1631 | /* Internal symbols and strings. */ | 1631 | /* Internal symbols and strings. */ |
1632 | if (sechdrs[i].sh_type == SHT_SYMTAB) { | 1632 | if (sechdrs[i].sh_type == SHT_SYMTAB) { |
1633 | symindex = i; | 1633 | symindex = i; |
1634 | strindex = sechdrs[i].sh_link; | 1634 | strindex = sechdrs[i].sh_link; |
1635 | strtab = (char *)hdr + sechdrs[strindex].sh_offset; | 1635 | strtab = (char *)hdr + sechdrs[strindex].sh_offset; |
1636 | } | 1636 | } |
1637 | #ifndef CONFIG_MODULE_UNLOAD | 1637 | #ifndef CONFIG_MODULE_UNLOAD |
1638 | /* Don't load .exit sections */ | 1638 | /* Don't load .exit sections */ |
1639 | if (strncmp(secstrings+sechdrs[i].sh_name, ".exit", 5) == 0) | 1639 | if (strncmp(secstrings+sechdrs[i].sh_name, ".exit", 5) == 0) |
1640 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1640 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; |
1641 | #endif | 1641 | #endif |
1642 | } | 1642 | } |
1643 | 1643 | ||
1644 | modindex = find_sec(hdr, sechdrs, secstrings, | 1644 | modindex = find_sec(hdr, sechdrs, secstrings, |
1645 | ".gnu.linkonce.this_module"); | 1645 | ".gnu.linkonce.this_module"); |
1646 | if (!modindex) { | 1646 | if (!modindex) { |
1647 | printk(KERN_WARNING "No module found in object\n"); | 1647 | printk(KERN_WARNING "No module found in object\n"); |
1648 | err = -ENOEXEC; | 1648 | err = -ENOEXEC; |
1649 | goto free_hdr; | 1649 | goto free_hdr; |
1650 | } | 1650 | } |
1651 | mod = (void *)sechdrs[modindex].sh_addr; | 1651 | mod = (void *)sechdrs[modindex].sh_addr; |
1652 | 1652 | ||
1653 | if (symindex == 0) { | 1653 | if (symindex == 0) { |
1654 | printk(KERN_WARNING "%s: module has no symbols (stripped?)\n", | 1654 | printk(KERN_WARNING "%s: module has no symbols (stripped?)\n", |
1655 | mod->name); | 1655 | mod->name); |
1656 | err = -ENOEXEC; | 1656 | err = -ENOEXEC; |
1657 | goto free_hdr; | 1657 | goto free_hdr; |
1658 | } | 1658 | } |
1659 | 1659 | ||
1660 | /* Optional sections */ | 1660 | /* Optional sections */ |
1661 | exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab"); | 1661 | exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab"); |
1662 | gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl"); | 1662 | gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl"); |
1663 | gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future"); | 1663 | gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future"); |
1664 | unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused"); | 1664 | unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused"); |
1665 | unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl"); | 1665 | unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl"); |
1666 | crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab"); | 1666 | crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab"); |
1667 | gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl"); | 1667 | gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl"); |
1668 | gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future"); | 1668 | gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future"); |
1669 | unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused"); | 1669 | unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused"); |
1670 | unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl"); | 1670 | unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl"); |
1671 | setupindex = find_sec(hdr, sechdrs, secstrings, "__param"); | 1671 | setupindex = find_sec(hdr, sechdrs, secstrings, "__param"); |
1672 | exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table"); | 1672 | exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table"); |
1673 | obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm"); | 1673 | obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm"); |
1674 | versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); | 1674 | versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); |
1675 | infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); | 1675 | infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); |
1676 | pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); | 1676 | pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); |
1677 | #ifdef ARCH_UNWIND_SECTION_NAME | 1677 | #ifdef ARCH_UNWIND_SECTION_NAME |
1678 | unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); | 1678 | unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); |
1679 | #endif | 1679 | #endif |
1680 | 1680 | ||
1681 | /* Don't keep modinfo section */ | 1681 | /* Don't keep modinfo section */ |
1682 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1682 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
1683 | #ifdef CONFIG_KALLSYMS | 1683 | #ifdef CONFIG_KALLSYMS |
1684 | /* Keep symbol and string tables for decoding later. */ | 1684 | /* Keep symbol and string tables for decoding later. */ |
1685 | sechdrs[symindex].sh_flags |= SHF_ALLOC; | 1685 | sechdrs[symindex].sh_flags |= SHF_ALLOC; |
1686 | sechdrs[strindex].sh_flags |= SHF_ALLOC; | 1686 | sechdrs[strindex].sh_flags |= SHF_ALLOC; |
1687 | #endif | 1687 | #endif |
1688 | if (unwindex) | 1688 | if (unwindex) |
1689 | sechdrs[unwindex].sh_flags |= SHF_ALLOC; | 1689 | sechdrs[unwindex].sh_flags |= SHF_ALLOC; |
1690 | 1690 | ||
1691 | /* Check module struct version now, before we try to use the module. */ | 1691 | /* Check module struct version now, before we try to use the module. */ |
1692 | if (!check_modstruct_version(sechdrs, versindex, mod)) { | 1692 | if (!check_modstruct_version(sechdrs, versindex, mod)) { |
1693 | err = -ENOEXEC; | 1693 | err = -ENOEXEC; |
1694 | goto free_hdr; | 1694 | goto free_hdr; |
1695 | } | 1695 | } |
1696 | 1696 | ||
1697 | modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); | 1697 | modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); |
1698 | /* This is allowed: modprobe --force will invalidate it. */ | 1698 | /* This is allowed: modprobe --force will invalidate it. */ |
1699 | if (!modmagic) { | 1699 | if (!modmagic) { |
1700 | add_taint_module(mod, TAINT_FORCED_MODULE); | 1700 | add_taint_module(mod, TAINT_FORCED_MODULE); |
1701 | printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", | 1701 | printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", |
1702 | mod->name); | 1702 | mod->name); |
1703 | } else if (!same_magic(modmagic, vermagic)) { | 1703 | } else if (!same_magic(modmagic, vermagic)) { |
1704 | printk(KERN_ERR "%s: version magic '%s' should be '%s'\n", | 1704 | printk(KERN_ERR "%s: version magic '%s' should be '%s'\n", |
1705 | mod->name, modmagic, vermagic); | 1705 | mod->name, modmagic, vermagic); |
1706 | err = -ENOEXEC; | 1706 | err = -ENOEXEC; |
1707 | goto free_hdr; | 1707 | goto free_hdr; |
1708 | } | 1708 | } |
1709 | 1709 | ||
1710 | /* Now copy in args */ | 1710 | /* Now copy in args */ |
1711 | args = strndup_user(uargs, ~0UL >> 1); | 1711 | args = strndup_user(uargs, ~0UL >> 1); |
1712 | if (IS_ERR(args)) { | 1712 | if (IS_ERR(args)) { |
1713 | err = PTR_ERR(args); | 1713 | err = PTR_ERR(args); |
1714 | goto free_hdr; | 1714 | goto free_hdr; |
1715 | } | 1715 | } |
1716 | 1716 | ||
1717 | if (find_module(mod->name)) { | 1717 | if (find_module(mod->name)) { |
1718 | err = -EEXIST; | 1718 | err = -EEXIST; |
1719 | goto free_mod; | 1719 | goto free_mod; |
1720 | } | 1720 | } |
1721 | 1721 | ||
1722 | mod->state = MODULE_STATE_COMING; | 1722 | mod->state = MODULE_STATE_COMING; |
1723 | 1723 | ||
1724 | /* Allow arches to frob section contents and sizes. */ | 1724 | /* Allow arches to frob section contents and sizes. */ |
1725 | err = module_frob_arch_sections(hdr, sechdrs, secstrings, mod); | 1725 | err = module_frob_arch_sections(hdr, sechdrs, secstrings, mod); |
1726 | if (err < 0) | 1726 | if (err < 0) |
1727 | goto free_mod; | 1727 | goto free_mod; |
1728 | 1728 | ||
1729 | if (pcpuindex) { | 1729 | if (pcpuindex) { |
1730 | /* We have a special allocation for this section. */ | 1730 | /* We have a special allocation for this section. */ |
1731 | percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, | 1731 | percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, |
1732 | sechdrs[pcpuindex].sh_addralign, | 1732 | sechdrs[pcpuindex].sh_addralign, |
1733 | mod->name); | 1733 | mod->name); |
1734 | if (!percpu) { | 1734 | if (!percpu) { |
1735 | err = -ENOMEM; | 1735 | err = -ENOMEM; |
1736 | goto free_mod; | 1736 | goto free_mod; |
1737 | } | 1737 | } |
1738 | sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1738 | sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
1739 | mod->percpu = percpu; | 1739 | mod->percpu = percpu; |
1740 | } | 1740 | } |
1741 | 1741 | ||
1742 | /* Determine total sizes, and put offsets in sh_entsize. For now | 1742 | /* Determine total sizes, and put offsets in sh_entsize. For now |
1743 | this is done generically; there doesn't appear to be any | 1743 | this is done generically; there doesn't appear to be any |
1744 | special cases for the architectures. */ | 1744 | special cases for the architectures. */ |
1745 | layout_sections(mod, hdr, sechdrs, secstrings); | 1745 | layout_sections(mod, hdr, sechdrs, secstrings); |
1746 | 1746 | ||
1747 | /* Do the allocs. */ | 1747 | /* Do the allocs. */ |
1748 | ptr = module_alloc(mod->core_size); | 1748 | ptr = module_alloc(mod->core_size); |
1749 | if (!ptr) { | 1749 | if (!ptr) { |
1750 | err = -ENOMEM; | 1750 | err = -ENOMEM; |
1751 | goto free_percpu; | 1751 | goto free_percpu; |
1752 | } | 1752 | } |
1753 | memset(ptr, 0, mod->core_size); | 1753 | memset(ptr, 0, mod->core_size); |
1754 | mod->module_core = ptr; | 1754 | mod->module_core = ptr; |
1755 | 1755 | ||
1756 | ptr = module_alloc(mod->init_size); | 1756 | ptr = module_alloc(mod->init_size); |
1757 | if (!ptr && mod->init_size) { | 1757 | if (!ptr && mod->init_size) { |
1758 | err = -ENOMEM; | 1758 | err = -ENOMEM; |
1759 | goto free_core; | 1759 | goto free_core; |
1760 | } | 1760 | } |
1761 | memset(ptr, 0, mod->init_size); | 1761 | memset(ptr, 0, mod->init_size); |
1762 | mod->module_init = ptr; | 1762 | mod->module_init = ptr; |
1763 | 1763 | ||
1764 | /* Transfer each section which specifies SHF_ALLOC */ | 1764 | /* Transfer each section which specifies SHF_ALLOC */ |
1765 | DEBUGP("final section addresses:\n"); | 1765 | DEBUGP("final section addresses:\n"); |
1766 | for (i = 0; i < hdr->e_shnum; i++) { | 1766 | for (i = 0; i < hdr->e_shnum; i++) { |
1767 | void *dest; | 1767 | void *dest; |
1768 | 1768 | ||
1769 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) | 1769 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) |
1770 | continue; | 1770 | continue; |
1771 | 1771 | ||
1772 | if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) | 1772 | if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) |
1773 | dest = mod->module_init | 1773 | dest = mod->module_init |
1774 | + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); | 1774 | + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); |
1775 | else | 1775 | else |
1776 | dest = mod->module_core + sechdrs[i].sh_entsize; | 1776 | dest = mod->module_core + sechdrs[i].sh_entsize; |
1777 | 1777 | ||
1778 | if (sechdrs[i].sh_type != SHT_NOBITS) | 1778 | if (sechdrs[i].sh_type != SHT_NOBITS) |
1779 | memcpy(dest, (void *)sechdrs[i].sh_addr, | 1779 | memcpy(dest, (void *)sechdrs[i].sh_addr, |
1780 | sechdrs[i].sh_size); | 1780 | sechdrs[i].sh_size); |
1781 | /* Update sh_addr to point to copy in image. */ | 1781 | /* Update sh_addr to point to copy in image. */ |
1782 | sechdrs[i].sh_addr = (unsigned long)dest; | 1782 | sechdrs[i].sh_addr = (unsigned long)dest; |
1783 | DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); | 1783 | DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); |
1784 | } | 1784 | } |
1785 | /* Module has been moved. */ | 1785 | /* Module has been moved. */ |
1786 | mod = (void *)sechdrs[modindex].sh_addr; | 1786 | mod = (void *)sechdrs[modindex].sh_addr; |
1787 | 1787 | ||
1788 | /* Now we've moved the module, initialize linked lists, etc. */ | 1788 | /* Now we've moved the module, initialize linked lists, etc. */ |
1789 | module_unload_init(mod); | 1789 | module_unload_init(mod); |
1790 | 1790 | ||
1791 | /* Initialize kobject, so we can reference it. */ | 1791 | /* Initialize kobject, so we can reference it. */ |
1792 | if (mod_sysfs_init(mod) != 0) | 1792 | if (mod_sysfs_init(mod) != 0) |
1793 | goto cleanup; | 1793 | goto cleanup; |
1794 | 1794 | ||
1795 | /* Set up license info based on the info section */ | 1795 | /* Set up license info based on the info section */ |
1796 | set_license(mod, get_modinfo(sechdrs, infoindex, "license")); | 1796 | set_license(mod, get_modinfo(sechdrs, infoindex, "license")); |
1797 | 1797 | ||
1798 | if (strcmp(mod->name, "ndiswrapper") == 0) | 1798 | if (strcmp(mod->name, "ndiswrapper") == 0) |
1799 | add_taint(TAINT_PROPRIETARY_MODULE); | 1799 | add_taint(TAINT_PROPRIETARY_MODULE); |
1800 | if (strcmp(mod->name, "driverloader") == 0) | 1800 | if (strcmp(mod->name, "driverloader") == 0) |
1801 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | 1801 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); |
1802 | 1802 | ||
1803 | /* Set up MODINFO_ATTR fields */ | 1803 | /* Set up MODINFO_ATTR fields */ |
1804 | setup_modinfo(mod, sechdrs, infoindex); | 1804 | setup_modinfo(mod, sechdrs, infoindex); |
1805 | 1805 | ||
1806 | /* Fix up syms, so that st_value is a pointer to location. */ | 1806 | /* Fix up syms, so that st_value is a pointer to location. */ |
1807 | err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex, | 1807 | err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex, |
1808 | mod); | 1808 | mod); |
1809 | if (err < 0) | 1809 | if (err < 0) |
1810 | goto cleanup; | 1810 | goto cleanup; |
1811 | 1811 | ||
1812 | /* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */ | 1812 | /* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */ |
1813 | mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms); | 1813 | mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms); |
1814 | mod->syms = (void *)sechdrs[exportindex].sh_addr; | 1814 | mod->syms = (void *)sechdrs[exportindex].sh_addr; |
1815 | if (crcindex) | 1815 | if (crcindex) |
1816 | mod->crcs = (void *)sechdrs[crcindex].sh_addr; | 1816 | mod->crcs = (void *)sechdrs[crcindex].sh_addr; |
1817 | mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms); | 1817 | mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms); |
1818 | mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr; | 1818 | mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr; |
1819 | if (gplcrcindex) | 1819 | if (gplcrcindex) |
1820 | mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr; | 1820 | mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr; |
1821 | mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size / | 1821 | mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size / |
1822 | sizeof(*mod->gpl_future_syms); | 1822 | sizeof(*mod->gpl_future_syms); |
1823 | mod->num_unused_syms = sechdrs[unusedindex].sh_size / | 1823 | mod->num_unused_syms = sechdrs[unusedindex].sh_size / |
1824 | sizeof(*mod->unused_syms); | 1824 | sizeof(*mod->unused_syms); |
1825 | mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size / | 1825 | mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size / |
1826 | sizeof(*mod->unused_gpl_syms); | 1826 | sizeof(*mod->unused_gpl_syms); |
1827 | mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr; | 1827 | mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr; |
1828 | if (gplfuturecrcindex) | 1828 | if (gplfuturecrcindex) |
1829 | mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr; | 1829 | mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr; |
1830 | 1830 | ||
1831 | mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr; | 1831 | mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr; |
1832 | if (unusedcrcindex) | 1832 | if (unusedcrcindex) |
1833 | mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr; | 1833 | mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr; |
1834 | mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr; | 1834 | mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr; |
1835 | if (unusedgplcrcindex) | 1835 | if (unusedgplcrcindex) |
1836 | mod->unused_gpl_crcs = (void *)sechdrs[unusedgplcrcindex].sh_addr; | 1836 | mod->unused_gpl_crcs = (void *)sechdrs[unusedgplcrcindex].sh_addr; |
1837 | 1837 | ||
1838 | #ifdef CONFIG_MODVERSIONS | 1838 | #ifdef CONFIG_MODVERSIONS |
1839 | if ((mod->num_syms && !crcindex) || | 1839 | if ((mod->num_syms && !crcindex) || |
1840 | (mod->num_gpl_syms && !gplcrcindex) || | 1840 | (mod->num_gpl_syms && !gplcrcindex) || |
1841 | (mod->num_gpl_future_syms && !gplfuturecrcindex) || | 1841 | (mod->num_gpl_future_syms && !gplfuturecrcindex) || |
1842 | (mod->num_unused_syms && !unusedcrcindex) || | 1842 | (mod->num_unused_syms && !unusedcrcindex) || |
1843 | (mod->num_unused_gpl_syms && !unusedgplcrcindex)) { | 1843 | (mod->num_unused_gpl_syms && !unusedgplcrcindex)) { |
1844 | printk(KERN_WARNING "%s: No versions for exported symbols." | 1844 | printk(KERN_WARNING "%s: No versions for exported symbols." |
1845 | " Tainting kernel.\n", mod->name); | 1845 | " Tainting kernel.\n", mod->name); |
1846 | add_taint_module(mod, TAINT_FORCED_MODULE); | 1846 | add_taint_module(mod, TAINT_FORCED_MODULE); |
1847 | } | 1847 | } |
1848 | #endif | 1848 | #endif |
1849 | 1849 | ||
1850 | /* Now do relocations. */ | 1850 | /* Now do relocations. */ |
1851 | for (i = 1; i < hdr->e_shnum; i++) { | 1851 | for (i = 1; i < hdr->e_shnum; i++) { |
1852 | const char *strtab = (char *)sechdrs[strindex].sh_addr; | 1852 | const char *strtab = (char *)sechdrs[strindex].sh_addr; |
1853 | unsigned int info = sechdrs[i].sh_info; | 1853 | unsigned int info = sechdrs[i].sh_info; |
1854 | 1854 | ||
1855 | /* Not a valid relocation section? */ | 1855 | /* Not a valid relocation section? */ |
1856 | if (info >= hdr->e_shnum) | 1856 | if (info >= hdr->e_shnum) |
1857 | continue; | 1857 | continue; |
1858 | 1858 | ||
1859 | /* Don't bother with non-allocated sections */ | 1859 | /* Don't bother with non-allocated sections */ |
1860 | if (!(sechdrs[info].sh_flags & SHF_ALLOC)) | 1860 | if (!(sechdrs[info].sh_flags & SHF_ALLOC)) |
1861 | continue; | 1861 | continue; |
1862 | 1862 | ||
1863 | if (sechdrs[i].sh_type == SHT_REL) | 1863 | if (sechdrs[i].sh_type == SHT_REL) |
1864 | err = apply_relocate(sechdrs, strtab, symindex, i, mod); | 1864 | err = apply_relocate(sechdrs, strtab, symindex, i, mod); |
1865 | else if (sechdrs[i].sh_type == SHT_RELA) | 1865 | else if (sechdrs[i].sh_type == SHT_RELA) |
1866 | err = apply_relocate_add(sechdrs, strtab, symindex, i, | 1866 | err = apply_relocate_add(sechdrs, strtab, symindex, i, |
1867 | mod); | 1867 | mod); |
1868 | if (err < 0) | 1868 | if (err < 0) |
1869 | goto cleanup; | 1869 | goto cleanup; |
1870 | } | 1870 | } |
1871 | 1871 | ||
1872 | /* Find duplicate symbols */ | 1872 | /* Find duplicate symbols */ |
1873 | err = verify_export_symbols(mod); | 1873 | err = verify_export_symbols(mod); |
1874 | 1874 | ||
1875 | if (err < 0) | 1875 | if (err < 0) |
1876 | goto cleanup; | 1876 | goto cleanup; |
1877 | 1877 | ||
1878 | /* Set up and sort exception table */ | 1878 | /* Set up and sort exception table */ |
1879 | mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); | 1879 | mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); |
1880 | mod->extable = extable = (void *)sechdrs[exindex].sh_addr; | 1880 | mod->extable = extable = (void *)sechdrs[exindex].sh_addr; |
1881 | sort_extable(extable, extable + mod->num_exentries); | 1881 | sort_extable(extable, extable + mod->num_exentries); |
1882 | 1882 | ||
1883 | /* Finally, copy percpu area over. */ | 1883 | /* Finally, copy percpu area over. */ |
1884 | percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, | 1884 | percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, |
1885 | sechdrs[pcpuindex].sh_size); | 1885 | sechdrs[pcpuindex].sh_size); |
1886 | 1886 | ||
1887 | add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); | 1887 | add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); |
1888 | 1888 | ||
1889 | err = module_finalize(hdr, sechdrs, mod); | 1889 | err = module_finalize(hdr, sechdrs, mod); |
1890 | if (err < 0) | 1890 | if (err < 0) |
1891 | goto cleanup; | 1891 | goto cleanup; |
1892 | 1892 | ||
1893 | /* flush the icache in correct context */ | 1893 | /* flush the icache in correct context */ |
1894 | old_fs = get_fs(); | 1894 | old_fs = get_fs(); |
1895 | set_fs(KERNEL_DS); | 1895 | set_fs(KERNEL_DS); |
1896 | 1896 | ||
1897 | /* | 1897 | /* |
1898 | * Flush the instruction cache, since we've played with text. | 1898 | * Flush the instruction cache, since we've played with text. |
1899 | * Do it before processing of module parameters, so the module | 1899 | * Do it before processing of module parameters, so the module |
1900 | * can provide parameter accessor functions of its own. | 1900 | * can provide parameter accessor functions of its own. |
1901 | */ | 1901 | */ |
1902 | if (mod->module_init) | 1902 | if (mod->module_init) |
1903 | flush_icache_range((unsigned long)mod->module_init, | 1903 | flush_icache_range((unsigned long)mod->module_init, |
1904 | (unsigned long)mod->module_init | 1904 | (unsigned long)mod->module_init |
1905 | + mod->init_size); | 1905 | + mod->init_size); |
1906 | flush_icache_range((unsigned long)mod->module_core, | 1906 | flush_icache_range((unsigned long)mod->module_core, |
1907 | (unsigned long)mod->module_core + mod->core_size); | 1907 | (unsigned long)mod->module_core + mod->core_size); |
1908 | 1908 | ||
1909 | set_fs(old_fs); | 1909 | set_fs(old_fs); |
1910 | 1910 | ||
1911 | mod->args = args; | 1911 | mod->args = args; |
1912 | if (obsparmindex) | 1912 | if (obsparmindex) |
1913 | printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", | 1913 | printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", |
1914 | mod->name); | 1914 | mod->name); |
1915 | 1915 | ||
1916 | /* Size of section 0 is 0, so this works well if no params */ | 1916 | /* Size of section 0 is 0, so this works well if no params */ |
1917 | err = parse_args(mod->name, mod->args, | 1917 | err = parse_args(mod->name, mod->args, |
1918 | (struct kernel_param *) | 1918 | (struct kernel_param *) |
1919 | sechdrs[setupindex].sh_addr, | 1919 | sechdrs[setupindex].sh_addr, |
1920 | sechdrs[setupindex].sh_size | 1920 | sechdrs[setupindex].sh_size |
1921 | / sizeof(struct kernel_param), | 1921 | / sizeof(struct kernel_param), |
1922 | NULL); | 1922 | NULL); |
1923 | if (err < 0) | 1923 | if (err < 0) |
1924 | goto arch_cleanup; | 1924 | goto arch_cleanup; |
1925 | 1925 | ||
1926 | err = mod_sysfs_setup(mod, | 1926 | err = mod_sysfs_setup(mod, |
1927 | (struct kernel_param *) | 1927 | (struct kernel_param *) |
1928 | sechdrs[setupindex].sh_addr, | 1928 | sechdrs[setupindex].sh_addr, |
1929 | sechdrs[setupindex].sh_size | 1929 | sechdrs[setupindex].sh_size |
1930 | / sizeof(struct kernel_param)); | 1930 | / sizeof(struct kernel_param)); |
1931 | if (err < 0) | 1931 | if (err < 0) |
1932 | goto arch_cleanup; | 1932 | goto arch_cleanup; |
1933 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); | 1933 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); |
1934 | 1934 | ||
1935 | /* Size of section 0 is 0, so this works well if no unwind info. */ | 1935 | /* Size of section 0 is 0, so this works well if no unwind info. */ |
1936 | mod->unwind_info = unwind_add_table(mod, | 1936 | mod->unwind_info = unwind_add_table(mod, |
1937 | (void *)sechdrs[unwindex].sh_addr, | 1937 | (void *)sechdrs[unwindex].sh_addr, |
1938 | sechdrs[unwindex].sh_size); | 1938 | sechdrs[unwindex].sh_size); |
1939 | 1939 | ||
1940 | /* Get rid of temporary copy */ | 1940 | /* Get rid of temporary copy */ |
1941 | vfree(hdr); | 1941 | vfree(hdr); |
1942 | 1942 | ||
1943 | /* Done! */ | 1943 | /* Done! */ |
1944 | return mod; | 1944 | return mod; |
1945 | 1945 | ||
1946 | arch_cleanup: | 1946 | arch_cleanup: |
1947 | module_arch_cleanup(mod); | 1947 | module_arch_cleanup(mod); |
1948 | cleanup: | 1948 | cleanup: |
1949 | module_unload_free(mod); | 1949 | module_unload_free(mod); |
1950 | module_free(mod, mod->module_init); | 1950 | module_free(mod, mod->module_init); |
1951 | free_core: | 1951 | free_core: |
1952 | module_free(mod, mod->module_core); | 1952 | module_free(mod, mod->module_core); |
1953 | free_percpu: | 1953 | free_percpu: |
1954 | if (percpu) | 1954 | if (percpu) |
1955 | percpu_modfree(percpu); | 1955 | percpu_modfree(percpu); |
1956 | free_mod: | 1956 | free_mod: |
1957 | kfree(args); | 1957 | kfree(args); |
1958 | free_hdr: | 1958 | free_hdr: |
1959 | vfree(hdr); | 1959 | vfree(hdr); |
1960 | return ERR_PTR(err); | 1960 | return ERR_PTR(err); |
1961 | 1961 | ||
1962 | truncated: | 1962 | truncated: |
1963 | printk(KERN_ERR "Module len %lu truncated\n", len); | 1963 | printk(KERN_ERR "Module len %lu truncated\n", len); |
1964 | err = -ENOEXEC; | 1964 | err = -ENOEXEC; |
1965 | goto free_hdr; | 1965 | goto free_hdr; |
1966 | } | 1966 | } |
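
The label ladder above is the standard kernel unwind idiom: labels appear in reverse allocation order, so each failure point jumps to the label that releases exactly what has been set up so far. In miniature, with hypothetical helpers and context struct:

        static int setup_two(struct ctx *c)
        {
                c->a = alloc_a();
                if (!c->a)
                        goto fail;
                c->b = alloc_b();
                if (!c->b)
                        goto free_a;
                return 0;               /* both held by c on success */

        free_a:
                free_a(c->a);
        fail:
                return -ENOMEM;
        }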
1967 | 1967 | ||
1968 | /* | 1968 | /* |
1969 | * link the module while the whole machine is stopped with interrupts off | 1969 | * link the module while the whole machine is stopped with interrupts off |
1970 | * - this defends against kallsyms not taking locks | 1970 | * - this defends against kallsyms not taking locks |
1971 | */ | 1971 | */ |
1972 | static int __link_module(void *_mod) | 1972 | static int __link_module(void *_mod) |
1973 | { | 1973 | { |
1974 | struct module *mod = _mod; | 1974 | struct module *mod = _mod; |
1975 | list_add(&mod->list, &modules); | 1975 | list_add(&mod->list, &modules); |
1976 | return 0; | 1976 | return 0; |
1977 | } | 1977 | } |
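
__link_module() runs under stop_machine_run(): the callback executes on one CPU while every other CPU spins with interrupts off, so the unlocked list walkers in the kallsyms paths can never observe a half-updated modules list. The calling pattern, with a hypothetical callback and flag:

        static int set_flag(void *arg)
        {
                /* Runs while all other CPUs are quiesced. */
                *(int *)arg = 1;
                return 0;
        }

        stop_machine_run(set_flag, &flag, NR_CPUS);     /* NR_CPUS: any CPU may run it */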
1978 | 1978 | ||
1979 | /* This is where the real work happens */ | 1979 | /* This is where the real work happens */ |
1980 | asmlinkage long | 1980 | asmlinkage long |
1981 | sys_init_module(void __user *umod, | 1981 | sys_init_module(void __user *umod, |
1982 | unsigned long len, | 1982 | unsigned long len, |
1983 | const char __user *uargs) | 1983 | const char __user *uargs) |
1984 | { | 1984 | { |
1985 | struct module *mod; | 1985 | struct module *mod; |
1986 | int ret = 0; | 1986 | int ret = 0; |
1987 | 1987 | ||
1988 | /* Must have permission */ | 1988 | /* Must have permission */ |
1989 | if (!capable(CAP_SYS_MODULE)) | 1989 | if (!capable(CAP_SYS_MODULE)) |
1990 | return -EPERM; | 1990 | return -EPERM; |
1991 | 1991 | ||
1992 | /* Only one module load at a time, please */ | 1992 | /* Only one module load at a time, please */ |
1993 | if (mutex_lock_interruptible(&module_mutex) != 0) | 1993 | if (mutex_lock_interruptible(&module_mutex) != 0) |
1994 | return -EINTR; | 1994 | return -EINTR; |
1995 | 1995 | ||
1996 | /* Do all the hard work */ | 1996 | /* Do all the hard work */ |
1997 | mod = load_module(umod, len, uargs); | 1997 | mod = load_module(umod, len, uargs); |
1998 | if (IS_ERR(mod)) { | 1998 | if (IS_ERR(mod)) { |
1999 | mutex_unlock(&module_mutex); | 1999 | mutex_unlock(&module_mutex); |
2000 | return PTR_ERR(mod); | 2000 | return PTR_ERR(mod); |
2001 | } | 2001 | } |
2002 | 2002 | ||
2003 | /* Now sew it into the lists. They won't access us, since | 2003 | /* Now sew it into the lists. They won't access us, since |
2004 | strong_try_module_get() will fail. */ | 2004 | strong_try_module_get() will fail. */ |
2005 | stop_machine_run(__link_module, mod, NR_CPUS); | 2005 | stop_machine_run(__link_module, mod, NR_CPUS); |
2006 | 2006 | ||
2007 | /* Drop lock so they can recurse */ | 2007 | /* Drop lock so they can recurse */ |
2008 | mutex_unlock(&module_mutex); | 2008 | mutex_unlock(&module_mutex); |
2009 | 2009 | ||
2010 | blocking_notifier_call_chain(&module_notify_list, | 2010 | blocking_notifier_call_chain(&module_notify_list, |
2011 | MODULE_STATE_COMING, mod); | 2011 | MODULE_STATE_COMING, mod); |
2012 | 2012 | ||
2013 | /* Start the module */ | 2013 | /* Start the module */ |
2014 | if (mod->init != NULL) | 2014 | if (mod->init != NULL) |
2015 | ret = mod->init(); | 2015 | ret = mod->init(); |
2016 | if (ret < 0) { | 2016 | if (ret < 0) { |
2017 | /* Init routine failed: abort. Try to protect us from | 2017 | /* Init routine failed: abort. Try to protect us from |
2018 | buggy refcounters. */ | 2018 | buggy refcounters. */ |
2019 | mod->state = MODULE_STATE_GOING; | 2019 | mod->state = MODULE_STATE_GOING; |
2020 | synchronize_sched(); | 2020 | synchronize_sched(); |
2021 | if (mod->unsafe) | 2021 | if (mod->unsafe) |
2022 | printk(KERN_ERR "%s: module is now stuck!\n", | 2022 | printk(KERN_ERR "%s: module is now stuck!\n", |
2023 | mod->name); | 2023 | mod->name); |
2024 | else { | 2024 | else { |
2025 | module_put(mod); | 2025 | module_put(mod); |
2026 | mutex_lock(&module_mutex); | 2026 | mutex_lock(&module_mutex); |
2027 | free_module(mod); | 2027 | free_module(mod); |
2028 | mutex_unlock(&module_mutex); | 2028 | mutex_unlock(&module_mutex); |
2029 | } | 2029 | } |
2030 | return ret; | 2030 | return ret; |
2031 | } | 2031 | } |
2032 | 2032 | ||
2033 | /* Now it's a first class citizen! */ | 2033 | /* Now it's a first class citizen! */ |
2034 | mutex_lock(&module_mutex); | 2034 | mutex_lock(&module_mutex); |
2035 | mod->state = MODULE_STATE_LIVE; | 2035 | mod->state = MODULE_STATE_LIVE; |
2036 | /* Drop initial reference. */ | 2036 | /* Drop initial reference. */ |
2037 | module_put(mod); | 2037 | module_put(mod); |
2038 | unwind_remove_table(mod->unwind_info, 1); | 2038 | unwind_remove_table(mod->unwind_info, 1); |
2039 | module_free(mod, mod->module_init); | 2039 | module_free(mod, mod->module_init); |
2040 | mod->module_init = NULL; | 2040 | mod->module_init = NULL; |
2041 | mod->init_size = 0; | 2041 | mod->init_size = 0; |
2042 | mod->init_text_size = 0; | 2042 | mod->init_text_size = 0; |
2043 | mutex_unlock(&module_mutex); | 2043 | mutex_unlock(&module_mutex); |
2044 | 2044 | ||
2045 | return 0; | 2045 | return 0; |
2046 | } | 2046 | } |
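
The "they won't access us" comment above leans on strong_try_module_get(), defined earlier in this file; roughly, it is an ordinary try_module_get() that additionally refuses to pin a module that is still coming up:

        /* Rough sketch of strong_try_module_get(). */
        static inline int strong_try_module_get(struct module *mod)
        {
                if (mod && mod->state == MODULE_STATE_COMING)
                        return 0;
                return try_module_get(mod);
        }

That is why the newly linked, still-COMING module can sit on the list while sys_init_module() runs its init routine without the lock held.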
2047 | 2047 | ||
2048 | static inline int within(unsigned long addr, void *start, unsigned long size) | 2048 | static inline int within(unsigned long addr, void *start, unsigned long size) |
2049 | { | 2049 | { |
2050 | return ((void *)addr >= start && (void *)addr < start + size); | 2050 | return ((void *)addr >= start && (void *)addr < start + size); |
2051 | } | 2051 | } |
2052 | 2052 | ||
2053 | #ifdef CONFIG_KALLSYMS | 2053 | #ifdef CONFIG_KALLSYMS |
2054 | /* | 2054 | /* |
2055 | * This ignores the intensely annoying "mapping symbols" found | 2055 | * This ignores the intensely annoying "mapping symbols" found |
2056 | * in ARM ELF files: $a, $t and $d. | 2056 | * in ARM ELF files: $a, $t and $d. |
2057 | */ | 2057 | */ |
2058 | static inline int is_arm_mapping_symbol(const char *str) | 2058 | static inline int is_arm_mapping_symbol(const char *str) |
2059 | { | 2059 | { |
2060 | return str[0] == '$' && strchr("atd", str[1]) | 2060 | return str[0] == '$' && strchr("atd", str[1]) |
2061 | && (str[2] == '\0' || str[2] == '.'); | 2061 | && (str[2] == '\0' || str[2] == '.'); |
2062 | } | 2062 | } |
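
So "$a", "$t", "$d" and suffixed forms such as "$d.3" are treated as noise, while anything else gets through:

        /* Illustration: expected results for the predicate above. */
        is_arm_mapping_symbol("$a");    /* 1 */
        is_arm_mapping_symbol("$d.3");  /* 1 */
        is_arm_mapping_symbol("$x");    /* 0: not one of a, t, d */
        is_arm_mapping_symbol("a$t");   /* 0: must begin with '$' */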
2063 | 2063 | ||
2064 | static const char *get_ksymbol(struct module *mod, | 2064 | static const char *get_ksymbol(struct module *mod, |
2065 | unsigned long addr, | 2065 | unsigned long addr, |
2066 | unsigned long *size, | 2066 | unsigned long *size, |
2067 | unsigned long *offset) | 2067 | unsigned long *offset) |
2068 | { | 2068 | { |
2069 | unsigned int i, best = 0; | 2069 | unsigned int i, best = 0; |
2070 | unsigned long nextval; | 2070 | unsigned long nextval; |
2071 | 2071 | ||
2072 | /* At worst, next value is at end of module */ | 2072 | /* At worst, next value is at end of module */ |
2073 | if (within(addr, mod->module_init, mod->init_size)) | 2073 | if (within(addr, mod->module_init, mod->init_size)) |
2074 | nextval = (unsigned long)mod->module_init+mod->init_text_size; | 2074 | nextval = (unsigned long)mod->module_init+mod->init_text_size; |
2075 | else | 2075 | else |
2076 | nextval = (unsigned long)mod->module_core+mod->core_text_size; | 2076 | nextval = (unsigned long)mod->module_core+mod->core_text_size; |
2077 | 2077 | ||
2078 | /* Scan for closest preceding symbol, and next symbol. (ELF | 2078 | /* Scan for closest preceding symbol, and next symbol. (ELF |
2079 | starts real symbols at 1). */ | 2079 | starts real symbols at 1). */ |
2080 | for (i = 1; i < mod->num_symtab; i++) { | 2080 | for (i = 1; i < mod->num_symtab; i++) { |
2081 | if (mod->symtab[i].st_shndx == SHN_UNDEF) | 2081 | if (mod->symtab[i].st_shndx == SHN_UNDEF) |
2082 | continue; | 2082 | continue; |
2083 | 2083 | ||
2084 | /* We ignore unnamed symbols: they're uninformative | 2084 | /* We ignore unnamed symbols: they're uninformative |
2085 | * and inserted at a whim. */ | 2085 | * and inserted at a whim. */ |
2086 | if (mod->symtab[i].st_value <= addr | 2086 | if (mod->symtab[i].st_value <= addr |
2087 | && mod->symtab[i].st_value > mod->symtab[best].st_value | 2087 | && mod->symtab[i].st_value > mod->symtab[best].st_value |
2088 | && *(mod->strtab + mod->symtab[i].st_name) != '\0' | 2088 | && *(mod->strtab + mod->symtab[i].st_name) != '\0' |
2089 | && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name)) | 2089 | && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name)) |
2090 | best = i; | 2090 | best = i; |
2091 | if (mod->symtab[i].st_value > addr | 2091 | if (mod->symtab[i].st_value > addr |
2092 | && mod->symtab[i].st_value < nextval | 2092 | && mod->symtab[i].st_value < nextval |
2093 | && *(mod->strtab + mod->symtab[i].st_name) != '\0' | 2093 | && *(mod->strtab + mod->symtab[i].st_name) != '\0' |
2094 | && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name)) | 2094 | && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name)) |
2095 | nextval = mod->symtab[i].st_value; | 2095 | nextval = mod->symtab[i].st_value; |
2096 | } | 2096 | } |
2097 | 2097 | ||
2098 | if (!best) | 2098 | if (!best) |
2099 | return NULL; | 2099 | return NULL; |
2100 | 2100 | ||
2101 | if (size) | 2101 | if (size) |
2102 | *size = nextval - mod->symtab[best].st_value; | 2102 | *size = nextval - mod->symtab[best].st_value; |
2103 | if (offset) | 2103 | if (offset) |
2104 | *offset = addr - mod->symtab[best].st_value; | 2104 | *offset = addr - mod->symtab[best].st_value; |
2105 | return mod->strtab + mod->symtab[best].st_name; | 2105 | return mod->strtab + mod->symtab[best].st_name; |
2106 | } | 2106 | } |
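
A worked example of the scan, with a hypothetical module:

        /*
         * symtab:  my_init at 0x1000,  my_ioctl at 0x1080
         *
         * get_ksymbol(mod, 0x1040, &size, &offset)
         *   -> best = my_init   (closest st_value <= addr)
         *   -> nextval = 0x1080 (closest st_value > addr)
         *   -> *size = 0x80, *offset = 0x40, returns "my_init"
         */

which is the "my_init+0x40/0x80" shape the proc formatters print.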
2107 | 2107 | ||
2108 | /* For kallsyms to ask for address resolution. NULL means not found. | 2108 | /* For kallsyms to ask for address resolution. NULL means not found. |
2109 | We don't lock, as this is used for oops resolution and races are a | 2109 | We don't lock, as this is used for oops resolution and races are a |
2110 | lesser concern. */ | 2110 | lesser concern. */ |
2111 | const char *module_address_lookup(unsigned long addr, | 2111 | const char *module_address_lookup(unsigned long addr, |
2112 | unsigned long *size, | 2112 | unsigned long *size, |
2113 | unsigned long *offset, | 2113 | unsigned long *offset, |
2114 | char **modname) | 2114 | char **modname) |
2115 | { | 2115 | { |
2116 | struct module *mod; | 2116 | struct module *mod; |
2117 | 2117 | ||
2118 | list_for_each_entry(mod, &modules, list) { | 2118 | list_for_each_entry(mod, &modules, list) { |
2119 | if (within(addr, mod->module_init, mod->init_size) | 2119 | if (within(addr, mod->module_init, mod->init_size) |
2120 | || within(addr, mod->module_core, mod->core_size)) { | 2120 | || within(addr, mod->module_core, mod->core_size)) { |
2121 | if (modname) | 2121 | if (modname) |
2122 | *modname = mod->name; | 2122 | *modname = mod->name; |
2123 | return get_ksymbol(mod, addr, size, offset); | 2123 | return get_ksymbol(mod, addr, size, offset); |
2124 | } | 2124 | } |
2125 | } | 2125 | } |
2126 | return NULL; | 2126 | return NULL; |
2127 | } | 2127 | } |
2128 | 2128 | ||
2129 | int lookup_module_symbol_name(unsigned long addr, char *symname) | 2129 | int lookup_module_symbol_name(unsigned long addr, char *symname) |
2130 | { | 2130 | { |
2131 | struct module *mod; | 2131 | struct module *mod; |
2132 | 2132 | ||
2133 | mutex_lock(&module_mutex); | 2133 | mutex_lock(&module_mutex); |
2134 | list_for_each_entry(mod, &modules, list) { | 2134 | list_for_each_entry(mod, &modules, list) { |
2135 | if (within(addr, mod->module_init, mod->init_size) || | 2135 | if (within(addr, mod->module_init, mod->init_size) || |
2136 | within(addr, mod->module_core, mod->core_size)) { | 2136 | within(addr, mod->module_core, mod->core_size)) { |
2137 | const char *sym; | 2137 | const char *sym; |
2138 | 2138 | ||
2139 | sym = get_ksymbol(mod, addr, NULL, NULL); | 2139 | sym = get_ksymbol(mod, addr, NULL, NULL); |
2140 | if (!sym) | 2140 | if (!sym) |
2141 | goto out; | 2141 | goto out; |
2142 | strlcpy(symname, sym, KSYM_NAME_LEN + 1); | 2142 | strlcpy(symname, sym, KSYM_NAME_LEN + 1); |
2143 | mutex_unlock(&module_mutex); | 2143 | mutex_unlock(&module_mutex); |
2144 | return 0; | 2144 | return 0; |
2145 | } | 2145 | } |
2146 | } | 2146 | } |
2147 | out: | 2147 | out: |
2148 | mutex_unlock(&module_mutex); | 2148 | mutex_unlock(&module_mutex); |
2149 | return -ERANGE; | 2149 | return -ERANGE; |
2150 | } | 2150 | } |
2151 | 2151 | ||
2152 | int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, | ||
2153 | unsigned long *offset, char *modname, char *name) | ||
2154 | { | ||
2155 | struct module *mod; | ||
2156 | |||
2157 | mutex_lock(&module_mutex); | ||
2158 | list_for_each_entry(mod, &modules, list) { | ||
2159 | if (within(addr, mod->module_init, mod->init_size) || | ||
2160 | within(addr, mod->module_core, mod->core_size)) { | ||
2161 | const char *sym; | ||
2162 | |||
2163 | sym = get_ksymbol(mod, addr, size, offset); | ||
2164 | if (!sym) | ||
2165 | goto out; | ||
2166 | if (modname) | ||
2167 | strlcpy(modname, mod->name, MODULE_NAME_LEN + 1); | ||
2168 | if (name) | ||
2169 | strlcpy(name, sym, KSYM_NAME_LEN + 1); | ||
2170 | mutex_unlock(&module_mutex); | ||
2171 | return 0; | ||
2172 | } | ||
2173 | } | ||
2174 | out: | ||
2175 | mutex_unlock(&module_mutex); | ||
2176 | return -ERANGE; | ||
2177 | } | ||
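
This new function is the point of the commit: module_address_lookup() above returns pointers into module memory, which rmmod can free while the caller is still formatting them; here both strings are copied out while module_mutex is held. A hedged sketch of a caller (the /proc/slab_allocators path reaches this through the new lookup_symbol_attrs() wrapper):

        /* Buffer sizes match the strlcpy() bounds used above. */
        char modname[MODULE_NAME_LEN + 1];
        char name[KSYM_NAME_LEN + 1];
        unsigned long size, offset;

        if (lookup_module_symbol_attrs(addr, &size, &offset,
                                       modname, name) == 0)
                seq_printf(m, "%s+%#lx/%#lx [%s]",
                           name, offset, size, modname);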
2178 | |||
2152 | int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, | 2179 | int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, |
2153 | char *name, char *module_name, int *exported) | 2180 | char *name, char *module_name, int *exported) |
2154 | { | 2181 | { |
2155 | struct module *mod; | 2182 | struct module *mod; |
2156 | 2183 | ||
2157 | mutex_lock(&module_mutex); | 2184 | mutex_lock(&module_mutex); |
2158 | list_for_each_entry(mod, &modules, list) { | 2185 | list_for_each_entry(mod, &modules, list) { |
2159 | if (symnum < mod->num_symtab) { | 2186 | if (symnum < mod->num_symtab) { |
2160 | *value = mod->symtab[symnum].st_value; | 2187 | *value = mod->symtab[symnum].st_value; |
2161 | *type = mod->symtab[symnum].st_info; | 2188 | *type = mod->symtab[symnum].st_info; |
2162 | strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, | 2189 | strlcpy(name, mod->strtab + mod->symtab[symnum].st_name, |
2163 | KSYM_NAME_LEN + 1); | 2190 | KSYM_NAME_LEN + 1); |
2164 | strlcpy(module_name, mod->name, MODULE_NAME_LEN + 1); | 2191 | strlcpy(module_name, mod->name, MODULE_NAME_LEN + 1); |
2165 | *exported = is_exported(name, mod); | 2192 | *exported = is_exported(name, mod); |
2166 | mutex_unlock(&module_mutex); | 2193 | mutex_unlock(&module_mutex); |
2167 | return 0; | 2194 | return 0; |
2168 | } | 2195 | } |
2169 | symnum -= mod->num_symtab; | 2196 | symnum -= mod->num_symtab; |
2170 | } | 2197 | } |
2171 | mutex_unlock(&module_mutex); | 2198 | mutex_unlock(&module_mutex); |
2172 | return -ERANGE; | 2199 | return -ERANGE; |
2173 | } | 2200 | } |
2174 | 2201 | ||
2175 | static unsigned long mod_find_symname(struct module *mod, const char *name) | 2202 | static unsigned long mod_find_symname(struct module *mod, const char *name) |
2176 | { | 2203 | { |
2177 | unsigned int i; | 2204 | unsigned int i; |
2178 | 2205 | ||
2179 | for (i = 0; i < mod->num_symtab; i++) | 2206 | for (i = 0; i < mod->num_symtab; i++) |
2180 | if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 && | 2207 | if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 && |
2181 | mod->symtab[i].st_info != 'U') | 2208 | mod->symtab[i].st_info != 'U') |
2182 | return mod->symtab[i].st_value; | 2209 | return mod->symtab[i].st_value; |
2183 | return 0; | 2210 | return 0; |
2184 | } | 2211 | } |
2185 | 2212 | ||
2186 | /* Look for this name: can be of form module:name. */ | 2213 | /* Look for this name: can be of form module:name. */ |
2187 | unsigned long module_kallsyms_lookup_name(const char *name) | 2214 | unsigned long module_kallsyms_lookup_name(const char *name) |
2188 | { | 2215 | { |
2189 | struct module *mod; | 2216 | struct module *mod; |
2190 | char *colon; | 2217 | char *colon; |
2191 | unsigned long ret = 0; | 2218 | unsigned long ret = 0; |
2192 | 2219 | ||
2193 | /* Don't lock: we're in enough trouble already. */ | 2220 | /* Don't lock: we're in enough trouble already. */ |
2194 | if ((colon = strchr(name, ':')) != NULL) { | 2221 | if ((colon = strchr(name, ':')) != NULL) { |
2195 | *colon = '\0'; | 2222 | *colon = '\0'; |
2196 | if ((mod = find_module(name)) != NULL) | 2223 | if ((mod = find_module(name)) != NULL) |
2197 | ret = mod_find_symname(mod, colon+1); | 2224 | ret = mod_find_symname(mod, colon+1); |
2198 | *colon = ':'; | 2225 | *colon = ':'; |
2199 | } else { | 2226 | } else { |
2200 | list_for_each_entry(mod, &modules, list) | 2227 | list_for_each_entry(mod, &modules, list) |
2201 | if ((ret = mod_find_symname(mod, name)) != 0) | 2228 | if ((ret = mod_find_symname(mod, name)) != 0) |
2202 | break; | 2229 | break; |
2203 | } | 2230 | } |
2204 | return ret; | 2231 | return ret; |
2205 | } | 2232 | } |
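Note the "module:name" form is parsed destructively: the colon is overwritten with a NUL for the duration of find_module() and then restored, so the string must live in writable memory. A hedged usage sketch (module and symbol names made up):

	static unsigned long lookup_example(void)
	{
		char query[] = "usbcore:usb_register";	/* writable: ':' gets patched */
		unsigned long addr = module_kallsyms_lookup_name(query);

		if (!addr)	/* fall back to scanning every module in list order */
			addr = module_kallsyms_lookup_name("usb_register");
		return addr;
	}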
2206 | #endif /* CONFIG_KALLSYMS */ | 2233 | #endif /* CONFIG_KALLSYMS */ |
2207 | 2234 | ||
2208 | /* Called by the /proc file system to return a list of modules. */ | 2235 | /* Called by the /proc file system to return a list of modules. */ |
2209 | static void *m_start(struct seq_file *m, loff_t *pos) | 2236 | static void *m_start(struct seq_file *m, loff_t *pos) |
2210 | { | 2237 | { |
2211 | struct list_head *i; | 2238 | struct list_head *i; |
2212 | loff_t n = 0; | 2239 | loff_t n = 0; |
2213 | 2240 | ||
2214 | mutex_lock(&module_mutex); | 2241 | mutex_lock(&module_mutex); |
2215 | list_for_each(i, &modules) { | 2242 | list_for_each(i, &modules) { |
2216 | if (n++ == *pos) | 2243 | if (n++ == *pos) |
2217 | break; | 2244 | break; |
2218 | } | 2245 | } |
2219 | if (i == &modules) | 2246 | if (i == &modules) |
2220 | return NULL; | 2247 | return NULL; |
2221 | return i; | 2248 | return i; |
2222 | } | 2249 | } |
2223 | 2250 | ||
2224 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) | 2251 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) |
2225 | { | 2252 | { |
2226 | struct list_head *i = p; | 2253 | struct list_head *i = p; |
2227 | (*pos)++; | 2254 | (*pos)++; |
2228 | if (i->next == &modules) | 2255 | if (i->next == &modules) |
2229 | return NULL; | 2256 | return NULL; |
2230 | return i->next; | 2257 | return i->next; |
2231 | } | 2258 | } |
2232 | 2259 | ||
2233 | static void m_stop(struct seq_file *m, void *p) | 2260 | static void m_stop(struct seq_file *m, void *p) |
2234 | { | 2261 | { |
2235 | mutex_unlock(&module_mutex); | 2262 | mutex_unlock(&module_mutex); |
2236 | } | 2263 | } |
2237 | 2264 | ||
2238 | static char *taint_flags(unsigned int taints, char *buf) | 2265 | static char *taint_flags(unsigned int taints, char *buf) |
2239 | { | 2266 | { |
2240 | int bx = 0; | 2267 | int bx = 0; |
2241 | 2268 | ||
2242 | if (taints) { | 2269 | if (taints) { |
2243 | buf[bx++] = '('; | 2270 | buf[bx++] = '('; |
2244 | if (taints & TAINT_PROPRIETARY_MODULE) | 2271 | if (taints & TAINT_PROPRIETARY_MODULE) |
2245 | buf[bx++] = 'P'; | 2272 | buf[bx++] = 'P'; |
2246 | if (taints & TAINT_FORCED_MODULE) | 2273 | if (taints & TAINT_FORCED_MODULE) |
2247 | buf[bx++] = 'F'; | 2274 | buf[bx++] = 'F'; |
2248 | /* | 2275 | /* |
2249 | * TAINT_FORCED_RMMOD: could be added. | 2276 | * TAINT_FORCED_RMMOD: could be added. |
2250 | * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't | 2277 | * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't |
2251 | * apply to modules. | 2278 | * apply to modules. |
2252 | */ | 2279 | */ |
2253 | buf[bx++] = ')'; | 2280 | buf[bx++] = ')'; |
2254 | } | 2281 | } |
2255 | buf[bx] = '\0'; | 2282 | buf[bx] = '\0'; |
2256 | 2283 | ||
2257 | return buf; | 2284 | return buf; |
2258 | } | 2285 | } |
2259 | 2286 | ||
2260 | static int m_show(struct seq_file *m, void *p) | 2287 | static int m_show(struct seq_file *m, void *p) |
2261 | { | 2288 | { |
2262 | struct module *mod = list_entry(p, struct module, list); | 2289 | struct module *mod = list_entry(p, struct module, list); |
2263 | char buf[8]; | 2290 | char buf[8]; |
2264 | 2291 | ||
2265 | seq_printf(m, "%s %lu", | 2292 | seq_printf(m, "%s %lu", |
2266 | mod->name, mod->init_size + mod->core_size); | 2293 | mod->name, mod->init_size + mod->core_size); |
2267 | print_unload_info(m, mod); | 2294 | print_unload_info(m, mod); |
2268 | 2295 | ||
2269 | /* Informative for users. */ | 2296 | /* Informative for users. */ |
2270 | seq_printf(m, " %s", | 2297 | seq_printf(m, " %s", |
2271 | mod->state == MODULE_STATE_GOING ? "Unloading": | 2298 | mod->state == MODULE_STATE_GOING ? "Unloading": |
2272 | mod->state == MODULE_STATE_COMING ? "Loading": | 2299 | mod->state == MODULE_STATE_COMING ? "Loading": |
2273 | "Live"); | 2300 | "Live"); |
2274 | /* Used by oprofile and other similar tools. */ | 2301 | /* Used by oprofile and other similar tools. */ |
2275 | seq_printf(m, " 0x%p", mod->module_core); | 2302 | seq_printf(m, " 0x%p", mod->module_core); |
2276 | 2303 | ||
2277 | /* Taints info */ | 2304 | /* Taints info */ |
2278 | if (mod->taints) | 2305 | if (mod->taints) |
2279 | seq_printf(m, " %s", taint_flags(mod->taints, buf)); | 2306 | seq_printf(m, " %s", taint_flags(mod->taints, buf)); |
2280 | 2307 | ||
2281 | seq_printf(m, "\n"); | 2308 | seq_printf(m, "\n"); |
2282 | return 0; | 2309 | return 0; |
2283 | } | 2310 | } |
2284 | 2311 | ||
2285 | /* Format: modulename size refcount deps address | 2312 | /* Format: modulename size refcount deps address |
2286 | 2313 | ||
2287 | Where refcount is a number or -, and deps is a comma-separated list | 2314 | Where refcount is a number or -, and deps is a comma-separated list |
2288 | of depends or -. | 2315 | of depends or -. |
2289 | */ | 2316 | */ |
2290 | const struct seq_operations modules_op = { | 2317 | const struct seq_operations modules_op = { |
2291 | .start = m_start, | 2318 | .start = m_start, |
2292 | .next = m_next, | 2319 | .next = m_next, |
2293 | .stop = m_stop, | 2320 | .stop = m_stop, |
2294 | .show = m_show | 2321 | .show = m_show |
2295 | }; | 2322 | }; |
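modules_op is consumed by fs/proc, which exposes it as /proc/modules through the seq_file helpers; roughly like the following (a sketch of the proc_misc.c wiring of this era, not code from this commit):

	static int modules_open(struct inode *inode, struct file *file)
	{
		return seq_open(file, &modules_op);
	}

	static const struct file_operations proc_modules_operations = {
		.open		= modules_open,
		.read		= seq_read,
		.llseek		= seq_lseek,
		.release	= seq_release,
	};

Each m_show() call then emits one line, e.g. "usbcore 112644 3 ehci_hcd,uhci_hcd, Live 0xf8a5d000" (values illustrative), with a trailing taint string such as "(P)" from taint_flags() only when the module is tainted.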
2296 | 2323 | ||
2297 | /* Given an address, look for it in the module exception tables. */ | 2324 | /* Given an address, look for it in the module exception tables. */ |
2298 | const struct exception_table_entry *search_module_extables(unsigned long addr) | 2325 | const struct exception_table_entry *search_module_extables(unsigned long addr) |
2299 | { | 2326 | { |
2300 | unsigned long flags; | 2327 | unsigned long flags; |
2301 | const struct exception_table_entry *e = NULL; | 2328 | const struct exception_table_entry *e = NULL; |
2302 | struct module *mod; | 2329 | struct module *mod; |
2303 | 2330 | ||
2304 | spin_lock_irqsave(&modlist_lock, flags); | 2331 | spin_lock_irqsave(&modlist_lock, flags); |
2305 | list_for_each_entry(mod, &modules, list) { | 2332 | list_for_each_entry(mod, &modules, list) { |
2306 | if (mod->num_exentries == 0) | 2333 | if (mod->num_exentries == 0) |
2307 | continue; | 2334 | continue; |
2308 | 2335 | ||
2309 | e = search_extable(mod->extable, | 2336 | e = search_extable(mod->extable, |
2310 | mod->extable + mod->num_exentries - 1, | 2337 | mod->extable + mod->num_exentries - 1, |
2311 | addr); | 2338 | addr); |
2312 | if (e) | 2339 | if (e) |
2313 | break; | 2340 | break; |
2314 | } | 2341 | } |
2315 | spin_unlock_irqrestore(&modlist_lock, flags); | 2342 | spin_unlock_irqrestore(&modlist_lock, flags); |
2316 | 2343 | ||
2317 | /* If we found one, we are running inside it, so the module | 2344 | /* If we found one, we are running inside it, so the module |
2318 | cannot be unloaded; hence no refcnt needed. */ | 2345 | cannot be unloaded; hence no refcnt needed. */ |
2319 | return e; | 2346 | return e; |
2320 | } | 2347 | } |
2321 | 2348 | ||
2322 | /* | 2349 | /* |
2323 | * Is this a valid module address? | 2350 | * Is this a valid module address? |
2324 | */ | 2351 | */ |
2325 | int is_module_address(unsigned long addr) | 2352 | int is_module_address(unsigned long addr) |
2326 | { | 2353 | { |
2327 | unsigned long flags; | 2354 | unsigned long flags; |
2328 | struct module *mod; | 2355 | struct module *mod; |
2329 | 2356 | ||
2330 | spin_lock_irqsave(&modlist_lock, flags); | 2357 | spin_lock_irqsave(&modlist_lock, flags); |
2331 | 2358 | ||
2332 | list_for_each_entry(mod, &modules, list) { | 2359 | list_for_each_entry(mod, &modules, list) { |
2333 | if (within(addr, mod->module_core, mod->core_size)) { | 2360 | if (within(addr, mod->module_core, mod->core_size)) { |
2334 | spin_unlock_irqrestore(&modlist_lock, flags); | 2361 | spin_unlock_irqrestore(&modlist_lock, flags); |
2335 | return 1; | 2362 | return 1; |
2336 | } | 2363 | } |
2337 | } | 2364 | } |
2338 | 2365 | ||
2339 | spin_unlock_irqrestore(&modlist_lock, flags); | 2366 | spin_unlock_irqrestore(&modlist_lock, flags); |
2340 | 2367 | ||
2341 | return 0; | 2368 | return 0; |
2342 | } | 2369 | } |
2343 | 2370 | ||
2344 | 2371 | ||
2345 | /* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ | 2372 | /* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ |
2346 | struct module *__module_text_address(unsigned long addr) | 2373 | struct module *__module_text_address(unsigned long addr) |
2347 | { | 2374 | { |
2348 | struct module *mod; | 2375 | struct module *mod; |
2349 | 2376 | ||
2350 | list_for_each_entry(mod, &modules, list) | 2377 | list_for_each_entry(mod, &modules, list) |
2351 | if (within(addr, mod->module_init, mod->init_text_size) | 2378 | if (within(addr, mod->module_init, mod->init_text_size) |
2352 | || within(addr, mod->module_core, mod->core_text_size)) | 2379 | || within(addr, mod->module_core, mod->core_text_size)) |
2353 | return mod; | 2380 | return mod; |
2354 | return NULL; | 2381 | return NULL; |
2355 | } | 2382 | } |
2356 | 2383 | ||
2357 | struct module *module_text_address(unsigned long addr) | 2384 | struct module *module_text_address(unsigned long addr) |
2358 | { | 2385 | { |
2359 | struct module *mod; | 2386 | struct module *mod; |
2360 | unsigned long flags; | 2387 | unsigned long flags; |
2361 | 2388 | ||
2362 | spin_lock_irqsave(&modlist_lock, flags); | 2389 | spin_lock_irqsave(&modlist_lock, flags); |
2363 | mod = __module_text_address(addr); | 2390 | mod = __module_text_address(addr); |
2364 | spin_unlock_irqrestore(&modlist_lock, flags); | 2391 | spin_unlock_irqrestore(&modlist_lock, flags); |
2365 | 2392 | ||
2366 | return mod; | 2393 | return mod; |
2367 | } | 2394 | } |
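The within() helper used by both lookups is defined earlier in module.c; it amounts to a half-open bounds check, approximately:

	static inline int within(unsigned long addr, void *start, unsigned long size)
	{
		/* true iff addr lies in [start, start + size) */
		return ((void *)addr >= start && (void *)addr < start + size);
	}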
2368 | 2395 | ||
2369 | /* Don't grab lock, we're oopsing. */ | 2396 | /* Don't grab lock, we're oopsing. */ |
2370 | void print_modules(void) | 2397 | void print_modules(void) |
2371 | { | 2398 | { |
2372 | struct module *mod; | 2399 | struct module *mod; |
2373 | char buf[8]; | 2400 | char buf[8]; |
2374 | 2401 | ||
2375 | printk("Modules linked in:"); | 2402 | printk("Modules linked in:"); |
2376 | list_for_each_entry(mod, &modules, list) | 2403 | list_for_each_entry(mod, &modules, list) |
2377 | printk(" %s%s", mod->name, taint_flags(mod->taints, buf)); | 2404 | printk(" %s%s", mod->name, taint_flags(mod->taints, buf)); |
2378 | printk("\n"); | 2405 | printk("\n"); |
2379 | } | 2406 | } |
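On an oops this contributes a single line to the log, for example (module names and taint flags illustrative):

	Modules linked in: nvidia(P) e1000 ext3 jbd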
2380 | 2407 | ||
2381 | #ifdef CONFIG_SYSFS | 2408 | #ifdef CONFIG_SYSFS |
2382 | static char *make_driver_name(struct device_driver *drv) | 2409 | static char *make_driver_name(struct device_driver *drv) |
2383 | { | 2410 | { |
2384 | char *driver_name; | 2411 | char *driver_name; |
2385 | 2412 | ||
2386 | driver_name = kmalloc(strlen(drv->name) + strlen(drv->bus->name) + 2, | 2413 | driver_name = kmalloc(strlen(drv->name) + strlen(drv->bus->name) + 2, |
2387 | GFP_KERNEL); | 2414 | GFP_KERNEL); |
2388 | if (!driver_name) | 2415 | if (!driver_name) |
2389 | return NULL; | 2416 | return NULL; |
2390 | 2417 | ||
2391 | sprintf(driver_name, "%s:%s", drv->bus->name, drv->name); | 2418 | sprintf(driver_name, "%s:%s", drv->bus->name, drv->name); |
2392 | return driver_name; | 2419 | return driver_name; |
2393 | } | 2420 | } |
2394 | 2421 | ||
2395 | static void module_create_drivers_dir(struct module_kobject *mk) | 2422 | static void module_create_drivers_dir(struct module_kobject *mk) |
2396 | { | 2423 | { |
2397 | if (!mk || mk->drivers_dir) | 2424 | if (!mk || mk->drivers_dir) |
2398 | return; | 2425 | return; |
2399 | 2426 | ||
2400 | mk->drivers_dir = kobject_add_dir(&mk->kobj, "drivers"); | 2427 | mk->drivers_dir = kobject_add_dir(&mk->kobj, "drivers"); |
2401 | } | 2428 | } |
2402 | 2429 | ||
2403 | void module_add_driver(struct module *mod, struct device_driver *drv) | 2430 | void module_add_driver(struct module *mod, struct device_driver *drv) |
2404 | { | 2431 | { |
2405 | char *driver_name; | 2432 | char *driver_name; |
2406 | int no_warn; | 2433 | int no_warn; |
2407 | struct module_kobject *mk = NULL; | 2434 | struct module_kobject *mk = NULL; |
2408 | 2435 | ||
2409 | if (!drv) | 2436 | if (!drv) |
2410 | return; | 2437 | return; |
2411 | 2438 | ||
2412 | if (mod) | 2439 | if (mod) |
2413 | mk = &mod->mkobj; | 2440 | mk = &mod->mkobj; |
2414 | else if (drv->mod_name) { | 2441 | else if (drv->mod_name) { |
2415 | struct kobject *mkobj; | 2442 | struct kobject *mkobj; |
2416 | 2443 | ||
2417 | /* Lookup built-in module entry in /sys/module */ | 2444 | /* Lookup built-in module entry in /sys/module */ |
2418 | mkobj = kset_find_obj(&module_subsys, drv->mod_name); | 2445 | mkobj = kset_find_obj(&module_subsys, drv->mod_name); |
2419 | if (mkobj) { | 2446 | if (mkobj) { |
2420 | mk = container_of(mkobj, struct module_kobject, kobj); | 2447 | mk = container_of(mkobj, struct module_kobject, kobj); |
2421 | /* remember our module structure */ | 2448 | /* remember our module structure */ |
2422 | drv->mkobj = mk; | 2449 | drv->mkobj = mk; |
2423 | /* kset_find_obj took a reference */ | 2450 | /* kset_find_obj took a reference */ |
2424 | kobject_put(mkobj); | 2451 | kobject_put(mkobj); |
2425 | } | 2452 | } |
2426 | } | 2453 | } |
2427 | 2454 | ||
2428 | if (!mk) | 2455 | if (!mk) |
2429 | return; | 2456 | return; |
2430 | 2457 | ||
2431 | /* Don't check return codes; these calls are idempotent */ | 2458 | /* Don't check return codes; these calls are idempotent */ |
2432 | no_warn = sysfs_create_link(&drv->kobj, &mk->kobj, "module"); | 2459 | no_warn = sysfs_create_link(&drv->kobj, &mk->kobj, "module"); |
2433 | driver_name = make_driver_name(drv); | 2460 | driver_name = make_driver_name(drv); |
2434 | if (driver_name) { | 2461 | if (driver_name) { |
2435 | module_create_drivers_dir(mk); | 2462 | module_create_drivers_dir(mk); |
2436 | no_warn = sysfs_create_link(mk->drivers_dir, &drv->kobj, | 2463 | no_warn = sysfs_create_link(mk->drivers_dir, &drv->kobj, |
2437 | driver_name); | 2464 | driver_name); |
2438 | kfree(driver_name); | 2465 | kfree(driver_name); |
2439 | } | 2466 | } |
2440 | } | 2467 | } |
2441 | EXPORT_SYMBOL(module_add_driver); | 2468 | EXPORT_SYMBOL(module_add_driver); |
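The net effect in sysfs is a pair of symlinks tying a driver to its owning module; for a hypothetical "hub" driver on the usb bus owned by usbcore, with "usb:hub" being exactly what make_driver_name() produces (link targets illustrative):

	/sys/bus/usb/drivers/hub/module     -> ../../../../module/usbcore
	/sys/module/usbcore/drivers/usb:hub -> ../../../bus/usb/drivers/hub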
2442 | 2469 | ||
2443 | void module_remove_driver(struct device_driver *drv) | 2470 | void module_remove_driver(struct device_driver *drv) |
2444 | { | 2471 | { |
2445 | struct module_kobject *mk = NULL; | 2472 | struct module_kobject *mk = NULL; |
2446 | char *driver_name; | 2473 | char *driver_name; |
2447 | 2474 | ||
2448 | if (!drv) | 2475 | if (!drv) |
2449 | return; | 2476 | return; |
2450 | 2477 | ||
2451 | sysfs_remove_link(&drv->kobj, "module"); | 2478 | sysfs_remove_link(&drv->kobj, "module"); |
2452 | 2479 | ||
2453 | if (drv->owner) | 2480 | if (drv->owner) |
2454 | mk = &drv->owner->mkobj; | 2481 | mk = &drv->owner->mkobj; |
2455 | else if (drv->mkobj) | 2482 | else if (drv->mkobj) |
2456 | mk = drv->mkobj; | 2483 | mk = drv->mkobj; |
2457 | if (mk && mk->drivers_dir) { | 2484 | if (mk && mk->drivers_dir) { |
2458 | driver_name = make_driver_name(drv); | 2485 | driver_name = make_driver_name(drv); |
2459 | if (driver_name) { | 2486 | if (driver_name) { |
2460 | sysfs_remove_link(mk->drivers_dir, driver_name); | 2487 | sysfs_remove_link(mk->drivers_dir, driver_name); |
2461 | kfree(driver_name); | 2488 | kfree(driver_name); |
2462 | } | 2489 | } |
2463 | } | 2490 | } |
2464 | } | 2491 | } |
2465 | EXPORT_SYMBOL(module_remove_driver); | 2492 | EXPORT_SYMBOL(module_remove_driver); |
2466 | #endif | 2493 | #endif |
2467 | 2494 | ||
2468 | #ifdef CONFIG_MODVERSIONS | 2495 | #ifdef CONFIG_MODVERSIONS |
2469 | /* Generate the signature for struct module here, too, for modversions. */ | 2496 | /* Generate the signature for struct module here, too, for modversions. */ |
2470 | void struct_module(struct module *mod) { return; } | 2497 | void struct_module(struct module *mod) { return; } |
2471 | EXPORT_SYMBOL(struct_module); | 2498 | EXPORT_SYMBOL(struct_module); |
2472 | #endif | 2499 | #endif |
2473 | 2500 |
mm/slab.c
1 | /* | 1 | /* |
2 | * linux/mm/slab.c | 2 | * linux/mm/slab.c |
3 | * Written by Mark Hemment, 1996/97. | 3 | * Written by Mark Hemment, 1996/97. |
4 | * (markhe@nextd.demon.co.uk) | 4 | * (markhe@nextd.demon.co.uk) |
5 | * | 5 | * |
6 | * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli | 6 | * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli |
7 | * | 7 | * |
8 | * Major cleanup, different bufctl logic, per-cpu arrays | 8 | * Major cleanup, different bufctl logic, per-cpu arrays |
9 | * (c) 2000 Manfred Spraul | 9 | * (c) 2000 Manfred Spraul |
10 | * | 10 | * |
11 | * Cleanup, make the head arrays unconditional, preparation for NUMA | 11 | * Cleanup, make the head arrays unconditional, preparation for NUMA |
12 | * (c) 2002 Manfred Spraul | 12 | * (c) 2002 Manfred Spraul |
13 | * | 13 | * |
14 | * An implementation of the Slab Allocator as described in outline in; | 14 | * An implementation of the Slab Allocator as described in outline in; |
15 | * UNIX Internals: The New Frontiers by Uresh Vahalia | 15 | * UNIX Internals: The New Frontiers by Uresh Vahalia |
16 | * Pub: Prentice Hall ISBN 0-13-101908-2 | 16 | * Pub: Prentice Hall ISBN 0-13-101908-2 |
17 | * or with a little more detail in; | 17 | * or with a little more detail in; |
18 | * The Slab Allocator: An Object-Caching Kernel Memory Allocator | 18 | * The Slab Allocator: An Object-Caching Kernel Memory Allocator |
19 | * Jeff Bonwick (Sun Microsystems). | 19 | * Jeff Bonwick (Sun Microsystems). |
20 | * Presented at: USENIX Summer 1994 Technical Conference | 20 | * Presented at: USENIX Summer 1994 Technical Conference |
21 | * | 21 | * |
22 | * The memory is organized in caches, one cache for each object type. | 22 | * The memory is organized in caches, one cache for each object type. |
23 | * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct) | 23 | * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct) |
24 | * Each cache consists of many slabs (they are small (usually one | 24 | * Each cache consists of many slabs (they are small (usually one |
25 | * page long) and always contiguous), and each slab contains multiple | 25 | * page long) and always contiguous), and each slab contains multiple |
26 | * initialized objects. | 26 | * initialized objects. |
27 | * | 27 | * |
28 | * This means, that your constructor is used only for newly allocated | 28 | * This means, that your constructor is used only for newly allocated |
29 | * slabs and you must pass objects with the same initializations to | 29 | * slabs and you must pass objects with the same initializations to |
30 | * kmem_cache_free. | 30 | * kmem_cache_free. |
31 | * | 31 | * |
32 | * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM, | 32 | * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM, |
33 | * normal). If you need a special memory type, then you must create a new | 33 | * normal). If you need a special memory type, then you must create a new |
34 | * cache for that memory type. | 34 | * cache for that memory type. |
35 | * | 35 | * |
36 | * In order to reduce fragmentation, the slabs are sorted in 3 groups: | 36 | * In order to reduce fragmentation, the slabs are sorted in 3 groups: |
37 | * full slabs with 0 free objects | 37 | * full slabs with 0 free objects |
38 | * partial slabs | 38 | * partial slabs |
39 | * empty slabs with no allocated objects | 39 | * empty slabs with no allocated objects |
40 | * | 40 | * |
41 | * If partial slabs exist, then new allocations come from these slabs, | 41 | * If partial slabs exist, then new allocations come from these slabs, |
42 | * otherwise from empty slabs; if none exist, new slabs are allocated. | 42 | * otherwise from empty slabs; if none exist, new slabs are allocated. |
43 | * | 43 | * |
44 | * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache | 44 | * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache |
45 | * during kmem_cache_destroy(). The caller must prevent concurrent allocs. | 45 | * during kmem_cache_destroy(). The caller must prevent concurrent allocs. |
46 | * | 46 | * |
47 | * Each cache has a short per-cpu head array, most allocs | 47 | * Each cache has a short per-cpu head array, most allocs |
48 | * and frees go into that array, and if that array overflows, then 1/2 | 48 | * and frees go into that array, and if that array overflows, then 1/2 |
49 | * of the entries in the array are given back into the global cache. | 49 | * of the entries in the array are given back into the global cache. |
50 | * The head array is strictly LIFO and should improve the cache hit rates. | 50 | * The head array is strictly LIFO and should improve the cache hit rates. |
51 | * On SMP, it additionally reduces the spinlock operations. | 51 | * On SMP, it additionally reduces the spinlock operations. |
52 | * | 52 | * |
53 | * The c_cpuarray may not be read with local interrupts enabled - | 53 | * The c_cpuarray may not be read with local interrupts enabled - |
54 | * it's changed with a smp_call_function(). | 54 | * it's changed with a smp_call_function(). |
55 | * | 55 | * |
56 | * SMP synchronization: | 56 | * SMP synchronization: |
57 | * constructors and destructors are called without any locking. | 57 | * constructors and destructors are called without any locking. |
58 | * Several members in struct kmem_cache and struct slab never change, they | 58 | * Several members in struct kmem_cache and struct slab never change, they |
59 | * are accessed without any locking. | 59 | * are accessed without any locking. |
60 | * The per-cpu arrays are never accessed from the wrong cpu, no locking, | 60 | * The per-cpu arrays are never accessed from the wrong cpu, no locking, |
61 | * and local interrupts are disabled so slab code is preempt-safe. | 61 | * and local interrupts are disabled so slab code is preempt-safe. |
62 | * The non-constant members are protected with a per-cache irq spinlock. | 62 | * The non-constant members are protected with a per-cache irq spinlock. |
63 | * | 63 | * |
64 | * Many thanks to Mark Hemment, who wrote another per-cpu slab patch | 64 | * Many thanks to Mark Hemment, who wrote another per-cpu slab patch |
65 | * in 2000 - many ideas in the current implementation are derived from | 65 | * in 2000 - many ideas in the current implementation are derived from |
66 | * his patch. | 66 | * his patch. |
67 | * | 67 | * |
68 | * Further notes from the original documentation: | 68 | * Further notes from the original documentation: |
69 | * | 69 | * |
70 | * 11 April '97. Started multi-threading - markhe | 70 | * 11 April '97. Started multi-threading - markhe |
71 | * The global cache-chain is protected by the mutex 'cache_chain_mutex'. | 71 | * The global cache-chain is protected by the mutex 'cache_chain_mutex'. |
72 | * The mutex is only needed when accessing/extending the cache-chain, which | 72 | * The mutex is only needed when accessing/extending the cache-chain, which |
73 | * can never happen inside an interrupt (kmem_cache_create(), | 73 | * can never happen inside an interrupt (kmem_cache_create(), |
74 | * kmem_cache_shrink() and kmem_cache_reap()). | 74 | * kmem_cache_shrink() and kmem_cache_reap()). |
75 | * | 75 | * |
76 | * At present, each engine can be growing a cache. This should be blocked. | 76 | * At present, each engine can be growing a cache. This should be blocked. |
77 | * | 77 | * |
78 | * 15 March 2005. NUMA slab allocator. | 78 | * 15 March 2005. NUMA slab allocator. |
79 | * Shai Fultheim <shai@scalex86.org>. | 79 | * Shai Fultheim <shai@scalex86.org>. |
80 | * Shobhit Dayal <shobhit@calsoftinc.com> | 80 | * Shobhit Dayal <shobhit@calsoftinc.com> |
81 | * Alok N Kataria <alokk@calsoftinc.com> | 81 | * Alok N Kataria <alokk@calsoftinc.com> |
82 | * Christoph Lameter <christoph@lameter.com> | 82 | * Christoph Lameter <christoph@lameter.com> |
83 | * | 83 | * |
84 | * Modified the slab allocator to be node aware on NUMA systems. | 84 | * Modified the slab allocator to be node aware on NUMA systems. |
85 | * Each node has its own list of partial, free and full slabs. | 85 | * Each node has its own list of partial, free and full slabs. |
86 | * All object allocations for a node occur from node specific slab lists. | 86 | * All object allocations for a node occur from node specific slab lists. |
87 | */ | 87 | */ |
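For readers new to the API this comment describes, a minimal cache user looks roughly like the following; struct foo, the cache name, and the constructor are hypothetical, and the ctor signature matches the kmem_cache fields further down in this file:

	struct foo {
		int refcount;
		struct list_head link;
	};

	static struct kmem_cache *foo_cache;

	/* Runs once per object when a new slab is populated; per the comment
	 * above, objects must go back to kmem_cache_free() in this same state. */
	static void foo_ctor(void *obj, struct kmem_cache *cachep, unsigned long flags)
	{
		struct foo *f = obj;

		f->refcount = 0;
		INIT_LIST_HEAD(&f->link);
	}

	static int __init foo_init(void)
	{
		foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
					      0, SLAB_HWCACHE_ALIGN, foo_ctor, NULL);
		return foo_cache ? 0 : -ENOMEM;
	}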
88 | 88 | ||
89 | #include <linux/slab.h> | 89 | #include <linux/slab.h> |
90 | #include <linux/mm.h> | 90 | #include <linux/mm.h> |
91 | #include <linux/poison.h> | 91 | #include <linux/poison.h> |
92 | #include <linux/swap.h> | 92 | #include <linux/swap.h> |
93 | #include <linux/cache.h> | 93 | #include <linux/cache.h> |
94 | #include <linux/interrupt.h> | 94 | #include <linux/interrupt.h> |
95 | #include <linux/init.h> | 95 | #include <linux/init.h> |
96 | #include <linux/compiler.h> | 96 | #include <linux/compiler.h> |
97 | #include <linux/cpuset.h> | 97 | #include <linux/cpuset.h> |
98 | #include <linux/seq_file.h> | 98 | #include <linux/seq_file.h> |
99 | #include <linux/notifier.h> | 99 | #include <linux/notifier.h> |
100 | #include <linux/kallsyms.h> | 100 | #include <linux/kallsyms.h> |
101 | #include <linux/cpu.h> | 101 | #include <linux/cpu.h> |
102 | #include <linux/sysctl.h> | 102 | #include <linux/sysctl.h> |
103 | #include <linux/module.h> | 103 | #include <linux/module.h> |
104 | #include <linux/rcupdate.h> | 104 | #include <linux/rcupdate.h> |
105 | #include <linux/string.h> | 105 | #include <linux/string.h> |
106 | #include <linux/uaccess.h> | 106 | #include <linux/uaccess.h> |
107 | #include <linux/nodemask.h> | 107 | #include <linux/nodemask.h> |
108 | #include <linux/mempolicy.h> | 108 | #include <linux/mempolicy.h> |
109 | #include <linux/mutex.h> | 109 | #include <linux/mutex.h> |
110 | #include <linux/fault-inject.h> | 110 | #include <linux/fault-inject.h> |
111 | #include <linux/rtmutex.h> | 111 | #include <linux/rtmutex.h> |
112 | #include <linux/reciprocal_div.h> | 112 | #include <linux/reciprocal_div.h> |
113 | 113 | ||
114 | #include <asm/cacheflush.h> | 114 | #include <asm/cacheflush.h> |
115 | #include <asm/tlbflush.h> | 115 | #include <asm/tlbflush.h> |
116 | #include <asm/page.h> | 116 | #include <asm/page.h> |
117 | 117 | ||
118 | /* | 118 | /* |
119 | * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. | 119 | * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. |
120 | * 0 for faster, smaller code (especially in the critical paths). | 120 | * 0 for faster, smaller code (especially in the critical paths). |
121 | * | 121 | * |
122 | * STATS - 1 to collect stats for /proc/slabinfo. | 122 | * STATS - 1 to collect stats for /proc/slabinfo. |
123 | * 0 for faster, smaller code (especially in the critical paths). | 123 | * 0 for faster, smaller code (especially in the critical paths). |
124 | * | 124 | * |
125 | * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible) | 125 | * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible) |
126 | */ | 126 | */ |
127 | 127 | ||
128 | #ifdef CONFIG_DEBUG_SLAB | 128 | #ifdef CONFIG_DEBUG_SLAB |
129 | #define DEBUG 1 | 129 | #define DEBUG 1 |
130 | #define STATS 1 | 130 | #define STATS 1 |
131 | #define FORCED_DEBUG 1 | 131 | #define FORCED_DEBUG 1 |
132 | #else | 132 | #else |
133 | #define DEBUG 0 | 133 | #define DEBUG 0 |
134 | #define STATS 0 | 134 | #define STATS 0 |
135 | #define FORCED_DEBUG 0 | 135 | #define FORCED_DEBUG 0 |
136 | #endif | 136 | #endif |
137 | 137 | ||
138 | /* Shouldn't this be in a header file somewhere? */ | 138 | /* Shouldn't this be in a header file somewhere? */ |
139 | #define BYTES_PER_WORD sizeof(void *) | 139 | #define BYTES_PER_WORD sizeof(void *) |
140 | 140 | ||
141 | #ifndef cache_line_size | 141 | #ifndef cache_line_size |
142 | #define cache_line_size() L1_CACHE_BYTES | 142 | #define cache_line_size() L1_CACHE_BYTES |
143 | #endif | 143 | #endif |
144 | 144 | ||
145 | #ifndef ARCH_KMALLOC_MINALIGN | 145 | #ifndef ARCH_KMALLOC_MINALIGN |
146 | /* | 146 | /* |
147 | * Enforce a minimum alignment for the kmalloc caches. | 147 | * Enforce a minimum alignment for the kmalloc caches. |
148 | * Usually, the kmalloc caches are cache_line_size() aligned, except when | 148 | * Usually, the kmalloc caches are cache_line_size() aligned, except when |
149 | * DEBUG and FORCED_DEBUG are enabled, in which case they are BYTES_PER_WORD aligned. | 149 | * DEBUG and FORCED_DEBUG are enabled, in which case they are BYTES_PER_WORD aligned. |
150 | * Some archs want to perform DMA into kmalloc caches and need a guaranteed | 150 | * Some archs want to perform DMA into kmalloc caches and need a guaranteed |
151 | * alignment larger than the alignment of a 64-bit integer. | 151 | * alignment larger than the alignment of a 64-bit integer. |
152 | * ARCH_KMALLOC_MINALIGN allows that. | 152 | * ARCH_KMALLOC_MINALIGN allows that. |
153 | * Note that increasing this value may disable some debug features. | 153 | * Note that increasing this value may disable some debug features. |
154 | */ | 154 | */ |
155 | #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) | 155 | #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) |
156 | #endif | 156 | #endif |
157 | 157 | ||
158 | #ifndef ARCH_SLAB_MINALIGN | 158 | #ifndef ARCH_SLAB_MINALIGN |
159 | /* | 159 | /* |
160 | * Enforce a minimum alignment for all caches. | 160 | * Enforce a minimum alignment for all caches. |
161 | * Intended for archs that get misalignment faults even for BYTES_PER_WORD | 161 | * Intended for archs that get misalignment faults even for BYTES_PER_WORD |
162 | * aligned buffers. Includes ARCH_KMALLOC_MINALIGN. | 162 | * aligned buffers. Includes ARCH_KMALLOC_MINALIGN. |
163 | * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB; it disables | 163 | * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB; it disables |
164 | * some debug features. | 164 | * some debug features. |
165 | */ | 165 | */ |
166 | #define ARCH_SLAB_MINALIGN 0 | 166 | #define ARCH_SLAB_MINALIGN 0 |
167 | #endif | 167 | #endif |
168 | 168 | ||
169 | #ifndef ARCH_KMALLOC_FLAGS | 169 | #ifndef ARCH_KMALLOC_FLAGS |
170 | #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN | 170 | #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN |
171 | #endif | 171 | #endif |
172 | 172 | ||
173 | /* Legal flag mask for kmem_cache_create(). */ | 173 | /* Legal flag mask for kmem_cache_create(). */ |
174 | #if DEBUG | 174 | #if DEBUG |
175 | # define CREATE_MASK (SLAB_RED_ZONE | \ | 175 | # define CREATE_MASK (SLAB_RED_ZONE | \ |
176 | SLAB_POISON | SLAB_HWCACHE_ALIGN | \ | 176 | SLAB_POISON | SLAB_HWCACHE_ALIGN | \ |
177 | SLAB_CACHE_DMA | \ | 177 | SLAB_CACHE_DMA | \ |
178 | SLAB_STORE_USER | \ | 178 | SLAB_STORE_USER | \ |
179 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 179 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
180 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) | 180 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) |
181 | #else | 181 | #else |
182 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ | 182 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ |
183 | SLAB_CACHE_DMA | \ | 183 | SLAB_CACHE_DMA | \ |
184 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 184 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
185 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) | 185 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) |
186 | #endif | 186 | #endif |
187 | 187 | ||
188 | /* | 188 | /* |
189 | * kmem_bufctl_t: | 189 | * kmem_bufctl_t: |
190 | * | 190 | * |
191 | * Bufctls are used for linking objs within a slab via | 191 | * Bufctls are used for linking objs within a slab via |
192 | * linked offsets. | 192 | * linked offsets. |
193 | * | 193 | * |
194 | * This implementation relies on "struct page" for locating the cache & | 194 | * This implementation relies on "struct page" for locating the cache & |
195 | * slab an object belongs to. | 195 | * slab an object belongs to. |
196 | * This allows the bufctl structure to be small (one int), but limits | 196 | * This allows the bufctl structure to be small (one int), but limits |
197 | * the number of objects a slab (not a cache) can contain when off-slab | 197 | * the number of objects a slab (not a cache) can contain when off-slab |
198 | * bufctls are used. The limit is the size of the largest general cache | 198 | * bufctls are used. The limit is the size of the largest general cache |
199 | * that does not use off-slab slabs. | 199 | * that does not use off-slab slabs. |
200 | * For 32bit archs with 4 kB pages, this is 56. | 200 | * For 32bit archs with 4 kB pages, this is 56. |
201 | * This is not serious, as it is only for large objects, when it is unwise | 201 | * This is not serious, as it is only for large objects, when it is unwise |
202 | * to have too many per slab. | 202 | * to have too many per slab. |
203 | * Note: This limit can be raised by introducing a general cache whose size | 203 | * Note: This limit can be raised by introducing a general cache whose size |
204 | * is less than 512 (PAGE_SIZE<<3), but greater than 256. | 204 | * is less than 512 (PAGE_SIZE<<3), but greater than 256. |
205 | */ | 205 | */ |
206 | 206 | ||
207 | typedef unsigned int kmem_bufctl_t; | 207 | typedef unsigned int kmem_bufctl_t; |
208 | #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) | 208 | #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) |
209 | #define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) | 209 | #define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) |
210 | #define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) | 210 | #define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) |
211 | #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) | 211 | #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) |
212 | 212 | ||
213 | /* | 213 | /* |
214 | * struct slab | 214 | * struct slab |
215 | * | 215 | * |
216 | * Manages the objs in a slab. Placed either at the beginning of mem allocated | 216 | * Manages the objs in a slab. Placed either at the beginning of mem allocated |
217 | * for a slab, or allocated from a general cache. | 217 | * for a slab, or allocated from a general cache. |
218 | * Slabs are chained into three lists: fully used, partial, fully free slabs. | 218 | * Slabs are chained into three lists: fully used, partial, fully free slabs. |
219 | */ | 219 | */ |
220 | struct slab { | 220 | struct slab { |
221 | struct list_head list; | 221 | struct list_head list; |
222 | unsigned long colouroff; | 222 | unsigned long colouroff; |
223 | void *s_mem; /* including colour offset */ | 223 | void *s_mem; /* including colour offset */ |
224 | unsigned int inuse; /* num of objs active in slab */ | 224 | unsigned int inuse; /* num of objs active in slab */ |
225 | kmem_bufctl_t free; | 225 | kmem_bufctl_t free; |
226 | unsigned short nodeid; | 226 | unsigned short nodeid; |
227 | }; | 227 | }; |
228 | 228 | ||
229 | /* | 229 | /* |
230 | * struct slab_rcu | 230 | * struct slab_rcu |
231 | * | 231 | * |
232 | * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to | 232 | * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to |
233 | * arrange for kmem_freepages to be called via RCU. This is useful if | 233 | * arrange for kmem_freepages to be called via RCU. This is useful if |
234 | * we need to approach a kernel structure obliquely, from its address | 234 | * we need to approach a kernel structure obliquely, from its address |
235 | * obtained without the usual locking. We can lock the structure to | 235 | * obtained without the usual locking. We can lock the structure to |
236 | * stabilize it and check it's still at the given address, only if we | 236 | * stabilize it and check it's still at the given address, only if we |
237 | * can be sure that the memory has not been meanwhile reused for some | 237 | * can be sure that the memory has not been meanwhile reused for some |
238 | * other kind of object (which our subsystem's lock might corrupt). | 238 | * other kind of object (which our subsystem's lock might corrupt). |
239 | * | 239 | * |
240 | * rcu_read_lock before reading the address, then rcu_read_unlock after | 240 | * rcu_read_lock before reading the address, then rcu_read_unlock after |
241 | * taking the spinlock within the structure expected at that address. | 241 | * taking the spinlock within the structure expected at that address. |
242 | * | 242 | * |
243 | * We assume struct slab_rcu can overlay struct slab when destroying. | 243 | * We assume struct slab_rcu can overlay struct slab when destroying. |
244 | */ | 244 | */ |
245 | struct slab_rcu { | 245 | struct slab_rcu { |
246 | struct rcu_head head; | 246 | struct rcu_head head; |
247 | struct kmem_cache *cachep; | 247 | struct kmem_cache *cachep; |
248 | void *addr; | 248 | void *addr; |
249 | }; | 249 | }; |
250 | 250 | ||
251 | /* | 251 | /* |
252 | * struct array_cache | 252 | * struct array_cache |
253 | * | 253 | * |
254 | * Purpose: | 254 | * Purpose: |
255 | * - LIFO ordering, to hand out cache-warm objects from _alloc | 255 | * - LIFO ordering, to hand out cache-warm objects from _alloc |
256 | * - reduce the number of linked list operations | 256 | * - reduce the number of linked list operations |
257 | * - reduce spinlock operations | 257 | * - reduce spinlock operations |
258 | * | 258 | * |
259 | * The limit is stored in the per-cpu structure to reduce the data cache | 259 | * The limit is stored in the per-cpu structure to reduce the data cache |
260 | * footprint. | 260 | * footprint. |
261 | * | 261 | * |
262 | */ | 262 | */ |
263 | struct array_cache { | 263 | struct array_cache { |
264 | unsigned int avail; | 264 | unsigned int avail; |
265 | unsigned int limit; | 265 | unsigned int limit; |
266 | unsigned int batchcount; | 266 | unsigned int batchcount; |
267 | unsigned int touched; | 267 | unsigned int touched; |
268 | spinlock_t lock; | 268 | spinlock_t lock; |
269 | void *entry[0]; /* | 269 | void *entry[0]; /* |
270 | * Must have this definition in here for the proper | 270 | * Must have this definition in here for the proper |
271 | * alignment of array_cache. Also simplifies accessing | 271 | * alignment of array_cache. Also simplifies accessing |
272 | * the entries. | 272 | * the entries. |
273 | * [0] is for gcc 2.95. It should really be []. | 273 | * [0] is for gcc 2.95. It should really be []. |
274 | */ | 274 | */ |
275 | }; | 275 | }; |
276 | 276 | ||
277 | /* | 277 | /* |
278 | * bootstrap: The caches do not work without cpuarrays anymore, but the | 278 | * bootstrap: The caches do not work without cpuarrays anymore, but the |
279 | * cpuarrays are allocated from the generic caches... | 279 | * cpuarrays are allocated from the generic caches... |
280 | */ | 280 | */ |
281 | #define BOOT_CPUCACHE_ENTRIES 1 | 281 | #define BOOT_CPUCACHE_ENTRIES 1 |
282 | struct arraycache_init { | 282 | struct arraycache_init { |
283 | struct array_cache cache; | 283 | struct array_cache cache; |
284 | void *entries[BOOT_CPUCACHE_ENTRIES]; | 284 | void *entries[BOOT_CPUCACHE_ENTRIES]; |
285 | }; | 285 | }; |
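Because entry[] is a flexible array member (spelled [0] for gcc 2.95), each array_cache is carved out of a single allocation sized for its limit; a sketch of the pattern alloc_arraycache() uses later in this file (parameter values are the caller's tunables):

	static struct array_cache *sketch_alloc_arraycache(int limit, int batchcount)
	{
		/* header plus 'limit' object pointers in one block */
		struct array_cache *ac = kmalloc(sizeof(struct array_cache) +
						 limit * sizeof(void *), GFP_KERNEL);

		if (ac) {
			ac->avail = 0;
			ac->limit = limit;
			ac->batchcount = batchcount;
			ac->touched = 0;
			spin_lock_init(&ac->lock);
		}
		return ac;
	}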
286 | 286 | ||
287 | /* | 287 | /* |
288 | * The slab lists for all objects. | 288 | * The slab lists for all objects. |
289 | */ | 289 | */ |
290 | struct kmem_list3 { | 290 | struct kmem_list3 { |
291 | struct list_head slabs_partial; /* partial list first, better asm code */ | 291 | struct list_head slabs_partial; /* partial list first, better asm code */ |
292 | struct list_head slabs_full; | 292 | struct list_head slabs_full; |
293 | struct list_head slabs_free; | 293 | struct list_head slabs_free; |
294 | unsigned long free_objects; | 294 | unsigned long free_objects; |
295 | unsigned int free_limit; | 295 | unsigned int free_limit; |
296 | unsigned int colour_next; /* Per-node cache coloring */ | 296 | unsigned int colour_next; /* Per-node cache coloring */ |
297 | spinlock_t list_lock; | 297 | spinlock_t list_lock; |
298 | struct array_cache *shared; /* shared per node */ | 298 | struct array_cache *shared; /* shared per node */ |
299 | struct array_cache **alien; /* on other nodes */ | 299 | struct array_cache **alien; /* on other nodes */ |
300 | unsigned long next_reap; /* updated without locking */ | 300 | unsigned long next_reap; /* updated without locking */ |
301 | int free_touched; /* updated without locking */ | 301 | int free_touched; /* updated without locking */ |
302 | }; | 302 | }; |
303 | 303 | ||
304 | /* | 304 | /* |
305 | * Need this for bootstrapping a per node allocator. | 305 | * Need this for bootstrapping a per node allocator. |
306 | */ | 306 | */ |
307 | #define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1) | 307 | #define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1) |
308 | struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; | 308 | struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; |
309 | #define CACHE_CACHE 0 | 309 | #define CACHE_CACHE 0 |
310 | #define SIZE_AC 1 | 310 | #define SIZE_AC 1 |
311 | #define SIZE_L3 (1 + MAX_NUMNODES) | 311 | #define SIZE_L3 (1 + MAX_NUMNODES) |
312 | 312 | ||
313 | static int drain_freelist(struct kmem_cache *cache, | 313 | static int drain_freelist(struct kmem_cache *cache, |
314 | struct kmem_list3 *l3, int tofree); | 314 | struct kmem_list3 *l3, int tofree); |
315 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, | 315 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, |
316 | int node); | 316 | int node); |
317 | static int enable_cpucache(struct kmem_cache *cachep); | 317 | static int enable_cpucache(struct kmem_cache *cachep); |
318 | static void cache_reap(struct work_struct *unused); | 318 | static void cache_reap(struct work_struct *unused); |
319 | 319 | ||
320 | /* | 320 | /* |
321 | * This function must be completely optimized away if a constant is passed to | 321 | * This function must be completely optimized away if a constant is passed to |
322 | * it. Mostly the same as what is in linux/slab.h except it returns an index. | 322 | * it. Mostly the same as what is in linux/slab.h except it returns an index. |
323 | */ | 323 | */ |
324 | static __always_inline int index_of(const size_t size) | 324 | static __always_inline int index_of(const size_t size) |
325 | { | 325 | { |
326 | extern void __bad_size(void); | 326 | extern void __bad_size(void); |
327 | 327 | ||
328 | if (__builtin_constant_p(size)) { | 328 | if (__builtin_constant_p(size)) { |
329 | int i = 0; | 329 | int i = 0; |
330 | 330 | ||
331 | #define CACHE(x) \ | 331 | #define CACHE(x) \ |
332 | if (size <=x) \ | 332 | if (size <=x) \ |
333 | return i; \ | 333 | return i; \ |
334 | else \ | 334 | else \ |
335 | i++; | 335 | i++; |
336 | #include "linux/kmalloc_sizes.h" | 336 | #include "linux/kmalloc_sizes.h" |
337 | #undef CACHE | 337 | #undef CACHE |
338 | __bad_size(); | 338 | __bad_size(); |
339 | } else | 339 | } else |
340 | __bad_size(); | 340 | __bad_size(); |
341 | return 0; | 341 | return 0; |
342 | } | 342 | } |
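Each CACHE(x) line in linux/kmalloc_sizes.h expands to one step of the if/else chain, so for a compile-time-constant size the whole function folds to a single integer. On a typical 4 kB-page configuration the front of the expansion is effectively:

	int i = 0;

	if (size <= 32)  return i; else i++;	/* CACHE(32)  */
	if (size <= 64)  return i; else i++;	/* CACHE(64)  */
	if (size <= 96)  return i; else i++;	/* CACHE(96)  */
	if (size <= 128) return i; else i++;	/* CACHE(128) */
	/* ... one step per CACHE() entry ... */
	__bad_size();	/* deliberately undefined: link error if nothing matched */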
343 | 343 | ||
344 | static int slab_early_init = 1; | 344 | static int slab_early_init = 1; |
345 | 345 | ||
346 | #define INDEX_AC index_of(sizeof(struct arraycache_init)) | 346 | #define INDEX_AC index_of(sizeof(struct arraycache_init)) |
347 | #define INDEX_L3 index_of(sizeof(struct kmem_list3)) | 347 | #define INDEX_L3 index_of(sizeof(struct kmem_list3)) |
348 | 348 | ||
349 | static void kmem_list3_init(struct kmem_list3 *parent) | 349 | static void kmem_list3_init(struct kmem_list3 *parent) |
350 | { | 350 | { |
351 | INIT_LIST_HEAD(&parent->slabs_full); | 351 | INIT_LIST_HEAD(&parent->slabs_full); |
352 | INIT_LIST_HEAD(&parent->slabs_partial); | 352 | INIT_LIST_HEAD(&parent->slabs_partial); |
353 | INIT_LIST_HEAD(&parent->slabs_free); | 353 | INIT_LIST_HEAD(&parent->slabs_free); |
354 | parent->shared = NULL; | 354 | parent->shared = NULL; |
355 | parent->alien = NULL; | 355 | parent->alien = NULL; |
356 | parent->colour_next = 0; | 356 | parent->colour_next = 0; |
357 | spin_lock_init(&parent->list_lock); | 357 | spin_lock_init(&parent->list_lock); |
358 | parent->free_objects = 0; | 358 | parent->free_objects = 0; |
359 | parent->free_touched = 0; | 359 | parent->free_touched = 0; |
360 | } | 360 | } |
361 | 361 | ||
362 | #define MAKE_LIST(cachep, listp, slab, nodeid) \ | 362 | #define MAKE_LIST(cachep, listp, slab, nodeid) \ |
363 | do { \ | 363 | do { \ |
364 | INIT_LIST_HEAD(listp); \ | 364 | INIT_LIST_HEAD(listp); \ |
365 | list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ | 365 | list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ |
366 | } while (0) | 366 | } while (0) |
367 | 367 | ||
368 | #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ | 368 | #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ |
369 | do { \ | 369 | do { \ |
370 | MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ | 370 | MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ |
371 | MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ | 371 | MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ |
372 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ | 372 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ |
373 | } while (0) | 373 | } while (0) |
374 | 374 | ||
375 | /* | 375 | /* |
376 | * struct kmem_cache | 376 | * struct kmem_cache |
377 | * | 377 | * |
378 | * manages a cache. | 378 | * manages a cache. |
379 | */ | 379 | */ |
380 | 380 | ||
381 | struct kmem_cache { | 381 | struct kmem_cache { |
382 | /* 1) per-cpu data, touched during every alloc/free */ | 382 | /* 1) per-cpu data, touched during every alloc/free */ |
383 | struct array_cache *array[NR_CPUS]; | 383 | struct array_cache *array[NR_CPUS]; |
384 | /* 2) Cache tunables. Protected by cache_chain_mutex */ | 384 | /* 2) Cache tunables. Protected by cache_chain_mutex */ |
385 | unsigned int batchcount; | 385 | unsigned int batchcount; |
386 | unsigned int limit; | 386 | unsigned int limit; |
387 | unsigned int shared; | 387 | unsigned int shared; |
388 | 388 | ||
389 | unsigned int buffer_size; | 389 | unsigned int buffer_size; |
390 | u32 reciprocal_buffer_size; | 390 | u32 reciprocal_buffer_size; |
391 | /* 3) touched by every alloc & free from the backend */ | 391 | /* 3) touched by every alloc & free from the backend */ |
392 | 392 | ||
393 | unsigned int flags; /* constant flags */ | 393 | unsigned int flags; /* constant flags */ |
394 | unsigned int num; /* # of objs per slab */ | 394 | unsigned int num; /* # of objs per slab */ |
395 | 395 | ||
396 | /* 4) cache_grow/shrink */ | 396 | /* 4) cache_grow/shrink */ |
397 | /* order of pgs per slab (2^n) */ | 397 | /* order of pgs per slab (2^n) */ |
398 | unsigned int gfporder; | 398 | unsigned int gfporder; |
399 | 399 | ||
400 | /* force GFP flags, e.g. GFP_DMA */ | 400 | /* force GFP flags, e.g. GFP_DMA */ |
401 | gfp_t gfpflags; | 401 | gfp_t gfpflags; |
402 | 402 | ||
403 | size_t colour; /* cache colouring range */ | 403 | size_t colour; /* cache colouring range */ |
404 | unsigned int colour_off; /* colour offset */ | 404 | unsigned int colour_off; /* colour offset */ |
405 | struct kmem_cache *slabp_cache; | 405 | struct kmem_cache *slabp_cache; |
406 | unsigned int slab_size; | 406 | unsigned int slab_size; |
407 | unsigned int dflags; /* dynamic flags */ | 407 | unsigned int dflags; /* dynamic flags */ |
408 | 408 | ||
409 | /* constructor func */ | 409 | /* constructor func */ |
410 | void (*ctor) (void *, struct kmem_cache *, unsigned long); | 410 | void (*ctor) (void *, struct kmem_cache *, unsigned long); |
411 | 411 | ||
412 | /* de-constructor func */ | 412 | /* de-constructor func */ |
413 | void (*dtor) (void *, struct kmem_cache *, unsigned long); | 413 | void (*dtor) (void *, struct kmem_cache *, unsigned long); |
414 | 414 | ||
415 | /* 5) cache creation/removal */ | 415 | /* 5) cache creation/removal */ |
416 | const char *name; | 416 | const char *name; |
417 | struct list_head next; | 417 | struct list_head next; |
418 | 418 | ||
419 | /* 6) statistics */ | 419 | /* 6) statistics */ |
420 | #if STATS | 420 | #if STATS |
421 | unsigned long num_active; | 421 | unsigned long num_active; |
422 | unsigned long num_allocations; | 422 | unsigned long num_allocations; |
423 | unsigned long high_mark; | 423 | unsigned long high_mark; |
424 | unsigned long grown; | 424 | unsigned long grown; |
425 | unsigned long reaped; | 425 | unsigned long reaped; |
426 | unsigned long errors; | 426 | unsigned long errors; |
427 | unsigned long max_freeable; | 427 | unsigned long max_freeable; |
428 | unsigned long node_allocs; | 428 | unsigned long node_allocs; |
429 | unsigned long node_frees; | 429 | unsigned long node_frees; |
430 | unsigned long node_overflow; | 430 | unsigned long node_overflow; |
431 | atomic_t allochit; | 431 | atomic_t allochit; |
432 | atomic_t allocmiss; | 432 | atomic_t allocmiss; |
433 | atomic_t freehit; | 433 | atomic_t freehit; |
434 | atomic_t freemiss; | 434 | atomic_t freemiss; |
435 | #endif | 435 | #endif |
436 | #if DEBUG | 436 | #if DEBUG |
437 | /* | 437 | /* |
438 | * If debugging is enabled, then the allocator can add additional | 438 | * If debugging is enabled, then the allocator can add additional |
439 | * fields and/or padding to every object. buffer_size contains the total | 439 | * fields and/or padding to every object. buffer_size contains the total |
440 | * object size including these internal fields, the following two | 440 | * object size including these internal fields, the following two |
441 | * variables contain the offset to the user object and its size. | 441 | * variables contain the offset to the user object and its size. |
442 | */ | 442 | */ |
443 | int obj_offset; | 443 | int obj_offset; |
444 | int obj_size; | 444 | int obj_size; |
445 | #endif | 445 | #endif |
446 | /* | 446 | /* |
447 | * We put nodelists[] at the end of kmem_cache, because we want to size | 447 | * We put nodelists[] at the end of kmem_cache, because we want to size |
448 | * this array to nr_node_ids slots instead of MAX_NUMNODES | 448 | * this array to nr_node_ids slots instead of MAX_NUMNODES |
449 | * (see kmem_cache_init()) | 449 | * (see kmem_cache_init()) |
450 | * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache | 450 | * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache |
451 | * is statically defined, so we reserve the max number of nodes. | 451 | * is statically defined, so we reserve the max number of nodes. |
452 | */ | 452 | */ |
453 | struct kmem_list3 *nodelists[MAX_NUMNODES]; | 453 | struct kmem_list3 *nodelists[MAX_NUMNODES]; |
454 | /* | 454 | /* |
455 | * Do not add fields after nodelists[] | 455 | * Do not add fields after nodelists[] |
456 | */ | 456 | */ |
457 | }; | 457 | }; |
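reciprocal_buffer_size holds a multiplier precomputed with reciprocal_value(buffer_size) so hot paths can turn a byte offset into an object index without an integer divide; obj_to_index() later in this file uses it roughly as:

	static inline unsigned int obj_to_index(const struct kmem_cache *cache,
						const struct slab *slab, void *obj)
	{
		u32 offset = (obj - slab->s_mem);	/* byte offset into the slab */

		/* same result as offset / cache->buffer_size, without the divide */
		return reciprocal_divide(offset, cache->reciprocal_buffer_size);
	}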
458 | 458 | ||
459 | #define CFLGS_OFF_SLAB (0x80000000UL) | 459 | #define CFLGS_OFF_SLAB (0x80000000UL) |
460 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) | 460 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) |
461 | 461 | ||
462 | #define BATCHREFILL_LIMIT 16 | 462 | #define BATCHREFILL_LIMIT 16 |
463 | /* | 463 | /* |
464 | * Optimization question: fewer reaps means a lower probability of unnecessary | 464 | * Optimization question: fewer reaps means a lower probability of unnecessary |
465 | * cpucache drain/refill cycles. | 465 | * cpucache drain/refill cycles. |
466 | * | 466 | * |
467 | * OTOH the cpuarrays can contain lots of objects, | 467 | * OTOH the cpuarrays can contain lots of objects, |
468 | * which could lock up otherwise freeable slabs. | 468 | * which could lock up otherwise freeable slabs. |
469 | */ | 469 | */ |
470 | #define REAPTIMEOUT_CPUC (2*HZ) | 470 | #define REAPTIMEOUT_CPUC (2*HZ) |
471 | #define REAPTIMEOUT_LIST3 (4*HZ) | 471 | #define REAPTIMEOUT_LIST3 (4*HZ) |
472 | 472 | ||
473 | #if STATS | 473 | #if STATS |
474 | #define STATS_INC_ACTIVE(x) ((x)->num_active++) | 474 | #define STATS_INC_ACTIVE(x) ((x)->num_active++) |
475 | #define STATS_DEC_ACTIVE(x) ((x)->num_active--) | 475 | #define STATS_DEC_ACTIVE(x) ((x)->num_active--) |
476 | #define STATS_INC_ALLOCED(x) ((x)->num_allocations++) | 476 | #define STATS_INC_ALLOCED(x) ((x)->num_allocations++) |
477 | #define STATS_INC_GROWN(x) ((x)->grown++) | 477 | #define STATS_INC_GROWN(x) ((x)->grown++) |
478 | #define STATS_ADD_REAPED(x,y) ((x)->reaped += (y)) | 478 | #define STATS_ADD_REAPED(x,y) ((x)->reaped += (y)) |
479 | #define STATS_SET_HIGH(x) \ | 479 | #define STATS_SET_HIGH(x) \ |
480 | do { \ | 480 | do { \ |
481 | if ((x)->num_active > (x)->high_mark) \ | 481 | if ((x)->num_active > (x)->high_mark) \ |
482 | (x)->high_mark = (x)->num_active; \ | 482 | (x)->high_mark = (x)->num_active; \ |
483 | } while (0) | 483 | } while (0) |
484 | #define STATS_INC_ERR(x) ((x)->errors++) | 484 | #define STATS_INC_ERR(x) ((x)->errors++) |
485 | #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) | 485 | #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) |
486 | #define STATS_INC_NODEFREES(x) ((x)->node_frees++) | 486 | #define STATS_INC_NODEFREES(x) ((x)->node_frees++) |
487 | #define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++) | 487 | #define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++) |
488 | #define STATS_SET_FREEABLE(x, i) \ | 488 | #define STATS_SET_FREEABLE(x, i) \ |
489 | do { \ | 489 | do { \ |
490 | if ((x)->max_freeable < i) \ | 490 | if ((x)->max_freeable < i) \ |
491 | (x)->max_freeable = i; \ | 491 | (x)->max_freeable = i; \ |
492 | } while (0) | 492 | } while (0) |
493 | #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) | 493 | #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) |
494 | #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) | 494 | #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) |
495 | #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) | 495 | #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) |
496 | #define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss) | 496 | #define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss) |
497 | #else | 497 | #else |
498 | #define STATS_INC_ACTIVE(x) do { } while (0) | 498 | #define STATS_INC_ACTIVE(x) do { } while (0) |
499 | #define STATS_DEC_ACTIVE(x) do { } while (0) | 499 | #define STATS_DEC_ACTIVE(x) do { } while (0) |
500 | #define STATS_INC_ALLOCED(x) do { } while (0) | 500 | #define STATS_INC_ALLOCED(x) do { } while (0) |
501 | #define STATS_INC_GROWN(x) do { } while (0) | 501 | #define STATS_INC_GROWN(x) do { } while (0) |
502 | #define STATS_ADD_REAPED(x,y) do { } while (0) | 502 | #define STATS_ADD_REAPED(x,y) do { } while (0) |
503 | #define STATS_SET_HIGH(x) do { } while (0) | 503 | #define STATS_SET_HIGH(x) do { } while (0) |
504 | #define STATS_INC_ERR(x) do { } while (0) | 504 | #define STATS_INC_ERR(x) do { } while (0) |
505 | #define STATS_INC_NODEALLOCS(x) do { } while (0) | 505 | #define STATS_INC_NODEALLOCS(x) do { } while (0) |
506 | #define STATS_INC_NODEFREES(x) do { } while (0) | 506 | #define STATS_INC_NODEFREES(x) do { } while (0) |
507 | #define STATS_INC_ACOVERFLOW(x) do { } while (0) | 507 | #define STATS_INC_ACOVERFLOW(x) do { } while (0) |
508 | #define STATS_SET_FREEABLE(x, i) do { } while (0) | 508 | #define STATS_SET_FREEABLE(x, i) do { } while (0) |
509 | #define STATS_INC_ALLOCHIT(x) do { } while (0) | 509 | #define STATS_INC_ALLOCHIT(x) do { } while (0) |
510 | #define STATS_INC_ALLOCMISS(x) do { } while (0) | 510 | #define STATS_INC_ALLOCMISS(x) do { } while (0) |
511 | #define STATS_INC_FREEHIT(x) do { } while (0) | 511 | #define STATS_INC_FREEHIT(x) do { } while (0) |
512 | #define STATS_INC_FREEMISS(x) do { } while (0) | 512 | #define STATS_INC_FREEMISS(x) do { } while (0) |
513 | #endif | 513 | #endif |
514 | 514 | ||
515 | #if DEBUG | 515 | #if DEBUG |
516 | 516 | ||
517 | /* | 517 | /* |
518 | * memory layout of objects: | 518 | * memory layout of objects: |
519 | * 0 : objp | 519 | * 0 : objp |
520 | * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that | 520 | * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that |
521 | * the end of an object is aligned with the end of the real | 521 | * the end of an object is aligned with the end of the real |
522 | * allocation. Catches writes behind the end of the allocation. | 522 | * allocation. Catches writes behind the end of the allocation. |
523 | * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1: | 523 | * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1: |
524 | * redzone word. | 524 | * redzone word. |
525 | * cachep->obj_offset: The real object. | 525 | * cachep->obj_offset: The real object. |
526 | * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] | 526 | * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] |
527 | * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address | 527 | * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address |
528 | * [BYTES_PER_WORD long] | 528 | * [BYTES_PER_WORD long] |
529 | */ | 529 | */ |
530 | static int obj_offset(struct kmem_cache *cachep) | 530 | static int obj_offset(struct kmem_cache *cachep) |
531 | { | 531 | { |
532 | return cachep->obj_offset; | 532 | return cachep->obj_offset; |
533 | } | 533 | } |
534 | 534 | ||
535 | static int obj_size(struct kmem_cache *cachep) | 535 | static int obj_size(struct kmem_cache *cachep) |
536 | { | 536 | { |
537 | return cachep->obj_size; | 537 | return cachep->obj_size; |
538 | } | 538 | } |
539 | 539 | ||
540 | static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp) | 540 | static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp) |
541 | { | 541 | { |
542 | BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); | 542 | BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); |
543 | return (unsigned long long*) (objp + obj_offset(cachep) - | 543 | return (unsigned long long*) (objp + obj_offset(cachep) - |
544 | sizeof(unsigned long long)); | 544 | sizeof(unsigned long long)); |
545 | } | 545 | } |
546 | 546 | ||
547 | static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp) | 547 | static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp) |
548 | { | 548 | { |
549 | BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); | 549 | BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); |
550 | if (cachep->flags & SLAB_STORE_USER) | 550 | if (cachep->flags & SLAB_STORE_USER) |
551 | return (unsigned long long *)(objp + cachep->buffer_size - | 551 | return (unsigned long long *)(objp + cachep->buffer_size - |
552 | sizeof(unsigned long long) - | 552 | sizeof(unsigned long long) - |
553 | BYTES_PER_WORD); | 553 | BYTES_PER_WORD); |
554 | return (unsigned long long *) (objp + cachep->buffer_size - | 554 | return (unsigned long long *) (objp + cachep->buffer_size - |
555 | sizeof(unsigned long long)); | 555 | sizeof(unsigned long long)); |
556 | } | 556 | } |
557 | 557 | ||
558 | static void **dbg_userword(struct kmem_cache *cachep, void *objp) | 558 | static void **dbg_userword(struct kmem_cache *cachep, void *objp) |
559 | { | 559 | { |
560 | BUG_ON(!(cachep->flags & SLAB_STORE_USER)); | 560 | BUG_ON(!(cachep->flags & SLAB_STORE_USER)); |
561 | return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD); | 561 | return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD); |
562 | } | 562 | } |
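To make the DEBUG layout concrete: for a hypothetical 64-bit cache (BYTES_PER_WORD == 8) with both SLAB_RED_ZONE and SLAB_STORE_USER set, buffer_size == 128 and obj_offset == 16, the helpers above resolve to the following offsets (a worked example under assumed numbers, not output of this code):

    objp +   0 ..   7    padding
    objp +   8 ..  15    first redzone word    (obj_offset - 8)
    objp +  16 .. 111    the real object
    objp + 112 .. 119    second redzone word   (buffer_size - 2*8)
    objp + 120 .. 127    last caller address   (buffer_size - 8)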
563 | 563 | ||
564 | #else | 564 | #else |
565 | 565 | ||
566 | #define obj_offset(x) 0 | 566 | #define obj_offset(x) 0 |
567 | #define obj_size(cachep) (cachep->buffer_size) | 567 | #define obj_size(cachep) (cachep->buffer_size) |
568 | #define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) | 568 | #define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) |
569 | #define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) | 569 | #define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) |
570 | #define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;}) | 570 | #define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;}) |
571 | 571 | ||
572 | #endif | 572 | #endif |
573 | 573 | ||
574 | /* | 574 | /* |
575 | * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp | 575 | * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp |
576 | * order. | 576 | * order. |
577 | */ | 577 | */ |
578 | #if defined(CONFIG_LARGE_ALLOCS) | 578 | #if defined(CONFIG_LARGE_ALLOCS) |
579 | #define MAX_OBJ_ORDER 13 /* up to 32MB */ | 579 | #define MAX_OBJ_ORDER 13 /* up to 32MB */ |
580 | #define MAX_GFP_ORDER 13 /* up to 32MB */ | 580 | #define MAX_GFP_ORDER 13 /* up to 32MB */ |
581 | #elif defined(CONFIG_MMU) | 581 | #elif defined(CONFIG_MMU) |
582 | #define MAX_OBJ_ORDER 5 /* 32 pages */ | 582 | #define MAX_OBJ_ORDER 5 /* 32 pages */ |
583 | #define MAX_GFP_ORDER 5 /* 32 pages */ | 583 | #define MAX_GFP_ORDER 5 /* 32 pages */ |
584 | #else | 584 | #else |
585 | #define MAX_OBJ_ORDER 8 /* up to 1MB */ | 585 | #define MAX_OBJ_ORDER 8 /* up to 1MB */ |
586 | #define MAX_GFP_ORDER 8 /* up to 1MB */ | 586 | #define MAX_GFP_ORDER 8 /* up to 1MB */ |
587 | #endif | 587 | #endif |
588 | 588 | ||
589 | /* | 589 | /* |
590 | * Do not go above this order unless not even one object fits into the slab. | 590 | * Do not go above this order unless not even one object fits into the slab. |
591 | */ | 591 | */ |
592 | #define BREAK_GFP_ORDER_HI 1 | 592 | #define BREAK_GFP_ORDER_HI 1 |
593 | #define BREAK_GFP_ORDER_LO 0 | 593 | #define BREAK_GFP_ORDER_LO 0 |
594 | static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; | 594 | static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; |
595 | 595 | ||
596 | /* | 596 | /* |
597 | * Functions for storing/retrieving the cachep and/or slab from the page | 597 | * Functions for storing/retrieving the cachep and/or slab from the page |
598 | * allocator. These are used to find the slab an obj belongs to. With kfree(), | 598 | * allocator. These are used to find the slab an obj belongs to. With kfree(), |
599 | * these are used to find the cache to which an obj belongs. | 599 | * these are used to find the cache to which an obj belongs. |
600 | */ | 600 | */ |
601 | static inline void page_set_cache(struct page *page, struct kmem_cache *cache) | 601 | static inline void page_set_cache(struct page *page, struct kmem_cache *cache) |
602 | { | 602 | { |
603 | page->lru.next = (struct list_head *)cache; | 603 | page->lru.next = (struct list_head *)cache; |
604 | } | 604 | } |
605 | 605 | ||
606 | static inline struct kmem_cache *page_get_cache(struct page *page) | 606 | static inline struct kmem_cache *page_get_cache(struct page *page) |
607 | { | 607 | { |
608 | page = compound_head(page); | 608 | page = compound_head(page); |
609 | BUG_ON(!PageSlab(page)); | 609 | BUG_ON(!PageSlab(page)); |
610 | return (struct kmem_cache *)page->lru.next; | 610 | return (struct kmem_cache *)page->lru.next; |
611 | } | 611 | } |
612 | 612 | ||
613 | static inline void page_set_slab(struct page *page, struct slab *slab) | 613 | static inline void page_set_slab(struct page *page, struct slab *slab) |
614 | { | 614 | { |
615 | page->lru.prev = (struct list_head *)slab; | 615 | page->lru.prev = (struct list_head *)slab; |
616 | } | 616 | } |
617 | 617 | ||
618 | static inline struct slab *page_get_slab(struct page *page) | 618 | static inline struct slab *page_get_slab(struct page *page) |
619 | { | 619 | { |
620 | BUG_ON(!PageSlab(page)); | 620 | BUG_ON(!PageSlab(page)); |
621 | return (struct slab *)page->lru.prev; | 621 | return (struct slab *)page->lru.prev; |
622 | } | 622 | } |
623 | 623 | ||
624 | static inline struct kmem_cache *virt_to_cache(const void *obj) | 624 | static inline struct kmem_cache *virt_to_cache(const void *obj) |
625 | { | 625 | { |
626 | struct page *page = virt_to_head_page(obj); | 626 | struct page *page = virt_to_head_page(obj); |
627 | return page_get_cache(page); | 627 | return page_get_cache(page); |
628 | } | 628 | } |
629 | 629 | ||
630 | static inline struct slab *virt_to_slab(const void *obj) | 630 | static inline struct slab *virt_to_slab(const void *obj) |
631 | { | 631 | { |
632 | struct page *page = virt_to_head_page(obj); | 632 | struct page *page = virt_to_head_page(obj); |
633 | return page_get_slab(page); | 633 | return page_get_slab(page); |
634 | } | 634 | } |
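Since a page's lru list links are unused while the page is owned by the slab allocator, they can double as back-pointers, which is what the four helpers above exploit. A minimal userspace model of the same trick (names mirror the kernel's, but this is illustrative code; the kernel's compound_head()/PageSlab() checks are omitted):

    #include <assert.h>
    #include <stdio.h>

    /* While a page belongs to the slab allocator its lru.next/lru.prev
     * links are free, so they are reused as back-pointers. */
    struct list_head { struct list_head *next, *prev; };
    struct page { struct list_head lru; };
    struct kmem_cache { const char *name; };
    struct slab { int colouroff; };

    static void page_set_cache(struct page *p, struct kmem_cache *c)
    {
        p->lru.next = (struct list_head *)c;
    }
    static struct kmem_cache *page_get_cache(struct page *p)
    {
        return (struct kmem_cache *)p->lru.next;
    }
    static void page_set_slab(struct page *p, struct slab *s)
    {
        p->lru.prev = (struct list_head *)s;
    }
    static struct slab *page_get_slab(struct page *p)
    {
        return (struct slab *)p->lru.prev;
    }

    int main(void)
    {
        struct kmem_cache cache = { "demo-cache" };
        struct slab slab = { 0 };
        struct page pg;

        /* Done once when the slab page is allocated... */
        page_set_cache(&pg, &cache);
        page_set_slab(&pg, &slab);

        /* ...so a later free can recover both in O(1). */
        assert(page_get_cache(&pg) == &cache);
        assert(page_get_slab(&pg) == &slab);
        printf("object belongs to %s\n", page_get_cache(&pg)->name);
        return 0;
    }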
635 | 635 | ||
636 | static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, | 636 | static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, |
637 | unsigned int idx) | 637 | unsigned int idx) |
638 | { | 638 | { |
639 | return slab->s_mem + cache->buffer_size * idx; | 639 | return slab->s_mem + cache->buffer_size * idx; |
640 | } | 640 | } |
641 | 641 | ||
642 | /* | 642 | /* |
643 | * We want to avoid an expensive divide : (offset / cache->buffer_size) | 643 | * We want to avoid an expensive divide : (offset / cache->buffer_size) |
644 | * Using the fact that buffer_size is a constant for a particular cache, | 644 | * Using the fact that buffer_size is a constant for a particular cache, |
645 | * we can replace (offset / cache->buffer_size) by | 645 | * we can replace (offset / cache->buffer_size) by |
646 | * reciprocal_divide(offset, cache->reciprocal_buffer_size) | 646 | * reciprocal_divide(offset, cache->reciprocal_buffer_size) |
647 | */ | 647 | */ |
648 | static inline unsigned int obj_to_index(const struct kmem_cache *cache, | 648 | static inline unsigned int obj_to_index(const struct kmem_cache *cache, |
649 | const struct slab *slab, void *obj) | 649 | const struct slab *slab, void *obj) |
650 | { | 650 | { |
651 | u32 offset = (obj - slab->s_mem); | 651 | u32 offset = (obj - slab->s_mem); |
652 | return reciprocal_divide(offset, cache->reciprocal_buffer_size); | 652 | return reciprocal_divide(offset, cache->reciprocal_buffer_size); |
653 | } | 653 | } |
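The trick in the comment above is worth seeing end to end. This minimal userspace sketch mirrors what this era's lib/reciprocal_div.c does: precompute roughly ceil(2^32 / size) once per cache (the kernel stores it as cache->reciprocal_buffer_size), after which each obj_to_index() costs one 64-bit multiply and a shift instead of a division. The buffer size below is illustrative:

    #include <stdint.h>
    #include <stdio.h>

    /* Precomputed once, like cache->reciprocal_buffer_size. */
    static uint32_t reciprocal_value(uint32_t k)
    {
        return (uint32_t)((((uint64_t)1 << 32) + k - 1) / k);
    }

    /* Replaces "a / k" for the precomputed k: one multiply, one shift. */
    static uint32_t reciprocal_divide(uint32_t a, uint32_t r)
    {
        return (uint32_t)(((uint64_t)a * r) >> 32);
    }

    int main(void)
    {
        uint32_t buffer_size = 192;     /* hypothetical object stride */
        uint32_t r = reciprocal_value(buffer_size);
        uint32_t idx;

        for (idx = 0; idx < 5; idx++) {
            uint32_t offset = idx * buffer_size;  /* obj - slab->s_mem */
            printf("offset %4u -> index %u\n",
                   offset, reciprocal_divide(offset, r));
        }
        return 0;
    }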
654 | 654 | ||
655 | /* | 655 | /* |
656 | * These are the default caches for kmalloc. Custom caches can have other sizes. | 656 | * These are the default caches for kmalloc. Custom caches can have other sizes. |
657 | */ | 657 | */ |
658 | struct cache_sizes malloc_sizes[] = { | 658 | struct cache_sizes malloc_sizes[] = { |
659 | #define CACHE(x) { .cs_size = (x) }, | 659 | #define CACHE(x) { .cs_size = (x) }, |
660 | #include <linux/kmalloc_sizes.h> | 660 | #include <linux/kmalloc_sizes.h> |
661 | CACHE(ULONG_MAX) | 661 | CACHE(ULONG_MAX) |
662 | #undef CACHE | 662 | #undef CACHE |
663 | }; | 663 | }; |
664 | EXPORT_SYMBOL(malloc_sizes); | 664 | EXPORT_SYMBOL(malloc_sizes); |
665 | 665 | ||
666 | /* Must match cache_sizes above. Out of line to keep cache footprint low. */ | 666 | /* Must match cache_sizes above. Out of line to keep cache footprint low. */ |
667 | struct cache_names { | 667 | struct cache_names { |
668 | char *name; | 668 | char *name; |
669 | char *name_dma; | 669 | char *name_dma; |
670 | }; | 670 | }; |
671 | 671 | ||
672 | static struct cache_names __initdata cache_names[] = { | 672 | static struct cache_names __initdata cache_names[] = { |
673 | #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" }, | 673 | #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" }, |
674 | #include <linux/kmalloc_sizes.h> | 674 | #include <linux/kmalloc_sizes.h> |
675 | {NULL,} | 675 | {NULL,} |
676 | #undef CACHE | 676 | #undef CACHE |
677 | }; | 677 | }; |
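Both tables above lean on the same multiple-inclusion trick: <linux/kmalloc_sizes.h> is nothing but a guard-less list of CACHE(n) invocations, so each includer defines CACHE() to stamp out its own per-size record from the one list. A simplified, self-contained sketch (the three sizes are placeholders; the real list is longer and config-dependent):

    #include <stdio.h>

    /* Stand-in for <linux/kmalloc_sizes.h>: an unguarded CACHE(n) list. */
    #define KMALLOC_SIZES CACHE(32) CACHE(64) CACHE(128)

    /* First expansion: the size table, with a ULONG_MAX sentinel. */
    struct cache_sizes { unsigned long cs_size; };
    struct cache_sizes malloc_sizes[] = {
    #define CACHE(x) { .cs_size = (x) },
        KMALLOC_SIZES
        CACHE(~0UL)
    #undef CACHE
    };

    /* Second expansion of the same list: the matching names. */
    struct cache_names { const char *name; const char *name_dma; };
    struct cache_names cache_names[] = {
    #define CACHE(x) { "size-" #x, "size-" #x "(DMA)" },
        KMALLOC_SIZES
        { 0, 0 }
    #undef CACHE
    };

    int main(void)
    {
        /* prints: size-64 holds up to 64 bytes */
        printf("%s holds up to %lu bytes\n",
               cache_names[1].name, malloc_sizes[1].cs_size);
        return 0;
    }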
678 | 678 | ||
679 | static struct arraycache_init initarray_cache __initdata = | 679 | static struct arraycache_init initarray_cache __initdata = |
680 | { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; | 680 | { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; |
681 | static struct arraycache_init initarray_generic = | 681 | static struct arraycache_init initarray_generic = |
682 | { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; | 682 | { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; |
683 | 683 | ||
684 | /* internal cache of cache description objs */ | 684 | /* internal cache of cache description objs */ |
685 | static struct kmem_cache cache_cache = { | 685 | static struct kmem_cache cache_cache = { |
686 | .batchcount = 1, | 686 | .batchcount = 1, |
687 | .limit = BOOT_CPUCACHE_ENTRIES, | 687 | .limit = BOOT_CPUCACHE_ENTRIES, |
688 | .shared = 1, | 688 | .shared = 1, |
689 | .buffer_size = sizeof(struct kmem_cache), | 689 | .buffer_size = sizeof(struct kmem_cache), |
690 | .name = "kmem_cache", | 690 | .name = "kmem_cache", |
691 | }; | 691 | }; |
692 | 692 | ||
693 | #define BAD_ALIEN_MAGIC 0x01020304ul | 693 | #define BAD_ALIEN_MAGIC 0x01020304ul |
694 | 694 | ||
695 | #ifdef CONFIG_LOCKDEP | 695 | #ifdef CONFIG_LOCKDEP |
696 | 696 | ||
697 | /* | 697 | /* |
698 | * Slab sometimes uses the kmalloc slabs to store the slab headers | 698 | * Slab sometimes uses the kmalloc slabs to store the slab headers |
699 | * for other slabs "off slab". | 699 | * for other slabs "off slab". |
700 | * The locking for this is tricky in that it nests within the locks | 700 | * The locking for this is tricky in that it nests within the locks |
701 | * of all other slabs in a few places; to deal with this special | 701 | * of all other slabs in a few places; to deal with this special |
702 | * locking we put on-slab caches into a separate lock-class. | 702 | * locking we put on-slab caches into a separate lock-class. |
703 | * | 703 | * |
704 | * We set lock class for alien array caches which are up during init. | 704 | * We set lock class for alien array caches which are up during init. |
705 | * The lock annotation will be lost if all cpus of a node go down and | 705 | * The lock annotation will be lost if all cpus of a node go down and |
706 | * then come back up during hotplug. | 706 | * then come back up during hotplug. |
707 | */ | 707 | */ |
708 | static struct lock_class_key on_slab_l3_key; | 708 | static struct lock_class_key on_slab_l3_key; |
709 | static struct lock_class_key on_slab_alc_key; | 709 | static struct lock_class_key on_slab_alc_key; |
710 | 710 | ||
711 | static inline void init_lock_keys(void) | 711 | static inline void init_lock_keys(void) |
712 | 712 | ||
713 | { | 713 | { |
714 | int q; | 714 | int q; |
715 | struct cache_sizes *s = malloc_sizes; | 715 | struct cache_sizes *s = malloc_sizes; |
716 | 716 | ||
717 | while (s->cs_size != ULONG_MAX) { | 717 | while (s->cs_size != ULONG_MAX) { |
718 | for_each_node(q) { | 718 | for_each_node(q) { |
719 | struct array_cache **alc; | 719 | struct array_cache **alc; |
720 | int r; | 720 | int r; |
721 | struct kmem_list3 *l3 = s->cs_cachep->nodelists[q]; | 721 | struct kmem_list3 *l3 = s->cs_cachep->nodelists[q]; |
722 | if (!l3 || OFF_SLAB(s->cs_cachep)) | 722 | if (!l3 || OFF_SLAB(s->cs_cachep)) |
723 | continue; | 723 | continue; |
724 | lockdep_set_class(&l3->list_lock, &on_slab_l3_key); | 724 | lockdep_set_class(&l3->list_lock, &on_slab_l3_key); |
725 | alc = l3->alien; | 725 | alc = l3->alien; |
726 | /* | 726 | /* |
727 | * FIXME: This check for BAD_ALIEN_MAGIC | 727 | * FIXME: This check for BAD_ALIEN_MAGIC |
728 | * should go away when common slab code is taught to | 728 | * should go away when common slab code is taught to |
729 | * work even without alien caches. | 729 | * work even without alien caches. |
730 | * Currently, non NUMA code returns BAD_ALIEN_MAGIC | 730 | * Currently, non NUMA code returns BAD_ALIEN_MAGIC |
731 | * for alloc_alien_cache, | 731 | * for alloc_alien_cache, |
732 | */ | 732 | */ |
733 | if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) | 733 | if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) |
734 | continue; | 734 | continue; |
735 | for_each_node(r) { | 735 | for_each_node(r) { |
736 | if (alc[r]) | 736 | if (alc[r]) |
737 | lockdep_set_class(&alc[r]->lock, | 737 | lockdep_set_class(&alc[r]->lock, |
738 | &on_slab_alc_key); | 738 | &on_slab_alc_key); |
739 | } | 739 | } |
740 | } | 740 | } |
741 | s++; | 741 | s++; |
742 | } | 742 | } |
743 | } | 743 | } |
744 | #else | 744 | #else |
745 | static inline void init_lock_keys(void) | 745 | static inline void init_lock_keys(void) |
746 | { | 746 | { |
747 | } | 747 | } |
748 | #endif | 748 | #endif |
749 | 749 | ||
750 | /* | 750 | /* |
751 | * 1. Guard access to the cache-chain. | 751 | * 1. Guard access to the cache-chain. |
752 | * 2. Protect sanity of cpu_online_map against cpu hotplug events | 752 | * 2. Protect sanity of cpu_online_map against cpu hotplug events |
753 | */ | 753 | */ |
754 | static DEFINE_MUTEX(cache_chain_mutex); | 754 | static DEFINE_MUTEX(cache_chain_mutex); |
755 | static struct list_head cache_chain; | 755 | static struct list_head cache_chain; |
756 | 756 | ||
757 | /* | 757 | /* |
758 | * chicken and egg problem: delay the per-cpu array allocation | 758 | * chicken and egg problem: delay the per-cpu array allocation |
759 | * until the general caches are up. | 759 | * until the general caches are up. |
760 | */ | 760 | */ |
761 | static enum { | 761 | static enum { |
762 | NONE, | 762 | NONE, |
763 | PARTIAL_AC, | 763 | PARTIAL_AC, |
764 | PARTIAL_L3, | 764 | PARTIAL_L3, |
765 | FULL | 765 | FULL |
766 | } g_cpucache_up; | 766 | } g_cpucache_up; |
767 | 767 | ||
768 | /* | 768 | /* |
769 | * used by boot code to determine if it can use the slab-based allocator | 769 | * used by boot code to determine if it can use the slab-based allocator |
770 | */ | 770 | */ |
771 | int slab_is_available(void) | 771 | int slab_is_available(void) |
772 | { | 772 | { |
773 | return g_cpucache_up == FULL; | 773 | return g_cpucache_up == FULL; |
774 | } | 774 | } |
775 | 775 | ||
776 | static DEFINE_PER_CPU(struct delayed_work, reap_work); | 776 | static DEFINE_PER_CPU(struct delayed_work, reap_work); |
777 | 777 | ||
778 | static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | 778 | static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) |
779 | { | 779 | { |
780 | return cachep->array[smp_processor_id()]; | 780 | return cachep->array[smp_processor_id()]; |
781 | } | 781 | } |
782 | 782 | ||
783 | static inline struct kmem_cache *__find_general_cachep(size_t size, | 783 | static inline struct kmem_cache *__find_general_cachep(size_t size, |
784 | gfp_t gfpflags) | 784 | gfp_t gfpflags) |
785 | { | 785 | { |
786 | struct cache_sizes *csizep = malloc_sizes; | 786 | struct cache_sizes *csizep = malloc_sizes; |
787 | 787 | ||
788 | #if DEBUG | 788 | #if DEBUG |
789 | /* This happens if someone tries to call | 789 | /* This happens if someone tries to call |
790 | * kmem_cache_create() or __kmalloc() before | 790 | * kmem_cache_create() or __kmalloc() before |
791 | * the generic caches are initialized. | 791 | * the generic caches are initialized. |
792 | */ | 792 | */ |
793 | BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL); | 793 | BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL); |
794 | #endif | 794 | #endif |
795 | while (size > csizep->cs_size) | 795 | while (size > csizep->cs_size) |
796 | csizep++; | 796 | csizep++; |
797 | 797 | ||
798 | /* | 798 | /* |
799 | * Really subtle: The last entry with cs->cs_size==ULONG_MAX | 799 | * Really subtle: The last entry with cs->cs_size==ULONG_MAX |
800 | * has cs_{dma,}cachep==NULL. Thus no special case | 800 | * has cs_{dma,}cachep==NULL. Thus no special case |
801 | * for large kmalloc calls is required. | 801 | * for large kmalloc calls is required. |
802 | */ | 802 | */ |
803 | #ifdef CONFIG_ZONE_DMA | 803 | #ifdef CONFIG_ZONE_DMA |
804 | if (unlikely(gfpflags & GFP_DMA)) | 804 | if (unlikely(gfpflags & GFP_DMA)) |
805 | return csizep->cs_dmacachep; | 805 | return csizep->cs_dmacachep; |
806 | #endif | 806 | #endif |
807 | return csizep->cs_cachep; | 807 | return csizep->cs_cachep; |
808 | } | 808 | } |
809 | 809 | ||
810 | static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) | 810 | static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) |
811 | { | 811 | { |
812 | return __find_general_cachep(size, gfpflags); | 812 | return __find_general_cachep(size, gfpflags); |
813 | } | 813 | } |
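The walk terminates without an explicit length check because of the "really subtle" sentinel described above: any size larger than every real cache lands on the ULONG_MAX entry, whose cachep is NULL. A userspace sketch of the same lookup (the cache names and sizes are illustrative):

    #include <stdio.h>

    struct cache_sizes { unsigned long cs_size; const char *cs_cachep; };

    /* Ascending sizes, terminated by a ULONG_MAX sentinel with no cache. */
    static const struct cache_sizes sizes[] = {
        {   32, "size-32"  },
        {   64, "size-64"  },
        {  128, "size-128" },
        { ~0UL, 0          },
    };

    static const char *find_general_cachep(unsigned long size)
    {
        const struct cache_sizes *csizep = sizes;

        while (size > csizep->cs_size)
            csizep++;
        return csizep->cs_cachep;   /* NULL for oversized requests */
    }

    int main(void)
    {
        const char *c = find_general_cachep(100);
        printf("100 bytes -> %s\n", c ? c : "(no cache)");  /* size-128 */
        c = find_general_cachep(1UL << 20);
        printf("1MB -> %s\n", c ? c : "(no cache)");
        return 0;
    }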
814 | 814 | ||
815 | static size_t slab_mgmt_size(size_t nr_objs, size_t align) | 815 | static size_t slab_mgmt_size(size_t nr_objs, size_t align) |
816 | { | 816 | { |
817 | return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); | 817 | return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); |
818 | } | 818 | } |
819 | 819 | ||
820 | /* | 820 | /* |
821 | * Calculate the number of objects and left-over bytes for a given buffer size. | 821 | * Calculate the number of objects and left-over bytes for a given buffer size. |
822 | */ | 822 | */ |
823 | static void cache_estimate(unsigned long gfporder, size_t buffer_size, | 823 | static void cache_estimate(unsigned long gfporder, size_t buffer_size, |
824 | size_t align, int flags, size_t *left_over, | 824 | size_t align, int flags, size_t *left_over, |
825 | unsigned int *num) | 825 | unsigned int *num) |
826 | { | 826 | { |
827 | int nr_objs; | 827 | int nr_objs; |
828 | size_t mgmt_size; | 828 | size_t mgmt_size; |
829 | size_t slab_size = PAGE_SIZE << gfporder; | 829 | size_t slab_size = PAGE_SIZE << gfporder; |
830 | 830 | ||
831 | /* | 831 | /* |
832 | * The slab management structure can be either off the slab or | 832 | * The slab management structure can be either off the slab or |
833 | * on it. For the latter case, the memory allocated for a | 833 | * on it. For the latter case, the memory allocated for a |
834 | * slab is used for: | 834 | * slab is used for: |
835 | * | 835 | * |
836 | * - The struct slab | 836 | * - The struct slab |
837 | * - One kmem_bufctl_t for each object | 837 | * - One kmem_bufctl_t for each object |
838 | * - Padding to respect alignment of @align | 838 | * - Padding to respect alignment of @align |
839 | * - @buffer_size bytes for each object | 839 | * - @buffer_size bytes for each object |
840 | * | 840 | * |
841 | * If the slab management structure is off the slab, then the | 841 | * If the slab management structure is off the slab, then the |
842 | * alignment will already be calculated into the size. Because | 842 | * alignment will already be calculated into the size. Because |
843 | * the slabs are all pages aligned, the objects will be at the | 843 | * the slabs are all pages aligned, the objects will be at the |
844 | * correct alignment when allocated. | 844 | * correct alignment when allocated. |
845 | */ | 845 | */ |
846 | if (flags & CFLGS_OFF_SLAB) { | 846 | if (flags & CFLGS_OFF_SLAB) { |
847 | mgmt_size = 0; | 847 | mgmt_size = 0; |
848 | nr_objs = slab_size / buffer_size; | 848 | nr_objs = slab_size / buffer_size; |
849 | 849 | ||
850 | if (nr_objs > SLAB_LIMIT) | 850 | if (nr_objs > SLAB_LIMIT) |
851 | nr_objs = SLAB_LIMIT; | 851 | nr_objs = SLAB_LIMIT; |
852 | } else { | 852 | } else { |
853 | /* | 853 | /* |
854 | * Ignore padding for the initial guess. The padding | 854 | * Ignore padding for the initial guess. The padding |
855 | * is at most @align-1 bytes, and @buffer_size is at | 855 | * is at most @align-1 bytes, and @buffer_size is at |
856 | * least @align. In the worst case, this result will | 856 | * least @align. In the worst case, this result will |
857 | * be one greater than the number of objects that fit | 857 | * be one greater than the number of objects that fit |
858 | * into the memory allocation when taking the padding | 858 | * into the memory allocation when taking the padding |
859 | * into account. | 859 | * into account. |
860 | */ | 860 | */ |
861 | nr_objs = (slab_size - sizeof(struct slab)) / | 861 | nr_objs = (slab_size - sizeof(struct slab)) / |
862 | (buffer_size + sizeof(kmem_bufctl_t)); | 862 | (buffer_size + sizeof(kmem_bufctl_t)); |
863 | 863 | ||
864 | /* | 864 | /* |
865 | * This calculated number will be either the right | 865 | * This calculated number will be either the right |
866 | * amount, or one greater than what we want. | 866 | * amount, or one greater than what we want. |
867 | */ | 867 | */ |
868 | if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size | 868 | if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size |
869 | > slab_size) | 869 | > slab_size) |
870 | nr_objs--; | 870 | nr_objs--; |
871 | 871 | ||
872 | if (nr_objs > SLAB_LIMIT) | 872 | if (nr_objs > SLAB_LIMIT) |
873 | nr_objs = SLAB_LIMIT; | 873 | nr_objs = SLAB_LIMIT; |
874 | 874 | ||
875 | mgmt_size = slab_mgmt_size(nr_objs, align); | 875 | mgmt_size = slab_mgmt_size(nr_objs, align); |
876 | } | 876 | } |
877 | *num = nr_objs; | 877 | *num = nr_objs; |
878 | *left_over = slab_size - nr_objs*buffer_size - mgmt_size; | 878 | *left_over = slab_size - nr_objs*buffer_size - mgmt_size; |
879 | } | 879 | } |
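As a numeric sanity check of the on-slab branch, here is the same arithmetic as a standalone program. The struct sizes are assumptions chosen for illustration (a 32-byte struct slab and 4-byte kmem_bufctl_t), not values taken from any particular build:

    #include <stdio.h>

    #define PAGE_SIZE       4096UL
    #define SIZEOF_SLAB     32UL    /* assumed sizeof(struct slab) */
    #define SIZEOF_BUFCTL   4UL     /* assumed sizeof(kmem_bufctl_t) */

    static unsigned long align_up(unsigned long n, unsigned long a)
    {
        return (n + a - 1) & ~(a - 1);  /* a must be a power of two */
    }

    int main(void)
    {
        unsigned long slab_size = PAGE_SIZE << 0;   /* order-0 slab */
        unsigned long buffer_size = 256, align = 32;

        /* Initial guess ignores the alignment padding of the mgmt area. */
        unsigned long nr = (slab_size - SIZEOF_SLAB) /
                           (buffer_size + SIZEOF_BUFCTL);
        unsigned long mgmt = align_up(SIZEOF_SLAB + nr * SIZEOF_BUFCTL, align);

        if (mgmt + nr * buffer_size > slab_size)
            nr--;                   /* the guess was one too many */
        mgmt = align_up(SIZEOF_SLAB + nr * SIZEOF_BUFCTL, align);

        /* prints: objs=15 left_over=160 */
        printf("objs=%lu left_over=%lu\n",
               nr, slab_size - nr * buffer_size - mgmt);
        return 0;
    }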
880 | 880 | ||
881 | #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) | 881 | #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) |
882 | 882 | ||
883 | static void __slab_error(const char *function, struct kmem_cache *cachep, | 883 | static void __slab_error(const char *function, struct kmem_cache *cachep, |
884 | char *msg) | 884 | char *msg) |
885 | { | 885 | { |
886 | printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", | 886 | printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", |
887 | function, cachep->name, msg); | 887 | function, cachep->name, msg); |
888 | dump_stack(); | 888 | dump_stack(); |
889 | } | 889 | } |
890 | 890 | ||
891 | /* | 891 | /* |
892 | * By default on NUMA we use alien caches to stage the freeing of | 892 | * By default on NUMA we use alien caches to stage the freeing of |
893 | * objects allocated from other nodes. This causes massive memory | 893 | * objects allocated from other nodes. This causes massive memory |
894 | * inefficiencies when using a fake NUMA setup to split memory into a | 894 | * inefficiencies when using a fake NUMA setup to split memory into a |
895 | * large number of small nodes, so it can be disabled on the command | 895 | * large number of small nodes, so it can be disabled on the command |
896 | * line. | 896 | * line. |
897 | */ | 897 | */ |
898 | 898 | ||
899 | static int use_alien_caches __read_mostly = 1; | 899 | static int use_alien_caches __read_mostly = 1; |
900 | static int __init noaliencache_setup(char *s) | 900 | static int __init noaliencache_setup(char *s) |
901 | { | 901 | { |
902 | use_alien_caches = 0; | 902 | use_alien_caches = 0; |
903 | return 1; | 903 | return 1; |
904 | } | 904 | } |
905 | __setup("noaliencache", noaliencache_setup); | 905 | __setup("noaliencache", noaliencache_setup); |
906 | 906 | ||
907 | #ifdef CONFIG_NUMA | 907 | #ifdef CONFIG_NUMA |
908 | /* | 908 | /* |
909 | * Special reaping functions for NUMA systems called from cache_reap(). | 909 | * Special reaping functions for NUMA systems called from cache_reap(). |
910 | * These take care of doing round robin flushing of alien caches (containing | 910 | * These take care of doing round robin flushing of alien caches (containing |
911 | * objects freed on different nodes from which they were allocated) and the | 911 | * objects freed on different nodes from which they were allocated) and the |
912 | * flushing of remote pcps by calling drain_node_pages. | 912 | * flushing of remote pcps by calling drain_node_pages. |
913 | */ | 913 | */ |
914 | static DEFINE_PER_CPU(unsigned long, reap_node); | 914 | static DEFINE_PER_CPU(unsigned long, reap_node); |
915 | 915 | ||
916 | static void init_reap_node(int cpu) | 916 | static void init_reap_node(int cpu) |
917 | { | 917 | { |
918 | int node; | 918 | int node; |
919 | 919 | ||
920 | node = next_node(cpu_to_node(cpu), node_online_map); | 920 | node = next_node(cpu_to_node(cpu), node_online_map); |
921 | if (node == MAX_NUMNODES) | 921 | if (node == MAX_NUMNODES) |
922 | node = first_node(node_online_map); | 922 | node = first_node(node_online_map); |
923 | 923 | ||
924 | per_cpu(reap_node, cpu) = node; | 924 | per_cpu(reap_node, cpu) = node; |
925 | } | 925 | } |
926 | 926 | ||
927 | static void next_reap_node(void) | 927 | static void next_reap_node(void) |
928 | { | 928 | { |
929 | int node = __get_cpu_var(reap_node); | 929 | int node = __get_cpu_var(reap_node); |
930 | 930 | ||
931 | /* | 931 | /* |
932 | * Also drain per-cpu pages on remote zones | 932 | * Also drain per-cpu pages on remote zones |
933 | */ | 933 | */ |
934 | if (node != numa_node_id()) | 934 | if (node != numa_node_id()) |
935 | drain_node_pages(node); | 935 | drain_node_pages(node); |
936 | 936 | ||
937 | node = next_node(node, node_online_map); | 937 | node = next_node(node, node_online_map); |
938 | if (unlikely(node >= MAX_NUMNODES)) | 938 | if (unlikely(node >= MAX_NUMNODES)) |
939 | node = first_node(node_online_map); | 939 | node = first_node(node_online_map); |
940 | __get_cpu_var(reap_node) = node; | 940 | __get_cpu_var(reap_node) = node; |
941 | } | 941 | } |
942 | 942 | ||
943 | #else | 943 | #else |
944 | #define init_reap_node(cpu) do { } while (0) | 944 | #define init_reap_node(cpu) do { } while (0) |
945 | #define next_reap_node(void) do { } while (0) | 945 | #define next_reap_node(void) do { } while (0) |
946 | #endif | 946 | #endif |
947 | 947 | ||
948 | /* | 948 | /* |
949 | * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz | 949 | * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz |
950 | * via the workqueue/eventd. | 950 | * via the workqueue/eventd. |
951 | * Add the CPU number into the expiration time to minimize the possibility of | 951 | * Add the CPU number into the expiration time to minimize the possibility of |
952 | * the CPUs getting into lockstep and contending for the global cache chain | 952 | * the CPUs getting into lockstep and contending for the global cache chain |
953 | * lock. | 953 | * lock. |
954 | */ | 954 | */ |
955 | static void __devinit start_cpu_timer(int cpu) | 955 | static void __devinit start_cpu_timer(int cpu) |
956 | { | 956 | { |
957 | struct delayed_work *reap_work = &per_cpu(reap_work, cpu); | 957 | struct delayed_work *reap_work = &per_cpu(reap_work, cpu); |
958 | 958 | ||
959 | /* | 959 | /* |
960 | * When this gets called from do_initcalls via cpucache_init(), | 960 | * When this gets called from do_initcalls via cpucache_init(), |
961 | * init_workqueues() has already run, so keventd will be set up | 961 | * init_workqueues() has already run, so keventd will be set up |
962 | * at that time. | 962 | * at that time. |
963 | */ | 963 | */ |
964 | if (keventd_up() && reap_work->work.func == NULL) { | 964 | if (keventd_up() && reap_work->work.func == NULL) { |
965 | init_reap_node(cpu); | 965 | init_reap_node(cpu); |
966 | INIT_DELAYED_WORK(reap_work, cache_reap); | 966 | INIT_DELAYED_WORK(reap_work, cache_reap); |
967 | schedule_delayed_work_on(cpu, reap_work, | 967 | schedule_delayed_work_on(cpu, reap_work, |
968 | __round_jiffies_relative(HZ, cpu)); | 968 | __round_jiffies_relative(HZ, cpu)); |
969 | } | 969 | } |
970 | } | 970 | } |
971 | 971 | ||
972 | static struct array_cache *alloc_arraycache(int node, int entries, | 972 | static struct array_cache *alloc_arraycache(int node, int entries, |
973 | int batchcount) | 973 | int batchcount) |
974 | { | 974 | { |
975 | int memsize = sizeof(void *) * entries + sizeof(struct array_cache); | 975 | int memsize = sizeof(void *) * entries + sizeof(struct array_cache); |
976 | struct array_cache *nc = NULL; | 976 | struct array_cache *nc = NULL; |
977 | 977 | ||
978 | nc = kmalloc_node(memsize, GFP_KERNEL, node); | 978 | nc = kmalloc_node(memsize, GFP_KERNEL, node); |
979 | if (nc) { | 979 | if (nc) { |
980 | nc->avail = 0; | 980 | nc->avail = 0; |
981 | nc->limit = entries; | 981 | nc->limit = entries; |
982 | nc->batchcount = batchcount; | 982 | nc->batchcount = batchcount; |
983 | nc->touched = 0; | 983 | nc->touched = 0; |
984 | spin_lock_init(&nc->lock); | 984 | spin_lock_init(&nc->lock); |
985 | } | 985 | } |
986 | return nc; | 986 | return nc; |
987 | } | 987 | } |
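The memsize computation above works because entry[] is the trailing member of struct array_cache, so the header and the stack of object pointers live in one allocation. A userspace model of the same pattern (C99 flexible array member; the fields mirror the kernel's minus the spinlock, and this is a sketch, not the kernel definition):

    #include <stdio.h>
    #include <stdlib.h>

    struct array_cache {
        unsigned int avail;         /* objects currently stacked */
        unsigned int limit;         /* capacity of entry[] */
        unsigned int batchcount;
        unsigned int touched;
        void *entry[];              /* flexible array member */
    };

    static struct array_cache *alloc_arraycache(int entries, int batchcount)
    {
        struct array_cache *nc =
            malloc(sizeof(struct array_cache) + sizeof(void *) * entries);

        if (nc) {
            nc->avail = 0;
            nc->limit = entries;
            nc->batchcount = batchcount;
            nc->touched = 0;
        }
        return nc;
    }

    int main(void)
    {
        struct array_cache *ac = alloc_arraycache(120, 60);

        if (ac)
            printf("one allocation, room for %u pointers\n", ac->limit);
        free(ac);
        return 0;
    }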
988 | 988 | ||
989 | /* | 989 | /* |
990 | * Transfer objects in one arraycache to another. | 990 | * Transfer objects in one arraycache to another. |
991 | * Locking must be handled by the caller. | 991 | * Locking must be handled by the caller. |
992 | * | 992 | * |
993 | * Return the number of entries transferred. | 993 | * Return the number of entries transferred. |
994 | */ | 994 | */ |
995 | static int transfer_objects(struct array_cache *to, | 995 | static int transfer_objects(struct array_cache *to, |
996 | struct array_cache *from, unsigned int max) | 996 | struct array_cache *from, unsigned int max) |
997 | { | 997 | { |
998 | /* Figure out how many entries to transfer */ | 998 | /* Figure out how many entries to transfer */ |
999 | int nr = min(min(from->avail, max), to->limit - to->avail); | 999 | int nr = min(min(from->avail, max), to->limit - to->avail); |
1000 | 1000 | ||
1001 | if (!nr) | 1001 | if (!nr) |
1002 | return 0; | 1002 | return 0; |
1003 | 1003 | ||
1004 | memcpy(to->entry + to->avail, from->entry + from->avail -nr, | 1004 | memcpy(to->entry + to->avail, from->entry + from->avail -nr, |
1005 | sizeof(void *) *nr); | 1005 | sizeof(void *) *nr); |
1006 | 1006 | ||
1007 | from->avail -= nr; | 1007 | from->avail -= nr; |
1008 | to->avail += nr; | 1008 | to->avail += nr; |
1009 | to->touched = 1; | 1009 | to->touched = 1; |
1010 | return nr; | 1010 | return nr; |
1011 | } | 1011 | } |
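A quick worked example of the index arithmetic, with hypothetical counts: if from->avail == 7, to->avail == 2, to->limit == 8 and max == 5, then nr = min(min(7, 5), 8 - 2) = 5, so the memcpy moves from->entry[2..6] (the top five entries of the LIFO stack) into to->entry[2..6], leaving from->avail == 2 and to->avail == 7.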
1012 | 1012 | ||
1013 | #ifndef CONFIG_NUMA | 1013 | #ifndef CONFIG_NUMA |
1014 | 1014 | ||
1015 | #define drain_alien_cache(cachep, alien) do { } while (0) | 1015 | #define drain_alien_cache(cachep, alien) do { } while (0) |
1016 | #define reap_alien(cachep, l3) do { } while (0) | 1016 | #define reap_alien(cachep, l3) do { } while (0) |
1017 | 1017 | ||
1018 | static inline struct array_cache **alloc_alien_cache(int node, int limit) | 1018 | static inline struct array_cache **alloc_alien_cache(int node, int limit) |
1019 | { | 1019 | { |
1020 | return (struct array_cache **)BAD_ALIEN_MAGIC; | 1020 | return (struct array_cache **)BAD_ALIEN_MAGIC; |
1021 | } | 1021 | } |
1022 | 1022 | ||
1023 | static inline void free_alien_cache(struct array_cache **ac_ptr) | 1023 | static inline void free_alien_cache(struct array_cache **ac_ptr) |
1024 | { | 1024 | { |
1025 | } | 1025 | } |
1026 | 1026 | ||
1027 | static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | 1027 | static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) |
1028 | { | 1028 | { |
1029 | return 0; | 1029 | return 0; |
1030 | } | 1030 | } |
1031 | 1031 | ||
1032 | static inline void *alternate_node_alloc(struct kmem_cache *cachep, | 1032 | static inline void *alternate_node_alloc(struct kmem_cache *cachep, |
1033 | gfp_t flags) | 1033 | gfp_t flags) |
1034 | { | 1034 | { |
1035 | return NULL; | 1035 | return NULL; |
1036 | } | 1036 | } |
1037 | 1037 | ||
1038 | static inline void *____cache_alloc_node(struct kmem_cache *cachep, | 1038 | static inline void *____cache_alloc_node(struct kmem_cache *cachep, |
1039 | gfp_t flags, int nodeid) | 1039 | gfp_t flags, int nodeid) |
1040 | { | 1040 | { |
1041 | return NULL; | 1041 | return NULL; |
1042 | } | 1042 | } |
1043 | 1043 | ||
1044 | #else /* CONFIG_NUMA */ | 1044 | #else /* CONFIG_NUMA */ |
1045 | 1045 | ||
1046 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); | 1046 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); |
1047 | static void *alternate_node_alloc(struct kmem_cache *, gfp_t); | 1047 | static void *alternate_node_alloc(struct kmem_cache *, gfp_t); |
1048 | 1048 | ||
1049 | static struct array_cache **alloc_alien_cache(int node, int limit) | 1049 | static struct array_cache **alloc_alien_cache(int node, int limit) |
1050 | { | 1050 | { |
1051 | struct array_cache **ac_ptr; | 1051 | struct array_cache **ac_ptr; |
1052 | int memsize = sizeof(void *) * nr_node_ids; | 1052 | int memsize = sizeof(void *) * nr_node_ids; |
1053 | int i; | 1053 | int i; |
1054 | 1054 | ||
1055 | if (limit > 1) | 1055 | if (limit > 1) |
1056 | limit = 12; | 1056 | limit = 12; |
1057 | ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node); | 1057 | ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node); |
1058 | if (ac_ptr) { | 1058 | if (ac_ptr) { |
1059 | for_each_node(i) { | 1059 | for_each_node(i) { |
1060 | if (i == node || !node_online(i)) { | 1060 | if (i == node || !node_online(i)) { |
1061 | ac_ptr[i] = NULL; | 1061 | ac_ptr[i] = NULL; |
1062 | continue; | 1062 | continue; |
1063 | } | 1063 | } |
1064 | ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); | 1064 | ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); |
1065 | if (!ac_ptr[i]) { | 1065 | if (!ac_ptr[i]) { |
1066 | for (i--; i >= 0; i--) | 1066 | for (i--; i >= 0; i--) |
1067 | kfree(ac_ptr[i]); | 1067 | kfree(ac_ptr[i]); |
1068 | kfree(ac_ptr); | 1068 | kfree(ac_ptr); |
1069 | return NULL; | 1069 | return NULL; |
1070 | } | 1070 | } |
1071 | } | 1071 | } |
1072 | } | 1072 | } |
1073 | return ac_ptr; | 1073 | return ac_ptr; |
1074 | } | 1074 | } |
1075 | 1075 | ||
1076 | static void free_alien_cache(struct array_cache **ac_ptr) | 1076 | static void free_alien_cache(struct array_cache **ac_ptr) |
1077 | { | 1077 | { |
1078 | int i; | 1078 | int i; |
1079 | 1079 | ||
1080 | if (!ac_ptr) | 1080 | if (!ac_ptr) |
1081 | return; | 1081 | return; |
1082 | for_each_node(i) | 1082 | for_each_node(i) |
1083 | kfree(ac_ptr[i]); | 1083 | kfree(ac_ptr[i]); |
1084 | kfree(ac_ptr); | 1084 | kfree(ac_ptr); |
1085 | } | 1085 | } |
1086 | 1086 | ||
1087 | static void __drain_alien_cache(struct kmem_cache *cachep, | 1087 | static void __drain_alien_cache(struct kmem_cache *cachep, |
1088 | struct array_cache *ac, int node) | 1088 | struct array_cache *ac, int node) |
1089 | { | 1089 | { |
1090 | struct kmem_list3 *rl3 = cachep->nodelists[node]; | 1090 | struct kmem_list3 *rl3 = cachep->nodelists[node]; |
1091 | 1091 | ||
1092 | if (ac->avail) { | 1092 | if (ac->avail) { |
1093 | spin_lock(&rl3->list_lock); | 1093 | spin_lock(&rl3->list_lock); |
1094 | /* | 1094 | /* |
1095 | * Stuff objects into the remote node's shared array first. | 1095 | * Stuff objects into the remote node's shared array first. |
1096 | * That way we could avoid the overhead of putting the objects | 1096 | * That way we could avoid the overhead of putting the objects |
1097 | * into the free lists and getting them back later. | 1097 | * into the free lists and getting them back later. |
1098 | */ | 1098 | */ |
1099 | if (rl3->shared) | 1099 | if (rl3->shared) |
1100 | transfer_objects(rl3->shared, ac, ac->limit); | 1100 | transfer_objects(rl3->shared, ac, ac->limit); |
1101 | 1101 | ||
1102 | free_block(cachep, ac->entry, ac->avail, node); | 1102 | free_block(cachep, ac->entry, ac->avail, node); |
1103 | ac->avail = 0; | 1103 | ac->avail = 0; |
1104 | spin_unlock(&rl3->list_lock); | 1104 | spin_unlock(&rl3->list_lock); |
1105 | } | 1105 | } |
1106 | } | 1106 | } |
1107 | 1107 | ||
1108 | /* | 1108 | /* |
1109 | * Called from cache_reap() to regularly drain alien caches round robin. | 1109 | * Called from cache_reap() to regularly drain alien caches round robin. |
1110 | */ | 1110 | */ |
1111 | static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) | 1111 | static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) |
1112 | { | 1112 | { |
1113 | int node = __get_cpu_var(reap_node); | 1113 | int node = __get_cpu_var(reap_node); |
1114 | 1114 | ||
1115 | if (l3->alien) { | 1115 | if (l3->alien) { |
1116 | struct array_cache *ac = l3->alien[node]; | 1116 | struct array_cache *ac = l3->alien[node]; |
1117 | 1117 | ||
1118 | if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { | 1118 | if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { |
1119 | __drain_alien_cache(cachep, ac, node); | 1119 | __drain_alien_cache(cachep, ac, node); |
1120 | spin_unlock_irq(&ac->lock); | 1120 | spin_unlock_irq(&ac->lock); |
1121 | } | 1121 | } |
1122 | } | 1122 | } |
1123 | } | 1123 | } |
1124 | 1124 | ||
1125 | static void drain_alien_cache(struct kmem_cache *cachep, | 1125 | static void drain_alien_cache(struct kmem_cache *cachep, |
1126 | struct array_cache **alien) | 1126 | struct array_cache **alien) |
1127 | { | 1127 | { |
1128 | int i = 0; | 1128 | int i = 0; |
1129 | struct array_cache *ac; | 1129 | struct array_cache *ac; |
1130 | unsigned long flags; | 1130 | unsigned long flags; |
1131 | 1131 | ||
1132 | for_each_online_node(i) { | 1132 | for_each_online_node(i) { |
1133 | ac = alien[i]; | 1133 | ac = alien[i]; |
1134 | if (ac) { | 1134 | if (ac) { |
1135 | spin_lock_irqsave(&ac->lock, flags); | 1135 | spin_lock_irqsave(&ac->lock, flags); |
1136 | __drain_alien_cache(cachep, ac, i); | 1136 | __drain_alien_cache(cachep, ac, i); |
1137 | spin_unlock_irqrestore(&ac->lock, flags); | 1137 | spin_unlock_irqrestore(&ac->lock, flags); |
1138 | } | 1138 | } |
1139 | } | 1139 | } |
1140 | } | 1140 | } |
1141 | 1141 | ||
1142 | static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | 1142 | static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) |
1143 | { | 1143 | { |
1144 | struct slab *slabp = virt_to_slab(objp); | 1144 | struct slab *slabp = virt_to_slab(objp); |
1145 | int nodeid = slabp->nodeid; | 1145 | int nodeid = slabp->nodeid; |
1146 | struct kmem_list3 *l3; | 1146 | struct kmem_list3 *l3; |
1147 | struct array_cache *alien = NULL; | 1147 | struct array_cache *alien = NULL; |
1148 | int node; | 1148 | int node; |
1149 | 1149 | ||
1150 | node = numa_node_id(); | 1150 | node = numa_node_id(); |
1151 | 1151 | ||
1152 | /* | 1152 | /* |
1153 | * Make sure we are not freeing an object from another node to the array | 1153 | * Make sure we are not freeing an object from another node to the array |
1154 | * cache on this cpu. | 1154 | * cache on this cpu. |
1155 | */ | 1155 | */ |
1156 | if (likely(slabp->nodeid == node)) | 1156 | if (likely(slabp->nodeid == node)) |
1157 | return 0; | 1157 | return 0; |
1158 | 1158 | ||
1159 | l3 = cachep->nodelists[node]; | 1159 | l3 = cachep->nodelists[node]; |
1160 | STATS_INC_NODEFREES(cachep); | 1160 | STATS_INC_NODEFREES(cachep); |
1161 | if (l3->alien && l3->alien[nodeid]) { | 1161 | if (l3->alien && l3->alien[nodeid]) { |
1162 | alien = l3->alien[nodeid]; | 1162 | alien = l3->alien[nodeid]; |
1163 | spin_lock(&alien->lock); | 1163 | spin_lock(&alien->lock); |
1164 | if (unlikely(alien->avail == alien->limit)) { | 1164 | if (unlikely(alien->avail == alien->limit)) { |
1165 | STATS_INC_ACOVERFLOW(cachep); | 1165 | STATS_INC_ACOVERFLOW(cachep); |
1166 | __drain_alien_cache(cachep, alien, nodeid); | 1166 | __drain_alien_cache(cachep, alien, nodeid); |
1167 | } | 1167 | } |
1168 | alien->entry[alien->avail++] = objp; | 1168 | alien->entry[alien->avail++] = objp; |
1169 | spin_unlock(&alien->lock); | 1169 | spin_unlock(&alien->lock); |
1170 | } else { | 1170 | } else { |
1171 | spin_lock(&(cachep->nodelists[nodeid])->list_lock); | 1171 | spin_lock(&(cachep->nodelists[nodeid])->list_lock); |
1172 | free_block(cachep, &objp, 1, nodeid); | 1172 | free_block(cachep, &objp, 1, nodeid); |
1173 | spin_unlock(&(cachep->nodelists[nodeid])->list_lock); | 1173 | spin_unlock(&(cachep->nodelists[nodeid])->list_lock); |
1174 | } | 1174 | } |
1175 | return 1; | 1175 | return 1; |
1176 | } | 1176 | } |
1177 | #endif | 1177 | #endif |
1178 | 1178 | ||
1179 | static int __cpuinit cpuup_callback(struct notifier_block *nfb, | 1179 | static int __cpuinit cpuup_callback(struct notifier_block *nfb, |
1180 | unsigned long action, void *hcpu) | 1180 | unsigned long action, void *hcpu) |
1181 | { | 1181 | { |
1182 | long cpu = (long)hcpu; | 1182 | long cpu = (long)hcpu; |
1183 | struct kmem_cache *cachep; | 1183 | struct kmem_cache *cachep; |
1184 | struct kmem_list3 *l3 = NULL; | 1184 | struct kmem_list3 *l3 = NULL; |
1185 | int node = cpu_to_node(cpu); | 1185 | int node = cpu_to_node(cpu); |
1186 | int memsize = sizeof(struct kmem_list3); | 1186 | int memsize = sizeof(struct kmem_list3); |
1187 | 1187 | ||
1188 | switch (action) { | 1188 | switch (action) { |
1189 | case CPU_UP_PREPARE: | 1189 | case CPU_UP_PREPARE: |
1190 | mutex_lock(&cache_chain_mutex); | 1190 | mutex_lock(&cache_chain_mutex); |
1191 | /* | 1191 | /* |
1192 | * We need to do this right in the beginning since | 1192 | * We need to do this right in the beginning since |
1193 | * alloc_arraycache() calls are going to use this list. | 1193 | * alloc_arraycache() calls are going to use this list. |
1194 | * kmalloc_node allows us to add the slab to the right | 1194 | * kmalloc_node allows us to add the slab to the right |
1195 | * kmem_list3 and not this cpu's kmem_list3. | 1195 | * kmem_list3 and not this cpu's kmem_list3. |
1196 | */ | 1196 | */ |
1197 | 1197 | ||
1198 | list_for_each_entry(cachep, &cache_chain, next) { | 1198 | list_for_each_entry(cachep, &cache_chain, next) { |
1199 | /* | 1199 | /* |
1200 | * Set up the size64 kmemlist for this cpu before we can | 1200 | * Set up the size64 kmemlist for this cpu before we can |
1201 | * begin anything. Make sure some other cpu on this | 1201 | * begin anything. Make sure some other cpu on this |
1202 | * node has not already allocated this. | 1202 | * node has not already allocated this. |
1203 | */ | 1203 | */ |
1204 | if (!cachep->nodelists[node]) { | 1204 | if (!cachep->nodelists[node]) { |
1205 | l3 = kmalloc_node(memsize, GFP_KERNEL, node); | 1205 | l3 = kmalloc_node(memsize, GFP_KERNEL, node); |
1206 | if (!l3) | 1206 | if (!l3) |
1207 | goto bad; | 1207 | goto bad; |
1208 | kmem_list3_init(l3); | 1208 | kmem_list3_init(l3); |
1209 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 1209 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + |
1210 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 1210 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
1211 | 1211 | ||
1212 | /* | 1212 | /* |
1213 | * The l3s don't come and go as CPUs come and | 1213 | * The l3s don't come and go as CPUs come and |
1214 | * go. cache_chain_mutex is sufficient | 1214 | * go. cache_chain_mutex is sufficient |
1215 | * protection here. | 1215 | * protection here. |
1216 | */ | 1216 | */ |
1217 | cachep->nodelists[node] = l3; | 1217 | cachep->nodelists[node] = l3; |
1218 | } | 1218 | } |
1219 | 1219 | ||
1220 | spin_lock_irq(&cachep->nodelists[node]->list_lock); | 1220 | spin_lock_irq(&cachep->nodelists[node]->list_lock); |
1221 | cachep->nodelists[node]->free_limit = | 1221 | cachep->nodelists[node]->free_limit = |
1222 | (1 + nr_cpus_node(node)) * | 1222 | (1 + nr_cpus_node(node)) * |
1223 | cachep->batchcount + cachep->num; | 1223 | cachep->batchcount + cachep->num; |
1224 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); | 1224 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); |
1225 | } | 1225 | } |
1226 | 1226 | ||
1227 | /* | 1227 | /* |
1228 | * Now we can go ahead with allocating the shared arrays and | 1228 | * Now we can go ahead with allocating the shared arrays and |
1229 | * array caches | 1229 | * array caches |
1230 | */ | 1230 | */ |
1231 | list_for_each_entry(cachep, &cache_chain, next) { | 1231 | list_for_each_entry(cachep, &cache_chain, next) { |
1232 | struct array_cache *nc; | 1232 | struct array_cache *nc; |
1233 | struct array_cache *shared = NULL; | 1233 | struct array_cache *shared = NULL; |
1234 | struct array_cache **alien = NULL; | 1234 | struct array_cache **alien = NULL; |
1235 | 1235 | ||
1236 | nc = alloc_arraycache(node, cachep->limit, | 1236 | nc = alloc_arraycache(node, cachep->limit, |
1237 | cachep->batchcount); | 1237 | cachep->batchcount); |
1238 | if (!nc) | 1238 | if (!nc) |
1239 | goto bad; | 1239 | goto bad; |
1240 | if (cachep->shared) { | 1240 | if (cachep->shared) { |
1241 | shared = alloc_arraycache(node, | 1241 | shared = alloc_arraycache(node, |
1242 | cachep->shared * cachep->batchcount, | 1242 | cachep->shared * cachep->batchcount, |
1243 | 0xbaadf00d); | 1243 | 0xbaadf00d); |
1244 | if (!shared) | 1244 | if (!shared) |
1245 | goto bad; | 1245 | goto bad; |
1246 | } | 1246 | } |
1247 | if (use_alien_caches) { | 1247 | if (use_alien_caches) { |
1248 | alien = alloc_alien_cache(node, cachep->limit); | 1248 | alien = alloc_alien_cache(node, cachep->limit); |
1249 | if (!alien) | 1249 | if (!alien) |
1250 | goto bad; | 1250 | goto bad; |
1251 | } | 1251 | } |
1252 | cachep->array[cpu] = nc; | 1252 | cachep->array[cpu] = nc; |
1253 | l3 = cachep->nodelists[node]; | 1253 | l3 = cachep->nodelists[node]; |
1254 | BUG_ON(!l3); | 1254 | BUG_ON(!l3); |
1255 | 1255 | ||
1256 | spin_lock_irq(&l3->list_lock); | 1256 | spin_lock_irq(&l3->list_lock); |
1257 | if (!l3->shared) { | 1257 | if (!l3->shared) { |
1258 | /* | 1258 | /* |
1259 | * We are serialised from CPU_DEAD or | 1259 | * We are serialised from CPU_DEAD or |
1260 | * CPU_UP_CANCELLED by the cpucontrol lock | 1260 | * CPU_UP_CANCELLED by the cpucontrol lock |
1261 | */ | 1261 | */ |
1262 | l3->shared = shared; | 1262 | l3->shared = shared; |
1263 | shared = NULL; | 1263 | shared = NULL; |
1264 | } | 1264 | } |
1265 | #ifdef CONFIG_NUMA | 1265 | #ifdef CONFIG_NUMA |
1266 | if (!l3->alien) { | 1266 | if (!l3->alien) { |
1267 | l3->alien = alien; | 1267 | l3->alien = alien; |
1268 | alien = NULL; | 1268 | alien = NULL; |
1269 | } | 1269 | } |
1270 | #endif | 1270 | #endif |
1271 | spin_unlock_irq(&l3->list_lock); | 1271 | spin_unlock_irq(&l3->list_lock); |
1272 | kfree(shared); | 1272 | kfree(shared); |
1273 | free_alien_cache(alien); | 1273 | free_alien_cache(alien); |
1274 | } | 1274 | } |
1275 | break; | 1275 | break; |
1276 | case CPU_ONLINE: | 1276 | case CPU_ONLINE: |
1277 | mutex_unlock(&cache_chain_mutex); | 1277 | mutex_unlock(&cache_chain_mutex); |
1278 | start_cpu_timer(cpu); | 1278 | start_cpu_timer(cpu); |
1279 | break; | 1279 | break; |
1280 | #ifdef CONFIG_HOTPLUG_CPU | 1280 | #ifdef CONFIG_HOTPLUG_CPU |
1281 | case CPU_DOWN_PREPARE: | 1281 | case CPU_DOWN_PREPARE: |
1282 | mutex_lock(&cache_chain_mutex); | 1282 | mutex_lock(&cache_chain_mutex); |
1283 | break; | 1283 | break; |
1284 | case CPU_DOWN_FAILED: | 1284 | case CPU_DOWN_FAILED: |
1285 | mutex_unlock(&cache_chain_mutex); | 1285 | mutex_unlock(&cache_chain_mutex); |
1286 | break; | 1286 | break; |
1287 | case CPU_DEAD: | 1287 | case CPU_DEAD: |
1288 | /* | 1288 | /* |
1289 | * Even if all the cpus of a node are down, we don't free the | 1289 | * Even if all the cpus of a node are down, we don't free the |
1290 | * kmem_list3 of any cache. This is to avoid a race between | 1290 | * kmem_list3 of any cache. This is to avoid a race between |
1291 | * cpu_down, and a kmalloc allocation from another cpu for | 1291 | * cpu_down, and a kmalloc allocation from another cpu for |
1292 | * memory from the node of the cpu going down. The list3 | 1292 | * memory from the node of the cpu going down. The list3 |
1293 | * structure is usually allocated from kmem_cache_create() and | 1293 | * structure is usually allocated from kmem_cache_create() and |
1294 | * gets destroyed at kmem_cache_destroy(). | 1294 | * gets destroyed at kmem_cache_destroy(). |
1295 | */ | 1295 | */ |
1296 | /* fall thru */ | 1296 | /* fall thru */ |
1297 | #endif | 1297 | #endif |
1298 | case CPU_UP_CANCELED: | 1298 | case CPU_UP_CANCELED: |
1299 | list_for_each_entry(cachep, &cache_chain, next) { | 1299 | list_for_each_entry(cachep, &cache_chain, next) { |
1300 | struct array_cache *nc; | 1300 | struct array_cache *nc; |
1301 | struct array_cache *shared; | 1301 | struct array_cache *shared; |
1302 | struct array_cache **alien; | 1302 | struct array_cache **alien; |
1303 | cpumask_t mask; | 1303 | cpumask_t mask; |
1304 | 1304 | ||
1305 | mask = node_to_cpumask(node); | 1305 | mask = node_to_cpumask(node); |
1306 | /* cpu is dead; no one can alloc from it. */ | 1306 | /* cpu is dead; no one can alloc from it. */ |
1307 | nc = cachep->array[cpu]; | 1307 | nc = cachep->array[cpu]; |
1308 | cachep->array[cpu] = NULL; | 1308 | cachep->array[cpu] = NULL; |
1309 | l3 = cachep->nodelists[node]; | 1309 | l3 = cachep->nodelists[node]; |
1310 | 1310 | ||
1311 | if (!l3) | 1311 | if (!l3) |
1312 | goto free_array_cache; | 1312 | goto free_array_cache; |
1313 | 1313 | ||
1314 | spin_lock_irq(&l3->list_lock); | 1314 | spin_lock_irq(&l3->list_lock); |
1315 | 1315 | ||
1316 | /* Free limit for this kmem_list3 */ | 1316 | /* Free limit for this kmem_list3 */ |
1317 | l3->free_limit -= cachep->batchcount; | 1317 | l3->free_limit -= cachep->batchcount; |
1318 | if (nc) | 1318 | if (nc) |
1319 | free_block(cachep, nc->entry, nc->avail, node); | 1319 | free_block(cachep, nc->entry, nc->avail, node); |
1320 | 1320 | ||
1321 | if (!cpus_empty(mask)) { | 1321 | if (!cpus_empty(mask)) { |
1322 | spin_unlock_irq(&l3->list_lock); | 1322 | spin_unlock_irq(&l3->list_lock); |
1323 | goto free_array_cache; | 1323 | goto free_array_cache; |
1324 | } | 1324 | } |
1325 | 1325 | ||
1326 | shared = l3->shared; | 1326 | shared = l3->shared; |
1327 | if (shared) { | 1327 | if (shared) { |
1328 | free_block(cachep, shared->entry, | 1328 | free_block(cachep, shared->entry, |
1329 | shared->avail, node); | 1329 | shared->avail, node); |
1330 | l3->shared = NULL; | 1330 | l3->shared = NULL; |
1331 | } | 1331 | } |
1332 | 1332 | ||
1333 | alien = l3->alien; | 1333 | alien = l3->alien; |
1334 | l3->alien = NULL; | 1334 | l3->alien = NULL; |
1335 | 1335 | ||
1336 | spin_unlock_irq(&l3->list_lock); | 1336 | spin_unlock_irq(&l3->list_lock); |
1337 | 1337 | ||
1338 | kfree(shared); | 1338 | kfree(shared); |
1339 | if (alien) { | 1339 | if (alien) { |
1340 | drain_alien_cache(cachep, alien); | 1340 | drain_alien_cache(cachep, alien); |
1341 | free_alien_cache(alien); | 1341 | free_alien_cache(alien); |
1342 | } | 1342 | } |
1343 | free_array_cache: | 1343 | free_array_cache: |
1344 | kfree(nc); | 1344 | kfree(nc); |
1345 | } | 1345 | } |
1346 | /* | 1346 | /* |
1347 | * In the previous loop, all the objects were freed to | 1347 | * In the previous loop, all the objects were freed to |
1348 | * the respective cache's slabs, now we can go ahead and | 1348 | * the respective cache's slabs, now we can go ahead and |
1349 | * shrink each nodelist to its limit. | 1349 | * shrink each nodelist to its limit. |
1350 | */ | 1350 | */ |
1351 | list_for_each_entry(cachep, &cache_chain, next) { | 1351 | list_for_each_entry(cachep, &cache_chain, next) { |
1352 | l3 = cachep->nodelists[node]; | 1352 | l3 = cachep->nodelists[node]; |
1353 | if (!l3) | 1353 | if (!l3) |
1354 | continue; | 1354 | continue; |
1355 | drain_freelist(cachep, l3, l3->free_objects); | 1355 | drain_freelist(cachep, l3, l3->free_objects); |
1356 | } | 1356 | } |
1357 | mutex_unlock(&cache_chain_mutex); | 1357 | mutex_unlock(&cache_chain_mutex); |
1358 | break; | 1358 | break; |
1359 | } | 1359 | } |
1360 | return NOTIFY_OK; | 1360 | return NOTIFY_OK; |
1361 | bad: | 1361 | bad: |
1362 | return NOTIFY_BAD; | 1362 | return NOTIFY_BAD; |
1363 | } | 1363 | } |
1364 | 1364 | ||
1365 | static struct notifier_block __cpuinitdata cpucache_notifier = { | 1365 | static struct notifier_block __cpuinitdata cpucache_notifier = { |
1366 | &cpuup_callback, NULL, 0 | 1366 | &cpuup_callback, NULL, 0 |
1367 | }; | 1367 | }; |
1368 | 1368 | ||
1369 | /* | 1369 | /* |
1370 | * swap the static kmem_list3 with kmalloced memory | 1370 | * swap the static kmem_list3 with kmalloced memory |
1371 | */ | 1371 | */ |
1372 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, | 1372 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, |
1373 | int nodeid) | 1373 | int nodeid) |
1374 | { | 1374 | { |
1375 | struct kmem_list3 *ptr; | 1375 | struct kmem_list3 *ptr; |
1376 | 1376 | ||
1377 | ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); | 1377 | ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); |
1378 | BUG_ON(!ptr); | 1378 | BUG_ON(!ptr); |
1379 | 1379 | ||
1380 | local_irq_disable(); | 1380 | local_irq_disable(); |
1381 | memcpy(ptr, list, sizeof(struct kmem_list3)); | 1381 | memcpy(ptr, list, sizeof(struct kmem_list3)); |
1382 | /* | 1382 | /* |
1383 | * Do not assume that spinlocks can be initialized via memcpy: | 1383 | * Do not assume that spinlocks can be initialized via memcpy: |
1384 | */ | 1384 | */ |
1385 | spin_lock_init(&ptr->list_lock); | 1385 | spin_lock_init(&ptr->list_lock); |
1386 | 1386 | ||
1387 | MAKE_ALL_LISTS(cachep, ptr, nodeid); | 1387 | MAKE_ALL_LISTS(cachep, ptr, nodeid); |
1388 | cachep->nodelists[nodeid] = ptr; | 1388 | cachep->nodelists[nodeid] = ptr; |
1389 | local_irq_enable(); | 1389 | local_irq_enable(); |
1390 | } | 1390 | } |
1391 | 1391 | ||
1392 | /* | 1392 | /* |
1393 | * Initialisation. Called after the page allocator has been initialised and | 1393 | * Initialisation. Called after the page allocator has been initialised and |
1394 | * before smp_init(). | 1394 | * before smp_init(). |
1395 | */ | 1395 | */ |
1396 | void __init kmem_cache_init(void) | 1396 | void __init kmem_cache_init(void) |
1397 | { | 1397 | { |
1398 | size_t left_over; | 1398 | size_t left_over; |
1399 | struct cache_sizes *sizes; | 1399 | struct cache_sizes *sizes; |
1400 | struct cache_names *names; | 1400 | struct cache_names *names; |
1401 | int i; | 1401 | int i; |
1402 | int order; | 1402 | int order; |
1403 | int node; | 1403 | int node; |
1404 | 1404 | ||
1405 | if (num_possible_nodes() == 1) | 1405 | if (num_possible_nodes() == 1) |
1406 | use_alien_caches = 0; | 1406 | use_alien_caches = 0; |
1407 | 1407 | ||
1408 | for (i = 0; i < NUM_INIT_LISTS; i++) { | 1408 | for (i = 0; i < NUM_INIT_LISTS; i++) { |
1409 | kmem_list3_init(&initkmem_list3[i]); | 1409 | kmem_list3_init(&initkmem_list3[i]); |
1410 | if (i < MAX_NUMNODES) | 1410 | if (i < MAX_NUMNODES) |
1411 | cache_cache.nodelists[i] = NULL; | 1411 | cache_cache.nodelists[i] = NULL; |
1412 | } | 1412 | } |
1413 | 1413 | ||
1414 | /* | 1414 | /* |
1415 | * Fragmentation resistance on low memory - only use bigger | 1415 | * Fragmentation resistance on low memory - only use bigger |
1416 | * page orders on machines with more than 32MB of memory. | 1416 | * page orders on machines with more than 32MB of memory. |
1417 | */ | 1417 | */ |
1418 | if (num_physpages > (32 << 20) >> PAGE_SHIFT) | 1418 | if (num_physpages > (32 << 20) >> PAGE_SHIFT) |
1419 | slab_break_gfp_order = BREAK_GFP_ORDER_HI; | 1419 | slab_break_gfp_order = BREAK_GFP_ORDER_HI; |
1420 | 1420 | ||
1421 | /* Bootstrap is tricky, because several objects are allocated | 1421 | /* Bootstrap is tricky, because several objects are allocated |
1422 | * from caches that do not exist yet: | 1422 | * from caches that do not exist yet: |
1423 | * 1) initialize the cache_cache cache: it contains the struct | 1423 | * 1) initialize the cache_cache cache: it contains the struct |
1424 | * kmem_cache structures of all caches, except cache_cache itself: | 1424 | * kmem_cache structures of all caches, except cache_cache itself: |
1425 | * cache_cache is statically allocated. | 1425 | * cache_cache is statically allocated. |
1426 | * Initially an __init data area is used for the head array and the | 1426 | * Initially an __init data area is used for the head array and the |
1427 | * kmem_list3 structures; these are replaced with kmalloc-allocated | 1427 | * kmem_list3 structures; these are replaced with kmalloc-allocated |
1428 | * copies at the end of the bootstrap. | 1428 | * copies at the end of the bootstrap. |
1429 | * 2) Create the first kmalloc cache. | 1429 | * 2) Create the first kmalloc cache. |
1430 | * The struct kmem_cache for the new cache is allocated normally. | 1430 | * The struct kmem_cache for the new cache is allocated normally. |
1431 | * An __init data area is used for the head array. | 1431 | * An __init data area is used for the head array. |
1432 | * 3) Create the remaining kmalloc caches, with minimally sized | 1432 | * 3) Create the remaining kmalloc caches, with minimally sized |
1433 | * head arrays. | 1433 | * head arrays. |
1434 | * 4) Replace the __init data head arrays for cache_cache and the first | 1434 | * 4) Replace the __init data head arrays for cache_cache and the first |
1435 | * kmalloc cache with kmalloc allocated arrays. | 1435 | * kmalloc cache with kmalloc allocated arrays. |
1436 | * 5) Replace the __init data for kmem_list3 for cache_cache and | 1436 | * 5) Replace the __init data for kmem_list3 for cache_cache and |
1437 | * the other caches with kmalloc-allocated memory. | 1437 | * the other caches with kmalloc-allocated memory. |
1438 | * 6) Resize the head arrays of the kmalloc caches to their final sizes. | 1438 | * 6) Resize the head arrays of the kmalloc caches to their final sizes. |
1439 | */ | 1439 | */ |
1440 | 1440 | ||
1441 | node = numa_node_id(); | 1441 | node = numa_node_id(); |
1442 | 1442 | ||
1443 | /* 1) create the cache_cache */ | 1443 | /* 1) create the cache_cache */ |
1444 | INIT_LIST_HEAD(&cache_chain); | 1444 | INIT_LIST_HEAD(&cache_chain); |
1445 | list_add(&cache_cache.next, &cache_chain); | 1445 | list_add(&cache_cache.next, &cache_chain); |
1446 | cache_cache.colour_off = cache_line_size(); | 1446 | cache_cache.colour_off = cache_line_size(); |
1447 | cache_cache.array[smp_processor_id()] = &initarray_cache.cache; | 1447 | cache_cache.array[smp_processor_id()] = &initarray_cache.cache; |
1448 | cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE]; | 1448 | cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE]; |
1449 | 1449 | ||
1450 | /* | 1450 | /* |
1451 | * struct kmem_cache size depends on nr_node_ids, which | 1451 | * struct kmem_cache size depends on nr_node_ids, which |
1452 | * can be less than MAX_NUMNODES. | 1452 | * can be less than MAX_NUMNODES. |
1453 | */ | 1453 | */ |
1454 | cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) + | 1454 | cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) + |
1455 | nr_node_ids * sizeof(struct kmem_list3 *); | 1455 | nr_node_ids * sizeof(struct kmem_list3 *); |
1456 | #if DEBUG | 1456 | #if DEBUG |
1457 | cache_cache.obj_size = cache_cache.buffer_size; | 1457 | cache_cache.obj_size = cache_cache.buffer_size; |
1458 | #endif | 1458 | #endif |
1459 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, | 1459 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, |
1460 | cache_line_size()); | 1460 | cache_line_size()); |
1461 | cache_cache.reciprocal_buffer_size = | 1461 | cache_cache.reciprocal_buffer_size = |
1462 | reciprocal_value(cache_cache.buffer_size); | 1462 | reciprocal_value(cache_cache.buffer_size); |
1463 | 1463 | ||
1464 | for (order = 0; order < MAX_ORDER; order++) { | 1464 | for (order = 0; order < MAX_ORDER; order++) { |
1465 | cache_estimate(order, cache_cache.buffer_size, | 1465 | cache_estimate(order, cache_cache.buffer_size, |
1466 | cache_line_size(), 0, &left_over, &cache_cache.num); | 1466 | cache_line_size(), 0, &left_over, &cache_cache.num); |
1467 | if (cache_cache.num) | 1467 | if (cache_cache.num) |
1468 | break; | 1468 | break; |
1469 | } | 1469 | } |
1470 | BUG_ON(!cache_cache.num); | 1470 | BUG_ON(!cache_cache.num); |
1471 | cache_cache.gfporder = order; | 1471 | cache_cache.gfporder = order; |
1472 | cache_cache.colour = left_over / cache_cache.colour_off; | 1472 | cache_cache.colour = left_over / cache_cache.colour_off; |
1473 | cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + | 1473 | cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + |
1474 | sizeof(struct slab), cache_line_size()); | 1474 | sizeof(struct slab), cache_line_size()); |
1475 | 1475 | ||
1476 | /* 2+3) create the kmalloc caches */ | 1476 | /* 2+3) create the kmalloc caches */ |
1477 | sizes = malloc_sizes; | 1477 | sizes = malloc_sizes; |
1478 | names = cache_names; | 1478 | names = cache_names; |
1479 | 1479 | ||
1480 | /* | 1480 | /* |
1481 | * Initialize the caches that provide memory for the array cache and the | 1481 | * Initialize the caches that provide memory for the array cache and the |
1482 | * kmem_list3 structures first. Without this, further allocations will | 1482 | * kmem_list3 structures first. Without this, further allocations will |
1483 | * BUG(). | 1483 | * BUG(). |
1484 | */ | 1484 | */ |
1485 | 1485 | ||
1486 | sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, | 1486 | sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, |
1487 | sizes[INDEX_AC].cs_size, | 1487 | sizes[INDEX_AC].cs_size, |
1488 | ARCH_KMALLOC_MINALIGN, | 1488 | ARCH_KMALLOC_MINALIGN, |
1489 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, | 1489 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
1490 | NULL, NULL); | 1490 | NULL, NULL); |
1491 | 1491 | ||
1492 | if (INDEX_AC != INDEX_L3) { | 1492 | if (INDEX_AC != INDEX_L3) { |
1493 | sizes[INDEX_L3].cs_cachep = | 1493 | sizes[INDEX_L3].cs_cachep = |
1494 | kmem_cache_create(names[INDEX_L3].name, | 1494 | kmem_cache_create(names[INDEX_L3].name, |
1495 | sizes[INDEX_L3].cs_size, | 1495 | sizes[INDEX_L3].cs_size, |
1496 | ARCH_KMALLOC_MINALIGN, | 1496 | ARCH_KMALLOC_MINALIGN, |
1497 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, | 1497 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
1498 | NULL, NULL); | 1498 | NULL, NULL); |
1499 | } | 1499 | } |
1500 | 1500 | ||
1501 | slab_early_init = 0; | 1501 | slab_early_init = 0; |
1502 | 1502 | ||
1503 | while (sizes->cs_size != ULONG_MAX) { | 1503 | while (sizes->cs_size != ULONG_MAX) { |
1504 | /* | 1504 | /* |
1505 | * For performance, all the general caches are L1 aligned. | 1505 | * For performance, all the general caches are L1 aligned. |
1506 | * This should be particularly beneficial on SMP boxes, as it | 1506 | * This should be particularly beneficial on SMP boxes, as it |
1507 | * eliminates "false sharing". | 1507 | * eliminates "false sharing". |
1508 | * Note: for systems short on memory, removing the alignment will | 1508 | * Note: for systems short on memory, removing the alignment will |
1509 | * allow tighter packing of the smaller caches. | 1509 | * allow tighter packing of the smaller caches. |
1510 | */ | 1510 | */ |
1511 | if (!sizes->cs_cachep) { | 1511 | if (!sizes->cs_cachep) { |
1512 | sizes->cs_cachep = kmem_cache_create(names->name, | 1512 | sizes->cs_cachep = kmem_cache_create(names->name, |
1513 | sizes->cs_size, | 1513 | sizes->cs_size, |
1514 | ARCH_KMALLOC_MINALIGN, | 1514 | ARCH_KMALLOC_MINALIGN, |
1515 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, | 1515 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
1516 | NULL, NULL); | 1516 | NULL, NULL); |
1517 | } | 1517 | } |
1518 | #ifdef CONFIG_ZONE_DMA | 1518 | #ifdef CONFIG_ZONE_DMA |
1519 | sizes->cs_dmacachep = kmem_cache_create( | 1519 | sizes->cs_dmacachep = kmem_cache_create( |
1520 | names->name_dma, | 1520 | names->name_dma, |
1521 | sizes->cs_size, | 1521 | sizes->cs_size, |
1522 | ARCH_KMALLOC_MINALIGN, | 1522 | ARCH_KMALLOC_MINALIGN, |
1523 | ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| | 1523 | ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| |
1524 | SLAB_PANIC, | 1524 | SLAB_PANIC, |
1525 | NULL, NULL); | 1525 | NULL, NULL); |
1526 | #endif | 1526 | #endif |
1527 | sizes++; | 1527 | sizes++; |
1528 | names++; | 1528 | names++; |
1529 | } | 1529 | } |
1530 | /* 4) Replace the bootstrap head arrays */ | 1530 | /* 4) Replace the bootstrap head arrays */ |
1531 | { | 1531 | { |
1532 | struct array_cache *ptr; | 1532 | struct array_cache *ptr; |
1533 | 1533 | ||
1534 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | 1534 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); |
1535 | 1535 | ||
1536 | local_irq_disable(); | 1536 | local_irq_disable(); |
1537 | BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); | 1537 | BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); |
1538 | memcpy(ptr, cpu_cache_get(&cache_cache), | 1538 | memcpy(ptr, cpu_cache_get(&cache_cache), |
1539 | sizeof(struct arraycache_init)); | 1539 | sizeof(struct arraycache_init)); |
1540 | /* | 1540 | /* |
1541 | * Do not assume that spinlocks can be initialized via memcpy: | 1541 | * Do not assume that spinlocks can be initialized via memcpy: |
1542 | */ | 1542 | */ |
1543 | spin_lock_init(&ptr->lock); | 1543 | spin_lock_init(&ptr->lock); |
1544 | 1544 | ||
1545 | cache_cache.array[smp_processor_id()] = ptr; | 1545 | cache_cache.array[smp_processor_id()] = ptr; |
1546 | local_irq_enable(); | 1546 | local_irq_enable(); |
1547 | 1547 | ||
1548 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | 1548 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); |
1549 | 1549 | ||
1550 | local_irq_disable(); | 1550 | local_irq_disable(); |
1551 | BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) | 1551 | BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) |
1552 | != &initarray_generic.cache); | 1552 | != &initarray_generic.cache); |
1553 | memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), | 1553 | memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), |
1554 | sizeof(struct arraycache_init)); | 1554 | sizeof(struct arraycache_init)); |
1555 | /* | 1555 | /* |
1556 | * Do not assume that spinlocks can be initialized via memcpy: | 1556 | * Do not assume that spinlocks can be initialized via memcpy: |
1557 | */ | 1557 | */ |
1558 | spin_lock_init(&ptr->lock); | 1558 | spin_lock_init(&ptr->lock); |
1559 | 1559 | ||
1560 | malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = | 1560 | malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = |
1561 | ptr; | 1561 | ptr; |
1562 | local_irq_enable(); | 1562 | local_irq_enable(); |
1563 | } | 1563 | } |
1564 | /* 5) Replace the bootstrap kmem_list3s */ | 1564 | /* 5) Replace the bootstrap kmem_list3s */ |
1565 | { | 1565 | { |
1566 | int nid; | 1566 | int nid; |
1567 | 1567 | ||
1568 | /* Replace the static kmem_list3 structures for the boot cpu */ | 1568 | /* Replace the static kmem_list3 structures for the boot cpu */ |
1569 | init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node); | 1569 | init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node); |
1570 | 1570 | ||
1571 | for_each_online_node(nid) { | 1571 | for_each_online_node(nid) { |
1572 | init_list(malloc_sizes[INDEX_AC].cs_cachep, | 1572 | init_list(malloc_sizes[INDEX_AC].cs_cachep, |
1573 | &initkmem_list3[SIZE_AC + nid], nid); | 1573 | &initkmem_list3[SIZE_AC + nid], nid); |
1574 | 1574 | ||
1575 | if (INDEX_AC != INDEX_L3) { | 1575 | if (INDEX_AC != INDEX_L3) { |
1576 | init_list(malloc_sizes[INDEX_L3].cs_cachep, | 1576 | init_list(malloc_sizes[INDEX_L3].cs_cachep, |
1577 | &initkmem_list3[SIZE_L3 + nid], nid); | 1577 | &initkmem_list3[SIZE_L3 + nid], nid); |
1578 | } | 1578 | } |
1579 | } | 1579 | } |
1580 | } | 1580 | } |
1581 | 1581 | ||
1582 | /* 6) resize the head arrays to their final sizes */ | 1582 | /* 6) resize the head arrays to their final sizes */ |
1583 | { | 1583 | { |
1584 | struct kmem_cache *cachep; | 1584 | struct kmem_cache *cachep; |
1585 | mutex_lock(&cache_chain_mutex); | 1585 | mutex_lock(&cache_chain_mutex); |
1586 | list_for_each_entry(cachep, &cache_chain, next) | 1586 | list_for_each_entry(cachep, &cache_chain, next) |
1587 | if (enable_cpucache(cachep)) | 1587 | if (enable_cpucache(cachep)) |
1588 | BUG(); | 1588 | BUG(); |
1589 | mutex_unlock(&cache_chain_mutex); | 1589 | mutex_unlock(&cache_chain_mutex); |
1590 | } | 1590 | } |
1591 | 1591 | ||
1592 | /* Annotate slab for lockdep -- annotate the malloc caches */ | 1592 | /* Annotate slab for lockdep -- annotate the malloc caches */ |
1593 | init_lock_keys(); | 1593 | init_lock_keys(); |
1594 | 1594 | ||
1595 | 1595 | ||
1596 | /* Done! */ | 1596 | /* Done! */ |
1597 | g_cpucache_up = FULL; | 1597 | g_cpucache_up = FULL; |
1598 | 1598 | ||
1599 | /* | 1599 | /* |
1600 | * Register a cpu startup notifier callback that initializes | 1600 | * Register a cpu startup notifier callback that initializes |
1601 | * cpu_cache_get for all new cpus | 1601 | * cpu_cache_get for all new cpus |
1602 | */ | 1602 | */ |
1603 | register_cpu_notifier(&cpucache_notifier); | 1603 | register_cpu_notifier(&cpucache_notifier); |
1604 | 1604 | ||
1605 | /* | 1605 | /* |
1606 | * The reap timers are started later, with a module init call; that part | 1606 | * The reap timers are started later, with a module init call; that part |
1607 | * of the kernel is not yet operational. | 1607 | * of the kernel is not yet operational. |
1608 | */ | 1608 | */ |
1609 | } | 1609 | } |
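
One detail of step 1 worth unpacking is reciprocal_buffer_size: precomputing reciprocal_value(buffer_size) lets hot paths such as obj_to_index() replace a division by buffer_size with a multiply and a shift. A stand-alone sketch of the trick with illustrative numbers (the kernel's own helpers live in reciprocal_div.h; the approximation is exact for the small operands the slab code feeds it, i.e. byte offsets within one slab):

#include <stdint.h>
#include <stdio.h>

/* R = ceil(2^32 / k), computed once per cache. */
static uint32_t reciprocal_value(uint32_t k)
{
	return (uint32_t)(((1ULL << 32) + k - 1) / k);
}

/* a / k becomes a multiply and a shift on the hot path. */
static uint32_t reciprocal_divide(uint32_t a, uint32_t r)
{
	return (uint32_t)(((uint64_t)a * r) >> 32);
}

int main(void)
{
	uint32_t k = 192;	/* e.g. an aligned buffer_size */
	uint32_t r = reciprocal_value(k);

	/* 5000 / 192 = 26, recovered without a divide instruction. */
	printf("%u\n", (unsigned)reciprocal_divide(5000, r));
	return 0;
}
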
1610 | 1610 | ||
1611 | static int __init cpucache_init(void) | 1611 | static int __init cpucache_init(void) |
1612 | { | 1612 | { |
1613 | int cpu; | 1613 | int cpu; |
1614 | 1614 | ||
1615 | /* | 1615 | /* |
1616 | * Register the timers that return unneeded pages to the page allocator | 1616 | * Register the timers that return unneeded pages to the page allocator |
1617 | */ | 1617 | */ |
1618 | for_each_online_cpu(cpu) | 1618 | for_each_online_cpu(cpu) |
1619 | start_cpu_timer(cpu); | 1619 | start_cpu_timer(cpu); |
1620 | return 0; | 1620 | return 0; |
1621 | } | 1621 | } |
1622 | __initcall(cpucache_init); | 1622 | __initcall(cpucache_init); |
1623 | 1623 | ||
1624 | /* | 1624 | /* |
1625 | * Interface to system's page allocator. No need to hold the cache-lock. | 1625 | * Interface to system's page allocator. No need to hold the cache-lock. |
1626 | * | 1626 | * |
1627 | * If we requested DMA-able memory, we will get it. Even if we | 1627 | * If we requested DMA-able memory, we will get it. Even if we |
1628 | * did not request DMA-able memory, we might get it, but that | 1628 | * did not request DMA-able memory, we might get it, but that |
1629 | * would be relatively rare and ignorable. | 1629 | * would be relatively rare and ignorable. |
1630 | */ | 1630 | */ |
1631 | static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | 1631 | static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) |
1632 | { | 1632 | { |
1633 | struct page *page; | 1633 | struct page *page; |
1634 | int nr_pages; | 1634 | int nr_pages; |
1635 | int i; | 1635 | int i; |
1636 | 1636 | ||
1637 | #ifndef CONFIG_MMU | 1637 | #ifndef CONFIG_MMU |
1638 | /* | 1638 | /* |
1639 | * Nommu uses slabs for process anonymous memory allocations, and thus | 1639 | * Nommu uses slabs for process anonymous memory allocations, and thus |
1640 | * requires __GFP_COMP to properly refcount higher order allocations | 1640 | * requires __GFP_COMP to properly refcount higher order allocations |
1641 | */ | 1641 | */ |
1642 | flags |= __GFP_COMP; | 1642 | flags |= __GFP_COMP; |
1643 | #endif | 1643 | #endif |
1644 | 1644 | ||
1645 | flags |= cachep->gfpflags; | 1645 | flags |= cachep->gfpflags; |
1646 | 1646 | ||
1647 | page = alloc_pages_node(nodeid, flags, cachep->gfporder); | 1647 | page = alloc_pages_node(nodeid, flags, cachep->gfporder); |
1648 | if (!page) | 1648 | if (!page) |
1649 | return NULL; | 1649 | return NULL; |
1650 | 1650 | ||
1651 | nr_pages = (1 << cachep->gfporder); | 1651 | nr_pages = (1 << cachep->gfporder); |
1652 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1652 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
1653 | add_zone_page_state(page_zone(page), | 1653 | add_zone_page_state(page_zone(page), |
1654 | NR_SLAB_RECLAIMABLE, nr_pages); | 1654 | NR_SLAB_RECLAIMABLE, nr_pages); |
1655 | else | 1655 | else |
1656 | add_zone_page_state(page_zone(page), | 1656 | add_zone_page_state(page_zone(page), |
1657 | NR_SLAB_UNRECLAIMABLE, nr_pages); | 1657 | NR_SLAB_UNRECLAIMABLE, nr_pages); |
1658 | for (i = 0; i < nr_pages; i++) | 1658 | for (i = 0; i < nr_pages; i++) |
1659 | __SetPageSlab(page + i); | 1659 | __SetPageSlab(page + i); |
1660 | return page_address(page); | 1660 | return page_address(page); |
1661 | } | 1661 | } |
1662 | 1662 | ||
1663 | /* | 1663 | /* |
1664 | * Interface to system's page release. | 1664 | * Interface to system's page release. |
1665 | */ | 1665 | */ |
1666 | static void kmem_freepages(struct kmem_cache *cachep, void *addr) | 1666 | static void kmem_freepages(struct kmem_cache *cachep, void *addr) |
1667 | { | 1667 | { |
1668 | unsigned long i = (1 << cachep->gfporder); | 1668 | unsigned long i = (1 << cachep->gfporder); |
1669 | struct page *page = virt_to_page(addr); | 1669 | struct page *page = virt_to_page(addr); |
1670 | const unsigned long nr_freed = i; | 1670 | const unsigned long nr_freed = i; |
1671 | 1671 | ||
1672 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1672 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
1673 | sub_zone_page_state(page_zone(page), | 1673 | sub_zone_page_state(page_zone(page), |
1674 | NR_SLAB_RECLAIMABLE, nr_freed); | 1674 | NR_SLAB_RECLAIMABLE, nr_freed); |
1675 | else | 1675 | else |
1676 | sub_zone_page_state(page_zone(page), | 1676 | sub_zone_page_state(page_zone(page), |
1677 | NR_SLAB_UNRECLAIMABLE, nr_freed); | 1677 | NR_SLAB_UNRECLAIMABLE, nr_freed); |
1678 | while (i--) { | 1678 | while (i--) { |
1679 | BUG_ON(!PageSlab(page)); | 1679 | BUG_ON(!PageSlab(page)); |
1680 | __ClearPageSlab(page); | 1680 | __ClearPageSlab(page); |
1681 | page++; | 1681 | page++; |
1682 | } | 1682 | } |
1683 | if (current->reclaim_state) | 1683 | if (current->reclaim_state) |
1684 | current->reclaim_state->reclaimed_slab += nr_freed; | 1684 | current->reclaim_state->reclaimed_slab += nr_freed; |
1685 | free_pages((unsigned long)addr, cachep->gfporder); | 1685 | free_pages((unsigned long)addr, cachep->gfporder); |
1686 | } | 1686 | } |
1687 | 1687 | ||
1688 | static void kmem_rcu_free(struct rcu_head *head) | 1688 | static void kmem_rcu_free(struct rcu_head *head) |
1689 | { | 1689 | { |
1690 | struct slab_rcu *slab_rcu = (struct slab_rcu *)head; | 1690 | struct slab_rcu *slab_rcu = (struct slab_rcu *)head; |
1691 | struct kmem_cache *cachep = slab_rcu->cachep; | 1691 | struct kmem_cache *cachep = slab_rcu->cachep; |
1692 | 1692 | ||
1693 | kmem_freepages(cachep, slab_rcu->addr); | 1693 | kmem_freepages(cachep, slab_rcu->addr); |
1694 | if (OFF_SLAB(cachep)) | 1694 | if (OFF_SLAB(cachep)) |
1695 | kmem_cache_free(cachep->slabp_cache, slab_rcu); | 1695 | kmem_cache_free(cachep->slabp_cache, slab_rcu); |
1696 | } | 1696 | } |
1697 | 1697 | ||
1698 | #if DEBUG | 1698 | #if DEBUG |
1699 | 1699 | ||
1700 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1700 | #ifdef CONFIG_DEBUG_PAGEALLOC |
1701 | static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, | 1701 | static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, |
1702 | unsigned long caller) | 1702 | unsigned long caller) |
1703 | { | 1703 | { |
1704 | int size = obj_size(cachep); | 1704 | int size = obj_size(cachep); |
1705 | 1705 | ||
1706 | addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; | 1706 | addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; |
1707 | 1707 | ||
1708 | if (size < 5 * sizeof(unsigned long)) | 1708 | if (size < 5 * sizeof(unsigned long)) |
1709 | return; | 1709 | return; |
1710 | 1710 | ||
1711 | *addr++ = 0x12345678; | 1711 | *addr++ = 0x12345678; |
1712 | *addr++ = caller; | 1712 | *addr++ = caller; |
1713 | *addr++ = smp_processor_id(); | 1713 | *addr++ = smp_processor_id(); |
1714 | size -= 3 * sizeof(unsigned long); | 1714 | size -= 3 * sizeof(unsigned long); |
1715 | { | 1715 | { |
1716 | unsigned long *sptr = &caller; | 1716 | unsigned long *sptr = &caller; |
1717 | unsigned long svalue; | 1717 | unsigned long svalue; |
1718 | 1718 | ||
1719 | while (!kstack_end(sptr)) { | 1719 | while (!kstack_end(sptr)) { |
1720 | svalue = *sptr++; | 1720 | svalue = *sptr++; |
1721 | if (kernel_text_address(svalue)) { | 1721 | if (kernel_text_address(svalue)) { |
1722 | *addr++ = svalue; | 1722 | *addr++ = svalue; |
1723 | size -= sizeof(unsigned long); | 1723 | size -= sizeof(unsigned long); |
1724 | if (size <= sizeof(unsigned long)) | 1724 | if (size <= sizeof(unsigned long)) |
1725 | break; | 1725 | break; |
1726 | } | 1726 | } |
1727 | } | 1727 | } |
1728 | 1728 | ||
1729 | } | 1729 | } |
1730 | *addr++ = 0x87654321; | 1730 | *addr++ = 0x87654321; |
1731 | } | 1731 | } |
1732 | #endif | 1732 | #endif |
1733 | 1733 | ||
1734 | static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) | 1734 | static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) |
1735 | { | 1735 | { |
1736 | int size = obj_size(cachep); | 1736 | int size = obj_size(cachep); |
1737 | addr = &((char *)addr)[obj_offset(cachep)]; | 1737 | addr = &((char *)addr)[obj_offset(cachep)]; |
1738 | 1738 | ||
1739 | memset(addr, val, size); | 1739 | memset(addr, val, size); |
1740 | *(unsigned char *)(addr + size - 1) = POISON_END; | 1740 | *(unsigned char *)(addr + size - 1) = POISON_END; |
1741 | } | 1741 | } |
1742 | 1742 | ||
1743 | static void dump_line(char *data, int offset, int limit) | 1743 | static void dump_line(char *data, int offset, int limit) |
1744 | { | 1744 | { |
1745 | int i; | 1745 | int i; |
1746 | unsigned char error = 0; | 1746 | unsigned char error = 0; |
1747 | int bad_count = 0; | 1747 | int bad_count = 0; |
1748 | 1748 | ||
1749 | printk(KERN_ERR "%03x:", offset); | 1749 | printk(KERN_ERR "%03x:", offset); |
1750 | for (i = 0; i < limit; i++) { | 1750 | for (i = 0; i < limit; i++) { |
1751 | if (data[offset + i] != POISON_FREE) { | 1751 | if (data[offset + i] != POISON_FREE) { |
1752 | error = data[offset + i]; | 1752 | error = data[offset + i]; |
1753 | bad_count++; | 1753 | bad_count++; |
1754 | } | 1754 | } |
1755 | printk(" %02x", (unsigned char)data[offset + i]); | 1755 | printk(" %02x", (unsigned char)data[offset + i]); |
1756 | } | 1756 | } |
1757 | printk("\n"); | 1757 | printk("\n"); |
1758 | 1758 | ||
1759 | if (bad_count == 1) { | 1759 | if (bad_count == 1) { |
1760 | error ^= POISON_FREE; | 1760 | error ^= POISON_FREE; |
1761 | if (!(error & (error - 1))) { | 1761 | if (!(error & (error - 1))) { |
1762 | printk(KERN_ERR "Single bit error detected. Probably " | 1762 | printk(KERN_ERR "Single bit error detected. Probably " |
1763 | "bad RAM.\n"); | 1763 | "bad RAM.\n"); |
1764 | #ifdef CONFIG_X86 | 1764 | #ifdef CONFIG_X86 |
1765 | printk(KERN_ERR "Run memtest86+ or a similar memory " | 1765 | printk(KERN_ERR "Run memtest86+ or a similar memory " |
1766 | "test tool.\n"); | 1766 | "test tool.\n"); |
1767 | #else | 1767 | #else |
1768 | printk(KERN_ERR "Run a memory test tool.\n"); | 1768 | printk(KERN_ERR "Run a memory test tool.\n"); |
1769 | #endif | 1769 | #endif |
1770 | } | 1770 | } |
1771 | } | 1771 | } |
1772 | } | 1772 | } |
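
The `error & (error - 1)` test above is the classic power-of-two check: XORing the single bad byte against the poison value leaves exactly the flipped bits, and exactly one flipped bit means the XOR result is a power of two. Stand-alone, assuming POISON_FREE's conventional value of 0x6b:

#include <stdio.h>

/* Returns 1 when got differs from expected in exactly one bit --
 * the dump_line() heuristic for "probably bad RAM". */
static int single_bit_error(unsigned char got, unsigned char expected)
{
	unsigned char diff = got ^ expected;

	return diff && !(diff & (diff - 1));
}

int main(void)
{
	printf("%d\n", single_bit_error(0x6a, 0x6b));	/* 1: one bit flipped */
	printf("%d\n", single_bit_error(0x00, 0x6b));	/* 0: several bits differ */
	return 0;
}
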
1773 | #endif | 1773 | #endif |
1774 | 1774 | ||
1775 | #if DEBUG | 1775 | #if DEBUG |
1776 | 1776 | ||
1777 | static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) | 1777 | static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) |
1778 | { | 1778 | { |
1779 | int i, size; | 1779 | int i, size; |
1780 | char *realobj; | 1780 | char *realobj; |
1781 | 1781 | ||
1782 | if (cachep->flags & SLAB_RED_ZONE) { | 1782 | if (cachep->flags & SLAB_RED_ZONE) { |
1783 | printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n", | 1783 | printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n", |
1784 | *dbg_redzone1(cachep, objp), | 1784 | *dbg_redzone1(cachep, objp), |
1785 | *dbg_redzone2(cachep, objp)); | 1785 | *dbg_redzone2(cachep, objp)); |
1786 | } | 1786 | } |
1787 | 1787 | ||
1788 | if (cachep->flags & SLAB_STORE_USER) { | 1788 | if (cachep->flags & SLAB_STORE_USER) { |
1789 | printk(KERN_ERR "Last user: [<%p>]", | 1789 | printk(KERN_ERR "Last user: [<%p>]", |
1790 | *dbg_userword(cachep, objp)); | 1790 | *dbg_userword(cachep, objp)); |
1791 | print_symbol("(%s)", | 1791 | print_symbol("(%s)", |
1792 | (unsigned long)*dbg_userword(cachep, objp)); | 1792 | (unsigned long)*dbg_userword(cachep, objp)); |
1793 | printk("\n"); | 1793 | printk("\n"); |
1794 | } | 1794 | } |
1795 | realobj = (char *)objp + obj_offset(cachep); | 1795 | realobj = (char *)objp + obj_offset(cachep); |
1796 | size = obj_size(cachep); | 1796 | size = obj_size(cachep); |
1797 | for (i = 0; i < size && lines; i += 16, lines--) { | 1797 | for (i = 0; i < size && lines; i += 16, lines--) { |
1798 | int limit; | 1798 | int limit; |
1799 | limit = 16; | 1799 | limit = 16; |
1800 | if (i + limit > size) | 1800 | if (i + limit > size) |
1801 | limit = size - i; | 1801 | limit = size - i; |
1802 | dump_line(realobj, i, limit); | 1802 | dump_line(realobj, i, limit); |
1803 | } | 1803 | } |
1804 | } | 1804 | } |
1805 | 1805 | ||
1806 | static void check_poison_obj(struct kmem_cache *cachep, void *objp) | 1806 | static void check_poison_obj(struct kmem_cache *cachep, void *objp) |
1807 | { | 1807 | { |
1808 | char *realobj; | 1808 | char *realobj; |
1809 | int size, i; | 1809 | int size, i; |
1810 | int lines = 0; | 1810 | int lines = 0; |
1811 | 1811 | ||
1812 | realobj = (char *)objp + obj_offset(cachep); | 1812 | realobj = (char *)objp + obj_offset(cachep); |
1813 | size = obj_size(cachep); | 1813 | size = obj_size(cachep); |
1814 | 1814 | ||
1815 | for (i = 0; i < size; i++) { | 1815 | for (i = 0; i < size; i++) { |
1816 | char exp = POISON_FREE; | 1816 | char exp = POISON_FREE; |
1817 | if (i == size - 1) | 1817 | if (i == size - 1) |
1818 | exp = POISON_END; | 1818 | exp = POISON_END; |
1819 | if (realobj[i] != exp) { | 1819 | if (realobj[i] != exp) { |
1820 | int limit; | 1820 | int limit; |
1821 | /* Mismatch ! */ | 1821 | /* Mismatch ! */ |
1822 | /* Print header */ | 1822 | /* Print header */ |
1823 | if (lines == 0) { | 1823 | if (lines == 0) { |
1824 | printk(KERN_ERR | 1824 | printk(KERN_ERR |
1825 | "Slab corruption: %s start=%p, len=%d\n", | 1825 | "Slab corruption: %s start=%p, len=%d\n", |
1826 | cachep->name, realobj, size); | 1826 | cachep->name, realobj, size); |
1827 | print_objinfo(cachep, objp, 0); | 1827 | print_objinfo(cachep, objp, 0); |
1828 | } | 1828 | } |
1829 | /* Hexdump the affected line */ | 1829 | /* Hexdump the affected line */ |
1830 | i = (i / 16) * 16; | 1830 | i = (i / 16) * 16; |
1831 | limit = 16; | 1831 | limit = 16; |
1832 | if (i + limit > size) | 1832 | if (i + limit > size) |
1833 | limit = size - i; | 1833 | limit = size - i; |
1834 | dump_line(realobj, i, limit); | 1834 | dump_line(realobj, i, limit); |
1835 | i += 16; | 1835 | i += 16; |
1836 | lines++; | 1836 | lines++; |
1837 | /* Limit to 5 lines */ | 1837 | /* Limit to 5 lines */ |
1838 | if (lines > 5) | 1838 | if (lines > 5) |
1839 | break; | 1839 | break; |
1840 | } | 1840 | } |
1841 | } | 1841 | } |
1842 | if (lines != 0) { | 1842 | if (lines != 0) { |
1843 | /* Print some data about the neighboring objects, if they | 1843 | /* Print some data about the neighboring objects, if they |
1844 | * exist: | 1844 | * exist: |
1845 | */ | 1845 | */ |
1846 | struct slab *slabp = virt_to_slab(objp); | 1846 | struct slab *slabp = virt_to_slab(objp); |
1847 | unsigned int objnr; | 1847 | unsigned int objnr; |
1848 | 1848 | ||
1849 | objnr = obj_to_index(cachep, slabp, objp); | 1849 | objnr = obj_to_index(cachep, slabp, objp); |
1850 | if (objnr) { | 1850 | if (objnr) { |
1851 | objp = index_to_obj(cachep, slabp, objnr - 1); | 1851 | objp = index_to_obj(cachep, slabp, objnr - 1); |
1852 | realobj = (char *)objp + obj_offset(cachep); | 1852 | realobj = (char *)objp + obj_offset(cachep); |
1853 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", | 1853 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", |
1854 | realobj, size); | 1854 | realobj, size); |
1855 | print_objinfo(cachep, objp, 2); | 1855 | print_objinfo(cachep, objp, 2); |
1856 | } | 1856 | } |
1857 | if (objnr + 1 < cachep->num) { | 1857 | if (objnr + 1 < cachep->num) { |
1858 | objp = index_to_obj(cachep, slabp, objnr + 1); | 1858 | objp = index_to_obj(cachep, slabp, objnr + 1); |
1859 | realobj = (char *)objp + obj_offset(cachep); | 1859 | realobj = (char *)objp + obj_offset(cachep); |
1860 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", | 1860 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", |
1861 | realobj, size); | 1861 | realobj, size); |
1862 | print_objinfo(cachep, objp, 2); | 1862 | print_objinfo(cachep, objp, 2); |
1863 | } | 1863 | } |
1864 | } | 1864 | } |
1865 | } | 1865 | } |
1866 | #endif | 1866 | #endif |
1867 | 1867 | ||
1868 | #if DEBUG | 1868 | #if DEBUG |
1869 | /** | 1869 | /** |
1870 | * slab_destroy_objs - destroy a slab and its objects | 1870 | * slab_destroy_objs - destroy a slab and its objects |
1871 | * @cachep: cache pointer being destroyed | 1871 | * @cachep: cache pointer being destroyed |
1872 | * @slabp: slab pointer being destroyed | 1872 | * @slabp: slab pointer being destroyed |
1873 | * | 1873 | * |
1874 | * Call the registered destructor for each object in a slab that is being | 1874 | * Call the registered destructor for each object in a slab that is being |
1875 | * destroyed. | 1875 | * destroyed. |
1876 | */ | 1876 | */ |
1877 | static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | 1877 | static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) |
1878 | { | 1878 | { |
1879 | int i; | 1879 | int i; |
1880 | for (i = 0; i < cachep->num; i++) { | 1880 | for (i = 0; i < cachep->num; i++) { |
1881 | void *objp = index_to_obj(cachep, slabp, i); | 1881 | void *objp = index_to_obj(cachep, slabp, i); |
1882 | 1882 | ||
1883 | if (cachep->flags & SLAB_POISON) { | 1883 | if (cachep->flags & SLAB_POISON) { |
1884 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1884 | #ifdef CONFIG_DEBUG_PAGEALLOC |
1885 | if (cachep->buffer_size % PAGE_SIZE == 0 && | 1885 | if (cachep->buffer_size % PAGE_SIZE == 0 && |
1886 | OFF_SLAB(cachep)) | 1886 | OFF_SLAB(cachep)) |
1887 | kernel_map_pages(virt_to_page(objp), | 1887 | kernel_map_pages(virt_to_page(objp), |
1888 | cachep->buffer_size / PAGE_SIZE, 1); | 1888 | cachep->buffer_size / PAGE_SIZE, 1); |
1889 | else | 1889 | else |
1890 | check_poison_obj(cachep, objp); | 1890 | check_poison_obj(cachep, objp); |
1891 | #else | 1891 | #else |
1892 | check_poison_obj(cachep, objp); | 1892 | check_poison_obj(cachep, objp); |
1893 | #endif | 1893 | #endif |
1894 | } | 1894 | } |
1895 | if (cachep->flags & SLAB_RED_ZONE) { | 1895 | if (cachep->flags & SLAB_RED_ZONE) { |
1896 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) | 1896 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) |
1897 | slab_error(cachep, "start of a freed object " | 1897 | slab_error(cachep, "start of a freed object " |
1898 | "was overwritten"); | 1898 | "was overwritten"); |
1899 | if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) | 1899 | if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) |
1900 | slab_error(cachep, "end of a freed object " | 1900 | slab_error(cachep, "end of a freed object " |
1901 | "was overwritten"); | 1901 | "was overwritten"); |
1902 | } | 1902 | } |
1903 | if (cachep->dtor && !(cachep->flags & SLAB_POISON)) | 1903 | if (cachep->dtor && !(cachep->flags & SLAB_POISON)) |
1904 | (cachep->dtor) (objp + obj_offset(cachep), cachep, 0); | 1904 | (cachep->dtor) (objp + obj_offset(cachep), cachep, 0); |
1905 | } | 1905 | } |
1906 | } | 1906 | } |
1907 | #else | 1907 | #else |
1908 | static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | 1908 | static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) |
1909 | { | 1909 | { |
1910 | if (cachep->dtor) { | 1910 | if (cachep->dtor) { |
1911 | int i; | 1911 | int i; |
1912 | for (i = 0; i < cachep->num; i++) { | 1912 | for (i = 0; i < cachep->num; i++) { |
1913 | void *objp = index_to_obj(cachep, slabp, i); | 1913 | void *objp = index_to_obj(cachep, slabp, i); |
1914 | (cachep->dtor) (objp, cachep, 0); | 1914 | (cachep->dtor) (objp, cachep, 0); |
1915 | } | 1915 | } |
1916 | } | 1916 | } |
1917 | } | 1917 | } |
1918 | #endif | 1918 | #endif |
1919 | 1919 | ||
1920 | /** | 1920 | /** |
1921 | * slab_destroy - destroy and release all objects in a slab | 1921 | * slab_destroy - destroy and release all objects in a slab |
1922 | * @cachep: cache pointer being destroyed | 1922 | * @cachep: cache pointer being destroyed |
1923 | * @slabp: slab pointer being destroyed | 1923 | * @slabp: slab pointer being destroyed |
1924 | * | 1924 | * |
1925 | * Destroy all the objs in a slab, and release the mem back to the system. | 1925 | * Destroy all the objs in a slab, and release the mem back to the system. |
1926 | * Before calling, the slab must have been unlinked from the cache. The | 1926 | * Before calling, the slab must have been unlinked from the cache. The |
1927 | * cache-lock is not held/needed. | 1927 | * cache-lock is not held/needed. |
1928 | */ | 1928 | */ |
1929 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | 1929 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) |
1930 | { | 1930 | { |
1931 | void *addr = slabp->s_mem - slabp->colouroff; | 1931 | void *addr = slabp->s_mem - slabp->colouroff; |
1932 | 1932 | ||
1933 | slab_destroy_objs(cachep, slabp); | 1933 | slab_destroy_objs(cachep, slabp); |
1934 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { | 1934 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { |
1935 | struct slab_rcu *slab_rcu; | 1935 | struct slab_rcu *slab_rcu; |
1936 | 1936 | ||
1937 | slab_rcu = (struct slab_rcu *)slabp; | 1937 | slab_rcu = (struct slab_rcu *)slabp; |
1938 | slab_rcu->cachep = cachep; | 1938 | slab_rcu->cachep = cachep; |
1939 | slab_rcu->addr = addr; | 1939 | slab_rcu->addr = addr; |
1940 | call_rcu(&slab_rcu->head, kmem_rcu_free); | 1940 | call_rcu(&slab_rcu->head, kmem_rcu_free); |
1941 | } else { | 1941 | } else { |
1942 | kmem_freepages(cachep, addr); | 1942 | kmem_freepages(cachep, addr); |
1943 | if (OFF_SLAB(cachep)) | 1943 | if (OFF_SLAB(cachep)) |
1944 | kmem_cache_free(cachep->slabp_cache, slabp); | 1944 | kmem_cache_free(cachep->slabp_cache, slabp); |
1945 | } | 1945 | } |
1946 | } | 1946 | } |
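
The SLAB_DESTROY_BY_RCU branch reuses the slab's own memory as a struct slab_rcu and hands the real page free to kmem_rcu_free() after a grace period, so readers still traversing the slab under rcu_read_lock() never see its pages recycled out from under them. The same call_rcu() deferral pattern, sketched for a hypothetical object with an embedded rcu_head (the names are illustrative, not from this patch):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_obj {
	struct rcu_head rcu;
	int payload;
};

static void my_obj_rcu_free(struct rcu_head *head)
{
	struct my_obj *obj = container_of(head, struct my_obj, rcu);

	kfree(obj);	/* the actual release, after the grace period */
}

static void my_obj_release(struct my_obj *obj)
{
	/* Readers may still hold the object under rcu_read_lock(),
	 * so defer the free until every such reader has finished. */
	call_rcu(&obj->rcu, my_obj_rcu_free);
}
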
1947 | 1947 | ||
1948 | /* | 1948 | /* |
1949 | * For setting up all the kmem_list3s for a cache whose buffer_size is the | 1949 | * For setting up all the kmem_list3s for a cache whose buffer_size is the |
1950 | * same as the size of kmem_list3. | 1950 | * same as the size of kmem_list3. |
1951 | */ | 1951 | */ |
1952 | static void __init set_up_list3s(struct kmem_cache *cachep, int index) | 1952 | static void __init set_up_list3s(struct kmem_cache *cachep, int index) |
1953 | { | 1953 | { |
1954 | int node; | 1954 | int node; |
1955 | 1955 | ||
1956 | for_each_online_node(node) { | 1956 | for_each_online_node(node) { |
1957 | cachep->nodelists[node] = &initkmem_list3[index + node]; | 1957 | cachep->nodelists[node] = &initkmem_list3[index + node]; |
1958 | cachep->nodelists[node]->next_reap = jiffies + | 1958 | cachep->nodelists[node]->next_reap = jiffies + |
1959 | REAPTIMEOUT_LIST3 + | 1959 | REAPTIMEOUT_LIST3 + |
1960 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 1960 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
1961 | } | 1961 | } |
1962 | } | 1962 | } |
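
The next_reap arithmetic deserves a note: the cache pointer itself is used as a cheap source of per-cache jitter, staggering the periodic reap work so the timers for all caches don't expire in the same tick. Reduced to a stand-alone sketch (REAPTIMEOUT is an illustrative placeholder for REAPTIMEOUT_LIST3):

#include <stdio.h>

#define REAPTIMEOUT 250UL	/* placeholder; the kernel uses REAPTIMEOUT_LIST3 */

/* Base timeout plus pointer-derived jitter in [0, REAPTIMEOUT). */
static unsigned long next_reap(unsigned long now, const void *cachep)
{
	return now + REAPTIMEOUT + ((unsigned long)cachep) % REAPTIMEOUT;
}

int main(void)
{
	int a, b;	/* two distinct addresses usually yield distinct deadlines */

	printf("%lu %lu\n", next_reap(0, &a), next_reap(0, &b));
	return 0;
}
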
1963 | 1963 | ||
1964 | static void __kmem_cache_destroy(struct kmem_cache *cachep) | 1964 | static void __kmem_cache_destroy(struct kmem_cache *cachep) |
1965 | { | 1965 | { |
1966 | int i; | 1966 | int i; |
1967 | struct kmem_list3 *l3; | 1967 | struct kmem_list3 *l3; |
1968 | 1968 | ||
1969 | for_each_online_cpu(i) | 1969 | for_each_online_cpu(i) |
1970 | kfree(cachep->array[i]); | 1970 | kfree(cachep->array[i]); |
1971 | 1971 | ||
1972 | /* NUMA: free the list3 structures */ | 1972 | /* NUMA: free the list3 structures */ |
1973 | for_each_online_node(i) { | 1973 | for_each_online_node(i) { |
1974 | l3 = cachep->nodelists[i]; | 1974 | l3 = cachep->nodelists[i]; |
1975 | if (l3) { | 1975 | if (l3) { |
1976 | kfree(l3->shared); | 1976 | kfree(l3->shared); |
1977 | free_alien_cache(l3->alien); | 1977 | free_alien_cache(l3->alien); |
1978 | kfree(l3); | 1978 | kfree(l3); |
1979 | } | 1979 | } |
1980 | } | 1980 | } |
1981 | kmem_cache_free(&cache_cache, cachep); | 1981 | kmem_cache_free(&cache_cache, cachep); |
1982 | } | 1982 | } |
1983 | 1983 | ||
1984 | 1984 | ||
1985 | /** | 1985 | /** |
1986 | * calculate_slab_order - calculate size (page order) of slabs | 1986 | * calculate_slab_order - calculate size (page order) of slabs |
1987 | * @cachep: pointer to the cache that is being created | 1987 | * @cachep: pointer to the cache that is being created |
1988 | * @size: size of objects to be created in this cache. | 1988 | * @size: size of objects to be created in this cache. |
1989 | * @align: required alignment for the objects. | 1989 | * @align: required alignment for the objects. |
1990 | * @flags: slab allocation flags | 1990 | * @flags: slab allocation flags |
1991 | * | 1991 | * |
1992 | * Also calculates the number of objects per slab. | 1992 | * Also calculates the number of objects per slab. |
1993 | * | 1993 | * |
1994 | * This could be made much more intelligent. For now, try to avoid using | 1994 | * This could be made much more intelligent. For now, try to avoid using |
1995 | * high order pages for slabs. When the gfp() functions are more friendly | 1995 | * high order pages for slabs. When the gfp() functions are more friendly |
1996 | * towards high-order requests, this should be changed. | 1996 | * towards high-order requests, this should be changed. |
1997 | */ | 1997 | */ |
1998 | static size_t calculate_slab_order(struct kmem_cache *cachep, | 1998 | static size_t calculate_slab_order(struct kmem_cache *cachep, |
1999 | size_t size, size_t align, unsigned long flags) | 1999 | size_t size, size_t align, unsigned long flags) |
2000 | { | 2000 | { |
2001 | unsigned long offslab_limit; | 2001 | unsigned long offslab_limit; |
2002 | size_t left_over = 0; | 2002 | size_t left_over = 0; |
2003 | int gfporder; | 2003 | int gfporder; |
2004 | 2004 | ||
2005 | for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) { | 2005 | for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) { |
2006 | unsigned int num; | 2006 | unsigned int num; |
2007 | size_t remainder; | 2007 | size_t remainder; |
2008 | 2008 | ||
2009 | cache_estimate(gfporder, size, align, flags, &remainder, &num); | 2009 | cache_estimate(gfporder, size, align, flags, &remainder, &num); |
2010 | if (!num) | 2010 | if (!num) |
2011 | continue; | 2011 | continue; |
2012 | 2012 | ||
2013 | if (flags & CFLGS_OFF_SLAB) { | 2013 | if (flags & CFLGS_OFF_SLAB) { |
2014 | /* | 2014 | /* |
2015 | * Max number of objs-per-slab for caches which | 2015 | * Max number of objs-per-slab for caches which |
2016 | * use off-slab slabs. Needed to avoid a possible | 2016 | * use off-slab slabs. Needed to avoid a possible |
2017 | * looping condition in cache_grow(). | 2017 | * looping condition in cache_grow(). |
2018 | */ | 2018 | */ |
2019 | offslab_limit = size - sizeof(struct slab); | 2019 | offslab_limit = size - sizeof(struct slab); |
2020 | offslab_limit /= sizeof(kmem_bufctl_t); | 2020 | offslab_limit /= sizeof(kmem_bufctl_t); |
2021 | 2021 | ||
2022 | if (num > offslab_limit) | 2022 | if (num > offslab_limit) |
2023 | break; | 2023 | break; |
2024 | } | 2024 | } |
2025 | 2025 | ||
2026 | /* Found something acceptable - save it away */ | 2026 | /* Found something acceptable - save it away */ |
2027 | cachep->num = num; | 2027 | cachep->num = num; |
2028 | cachep->gfporder = gfporder; | 2028 | cachep->gfporder = gfporder; |
2029 | left_over = remainder; | 2029 | left_over = remainder; |
2030 | 2030 | ||
2031 | /* | 2031 | /* |
2032 | * A VFS-reclaimable slab tends to have most allocations | 2032 | * A VFS-reclaimable slab tends to have most allocations |
2033 | * as GFP_NOFS and we really don't want to have to be allocating | 2033 | * as GFP_NOFS and we really don't want to have to be allocating |
2034 | * higher-order pages when we are unable to shrink dcache. | 2034 | * higher-order pages when we are unable to shrink dcache. |
2035 | */ | 2035 | */ |
2036 | if (flags & SLAB_RECLAIM_ACCOUNT) | 2036 | if (flags & SLAB_RECLAIM_ACCOUNT) |
2037 | break; | 2037 | break; |
2038 | 2038 | ||
2039 | /* | 2039 | /* |
2040 | * A large number of objects is good, but very large slabs are | 2040 | * A large number of objects is good, but very large slabs are |
2041 | * currently bad for the gfp()s. | 2041 | * currently bad for the gfp()s. |
2042 | */ | 2042 | */ |
2043 | if (gfporder >= slab_break_gfp_order) | 2043 | if (gfporder >= slab_break_gfp_order) |
2044 | break; | 2044 | break; |
2045 | 2045 | ||
2046 | /* | 2046 | /* |
2047 | * Acceptable internal fragmentation? | 2047 | * Acceptable internal fragmentation? |
2048 | */ | 2048 | */ |
2049 | if (left_over * 8 <= (PAGE_SIZE << gfporder)) | 2049 | if (left_over * 8 <= (PAGE_SIZE << gfporder)) |
2050 | break; | 2050 | break; |
2051 | } | 2051 | } |
2052 | return left_over; | 2052 | return left_over; |
2053 | } | 2053 | } |
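
In miniature, the policy above is: take the smallest order at which at least one object fits and the tail waste stays at or below one eighth of the slab (unless reclaim accounting or the break order cuts the search short). A simplified userspace rendering that ignores the management overhead (struct slab, bufctls, colouring) that cache_estimate() accounts for:

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* illustrative; the real value is per-arch */

static int pick_order(unsigned long obj_size, int max_order)
{
	for (int order = 0; order <= max_order; order++) {
		unsigned long slab_bytes = PAGE_SIZE << order;
		unsigned long num = slab_bytes / obj_size;
		unsigned long left_over = slab_bytes - num * obj_size;

		if (!num)
			continue;		/* object doesn't fit yet */
		if (left_over * 8 <= slab_bytes)
			return order;		/* fragmentation <= 1/8: accept */
	}
	return max_order;
}

int main(void)
{
	/* 1100-byte objects: order 0 wastes 796 of 4096 bytes (> 1/8),
	 * order 1 wastes 492 of 8192 (< 1/8), so order 1 is chosen. */
	printf("%d\n", pick_order(1100, 5));
	return 0;
}
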
2054 | 2054 | ||
2055 | static int setup_cpu_cache(struct kmem_cache *cachep) | 2055 | static int setup_cpu_cache(struct kmem_cache *cachep) |
2056 | { | 2056 | { |
2057 | if (g_cpucache_up == FULL) | 2057 | if (g_cpucache_up == FULL) |
2058 | return enable_cpucache(cachep); | 2058 | return enable_cpucache(cachep); |
2059 | 2059 | ||
2060 | if (g_cpucache_up == NONE) { | 2060 | if (g_cpucache_up == NONE) { |
2061 | /* | 2061 | /* |
2062 | * Note: the first kmem_cache_create must create the cache | 2062 | * Note: the first kmem_cache_create must create the cache |
2063 | * that's used by kmalloc(24); otherwise the creation of | 2063 | * that's used by kmalloc(24); otherwise the creation of |
2064 | * further caches will BUG(). | 2064 | * further caches will BUG(). |
2065 | */ | 2065 | */ |
2066 | cachep->array[smp_processor_id()] = &initarray_generic.cache; | 2066 | cachep->array[smp_processor_id()] = &initarray_generic.cache; |
2067 | 2067 | ||
2068 | /* | 2068 | /* |
2069 | * If the cache that's used by kmalloc(sizeof(kmem_list3)) is | 2069 | * If the cache that's used by kmalloc(sizeof(kmem_list3)) is |
2070 | * the first cache, then we need to set up all its list3s, | 2070 | * the first cache, then we need to set up all its list3s, |
2071 | * otherwise the creation of further caches will BUG(). | 2071 | * otherwise the creation of further caches will BUG(). |
2072 | */ | 2072 | */ |
2073 | set_up_list3s(cachep, SIZE_AC); | 2073 | set_up_list3s(cachep, SIZE_AC); |
2074 | if (INDEX_AC == INDEX_L3) | 2074 | if (INDEX_AC == INDEX_L3) |
2075 | g_cpucache_up = PARTIAL_L3; | 2075 | g_cpucache_up = PARTIAL_L3; |
2076 | else | 2076 | else |
2077 | g_cpucache_up = PARTIAL_AC; | 2077 | g_cpucache_up = PARTIAL_AC; |
2078 | } else { | 2078 | } else { |
2079 | cachep->array[smp_processor_id()] = | 2079 | cachep->array[smp_processor_id()] = |
2080 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | 2080 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); |
2081 | 2081 | ||
2082 | if (g_cpucache_up == PARTIAL_AC) { | 2082 | if (g_cpucache_up == PARTIAL_AC) { |
2083 | set_up_list3s(cachep, SIZE_L3); | 2083 | set_up_list3s(cachep, SIZE_L3); |
2084 | g_cpucache_up = PARTIAL_L3; | 2084 | g_cpucache_up = PARTIAL_L3; |
2085 | } else { | 2085 | } else { |
2086 | int node; | 2086 | int node; |
2087 | for_each_online_node(node) { | 2087 | for_each_online_node(node) { |
2088 | cachep->nodelists[node] = | 2088 | cachep->nodelists[node] = |
2089 | kmalloc_node(sizeof(struct kmem_list3), | 2089 | kmalloc_node(sizeof(struct kmem_list3), |
2090 | GFP_KERNEL, node); | 2090 | GFP_KERNEL, node); |
2091 | BUG_ON(!cachep->nodelists[node]); | 2091 | BUG_ON(!cachep->nodelists[node]); |
2092 | kmem_list3_init(cachep->nodelists[node]); | 2092 | kmem_list3_init(cachep->nodelists[node]); |
2093 | } | 2093 | } |
2094 | } | 2094 | } |
2095 | } | 2095 | } |
2096 | cachep->nodelists[numa_node_id()]->next_reap = | 2096 | cachep->nodelists[numa_node_id()]->next_reap = |
2097 | jiffies + REAPTIMEOUT_LIST3 + | 2097 | jiffies + REAPTIMEOUT_LIST3 + |
2098 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 2098 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
2099 | 2099 | ||
2100 | cpu_cache_get(cachep)->avail = 0; | 2100 | cpu_cache_get(cachep)->avail = 0; |
2101 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; | 2101 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; |
2102 | cpu_cache_get(cachep)->batchcount = 1; | 2102 | cpu_cache_get(cachep)->batchcount = 1; |
2103 | cpu_cache_get(cachep)->touched = 0; | 2103 | cpu_cache_get(cachep)->touched = 0; |
2104 | cachep->batchcount = 1; | 2104 | cachep->batchcount = 1; |
2105 | cachep->limit = BOOT_CPUCACHE_ENTRIES; | 2105 | cachep->limit = BOOT_CPUCACHE_ENTRIES; |
2106 | return 0; | 2106 | return 0; |
2107 | } | 2107 | } |
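
setup_cpu_cache() is, in effect, the bootstrap state machine behind the six-step comment in kmem_cache_init(): each early kmem_cache_create() advances g_cpucache_up one notch until kmem_cache_init() finally declares FULL. The transitions, distilled into a sketch (the enum names match the kernel's; the helper function is invented for illustration):

enum boot_state { NONE, PARTIAL_AC, PARTIAL_L3, FULL };

/* How each successive early kmem_cache_create() advances the
 * allocator's ability to host its own metadata. */
static enum boot_state advance(enum boot_state s, int ac_cache_is_l3_cache)
{
	switch (s) {
	case NONE:		/* first cache: hosts struct arraycache_init */
		return ac_cache_is_l3_cache ? PARTIAL_L3 : PARTIAL_AC;
	case PARTIAL_AC:	/* second cache: hosts struct kmem_list3 */
		return PARTIAL_L3;
	default:		/* later caches allocate everything normally;
				 * kmem_cache_init() itself sets FULL at the end */
		return s;
	}
}
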
2108 | 2108 | ||
2109 | /** | 2109 | /** |
2110 | * kmem_cache_create - Create a cache. | 2110 | * kmem_cache_create - Create a cache. |
2111 | * @name: A string which is used in /proc/slabinfo to identify this cache. | 2111 | * @name: A string which is used in /proc/slabinfo to identify this cache. |
2112 | * @size: The size of objects to be created in this cache. | 2112 | * @size: The size of objects to be created in this cache. |
2113 | * @align: The required alignment for the objects. | 2113 | * @align: The required alignment for the objects. |
2114 | * @flags: SLAB flags | 2114 | * @flags: SLAB flags |
2115 | * @ctor: A constructor for the objects. | 2115 | * @ctor: A constructor for the objects. |
2116 | * @dtor: A destructor for the objects. | 2116 | * @dtor: A destructor for the objects. |
2117 | * | 2117 | * |
2118 | * Returns a ptr to the cache on success, NULL on failure. | 2118 | * Returns a ptr to the cache on success, NULL on failure. |
2119 | * Cannot be called from interrupt context, but can be interrupted. | 2119 | * Cannot be called from interrupt context, but can be interrupted. |
2120 | * The @ctor is run when new pages are allocated by the cache | 2120 | * The @ctor is run when new pages are allocated by the cache |
2121 | * and the @dtor is run before the pages are handed back. | 2121 | * and the @dtor is run before the pages are handed back. |
2122 | * | 2122 | * |
2123 | * @name must be valid until the cache is destroyed. This implies that | 2123 | * @name must be valid until the cache is destroyed. This implies that |
2124 | * the module calling this has to destroy the cache before getting unloaded. | 2124 | * the module calling this has to destroy the cache before getting unloaded. |
2125 | * | 2125 | * |
2126 | * The flags are | 2126 | * The flags are |
2127 | * | 2127 | * |
2128 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) | 2128 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) |
2129 | * to catch references to uninitialised memory. | 2129 | * to catch references to uninitialised memory. |
2130 | * | 2130 | * |
2131 | * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check | 2131 | * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check |
2132 | * for buffer overruns. | 2132 | * for buffer overruns. |
2133 | * | 2133 | * |
2134 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware | 2134 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware |
2135 | * cacheline. This can be beneficial if you're counting cycles as closely | 2135 | * cacheline. This can be beneficial if you're counting cycles as closely |
2136 | * as davem. | 2136 | * as davem. |
2137 | */ | 2137 | */ |
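
For reference, a hypothetical caller of the six-argument interface documented above (the struct and cache names are invented; later kernels dropped the dtor parameter):

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>

struct my_record {
	int id;
	char name[32];
};

static struct kmem_cache *my_record_cache;

/* Wired up via module_init() in a real module. */
static int __init my_record_init(void)
{
	my_record_cache = kmem_cache_create("my_record",
					    sizeof(struct my_record),
					    0,			/* default alignment */
					    SLAB_HWCACHE_ALIGN,
					    NULL, NULL);	/* no ctor, no dtor */
	return my_record_cache ? 0 : -ENOMEM;
}

Per the name-lifetime rule above, "my_record" must stay valid until kmem_cache_destroy() is called, so a module doing this has to destroy the cache on unload; modules that skip this trip the "lost its name" warning in the duplicate-name scan below.
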
2138 | struct kmem_cache * | 2138 | struct kmem_cache * |
2139 | kmem_cache_create (const char *name, size_t size, size_t align, | 2139 | kmem_cache_create (const char *name, size_t size, size_t align, |
2140 | unsigned long flags, | 2140 | unsigned long flags, |
2141 | void (*ctor)(void*, struct kmem_cache *, unsigned long), | 2141 | void (*ctor)(void*, struct kmem_cache *, unsigned long), |
2142 | void (*dtor)(void*, struct kmem_cache *, unsigned long)) | 2142 | void (*dtor)(void*, struct kmem_cache *, unsigned long)) |
2143 | { | 2143 | { |
2144 | size_t left_over, slab_size, ralign; | 2144 | size_t left_over, slab_size, ralign; |
2145 | struct kmem_cache *cachep = NULL, *pc; | 2145 | struct kmem_cache *cachep = NULL, *pc; |
2146 | 2146 | ||
2147 | /* | 2147 | /* |
2148 | * Sanity checks... these are all serious usage bugs. | 2148 | * Sanity checks... these are all serious usage bugs. |
2149 | */ | 2149 | */ |
2150 | if (!name || in_interrupt() || (size < BYTES_PER_WORD) || | 2150 | if (!name || in_interrupt() || (size < BYTES_PER_WORD) || |
2151 | (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { | 2151 | (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { |
2152 | printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__, | 2152 | printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__, |
2153 | name); | 2153 | name); |
2154 | BUG(); | 2154 | BUG(); |
2155 | } | 2155 | } |
2156 | 2156 | ||
2157 | /* | 2157 | /* |
2158 | * We use cache_chain_mutex to ensure a consistent view of | 2158 | * We use cache_chain_mutex to ensure a consistent view of |
2159 | * cpu_online_map as well. Please see cpuup_callback | 2159 | * cpu_online_map as well. Please see cpuup_callback |
2160 | */ | 2160 | */ |
2161 | mutex_lock(&cache_chain_mutex); | 2161 | mutex_lock(&cache_chain_mutex); |
2162 | 2162 | ||
2163 | list_for_each_entry(pc, &cache_chain, next) { | 2163 | list_for_each_entry(pc, &cache_chain, next) { |
2164 | char tmp; | 2164 | char tmp; |
2165 | int res; | 2165 | int res; |
2166 | 2166 | ||
2167 | /* | 2167 | /* |
2168 | * This happens when the module gets unloaded, doesn't | 2168 | * This happens when the module gets unloaded, doesn't |
2169 | * destroy its slab cache, and no one else reuses the module's | 2169 | * destroy its slab cache, and no one else reuses the module's |
2170 | * vmalloc area. Print a warning. | 2170 | * vmalloc area. Print a warning. |
2171 | */ | 2171 | */ |
2172 | res = probe_kernel_address(pc->name, tmp); | 2172 | res = probe_kernel_address(pc->name, tmp); |
2173 | if (res) { | 2173 | if (res) { |
2174 | printk(KERN_ERR | 2174 | printk(KERN_ERR |
2175 | "SLAB: cache with size %d has lost its name\n", | 2175 | "SLAB: cache with size %d has lost its name\n", |
2176 | pc->buffer_size); | 2176 | pc->buffer_size); |
2177 | continue; | 2177 | continue; |
2178 | } | 2178 | } |
2179 | 2179 | ||
2180 | if (!strcmp(pc->name, name)) { | 2180 | if (!strcmp(pc->name, name)) { |
2181 | printk(KERN_ERR | 2181 | printk(KERN_ERR |
2182 | "kmem_cache_create: duplicate cache %s\n", name); | 2182 | "kmem_cache_create: duplicate cache %s\n", name); |
2183 | dump_stack(); | 2183 | dump_stack(); |
2184 | goto oops; | 2184 | goto oops; |
2185 | } | 2185 | } |
2186 | } | 2186 | } |
2187 | 2187 | ||
2188 | #if DEBUG | 2188 | #if DEBUG |
2189 | WARN_ON(strchr(name, ' ')); /* It confuses parsers */ | 2189 | WARN_ON(strchr(name, ' ')); /* It confuses parsers */ |
2190 | #if FORCED_DEBUG | 2190 | #if FORCED_DEBUG |
2191 | /* | 2191 | /* |
2192 | * Enable redzoning and last user accounting, except for caches with | 2192 | * Enable redzoning and last user accounting, except for caches with |
2193 | * large objects, if the increased size would increase the object size | 2193 | * large objects, if the increased size would increase the object size |
2194 | * above the next power of two: caches with object sizes just above a | 2194 | * above the next power of two: caches with object sizes just above a |
2195 | * power of two have a significant amount of internal fragmentation. | 2195 | * power of two have a significant amount of internal fragmentation. |
2196 | */ | 2196 | */ |
2197 | if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD)) | 2197 | if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD)) |
2198 | flags |= SLAB_RED_ZONE | SLAB_STORE_USER; | 2198 | flags |= SLAB_RED_ZONE | SLAB_STORE_USER; |
2199 | if (!(flags & SLAB_DESTROY_BY_RCU)) | 2199 | if (!(flags & SLAB_DESTROY_BY_RCU)) |
2200 | flags |= SLAB_POISON; | 2200 | flags |= SLAB_POISON; |
2201 | #endif | 2201 | #endif |
2202 | if (flags & SLAB_DESTROY_BY_RCU) | 2202 | if (flags & SLAB_DESTROY_BY_RCU) |
2203 | BUG_ON(flags & SLAB_POISON); | 2203 | BUG_ON(flags & SLAB_POISON); |
2204 | #endif | 2204 | #endif |
2205 | if (flags & SLAB_DESTROY_BY_RCU) | 2205 | if (flags & SLAB_DESTROY_BY_RCU) |
2206 | BUG_ON(dtor); | 2206 | BUG_ON(dtor); |
2207 | 2207 | ||
2208 | /* | 2208 | /* |
2209 | * Always check flags; a caller might be expecting debug support that | 2209 | * Always check flags; a caller might be expecting debug support that |
2210 | * isn't available. | 2210 | * isn't available. |
2211 | */ | 2211 | */ |
2212 | BUG_ON(flags & ~CREATE_MASK); | 2212 | BUG_ON(flags & ~CREATE_MASK); |
2213 | 2213 | ||
2214 | /* | 2214 | /* |
2215 | * Check that size is in terms of words. This is needed to avoid | 2215 | * Check that size is in terms of words. This is needed to avoid |
2216 | * unaligned accesses for some archs when redzoning is used, and makes | 2216 | * unaligned accesses for some archs when redzoning is used, and makes |
2217 | * sure any on-slab bufctls are also correctly aligned. | 2217 | * sure any on-slab bufctls are also correctly aligned. |
2218 | */ | 2218 | */ |
2219 | if (size & (BYTES_PER_WORD - 1)) { | 2219 | if (size & (BYTES_PER_WORD - 1)) { |
2220 | size += (BYTES_PER_WORD - 1); | 2220 | size += (BYTES_PER_WORD - 1); |
2221 | size &= ~(BYTES_PER_WORD - 1); | 2221 | size &= ~(BYTES_PER_WORD - 1); |
2222 | } | 2222 | } |
2223 | 2223 | ||
2224 | /* calculate the final buffer alignment: */ | 2224 | /* calculate the final buffer alignment: */ |
2225 | 2225 | ||
2226 | /* 1) arch recommendation: can be overridden for debug */ | 2226 | /* 1) arch recommendation: can be overridden for debug */ |
2227 | if (flags & SLAB_HWCACHE_ALIGN) { | 2227 | if (flags & SLAB_HWCACHE_ALIGN) { |
2228 | /* | 2228 | /* |
2229 | * Default alignment: as specified by the arch code. Except if | 2229 | * Default alignment: as specified by the arch code. Except if |
2230 | * an object is really small, then squeeze multiple objects into | 2230 | * an object is really small, then squeeze multiple objects into |
2231 | * one cacheline. | 2231 | * one cacheline. |
2232 | */ | 2232 | */ |
2233 | ralign = cache_line_size(); | 2233 | ralign = cache_line_size(); |
2234 | while (size <= ralign / 2) | 2234 | while (size <= ralign / 2) |
2235 | ralign /= 2; | 2235 | ralign /= 2; |
2236 | } else { | 2236 | } else { |
2237 | ralign = BYTES_PER_WORD; | 2237 | ralign = BYTES_PER_WORD; |
2238 | } | 2238 | } |
2239 | 2239 | ||
2240 | /* | 2240 | /* |
2241 | * Redzoning and user store require word alignment. Note this will be | 2241 | * Redzoning and user store require word alignment. Note this will be |
2242 | * overridden by architecture or caller mandated alignment if either | 2242 | * overridden by architecture or caller mandated alignment if either |
2243 | * is greater than BYTES_PER_WORD. | 2243 | * is greater than BYTES_PER_WORD. |
2244 | */ | 2244 | */ |
2245 | if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER) | 2245 | if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER) |
2246 | ralign = __alignof__(unsigned long long); | 2246 | ralign = __alignof__(unsigned long long); |
2247 | 2247 | ||
2248 | /* 2) arch mandated alignment */ | 2248 | /* 2) arch mandated alignment */ |
2249 | if (ralign < ARCH_SLAB_MINALIGN) { | 2249 | if (ralign < ARCH_SLAB_MINALIGN) { |
2250 | ralign = ARCH_SLAB_MINALIGN; | 2250 | ralign = ARCH_SLAB_MINALIGN; |
2251 | } | 2251 | } |
2252 | /* 3) caller mandated alignment */ | 2252 | /* 3) caller mandated alignment */ |
2253 | if (ralign < align) { | 2253 | if (ralign < align) { |
2254 | ralign = align; | 2254 | ralign = align; |
2255 | } | 2255 | } |
2256 | /* disable debug if necessary */ | 2256 | /* disable debug if necessary */ |
2257 | if (ralign > __alignof__(unsigned long long)) | 2257 | if (ralign > __alignof__(unsigned long long)) |
2258 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); | 2258 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
2259 | /* | 2259 | /* |
2260 | * 4) Store it. | 2260 | * 4) Store it. |
2261 | */ | 2261 | */ |
2262 | align = ralign; | 2262 | align = ralign; |
2263 | 2263 | ||
2264 | /* Get cache's description obj. */ | 2264 | /* Get cache's description obj. */ |
2265 | cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL); | 2265 | cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL); |
2266 | if (!cachep) | 2266 | if (!cachep) |
2267 | goto oops; | 2267 | goto oops; |
2268 | 2268 | ||
2269 | #if DEBUG | 2269 | #if DEBUG |
2270 | cachep->obj_size = size; | 2270 | cachep->obj_size = size; |
2271 | 2271 | ||
2272 | /* | 2272 | /* |
2273 | * Both debugging options require word-alignment which is calculated | 2273 | * Both debugging options require word-alignment which is calculated |
2274 | * into align above. | 2274 | * into align above. |
2275 | */ | 2275 | */ |
2276 | if (flags & SLAB_RED_ZONE) { | 2276 | if (flags & SLAB_RED_ZONE) { |
2277 | /* add space for red zone words */ | 2277 | /* add space for red zone words */ |
2278 | cachep->obj_offset += sizeof(unsigned long long); | 2278 | cachep->obj_offset += sizeof(unsigned long long); |
2279 | size += 2 * sizeof(unsigned long long); | 2279 | size += 2 * sizeof(unsigned long long); |
2280 | } | 2280 | } |
2281 | if (flags & SLAB_STORE_USER) { | 2281 | if (flags & SLAB_STORE_USER) { |
2282 | /* user store requires one word of storage behind the end of | 2282 | /* user store requires one word of storage behind the end of |
2283 | * the real object. | 2283 | * the real object. |
2284 | */ | 2284 | */ |
2285 | size += BYTES_PER_WORD; | 2285 | size += BYTES_PER_WORD; |
2286 | } | 2286 | } |
2287 | #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) | 2287 | #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) |
2288 | if (size >= malloc_sizes[INDEX_L3 + 1].cs_size | 2288 | if (size >= malloc_sizes[INDEX_L3 + 1].cs_size |
2289 | && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) { | 2289 | && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) { |
2290 | cachep->obj_offset += PAGE_SIZE - size; | 2290 | cachep->obj_offset += PAGE_SIZE - size; |
2291 | size = PAGE_SIZE; | 2291 | size = PAGE_SIZE; |
2292 | } | 2292 | } |
2293 | #endif | 2293 | #endif |
2294 | #endif | 2294 | #endif |
2295 | 2295 | ||
2296 | /* | 2296 | /* |
2297 | * Determine if the slab management is 'on' or 'off' slab. | 2297 | * Determine if the slab management is 'on' or 'off' slab. |
2298 | * (bootstrapping cannot cope with offslab caches so don't do | 2298 | * (bootstrapping cannot cope with offslab caches so don't do |
2299 | * it too early on.) | 2299 | * it too early on.) |
2300 | */ | 2300 | */ |
2301 | if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init) | 2301 | if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init) |
2302 | /* | 2302 | /* |
2303 | * Size is large, assume best to place the slab management obj | 2303 | * Size is large, assume best to place the slab management obj |
2304 | * off-slab (should allow better packing of objs). | 2304 | * off-slab (should allow better packing of objs). |
2305 | */ | 2305 | */ |
2306 | flags |= CFLGS_OFF_SLAB; | 2306 | flags |= CFLGS_OFF_SLAB; |
2307 | 2307 | ||
2308 | size = ALIGN(size, align); | 2308 | size = ALIGN(size, align); |
2309 | 2309 | ||
2310 | left_over = calculate_slab_order(cachep, size, align, flags); | 2310 | left_over = calculate_slab_order(cachep, size, align, flags); |
2311 | 2311 | ||
2312 | if (!cachep->num) { | 2312 | if (!cachep->num) { |
2313 | printk(KERN_ERR | 2313 | printk(KERN_ERR |
2314 | "kmem_cache_create: couldn't create cache %s.\n", name); | 2314 | "kmem_cache_create: couldn't create cache %s.\n", name); |
2315 | kmem_cache_free(&cache_cache, cachep); | 2315 | kmem_cache_free(&cache_cache, cachep); |
2316 | cachep = NULL; | 2316 | cachep = NULL; |
2317 | goto oops; | 2317 | goto oops; |
2318 | } | 2318 | } |
2319 | slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) | 2319 | slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) |
2320 | + sizeof(struct slab), align); | 2320 | + sizeof(struct slab), align); |
2321 | 2321 | ||
2322 | /* | 2322 | /* |
2323 | * If the slab has been placed off-slab, and we have enough space then | 2323 | * If the slab has been placed off-slab, and we have enough space then |
2324 | * move it on-slab. This is at the expense of any extra colouring. | 2324 | * move it on-slab. This is at the expense of any extra colouring. |
2325 | */ | 2325 | */ |
2326 | if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) { | 2326 | if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) { |
2327 | flags &= ~CFLGS_OFF_SLAB; | 2327 | flags &= ~CFLGS_OFF_SLAB; |
2328 | left_over -= slab_size; | 2328 | left_over -= slab_size; |
2329 | } | 2329 | } |
2330 | 2330 | ||
2331 | if (flags & CFLGS_OFF_SLAB) { | 2331 | if (flags & CFLGS_OFF_SLAB) { |
2332 | /* really off slab. No need for manual alignment */ | 2332 | /* really off slab. No need for manual alignment */ |
2333 | slab_size = | 2333 | slab_size = |
2334 | cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); | 2334 | cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); |
2335 | } | 2335 | } |
2336 | 2336 | ||
2337 | cachep->colour_off = cache_line_size(); | 2337 | cachep->colour_off = cache_line_size(); |
2338 | /* Offset must be a multiple of the alignment. */ | 2338 | /* Offset must be a multiple of the alignment. */ |
2339 | if (cachep->colour_off < align) | 2339 | if (cachep->colour_off < align) |
2340 | cachep->colour_off = align; | 2340 | cachep->colour_off = align; |
2341 | cachep->colour = left_over / cachep->colour_off; | 2341 | cachep->colour = left_over / cachep->colour_off; |
2342 | cachep->slab_size = slab_size; | 2342 | cachep->slab_size = slab_size; |
2343 | cachep->flags = flags; | 2343 | cachep->flags = flags; |
2344 | cachep->gfpflags = 0; | 2344 | cachep->gfpflags = 0; |
2345 | if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) | 2345 | if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) |
2346 | cachep->gfpflags |= GFP_DMA; | 2346 | cachep->gfpflags |= GFP_DMA; |
2347 | cachep->buffer_size = size; | 2347 | cachep->buffer_size = size; |
2348 | cachep->reciprocal_buffer_size = reciprocal_value(size); | 2348 | cachep->reciprocal_buffer_size = reciprocal_value(size); |
2349 | 2349 | ||
2350 | if (flags & CFLGS_OFF_SLAB) { | 2350 | if (flags & CFLGS_OFF_SLAB) { |
2351 | cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); | 2351 | cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); |
2352 | /* | 2352 | /* |
2353 | * This is a possibility for one of the malloc_sizes caches. | 2353 | * This is a possibility for one of the malloc_sizes caches. |
2354 | * But since we go off slab only for object size greater than | 2354 | * But since we go off slab only for object size greater than |
2355 | * PAGE_SIZE/8, and malloc_sizes gets created in ascending order, | 2355 | * PAGE_SIZE/8, and malloc_sizes gets created in ascending order, |
2356 | * this should not happen at all. | 2356 | * this should not happen at all. |
2357 | * But leave a BUG_ON for some lucky dude. | 2357 | * But leave a BUG_ON for some lucky dude. |
2358 | */ | 2358 | */ |
2359 | BUG_ON(!cachep->slabp_cache); | 2359 | BUG_ON(!cachep->slabp_cache); |
2360 | } | 2360 | } |
2361 | cachep->ctor = ctor; | 2361 | cachep->ctor = ctor; |
2362 | cachep->dtor = dtor; | 2362 | cachep->dtor = dtor; |
2363 | cachep->name = name; | 2363 | cachep->name = name; |
2364 | 2364 | ||
2365 | if (setup_cpu_cache(cachep)) { | 2365 | if (setup_cpu_cache(cachep)) { |
2366 | __kmem_cache_destroy(cachep); | 2366 | __kmem_cache_destroy(cachep); |
2367 | cachep = NULL; | 2367 | cachep = NULL; |
2368 | goto oops; | 2368 | goto oops; |
2369 | } | 2369 | } |
2370 | 2370 | ||
2371 | /* cache setup completed, link it into the list */ | 2371 | /* cache setup completed, link it into the list */ |
2372 | list_add(&cachep->next, &cache_chain); | 2372 | list_add(&cachep->next, &cache_chain); |
2373 | oops: | 2373 | oops: |
2374 | if (!cachep && (flags & SLAB_PANIC)) | 2374 | if (!cachep && (flags & SLAB_PANIC)) |
2375 | panic("kmem_cache_create(): failed to create slab `%s'\n", | 2375 | panic("kmem_cache_create(): failed to create slab `%s'\n", |
2376 | name); | 2376 | name); |
2377 | mutex_unlock(&cache_chain_mutex); | 2377 | mutex_unlock(&cache_chain_mutex); |
2378 | return cachep; | 2378 | return cachep; |
2379 | } | 2379 | } |
2380 | EXPORT_SYMBOL(kmem_cache_create); | 2380 | EXPORT_SYMBOL(kmem_cache_create); |
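
A minimal usage sketch of kmem_cache_create() as it exists at this point in the tree (the six-argument form that still takes a dtor, matching the cachep->dtor assignment above). struct foo, foo_cache and foo_init are illustrative names, not from this commit:

#include <linux/module.h>
#include <linux/slab.h>

struct foo {
	int state;
};

static struct kmem_cache *foo_cache;

static int __init foo_init(void)
{
	/* name, size, align, flags, ctor, dtor; SLAB_HWCACHE_ALIGN requests
	 * the cacheline-based alignment computed in step 1) above */
	foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
				      0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	return foo_cache ? 0 : -ENOMEM;
}
module_init(foo_init);
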
2381 | 2381 | ||
2382 | #if DEBUG | 2382 | #if DEBUG |
2383 | static void check_irq_off(void) | 2383 | static void check_irq_off(void) |
2384 | { | 2384 | { |
2385 | BUG_ON(!irqs_disabled()); | 2385 | BUG_ON(!irqs_disabled()); |
2386 | } | 2386 | } |
2387 | 2387 | ||
2388 | static void check_irq_on(void) | 2388 | static void check_irq_on(void) |
2389 | { | 2389 | { |
2390 | BUG_ON(irqs_disabled()); | 2390 | BUG_ON(irqs_disabled()); |
2391 | } | 2391 | } |
2392 | 2392 | ||
2393 | static void check_spinlock_acquired(struct kmem_cache *cachep) | 2393 | static void check_spinlock_acquired(struct kmem_cache *cachep) |
2394 | { | 2394 | { |
2395 | #ifdef CONFIG_SMP | 2395 | #ifdef CONFIG_SMP |
2396 | check_irq_off(); | 2396 | check_irq_off(); |
2397 | assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock); | 2397 | assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock); |
2398 | #endif | 2398 | #endif |
2399 | } | 2399 | } |
2400 | 2400 | ||
2401 | static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) | 2401 | static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) |
2402 | { | 2402 | { |
2403 | #ifdef CONFIG_SMP | 2403 | #ifdef CONFIG_SMP |
2404 | check_irq_off(); | 2404 | check_irq_off(); |
2405 | assert_spin_locked(&cachep->nodelists[node]->list_lock); | 2405 | assert_spin_locked(&cachep->nodelists[node]->list_lock); |
2406 | #endif | 2406 | #endif |
2407 | } | 2407 | } |
2408 | 2408 | ||
2409 | #else | 2409 | #else |
2410 | #define check_irq_off() do { } while(0) | 2410 | #define check_irq_off() do { } while(0) |
2411 | #define check_irq_on() do { } while(0) | 2411 | #define check_irq_on() do { } while(0) |
2412 | #define check_spinlock_acquired(x) do { } while(0) | 2412 | #define check_spinlock_acquired(x) do { } while(0) |
2413 | #define check_spinlock_acquired_node(x, y) do { } while(0) | 2413 | #define check_spinlock_acquired_node(x, y) do { } while(0) |
2414 | #endif | 2414 | #endif |
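
Throughout this file, DEBUG-only checks follow the same pattern as the #else branch above: real assertions when DEBUG is set, empty do { } while(0) statements otherwise, so each check still parses as a single statement after an if but costs nothing in release builds. A minimal userspace analogue, with a hypothetical check_invariant():

#include <assert.h>

#ifdef DEBUG
#define check_invariant(cond) assert(cond)
#else
#define check_invariant(cond) do { } while (0)	/* no-op, still one statement */
#endif
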
2415 | 2415 | ||
2416 | static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, | 2416 | static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
2417 | struct array_cache *ac, | 2417 | struct array_cache *ac, |
2418 | int force, int node); | 2418 | int force, int node); |
2419 | 2419 | ||
2420 | static void do_drain(void *arg) | 2420 | static void do_drain(void *arg) |
2421 | { | 2421 | { |
2422 | struct kmem_cache *cachep = arg; | 2422 | struct kmem_cache *cachep = arg; |
2423 | struct array_cache *ac; | 2423 | struct array_cache *ac; |
2424 | int node = numa_node_id(); | 2424 | int node = numa_node_id(); |
2425 | 2425 | ||
2426 | check_irq_off(); | 2426 | check_irq_off(); |
2427 | ac = cpu_cache_get(cachep); | 2427 | ac = cpu_cache_get(cachep); |
2428 | spin_lock(&cachep->nodelists[node]->list_lock); | 2428 | spin_lock(&cachep->nodelists[node]->list_lock); |
2429 | free_block(cachep, ac->entry, ac->avail, node); | 2429 | free_block(cachep, ac->entry, ac->avail, node); |
2430 | spin_unlock(&cachep->nodelists[node]->list_lock); | 2430 | spin_unlock(&cachep->nodelists[node]->list_lock); |
2431 | ac->avail = 0; | 2431 | ac->avail = 0; |
2432 | } | 2432 | } |
2433 | 2433 | ||
2434 | static void drain_cpu_caches(struct kmem_cache *cachep) | 2434 | static void drain_cpu_caches(struct kmem_cache *cachep) |
2435 | { | 2435 | { |
2436 | struct kmem_list3 *l3; | 2436 | struct kmem_list3 *l3; |
2437 | int node; | 2437 | int node; |
2438 | 2438 | ||
2439 | on_each_cpu(do_drain, cachep, 1, 1); | 2439 | on_each_cpu(do_drain, cachep, 1, 1); |
2440 | check_irq_on(); | 2440 | check_irq_on(); |
2441 | for_each_online_node(node) { | 2441 | for_each_online_node(node) { |
2442 | l3 = cachep->nodelists[node]; | 2442 | l3 = cachep->nodelists[node]; |
2443 | if (l3 && l3->alien) | 2443 | if (l3 && l3->alien) |
2444 | drain_alien_cache(cachep, l3->alien); | 2444 | drain_alien_cache(cachep, l3->alien); |
2445 | } | 2445 | } |
2446 | 2446 | ||
2447 | for_each_online_node(node) { | 2447 | for_each_online_node(node) { |
2448 | l3 = cachep->nodelists[node]; | 2448 | l3 = cachep->nodelists[node]; |
2449 | if (l3) | 2449 | if (l3) |
2450 | drain_array(cachep, l3, l3->shared, 1, node); | 2450 | drain_array(cachep, l3, l3->shared, 1, node); |
2451 | } | 2451 | } |
2452 | } | 2452 | } |
2453 | 2453 | ||
2454 | /* | 2454 | /* |
2455 | * Remove slabs from the list of free slabs. | 2455 | * Remove slabs from the list of free slabs. |
2456 | * Specify the number of slabs to drain in tofree. | 2456 | * Specify the number of slabs to drain in tofree. |
2457 | * | 2457 | * |
2458 | * Returns the actual number of slabs released. | 2458 | * Returns the actual number of slabs released. |
2459 | */ | 2459 | */ |
2460 | static int drain_freelist(struct kmem_cache *cache, | 2460 | static int drain_freelist(struct kmem_cache *cache, |
2461 | struct kmem_list3 *l3, int tofree) | 2461 | struct kmem_list3 *l3, int tofree) |
2462 | { | 2462 | { |
2463 | struct list_head *p; | 2463 | struct list_head *p; |
2464 | int nr_freed; | 2464 | int nr_freed; |
2465 | struct slab *slabp; | 2465 | struct slab *slabp; |
2466 | 2466 | ||
2467 | nr_freed = 0; | 2467 | nr_freed = 0; |
2468 | while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { | 2468 | while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { |
2469 | 2469 | ||
2470 | spin_lock_irq(&l3->list_lock); | 2470 | spin_lock_irq(&l3->list_lock); |
2471 | p = l3->slabs_free.prev; | 2471 | p = l3->slabs_free.prev; |
2472 | if (p == &l3->slabs_free) { | 2472 | if (p == &l3->slabs_free) { |
2473 | spin_unlock_irq(&l3->list_lock); | 2473 | spin_unlock_irq(&l3->list_lock); |
2474 | goto out; | 2474 | goto out; |
2475 | } | 2475 | } |
2476 | 2476 | ||
2477 | slabp = list_entry(p, struct slab, list); | 2477 | slabp = list_entry(p, struct slab, list); |
2478 | #if DEBUG | 2478 | #if DEBUG |
2479 | BUG_ON(slabp->inuse); | 2479 | BUG_ON(slabp->inuse); |
2480 | #endif | 2480 | #endif |
2481 | list_del(&slabp->list); | 2481 | list_del(&slabp->list); |
2482 | /* | 2482 | /* |
2483 | * Safe to drop the lock. The slab is no longer linked | 2483 | * Safe to drop the lock. The slab is no longer linked |
2484 | * to the cache. | 2484 | * to the cache. |
2485 | */ | 2485 | */ |
2486 | l3->free_objects -= cache->num; | 2486 | l3->free_objects -= cache->num; |
2487 | spin_unlock_irq(&l3->list_lock); | 2487 | spin_unlock_irq(&l3->list_lock); |
2488 | slab_destroy(cache, slabp); | 2488 | slab_destroy(cache, slabp); |
2489 | nr_freed++; | 2489 | nr_freed++; |
2490 | } | 2490 | } |
2491 | out: | 2491 | out: |
2492 | return nr_freed; | 2492 | return nr_freed; |
2493 | } | 2493 | } |
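
Note the lock discipline in drain_freelist(): the list lock is re-taken on every iteration and dropped before slab_destroy(), so the expensive free runs unlocked once the slab is detached. A small self-contained userspace model of that discipline, with illustrative names:

#include <pthread.h>
#include <stdlib.h>

struct node { struct node *next; };

static struct node *free_list;
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static int drain(int tofree)
{
	int nr_freed = 0;

	while (nr_freed < tofree) {
		struct node *n;

		pthread_mutex_lock(&list_lock);
		n = free_list;
		if (!n) {			/* raced with another user */
			pthread_mutex_unlock(&list_lock);
			break;
		}
		free_list = n->next;		/* detach under the lock */
		pthread_mutex_unlock(&list_lock);

		free(n);			/* expensive part, unlocked */
		nr_freed++;
	}
	return nr_freed;
}
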
2494 | 2494 | ||
2495 | /* Called with cache_chain_mutex held to protect against cpu hotplug */ | 2495 | /* Called with cache_chain_mutex held to protect against cpu hotplug */ |
2496 | static int __cache_shrink(struct kmem_cache *cachep) | 2496 | static int __cache_shrink(struct kmem_cache *cachep) |
2497 | { | 2497 | { |
2498 | int ret = 0, i = 0; | 2498 | int ret = 0, i = 0; |
2499 | struct kmem_list3 *l3; | 2499 | struct kmem_list3 *l3; |
2500 | 2500 | ||
2501 | drain_cpu_caches(cachep); | 2501 | drain_cpu_caches(cachep); |
2502 | 2502 | ||
2503 | check_irq_on(); | 2503 | check_irq_on(); |
2504 | for_each_online_node(i) { | 2504 | for_each_online_node(i) { |
2505 | l3 = cachep->nodelists[i]; | 2505 | l3 = cachep->nodelists[i]; |
2506 | if (!l3) | 2506 | if (!l3) |
2507 | continue; | 2507 | continue; |
2508 | 2508 | ||
2509 | drain_freelist(cachep, l3, l3->free_objects); | 2509 | drain_freelist(cachep, l3, l3->free_objects); |
2510 | 2510 | ||
2511 | ret += !list_empty(&l3->slabs_full) || | 2511 | ret += !list_empty(&l3->slabs_full) || |
2512 | !list_empty(&l3->slabs_partial); | 2512 | !list_empty(&l3->slabs_partial); |
2513 | } | 2513 | } |
2514 | return (ret ? 1 : 0); | 2514 | return (ret ? 1 : 0); |
2515 | } | 2515 | } |
2516 | 2516 | ||
2517 | /** | 2517 | /** |
2518 | * kmem_cache_shrink - Shrink a cache. | 2518 | * kmem_cache_shrink - Shrink a cache. |
2519 | * @cachep: The cache to shrink. | 2519 | * @cachep: The cache to shrink. |
2520 | * | 2520 | * |
2521 | * Releases as many slabs as possible for a cache. | 2521 | * Releases as many slabs as possible for a cache. |
2522 | * To help debugging, a zero exit status indicates all slabs were released. | 2522 | * To help debugging, a zero exit status indicates all slabs were released. |
2523 | */ | 2523 | */ |
2524 | int kmem_cache_shrink(struct kmem_cache *cachep) | 2524 | int kmem_cache_shrink(struct kmem_cache *cachep) |
2525 | { | 2525 | { |
2526 | int ret; | 2526 | int ret; |
2527 | BUG_ON(!cachep || in_interrupt()); | 2527 | BUG_ON(!cachep || in_interrupt()); |
2528 | 2528 | ||
2529 | mutex_lock(&cache_chain_mutex); | 2529 | mutex_lock(&cache_chain_mutex); |
2530 | ret = __cache_shrink(cachep); | 2530 | ret = __cache_shrink(cachep); |
2531 | mutex_unlock(&cache_chain_mutex); | 2531 | mutex_unlock(&cache_chain_mutex); |
2532 | return ret; | 2532 | return ret; |
2533 | } | 2533 | } |
2534 | EXPORT_SYMBOL(kmem_cache_shrink); | 2534 | EXPORT_SYMBOL(kmem_cache_shrink); |
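
A hedged usage sketch: callers treat a zero return as "every slab was released". my_cache and the surrounding function are hypothetical:

#include <linux/kernel.h>
#include <linux/slab.h>

static struct kmem_cache *my_cache;	/* created elsewhere; hypothetical */

static void my_release_memory(void)
{
	if (kmem_cache_shrink(my_cache) != 0)
		printk(KERN_DEBUG "my_cache: some slabs still in use\n");
}
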
2535 | 2535 | ||
2536 | /** | 2536 | /** |
2537 | * kmem_cache_destroy - delete a cache | 2537 | * kmem_cache_destroy - delete a cache |
2538 | * @cachep: the cache to destroy | 2538 | * @cachep: the cache to destroy |
2539 | * | 2539 | * |
2540 | * Remove a &struct kmem_cache object from the slab cache. | 2540 | * Remove a &struct kmem_cache object from the slab cache. |
2541 | * | 2541 | * |
2542 | * It is expected this function will be called by a module when it is | 2542 | * It is expected this function will be called by a module when it is |
2543 | * unloaded. This will remove the cache completely, and avoid a duplicate | 2543 | * unloaded. This will remove the cache completely, and avoid a duplicate |
2544 | * cache being allocated each time a module is loaded and unloaded, if the | 2544 | * cache being allocated each time a module is loaded and unloaded, if the |
2545 | * module doesn't have persistent in-kernel storage across loads and unloads. | 2545 | * module doesn't have persistent in-kernel storage across loads and unloads. |
2546 | * | 2546 | * |
2547 | * The cache must be empty before calling this function. | 2547 | * The cache must be empty before calling this function. |
2548 | * | 2548 | * |
2549 | * The caller must guarantee that no one will allocate memory from the cache | 2549 | * The caller must guarantee that no one will allocate memory from the cache |
2550 | * during kmem_cache_destroy(). | 2550 | * during kmem_cache_destroy(). |
2551 | */ | 2551 | */ |
2552 | void kmem_cache_destroy(struct kmem_cache *cachep) | 2552 | void kmem_cache_destroy(struct kmem_cache *cachep) |
2553 | { | 2553 | { |
2554 | BUG_ON(!cachep || in_interrupt()); | 2554 | BUG_ON(!cachep || in_interrupt()); |
2555 | 2555 | ||
2556 | /* Find the cache in the chain of caches. */ | 2556 | /* Find the cache in the chain of caches. */ |
2557 | mutex_lock(&cache_chain_mutex); | 2557 | mutex_lock(&cache_chain_mutex); |
2558 | /* | 2558 | /* |
2559 | * the chain is never empty, cache_cache is never destroyed | 2559 | * the chain is never empty, cache_cache is never destroyed |
2560 | */ | 2560 | */ |
2561 | list_del(&cachep->next); | 2561 | list_del(&cachep->next); |
2562 | if (__cache_shrink(cachep)) { | 2562 | if (__cache_shrink(cachep)) { |
2563 | slab_error(cachep, "Can't free all objects"); | 2563 | slab_error(cachep, "Can't free all objects"); |
2564 | list_add(&cachep->next, &cache_chain); | 2564 | list_add(&cachep->next, &cache_chain); |
2565 | mutex_unlock(&cache_chain_mutex); | 2565 | mutex_unlock(&cache_chain_mutex); |
2566 | return; | 2566 | return; |
2567 | } | 2567 | } |
2568 | 2568 | ||
2569 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) | 2569 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) |
2570 | synchronize_rcu(); | 2570 | synchronize_rcu(); |
2571 | 2571 | ||
2572 | __kmem_cache_destroy(cachep); | 2572 | __kmem_cache_destroy(cachep); |
2573 | mutex_unlock(&cache_chain_mutex); | 2573 | mutex_unlock(&cache_chain_mutex); |
2574 | } | 2574 | } |
2575 | EXPORT_SYMBOL(kmem_cache_destroy); | 2575 | EXPORT_SYMBOL(kmem_cache_destroy); |
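
Continuing the foo_cache sketch from after kmem_cache_create() above, the unload side that the comment describes might look like this (illustrative names; the cache pointer is assumed to be the one created in the module's init):

#include <linux/module.h>
#include <linux/slab.h>

static void __exit foo_exit(void)
{
	/* every foo object must already be freed back to the cache;
	 * otherwise the function above reports "Can't free all objects"
	 * and re-links the cache instead of destroying it */
	kmem_cache_destroy(foo_cache);
}
module_exit(foo_exit);
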
2576 | 2576 | ||
2577 | /* | 2577 | /* |
2578 | * Get the memory for a slab management obj. | 2578 | * Get the memory for a slab management obj. |
2579 | * For a slab cache when the slab descriptor is off-slab, slab descriptors | 2579 | * For a slab cache when the slab descriptor is off-slab, slab descriptors |
2580 | * always come from malloc_sizes caches. The slab descriptor cannot | 2580 | * always come from malloc_sizes caches. The slab descriptor cannot |
2581 | * come from the same cache which is getting created because, | 2581 | * come from the same cache which is getting created because, |
2582 | * when we are searching for an appropriate cache for these | 2582 | * when we are searching for an appropriate cache for these |
2583 | * descriptors in kmem_cache_create, we search through the malloc_sizes array. | 2583 | * descriptors in kmem_cache_create, we search through the malloc_sizes array. |
2584 | * If we are creating a malloc_sizes cache here it would not be visible to | 2584 | * If we are creating a malloc_sizes cache here it would not be visible to |
2585 | * kmem_find_general_cachep till the initialization is complete. | 2585 | * kmem_find_general_cachep till the initialization is complete. |
2586 | * Hence we cannot have slabp_cache same as the original cache. | 2586 | * Hence we cannot have slabp_cache same as the original cache. |
2587 | */ | 2587 | */ |
2588 | static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, | 2588 | static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, |
2589 | int colour_off, gfp_t local_flags, | 2589 | int colour_off, gfp_t local_flags, |
2590 | int nodeid) | 2590 | int nodeid) |
2591 | { | 2591 | { |
2592 | struct slab *slabp; | 2592 | struct slab *slabp; |
2593 | 2593 | ||
2594 | if (OFF_SLAB(cachep)) { | 2594 | if (OFF_SLAB(cachep)) { |
2595 | /* Slab management obj is off-slab. */ | 2595 | /* Slab management obj is off-slab. */ |
2596 | slabp = kmem_cache_alloc_node(cachep->slabp_cache, | 2596 | slabp = kmem_cache_alloc_node(cachep->slabp_cache, |
2597 | local_flags & ~GFP_THISNODE, nodeid); | 2597 | local_flags & ~GFP_THISNODE, nodeid); |
2598 | if (!slabp) | 2598 | if (!slabp) |
2599 | return NULL; | 2599 | return NULL; |
2600 | } else { | 2600 | } else { |
2601 | slabp = objp + colour_off; | 2601 | slabp = objp + colour_off; |
2602 | colour_off += cachep->slab_size; | 2602 | colour_off += cachep->slab_size; |
2603 | } | 2603 | } |
2604 | slabp->inuse = 0; | 2604 | slabp->inuse = 0; |
2605 | slabp->colouroff = colour_off; | 2605 | slabp->colouroff = colour_off; |
2606 | slabp->s_mem = objp + colour_off; | 2606 | slabp->s_mem = objp + colour_off; |
2607 | slabp->nodeid = nodeid; | 2607 | slabp->nodeid = nodeid; |
2608 | return slabp; | 2608 | return slabp; |
2609 | } | 2609 | } |
2610 | 2610 | ||
2611 | static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) | 2611 | static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) |
2612 | { | 2612 | { |
2613 | return (kmem_bufctl_t *) (slabp + 1); | 2613 | return (kmem_bufctl_t *) (slabp + 1); |
2614 | } | 2614 | } |
2615 | 2615 | ||
2616 | static void cache_init_objs(struct kmem_cache *cachep, | 2616 | static void cache_init_objs(struct kmem_cache *cachep, |
2617 | struct slab *slabp, unsigned long ctor_flags) | 2617 | struct slab *slabp, unsigned long ctor_flags) |
2618 | { | 2618 | { |
2619 | int i; | 2619 | int i; |
2620 | 2620 | ||
2621 | for (i = 0; i < cachep->num; i++) { | 2621 | for (i = 0; i < cachep->num; i++) { |
2622 | void *objp = index_to_obj(cachep, slabp, i); | 2622 | void *objp = index_to_obj(cachep, slabp, i); |
2623 | #if DEBUG | 2623 | #if DEBUG |
2624 | /* need to poison the objs? */ | 2624 | /* need to poison the objs? */ |
2625 | if (cachep->flags & SLAB_POISON) | 2625 | if (cachep->flags & SLAB_POISON) |
2626 | poison_obj(cachep, objp, POISON_FREE); | 2626 | poison_obj(cachep, objp, POISON_FREE); |
2627 | if (cachep->flags & SLAB_STORE_USER) | 2627 | if (cachep->flags & SLAB_STORE_USER) |
2628 | *dbg_userword(cachep, objp) = NULL; | 2628 | *dbg_userword(cachep, objp) = NULL; |
2629 | 2629 | ||
2630 | if (cachep->flags & SLAB_RED_ZONE) { | 2630 | if (cachep->flags & SLAB_RED_ZONE) { |
2631 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; | 2631 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; |
2632 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; | 2632 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; |
2633 | } | 2633 | } |
2634 | /* | 2634 | /* |
2635 | * Constructors are not allowed to allocate memory from the same | 2635 | * Constructors are not allowed to allocate memory from the same |
2636 | * cache which they are a constructor for. Otherwise, deadlock. | 2636 | * cache which they are a constructor for. Otherwise, deadlock. |
2637 | * They must also be threaded. | 2637 | * They must also be threaded. |
2638 | */ | 2638 | */ |
2639 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) | 2639 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) |
2640 | cachep->ctor(objp + obj_offset(cachep), cachep, | 2640 | cachep->ctor(objp + obj_offset(cachep), cachep, |
2641 | ctor_flags); | 2641 | ctor_flags); |
2642 | 2642 | ||
2643 | if (cachep->flags & SLAB_RED_ZONE) { | 2643 | if (cachep->flags & SLAB_RED_ZONE) { |
2644 | if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) | 2644 | if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) |
2645 | slab_error(cachep, "constructor overwrote the" | 2645 | slab_error(cachep, "constructor overwrote the" |
2646 | " end of an object"); | 2646 | " end of an object"); |
2647 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) | 2647 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) |
2648 | slab_error(cachep, "constructor overwrote the" | 2648 | slab_error(cachep, "constructor overwrote the" |
2649 | " start of an object"); | 2649 | " start of an object"); |
2650 | } | 2650 | } |
2651 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && | 2651 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && |
2652 | OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) | 2652 | OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) |
2653 | kernel_map_pages(virt_to_page(objp), | 2653 | kernel_map_pages(virt_to_page(objp), |
2654 | cachep->buffer_size / PAGE_SIZE, 0); | 2654 | cachep->buffer_size / PAGE_SIZE, 0); |
2655 | #else | 2655 | #else |
2656 | if (cachep->ctor) | 2656 | if (cachep->ctor) |
2657 | cachep->ctor(objp, cachep, ctor_flags); | 2657 | cachep->ctor(objp, cachep, ctor_flags); |
2658 | #endif | 2658 | #endif |
2659 | slab_bufctl(slabp)[i] = i + 1; | 2659 | slab_bufctl(slabp)[i] = i + 1; |
2660 | } | 2660 | } |
2661 | slab_bufctl(slabp)[i - 1] = BUFCTL_END; | 2661 | slab_bufctl(slabp)[i - 1] = BUFCTL_END; |
2662 | slabp->free = 0; | 2662 | slabp->free = 0; |
2663 | } | 2663 | } |
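
cache_init_objs() threads every object onto an index-based free list: entry i of the bufctl array names the next free object, the last entry carries an end marker, and slabp->free starts at index 0. A self-contained userspace model of that layout (purely illustrative):

#include <stdio.h>

#define NUM		4
#define BUFCTL_END	((unsigned)~0U)

int main(void)
{
	unsigned bufctl[NUM], free_idx = 0, i;

	for (i = 0; i < NUM; i++)
		bufctl[i] = i + 1;		/* chain: 0 -> 1 -> 2 -> 3 */
	bufctl[NUM - 1] = BUFCTL_END;		/* terminate the chain */

	for (i = free_idx; i != BUFCTL_END; i = bufctl[i])
		printf("free object %u\n", i);
	return 0;
}
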
2664 | 2664 | ||
2665 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) | 2665 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) |
2666 | { | 2666 | { |
2667 | if (CONFIG_ZONE_DMA_FLAG) { | 2667 | if (CONFIG_ZONE_DMA_FLAG) { |
2668 | if (flags & GFP_DMA) | 2668 | if (flags & GFP_DMA) |
2669 | BUG_ON(!(cachep->gfpflags & GFP_DMA)); | 2669 | BUG_ON(!(cachep->gfpflags & GFP_DMA)); |
2670 | else | 2670 | else |
2671 | BUG_ON(cachep->gfpflags & GFP_DMA); | 2671 | BUG_ON(cachep->gfpflags & GFP_DMA); |
2672 | } | 2672 | } |
2673 | } | 2673 | } |
2674 | 2674 | ||
2675 | static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, | 2675 | static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, |
2676 | int nodeid) | 2676 | int nodeid) |
2677 | { | 2677 | { |
2678 | void *objp = index_to_obj(cachep, slabp, slabp->free); | 2678 | void *objp = index_to_obj(cachep, slabp, slabp->free); |
2679 | kmem_bufctl_t next; | 2679 | kmem_bufctl_t next; |
2680 | 2680 | ||
2681 | slabp->inuse++; | 2681 | slabp->inuse++; |
2682 | next = slab_bufctl(slabp)[slabp->free]; | 2682 | next = slab_bufctl(slabp)[slabp->free]; |
2683 | #if DEBUG | 2683 | #if DEBUG |
2684 | slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; | 2684 | slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; |
2685 | WARN_ON(slabp->nodeid != nodeid); | 2685 | WARN_ON(slabp->nodeid != nodeid); |
2686 | #endif | 2686 | #endif |
2687 | slabp->free = next; | 2687 | slabp->free = next; |
2688 | 2688 | ||
2689 | return objp; | 2689 | return objp; |
2690 | } | 2690 | } |
2691 | 2691 | ||
2692 | static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, | 2692 | static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, |
2693 | void *objp, int nodeid) | 2693 | void *objp, int nodeid) |
2694 | { | 2694 | { |
2695 | unsigned int objnr = obj_to_index(cachep, slabp, objp); | 2695 | unsigned int objnr = obj_to_index(cachep, slabp, objp); |
2696 | 2696 | ||
2697 | #if DEBUG | 2697 | #if DEBUG |
2698 | /* Verify that the slab belongs to the intended node */ | 2698 | /* Verify that the slab belongs to the intended node */ |
2699 | WARN_ON(slabp->nodeid != nodeid); | 2699 | WARN_ON(slabp->nodeid != nodeid); |
2700 | 2700 | ||
2701 | if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) { | 2701 | if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) { |
2702 | printk(KERN_ERR "slab: double free detected in cache " | 2702 | printk(KERN_ERR "slab: double free detected in cache " |
2703 | "'%s', objp %p\n", cachep->name, objp); | 2703 | "'%s', objp %p\n", cachep->name, objp); |
2704 | BUG(); | 2704 | BUG(); |
2705 | } | 2705 | } |
2706 | #endif | 2706 | #endif |
2707 | slab_bufctl(slabp)[objnr] = slabp->free; | 2707 | slab_bufctl(slabp)[objnr] = slabp->free; |
2708 | slabp->free = objnr; | 2708 | slabp->free = objnr; |
2709 | slabp->inuse--; | 2709 | slabp->inuse--; |
2710 | } | 2710 | } |
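
slab_get_obj() pops the head of that index chain and slab_put_obj() pushes a freed index back on, so the free list behaves as a LIFO stack with no per-object pointers. Continuing the userspace model above (illustrative names, debug checks omitted):

static unsigned slab_get(unsigned *bufctl, unsigned *free_idx)
{
	unsigned obj = *free_idx;

	*free_idx = bufctl[obj];	/* pop: head moves to the next link */
	return obj;
}

static void slab_put(unsigned *bufctl, unsigned *free_idx, unsigned obj)
{
	bufctl[obj] = *free_idx;	/* push: old head becomes the link */
	*free_idx = obj;
}
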
2711 | 2711 | ||
2712 | /* | 2712 | /* |
2713 | * Map pages beginning at addr to the given cache and slab. This is required | 2713 | * Map pages beginning at addr to the given cache and slab. This is required |
2714 | * for the slab allocator to be able to lookup the cache and slab of a | 2714 | * for the slab allocator to be able to lookup the cache and slab of a |
2715 | * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging. | 2715 | * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging. |
2716 | */ | 2716 | */ |
2717 | static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, | 2717 | static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, |
2718 | void *addr) | 2718 | void *addr) |
2719 | { | 2719 | { |
2720 | int nr_pages; | 2720 | int nr_pages; |
2721 | struct page *page; | 2721 | struct page *page; |
2722 | 2722 | ||
2723 | page = virt_to_page(addr); | 2723 | page = virt_to_page(addr); |
2724 | 2724 | ||
2725 | nr_pages = 1; | 2725 | nr_pages = 1; |
2726 | if (likely(!PageCompound(page))) | 2726 | if (likely(!PageCompound(page))) |
2727 | nr_pages <<= cache->gfporder; | 2727 | nr_pages <<= cache->gfporder; |
2728 | 2728 | ||
2729 | do { | 2729 | do { |
2730 | page_set_cache(page, cache); | 2730 | page_set_cache(page, cache); |
2731 | page_set_slab(page, slab); | 2731 | page_set_slab(page, slab); |
2732 | page++; | 2732 | page++; |
2733 | } while (--nr_pages); | 2733 | } while (--nr_pages); |
2734 | } | 2734 | } |
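
With every page annotated like this, a later kfree()-style path can go pointer -> page -> cache without searching any list. A hedged reconstruction of that lookup, assuming the page_get_cache() helper defined earlier in this file; this is a sketch of the idea, not the literal kfree() body:

static struct kmem_cache *cache_from_obj(const void *objp)
{
	/* virt_to_head_page() collapses compound pages to their head page,
	 * which is where slab_map_pages() stored the owning cache */
	return page_get_cache(virt_to_head_page(objp));
}
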
2735 | 2735 | ||
2736 | /* | 2736 | /* |
2737 | * Grow (by 1) the number of slabs within a cache. This is called by | 2737 | * Grow (by 1) the number of slabs within a cache. This is called by |
2738 | * kmem_cache_alloc() when there are no active objs left in a cache. | 2738 | * kmem_cache_alloc() when there are no active objs left in a cache. |
2739 | */ | 2739 | */ |
2740 | static int cache_grow(struct kmem_cache *cachep, | 2740 | static int cache_grow(struct kmem_cache *cachep, |
2741 | gfp_t flags, int nodeid, void *objp) | 2741 | gfp_t flags, int nodeid, void *objp) |
2742 | { | 2742 | { |
2743 | struct slab *slabp; | 2743 | struct slab *slabp; |
2744 | size_t offset; | 2744 | size_t offset; |
2745 | gfp_t local_flags; | 2745 | gfp_t local_flags; |
2746 | unsigned long ctor_flags; | 2746 | unsigned long ctor_flags; |
2747 | struct kmem_list3 *l3; | 2747 | struct kmem_list3 *l3; |
2748 | 2748 | ||
2749 | /* | 2749 | /* |
2750 | * Be lazy and only check for valid flags here, keeping it out of the | 2750 | * Be lazy and only check for valid flags here, keeping it out of the |
2751 | * critical path in kmem_cache_alloc(). | 2751 | * critical path in kmem_cache_alloc(). |
2752 | */ | 2752 | */ |
2753 | BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK)); | 2753 | BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK)); |
2754 | 2754 | ||
2755 | ctor_flags = SLAB_CTOR_CONSTRUCTOR; | 2755 | ctor_flags = SLAB_CTOR_CONSTRUCTOR; |
2756 | local_flags = (flags & GFP_LEVEL_MASK); | 2756 | local_flags = (flags & GFP_LEVEL_MASK); |
2757 | /* Take the l3 list lock to change the colour_next on this node */ | 2757 | /* Take the l3 list lock to change the colour_next on this node */ |
2758 | check_irq_off(); | 2758 | check_irq_off(); |
2759 | l3 = cachep->nodelists[nodeid]; | 2759 | l3 = cachep->nodelists[nodeid]; |
2760 | spin_lock(&l3->list_lock); | 2760 | spin_lock(&l3->list_lock); |
2761 | 2761 | ||
2762 | /* Get the colour for the slab, and calculate the next value. */ | 2762 | /* Get the colour for the slab, and calculate the next value. */ |
2763 | offset = l3->colour_next; | 2763 | offset = l3->colour_next; |
2764 | l3->colour_next++; | 2764 | l3->colour_next++; |
2765 | if (l3->colour_next >= cachep->colour) | 2765 | if (l3->colour_next >= cachep->colour) |
2766 | l3->colour_next = 0; | 2766 | l3->colour_next = 0; |
2767 | spin_unlock(&l3->list_lock); | 2767 | spin_unlock(&l3->list_lock); |
2768 | 2768 | ||
2769 | offset *= cachep->colour_off; | 2769 | offset *= cachep->colour_off; |
2770 | 2770 | ||
2771 | if (local_flags & __GFP_WAIT) | 2771 | if (local_flags & __GFP_WAIT) |
2772 | local_irq_enable(); | 2772 | local_irq_enable(); |
2773 | 2773 | ||
2774 | /* | 2774 | /* |
2775 | * The test for missing atomic flag is performed here, rather than | 2775 | * The test for missing atomic flag is performed here, rather than |
2776 | * the more obvious place, simply to reduce the critical path length | 2776 | * the more obvious place, simply to reduce the critical path length |
2777 | * in kmem_cache_alloc(). If a caller is seriously mis-behaving they | 2777 | * in kmem_cache_alloc(). If a caller is seriously mis-behaving they |
2778 | * will eventually be caught here (where it matters). | 2778 | * will eventually be caught here (where it matters). |
2779 | */ | 2779 | */ |
2780 | kmem_flagcheck(cachep, flags); | 2780 | kmem_flagcheck(cachep, flags); |
2781 | 2781 | ||
2782 | /* | 2782 | /* |
2783 | * Get mem for the objs. Attempt to allocate a physical page from | 2783 | * Get mem for the objs. Attempt to allocate a physical page from |
2784 | * 'nodeid'. | 2784 | * 'nodeid'. |
2785 | */ | 2785 | */ |
2786 | if (!objp) | 2786 | if (!objp) |
2787 | objp = kmem_getpages(cachep, flags, nodeid); | 2787 | objp = kmem_getpages(cachep, flags, nodeid); |
2788 | if (!objp) | 2788 | if (!objp) |
2789 | goto failed; | 2789 | goto failed; |
2790 | 2790 | ||
2791 | /* Get slab management. */ | 2791 | /* Get slab management. */ |
2792 | slabp = alloc_slabmgmt(cachep, objp, offset, | 2792 | slabp = alloc_slabmgmt(cachep, objp, offset, |
2793 | local_flags & ~GFP_THISNODE, nodeid); | 2793 | local_flags & ~GFP_THISNODE, nodeid); |
2794 | if (!slabp) | 2794 | if (!slabp) |
2795 | goto opps1; | 2795 | goto opps1; |
2796 | 2796 | ||
2797 | slabp->nodeid = nodeid; | 2797 | slabp->nodeid = nodeid; |
2798 | slab_map_pages(cachep, slabp, objp); | 2798 | slab_map_pages(cachep, slabp, objp); |
2799 | 2799 | ||
2800 | cache_init_objs(cachep, slabp, ctor_flags); | 2800 | cache_init_objs(cachep, slabp, ctor_flags); |
2801 | 2801 | ||
2802 | if (local_flags & __GFP_WAIT) | 2802 | if (local_flags & __GFP_WAIT) |
2803 | local_irq_disable(); | 2803 | local_irq_disable(); |
2804 | check_irq_off(); | 2804 | check_irq_off(); |
2805 | spin_lock(&l3->list_lock); | 2805 | spin_lock(&l3->list_lock); |
2806 | 2806 | ||
2807 | /* Make slab active. */ | 2807 | /* Make slab active. */ |
2808 | list_add_tail(&slabp->list, &(l3->slabs_free)); | 2808 | list_add_tail(&slabp->list, &(l3->slabs_free)); |
2809 | STATS_INC_GROWN(cachep); | 2809 | STATS_INC_GROWN(cachep); |
2810 | l3->free_objects += cachep->num; | 2810 | l3->free_objects += cachep->num; |
2811 | spin_unlock(&l3->list_lock); | 2811 | spin_unlock(&l3->list_lock); |
2812 | return 1; | 2812 | return 1; |
2813 | opps1: | 2813 | opps1: |
2814 | kmem_freepages(cachep, objp); | 2814 | kmem_freepages(cachep, objp); |
2815 | failed: | 2815 | failed: |
2816 | if (local_flags & __GFP_WAIT) | 2816 | if (local_flags & __GFP_WAIT) |
2817 | local_irq_disable(); | 2817 | local_irq_disable(); |
2818 | return 0; | 2818 | return 0; |
2819 | } | 2819 | } |
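
The colouring step near the top of cache_grow() cycles colour_next through [0, cachep->colour) and offsets each new slab's first object by colour * colour_off bytes, spreading otherwise identical objects across cache lines. A runnable userspace model with illustrative values:

#include <stdio.h>

int main(void)
{
	unsigned colour = 5, colour_off = 64, colour_next = 0, i;

	for (i = 0; i < 8; i++) {		/* eight successive slabs */
		unsigned offset = colour_next * colour_off;

		if (++colour_next >= colour)
			colour_next = 0;	/* wrap, as in cache_grow() */
		printf("slab %u starts at byte offset %u\n", i, offset);
	}
	return 0;
}
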
2820 | 2820 | ||
2821 | #if DEBUG | 2821 | #if DEBUG |
2822 | 2822 | ||
2823 | /* | 2823 | /* |
2824 | * Perform extra freeing checks: | 2824 | * Perform extra freeing checks: |
2825 | * - detect bad pointers. | 2825 | * - detect bad pointers. |
2826 | * - POISON/RED_ZONE checking | 2826 | * - POISON/RED_ZONE checking |
2827 | * - destructor calls, for caches with POISON+dtor | 2827 | * - destructor calls, for caches with POISON+dtor |
2828 | */ | 2828 | */ |
2829 | static void kfree_debugcheck(const void *objp) | 2829 | static void kfree_debugcheck(const void *objp) |
2830 | { | 2830 | { |
2831 | if (!virt_addr_valid(objp)) { | 2831 | if (!virt_addr_valid(objp)) { |
2832 | printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n", | 2832 | printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n", |
2833 | (unsigned long)objp); | 2833 | (unsigned long)objp); |
2834 | BUG(); | 2834 | BUG(); |
2835 | } | 2835 | } |
2836 | } | 2836 | } |
2837 | 2837 | ||
2838 | static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) | 2838 | static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) |
2839 | { | 2839 | { |
2840 | unsigned long long redzone1, redzone2; | 2840 | unsigned long long redzone1, redzone2; |
2841 | 2841 | ||
2842 | redzone1 = *dbg_redzone1(cache, obj); | 2842 | redzone1 = *dbg_redzone1(cache, obj); |
2843 | redzone2 = *dbg_redzone2(cache, obj); | 2843 | redzone2 = *dbg_redzone2(cache, obj); |
2844 | 2844 | ||
2845 | /* | 2845 | /* |
2846 | * Redzone is ok. | 2846 | * Redzone is ok. |
2847 | */ | 2847 | */ |
2848 | if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE) | 2848 | if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE) |
2849 | return; | 2849 | return; |
2850 | 2850 | ||
2851 | if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE) | 2851 | if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE) |
2852 | slab_error(cache, "double free detected"); | 2852 | slab_error(cache, "double free detected"); |
2853 | else | 2853 | else |
2854 | slab_error(cache, "memory outside object was overwritten"); | 2854 | slab_error(cache, "memory outside object was overwritten"); |
2855 | 2855 | ||
2856 | printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n", | 2856 | printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n", |
2857 | obj, redzone1, redzone2); | 2857 | obj, redzone1, redzone2); |
2858 | } | 2858 | } |
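
The pair of guard words encodes three distinct outcomes, which verify_redzone_free() distinguishes above. A tiny userspace classifier restating that logic; the numeric values are placeholders, the kernel takes the real ones from include/linux/poison.h:

#include <stdio.h>

#define RED_ACTIVE	0x1ULL	/* placeholder for the real poison value */
#define RED_INACTIVE	0x2ULL	/* placeholder for the real poison value */

static const char *classify(unsigned long long rz1, unsigned long long rz2)
{
	if (rz1 == RED_ACTIVE && rz2 == RED_ACTIVE)
		return "ok";				/* valid free */
	if (rz1 == RED_INACTIVE && rz2 == RED_INACTIVE)
		return "double free detected";
	return "memory outside object was overwritten";
}

int main(void)
{
	printf("%s\n", classify(RED_INACTIVE, RED_INACTIVE));
	return 0;
}
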
2859 | 2859 | ||
2860 | static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | 2860 | static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, |
2861 | void *caller) | 2861 | void *caller) |
2862 | { | 2862 | { |
2863 | struct page *page; | 2863 | struct page *page; |
2864 | unsigned int objnr; | 2864 | unsigned int objnr; |
2865 | struct slab *slabp; | 2865 | struct slab *slabp; |
2866 | 2866 | ||
2867 | objp -= obj_offset(cachep); | 2867 | objp -= obj_offset(cachep); |
2868 | kfree_debugcheck(objp); | 2868 | kfree_debugcheck(objp); |
2869 | page = virt_to_head_page(objp); | 2869 | page = virt_to_head_page(objp); |
2870 | 2870 | ||
2871 | slabp = page_get_slab(page); | 2871 | slabp = page_get_slab(page); |
2872 | 2872 | ||
2873 | if (cachep->flags & SLAB_RED_ZONE) { | 2873 | if (cachep->flags & SLAB_RED_ZONE) { |
2874 | verify_redzone_free(cachep, objp); | 2874 | verify_redzone_free(cachep, objp); |
2875 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; | 2875 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; |
2876 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; | 2876 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; |
2877 | } | 2877 | } |
2878 | if (cachep->flags & SLAB_STORE_USER) | 2878 | if (cachep->flags & SLAB_STORE_USER) |
2879 | *dbg_userword(cachep, objp) = caller; | 2879 | *dbg_userword(cachep, objp) = caller; |
2880 | 2880 | ||
2881 | objnr = obj_to_index(cachep, slabp, objp); | 2881 | objnr = obj_to_index(cachep, slabp, objp); |
2882 | 2882 | ||
2883 | BUG_ON(objnr >= cachep->num); | 2883 | BUG_ON(objnr >= cachep->num); |
2884 | BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); | 2884 | BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); |
2885 | 2885 | ||
2886 | if (cachep->flags & SLAB_POISON && cachep->dtor) { | 2886 | if (cachep->flags & SLAB_POISON && cachep->dtor) { |
2887 | /* we want to cache poison the object, | 2887 | /* we want to cache poison the object, |
2888 | * call the destruction callback | 2888 | * call the destruction callback |
2889 | */ | 2889 | */ |
2890 | cachep->dtor(objp + obj_offset(cachep), cachep, 0); | 2890 | cachep->dtor(objp + obj_offset(cachep), cachep, 0); |
2891 | } | 2891 | } |
2892 | #ifdef CONFIG_DEBUG_SLAB_LEAK | 2892 | #ifdef CONFIG_DEBUG_SLAB_LEAK |
2893 | slab_bufctl(slabp)[objnr] = BUFCTL_FREE; | 2893 | slab_bufctl(slabp)[objnr] = BUFCTL_FREE; |
2894 | #endif | 2894 | #endif |
2895 | if (cachep->flags & SLAB_POISON) { | 2895 | if (cachep->flags & SLAB_POISON) { |
2896 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2896 | #ifdef CONFIG_DEBUG_PAGEALLOC |
2897 | if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { | 2897 | if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { |
2898 | store_stackinfo(cachep, objp, (unsigned long)caller); | 2898 | store_stackinfo(cachep, objp, (unsigned long)caller); |
2899 | kernel_map_pages(virt_to_page(objp), | 2899 | kernel_map_pages(virt_to_page(objp), |
2900 | cachep->buffer_size / PAGE_SIZE, 0); | 2900 | cachep->buffer_size / PAGE_SIZE, 0); |
2901 | } else { | 2901 | } else { |
2902 | poison_obj(cachep, objp, POISON_FREE); | 2902 | poison_obj(cachep, objp, POISON_FREE); |
2903 | } | 2903 | } |
2904 | #else | 2904 | #else |
2905 | poison_obj(cachep, objp, POISON_FREE); | 2905 | poison_obj(cachep, objp, POISON_FREE); |
2906 | #endif | 2906 | #endif |
2907 | } | 2907 | } |
2908 | return objp; | 2908 | return objp; |
2909 | } | 2909 | } |
2910 | 2910 | ||
2911 | static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) | 2911 | static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) |
2912 | { | 2912 | { |
2913 | kmem_bufctl_t i; | 2913 | kmem_bufctl_t i; |
2914 | int entries = 0; | 2914 | int entries = 0; |
2915 | 2915 | ||
2916 | /* Check slab's freelist to see if this obj is there. */ | 2916 | /* Check slab's freelist to see if this obj is there. */ |
2917 | for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { | 2917 | for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { |
2918 | entries++; | 2918 | entries++; |
2919 | if (entries > cachep->num || i >= cachep->num) | 2919 | if (entries > cachep->num || i >= cachep->num) |
2920 | goto bad; | 2920 | goto bad; |
2921 | } | 2921 | } |
2922 | if (entries != cachep->num - slabp->inuse) { | 2922 | if (entries != cachep->num - slabp->inuse) { |
2923 | bad: | 2923 | bad: |
2924 | printk(KERN_ERR "slab: Internal list corruption detected in " | 2924 | printk(KERN_ERR "slab: Internal list corruption detected in " |
2925 | "cache '%s'(%d), slabp %p(%d). Hexdump:\n", | 2925 | "cache '%s'(%d), slabp %p(%d). Hexdump:\n", |
2926 | cachep->name, cachep->num, slabp, slabp->inuse); | 2926 | cachep->name, cachep->num, slabp, slabp->inuse); |
2927 | for (i = 0; | 2927 | for (i = 0; |
2928 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); | 2928 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); |
2929 | i++) { | 2929 | i++) { |
2930 | if (i % 16 == 0) | 2930 | if (i % 16 == 0) |
2931 | printk("\n%03x:", i); | 2931 | printk("\n%03x:", i); |
2932 | printk(" %02x", ((unsigned char *)slabp)[i]); | 2932 | printk(" %02x", ((unsigned char *)slabp)[i]); |
2933 | } | 2933 | } |
2934 | printk("\n"); | 2934 | printk("\n"); |
2935 | BUG(); | 2935 | BUG(); |
2936 | } | 2936 | } |
2937 | } | 2937 | } |
2938 | #else | 2938 | #else |
2939 | #define kfree_debugcheck(x) do { } while(0) | 2939 | #define kfree_debugcheck(x) do { } while(0) |
2940 | #define cache_free_debugcheck(x,objp,z) (objp) | 2940 | #define cache_free_debugcheck(x,objp,z) (objp) |
2941 | #define check_slabp(x,y) do { } while(0) | 2941 | #define check_slabp(x,y) do { } while(0) |
2942 | #endif | 2942 | #endif |
2943 | 2943 | ||
2944 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | 2944 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) |
2945 | { | 2945 | { |
2946 | int batchcount; | 2946 | int batchcount; |
2947 | struct kmem_list3 *l3; | 2947 | struct kmem_list3 *l3; |
2948 | struct array_cache *ac; | 2948 | struct array_cache *ac; |
2949 | int node; | 2949 | int node; |
2950 | 2950 | ||
2951 | node = numa_node_id(); | 2951 | node = numa_node_id(); |
2952 | 2952 | ||
2953 | check_irq_off(); | 2953 | check_irq_off(); |
2954 | ac = cpu_cache_get(cachep); | 2954 | ac = cpu_cache_get(cachep); |
2955 | retry: | 2955 | retry: |
2956 | batchcount = ac->batchcount; | 2956 | batchcount = ac->batchcount; |
2957 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { | 2957 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { |
2958 | /* | 2958 | /* |
2959 | * If there was little recent activity on this cache, then | 2959 | * If there was little recent activity on this cache, then |
2960 | * perform only a partial refill. Otherwise we could generate | 2960 | * perform only a partial refill. Otherwise we could generate |
2961 | * refill bouncing. | 2961 | * refill bouncing. |
2962 | */ | 2962 | */ |
2963 | batchcount = BATCHREFILL_LIMIT; | 2963 | batchcount = BATCHREFILL_LIMIT; |
2964 | } | 2964 | } |
2965 | l3 = cachep->nodelists[node]; | 2965 | l3 = cachep->nodelists[node]; |
2966 | 2966 | ||
2967 | BUG_ON(ac->avail > 0 || !l3); | 2967 | BUG_ON(ac->avail > 0 || !l3); |
2968 | spin_lock(&l3->list_lock); | 2968 | spin_lock(&l3->list_lock); |
2969 | 2969 | ||
2970 | /* See if we can refill from the shared array */ | 2970 | /* See if we can refill from the shared array */ |
2971 | if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) | 2971 | if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) |
2972 | goto alloc_done; | 2972 | goto alloc_done; |
2973 | 2973 | ||
2974 | while (batchcount > 0) { | 2974 | while (batchcount > 0) { |
2975 | struct list_head *entry; | 2975 | struct list_head *entry; |
2976 | struct slab *slabp; | 2976 | struct slab *slabp; |
2977 | /* Get the slab the allocation is to come from. */ | 2977 | /* Get the slab the allocation is to come from. */ |
2978 | entry = l3->slabs_partial.next; | 2978 | entry = l3->slabs_partial.next; |
2979 | if (entry == &l3->slabs_partial) { | 2979 | if (entry == &l3->slabs_partial) { |
2980 | l3->free_touched = 1; | 2980 | l3->free_touched = 1; |
2981 | entry = l3->slabs_free.next; | 2981 | entry = l3->slabs_free.next; |
2982 | if (entry == &l3->slabs_free) | 2982 | if (entry == &l3->slabs_free) |
2983 | goto must_grow; | 2983 | goto must_grow; |
2984 | } | 2984 | } |
2985 | 2985 | ||
2986 | slabp = list_entry(entry, struct slab, list); | 2986 | slabp = list_entry(entry, struct slab, list); |
2987 | check_slabp(cachep, slabp); | 2987 | check_slabp(cachep, slabp); |
2988 | check_spinlock_acquired(cachep); | 2988 | check_spinlock_acquired(cachep); |
2989 | 2989 | ||
2990 | /* | 2990 | /* |
2991 | * The slab was either on partial or free list so | 2991 | * The slab was either on partial or free list so |
2992 | * there must be at least one object available for | 2992 | * there must be at least one object available for |
2993 | * allocation. | 2993 | * allocation. |
2994 | */ | 2994 | */ |
2995 | BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num); | 2995 | BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num); |
2996 | 2996 | ||
2997 | while (slabp->inuse < cachep->num && batchcount--) { | 2997 | while (slabp->inuse < cachep->num && batchcount--) { |
2998 | STATS_INC_ALLOCED(cachep); | 2998 | STATS_INC_ALLOCED(cachep); |
2999 | STATS_INC_ACTIVE(cachep); | 2999 | STATS_INC_ACTIVE(cachep); |
3000 | STATS_SET_HIGH(cachep); | 3000 | STATS_SET_HIGH(cachep); |
3001 | 3001 | ||
3002 | ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, | 3002 | ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, |
3003 | node); | 3003 | node); |
3004 | } | 3004 | } |
3005 | check_slabp(cachep, slabp); | 3005 | check_slabp(cachep, slabp); |
3006 | 3006 | ||
3007 | /* move slabp to correct slabp list: */ | 3007 | /* move slabp to correct slabp list: */ |
3008 | list_del(&slabp->list); | 3008 | list_del(&slabp->list); |
3009 | if (slabp->free == BUFCTL_END) | 3009 | if (slabp->free == BUFCTL_END) |
3010 | list_add(&slabp->list, &l3->slabs_full); | 3010 | list_add(&slabp->list, &l3->slabs_full); |
3011 | else | 3011 | else |
3012 | list_add(&slabp->list, &l3->slabs_partial); | 3012 | list_add(&slabp->list, &l3->slabs_partial); |
3013 | } | 3013 | } |
3014 | 3014 | ||
3015 | must_grow: | 3015 | must_grow: |
3016 | l3->free_objects -= ac->avail; | 3016 | l3->free_objects -= ac->avail; |
3017 | alloc_done: | 3017 | alloc_done: |
3018 | spin_unlock(&l3->list_lock); | 3018 | spin_unlock(&l3->list_lock); |
3019 | 3019 | ||
3020 | if (unlikely(!ac->avail)) { | 3020 | if (unlikely(!ac->avail)) { |
3021 | int x; | 3021 | int x; |
3022 | x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); | 3022 | x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); |
3023 | 3023 | ||
3024 | /* cache_grow can reenable interrupts, then ac could change. */ | 3024 | /* cache_grow can reenable interrupts, then ac could change. */ |
3025 | ac = cpu_cache_get(cachep); | 3025 | ac = cpu_cache_get(cachep); |
3026 | if (!x && ac->avail == 0) /* no objects in sight? abort */ | 3026 | if (!x && ac->avail == 0) /* no objects in sight? abort */ |
3027 | return NULL; | 3027 | return NULL; |
3028 | 3028 | ||
3029 | if (!ac->avail) /* objects refilled by interrupt? */ | 3029 | if (!ac->avail) /* objects refilled by interrupt? */ |
3030 | goto retry; | 3030 | goto retry; |
3031 | } | 3031 | } |
3032 | ac->touched = 1; | 3032 | ac->touched = 1; |
3033 | return ac->entry[--ac->avail]; | 3033 | return ac->entry[--ac->avail]; |
3034 | } | 3034 | } |
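
cache_alloc_refill() moves up to batchcount objects into the per-CPU array in one locked pass, so the common allocation path is nothing more than popping ac->entry[--ac->avail]. A stripped-down userspace model of that batching, with illustrative names and sizes:

#define BATCH 4

static int shared_pool[16];
static int shared_avail = 16;

static int pcpu_entry[BATCH];
static int pcpu_avail;

static int alloc_obj(void)
{
	if (pcpu_avail == 0) {			/* slow path: one batched grab */
		while (pcpu_avail < BATCH && shared_avail > 0)
			pcpu_entry[pcpu_avail++] = shared_pool[--shared_avail];
		if (pcpu_avail == 0)
			return -1;		/* nothing left anywhere */
	}
	return pcpu_entry[--pcpu_avail];	/* the common fast path */
}
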
3035 | 3035 | ||
3036 | static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, | 3036 | static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, |
3037 | gfp_t flags) | 3037 | gfp_t flags) |
3038 | { | 3038 | { |
3039 | might_sleep_if(flags & __GFP_WAIT); | 3039 | might_sleep_if(flags & __GFP_WAIT); |
3040 | #if DEBUG | 3040 | #if DEBUG |
3041 | kmem_flagcheck(cachep, flags); | 3041 | kmem_flagcheck(cachep, flags); |
3042 | #endif | 3042 | #endif |
3043 | } | 3043 | } |
3044 | 3044 | ||
3045 | #if DEBUG | 3045 | #if DEBUG |
3046 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | 3046 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, |
3047 | gfp_t flags, void *objp, void *caller) | 3047 | gfp_t flags, void *objp, void *caller) |
3048 | { | 3048 | { |
3049 | if (!objp) | 3049 | if (!objp) |
3050 | return objp; | 3050 | return objp; |
3051 | if (cachep->flags & SLAB_POISON) { | 3051 | if (cachep->flags & SLAB_POISON) { |
3052 | #ifdef CONFIG_DEBUG_PAGEALLOC | 3052 | #ifdef CONFIG_DEBUG_PAGEALLOC |
3053 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) | 3053 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) |
3054 | kernel_map_pages(virt_to_page(objp), | 3054 | kernel_map_pages(virt_to_page(objp), |
3055 | cachep->buffer_size / PAGE_SIZE, 1); | 3055 | cachep->buffer_size / PAGE_SIZE, 1); |
3056 | else | 3056 | else |
3057 | check_poison_obj(cachep, objp); | 3057 | check_poison_obj(cachep, objp); |
3058 | #else | 3058 | #else |
3059 | check_poison_obj(cachep, objp); | 3059 | check_poison_obj(cachep, objp); |
3060 | #endif | 3060 | #endif |
3061 | poison_obj(cachep, objp, POISON_INUSE); | 3061 | poison_obj(cachep, objp, POISON_INUSE); |
3062 | } | 3062 | } |
3063 | if (cachep->flags & SLAB_STORE_USER) | 3063 | if (cachep->flags & SLAB_STORE_USER) |
3064 | *dbg_userword(cachep, objp) = caller; | 3064 | *dbg_userword(cachep, objp) = caller; |
3065 | 3065 | ||
3066 | if (cachep->flags & SLAB_RED_ZONE) { | 3066 | if (cachep->flags & SLAB_RED_ZONE) { |
3067 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || | 3067 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || |
3068 | *dbg_redzone2(cachep, objp) != RED_INACTIVE) { | 3068 | *dbg_redzone2(cachep, objp) != RED_INACTIVE) { |
3069 | slab_error(cachep, "double free, or memory outside" | 3069 | slab_error(cachep, "double free, or memory outside" |
3070 | " object was overwritten"); | 3070 | " object was overwritten"); |
3071 | printk(KERN_ERR | 3071 | printk(KERN_ERR |
3072 | "%p: redzone 1:0x%llx, redzone 2:0x%llx\n", | 3072 | "%p: redzone 1:0x%llx, redzone 2:0x%llx\n", |
3073 | objp, *dbg_redzone1(cachep, objp), | 3073 | objp, *dbg_redzone1(cachep, objp), |
3074 | *dbg_redzone2(cachep, objp)); | 3074 | *dbg_redzone2(cachep, objp)); |
3075 | } | 3075 | } |
3076 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; | 3076 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; |
3077 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; | 3077 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; |
3078 | } | 3078 | } |
3079 | #ifdef CONFIG_DEBUG_SLAB_LEAK | 3079 | #ifdef CONFIG_DEBUG_SLAB_LEAK |
3080 | { | 3080 | { |
3081 | struct slab *slabp; | 3081 | struct slab *slabp; |
3082 | unsigned objnr; | 3082 | unsigned objnr; |
3083 | 3083 | ||
3084 | slabp = page_get_slab(virt_to_head_page(objp)); | 3084 | slabp = page_get_slab(virt_to_head_page(objp)); |
3085 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; | 3085 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; |
3086 | slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; | 3086 | slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; |
3087 | } | 3087 | } |
3088 | #endif | 3088 | #endif |
3089 | objp += obj_offset(cachep); | 3089 | objp += obj_offset(cachep); |
3090 | if (cachep->ctor && cachep->flags & SLAB_POISON) | 3090 | if (cachep->ctor && cachep->flags & SLAB_POISON) |
3091 | cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR); | 3091 | cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR); |
3092 | #if ARCH_SLAB_MINALIGN | 3092 | #if ARCH_SLAB_MINALIGN |
3093 | if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { | 3093 | if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { |
3094 | printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", | 3094 | printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", |
3095 | objp, ARCH_SLAB_MINALIGN); | 3095 | objp, ARCH_SLAB_MINALIGN); |
3096 | } | 3096 | } |
3097 | #endif | 3097 | #endif |
3098 | return objp; | 3098 | return objp; |
3099 | } | 3099 | } |
3100 | #else | 3100 | #else |
3101 | #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) | 3101 | #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) |
3102 | #endif | 3102 | #endif |
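When debugging is on, every object carries two red-zone words that must both read RED_INACTIVE at allocation time and are flipped to RED_ACTIVE before the object is handed out; a mismatch means a double free or an out-of-bounds write from a neighbouring object. Below is a minimal userspace sketch of that state machine; the RED_* constants are illustrative stand-ins, not the kernel's real magic values:

    /* Userspace sketch of slab red-zone checking. */
    #include <stdio.h>

    #define RED_INACTIVE 0x5A5A5A5AUL   /* object is free (illustrative) */
    #define RED_ACTIVE   0xA5A5A5A5UL   /* object is in use (illustrative) */

    struct obj {
            unsigned long redzone1;
            char payload[32];
            unsigned long redzone2;
    };

    /* Mirrors the check in cache_alloc_debugcheck_after(). */
    static int alloc_check(struct obj *o)
    {
            if (o->redzone1 != RED_INACTIVE || o->redzone2 != RED_INACTIVE) {
                    fprintf(stderr, "double free, or memory outside object was overwritten\n");
                    return -1;
            }
            o->redzone1 = RED_ACTIVE;       /* mark in use */
            o->redzone2 = RED_ACTIVE;
            return 0;
    }

    int main(void)
    {
            struct obj o = { RED_INACTIVE, {0}, RED_INACTIVE };

            printf("first alloc:  %s\n", alloc_check(&o) ? "corrupt" : "ok");
            printf("second alloc: %s\n", alloc_check(&o) ? "corrupt" : "ok");
            return 0;
    }

Freeing reverses the transition (cache_free_debugcheck() expects RED_ACTIVE and restores RED_INACTIVE), which is how the second free of the same object is caught.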
3103 | 3103 | ||
3104 | #ifdef CONFIG_FAILSLAB | 3104 | #ifdef CONFIG_FAILSLAB |
3105 | 3105 | ||
3106 | static struct failslab_attr { | 3106 | static struct failslab_attr { |
3107 | 3107 | ||
3108 | struct fault_attr attr; | 3108 | struct fault_attr attr; |
3109 | 3109 | ||
3110 | u32 ignore_gfp_wait; | 3110 | u32 ignore_gfp_wait; |
3111 | #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS | 3111 | #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS |
3112 | struct dentry *ignore_gfp_wait_file; | 3112 | struct dentry *ignore_gfp_wait_file; |
3113 | #endif | 3113 | #endif |
3114 | 3114 | ||
3115 | } failslab = { | 3115 | } failslab = { |
3116 | .attr = FAULT_ATTR_INITIALIZER, | 3116 | .attr = FAULT_ATTR_INITIALIZER, |
3117 | .ignore_gfp_wait = 1, | 3117 | .ignore_gfp_wait = 1, |
3118 | }; | 3118 | }; |
3119 | 3119 | ||
3120 | static int __init setup_failslab(char *str) | 3120 | static int __init setup_failslab(char *str) |
3121 | { | 3121 | { |
3122 | return setup_fault_attr(&failslab.attr, str); | 3122 | return setup_fault_attr(&failslab.attr, str); |
3123 | } | 3123 | } |
3124 | __setup("failslab=", setup_failslab); | 3124 | __setup("failslab=", setup_failslab); |
3125 | 3125 | ||
3126 | static int should_failslab(struct kmem_cache *cachep, gfp_t flags) | 3126 | static int should_failslab(struct kmem_cache *cachep, gfp_t flags) |
3127 | { | 3127 | { |
3128 | if (cachep == &cache_cache) | 3128 | if (cachep == &cache_cache) |
3129 | return 0; | 3129 | return 0; |
3130 | if (flags & __GFP_NOFAIL) | 3130 | if (flags & __GFP_NOFAIL) |
3131 | return 0; | 3131 | return 0; |
3132 | if (failslab.ignore_gfp_wait && (flags & __GFP_WAIT)) | 3132 | if (failslab.ignore_gfp_wait && (flags & __GFP_WAIT)) |
3133 | return 0; | 3133 | return 0; |
3134 | 3134 | ||
3135 | return should_fail(&failslab.attr, obj_size(cachep)); | 3135 | return should_fail(&failslab.attr, obj_size(cachep)); |
3136 | } | 3136 | } |
3137 | 3137 | ||
3138 | #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS | 3138 | #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS |
3139 | 3139 | ||
3140 | static int __init failslab_debugfs(void) | 3140 | static int __init failslab_debugfs(void) |
3141 | { | 3141 | { |
3142 | mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; | 3142 | mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; |
3143 | struct dentry *dir; | 3143 | struct dentry *dir; |
3144 | int err; | 3144 | int err; |
3145 | 3145 | ||
3146 | err = init_fault_attr_dentries(&failslab.attr, "failslab"); | 3146 | err = init_fault_attr_dentries(&failslab.attr, "failslab"); |
3147 | if (err) | 3147 | if (err) |
3148 | return err; | 3148 | return err; |
3149 | dir = failslab.attr.dentries.dir; | 3149 | dir = failslab.attr.dentries.dir; |
3150 | 3150 | ||
3151 | failslab.ignore_gfp_wait_file = | 3151 | failslab.ignore_gfp_wait_file = |
3152 | debugfs_create_bool("ignore-gfp-wait", mode, dir, | 3152 | debugfs_create_bool("ignore-gfp-wait", mode, dir, |
3153 | &failslab.ignore_gfp_wait); | 3153 | &failslab.ignore_gfp_wait); |
3154 | 3154 | ||
3155 | if (!failslab.ignore_gfp_wait_file) { | 3155 | if (!failslab.ignore_gfp_wait_file) { |
3156 | err = -ENOMEM; | 3156 | err = -ENOMEM; |
3157 | debugfs_remove(failslab.ignore_gfp_wait_file); | 3157 | debugfs_remove(failslab.ignore_gfp_wait_file); |
3158 | cleanup_fault_attr_dentries(&failslab.attr); | 3158 | cleanup_fault_attr_dentries(&failslab.attr); |
3159 | } | 3159 | } |
3160 | 3160 | ||
3161 | return err; | 3161 | return err; |
3162 | } | 3162 | } |
3163 | 3163 | ||
3164 | late_initcall(failslab_debugfs); | 3164 | late_initcall(failslab_debugfs); |
3165 | 3165 | ||
3166 | #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ | 3166 | #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ |
3167 | 3167 | ||
3168 | #else /* CONFIG_FAILSLAB */ | 3168 | #else /* CONFIG_FAILSLAB */ |
3169 | 3169 | ||
3170 | static inline int should_failslab(struct kmem_cache *cachep, gfp_t flags) | 3170 | static inline int should_failslab(struct kmem_cache *cachep, gfp_t flags) |
3171 | { | 3171 | { |
3172 | return 0; | 3172 | return 0; |
3173 | } | 3173 | } |
3174 | 3174 | ||
3175 | #endif /* CONFIG_FAILSLAB */ | 3175 | #endif /* CONFIG_FAILSLAB */ |
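The failslab filter above exempts the bootstrap cache_cache, __GFP_NOFAIL allocations (whose callers cannot handle failure) and, by default, sleepable __GFP_WAIT allocations; only what remains reaches should_fail(). The failslab= boot parameter registered here takes the four comma-separated fault_attr values (interval, probability, space, times) that setup_fault_attr() parses. A userspace model of the gating order, with a crude every-Nth counter standing in for the real fault_attr bookkeeping (the MY_GFP_* flags are illustrative):

    #include <stdio.h>

    #define MY_GFP_WAIT   0x01      /* allocation may sleep */
    #define MY_GFP_NOFAIL 0x02      /* allocation must not fail */

    static unsigned long interval = 4;      /* fail every 4th eligible alloc */
    static unsigned long count;
    static int ignore_gfp_wait = 1;         /* default, as in the struct above */

    static int should_fail_model(unsigned flags)
    {
            if (flags & MY_GFP_NOFAIL)
                    return 0;               /* caller cannot handle failure */
            if (ignore_gfp_wait && (flags & MY_GFP_WAIT))
                    return 0;               /* sleepable allocs are exempt */
            return ++count % interval == 0; /* stand-in for should_fail() */
    }

    int main(void)
    {
            unsigned i;

            for (i = 0; i < 8; i++)
                    printf("atomic alloc %u: %s\n", i,
                           should_fail_model(0) ? "injected failure" : "ok");
            return 0;
    }

Keeping ignore_gfp_wait set confines the injected failures to atomic allocations, which are the error paths callers most often get wrong.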
3176 | 3176 | ||
3177 | static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | 3177 | static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) |
3178 | { | 3178 | { |
3179 | void *objp; | 3179 | void *objp; |
3180 | struct array_cache *ac; | 3180 | struct array_cache *ac; |
3181 | 3181 | ||
3182 | check_irq_off(); | 3182 | check_irq_off(); |
3183 | 3183 | ||
3184 | ac = cpu_cache_get(cachep); | 3184 | ac = cpu_cache_get(cachep); |
3185 | if (likely(ac->avail)) { | 3185 | if (likely(ac->avail)) { |
3186 | STATS_INC_ALLOCHIT(cachep); | 3186 | STATS_INC_ALLOCHIT(cachep); |
3187 | ac->touched = 1; | 3187 | ac->touched = 1; |
3188 | objp = ac->entry[--ac->avail]; | 3188 | objp = ac->entry[--ac->avail]; |
3189 | } else { | 3189 | } else { |
3190 | STATS_INC_ALLOCMISS(cachep); | 3190 | STATS_INC_ALLOCMISS(cachep); |
3191 | objp = cache_alloc_refill(cachep, flags); | 3191 | objp = cache_alloc_refill(cachep, flags); |
3192 | } | 3192 | } |
3193 | return objp; | 3193 | return objp; |
3194 | } | 3194 | } |
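The fast path above is a plain per-cpu LIFO: a hit pops the most recently freed, still cache-warm object, while a miss falls into cache_alloc_refill() to restock from the per-node slab lists. A self-contained userspace model of the array_cache behaviour (refill() here just fabricates pointers with malloc(); the kernel restocks from slabs, and the touched flag is omitted):

    #include <stdio.h>
    #include <stdlib.h>

    struct array_cache_model {
            unsigned avail;         /* objects currently stacked */
            unsigned limit;         /* capacity */
            void *entry[8];         /* LIFO stack of object pointers */
    };

    /* Stand-in for cache_alloc_refill(): invent some objects. */
    static void refill(struct array_cache_model *ac)
    {
            while (ac->avail < 4)
                    ac->entry[ac->avail++] = malloc(32);
    }

    static void *model_alloc(struct array_cache_model *ac)
    {
            if (ac->avail)                          /* ALLOCHIT */
                    return ac->entry[--ac->avail];  /* pop: most recently freed */
            refill(ac);                             /* ALLOCMISS */
            return ac->entry[--ac->avail];
    }

    static void model_free(struct array_cache_model *ac, void *objp)
    {
            if (ac->avail < ac->limit)              /* FREEHIT: just push */
                    ac->entry[ac->avail++] = objp;
            else
                    free(objp); /* FREEMISS: kernel batches to shared/node lists */
    }

    int main(void)
    {
            struct array_cache_model ac = { .avail = 0, .limit = 8 };
            void *a = model_alloc(&ac);

            model_free(&ac, a);
            printf("refaulted object is cache-warm: %p == %p\n", a, model_alloc(&ac));
            return 0;
    }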
3195 | 3195 | ||
3196 | #ifdef CONFIG_NUMA | 3196 | #ifdef CONFIG_NUMA |
3197 | /* | 3197 | /* |
3198 | * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY. | 3198 | * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY. |
3199 | * | 3199 | * |
3200 | * If we are in_interrupt, then process context, including cpusets and | 3200 | * If we are in_interrupt, then process context, including cpusets and |
3201 | * mempolicy, may not apply and should not be used for allocation policy. | 3201 | * mempolicy, may not apply and should not be used for allocation policy. |
3202 | */ | 3202 | */ |
3203 | static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) | 3203 | static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) |
3204 | { | 3204 | { |
3205 | int nid_alloc, nid_here; | 3205 | int nid_alloc, nid_here; |
3206 | 3206 | ||
3207 | if (in_interrupt() || (flags & __GFP_THISNODE)) | 3207 | if (in_interrupt() || (flags & __GFP_THISNODE)) |
3208 | return NULL; | 3208 | return NULL; |
3209 | nid_alloc = nid_here = numa_node_id(); | 3209 | nid_alloc = nid_here = numa_node_id(); |
3210 | if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) | 3210 | if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) |
3211 | nid_alloc = cpuset_mem_spread_node(); | 3211 | nid_alloc = cpuset_mem_spread_node(); |
3212 | else if (current->mempolicy) | 3212 | else if (current->mempolicy) |
3213 | nid_alloc = slab_node(current->mempolicy); | 3213 | nid_alloc = slab_node(current->mempolicy); |
3214 | if (nid_alloc != nid_here) | 3214 | if (nid_alloc != nid_here) |
3215 | return ____cache_alloc_node(cachep, flags, nid_alloc); | 3215 | return ____cache_alloc_node(cachep, flags, nid_alloc); |
3216 | return NULL; | 3216 | return NULL; |
3217 | } | 3217 | } |
3218 | 3218 | ||
3219 | /* | 3219 | /* |
3220 | * Fallback function if there was no memory available and no objects on a | 3220 | * Fallback function if there was no memory available and no objects on a |
3221 | * certain node and falling back is permitted. First we scan all the | 3221 | * certain node and falling back is permitted. First we scan all the |
3222 | * available nodelists for available objects. If that fails then we | 3222 | * available nodelists for available objects. If that fails then we |
3223 | * perform an allocation without specifying a node. This allows the page | 3223 | * perform an allocation without specifying a node. This allows the page |
3224 | * allocator to do its reclaim / fallback magic. We then insert the | 3224 | * allocator to do its reclaim / fallback magic. We then insert the |
3225 | * slab into the proper nodelist and then allocate from it. | 3225 | * slab into the proper nodelist and then allocate from it. |
3226 | */ | 3226 | */ |
3227 | static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) | 3227 | static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) |
3228 | { | 3228 | { |
3229 | struct zonelist *zonelist; | 3229 | struct zonelist *zonelist; |
3230 | gfp_t local_flags; | 3230 | gfp_t local_flags; |
3231 | struct zone **z; | 3231 | struct zone **z; |
3232 | void *obj = NULL; | 3232 | void *obj = NULL; |
3233 | int nid; | 3233 | int nid; |
3234 | 3234 | ||
3235 | if (flags & __GFP_THISNODE) | 3235 | if (flags & __GFP_THISNODE) |
3236 | return NULL; | 3236 | return NULL; |
3237 | 3237 | ||
3238 | zonelist = &NODE_DATA(slab_node(current->mempolicy)) | 3238 | zonelist = &NODE_DATA(slab_node(current->mempolicy)) |
3239 | ->node_zonelists[gfp_zone(flags)]; | 3239 | ->node_zonelists[gfp_zone(flags)]; |
3240 | local_flags = (flags & GFP_LEVEL_MASK); | 3240 | local_flags = (flags & GFP_LEVEL_MASK); |
3241 | 3241 | ||
3242 | retry: | 3242 | retry: |
3243 | /* | 3243 | /* |
3244 | * Look through allowed nodes for objects available | 3244 | * Look through allowed nodes for objects available |
3245 | * from existing per node queues. | 3245 | * from existing per node queues. |
3246 | */ | 3246 | */ |
3247 | for (z = zonelist->zones; *z && !obj; z++) { | 3247 | for (z = zonelist->zones; *z && !obj; z++) { |
3248 | nid = zone_to_nid(*z); | 3248 | nid = zone_to_nid(*z); |
3249 | 3249 | ||
3250 | if (cpuset_zone_allowed_hardwall(*z, flags) && | 3250 | if (cpuset_zone_allowed_hardwall(*z, flags) && |
3251 | cache->nodelists[nid] && | 3251 | cache->nodelists[nid] && |
3252 | cache->nodelists[nid]->free_objects) | 3252 | cache->nodelists[nid]->free_objects) |
3253 | obj = ____cache_alloc_node(cache, | 3253 | obj = ____cache_alloc_node(cache, |
3254 | flags | GFP_THISNODE, nid); | 3254 | flags | GFP_THISNODE, nid); |
3255 | } | 3255 | } |
3256 | 3256 | ||
3257 | if (!obj) { | 3257 | if (!obj) { |
3258 | /* | 3258 | /* |
3259 | * This allocation will be performed within the constraints | 3259 | * This allocation will be performed within the constraints |
3260 | * of the current cpuset / memory policy requirements. | 3260 | * of the current cpuset / memory policy requirements. |
3261 | * We may trigger various forms of reclaim on the allowed | 3261 | * We may trigger various forms of reclaim on the allowed |
3262 | * set and go into memory reserves if necessary. | 3262 | * set and go into memory reserves if necessary. |
3263 | */ | 3263 | */ |
3264 | if (local_flags & __GFP_WAIT) | 3264 | if (local_flags & __GFP_WAIT) |
3265 | local_irq_enable(); | 3265 | local_irq_enable(); |
3266 | kmem_flagcheck(cache, flags); | 3266 | kmem_flagcheck(cache, flags); |
3267 | obj = kmem_getpages(cache, flags, -1); | 3267 | obj = kmem_getpages(cache, flags, -1); |
3268 | if (local_flags & __GFP_WAIT) | 3268 | if (local_flags & __GFP_WAIT) |
3269 | local_irq_disable(); | 3269 | local_irq_disable(); |
3270 | if (obj) { | 3270 | if (obj) { |
3271 | /* | 3271 | /* |
3272 | * Insert into the appropriate per node queues | 3272 | * Insert into the appropriate per node queues |
3273 | */ | 3273 | */ |
3274 | nid = page_to_nid(virt_to_page(obj)); | 3274 | nid = page_to_nid(virt_to_page(obj)); |
3275 | if (cache_grow(cache, flags, nid, obj)) { | 3275 | if (cache_grow(cache, flags, nid, obj)) { |
3276 | obj = ____cache_alloc_node(cache, | 3276 | obj = ____cache_alloc_node(cache, |
3277 | flags | GFP_THISNODE, nid); | 3277 | flags | GFP_THISNODE, nid); |
3278 | if (!obj) | 3278 | if (!obj) |
3279 | /* | 3279 | /* |
3280 | * Another processor may allocate the | 3280 | * Another processor may allocate the |
3281 | * objects in the slab since we are | 3281 | * objects in the slab since we are |
3282 | * not holding any locks. | 3282 | * not holding any locks. |
3283 | */ | 3283 | */ |
3284 | goto retry; | 3284 | goto retry; |
3285 | } else { | 3285 | } else { |
3286 | /* cache_grow already freed obj */ | 3286 | /* cache_grow already freed obj */ |
3287 | obj = NULL; | 3287 | obj = NULL; |
3288 | } | 3288 | } |
3289 | } | 3289 | } |
3290 | } | 3290 | } |
3291 | return obj; | 3291 | return obj; |
3292 | } | 3292 | } |
3293 | 3293 | ||
3294 | /* | 3294 | /* |
3295 | * An interface to enable slab creation on nodeid | 3295 | * An interface to enable slab creation on nodeid |
3296 | */ | 3296 | */ |
3297 | static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, | 3297 | static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, |
3298 | int nodeid) | 3298 | int nodeid) |
3299 | { | 3299 | { |
3300 | struct list_head *entry; | 3300 | struct list_head *entry; |
3301 | struct slab *slabp; | 3301 | struct slab *slabp; |
3302 | struct kmem_list3 *l3; | 3302 | struct kmem_list3 *l3; |
3303 | void *obj; | 3303 | void *obj; |
3304 | int x; | 3304 | int x; |
3305 | 3305 | ||
3306 | l3 = cachep->nodelists[nodeid]; | 3306 | l3 = cachep->nodelists[nodeid]; |
3307 | BUG_ON(!l3); | 3307 | BUG_ON(!l3); |
3308 | 3308 | ||
3309 | retry: | 3309 | retry: |
3310 | check_irq_off(); | 3310 | check_irq_off(); |
3311 | spin_lock(&l3->list_lock); | 3311 | spin_lock(&l3->list_lock); |
3312 | entry = l3->slabs_partial.next; | 3312 | entry = l3->slabs_partial.next; |
3313 | if (entry == &l3->slabs_partial) { | 3313 | if (entry == &l3->slabs_partial) { |
3314 | l3->free_touched = 1; | 3314 | l3->free_touched = 1; |
3315 | entry = l3->slabs_free.next; | 3315 | entry = l3->slabs_free.next; |
3316 | if (entry == &l3->slabs_free) | 3316 | if (entry == &l3->slabs_free) |
3317 | goto must_grow; | 3317 | goto must_grow; |
3318 | } | 3318 | } |
3319 | 3319 | ||
3320 | slabp = list_entry(entry, struct slab, list); | 3320 | slabp = list_entry(entry, struct slab, list); |
3321 | check_spinlock_acquired_node(cachep, nodeid); | 3321 | check_spinlock_acquired_node(cachep, nodeid); |
3322 | check_slabp(cachep, slabp); | 3322 | check_slabp(cachep, slabp); |
3323 | 3323 | ||
3324 | STATS_INC_NODEALLOCS(cachep); | 3324 | STATS_INC_NODEALLOCS(cachep); |
3325 | STATS_INC_ACTIVE(cachep); | 3325 | STATS_INC_ACTIVE(cachep); |
3326 | STATS_SET_HIGH(cachep); | 3326 | STATS_SET_HIGH(cachep); |
3327 | 3327 | ||
3328 | BUG_ON(slabp->inuse == cachep->num); | 3328 | BUG_ON(slabp->inuse == cachep->num); |
3329 | 3329 | ||
3330 | obj = slab_get_obj(cachep, slabp, nodeid); | 3330 | obj = slab_get_obj(cachep, slabp, nodeid); |
3331 | check_slabp(cachep, slabp); | 3331 | check_slabp(cachep, slabp); |
3332 | l3->free_objects--; | 3332 | l3->free_objects--; |
3333 | /* move slabp to correct slabp list: */ | 3333 | /* move slabp to correct slabp list: */ |
3334 | list_del(&slabp->list); | 3334 | list_del(&slabp->list); |
3335 | 3335 | ||
3336 | if (slabp->free == BUFCTL_END) | 3336 | if (slabp->free == BUFCTL_END) |
3337 | list_add(&slabp->list, &l3->slabs_full); | 3337 | list_add(&slabp->list, &l3->slabs_full); |
3338 | else | 3338 | else |
3339 | list_add(&slabp->list, &l3->slabs_partial); | 3339 | list_add(&slabp->list, &l3->slabs_partial); |
3340 | 3340 | ||
3341 | spin_unlock(&l3->list_lock); | 3341 | spin_unlock(&l3->list_lock); |
3342 | goto done; | 3342 | goto done; |
3343 | 3343 | ||
3344 | must_grow: | 3344 | must_grow: |
3345 | spin_unlock(&l3->list_lock); | 3345 | spin_unlock(&l3->list_lock); |
3346 | x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); | 3346 | x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); |
3347 | if (x) | 3347 | if (x) |
3348 | goto retry; | 3348 | goto retry; |
3349 | 3349 | ||
3350 | return fallback_alloc(cachep, flags); | 3350 | return fallback_alloc(cachep, flags); |
3351 | 3351 | ||
3352 | done: | 3352 | done: |
3353 | return obj; | 3353 | return obj; |
3354 | } | 3354 | } |
3355 | 3355 | ||
3356 | /** | 3356 | /** |
3357 | * kmem_cache_alloc_node - Allocate an object on the specified node | 3357 | * kmem_cache_alloc_node - Allocate an object on the specified node |
3358 | * @cachep: The cache to allocate from. | 3358 | * @cachep: The cache to allocate from. |
3359 | * @flags: See kmalloc(). | 3359 | * @flags: See kmalloc(). |
3360 | * @nodeid: node number of the target node. | 3360 | * @nodeid: node number of the target node. |
3361 | * @caller: return address of caller, used for debug information | 3361 | * @caller: return address of caller, used for debug information |
3362 | * | 3362 | * |
3363 | * Identical to kmem_cache_alloc but it will allocate memory on the given | 3363 | * Identical to kmem_cache_alloc but it will allocate memory on the given |
3364 | * node, which can improve the performance for cpu bound structures. | 3364 | * node, which can improve the performance for cpu bound structures. |
3365 | * | 3365 | * |
3366 | * Fallback to other node is possible if __GFP_THISNODE is not set. | 3366 | * Fallback to other node is possible if __GFP_THISNODE is not set. |
3367 | */ | 3367 | */ |
3368 | static __always_inline void * | 3368 | static __always_inline void * |
3369 | __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | 3369 | __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, |
3370 | void *caller) | 3370 | void *caller) |
3371 | { | 3371 | { |
3372 | unsigned long save_flags; | 3372 | unsigned long save_flags; |
3373 | void *ptr; | 3373 | void *ptr; |
3374 | 3374 | ||
3375 | if (should_failslab(cachep, flags)) | 3375 | if (should_failslab(cachep, flags)) |
3376 | return NULL; | 3376 | return NULL; |
3377 | 3377 | ||
3378 | cache_alloc_debugcheck_before(cachep, flags); | 3378 | cache_alloc_debugcheck_before(cachep, flags); |
3379 | local_irq_save(save_flags); | 3379 | local_irq_save(save_flags); |
3380 | 3380 | ||
3381 | if (unlikely(nodeid == -1)) | 3381 | if (unlikely(nodeid == -1)) |
3382 | nodeid = numa_node_id(); | 3382 | nodeid = numa_node_id(); |
3383 | 3383 | ||
3384 | if (unlikely(!cachep->nodelists[nodeid])) { | 3384 | if (unlikely(!cachep->nodelists[nodeid])) { |
3385 | /* Node not bootstrapped yet */ | 3385 | /* Node not bootstrapped yet */ |
3386 | ptr = fallback_alloc(cachep, flags); | 3386 | ptr = fallback_alloc(cachep, flags); |
3387 | goto out; | 3387 | goto out; |
3388 | } | 3388 | } |
3389 | 3389 | ||
3390 | if (nodeid == numa_node_id()) { | 3390 | if (nodeid == numa_node_id()) { |
3391 | /* | 3391 | /* |
3392 | * Use the locally cached objects if possible. | 3392 | * Use the locally cached objects if possible. |
3393 | * However ____cache_alloc does not allow fallback | 3393 | * However ____cache_alloc does not allow fallback |
3394 | * to other nodes. It may fail while we still have | 3394 | * to other nodes. It may fail while we still have |
3395 | * objects on other nodes available. | 3395 | * objects on other nodes available. |
3396 | */ | 3396 | */ |
3397 | ptr = ____cache_alloc(cachep, flags); | 3397 | ptr = ____cache_alloc(cachep, flags); |
3398 | if (ptr) | 3398 | if (ptr) |
3399 | goto out; | 3399 | goto out; |
3400 | } | 3400 | } |
3401 | /* ___cache_alloc_node can fall back to other nodes */ | 3401 | /* ___cache_alloc_node can fall back to other nodes */ |
3402 | ptr = ____cache_alloc_node(cachep, flags, nodeid); | 3402 | ptr = ____cache_alloc_node(cachep, flags, nodeid); |
3403 | out: | 3403 | out: |
3404 | local_irq_restore(save_flags); | 3404 | local_irq_restore(save_flags); |
3405 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); | 3405 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); |
3406 | 3406 | ||
3407 | return ptr; | 3407 | return ptr; |
3408 | } | 3408 | } |
3409 | 3409 | ||
3410 | static __always_inline void * | 3410 | static __always_inline void * |
3411 | __do_cache_alloc(struct kmem_cache *cache, gfp_t flags) | 3411 | __do_cache_alloc(struct kmem_cache *cache, gfp_t flags) |
3412 | { | 3412 | { |
3413 | void *objp; | 3413 | void *objp; |
3414 | 3414 | ||
3415 | if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { | 3415 | if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { |
3416 | objp = alternate_node_alloc(cache, flags); | 3416 | objp = alternate_node_alloc(cache, flags); |
3417 | if (objp) | 3417 | if (objp) |
3418 | goto out; | 3418 | goto out; |
3419 | } | 3419 | } |
3420 | objp = ____cache_alloc(cache, flags); | 3420 | objp = ____cache_alloc(cache, flags); |
3421 | 3421 | ||
3422 | /* | 3422 | /* |
3423 | * We may just have run out of memory on the local node. | 3423 | * We may just have run out of memory on the local node. |
3424 | * ____cache_alloc_node() knows how to locate memory on other nodes | 3424 | * ____cache_alloc_node() knows how to locate memory on other nodes |
3425 | */ | 3425 | */ |
3426 | if (!objp) | 3426 | if (!objp) |
3427 | objp = ____cache_alloc_node(cache, flags, numa_node_id()); | 3427 | objp = ____cache_alloc_node(cache, flags, numa_node_id()); |
3428 | 3428 | ||
3429 | out: | 3429 | out: |
3430 | return objp; | 3430 | return objp; |
3431 | } | 3431 | } |
3432 | #else | 3432 | #else |
3433 | 3433 | ||
3434 | static __always_inline void * | 3434 | static __always_inline void * |
3435 | __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags) | 3435 | __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags) |
3436 | { | 3436 | { |
3437 | return ____cache_alloc(cachep, flags); | 3437 | return ____cache_alloc(cachep, flags); |
3438 | } | 3438 | } |
3439 | 3439 | ||
3440 | #endif /* CONFIG_NUMA */ | 3440 | #endif /* CONFIG_NUMA */ |
3441 | 3441 | ||
3442 | static __always_inline void * | 3442 | static __always_inline void * |
3443 | __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | 3443 | __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) |
3444 | { | 3444 | { |
3445 | unsigned long save_flags; | 3445 | unsigned long save_flags; |
3446 | void *objp; | 3446 | void *objp; |
3447 | 3447 | ||
3448 | if (should_failslab(cachep, flags)) | 3448 | if (should_failslab(cachep, flags)) |
3449 | return NULL; | 3449 | return NULL; |
3450 | 3450 | ||
3451 | cache_alloc_debugcheck_before(cachep, flags); | 3451 | cache_alloc_debugcheck_before(cachep, flags); |
3452 | local_irq_save(save_flags); | 3452 | local_irq_save(save_flags); |
3453 | objp = __do_cache_alloc(cachep, flags); | 3453 | objp = __do_cache_alloc(cachep, flags); |
3454 | local_irq_restore(save_flags); | 3454 | local_irq_restore(save_flags); |
3455 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); | 3455 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); |
3456 | prefetchw(objp); | 3456 | prefetchw(objp); |
3457 | 3457 | ||
3458 | return objp; | 3458 | return objp; |
3459 | } | 3459 | } |
3460 | 3460 | ||
3461 | /* | 3461 | /* |
3462 | * Caller needs to acquire correct kmem_list's list_lock | 3462 | * Caller needs to acquire correct kmem_list's list_lock |
3463 | */ | 3463 | */ |
3464 | static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, | 3464 | static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, |
3465 | int node) | 3465 | int node) |
3466 | { | 3466 | { |
3467 | int i; | 3467 | int i; |
3468 | struct kmem_list3 *l3; | 3468 | struct kmem_list3 *l3; |
3469 | 3469 | ||
3470 | for (i = 0; i < nr_objects; i++) { | 3470 | for (i = 0; i < nr_objects; i++) { |
3471 | void *objp = objpp[i]; | 3471 | void *objp = objpp[i]; |
3472 | struct slab *slabp; | 3472 | struct slab *slabp; |
3473 | 3473 | ||
3474 | slabp = virt_to_slab(objp); | 3474 | slabp = virt_to_slab(objp); |
3475 | l3 = cachep->nodelists[node]; | 3475 | l3 = cachep->nodelists[node]; |
3476 | list_del(&slabp->list); | 3476 | list_del(&slabp->list); |
3477 | check_spinlock_acquired_node(cachep, node); | 3477 | check_spinlock_acquired_node(cachep, node); |
3478 | check_slabp(cachep, slabp); | 3478 | check_slabp(cachep, slabp); |
3479 | slab_put_obj(cachep, slabp, objp, node); | 3479 | slab_put_obj(cachep, slabp, objp, node); |
3480 | STATS_DEC_ACTIVE(cachep); | 3480 | STATS_DEC_ACTIVE(cachep); |
3481 | l3->free_objects++; | 3481 | l3->free_objects++; |
3482 | check_slabp(cachep, slabp); | 3482 | check_slabp(cachep, slabp); |
3483 | 3483 | ||
3484 | /* fixup slab chains */ | 3484 | /* fixup slab chains */ |
3485 | if (slabp->inuse == 0) { | 3485 | if (slabp->inuse == 0) { |
3486 | if (l3->free_objects > l3->free_limit) { | 3486 | if (l3->free_objects > l3->free_limit) { |
3487 | l3->free_objects -= cachep->num; | 3487 | l3->free_objects -= cachep->num; |
3488 | /* No need to drop any previously held | 3488 | /* No need to drop any previously held |
3489 | * lock here; even if we have an off-slab slab | 3489 | * lock here; even if we have an off-slab slab |
3490 | * descriptor, it is guaranteed to come from | 3490 | * descriptor, it is guaranteed to come from |
3491 | * a different cache - refer to the comments | 3491 | * a different cache - refer to the comments |
3492 | * before alloc_slabmgmt. | 3492 | * before alloc_slabmgmt. |
3493 | */ | 3493 | */ |
3494 | slab_destroy(cachep, slabp); | 3494 | slab_destroy(cachep, slabp); |
3495 | } else { | 3495 | } else { |
3496 | list_add(&slabp->list, &l3->slabs_free); | 3496 | list_add(&slabp->list, &l3->slabs_free); |
3497 | } | 3497 | } |
3498 | } else { | 3498 | } else { |
3499 | /* Unconditionally move a slab to the end of the | 3499 | /* Unconditionally move a slab to the end of the |
3500 | * partial list on free - maximum time for the | 3500 | * partial list on free - maximum time for the |
3501 | * other objects to be freed, too. | 3501 | * other objects to be freed, too. |
3502 | */ | 3502 | */ |
3503 | list_add_tail(&slabp->list, &l3->slabs_partial); | 3503 | list_add_tail(&slabp->list, &l3->slabs_partial); |
3504 | } | 3504 | } |
3505 | } | 3505 | } |
3506 | } | 3506 | } |
3507 | 3507 | ||
3508 | static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | 3508 | static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) |
3509 | { | 3509 | { |
3510 | int batchcount; | 3510 | int batchcount; |
3511 | struct kmem_list3 *l3; | 3511 | struct kmem_list3 *l3; |
3512 | int node = numa_node_id(); | 3512 | int node = numa_node_id(); |
3513 | 3513 | ||
3514 | batchcount = ac->batchcount; | 3514 | batchcount = ac->batchcount; |
3515 | #if DEBUG | 3515 | #if DEBUG |
3516 | BUG_ON(!batchcount || batchcount > ac->avail); | 3516 | BUG_ON(!batchcount || batchcount > ac->avail); |
3517 | #endif | 3517 | #endif |
3518 | check_irq_off(); | 3518 | check_irq_off(); |
3519 | l3 = cachep->nodelists[node]; | 3519 | l3 = cachep->nodelists[node]; |
3520 | spin_lock(&l3->list_lock); | 3520 | spin_lock(&l3->list_lock); |
3521 | if (l3->shared) { | 3521 | if (l3->shared) { |
3522 | struct array_cache *shared_array = l3->shared; | 3522 | struct array_cache *shared_array = l3->shared; |
3523 | int max = shared_array->limit - shared_array->avail; | 3523 | int max = shared_array->limit - shared_array->avail; |
3524 | if (max) { | 3524 | if (max) { |
3525 | if (batchcount > max) | 3525 | if (batchcount > max) |
3526 | batchcount = max; | 3526 | batchcount = max; |
3527 | memcpy(&(shared_array->entry[shared_array->avail]), | 3527 | memcpy(&(shared_array->entry[shared_array->avail]), |
3528 | ac->entry, sizeof(void *) * batchcount); | 3528 | ac->entry, sizeof(void *) * batchcount); |
3529 | shared_array->avail += batchcount; | 3529 | shared_array->avail += batchcount; |
3530 | goto free_done; | 3530 | goto free_done; |
3531 | } | 3531 | } |
3532 | } | 3532 | } |
3533 | 3533 | ||
3534 | free_block(cachep, ac->entry, batchcount, node); | 3534 | free_block(cachep, ac->entry, batchcount, node); |
3535 | free_done: | 3535 | free_done: |
3536 | #if STATS | 3536 | #if STATS |
3537 | { | 3537 | { |
3538 | int i = 0; | 3538 | int i = 0; |
3539 | struct list_head *p; | 3539 | struct list_head *p; |
3540 | 3540 | ||
3541 | p = l3->slabs_free.next; | 3541 | p = l3->slabs_free.next; |
3542 | while (p != &(l3->slabs_free)) { | 3542 | while (p != &(l3->slabs_free)) { |
3543 | struct slab *slabp; | 3543 | struct slab *slabp; |
3544 | 3544 | ||
3545 | slabp = list_entry(p, struct slab, list); | 3545 | slabp = list_entry(p, struct slab, list); |
3546 | BUG_ON(slabp->inuse); | 3546 | BUG_ON(slabp->inuse); |
3547 | 3547 | ||
3548 | i++; | 3548 | i++; |
3549 | p = p->next; | 3549 | p = p->next; |
3550 | } | 3550 | } |
3551 | STATS_SET_FREEABLE(cachep, i); | 3551 | STATS_SET_FREEABLE(cachep, i); |
3552 | } | 3552 | } |
3553 | #endif | 3553 | #endif |
3554 | spin_unlock(&l3->list_lock); | 3554 | spin_unlock(&l3->list_lock); |
3555 | ac->avail -= batchcount; | 3555 | ac->avail -= batchcount; |
3556 | memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); | 3556 | memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); |
3557 | } | 3557 | } |
3558 | 3558 | ||
3559 | /* | 3559 | /* |
3560 | * Release an obj back to its cache. If the obj has a constructed state, it must | 3560 | * Release an obj back to its cache. If the obj has a constructed state, it must |
3561 | * be in this state _before_ it is released. Called with interrupts disabled. | 3561 | * be in this state _before_ it is released. Called with interrupts disabled. |
3562 | */ | 3562 | */ |
3563 | static inline void __cache_free(struct kmem_cache *cachep, void *objp) | 3563 | static inline void __cache_free(struct kmem_cache *cachep, void *objp) |
3564 | { | 3564 | { |
3565 | struct array_cache *ac = cpu_cache_get(cachep); | 3565 | struct array_cache *ac = cpu_cache_get(cachep); |
3566 | 3566 | ||
3567 | check_irq_off(); | 3567 | check_irq_off(); |
3568 | objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); | 3568 | objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); |
3569 | 3569 | ||
3570 | if (use_alien_caches && cache_free_alien(cachep, objp)) | 3570 | if (use_alien_caches && cache_free_alien(cachep, objp)) |
3571 | return; | 3571 | return; |
3572 | 3572 | ||
3573 | if (likely(ac->avail < ac->limit)) { | 3573 | if (likely(ac->avail < ac->limit)) { |
3574 | STATS_INC_FREEHIT(cachep); | 3574 | STATS_INC_FREEHIT(cachep); |
3575 | ac->entry[ac->avail++] = objp; | 3575 | ac->entry[ac->avail++] = objp; |
3576 | return; | 3576 | return; |
3577 | } else { | 3577 | } else { |
3578 | STATS_INC_FREEMISS(cachep); | 3578 | STATS_INC_FREEMISS(cachep); |
3579 | cache_flusharray(cachep, ac); | 3579 | cache_flusharray(cachep, ac); |
3580 | ac->entry[ac->avail++] = objp; | 3580 | ac->entry[ac->avail++] = objp; |
3581 | } | 3581 | } |
3582 | } | 3582 | } |
3583 | 3583 | ||
3584 | /** | 3584 | /** |
3585 | * kmem_cache_alloc - Allocate an object | 3585 | * kmem_cache_alloc - Allocate an object |
3586 | * @cachep: The cache to allocate from. | 3586 | * @cachep: The cache to allocate from. |
3587 | * @flags: See kmalloc(). | 3587 | * @flags: See kmalloc(). |
3588 | * | 3588 | * |
3589 | * Allocate an object from this cache. The flags are only relevant | 3589 | * Allocate an object from this cache. The flags are only relevant |
3590 | * if the cache has no available objects. | 3590 | * if the cache has no available objects. |
3591 | */ | 3591 | */ |
3592 | void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) | 3592 | void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) |
3593 | { | 3593 | { |
3594 | return __cache_alloc(cachep, flags, __builtin_return_address(0)); | 3594 | return __cache_alloc(cachep, flags, __builtin_return_address(0)); |
3595 | } | 3595 | } |
3596 | EXPORT_SYMBOL(kmem_cache_alloc); | 3596 | EXPORT_SYMBOL(kmem_cache_alloc); |
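A typical user creates a private cache at init time and allocates from it on demand. A minimal module-style sketch, hedged for this kernel generation's six-argument kmem_cache_create() (NULL ctor and dtor); the cache name, struct and function names are illustrative:

    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/slab.h>

    struct demo_obj {
            int id;
            char name[16];
    };

    static struct kmem_cache *demo_cache;   /* illustrative */

    static int __init demo_init(void)
    {
            struct demo_obj *obj;

            /* ctor/dtor are NULL; this era's kmem_cache_create() takes both. */
            demo_cache = kmem_cache_create("demo_obj", sizeof(struct demo_obj),
                                           0, 0, NULL, NULL);
            if (!demo_cache)
                    return -ENOMEM;

            obj = kmem_cache_alloc(demo_cache, GFP_KERNEL);
            if (obj) {
                    obj->id = 1;
                    kmem_cache_free(demo_cache, obj);
            }
            return 0;
    }

    static void __exit demo_exit(void)
    {
            kmem_cache_destroy(demo_cache);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");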
3597 | 3597 | ||
3598 | /** | 3598 | /** |
3599 | * kmem_cache_zalloc - Allocate an object. The memory is set to zero. | 3599 | * kmem_cache_zalloc - Allocate an object. The memory is set to zero. |
3600 | * @cache: The cache to allocate from. | 3600 | * @cache: The cache to allocate from. |
3601 | * @flags: See kmalloc(). | 3601 | * @flags: See kmalloc(). |
3602 | * | 3602 | * |
3603 | * Allocate an object from this cache and set the allocated memory to zero. | 3603 | * Allocate an object from this cache and set the allocated memory to zero. |
3604 | * The flags are only relevant if the cache has no available objects. | 3604 | * The flags are only relevant if the cache has no available objects. |
3605 | */ | 3605 | */ |
3606 | void *kmem_cache_zalloc(struct kmem_cache *cache, gfp_t flags) | 3606 | void *kmem_cache_zalloc(struct kmem_cache *cache, gfp_t flags) |
3607 | { | 3607 | { |
3608 | void *ret = __cache_alloc(cache, flags, __builtin_return_address(0)); | 3608 | void *ret = __cache_alloc(cache, flags, __builtin_return_address(0)); |
3609 | if (ret) | 3609 | if (ret) |
3610 | memset(ret, 0, obj_size(cache)); | 3610 | memset(ret, 0, obj_size(cache)); |
3611 | return ret; | 3611 | return ret; |
3612 | } | 3612 | } |
3613 | EXPORT_SYMBOL(kmem_cache_zalloc); | 3613 | EXPORT_SYMBOL(kmem_cache_zalloc); |
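kmem_cache_zalloc() is the allocation above plus a memset() of obj_size() bytes, so callers can drop their own zeroing. A short fragment, reusing the hypothetical demo_cache from the previous sketch:

    static int demo_zalloc_example(void)
    {
            struct demo_obj *obj = kmem_cache_zalloc(demo_cache, GFP_KERNEL);

            if (!obj)
                    return -ENOMEM;
            /* every field of *obj starts out zero; no memset() needed */
            kmem_cache_free(demo_cache, obj);
            return 0;
    }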
3614 | 3614 | ||
3615 | /** | 3615 | /** |
3616 | * kmem_ptr_validate - check if an untrusted pointer might | 3616 | * kmem_ptr_validate - check if an untrusted pointer might |
3617 | * be a slab entry. | 3617 | * be a slab entry. |
3618 | * @cachep: the cache we're checking against | 3618 | * @cachep: the cache we're checking against |
3619 | * @ptr: pointer to validate | 3619 | * @ptr: pointer to validate |
3620 | * | 3620 | * |
3621 | * This verifies that the untrusted pointer looks sane: | 3621 | * This verifies that the untrusted pointer looks sane: |
3622 | * it is _not_ a guarantee that the pointer is actually | 3622 | * it is _not_ a guarantee that the pointer is actually |
3623 | * part of the slab cache in question, but it at least | 3623 | * part of the slab cache in question, but it at least |
3624 | * validates that the pointer can be dereferenced and | 3624 | * validates that the pointer can be dereferenced and |
3625 | * looks half-way sane. | 3625 | * looks half-way sane. |
3626 | * | 3626 | * |
3627 | * Currently only used for dentry validation. | 3627 | * Currently only used for dentry validation. |
3628 | */ | 3628 | */ |
3629 | int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr) | 3629 | int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr) |
3630 | { | 3630 | { |
3631 | unsigned long addr = (unsigned long)ptr; | 3631 | unsigned long addr = (unsigned long)ptr; |
3632 | unsigned long min_addr = PAGE_OFFSET; | 3632 | unsigned long min_addr = PAGE_OFFSET; |
3633 | unsigned long align_mask = BYTES_PER_WORD - 1; | 3633 | unsigned long align_mask = BYTES_PER_WORD - 1; |
3634 | unsigned long size = cachep->buffer_size; | 3634 | unsigned long size = cachep->buffer_size; |
3635 | struct page *page; | 3635 | struct page *page; |
3636 | 3636 | ||
3637 | if (unlikely(addr < min_addr)) | 3637 | if (unlikely(addr < min_addr)) |
3638 | goto out; | 3638 | goto out; |
3639 | if (unlikely(addr > (unsigned long)high_memory - size)) | 3639 | if (unlikely(addr > (unsigned long)high_memory - size)) |
3640 | goto out; | 3640 | goto out; |
3641 | if (unlikely(addr & align_mask)) | 3641 | if (unlikely(addr & align_mask)) |
3642 | goto out; | 3642 | goto out; |
3643 | if (unlikely(!kern_addr_valid(addr))) | 3643 | if (unlikely(!kern_addr_valid(addr))) |
3644 | goto out; | 3644 | goto out; |
3645 | if (unlikely(!kern_addr_valid(addr + size - 1))) | 3645 | if (unlikely(!kern_addr_valid(addr + size - 1))) |
3646 | goto out; | 3646 | goto out; |
3647 | page = virt_to_page(ptr); | 3647 | page = virt_to_page(ptr); |
3648 | if (unlikely(!PageSlab(page))) | 3648 | if (unlikely(!PageSlab(page))) |
3649 | goto out; | 3649 | goto out; |
3650 | if (unlikely(page_get_cache(page) != cachep)) | 3650 | if (unlikely(page_get_cache(page) != cachep)) |
3651 | goto out; | 3651 | goto out; |
3652 | return 1; | 3652 | return 1; |
3653 | out: | 3653 | out: |
3654 | return 0; | 3654 | return 0; |
3655 | } | 3655 | } |
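As the comment stresses, a non-zero return is only a plausibility check, not proof of ownership; at this point in time the dcache uses it this way in d_validate(). A hedged fragment, again reusing the hypothetical demo_cache:

    /* Hypothetical lookup helper: dereference an untrusted handle only
     * after the sanity filter passes; a pass is still not a guarantee. */
    static struct demo_obj *demo_lookup(void *handle)
    {
            if (!kmem_ptr_validate(demo_cache, handle))
                    return NULL;    /* not even plausibly one of ours */
            return handle;
    }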
3656 | 3656 | ||
3657 | #ifdef CONFIG_NUMA | 3657 | #ifdef CONFIG_NUMA |
3658 | void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | 3658 | void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) |
3659 | { | 3659 | { |
3660 | return __cache_alloc_node(cachep, flags, nodeid, | 3660 | return __cache_alloc_node(cachep, flags, nodeid, |
3661 | __builtin_return_address(0)); | 3661 | __builtin_return_address(0)); |
3662 | } | 3662 | } |
3663 | EXPORT_SYMBOL(kmem_cache_alloc_node); | 3663 | EXPORT_SYMBOL(kmem_cache_alloc_node); |
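A common pattern is one control structure per node so that later accesses stay node-local. A hedged sketch (demo_cache and struct demo_obj as in the earlier sketch; error unwinding omitted):

    #include <linux/nodemask.h>

    /* demo_cache and struct demo_obj as before (illustrative) */
    static struct demo_obj *per_node[MAX_NUMNODES];

    static int demo_alloc_per_node(void)
    {
            int node;

            for_each_online_node(node) {
                    per_node[node] = kmem_cache_alloc_node(demo_cache,
                                                           GFP_KERNEL, node);
                    if (!per_node[node])
                            return -ENOMEM; /* real code would free the rest */
            }
            return 0;
    }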
3664 | 3664 | ||
3665 | static __always_inline void * | 3665 | static __always_inline void * |
3666 | __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) | 3666 | __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) |
3667 | { | 3667 | { |
3668 | struct kmem_cache *cachep; | 3668 | struct kmem_cache *cachep; |
3669 | 3669 | ||
3670 | cachep = kmem_find_general_cachep(size, flags); | 3670 | cachep = kmem_find_general_cachep(size, flags); |
3671 | if (unlikely(cachep == NULL)) | 3671 | if (unlikely(cachep == NULL)) |
3672 | return NULL; | 3672 | return NULL; |
3673 | return kmem_cache_alloc_node(cachep, flags, node); | 3673 | return kmem_cache_alloc_node(cachep, flags, node); |
3674 | } | 3674 | } |
3675 | 3675 | ||
3676 | #ifdef CONFIG_DEBUG_SLAB | 3676 | #ifdef CONFIG_DEBUG_SLAB |
3677 | void *__kmalloc_node(size_t size, gfp_t flags, int node) | 3677 | void *__kmalloc_node(size_t size, gfp_t flags, int node) |
3678 | { | 3678 | { |
3679 | return __do_kmalloc_node(size, flags, node, | 3679 | return __do_kmalloc_node(size, flags, node, |
3680 | __builtin_return_address(0)); | 3680 | __builtin_return_address(0)); |
3681 | } | 3681 | } |
3682 | EXPORT_SYMBOL(__kmalloc_node); | 3682 | EXPORT_SYMBOL(__kmalloc_node); |
3683 | 3683 | ||
3684 | void *__kmalloc_node_track_caller(size_t size, gfp_t flags, | 3684 | void *__kmalloc_node_track_caller(size_t size, gfp_t flags, |
3685 | int node, void *caller) | 3685 | int node, void *caller) |
3686 | { | 3686 | { |
3687 | return __do_kmalloc_node(size, flags, node, caller); | 3687 | return __do_kmalloc_node(size, flags, node, caller); |
3688 | } | 3688 | } |
3689 | EXPORT_SYMBOL(__kmalloc_node_track_caller); | 3689 | EXPORT_SYMBOL(__kmalloc_node_track_caller); |
3690 | #else | 3690 | #else |
3691 | void *__kmalloc_node(size_t size, gfp_t flags, int node) | 3691 | void *__kmalloc_node(size_t size, gfp_t flags, int node) |
3692 | { | 3692 | { |
3693 | return __do_kmalloc_node(size, flags, node, NULL); | 3693 | return __do_kmalloc_node(size, flags, node, NULL); |
3694 | } | 3694 | } |
3695 | EXPORT_SYMBOL(__kmalloc_node); | 3695 | EXPORT_SYMBOL(__kmalloc_node); |
3696 | #endif /* CONFIG_DEBUG_SLAB */ | 3696 | #endif /* CONFIG_DEBUG_SLAB */ |
3697 | #endif /* CONFIG_NUMA */ | 3697 | #endif /* CONFIG_NUMA */ |
3698 | 3698 | ||
3699 | /** | 3699 | /** |
3700 | * __do_kmalloc - allocate memory | 3700 | * __do_kmalloc - allocate memory |
3701 | * @size: how many bytes of memory are required. | 3701 | * @size: how many bytes of memory are required. |
3702 | * @flags: the type of memory to allocate (see kmalloc). | 3702 | * @flags: the type of memory to allocate (see kmalloc). |
3703 | * @caller: return address of the caller, used for debug tracking | 3703 | * @caller: return address of the caller, used for debug tracking |
3704 | */ | 3704 | */ |
3705 | static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, | 3705 | static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, |
3706 | void *caller) | 3706 | void *caller) |
3707 | { | 3707 | { |
3708 | struct kmem_cache *cachep; | 3708 | struct kmem_cache *cachep; |
3709 | 3709 | ||
3710 | /* If you want to save a few bytes of .text space: replace | 3710 | /* If you want to save a few bytes of .text space: replace |
3711 | * __ with kmem_. | 3711 | * __ with kmem_. |
3712 | * kmalloc() then uses the uninlined functions instead of the | 3712 | * kmalloc() then uses the uninlined functions instead of the |
3713 | * inlined ones. | 3713 | * inlined ones. |
3714 | */ | 3714 | */ |
3715 | cachep = __find_general_cachep(size, flags); | 3715 | cachep = __find_general_cachep(size, flags); |
3716 | if (unlikely(cachep == NULL)) | 3716 | if (unlikely(cachep == NULL)) |
3717 | return NULL; | 3717 | return NULL; |
3718 | return __cache_alloc(cachep, flags, caller); | 3718 | return __cache_alloc(cachep, flags, caller); |
3719 | } | 3719 | } |
3720 | 3720 | ||
3721 | 3721 | ||
3722 | #ifdef CONFIG_DEBUG_SLAB | 3722 | #ifdef CONFIG_DEBUG_SLAB |
3723 | void *__kmalloc(size_t size, gfp_t flags) | 3723 | void *__kmalloc(size_t size, gfp_t flags) |
3724 | { | 3724 | { |
3725 | return __do_kmalloc(size, flags, __builtin_return_address(0)); | 3725 | return __do_kmalloc(size, flags, __builtin_return_address(0)); |
3726 | } | 3726 | } |
3727 | EXPORT_SYMBOL(__kmalloc); | 3727 | EXPORT_SYMBOL(__kmalloc); |
3728 | 3728 | ||
3729 | void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) | 3729 | void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) |
3730 | { | 3730 | { |
3731 | return __do_kmalloc(size, flags, caller); | 3731 | return __do_kmalloc(size, flags, caller); |
3732 | } | 3732 | } |
3733 | EXPORT_SYMBOL(__kmalloc_track_caller); | 3733 | EXPORT_SYMBOL(__kmalloc_track_caller); |
3734 | 3734 | ||
3735 | #else | 3735 | #else |
3736 | void *__kmalloc(size_t size, gfp_t flags) | 3736 | void *__kmalloc(size_t size, gfp_t flags) |
3737 | { | 3737 | { |
3738 | return __do_kmalloc(size, flags, NULL); | 3738 | return __do_kmalloc(size, flags, NULL); |
3739 | } | 3739 | } |
3740 | EXPORT_SYMBOL(__kmalloc); | 3740 | EXPORT_SYMBOL(__kmalloc); |
3741 | #endif | 3741 | #endif |
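With CONFIG_DEBUG_SLAB, the __builtin_return_address(0) cookie recorded here ends up in each object's dbg_userword, which is what the /proc/slab_allocators interface decodes. Allocation wrappers should pass their own caller through kmalloc_track_caller() so the report blames the real call site rather than the wrapper; kstrdup() in the kernel works this way, while this demo_strdup() is a hypothetical sketch:

    #include <linux/slab.h>
    #include <linux/string.h>

    /* Hypothetical strdup-style wrapper: with plain kmalloc() every
     * allocation would be charged to this function in the report;
     * _track_caller charges it to demo_strdup()'s caller instead. */
    static char *demo_strdup(const char *s, gfp_t gfp)
    {
            size_t len = strlen(s) + 1;
            char *p = kmalloc_track_caller(len, gfp);

            if (p)
                    memcpy(p, s, len);
            return p;
    }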
3742 | 3742 | ||
3743 | /** | 3743 | /** |
3744 | * krealloc - reallocate memory. The contents will remain unchanged. | 3744 | * krealloc - reallocate memory. The contents will remain unchanged. |
3745 | * | 3745 | * |
3746 | * @p: object to reallocate memory for. | 3746 | * @p: object to reallocate memory for. |
3747 | * @new_size: how many bytes of memory are required. | 3747 | * @new_size: how many bytes of memory are required. |
3748 | * @flags: the type of memory to allocate. | 3748 | * @flags: the type of memory to allocate. |
3749 | * | 3749 | * |
3750 | * The contents of the object pointed to are preserved up to the | 3750 | * The contents of the object pointed to are preserved up to the |
3751 | * lesser of the new and old sizes. If @p is %NULL, krealloc() | 3751 | * lesser of the new and old sizes. If @p is %NULL, krealloc() |
3752 | * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a | 3752 | * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a |
3753 | * %NULL pointer, the object pointed to is freed. | 3753 | * %NULL pointer, the object pointed to is freed. |
3754 | */ | 3754 | */ |
3755 | void *krealloc(const void *p, size_t new_size, gfp_t flags) | 3755 | void *krealloc(const void *p, size_t new_size, gfp_t flags) |
3756 | { | 3756 | { |
3757 | struct kmem_cache *cache, *new_cache; | 3757 | struct kmem_cache *cache, *new_cache; |
3758 | void *ret; | 3758 | void *ret; |
3759 | 3759 | ||
3760 | if (unlikely(!p)) | 3760 | if (unlikely(!p)) |
3761 | return kmalloc_track_caller(new_size, flags); | 3761 | return kmalloc_track_caller(new_size, flags); |
3762 | 3762 | ||
3763 | if (unlikely(!new_size)) { | 3763 | if (unlikely(!new_size)) { |
3764 | kfree(p); | 3764 | kfree(p); |
3765 | return NULL; | 3765 | return NULL; |
3766 | } | 3766 | } |
3767 | 3767 | ||
3768 | cache = virt_to_cache(p); | 3768 | cache = virt_to_cache(p); |
3769 | new_cache = __find_general_cachep(new_size, flags); | 3769 | new_cache = __find_general_cachep(new_size, flags); |
3770 | 3770 | ||
3771 | /* | 3771 | /* |
3772 | * If new size fits in the current cache, bail out. | 3772 | * If new size fits in the current cache, bail out. |
3773 | */ | 3773 | */ |
3774 | if (likely(cache == new_cache)) | 3774 | if (likely(cache == new_cache)) |
3775 | return (void *)p; | 3775 | return (void *)p; |
3776 | 3776 | ||
3777 | /* | 3777 | /* |
3778 | * We are on the slow-path here so do not use __cache_alloc | 3778 | * We are on the slow-path here so do not use __cache_alloc |
3779 | * because it bloats kernel text. | 3779 | * because it bloats kernel text. |
3780 | */ | 3780 | */ |
3781 | ret = kmalloc_track_caller(new_size, flags); | 3781 | ret = kmalloc_track_caller(new_size, flags); |
3782 | if (ret) { | 3782 | if (ret) { |
3783 | memcpy(ret, p, min(new_size, ksize(p))); | 3783 | memcpy(ret, p, min(new_size, ksize(p))); |
3784 | kfree(p); | 3784 | kfree(p); |
3785 | } | 3785 | } |
3786 | return ret; | 3786 | return ret; |
3787 | } | 3787 | } |
3788 | EXPORT_SYMBOL(krealloc); | 3788 | EXPORT_SYMBOL(krealloc); |
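The kerneldoc semantics in one sketch: a %NULL pointer degenerates to kmalloc(), new_size 0 frees, and a genuine resize preserves min(new, old) bytes. Note from the code above that if the new size still maps to the same general cache, the original pointer comes back unchanged. Function and variable names are illustrative:

    #include <linux/slab.h>
    #include <linux/string.h>

    static int demo_krealloc(void)
    {
            char *buf = krealloc(NULL, 32, GFP_KERNEL); /* == kmalloc(32) */

            if (!buf)
                    return -ENOMEM;
            strcpy(buf, "hello");

            /* May return buf unchanged if 64 bytes still fits the same
             * general cache; otherwise "hello" moves to a new object. */
            buf = krealloc(buf, 64, GFP_KERNEL);
            if (!buf)
                    return -ENOMEM; /* krealloc() does not free the old
                                     * object on failure, so real code
                                     * must keep the old pointer around */

            buf = krealloc(buf, 0, GFP_KERNEL); /* frees, returns NULL */
            return 0;
    }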
3789 | 3789 | ||
3790 | /** | 3790 | /** |
3791 | * kmem_cache_free - Deallocate an object | 3791 | * kmem_cache_free - Deallocate an object |
3792 | * @cachep: The cache the allocation was from. | 3792 | * @cachep: The cache the allocation was from. |
3793 | * @objp: The previously allocated object. | 3793 | * @objp: The previously allocated object. |
3794 | * | 3794 | * |
3795 | * Free an object which was previously allocated from this | 3795 | * Free an object which was previously allocated from this |
3796 | * cache. | 3796 | * cache. |
3797 | */ | 3797 | */ |
3798 | void kmem_cache_free(struct kmem_cache *cachep, void *objp) | 3798 | void kmem_cache_free(struct kmem_cache *cachep, void *objp) |
3799 | { | 3799 | { |
3800 | unsigned long flags; | 3800 | unsigned long flags; |
3801 | 3801 | ||
3802 | BUG_ON(virt_to_cache(objp) != cachep); | 3802 | BUG_ON(virt_to_cache(objp) != cachep); |
3803 | 3803 | ||
3804 | local_irq_save(flags); | 3804 | local_irq_save(flags); |
3805 | debug_check_no_locks_freed(objp, obj_size(cachep)); | 3805 | debug_check_no_locks_freed(objp, obj_size(cachep)); |
3806 | __cache_free(cachep, objp); | 3806 | __cache_free(cachep, objp); |
3807 | local_irq_restore(flags); | 3807 | local_irq_restore(flags); |
3808 | } | 3808 | } |
3809 | EXPORT_SYMBOL(kmem_cache_free); | 3809 | EXPORT_SYMBOL(kmem_cache_free); |
3810 | 3810 | ||
3811 | /** | 3811 | /** |
3812 | * kfree - free previously allocated memory | 3812 | * kfree - free previously allocated memory |
3813 | * @objp: pointer returned by kmalloc. | 3813 | * @objp: pointer returned by kmalloc. |
3814 | * | 3814 | * |
3815 | * If @objp is NULL, no operation is performed. | 3815 | * If @objp is NULL, no operation is performed. |
3816 | * | 3816 | * |
3817 | * Don't free memory not originally allocated by kmalloc() | 3817 | * Don't free memory not originally allocated by kmalloc() |
3818 | * or you will run into trouble. | 3818 | * or you will run into trouble. |
3819 | */ | 3819 | */ |
3820 | void kfree(const void *objp) | 3820 | void kfree(const void *objp) |
3821 | { | 3821 | { |
3822 | struct kmem_cache *c; | 3822 | struct kmem_cache *c; |
3823 | unsigned long flags; | 3823 | unsigned long flags; |
3824 | 3824 | ||
3825 | if (unlikely(!objp)) | 3825 | if (unlikely(!objp)) |
3826 | return; | 3826 | return; |
3827 | local_irq_save(flags); | 3827 | local_irq_save(flags); |
3828 | kfree_debugcheck(objp); | 3828 | kfree_debugcheck(objp); |
3829 | c = virt_to_cache(objp); | 3829 | c = virt_to_cache(objp); |
3830 | debug_check_no_locks_freed(objp, obj_size(c)); | 3830 | debug_check_no_locks_freed(objp, obj_size(c)); |
3831 | __cache_free(c, (void *)objp); | 3831 | __cache_free(c, (void *)objp); |
3832 | local_irq_restore(flags); | 3832 | local_irq_restore(flags); |
3833 | } | 3833 | } |
3834 | EXPORT_SYMBOL(kfree); | 3834 | EXPORT_SYMBOL(kfree); |
3835 | 3835 | ||
3836 | unsigned int kmem_cache_size(struct kmem_cache *cachep) | 3836 | unsigned int kmem_cache_size(struct kmem_cache *cachep) |
3837 | { | 3837 | { |
3838 | return obj_size(cachep); | 3838 | return obj_size(cachep); |
3839 | } | 3839 | } |
3840 | EXPORT_SYMBOL(kmem_cache_size); | 3840 | EXPORT_SYMBOL(kmem_cache_size); |
3841 | 3841 | ||
3842 | const char *kmem_cache_name(struct kmem_cache *cachep) | 3842 | const char *kmem_cache_name(struct kmem_cache *cachep) |
3843 | { | 3843 | { |
3844 | return cachep->name; | 3844 | return cachep->name; |
3845 | } | 3845 | } |
3846 | EXPORT_SYMBOL_GPL(kmem_cache_name); | 3846 | EXPORT_SYMBOL_GPL(kmem_cache_name); |
3847 | 3847 | ||
3848 | /* | 3848 | /* |
3849 | * This initializes kmem_list3 or resizes various caches for all nodes. | 3849 | * This initializes kmem_list3 or resizes various caches for all nodes. |
3850 | */ | 3850 | */ |
3851 | static int alloc_kmemlist(struct kmem_cache *cachep) | 3851 | static int alloc_kmemlist(struct kmem_cache *cachep) |
3852 | { | 3852 | { |
3853 | int node; | 3853 | int node; |
3854 | struct kmem_list3 *l3; | 3854 | struct kmem_list3 *l3; |
3855 | struct array_cache *new_shared; | 3855 | struct array_cache *new_shared; |
3856 | struct array_cache **new_alien = NULL; | 3856 | struct array_cache **new_alien = NULL; |
3857 | 3857 | ||
3858 | for_each_online_node(node) { | 3858 | for_each_online_node(node) { |
3859 | 3859 | ||
3860 | if (use_alien_caches) { | 3860 | if (use_alien_caches) { |
3861 | new_alien = alloc_alien_cache(node, cachep->limit); | 3861 | new_alien = alloc_alien_cache(node, cachep->limit); |
3862 | if (!new_alien) | 3862 | if (!new_alien) |
3863 | goto fail; | 3863 | goto fail; |
3864 | } | 3864 | } |
3865 | 3865 | ||
3866 | new_shared = NULL; | 3866 | new_shared = NULL; |
3867 | if (cachep->shared) { | 3867 | if (cachep->shared) { |
3868 | new_shared = alloc_arraycache(node, | 3868 | new_shared = alloc_arraycache(node, |
3869 | cachep->shared*cachep->batchcount, | 3869 | cachep->shared*cachep->batchcount, |
3870 | 0xbaadf00d); | 3870 | 0xbaadf00d); |
3871 | if (!new_shared) { | 3871 | if (!new_shared) { |
3872 | free_alien_cache(new_alien); | 3872 | free_alien_cache(new_alien); |
3873 | goto fail; | 3873 | goto fail; |
3874 | } | 3874 | } |
3875 | } | 3875 | } |
3876 | 3876 | ||
3877 | l3 = cachep->nodelists[node]; | 3877 | l3 = cachep->nodelists[node]; |
3878 | if (l3) { | 3878 | if (l3) { |
3879 | struct array_cache *shared = l3->shared; | 3879 | struct array_cache *shared = l3->shared; |
3880 | 3880 | ||
3881 | spin_lock_irq(&l3->list_lock); | 3881 | spin_lock_irq(&l3->list_lock); |
3882 | 3882 | ||
3883 | if (shared) | 3883 | if (shared) |
3884 | free_block(cachep, shared->entry, | 3884 | free_block(cachep, shared->entry, |
3885 | shared->avail, node); | 3885 | shared->avail, node); |
3886 | 3886 | ||
3887 | l3->shared = new_shared; | 3887 | l3->shared = new_shared; |
3888 | if (!l3->alien) { | 3888 | if (!l3->alien) { |
3889 | l3->alien = new_alien; | 3889 | l3->alien = new_alien; |
3890 | new_alien = NULL; | 3890 | new_alien = NULL; |
3891 | } | 3891 | } |
3892 | l3->free_limit = (1 + nr_cpus_node(node)) * | 3892 | l3->free_limit = (1 + nr_cpus_node(node)) * |
3893 | cachep->batchcount + cachep->num; | 3893 | cachep->batchcount + cachep->num; |
3894 | spin_unlock_irq(&l3->list_lock); | 3894 | spin_unlock_irq(&l3->list_lock); |
3895 | kfree(shared); | 3895 | kfree(shared); |
3896 | free_alien_cache(new_alien); | 3896 | free_alien_cache(new_alien); |
3897 | continue; | 3897 | continue; |
3898 | } | 3898 | } |
3899 | l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); | 3899 | l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); |
3900 | if (!l3) { | 3900 | if (!l3) { |
3901 | free_alien_cache(new_alien); | 3901 | free_alien_cache(new_alien); |
3902 | kfree(new_shared); | 3902 | kfree(new_shared); |
3903 | goto fail; | 3903 | goto fail; |
3904 | } | 3904 | } |
3905 | 3905 | ||
3906 | kmem_list3_init(l3); | 3906 | kmem_list3_init(l3); |
3907 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 3907 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + |
3908 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 3908 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
3909 | l3->shared = new_shared; | 3909 | l3->shared = new_shared; |
3910 | l3->alien = new_alien; | 3910 | l3->alien = new_alien; |
3911 | l3->free_limit = (1 + nr_cpus_node(node)) * | 3911 | l3->free_limit = (1 + nr_cpus_node(node)) * |
3912 | cachep->batchcount + cachep->num; | 3912 | cachep->batchcount + cachep->num; |
3913 | cachep->nodelists[node] = l3; | 3913 | cachep->nodelists[node] = l3; |
3914 | } | 3914 | } |
3915 | return 0; | 3915 | return 0; |
3916 | 3916 | ||
3917 | fail: | 3917 | fail: |
3918 | if (!cachep->next.next) { | 3918 | if (!cachep->next.next) { |
3919 | /* Cache is not active yet. Roll back what we did */ | 3919 | /* Cache is not active yet. Roll back what we did */ |
3920 | node--; | 3920 | node--; |
3921 | while (node >= 0) { | 3921 | while (node >= 0) { |
3922 | if (cachep->nodelists[node]) { | 3922 | if (cachep->nodelists[node]) { |
3923 | l3 = cachep->nodelists[node]; | 3923 | l3 = cachep->nodelists[node]; |
3924 | 3924 | ||
3925 | kfree(l3->shared); | 3925 | kfree(l3->shared); |
3926 | free_alien_cache(l3->alien); | 3926 | free_alien_cache(l3->alien); |
3927 | kfree(l3); | 3927 | kfree(l3); |
3928 | cachep->nodelists[node] = NULL; | 3928 | cachep->nodelists[node] = NULL; |
3929 | } | 3929 | } |
3930 | node--; | 3930 | node--; |
3931 | } | 3931 | } |
3932 | } | 3932 | } |
3933 | return -ENOMEM; | 3933 | return -ENOMEM; |
3934 | } | 3934 | } |
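The free_limit formula above caps how many free objects a node may hoard before free_block() starts destroying empty slabs: for example, a node with 4 cpus, a batchcount of 16 and 30 objects per slab gets (1 + 4) * 16 + 30 = 110, i.e. one batch per cpu, one extra batch, plus a full slab's worth of slack.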
3935 | 3935 | ||
3936 | struct ccupdate_struct { | 3936 | struct ccupdate_struct { |
3937 | struct kmem_cache *cachep; | 3937 | struct kmem_cache *cachep; |
3938 | struct array_cache *new[NR_CPUS]; | 3938 | struct array_cache *new[NR_CPUS]; |
3939 | }; | 3939 | }; |
3940 | 3940 | ||
3941 | static void do_ccupdate_local(void *info) | 3941 | static void do_ccupdate_local(void *info) |
3942 | { | 3942 | { |
3943 | struct ccupdate_struct *new = info; | 3943 | struct ccupdate_struct *new = info; |
3944 | struct array_cache *old; | 3944 | struct array_cache *old; |
3945 | 3945 | ||
3946 | check_irq_off(); | 3946 | check_irq_off(); |
3947 | old = cpu_cache_get(new->cachep); | 3947 | old = cpu_cache_get(new->cachep); |
3948 | 3948 | ||
3949 | new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()]; | 3949 | new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()]; |
3950 | new->new[smp_processor_id()] = old; | 3950 | new->new[smp_processor_id()] = old; |
3951 | } | 3951 | } |
3952 | 3952 | ||
3953 | /* Always called with the cache_chain_mutex held */ | 3953 | /* Always called with the cache_chain_mutex held */ |
3954 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | 3954 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
3955 | int batchcount, int shared) | 3955 | int batchcount, int shared) |
3956 | { | 3956 | { |
3957 | struct ccupdate_struct *new; | 3957 | struct ccupdate_struct *new; |
3958 | int i; | 3958 | int i; |
3959 | 3959 | ||
3960 | new = kzalloc(sizeof(*new), GFP_KERNEL); | 3960 | new = kzalloc(sizeof(*new), GFP_KERNEL); |
3961 | if (!new) | 3961 | if (!new) |
3962 | return -ENOMEM; | 3962 | return -ENOMEM; |
3963 | 3963 | ||
3964 | for_each_online_cpu(i) { | 3964 | for_each_online_cpu(i) { |
3965 | new->new[i] = alloc_arraycache(cpu_to_node(i), limit, | 3965 | new->new[i] = alloc_arraycache(cpu_to_node(i), limit, |
3966 | batchcount); | 3966 | batchcount); |
3967 | if (!new->new[i]) { | 3967 | if (!new->new[i]) { |
3968 | for (i--; i >= 0; i--) | 3968 | for (i--; i >= 0; i--) |
3969 | kfree(new->new[i]); | 3969 | kfree(new->new[i]); |
3970 | kfree(new); | 3970 | kfree(new); |
3971 | return -ENOMEM; | 3971 | return -ENOMEM; |
3972 | } | 3972 | } |
3973 | } | 3973 | } |
3974 | new->cachep = cachep; | 3974 | new->cachep = cachep; |
3975 | 3975 | ||
3976 | on_each_cpu(do_ccupdate_local, (void *)new, 1, 1); | 3976 | on_each_cpu(do_ccupdate_local, (void *)new, 1, 1); |
3977 | 3977 | ||
3978 | check_irq_on(); | 3978 | check_irq_on(); |
3979 | cachep->batchcount = batchcount; | 3979 | cachep->batchcount = batchcount; |
3980 | cachep->limit = limit; | 3980 | cachep->limit = limit; |
3981 | cachep->shared = shared; | 3981 | cachep->shared = shared; |
3982 | 3982 | ||
3983 | for_each_online_cpu(i) { | 3983 | for_each_online_cpu(i) { |
3984 | struct array_cache *ccold = new->new[i]; | 3984 | struct array_cache *ccold = new->new[i]; |
3985 | if (!ccold) | 3985 | if (!ccold) |
3986 | continue; | 3986 | continue; |
3987 | spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); | 3987 | spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); |
3988 | free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); | 3988 | free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); |
3989 | spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); | 3989 | spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); |
3990 | kfree(ccold); | 3990 | kfree(ccold); |
3991 | } | 3991 | } |
3992 | kfree(new); | 3992 | kfree(new); |
3993 | return alloc_kmemlist(cachep); | 3993 | return alloc_kmemlist(cachep); |
3994 | } | 3994 | } |
3995 | 3995 | ||
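do_tune_cpucache() installs the new per-CPU arrays by running do_ccupdate_local() on every processor: each CPU swaps in its own replacement array_cache and returns the old one through the same slot, so the exchange itself needs no extra locking. A minimal sketch of the same 2.6-era on_each_cpu() pattern assumed by the call above (the callback runs on each online CPU with local interrupts off; the trailing 1, 1 are the retry and wait flags):

	/* Sketch only: counts how many CPUs ran the callback. */
	static void bump_local(void *info)
	{
		atomic_t *counter = info;

		atomic_inc(counter);	/* runs once per online CPU, IRQs off */
	}

	static atomic_t hits = ATOMIC_INIT(0);

	static void count_cpus_demo(void)
	{
		on_each_cpu(bump_local, &hits, 1, 1);	/* wait for all CPUs */
		printk(KERN_INFO "callback ran on %d cpus\n",
		       atomic_read(&hits));
	}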
3996 | /* Always called with the cache_chain_mutex held */ | 3996 | /* Always called with the cache_chain_mutex held */ |
3997 | static int enable_cpucache(struct kmem_cache *cachep) | 3997 | static int enable_cpucache(struct kmem_cache *cachep) |
3998 | { | 3998 | { |
3999 | int err; | 3999 | int err; |
4000 | int limit, shared; | 4000 | int limit, shared; |
4001 | 4001 | ||
4002 | /* | 4002 | /* |
4003 | * The head array serves three purposes: | 4003 | * The head array serves three purposes: |
4004 | * - create a LIFO ordering, i.e. return objects that are cache-warm | 4004 | * - create a LIFO ordering, i.e. return objects that are cache-warm |
4005 | * - reduce the number of spinlock operations. | 4005 | * - reduce the number of spinlock operations. |
4006 | * - reduce the number of linked list operations on the slab and | 4006 | * - reduce the number of linked list operations on the slab and |
4007 | * bufctl chains: array operations are cheaper. | 4007 | * bufctl chains: array operations are cheaper. |
4008 | * The numbers are guessed; we should auto-tune as described by | 4008 | * The numbers are guessed; we should auto-tune as described by |
4009 | * Bonwick. | 4009 | * Bonwick. |
4010 | */ | 4010 | */ |
4011 | if (cachep->buffer_size > 131072) | 4011 | if (cachep->buffer_size > 131072) |
4012 | limit = 1; | 4012 | limit = 1; |
4013 | else if (cachep->buffer_size > PAGE_SIZE) | 4013 | else if (cachep->buffer_size > PAGE_SIZE) |
4014 | limit = 8; | 4014 | limit = 8; |
4015 | else if (cachep->buffer_size > 1024) | 4015 | else if (cachep->buffer_size > 1024) |
4016 | limit = 24; | 4016 | limit = 24; |
4017 | else if (cachep->buffer_size > 256) | 4017 | else if (cachep->buffer_size > 256) |
4018 | limit = 54; | 4018 | limit = 54; |
4019 | else | 4019 | else |
4020 | limit = 120; | 4020 | limit = 120; |
4021 | 4021 | ||
4022 | /* | 4022 | /* |
4023 | * CPU-bound tasks (e.g. network routing) can exhibit CPU-bound | 4023 | * CPU-bound tasks (e.g. network routing) can exhibit CPU-bound |
4024 | * allocation behaviour: most allocations on one CPU, most free operations | 4024 | * allocation behaviour: most allocations on one CPU, most free operations |
4025 | * on another. For these cases, efficient object passing between | 4025 | * on another. For these cases, efficient object passing between |
4026 | * CPUs is necessary. This is provided by a shared array. The array | 4026 | * CPUs is necessary. This is provided by a shared array. The array |
4027 | * replaces Bonwick's magazine layer. | 4027 | * replaces Bonwick's magazine layer. |
4028 | * On uniprocessor, it's functionally equivalent (but less efficient) | 4028 | * On uniprocessor, it's functionally equivalent (but less efficient) |
4029 | * to a larger limit, so it is disabled by default. | 4029 | * to a larger limit, so it is disabled by default. |
4030 | */ | 4030 | */ |
4031 | shared = 0; | 4031 | shared = 0; |
4032 | if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1) | 4032 | if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1) |
4033 | shared = 8; | 4033 | shared = 8; |
4034 | 4034 | ||
4035 | #if DEBUG | 4035 | #if DEBUG |
4036 | /* | 4036 | /* |
4037 | * With debugging enabled, a large batchcount leads to excessively long | 4037 | * With debugging enabled, a large batchcount leads to excessively long |
4038 | * periods with local interrupts disabled. Limit the batchcount. | 4038 | * periods with local interrupts disabled. Limit the batchcount. |
4039 | */ | 4039 | */ |
4040 | if (limit > 32) | 4040 | if (limit > 32) |
4041 | limit = 32; | 4041 | limit = 32; |
4042 | #endif | 4042 | #endif |
4043 | err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared); | 4043 | err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared); |
4044 | if (err) | 4044 | if (err) |
4045 | printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", | 4045 | printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", |
4046 | cachep->name, -err); | 4046 | cachep->name, -err); |
4047 | return err; | 4047 | return err; |
4048 | } | 4048 | } |
4049 | 4049 | ||
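A worked (hypothetical) example of the sizing above: a cache of 512-byte objects falls into the "> 256" bucket, so limit = 54, and the batchcount handed to do_tune_cpucache() is (54 + 1) / 2 = 27; on an SMP machine with 4 KiB pages, 512 <= PAGE_SIZE, so the shared array is enabled with shared = 8. With DEBUG compiled in, the limit is first clamped to 32, giving a batchcount of (32 + 1) / 2 = 16.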
4050 | /* | 4050 | /* |
4051 | * Drain an array if it contains any elements, taking the l3 lock only if | 4051 | * Drain an array if it contains any elements, taking the l3 lock only if |
4052 | * necessary. Note that the l3 listlock also protects the array_cache | 4052 | * necessary. Note that the l3 listlock also protects the array_cache |
4053 | * if drain_array() is used on the shared array. | 4053 | * if drain_array() is used on the shared array. |
4054 | */ | 4054 | */ |
4055 | void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, | 4055 | void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
4056 | struct array_cache *ac, int force, int node) | 4056 | struct array_cache *ac, int force, int node) |
4057 | { | 4057 | { |
4058 | int tofree; | 4058 | int tofree; |
4059 | 4059 | ||
4060 | if (!ac || !ac->avail) | 4060 | if (!ac || !ac->avail) |
4061 | return; | 4061 | return; |
4062 | if (ac->touched && !force) { | 4062 | if (ac->touched && !force) { |
4063 | ac->touched = 0; | 4063 | ac->touched = 0; |
4064 | } else { | 4064 | } else { |
4065 | spin_lock_irq(&l3->list_lock); | 4065 | spin_lock_irq(&l3->list_lock); |
4066 | if (ac->avail) { | 4066 | if (ac->avail) { |
4067 | tofree = force ? ac->avail : (ac->limit + 4) / 5; | 4067 | tofree = force ? ac->avail : (ac->limit + 4) / 5; |
4068 | if (tofree > ac->avail) | 4068 | if (tofree > ac->avail) |
4069 | tofree = (ac->avail + 1) / 2; | 4069 | tofree = (ac->avail + 1) / 2; |
4070 | free_block(cachep, ac->entry, tofree, node); | 4070 | free_block(cachep, ac->entry, tofree, node); |
4071 | ac->avail -= tofree; | 4071 | ac->avail -= tofree; |
4072 | memmove(ac->entry, &(ac->entry[tofree]), | 4072 | memmove(ac->entry, &(ac->entry[tofree]), |
4073 | sizeof(void *) * ac->avail); | 4073 | sizeof(void *) * ac->avail); |
4074 | } | 4074 | } |
4075 | spin_unlock_irq(&l3->list_lock); | 4075 | spin_unlock_irq(&l3->list_lock); |
4076 | } | 4076 | } |
4077 | } | 4077 | } |
4078 | 4078 | ||
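Worked example for the partial drain: with force == 0 and an array whose limit is 120, tofree = (120 + 4) / 5 = 24 objects per pass; if only 10 objects are actually available, the cap lowers that to (10 + 1) / 2 = 5, so an idle array is trimmed gradually rather than emptied in one go.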
4079 | /** | 4079 | /** |
4080 | * cache_reap - Reclaim memory from caches. | 4080 | * cache_reap - Reclaim memory from caches. |
4081 | * @w: work descriptor | 4081 | * @w: work descriptor |
4082 | * | 4082 | * |
4083 | * Called from workqueue/eventd every few seconds. | 4083 | * Called from workqueue/eventd every few seconds. |
4084 | * Purpose: | 4084 | * Purpose: |
4085 | * - clear the per-cpu caches for this CPU. | 4085 | * - clear the per-cpu caches for this CPU. |
4086 | * - return freeable pages to the main free memory pool. | 4086 | * - return freeable pages to the main free memory pool. |
4087 | * | 4087 | * |
4088 | * If we cannot acquire the cache chain mutex then just give up - we'll try | 4088 | * If we cannot acquire the cache chain mutex then just give up - we'll try |
4089 | * again on the next iteration. | 4089 | * again on the next iteration. |
4090 | */ | 4090 | */ |
4091 | static void cache_reap(struct work_struct *w) | 4091 | static void cache_reap(struct work_struct *w) |
4092 | { | 4092 | { |
4093 | struct kmem_cache *searchp; | 4093 | struct kmem_cache *searchp; |
4094 | struct kmem_list3 *l3; | 4094 | struct kmem_list3 *l3; |
4095 | int node = numa_node_id(); | 4095 | int node = numa_node_id(); |
4096 | struct delayed_work *work = | 4096 | struct delayed_work *work = |
4097 | container_of(w, struct delayed_work, work); | 4097 | container_of(w, struct delayed_work, work); |
4098 | 4098 | ||
4099 | if (!mutex_trylock(&cache_chain_mutex)) | 4099 | if (!mutex_trylock(&cache_chain_mutex)) |
4100 | /* Give up. Set up the next iteration. */ | 4100 | /* Give up. Set up the next iteration. */ |
4101 | goto out; | 4101 | goto out; |
4102 | 4102 | ||
4103 | list_for_each_entry(searchp, &cache_chain, next) { | 4103 | list_for_each_entry(searchp, &cache_chain, next) { |
4104 | check_irq_on(); | 4104 | check_irq_on(); |
4105 | 4105 | ||
4106 | /* | 4106 | /* |
4107 | * We only take the l3 lock if absolutely necessary and we | 4107 | * We only take the l3 lock if absolutely necessary and we |
4108 | * have established with reasonable certainty that | 4108 | * have established with reasonable certainty that |
4109 | * we can do some work if the lock is obtained. | 4109 | * we can do some work if the lock is obtained. |
4110 | */ | 4110 | */ |
4111 | l3 = searchp->nodelists[node]; | 4111 | l3 = searchp->nodelists[node]; |
4112 | 4112 | ||
4113 | reap_alien(searchp, l3); | 4113 | reap_alien(searchp, l3); |
4114 | 4114 | ||
4115 | drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); | 4115 | drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); |
4116 | 4116 | ||
4117 | /* | 4117 | /* |
4118 | * These are racy checks, but it does not matter | 4118 | * These are racy checks, but it does not matter |
4119 | * if we skip one check or scan twice. | 4119 | * if we skip one check or scan twice. |
4120 | */ | 4120 | */ |
4121 | if (time_after(l3->next_reap, jiffies)) | 4121 | if (time_after(l3->next_reap, jiffies)) |
4122 | goto next; | 4122 | goto next; |
4123 | 4123 | ||
4124 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3; | 4124 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3; |
4125 | 4125 | ||
4126 | drain_array(searchp, l3, l3->shared, 0, node); | 4126 | drain_array(searchp, l3, l3->shared, 0, node); |
4127 | 4127 | ||
4128 | if (l3->free_touched) | 4128 | if (l3->free_touched) |
4129 | l3->free_touched = 0; | 4129 | l3->free_touched = 0; |
4130 | else { | 4130 | else { |
4131 | int freed; | 4131 | int freed; |
4132 | 4132 | ||
4133 | freed = drain_freelist(searchp, l3, (l3->free_limit + | 4133 | freed = drain_freelist(searchp, l3, (l3->free_limit + |
4134 | 5 * searchp->num - 1) / (5 * searchp->num)); | 4134 | 5 * searchp->num - 1) / (5 * searchp->num)); |
4135 | STATS_ADD_REAPED(searchp, freed); | 4135 | STATS_ADD_REAPED(searchp, freed); |
4136 | } | 4136 | } |
4137 | next: | 4137 | next: |
4138 | cond_resched(); | 4138 | cond_resched(); |
4139 | } | 4139 | } |
4140 | check_irq_on(); | 4140 | check_irq_on(); |
4141 | mutex_unlock(&cache_chain_mutex); | 4141 | mutex_unlock(&cache_chain_mutex); |
4142 | next_reap_node(); | 4142 | next_reap_node(); |
4143 | refresh_cpu_vm_stats(smp_processor_id()); | 4143 | refresh_cpu_vm_stats(smp_processor_id()); |
4144 | out: | 4144 | out: |
4145 | /* Set up the next iteration */ | 4145 | /* Set up the next iteration */ |
4146 | schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); | 4146 | schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); |
4147 | } | 4147 | } |
4148 | 4148 | ||
4149 | #ifdef CONFIG_PROC_FS | 4149 | #ifdef CONFIG_PROC_FS |
4150 | 4150 | ||
4151 | static void print_slabinfo_header(struct seq_file *m) | 4151 | static void print_slabinfo_header(struct seq_file *m) |
4152 | { | 4152 | { |
4153 | /* | 4153 | /* |
4154 | * Output format version, so at least we can change it | 4154 | * Output format version, so at least we can change it |
4155 | * without _too_ many complaints. | 4155 | * without _too_ many complaints. |
4156 | */ | 4156 | */ |
4157 | #if STATS | 4157 | #if STATS |
4158 | seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); | 4158 | seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); |
4159 | #else | 4159 | #else |
4160 | seq_puts(m, "slabinfo - version: 2.1\n"); | 4160 | seq_puts(m, "slabinfo - version: 2.1\n"); |
4161 | #endif | 4161 | #endif |
4162 | seq_puts(m, "# name <active_objs> <num_objs> <objsize> " | 4162 | seq_puts(m, "# name <active_objs> <num_objs> <objsize> " |
4163 | "<objperslab> <pagesperslab>"); | 4163 | "<objperslab> <pagesperslab>"); |
4164 | seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); | 4164 | seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); |
4165 | seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); | 4165 | seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); |
4166 | #if STATS | 4166 | #if STATS |
4167 | seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> " | 4167 | seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> " |
4168 | "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>"); | 4168 | "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>"); |
4169 | seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); | 4169 | seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); |
4170 | #endif | 4170 | #endif |
4171 | seq_putc(m, '\n'); | 4171 | seq_putc(m, '\n'); |
4172 | } | 4172 | } |
4173 | 4173 | ||
4174 | static void *s_start(struct seq_file *m, loff_t *pos) | 4174 | static void *s_start(struct seq_file *m, loff_t *pos) |
4175 | { | 4175 | { |
4176 | loff_t n = *pos; | 4176 | loff_t n = *pos; |
4177 | struct list_head *p; | 4177 | struct list_head *p; |
4178 | 4178 | ||
4179 | mutex_lock(&cache_chain_mutex); | 4179 | mutex_lock(&cache_chain_mutex); |
4180 | if (!n) | 4180 | if (!n) |
4181 | print_slabinfo_header(m); | 4181 | print_slabinfo_header(m); |
4182 | p = cache_chain.next; | 4182 | p = cache_chain.next; |
4183 | while (n--) { | 4183 | while (n--) { |
4184 | p = p->next; | 4184 | p = p->next; |
4185 | if (p == &cache_chain) | 4185 | if (p == &cache_chain) |
4186 | return NULL; | 4186 | return NULL; |
4187 | } | 4187 | } |
4188 | return list_entry(p, struct kmem_cache, next); | 4188 | return list_entry(p, struct kmem_cache, next); |
4189 | } | 4189 | } |
4190 | 4190 | ||
4191 | static void *s_next(struct seq_file *m, void *p, loff_t *pos) | 4191 | static void *s_next(struct seq_file *m, void *p, loff_t *pos) |
4192 | { | 4192 | { |
4193 | struct kmem_cache *cachep = p; | 4193 | struct kmem_cache *cachep = p; |
4194 | ++*pos; | 4194 | ++*pos; |
4195 | return cachep->next.next == &cache_chain ? | 4195 | return cachep->next.next == &cache_chain ? |
4196 | NULL : list_entry(cachep->next.next, struct kmem_cache, next); | 4196 | NULL : list_entry(cachep->next.next, struct kmem_cache, next); |
4197 | } | 4197 | } |
4198 | 4198 | ||
4199 | static void s_stop(struct seq_file *m, void *p) | 4199 | static void s_stop(struct seq_file *m, void *p) |
4200 | { | 4200 | { |
4201 | mutex_unlock(&cache_chain_mutex); | 4201 | mutex_unlock(&cache_chain_mutex); |
4202 | } | 4202 | } |
4203 | 4203 | ||
4204 | static int s_show(struct seq_file *m, void *p) | 4204 | static int s_show(struct seq_file *m, void *p) |
4205 | { | 4205 | { |
4206 | struct kmem_cache *cachep = p; | 4206 | struct kmem_cache *cachep = p; |
4207 | struct slab *slabp; | 4207 | struct slab *slabp; |
4208 | unsigned long active_objs; | 4208 | unsigned long active_objs; |
4209 | unsigned long num_objs; | 4209 | unsigned long num_objs; |
4210 | unsigned long active_slabs = 0; | 4210 | unsigned long active_slabs = 0; |
4211 | unsigned long num_slabs, free_objects = 0, shared_avail = 0; | 4211 | unsigned long num_slabs, free_objects = 0, shared_avail = 0; |
4212 | const char *name; | 4212 | const char *name; |
4213 | char *error = NULL; | 4213 | char *error = NULL; |
4214 | int node; | 4214 | int node; |
4215 | struct kmem_list3 *l3; | 4215 | struct kmem_list3 *l3; |
4216 | 4216 | ||
4217 | active_objs = 0; | 4217 | active_objs = 0; |
4218 | num_slabs = 0; | 4218 | num_slabs = 0; |
4219 | for_each_online_node(node) { | 4219 | for_each_online_node(node) { |
4220 | l3 = cachep->nodelists[node]; | 4220 | l3 = cachep->nodelists[node]; |
4221 | if (!l3) | 4221 | if (!l3) |
4222 | continue; | 4222 | continue; |
4223 | 4223 | ||
4224 | check_irq_on(); | 4224 | check_irq_on(); |
4225 | spin_lock_irq(&l3->list_lock); | 4225 | spin_lock_irq(&l3->list_lock); |
4226 | 4226 | ||
4227 | list_for_each_entry(slabp, &l3->slabs_full, list) { | 4227 | list_for_each_entry(slabp, &l3->slabs_full, list) { |
4228 | if (slabp->inuse != cachep->num && !error) | 4228 | if (slabp->inuse != cachep->num && !error) |
4229 | error = "slabs_full accounting error"; | 4229 | error = "slabs_full accounting error"; |
4230 | active_objs += cachep->num; | 4230 | active_objs += cachep->num; |
4231 | active_slabs++; | 4231 | active_slabs++; |
4232 | } | 4232 | } |
4233 | list_for_each_entry(slabp, &l3->slabs_partial, list) { | 4233 | list_for_each_entry(slabp, &l3->slabs_partial, list) { |
4234 | if (slabp->inuse == cachep->num && !error) | 4234 | if (slabp->inuse == cachep->num && !error) |
4235 | error = "slabs_partial inuse accounting error"; | 4235 | error = "slabs_partial inuse accounting error"; |
4236 | if (!slabp->inuse && !error) | 4236 | if (!slabp->inuse && !error) |
4237 | error = "slabs_partial/inuse accounting error"; | 4237 | error = "slabs_partial/inuse accounting error"; |
4238 | active_objs += slabp->inuse; | 4238 | active_objs += slabp->inuse; |
4239 | active_slabs++; | 4239 | active_slabs++; |
4240 | } | 4240 | } |
4241 | list_for_each_entry(slabp, &l3->slabs_free, list) { | 4241 | list_for_each_entry(slabp, &l3->slabs_free, list) { |
4242 | if (slabp->inuse && !error) | 4242 | if (slabp->inuse && !error) |
4243 | error = "slabs_free/inuse accounting error"; | 4243 | error = "slabs_free/inuse accounting error"; |
4244 | num_slabs++; | 4244 | num_slabs++; |
4245 | } | 4245 | } |
4246 | free_objects += l3->free_objects; | 4246 | free_objects += l3->free_objects; |
4247 | if (l3->shared) | 4247 | if (l3->shared) |
4248 | shared_avail += l3->shared->avail; | 4248 | shared_avail += l3->shared->avail; |
4249 | 4249 | ||
4250 | spin_unlock_irq(&l3->list_lock); | 4250 | spin_unlock_irq(&l3->list_lock); |
4251 | } | 4251 | } |
4252 | num_slabs += active_slabs; | 4252 | num_slabs += active_slabs; |
4253 | num_objs = num_slabs * cachep->num; | 4253 | num_objs = num_slabs * cachep->num; |
4254 | if (num_objs - active_objs != free_objects && !error) | 4254 | if (num_objs - active_objs != free_objects && !error) |
4255 | error = "free_objects accounting error"; | 4255 | error = "free_objects accounting error"; |
4256 | 4256 | ||
4257 | name = cachep->name; | 4257 | name = cachep->name; |
4258 | if (error) | 4258 | if (error) |
4259 | printk(KERN_ERR "slab: cache %s error: %s\n", name, error); | 4259 | printk(KERN_ERR "slab: cache %s error: %s\n", name, error); |
4260 | 4260 | ||
4261 | seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", | 4261 | seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", |
4262 | name, active_objs, num_objs, cachep->buffer_size, | 4262 | name, active_objs, num_objs, cachep->buffer_size, |
4263 | cachep->num, (1 << cachep->gfporder)); | 4263 | cachep->num, (1 << cachep->gfporder)); |
4264 | seq_printf(m, " : tunables %4u %4u %4u", | 4264 | seq_printf(m, " : tunables %4u %4u %4u", |
4265 | cachep->limit, cachep->batchcount, cachep->shared); | 4265 | cachep->limit, cachep->batchcount, cachep->shared); |
4266 | seq_printf(m, " : slabdata %6lu %6lu %6lu", | 4266 | seq_printf(m, " : slabdata %6lu %6lu %6lu", |
4267 | active_slabs, num_slabs, shared_avail); | 4267 | active_slabs, num_slabs, shared_avail); |
4268 | #if STATS | 4268 | #if STATS |
4269 | { /* list3 stats */ | 4269 | { /* list3 stats */ |
4270 | unsigned long high = cachep->high_mark; | 4270 | unsigned long high = cachep->high_mark; |
4271 | unsigned long allocs = cachep->num_allocations; | 4271 | unsigned long allocs = cachep->num_allocations; |
4272 | unsigned long grown = cachep->grown; | 4272 | unsigned long grown = cachep->grown; |
4273 | unsigned long reaped = cachep->reaped; | 4273 | unsigned long reaped = cachep->reaped; |
4274 | unsigned long errors = cachep->errors; | 4274 | unsigned long errors = cachep->errors; |
4275 | unsigned long max_freeable = cachep->max_freeable; | 4275 | unsigned long max_freeable = cachep->max_freeable; |
4276 | unsigned long node_allocs = cachep->node_allocs; | 4276 | unsigned long node_allocs = cachep->node_allocs; |
4277 | unsigned long node_frees = cachep->node_frees; | 4277 | unsigned long node_frees = cachep->node_frees; |
4278 | unsigned long overflows = cachep->node_overflow; | 4278 | unsigned long overflows = cachep->node_overflow; |
4279 | 4279 | ||
4280 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ | 4280 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ |
4281 | %4lu %4lu %4lu %4lu %4lu", allocs, high, grown, | 4281 | %4lu %4lu %4lu %4lu %4lu", allocs, high, grown, |
4282 | reaped, errors, max_freeable, node_allocs, | 4282 | reaped, errors, max_freeable, node_allocs, |
4283 | node_frees, overflows); | 4283 | node_frees, overflows); |
4284 | } | 4284 | } |
4285 | /* cpu stats */ | 4285 | /* cpu stats */ |
4286 | { | 4286 | { |
4287 | unsigned long allochit = atomic_read(&cachep->allochit); | 4287 | unsigned long allochit = atomic_read(&cachep->allochit); |
4288 | unsigned long allocmiss = atomic_read(&cachep->allocmiss); | 4288 | unsigned long allocmiss = atomic_read(&cachep->allocmiss); |
4289 | unsigned long freehit = atomic_read(&cachep->freehit); | 4289 | unsigned long freehit = atomic_read(&cachep->freehit); |
4290 | unsigned long freemiss = atomic_read(&cachep->freemiss); | 4290 | unsigned long freemiss = atomic_read(&cachep->freemiss); |
4291 | 4291 | ||
4292 | seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu", | 4292 | seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu", |
4293 | allochit, allocmiss, freehit, freemiss); | 4293 | allochit, allocmiss, freehit, freemiss); |
4294 | } | 4294 | } |
4295 | #endif | 4295 | #endif |
4296 | seq_putc(m, '\n'); | 4296 | seq_putc(m, '\n'); |
4297 | return 0; | 4297 | return 0; |
4298 | } | 4298 | } |
4299 | 4299 | ||
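Putting the header and the format strings above together, one /proc/slabinfo line from a non-STATS build would look roughly like this (the cache name and counts are hypothetical but self-consistent: 342 one-page slabs of 30 objects of 132 bytes each, with the default tunables enable_cpucache() picks for objects of this size):

	dentry_cache       10250  10260    132   30    1 : tunables  120   60    8 : slabdata    342    342      0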
4300 | /* | 4300 | /* |
4301 | * slabinfo_op - iterator that generates /proc/slabinfo | 4301 | * slabinfo_op - iterator that generates /proc/slabinfo |
4302 | * | 4302 | * |
4303 | * Output layout: | 4303 | * Output layout: |
4304 | * cache-name | 4304 | * cache-name |
4305 | * num-active-objs | 4305 | * num-active-objs |
4306 | * total-objs | 4306 | * total-objs |
4307 | * object size | 4307 | * object size |
4308 | * num-active-slabs | 4308 | * num-active-slabs |
4309 | * total-slabs | 4309 | * total-slabs |
4310 | * num-pages-per-slab | 4310 | * num-pages-per-slab |
4311 | * + further values on SMP and with statistics enabled | 4311 | * + further values on SMP and with statistics enabled |
4312 | */ | 4312 | */ |
4313 | 4313 | ||
4314 | const struct seq_operations slabinfo_op = { | 4314 | const struct seq_operations slabinfo_op = { |
4315 | .start = s_start, | 4315 | .start = s_start, |
4316 | .next = s_next, | 4316 | .next = s_next, |
4317 | .stop = s_stop, | 4317 | .stop = s_stop, |
4318 | .show = s_show, | 4318 | .show = s_show, |
4319 | }; | 4319 | }; |
4320 | 4320 | ||
4321 | #define MAX_SLABINFO_WRITE 128 | 4321 | #define MAX_SLABINFO_WRITE 128 |
4322 | /** | 4322 | /** |
4323 | * slabinfo_write - Tuning for the slab allocator | 4323 | * slabinfo_write - Tuning for the slab allocator |
4324 | * @file: unused | 4324 | * @file: unused |
4325 | * @buffer: user buffer | 4325 | * @buffer: user buffer |
4326 | * @count: data length | 4326 | * @count: data length |
4327 | * @ppos: unused | 4327 | * @ppos: unused |
4328 | */ | 4328 | */ |
4329 | ssize_t slabinfo_write(struct file *file, const char __user * buffer, | 4329 | ssize_t slabinfo_write(struct file *file, const char __user * buffer, |
4330 | size_t count, loff_t *ppos) | 4330 | size_t count, loff_t *ppos) |
4331 | { | 4331 | { |
4332 | char kbuf[MAX_SLABINFO_WRITE + 1], *tmp; | 4332 | char kbuf[MAX_SLABINFO_WRITE + 1], *tmp; |
4333 | int limit, batchcount, shared, res; | 4333 | int limit, batchcount, shared, res; |
4334 | struct kmem_cache *cachep; | 4334 | struct kmem_cache *cachep; |
4335 | 4335 | ||
4336 | if (count > MAX_SLABINFO_WRITE) | 4336 | if (count > MAX_SLABINFO_WRITE) |
4337 | return -EINVAL; | 4337 | return -EINVAL; |
4338 | if (copy_from_user(&kbuf, buffer, count)) | 4338 | if (copy_from_user(&kbuf, buffer, count)) |
4339 | return -EFAULT; | 4339 | return -EFAULT; |
4340 | kbuf[MAX_SLABINFO_WRITE] = '\0'; | 4340 | kbuf[MAX_SLABINFO_WRITE] = '\0'; |
4341 | 4341 | ||
4342 | tmp = strchr(kbuf, ' '); | 4342 | tmp = strchr(kbuf, ' '); |
4343 | if (!tmp) | 4343 | if (!tmp) |
4344 | return -EINVAL; | 4344 | return -EINVAL; |
4345 | *tmp = '\0'; | 4345 | *tmp = '\0'; |
4346 | tmp++; | 4346 | tmp++; |
4347 | if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3) | 4347 | if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3) |
4348 | return -EINVAL; | 4348 | return -EINVAL; |
4349 | 4349 | ||
4350 | /* Find the cache in the chain of caches. */ | 4350 | /* Find the cache in the chain of caches. */ |
4351 | mutex_lock(&cache_chain_mutex); | 4351 | mutex_lock(&cache_chain_mutex); |
4352 | res = -EINVAL; | 4352 | res = -EINVAL; |
4353 | list_for_each_entry(cachep, &cache_chain, next) { | 4353 | list_for_each_entry(cachep, &cache_chain, next) { |
4354 | if (!strcmp(cachep->name, kbuf)) { | 4354 | if (!strcmp(cachep->name, kbuf)) { |
4355 | if (limit < 1 || batchcount < 1 || | 4355 | if (limit < 1 || batchcount < 1 || |
4356 | batchcount > limit || shared < 0) { | 4356 | batchcount > limit || shared < 0) { |
4357 | res = 0; | 4357 | res = 0; |
4358 | } else { | 4358 | } else { |
4359 | res = do_tune_cpucache(cachep, limit, | 4359 | res = do_tune_cpucache(cachep, limit, |
4360 | batchcount, shared); | 4360 | batchcount, shared); |
4361 | } | 4361 | } |
4362 | break; | 4362 | break; |
4363 | } | 4363 | } |
4364 | } | 4364 | } |
4365 | mutex_unlock(&cache_chain_mutex); | 4365 | mutex_unlock(&cache_chain_mutex); |
4366 | if (res >= 0) | 4366 | if (res >= 0) |
4367 | res = count; | 4367 | res = count; |
4368 | return res; | 4368 | return res; |
4369 | } | 4369 | } |
4370 | 4370 | ||
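slabinfo_write() parses a line of the form "cache-name limit batchcount shared", so the tunables can be changed from userspace by writing such a line to /proc/slabinfo. A hedged userspace sketch (it assumes a cache named "dentry_cache" exists and the caller has root):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/slabinfo", "w");

		if (!f) {
			perror("fopen /proc/slabinfo");
			return 1;
		}
		/* limit=128, batchcount=64 (must be <= limit), sharedfactor=8 */
		fprintf(f, "dentry_cache 128 64 8\n");
		return fclose(f) ? 1 : 0;
	}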
4371 | #ifdef CONFIG_DEBUG_SLAB_LEAK | 4371 | #ifdef CONFIG_DEBUG_SLAB_LEAK |
4372 | 4372 | ||
4373 | static void *leaks_start(struct seq_file *m, loff_t *pos) | 4373 | static void *leaks_start(struct seq_file *m, loff_t *pos) |
4374 | { | 4374 | { |
4375 | loff_t n = *pos; | 4375 | loff_t n = *pos; |
4376 | struct list_head *p; | 4376 | struct list_head *p; |
4377 | 4377 | ||
4378 | mutex_lock(&cache_chain_mutex); | 4378 | mutex_lock(&cache_chain_mutex); |
4379 | p = cache_chain.next; | 4379 | p = cache_chain.next; |
4380 | while (n--) { | 4380 | while (n--) { |
4381 | p = p->next; | 4381 | p = p->next; |
4382 | if (p == &cache_chain) | 4382 | if (p == &cache_chain) |
4383 | return NULL; | 4383 | return NULL; |
4384 | } | 4384 | } |
4385 | return list_entry(p, struct kmem_cache, next); | 4385 | return list_entry(p, struct kmem_cache, next); |
4386 | } | 4386 | } |
4387 | 4387 | ||
4388 | static inline int add_caller(unsigned long *n, unsigned long v) | 4388 | static inline int add_caller(unsigned long *n, unsigned long v) |
4389 | { | 4389 | { |
4390 | unsigned long *p; | 4390 | unsigned long *p; |
4391 | int l; | 4391 | int l; |
4392 | if (!v) | 4392 | if (!v) |
4393 | return 1; | 4393 | return 1; |
4394 | l = n[1]; | 4394 | l = n[1]; |
4395 | p = n + 2; | 4395 | p = n + 2; |
4396 | while (l) { | 4396 | while (l) { |
4397 | int i = l/2; | 4397 | int i = l/2; |
4398 | unsigned long *q = p + 2 * i; | 4398 | unsigned long *q = p + 2 * i; |
4399 | if (*q == v) { | 4399 | if (*q == v) { |
4400 | q[1]++; | 4400 | q[1]++; |
4401 | return 1; | 4401 | return 1; |
4402 | } | 4402 | } |
4403 | if (*q > v) { | 4403 | if (*q > v) { |
4404 | l = i; | 4404 | l = i; |
4405 | } else { | 4405 | } else { |
4406 | p = q + 2; | 4406 | p = q + 2; |
4407 | l -= i + 1; | 4407 | l -= i + 1; |
4408 | } | 4408 | } |
4409 | } | 4409 | } |
4410 | if (++n[1] == n[0]) | 4410 | if (++n[1] == n[0]) |
4411 | return 0; | 4411 | return 0; |
4412 | memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n)); | 4412 | memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n)); |
4413 | p[0] = v; | 4413 | p[0] = v; |
4414 | p[1] = 1; | 4414 | p[1] = 1; |
4415 | return 1; | 4415 | return 1; |
4416 | } | 4416 | } |
4417 | 4417 | ||
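add_caller() keeps its table in a single unsigned long array: n[0] is the capacity in entries, n[1] the number in use, followed by n[1] (caller address, allocation count) pairs kept sorted by address; the while loop is a binary search and the memmove opens a slot for a new pair. An illustrative userspace-style sketch decoding that layout (the same n[2*i+2]/n[2*i+3] indexing leaks_show() uses below):

	#include <stdio.h>

	static void dump_callers(const unsigned long *n)
	{
		unsigned long i;

		for (i = 0; i < n[1]; i++)	/* pairs start at n[2] */
			printf("%#lx: %lu allocations\n",
			       n[2 + 2 * i], n[3 + 2 * i]);
	}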
4418 | static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) | 4418 | static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) |
4419 | { | 4419 | { |
4420 | void *p; | 4420 | void *p; |
4421 | int i; | 4421 | int i; |
4422 | if (n[0] == n[1]) | 4422 | if (n[0] == n[1]) |
4423 | return; | 4423 | return; |
4424 | for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) { | 4424 | for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) { |
4425 | if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) | 4425 | if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) |
4426 | continue; | 4426 | continue; |
4427 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) | 4427 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) |
4428 | return; | 4428 | return; |
4429 | } | 4429 | } |
4430 | } | 4430 | } |
4431 | 4431 | ||
4432 | static void show_symbol(struct seq_file *m, unsigned long address) | 4432 | static void show_symbol(struct seq_file *m, unsigned long address) |
4433 | { | 4433 | { |
4434 | #ifdef CONFIG_KALLSYMS | 4434 | #ifdef CONFIG_KALLSYMS |
4435 | char *modname; | ||
4436 | const char *name; | ||
4437 | unsigned long offset, size; | 4435 | unsigned long offset, size; |
4438 | char namebuf[KSYM_NAME_LEN+1]; | 4436 | char modname[MODULE_NAME_LEN + 1], name[KSYM_NAME_LEN + 1]; |
4439 | 4437 | ||
4440 | name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); | 4438 | if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) { |
4441 | |||
4442 | if (name) { | ||
4443 | seq_printf(m, "%s+%#lx/%#lx", name, offset, size); | 4439 | seq_printf(m, "%s+%#lx/%#lx", name, offset, size); |
4444 | if (modname) | 4440 | if (modname[0]) |
4445 | seq_printf(m, " [%s]", modname); | 4441 | seq_printf(m, " [%s]", modname); |
4446 | return; | 4442 | return; |
4447 | } | 4443 | } |
4448 | #endif | 4444 | #endif |
4449 | seq_printf(m, "%p", (void *)address); | 4445 | seq_printf(m, "%p", (void *)address); |
4450 | } | 4446 | } |
4451 | 4447 | ||
4452 | static int leaks_show(struct seq_file *m, void *p) | 4448 | static int leaks_show(struct seq_file *m, void *p) |
4453 | { | 4449 | { |
4454 | struct kmem_cache *cachep = p; | 4450 | struct kmem_cache *cachep = p; |
4455 | struct slab *slabp; | 4451 | struct slab *slabp; |
4456 | struct kmem_list3 *l3; | 4452 | struct kmem_list3 *l3; |
4457 | const char *name; | 4453 | const char *name; |
4458 | unsigned long *n = m->private; | 4454 | unsigned long *n = m->private; |
4459 | int node; | 4455 | int node; |
4460 | int i; | 4456 | int i; |
4461 | 4457 | ||
4462 | if (!(cachep->flags & SLAB_STORE_USER)) | 4458 | if (!(cachep->flags & SLAB_STORE_USER)) |
4463 | return 0; | 4459 | return 0; |
4464 | if (!(cachep->flags & SLAB_RED_ZONE)) | 4460 | if (!(cachep->flags & SLAB_RED_ZONE)) |
4465 | return 0; | 4461 | return 0; |
4466 | 4462 | ||
4467 | /* OK, we can do it */ | 4463 | /* OK, we can do it */ |
4468 | 4464 | ||
4469 | n[1] = 0; | 4465 | n[1] = 0; |
4470 | 4466 | ||
4471 | for_each_online_node(node) { | 4467 | for_each_online_node(node) { |
4472 | l3 = cachep->nodelists[node]; | 4468 | l3 = cachep->nodelists[node]; |
4473 | if (!l3) | 4469 | if (!l3) |
4474 | continue; | 4470 | continue; |
4475 | 4471 | ||
4476 | check_irq_on(); | 4472 | check_irq_on(); |
4477 | spin_lock_irq(&l3->list_lock); | 4473 | spin_lock_irq(&l3->list_lock); |
4478 | 4474 | ||
4479 | list_for_each_entry(slabp, &l3->slabs_full, list) | 4475 | list_for_each_entry(slabp, &l3->slabs_full, list) |
4480 | handle_slab(n, cachep, slabp); | 4476 | handle_slab(n, cachep, slabp); |
4481 | list_for_each_entry(slabp, &l3->slabs_partial, list) | 4477 | list_for_each_entry(slabp, &l3->slabs_partial, list) |
4482 | handle_slab(n, cachep, slabp); | 4478 | handle_slab(n, cachep, slabp); |
4483 | spin_unlock_irq(&l3->list_lock); | 4479 | spin_unlock_irq(&l3->list_lock); |
4484 | } | 4480 | } |
4485 | name = cachep->name; | 4481 | name = cachep->name; |
4486 | if (n[0] == n[1]) { | 4482 | if (n[0] == n[1]) { |
4487 | /* Increase the buffer size */ | 4483 | /* Increase the buffer size */ |
4488 | mutex_unlock(&cache_chain_mutex); | 4484 | mutex_unlock(&cache_chain_mutex); |
4489 | m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL); | 4485 | m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL); |
4490 | if (!m->private) { | 4486 | if (!m->private) { |
4491 | /* Too bad, we are really out */ | 4487 | /* Too bad, we are really out */ |
4492 | m->private = n; | 4488 | m->private = n; |
4493 | mutex_lock(&cache_chain_mutex); | 4489 | mutex_lock(&cache_chain_mutex); |
4494 | return -ENOMEM; | 4490 | return -ENOMEM; |
4495 | } | 4491 | } |
4496 | *(unsigned long *)m->private = n[0] * 2; | 4492 | *(unsigned long *)m->private = n[0] * 2; |
4497 | kfree(n); | 4493 | kfree(n); |
4498 | mutex_lock(&cache_chain_mutex); | 4494 | mutex_lock(&cache_chain_mutex); |
4499 | /* Now make sure this entry will be retried */ | 4495 | /* Now make sure this entry will be retried */ |
4500 | m->count = m->size; | 4496 | m->count = m->size; |
4501 | return 0; | 4497 | return 0; |
4502 | } | 4498 | } |
4503 | for (i = 0; i < n[1]; i++) { | 4499 | for (i = 0; i < n[1]; i++) { |
4504 | seq_printf(m, "%s: %lu ", name, n[2*i+3]); | 4500 | seq_printf(m, "%s: %lu ", name, n[2*i+3]); |
4505 | show_symbol(m, n[2*i+2]); | 4501 | show_symbol(m, n[2*i+2]); |
4506 | seq_putc(m, '\n'); | 4502 | seq_putc(m, '\n'); |
4507 | } | 4503 | } |
4508 | 4504 | ||
4509 | return 0; | 4505 | return 0; |
4510 | } | 4506 | } |
4511 | 4507 | ||
4512 | const struct seq_operations slabstats_op = { | 4508 | const struct seq_operations slabstats_op = { |
4513 | .start = leaks_start, | 4509 | .start = leaks_start, |
4514 | .next = s_next, | 4510 | .next = s_next, |
4515 | .stop = s_stop, | 4511 | .stop = s_stop, |
4516 | .show = leaks_show, | 4512 | .show = leaks_show, |
4517 | }; | 4513 | }; |
4518 | #endif | 4514 | #endif |
4519 | #endif | 4515 | #endif |
4520 | 4516 | ||
4521 | /** | 4517 | /** |
4522 | * ksize - get the actual amount of memory allocated for a given object | 4518 | * ksize - get the actual amount of memory allocated for a given object |
4523 | * @objp: Pointer to the object | 4519 | * @objp: Pointer to the object |
4524 | * | 4520 | * |
4525 | * kmalloc may internally round up allocations and return more memory | 4521 | * kmalloc may internally round up allocations and return more memory |
4526 | * than requested. ksize() can be used to determine the actual amount of | 4522 | * than requested. ksize() can be used to determine the actual amount of |
4527 | * memory allocated. The caller may use this additional memory, even though | 4523 | * memory allocated. The caller may use this additional memory, even though |
4528 | * a smaller amount of memory was initially specified with the kmalloc call. | 4524 | * a smaller amount of memory was initially specified with the kmalloc call. |
4529 | * The caller must guarantee that objp points to a valid object previously | 4525 | * The caller must guarantee that objp points to a valid object previously |
4530 | * allocated with either kmalloc() or kmem_cache_alloc(). The object | 4526 | * allocated with either kmalloc() or kmem_cache_alloc(). The object |
4531 | * must not be freed during the call. | 4527 | * must not be freed during the call. |
4532 | */ | 4528 | */ |
4533 | size_t ksize(const void *objp) | 4529 | size_t ksize(const void *objp) |
4534 | { | 4530 | { |
4535 | if (unlikely(objp == NULL)) | 4531 | if (unlikely(objp == NULL)) |
4536 | return 0; | 4532 | return 0; |
4537 | 4533 | ||
4538 | return obj_size(virt_to_cache(objp)); | 4534 | return obj_size(virt_to_cache(objp)); |
4539 | } | 4535 | } |
4540 | 4536 |
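A short usage sketch (the sizes are hypothetical: on a common configuration a 100-byte request is served from the 128-byte general cache):

	char *buf = kmalloc(100, GFP_KERNEL);

	if (buf) {
		size_t usable = ksize(buf);	/* >= 100, e.g. 128 */

		memset(buf, 0, usable);		/* all 'usable' bytes are valid */
		kfree(buf);
	}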