Commit a3dd3323058d281abd584b15ad4c5b65064d7a61
kexec: remove KMSG_DUMP_KEXEC
KMSG_DUMP_KEXEC is useless because we already save kernel messages inside
/proc/vmcore, and it is unsafe to allow modules to do other stuff in a crash
dump scenario.

[akpm@linux-foundation.org: fix powerpc build]
Signed-off-by: WANG Cong <xiyou.wangcong@gmail.com>
Reported-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Jarod Wilson <jarod@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 5 changed files with 2 additions and 9 deletions
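For context, the part of this commit that the truncated listing below does not
show is the enum change itself: the KEXEC reason is deleted from
include/linux/kmsg_dump.h, and the matching kmsg_dump() call is dropped from
the crash-kexec path, so dumpers are never invoked when a crash kernel is
about to boot. A sketch of the enum around this commit, reconstructed from the
reason codes the two dumpers below switch on (the exact ordering is an
assumption):

enum kmsg_dump_reason {
	KMSG_DUMP_OOPS,
	KMSG_DUMP_PANIC,
-	KMSG_DUMP_KEXEC,	/* removed: /proc/vmcore already preserves
				   the log, and running module callbacks on
				   the crash path is unsafe */
	KMSG_DUMP_RESTART,
	KMSG_DUMP_HALT,
	KMSG_DUMP_POWEROFF,
	KMSG_DUMP_EMERG,
};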
arch/powerpc/platforms/pseries/nvram.c (full file; the line removed by this
commit is marked with a leading "-"):

/*
 * c 2001 PPC 64 Team, IBM Corp
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * /dev/nvram driver for PPC64
 *
 * This perhaps should live in drivers/char
 */


#include <linux/types.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/kmsg_dump.h>
#include <linux/ctype.h>
#include <linux/zlib.h>
#include <asm/uaccess.h>
#include <asm/nvram.h>
#include <asm/rtas.h>
#include <asm/prom.h>
#include <asm/machdep.h>

/* Max bytes to read/write in one go */
#define NVRW_CNT 0x20

static unsigned int nvram_size;
static int nvram_fetch, nvram_store;
static char nvram_buf[NVRW_CNT];	/* assume this is in the first 4GB */
static DEFINE_SPINLOCK(nvram_lock);

struct err_log_info {
	int error_type;
	unsigned int seq_num;
};

struct nvram_os_partition {
	const char *name;
	int req_size;	/* desired size, in bytes */
	int min_size;	/* minimum acceptable size (0 means req_size) */
	long size;	/* size of data portion (excluding err_log_info) */
	long index;	/* offset of data portion of partition */
};

static struct nvram_os_partition rtas_log_partition = {
	.name = "ibm,rtas-log",
	.req_size = 2079,
	.min_size = 1055,
	.index = -1
};

static struct nvram_os_partition oops_log_partition = {
	.name = "lnx,oops-log",
	.req_size = 4000,
	.min_size = 2000,
	.index = -1
};

static const char *pseries_nvram_os_partitions[] = {
	"ibm,rtas-log",
	"lnx,oops-log",
	NULL
};

static void oops_to_nvram(struct kmsg_dumper *dumper,
		enum kmsg_dump_reason reason,
		const char *old_msgs, unsigned long old_len,
		const char *new_msgs, unsigned long new_len);

static struct kmsg_dumper nvram_kmsg_dumper = {
	.dump = oops_to_nvram
};

/* See clobbering_unread_rtas_event() */
#define NVRAM_RTAS_READ_TIMEOUT 5	/* seconds */
static unsigned long last_unread_rtas_event;	/* timestamp */

/*
 * For capturing and compressing an oops or panic report...

 * big_oops_buf[] holds the uncompressed text we're capturing.
 *
 * oops_buf[] holds the compressed text, preceded by a prefix.
 * The prefix is just a u16 holding the length of the compressed* text.
 * (*Or uncompressed, if compression fails.) oops_buf[] gets written
 * to NVRAM.
 *
 * oops_len points to the prefix. oops_data points to the compressed text.
 *
 * +- oops_buf
 * |	+- oops_data
 * v	v
 * +------------+-----------------------------------------------+
 * | length     | text                                          |
 * | (2 bytes)  | (oops_data_sz bytes)                          |
 * +------------+-----------------------------------------------+
 * ^
 * +- oops_len
 *
 * We preallocate these buffers during init to avoid kmalloc during oops/panic.
 */
static size_t big_oops_buf_sz;
static char *big_oops_buf, *oops_buf;
static u16 *oops_len;
static char *oops_data;
static size_t oops_data_sz;

/* Compression parameters */
#define COMPR_LEVEL 6
#define WINDOW_BITS 12
#define MEM_LEVEL 4
static struct z_stream_s stream;

static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
{
	unsigned int i;
	unsigned long len;
	int done;
	unsigned long flags;
	char *p = buf;


	if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE)
		return -ENODEV;

	if (*index >= nvram_size)
		return 0;

	i = *index;
	if (i + count > nvram_size)
		count = nvram_size - i;

	spin_lock_irqsave(&nvram_lock, flags);

	for (; count != 0; count -= len) {
		len = count;
		if (len > NVRW_CNT)
			len = NVRW_CNT;

		if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf),
			       len) != 0) || len != done) {
			spin_unlock_irqrestore(&nvram_lock, flags);
			return -EIO;
		}

		memcpy(p, nvram_buf, len);

		p += len;
		i += len;
	}

	spin_unlock_irqrestore(&nvram_lock, flags);

	*index = i;
	return p - buf;
}

static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index)
{
	unsigned int i;
	unsigned long len;
	int done;
	unsigned long flags;
	const char *p = buf;

	if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE)
		return -ENODEV;

	if (*index >= nvram_size)
		return 0;

	i = *index;
	if (i + count > nvram_size)
		count = nvram_size - i;

	spin_lock_irqsave(&nvram_lock, flags);

	for (; count != 0; count -= len) {
		len = count;
		if (len > NVRW_CNT)
			len = NVRW_CNT;

		memcpy(nvram_buf, p, len);

		if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf),
			       len) != 0) || len != done) {
			spin_unlock_irqrestore(&nvram_lock, flags);
			return -EIO;
		}

		p += len;
		i += len;
	}
	spin_unlock_irqrestore(&nvram_lock, flags);

	*index = i;
	return p - buf;
}

static ssize_t pSeries_nvram_get_size(void)
{
	return nvram_size ? nvram_size : -ENODEV;
}


/* nvram_write_os_partition, nvram_write_error_log
 *
 * We need to buffer the error logs into nvram to ensure that we have
 * the failure information to decode. If we have a severe error there
 * is no way to guarantee that the OS or the machine is in a state to
 * get back to user land and write the error to disk. For example if
 * the SCSI device driver causes a Machine Check by writing to a bad
 * IO address, there is no way of guaranteeing that the device driver
 * is in any state that is would also be able to write the error data
 * captured to disk, thus we buffer it in NVRAM for analysis on the
 * next boot.
 *
 * In NVRAM the partition containing the error log buffer will looks like:
 * Header (in bytes):
 * +-----------+----------+--------+------------+------------------+
 * | signature | checksum | length | name       | data             |
 * |0          |1         |2      3|4         15|16        length-1|
 * +-----------+----------+--------+------------+------------------+
 *
 * The 'data' section would look like (in bytes):
 * +--------------+------------+-----------------------------------+
 * | event_logged | sequence # | error log                         |
 * |0            3|4          7|8               error_log_size-1   |
 * +--------------+------------+-----------------------------------+
 *
 * event_logged: 0 if event has not been logged to syslog, 1 if it has
 * sequence #: The unique sequence # for each event. (until it wraps)
 * error log: The error log from event_scan
 */
int nvram_write_os_partition(struct nvram_os_partition *part, char * buff,
		int length, unsigned int err_type, unsigned int error_log_cnt)
{
	int rc;
	loff_t tmp_index;
	struct err_log_info info;

	if (part->index == -1) {
		return -ESPIPE;
	}

	if (length > part->size) {
		length = part->size;
	}

	info.error_type = err_type;
	info.seq_num = error_log_cnt;

	tmp_index = part->index;

	rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
	if (rc <= 0) {
		pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
		return rc;
	}

	rc = ppc_md.nvram_write(buff, length, &tmp_index);
	if (rc <= 0) {
		pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
		return rc;
	}

	return 0;
}

int nvram_write_error_log(char * buff, int length,
		unsigned int err_type, unsigned int error_log_cnt)
{
	int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
						err_type, error_log_cnt);
	if (!rc)
		last_unread_rtas_event = get_seconds();
	return rc;
}

/* nvram_read_error_log
 *
 * Reads nvram for error log for at most 'length'
 */
int nvram_read_error_log(char * buff, int length,
		unsigned int * err_type, unsigned int * error_log_cnt)
{
	int rc;
	loff_t tmp_index;
	struct err_log_info info;

	if (rtas_log_partition.index == -1)
		return -1;

	if (length > rtas_log_partition.size)
		length = rtas_log_partition.size;

	tmp_index = rtas_log_partition.index;

	rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
	if (rc <= 0) {
		printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
		return rc;
	}

	rc = ppc_md.nvram_read(buff, length, &tmp_index);
	if (rc <= 0) {
		printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
		return rc;
	}

	*error_log_cnt = info.seq_num;
	*err_type = info.error_type;

	return 0;
}

/* This doesn't actually zero anything, but it sets the event_logged
 * word to tell that this event is safely in syslog.
 */
int nvram_clear_error_log(void)
{
	loff_t tmp_index;
	int clear_word = ERR_FLAG_ALREADY_LOGGED;
	int rc;

	if (rtas_log_partition.index == -1)
		return -1;

	tmp_index = rtas_log_partition.index;

	rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
	if (rc <= 0) {
		printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
		return rc;
	}
	last_unread_rtas_event = 0;

	return 0;
}

/* pseries_nvram_init_os_partition
 *
 * This sets up a partition with an "OS" signature.
 *
 * The general strategy is the following:
 * 1.) If a partition with the indicated name already exists...
 *	- If it's large enough, use it.
 *	- Otherwise, recycle it and keep going.
 * 2.) Search for a free partition that is large enough.
 * 3.) If there's not a free partition large enough, recycle any obsolete
 * OS partitions and try again.
 * 4.) Will first try getting a chunk that will satisfy the requested size.
 * 5.) If a chunk of the requested size cannot be allocated, then try finding
 * a chunk that will satisfy the minum needed.
 *
 * Returns 0 on success, else -1.
 */
static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
						  *part)
{
	loff_t p;
	int size;

	/* Scan nvram for partitions */
	nvram_scan_partitions();

	/* Look for ours */
	p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size);

	/* Found one but too small, remove it */
	if (p && size < part->min_size) {
		pr_info("nvram: Found too small %s partition,"
					" removing it...\n", part->name);
		nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL);
		p = 0;
	}

	/* Create one if we didn't find */
	if (!p) {
		p = nvram_create_partition(part->name, NVRAM_SIG_OS,
					part->req_size, part->min_size);
		if (p == -ENOSPC) {
			pr_info("nvram: No room to create %s partition, "
				"deleting any obsolete OS partitions...\n",
				part->name);
			nvram_remove_partition(NULL, NVRAM_SIG_OS,
					pseries_nvram_os_partitions);
			p = nvram_create_partition(part->name, NVRAM_SIG_OS,
					part->req_size, part->min_size);
		}
	}

	if (p <= 0) {
		pr_err("nvram: Failed to find or create %s"
		       " partition, err %d\n", part->name, (int)p);
		return -1;
	}

	part->index = p;
	part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info);

	return 0;
}

static void __init nvram_init_oops_partition(int rtas_partition_exists)
{
	int rc;

	rc = pseries_nvram_init_os_partition(&oops_log_partition);
	if (rc != 0) {
		if (!rtas_partition_exists)
			return;
		pr_notice("nvram: Using %s partition to log both"
			" RTAS errors and oops/panic reports\n",
			rtas_log_partition.name);
		memcpy(&oops_log_partition, &rtas_log_partition,
						sizeof(rtas_log_partition));
	}
	oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL);
	if (!oops_buf) {
		pr_err("nvram: No memory for %s partition\n",
						oops_log_partition.name);
		return;
	}
	oops_len = (u16*) oops_buf;
	oops_data = oops_buf + sizeof(u16);
	oops_data_sz = oops_log_partition.size - sizeof(u16);

	/*
	 * Figure compression (preceded by elimination of each line's <n>
	 * severity prefix) will reduce the oops/panic report to at most
	 * 45% of its original size.
	 */
	big_oops_buf_sz = (oops_data_sz * 100) / 45;
	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
	if (big_oops_buf) {
		stream.workspace = kmalloc(zlib_deflate_workspacesize(
					WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
		if (!stream.workspace) {
			pr_err("nvram: No memory for compression workspace; "
				"skipping compression of %s partition data\n",
				oops_log_partition.name);
			kfree(big_oops_buf);
			big_oops_buf = NULL;
		}
	} else {
		pr_err("No memory for uncompressed %s data; "
			"skipping compression\n", oops_log_partition.name);
		stream.workspace = NULL;
	}

	rc = kmsg_dump_register(&nvram_kmsg_dumper);
	if (rc != 0) {
		pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
		kfree(oops_buf);
		kfree(big_oops_buf);
		kfree(stream.workspace);
	}
}

static int __init pseries_nvram_init_log_partitions(void)
{
	int rc;

	rc = pseries_nvram_init_os_partition(&rtas_log_partition);
	nvram_init_oops_partition(rc == 0);
	return 0;
}
machine_arch_initcall(pseries, pseries_nvram_init_log_partitions);

int __init pSeries_nvram_init(void)
{
	struct device_node *nvram;
	const unsigned int *nbytes_p;
	unsigned int proplen;

	nvram = of_find_node_by_type(NULL, "nvram");
	if (nvram == NULL)
		return -ENODEV;

	nbytes_p = of_get_property(nvram, "#bytes", &proplen);
	if (nbytes_p == NULL || proplen != sizeof(unsigned int)) {
		of_node_put(nvram);
		return -EIO;
	}

	nvram_size = *nbytes_p;

	nvram_fetch = rtas_token("nvram-fetch");
	nvram_store = rtas_token("nvram-store");
	printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size);
	of_node_put(nvram);

	ppc_md.nvram_read = pSeries_nvram_read;
	ppc_md.nvram_write = pSeries_nvram_write;
	ppc_md.nvram_size = pSeries_nvram_get_size;

	return 0;
}

/*
 * Try to capture the last capture_len bytes of the printk buffer. Return
 * the amount actually captured.
 */
static size_t capture_last_msgs(const char *old_msgs, size_t old_len,
				const char *new_msgs, size_t new_len,
				char *captured, size_t capture_len)
{
	if (new_len >= capture_len) {
		memcpy(captured, new_msgs + (new_len - capture_len),
								capture_len);
		return capture_len;
	} else {
		/* Grab the end of old_msgs. */
		size_t old_tail_len = min(old_len, capture_len - new_len);
		memcpy(captured, old_msgs + (old_len - old_tail_len),
								old_tail_len);
		memcpy(captured + old_tail_len, new_msgs, new_len);
		return old_tail_len + new_len;
	}
}

/*
 * Are we using the ibm,rtas-log for oops/panic reports? And if so,
 * would logging this oops/panic overwrite an RTAS event that rtas_errd
 * hasn't had a chance to read and process? Return 1 if so, else 0.
 *
 * We assume that if rtas_errd hasn't read the RTAS event in
 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
 */
static int clobbering_unread_rtas_event(void)
{
	return (oops_log_partition.index == rtas_log_partition.index
		&& last_unread_rtas_event
		&& get_seconds() - last_unread_rtas_event <=
						NVRAM_RTAS_READ_TIMEOUT);
}

/* Squeeze out each line's <n> severity prefix. */
static size_t elide_severities(char *buf, size_t len)
{
	char *in, *out, *buf_end = buf + len;
	/* Assume a <n> at the very beginning marks the start of a line. */
	int newline = 1;

	in = out = buf;
	while (in < buf_end) {
		if (newline && in+3 <= buf_end &&
				*in == '<' && isdigit(in[1]) && in[2] == '>') {
			in += 3;
			newline = 0;
		} else {
			newline = (*in == '\n');
			*out++ = *in++;
		}
	}
	return out - buf;
}

/* Derived from logfs_compress() */
static int nvram_compress(const void *in, void *out, size_t inlen,
							size_t outlen)
{
	int err, ret;

	ret = -EIO;
	err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
						MEM_LEVEL, Z_DEFAULT_STRATEGY);
	if (err != Z_OK)
		goto error;

	stream.next_in = in;
	stream.avail_in = inlen;
	stream.total_in = 0;
	stream.next_out = out;
	stream.avail_out = outlen;
	stream.total_out = 0;

	err = zlib_deflate(&stream, Z_FINISH);
	if (err != Z_STREAM_END)
		goto error;

	err = zlib_deflateEnd(&stream);
	if (err != Z_OK)
		goto error;

	if (stream.total_out >= stream.total_in)
		goto error;

	ret = stream.total_out;
error:
	return ret;
}

/* Compress the text from big_oops_buf into oops_buf. */
static int zip_oops(size_t text_len)
{
	int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
								oops_data_sz);
	if (zipped_len < 0) {
		pr_err("nvram: compression failed; returned %d\n", zipped_len);
		pr_err("nvram: logging uncompressed oops/panic report\n");
		return -1;
	}
	*oops_len = (u16) zipped_len;
	return 0;
}

/*
 * This is our kmsg_dump callback, called after an oops or panic report
 * has been written to the printk buffer. We want to capture as much
 * of the printk buffer as possible. First, capture as much as we can
 * that we think will compress sufficiently to fit in the lnx,oops-log
 * partition. If that's too much, go back and capture uncompressed text.
 */
static void oops_to_nvram(struct kmsg_dumper *dumper,
		enum kmsg_dump_reason reason,
		const char *old_msgs, unsigned long old_len,
		const char *new_msgs, unsigned long new_len)
{
	static unsigned int oops_count = 0;
	static bool panicking = false;
	static DEFINE_SPINLOCK(lock);
	unsigned long flags;
	size_t text_len;
	unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ;
	int rc = -1;

	switch (reason) {
	case KMSG_DUMP_RESTART:
	case KMSG_DUMP_HALT:
	case KMSG_DUMP_POWEROFF:
		/* These are almost always orderly shutdowns. */
		return;
	case KMSG_DUMP_OOPS:
-	case KMSG_DUMP_KEXEC:
		break;
	case KMSG_DUMP_PANIC:
		panicking = true;
		break;
	case KMSG_DUMP_EMERG:
		if (panicking)
			/* Panic report already captured. */
			return;
		break;
	default:
		pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
						__FUNCTION__, (int) reason);
		return;
	}

	if (clobbering_unread_rtas_event())
		return;

	if (!spin_trylock_irqsave(&lock, flags))
		return;

	if (big_oops_buf) {
		text_len = capture_last_msgs(old_msgs, old_len,
			new_msgs, new_len, big_oops_buf, big_oops_buf_sz);
		text_len = elide_severities(big_oops_buf, text_len);
		rc = zip_oops(text_len);
	}
	if (rc != 0) {
		text_len = capture_last_msgs(old_msgs, old_len,
				new_msgs, new_len, oops_data, oops_data_sz);
		err_type = ERR_TYPE_KERNEL_PANIC;
		*oops_len = (u16) text_len;
	}

	(void) nvram_write_os_partition(&oops_log_partition, oops_buf,
		(int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count);

	spin_unlock_irqrestore(&lock, flags);
}
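Both dumpers touched by this commit follow the same pattern against the
kmsg_dump API of this era: fill in a struct kmsg_dumper, register it, and
filter on the reason argument inside the callback. A minimal, hypothetical
module sketch of that pattern after this commit (all example_* names are
invented for illustration; the callback signature is the one used in the
files above and below):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kmsg_dump.h>

/* With KMSG_DUMP_KEXEC gone, a dumper that only wants crash output
 * needs to accept exactly OOPS and PANIC. */
static void example_dump(struct kmsg_dumper *dumper,
		enum kmsg_dump_reason reason,
		const char *s1, unsigned long l1,
		const char *s2, unsigned long l2)
{
	if (reason != KMSG_DUMP_OOPS && reason != KMSG_DUMP_PANIC)
		return;		/* same filter ramoops uses below */
	/* s1/l1 and s2/l2 are the two chunks of the printk ring buffer;
	 * a real dumper copies their tails out here, as ramoops and the
	 * pseries nvram driver do. */
}

static struct kmsg_dumper example_dumper = {
	.dump = example_dump,
};

static int __init example_init(void)
{
	return kmsg_dump_register(&example_dumper);
}

static void __exit example_exit(void)
{
	kmsg_dump_unregister(&example_dumper);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");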
1 | /* | 1 | /* |
2 | * RAM Oops/Panic logger | 2 | * RAM Oops/Panic logger |
3 | * | 3 | * |
4 | * Copyright (C) 2010 Marco Stornelli <marco.stornelli@gmail.com> | 4 | * Copyright (C) 2010 Marco Stornelli <marco.stornelli@gmail.com> |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU General Public License | 7 | * modify it under the terms of the GNU General Public License |
8 | * version 2 as published by the Free Software Foundation. | 8 | * version 2 as published by the Free Software Foundation. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, but | 10 | * This program is distributed in the hope that it will be useful, but |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * General Public License for more details. | 13 | * General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA | 17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
18 | * 02110-1301 USA | 18 | * 02110-1301 USA |
19 | * | 19 | * |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 22 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
23 | 23 | ||
24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
25 | #include <linux/err.h> | 25 | #include <linux/err.h> |
26 | #include <linux/module.h> | 26 | #include <linux/module.h> |
27 | #include <linux/kmsg_dump.h> | 27 | #include <linux/kmsg_dump.h> |
28 | #include <linux/time.h> | 28 | #include <linux/time.h> |
29 | #include <linux/err.h> | 29 | #include <linux/err.h> |
30 | #include <linux/io.h> | 30 | #include <linux/io.h> |
31 | #include <linux/ioport.h> | 31 | #include <linux/ioport.h> |
32 | #include <linux/platform_device.h> | 32 | #include <linux/platform_device.h> |
33 | #include <linux/slab.h> | 33 | #include <linux/slab.h> |
34 | #include <linux/ramoops.h> | 34 | #include <linux/ramoops.h> |
35 | 35 | ||
36 | #define RAMOOPS_KERNMSG_HDR "====" | 36 | #define RAMOOPS_KERNMSG_HDR "====" |
37 | #define MIN_MEM_SIZE 4096UL | 37 | #define MIN_MEM_SIZE 4096UL |
38 | 38 | ||
39 | static ulong record_size = MIN_MEM_SIZE; | 39 | static ulong record_size = MIN_MEM_SIZE; |
40 | module_param(record_size, ulong, 0400); | 40 | module_param(record_size, ulong, 0400); |
41 | MODULE_PARM_DESC(record_size, | 41 | MODULE_PARM_DESC(record_size, |
42 | "size of each dump done on oops/panic"); | 42 | "size of each dump done on oops/panic"); |
43 | 43 | ||
44 | static ulong mem_address; | 44 | static ulong mem_address; |
45 | module_param(mem_address, ulong, 0400); | 45 | module_param(mem_address, ulong, 0400); |
46 | MODULE_PARM_DESC(mem_address, | 46 | MODULE_PARM_DESC(mem_address, |
47 | "start of reserved RAM used to store oops/panic logs"); | 47 | "start of reserved RAM used to store oops/panic logs"); |
48 | 48 | ||
49 | static ulong mem_size; | 49 | static ulong mem_size; |
50 | module_param(mem_size, ulong, 0400); | 50 | module_param(mem_size, ulong, 0400); |
51 | MODULE_PARM_DESC(mem_size, | 51 | MODULE_PARM_DESC(mem_size, |
52 | "size of reserved RAM used to store oops/panic logs"); | 52 | "size of reserved RAM used to store oops/panic logs"); |
53 | 53 | ||
54 | static int dump_oops = 1; | 54 | static int dump_oops = 1; |
55 | module_param(dump_oops, int, 0600); | 55 | module_param(dump_oops, int, 0600); |
56 | MODULE_PARM_DESC(dump_oops, | 56 | MODULE_PARM_DESC(dump_oops, |
57 | "set to 1 to dump oopses, 0 to only dump panics (default 1)"); | 57 | "set to 1 to dump oopses, 0 to only dump panics (default 1)"); |
58 | 58 | ||
59 | static struct ramoops_context { | 59 | static struct ramoops_context { |
60 | struct kmsg_dumper dump; | 60 | struct kmsg_dumper dump; |
61 | void *virt_addr; | 61 | void *virt_addr; |
62 | phys_addr_t phys_addr; | 62 | phys_addr_t phys_addr; |
63 | unsigned long size; | 63 | unsigned long size; |
64 | unsigned long record_size; | 64 | unsigned long record_size; |
65 | int dump_oops; | 65 | int dump_oops; |
66 | int count; | 66 | int count; |
67 | int max_count; | 67 | int max_count; |
68 | } oops_cxt; | 68 | } oops_cxt; |
69 | 69 | ||
70 | static struct platform_device *dummy; | 70 | static struct platform_device *dummy; |
71 | static struct ramoops_platform_data *dummy_data; | 71 | static struct ramoops_platform_data *dummy_data; |
72 | 72 | ||
73 | static void ramoops_do_dump(struct kmsg_dumper *dumper, | 73 | static void ramoops_do_dump(struct kmsg_dumper *dumper, |
74 | enum kmsg_dump_reason reason, const char *s1, unsigned long l1, | 74 | enum kmsg_dump_reason reason, const char *s1, unsigned long l1, |
75 | const char *s2, unsigned long l2) | 75 | const char *s2, unsigned long l2) |
76 | { | 76 | { |
77 | struct ramoops_context *cxt = container_of(dumper, | 77 | struct ramoops_context *cxt = container_of(dumper, |
78 | struct ramoops_context, dump); | 78 | struct ramoops_context, dump); |
79 | unsigned long s1_start, s2_start; | 79 | unsigned long s1_start, s2_start; |
80 | unsigned long l1_cpy, l2_cpy; | 80 | unsigned long l1_cpy, l2_cpy; |
81 | int res, hdr_size; | 81 | int res, hdr_size; |
82 | char *buf, *buf_orig; | 82 | char *buf, *buf_orig; |
83 | struct timeval timestamp; | 83 | struct timeval timestamp; |
84 | 84 | ||
85 | if (reason != KMSG_DUMP_OOPS && | 85 | if (reason != KMSG_DUMP_OOPS && |
86 | reason != KMSG_DUMP_PANIC && | 86 | reason != KMSG_DUMP_PANIC) |
87 | reason != KMSG_DUMP_KEXEC) | ||
88 | return; | 87 | return; |
89 | 88 | ||
90 | /* Only dump oopses if dump_oops is set */ | 89 | /* Only dump oopses if dump_oops is set */ |
91 | if (reason == KMSG_DUMP_OOPS && !cxt->dump_oops) | 90 | if (reason == KMSG_DUMP_OOPS && !cxt->dump_oops) |
92 | return; | 91 | return; |
93 | 92 | ||
94 | buf = cxt->virt_addr + (cxt->count * cxt->record_size); | 93 | buf = cxt->virt_addr + (cxt->count * cxt->record_size); |
95 | buf_orig = buf; | 94 | buf_orig = buf; |
96 | 95 | ||
97 | memset(buf, '\0', cxt->record_size); | 96 | memset(buf, '\0', cxt->record_size); |
98 | res = sprintf(buf, "%s", RAMOOPS_KERNMSG_HDR); | 97 | res = sprintf(buf, "%s", RAMOOPS_KERNMSG_HDR); |
99 | buf += res; | 98 | buf += res; |
100 | do_gettimeofday(×tamp); | 99 | do_gettimeofday(×tamp); |
101 | res = sprintf(buf, "%lu.%lu\n", (long)timestamp.tv_sec, (long)timestamp.tv_usec); | 100 | res = sprintf(buf, "%lu.%lu\n", (long)timestamp.tv_sec, (long)timestamp.tv_usec); |
102 | buf += res; | 101 | buf += res; |
103 | 102 | ||
104 | hdr_size = buf - buf_orig; | 103 | hdr_size = buf - buf_orig; |
105 | l2_cpy = min(l2, cxt->record_size - hdr_size); | 104 | l2_cpy = min(l2, cxt->record_size - hdr_size); |
106 | l1_cpy = min(l1, cxt->record_size - hdr_size - l2_cpy); | 105 | l1_cpy = min(l1, cxt->record_size - hdr_size - l2_cpy); |
107 | 106 | ||
108 | s2_start = l2 - l2_cpy; | 107 | s2_start = l2 - l2_cpy; |
109 | s1_start = l1 - l1_cpy; | 108 | s1_start = l1 - l1_cpy; |
110 | 109 | ||
111 | memcpy(buf, s1 + s1_start, l1_cpy); | 110 | memcpy(buf, s1 + s1_start, l1_cpy); |
112 | memcpy(buf + l1_cpy, s2 + s2_start, l2_cpy); | 111 | memcpy(buf + l1_cpy, s2 + s2_start, l2_cpy); |
113 | 112 | ||
114 | cxt->count = (cxt->count + 1) % cxt->max_count; | 113 | cxt->count = (cxt->count + 1) % cxt->max_count; |
115 | } | 114 | } |
116 | 115 | ||
117 | static int __init ramoops_probe(struct platform_device *pdev) | 116 | static int __init ramoops_probe(struct platform_device *pdev) |
118 | { | 117 | { |
119 | struct ramoops_platform_data *pdata = pdev->dev.platform_data; | 118 | struct ramoops_platform_data *pdata = pdev->dev.platform_data; |
120 | struct ramoops_context *cxt = &oops_cxt; | 119 | struct ramoops_context *cxt = &oops_cxt; |
121 | int err = -EINVAL; | 120 | int err = -EINVAL; |
122 | 121 | ||
123 | if (!pdata->mem_size || !pdata->record_size) { | 122 | if (!pdata->mem_size || !pdata->record_size) { |
124 | pr_err("The memory size and the record size must be " | 123 | pr_err("The memory size and the record size must be " |
125 | "non-zero\n"); | 124 | "non-zero\n"); |
126 | goto fail3; | 125 | goto fail3; |
127 | } | 126 | } |
128 | 127 | ||
129 | rounddown_pow_of_two(pdata->mem_size); | 128 | rounddown_pow_of_two(pdata->mem_size); |
130 | rounddown_pow_of_two(pdata->record_size); | 129 | rounddown_pow_of_two(pdata->record_size); |
131 | 130 | ||
132 | /* Check for the minimum memory size */ | 131 | /* Check for the minimum memory size */ |
133 | if (pdata->mem_size < MIN_MEM_SIZE && | 132 | if (pdata->mem_size < MIN_MEM_SIZE && |
134 | pdata->record_size < MIN_MEM_SIZE) { | 133 | pdata->record_size < MIN_MEM_SIZE) { |
135 | pr_err("memory size too small, minium is %lu\n", MIN_MEM_SIZE); | 134 | pr_err("memory size too small, minium is %lu\n", MIN_MEM_SIZE); |
136 | goto fail3; | 135 | goto fail3; |
137 | } | 136 | } |
138 | 137 | ||
139 | if (pdata->mem_size < pdata->record_size) { | 138 | if (pdata->mem_size < pdata->record_size) { |
140 | pr_err("The memory size must be larger than the " | 139 | pr_err("The memory size must be larger than the " |
141 | "records size\n"); | 140 | "records size\n"); |
142 | goto fail3; | 141 | goto fail3; |
143 | } | 142 | } |
144 | 143 | ||
145 | cxt->max_count = pdata->mem_size / pdata->record_size; | 144 | cxt->max_count = pdata->mem_size / pdata->record_size; |
146 | cxt->count = 0; | 145 | cxt->count = 0; |
147 | cxt->size = pdata->mem_size; | 146 | cxt->size = pdata->mem_size; |
148 | cxt->phys_addr = pdata->mem_address; | 147 | cxt->phys_addr = pdata->mem_address; |
149 | cxt->record_size = pdata->record_size; | 148 | cxt->record_size = pdata->record_size; |
150 | cxt->dump_oops = pdata->dump_oops; | 149 | cxt->dump_oops = pdata->dump_oops; |
151 | /* | 150 | /* |
152 | * Update the module parameter variables as well so they are visible | 151 | * Update the module parameter variables as well so they are visible |
153 | * through /sys/module/ramoops/parameters/ | 152 | * through /sys/module/ramoops/parameters/ |
154 | */ | 153 | */ |
155 | mem_size = pdata->mem_size; | 154 | mem_size = pdata->mem_size; |
156 | mem_address = pdata->mem_address; | 155 | mem_address = pdata->mem_address; |
157 | record_size = pdata->record_size; | 156 | record_size = pdata->record_size; |
158 | dump_oops = pdata->dump_oops; | 157 | dump_oops = pdata->dump_oops; |
159 | 158 | ||
160 | if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) { | 159 | if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) { |
161 | pr_err("request mem region failed\n"); | 160 | pr_err("request mem region failed\n"); |
162 | err = -EINVAL; | 161 | err = -EINVAL; |
163 | goto fail3; | 162 | goto fail3; |
164 | } | 163 | } |
165 | 164 | ||
166 | cxt->virt_addr = ioremap(cxt->phys_addr, cxt->size); | 165 | cxt->virt_addr = ioremap(cxt->phys_addr, cxt->size); |
167 | if (!cxt->virt_addr) { | 166 | if (!cxt->virt_addr) { |
168 | pr_err("ioremap failed\n"); | 167 | pr_err("ioremap failed\n"); |
169 | goto fail2; | 168 | goto fail2; |
170 | } | 169 | } |
171 | 170 | ||
172 | cxt->dump.dump = ramoops_do_dump; | 171 | cxt->dump.dump = ramoops_do_dump; |
173 | err = kmsg_dump_register(&cxt->dump); | 172 | err = kmsg_dump_register(&cxt->dump); |
174 | if (err) { | 173 | if (err) { |
175 | pr_err("registering kmsg dumper failed\n"); | 174 | pr_err("registering kmsg dumper failed\n"); |
176 | goto fail1; | 175 | goto fail1; |
177 | } | 176 | } |
178 | 177 | ||
179 | return 0; | 178 | return 0; |
180 | 179 | ||
181 | fail1: | 180 | fail1: |
182 | iounmap(cxt->virt_addr); | 181 | iounmap(cxt->virt_addr); |
183 | fail2: | 182 | fail2: |
184 | release_mem_region(cxt->phys_addr, cxt->size); | 183 | release_mem_region(cxt->phys_addr, cxt->size); |
185 | fail3: | 184 | fail3: |
186 | return err; | 185 | return err; |
187 | } | 186 | } |
188 | 187 | ||
189 | static int __exit ramoops_remove(struct platform_device *pdev) | 188 | static int __exit ramoops_remove(struct platform_device *pdev) |
190 | { | 189 | { |
191 | struct ramoops_context *cxt = &oops_cxt; | 190 | struct ramoops_context *cxt = &oops_cxt; |
192 | 191 | ||
193 | if (kmsg_dump_unregister(&cxt->dump) < 0) | 192 | if (kmsg_dump_unregister(&cxt->dump) < 0) |
194 | pr_warn("could not unregister kmsg_dumper\n"); | 193 | pr_warn("could not unregister kmsg_dumper\n"); |
195 | 194 | ||
196 | iounmap(cxt->virt_addr); | 195 | iounmap(cxt->virt_addr); |
197 | release_mem_region(cxt->phys_addr, cxt->size); | 196 | release_mem_region(cxt->phys_addr, cxt->size); |
198 | return 0; | 197 | return 0; |
199 | } | 198 | } |
200 | 199 | ||
201 | static struct platform_driver ramoops_driver = { | 200 | static struct platform_driver ramoops_driver = { |
202 | .remove = __exit_p(ramoops_remove), | 201 | .remove = __exit_p(ramoops_remove), |
203 | .driver = { | 202 | .driver = { |
204 | .name = "ramoops", | 203 | .name = "ramoops", |
205 | .owner = THIS_MODULE, | 204 | .owner = THIS_MODULE, |
206 | }, | 205 | }, |
207 | }; | 206 | }; |
208 | 207 | ||
209 | static int __init ramoops_init(void) | 208 | static int __init ramoops_init(void) |
210 | { | 209 | { |
211 | int ret; | 210 | int ret; |
212 | ret = platform_driver_probe(&ramoops_driver, ramoops_probe); | 211 | ret = platform_driver_probe(&ramoops_driver, ramoops_probe); |
213 | if (ret == -ENODEV) { | 212 | if (ret == -ENODEV) { |
214 | /* | 213 | /* |
215 | * If we didn't find a platform device, we use the module | 214 | * If we didn't find a platform device, we use the module |
216 | * parameters to build the platform data on the fly. | 215 | * parameters to build the platform data on the fly. |
217 | */ | 216 | */ |
218 | pr_info("platform device not found, using module parameters\n"); | 217 | pr_info("platform device not found, using module parameters\n"); |
219 | dummy_data = kzalloc(sizeof(struct ramoops_platform_data), | 218 | dummy_data = kzalloc(sizeof(struct ramoops_platform_data), |
220 | GFP_KERNEL); | 219 | GFP_KERNEL); |
221 | if (!dummy_data) | 220 | if (!dummy_data) |
222 | return -ENOMEM; | 221 | return -ENOMEM; |
223 | dummy_data->mem_size = mem_size; | 222 | dummy_data->mem_size = mem_size; |
224 | dummy_data->mem_address = mem_address; | 223 | dummy_data->mem_address = mem_address; |
225 | dummy_data->record_size = record_size; | 224 | dummy_data->record_size = record_size; |
226 | dummy_data->dump_oops = dump_oops; | 225 | dummy_data->dump_oops = dump_oops; |
227 | dummy = platform_create_bundle(&ramoops_driver, ramoops_probe, | 226 | dummy = platform_create_bundle(&ramoops_driver, ramoops_probe, |
228 | NULL, 0, dummy_data, | 227 | NULL, 0, dummy_data, |
229 | sizeof(struct ramoops_platform_data)); | 228 | sizeof(struct ramoops_platform_data)); |
230 | 229 | ||
231 | if (IS_ERR(dummy)) | 230 | if (IS_ERR(dummy)) |
232 | ret = PTR_ERR(dummy); | 231 | ret = PTR_ERR(dummy); |
233 | else | 232 | else |
234 | ret = 0; | 233 | ret = 0; |
235 | } | 234 | } |
236 | 235 | ||
237 | return ret; | 236 | return ret; |
238 | } | 237 | } |
239 | 238 | ||
240 | static void __exit ramoops_exit(void) | 239 | static void __exit ramoops_exit(void) |
241 | { | 240 | { |
242 | platform_driver_unregister(&ramoops_driver); | 241 | platform_driver_unregister(&ramoops_driver); |
243 | kfree(dummy_data); | 242 | kfree(dummy_data); |
244 | } | 243 | } |
245 | 244 | ||
246 | module_init(ramoops_init); | 245 | module_init(ramoops_init); |
247 | module_exit(ramoops_exit); | 246 | module_exit(ramoops_exit); |
248 | 247 | ||
249 | MODULE_LICENSE("GPL"); | 248 | MODULE_LICENSE("GPL"); |
250 | MODULE_AUTHOR("Marco Stornelli <marco.stornelli@gmail.com>"); | 249 | MODULE_AUTHOR("Marco Stornelli <marco.stornelli@gmail.com>"); |
251 | MODULE_DESCRIPTION("RAM Oops/Panic logger/driver"); | 250 | MODULE_DESCRIPTION("RAM Oops/Panic logger/driver"); |
252 | 251 |
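The ramoops probe path above takes its configuration either from a platform device or, failing that, from the module parameters via the dummy platform data built in ramoops_init(). A minimal board-code sketch of the platform-device route follows; the reserved address, the sizes, and the <linux/ramoops.h> home of struct ramoops_platform_data are illustrative assumptions, not part of this commit.

#include <linux/platform_device.h>
#include <linux/ramoops.h>	/* assumed location of struct ramoops_platform_data */

/* Hypothetical reserved RAM region; both sizes are already powers of
 * two, matching the rounddown_pow_of_two() handling in ramoops_probe(). */
static struct ramoops_platform_data board_ramoops_data = {
	.mem_address	= 0x8f000000,
	.mem_size	= 0x100000,	/* 1 MiB total */
	.record_size	= 0x1000,	/* 4 KiB per record -> 256 records */
	.dump_oops	= 1,		/* capture oopses as well as panics */
};

static struct platform_device board_ramoops_dev = {
	.name	= "ramoops",		/* matches ramoops_driver.driver.name */
	.id	= -1,
	.dev	= {
		.platform_data = &board_ramoops_data,
	},
};

/* Board init code would call platform_device_register(&board_ramoops_dev);
 * platform_driver_probe() then finds a real device and the
 * module-parameter fallback above is never taken. */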
1 | /* | 1 | /* |
2 | * MTD Oops/Panic logger | 2 | * MTD Oops/Panic logger |
3 | * | 3 | * |
4 | * Copyright © 2007 Nokia Corporation. All rights reserved. | 4 | * Copyright © 2007 Nokia Corporation. All rights reserved. |
5 | * | 5 | * |
6 | * Author: Richard Purdie <rpurdie@openedhand.com> | 6 | * Author: Richard Purdie <rpurdie@openedhand.com> |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or | 8 | * This program is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU General Public License | 9 | * modify it under the terms of the GNU General Public License |
10 | * version 2 as published by the Free Software Foundation. | 10 | * version 2 as published by the Free Software Foundation. |
11 | * | 11 | * |
12 | * This program is distributed in the hope that it will be useful, but | 12 | * This program is distributed in the hope that it will be useful, but |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | 13 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * General Public License for more details. | 15 | * General Public License for more details. |
16 | * | 16 | * |
17 | * You should have received a copy of the GNU General Public License | 17 | * You should have received a copy of the GNU General Public License |
18 | * along with this program; if not, write to the Free Software | 18 | * along with this program; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA | 19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
20 | * 02110-1301 USA | 20 | * 02110-1301 USA |
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <linux/console.h> | 26 | #include <linux/console.h> |
27 | #include <linux/vmalloc.h> | 27 | #include <linux/vmalloc.h> |
28 | #include <linux/workqueue.h> | 28 | #include <linux/workqueue.h> |
29 | #include <linux/sched.h> | 29 | #include <linux/sched.h> |
30 | #include <linux/wait.h> | 30 | #include <linux/wait.h> |
31 | #include <linux/delay.h> | 31 | #include <linux/delay.h> |
32 | #include <linux/interrupt.h> | 32 | #include <linux/interrupt.h> |
33 | #include <linux/mtd/mtd.h> | 33 | #include <linux/mtd/mtd.h> |
34 | #include <linux/kmsg_dump.h> | 34 | #include <linux/kmsg_dump.h> |
35 | 35 | ||
36 | /* Maximum MTD partition size */ | 36 | /* Maximum MTD partition size */ |
37 | #define MTDOOPS_MAX_MTD_SIZE (8 * 1024 * 1024) | 37 | #define MTDOOPS_MAX_MTD_SIZE (8 * 1024 * 1024) |
38 | 38 | ||
39 | #define MTDOOPS_KERNMSG_MAGIC 0x5d005d00 | 39 | #define MTDOOPS_KERNMSG_MAGIC 0x5d005d00 |
40 | #define MTDOOPS_HEADER_SIZE 8 | 40 | #define MTDOOPS_HEADER_SIZE 8 |
41 | 41 | ||
42 | static unsigned long record_size = 4096; | 42 | static unsigned long record_size = 4096; |
43 | module_param(record_size, ulong, 0400); | 43 | module_param(record_size, ulong, 0400); |
44 | MODULE_PARM_DESC(record_size, | 44 | MODULE_PARM_DESC(record_size, |
45 | "record size for MTD OOPS pages in bytes (default 4096)"); | 45 | "record size for MTD OOPS pages in bytes (default 4096)"); |
46 | 46 | ||
47 | static char mtddev[80]; | 47 | static char mtddev[80]; |
48 | module_param_string(mtddev, mtddev, 80, 0400); | 48 | module_param_string(mtddev, mtddev, 80, 0400); |
49 | MODULE_PARM_DESC(mtddev, | 49 | MODULE_PARM_DESC(mtddev, |
50 | "name or index number of the MTD device to use"); | 50 | "name or index number of the MTD device to use"); |
51 | 51 | ||
52 | static int dump_oops = 1; | 52 | static int dump_oops = 1; |
53 | module_param(dump_oops, int, 0600); | 53 | module_param(dump_oops, int, 0600); |
54 | MODULE_PARM_DESC(dump_oops, | 54 | MODULE_PARM_DESC(dump_oops, |
55 | "set to 1 to dump oopses, 0 to only dump panics (default 1)"); | 55 | "set to 1 to dump oopses, 0 to only dump panics (default 1)"); |
56 | 56 | ||
57 | static struct mtdoops_context { | 57 | static struct mtdoops_context { |
58 | struct kmsg_dumper dump; | 58 | struct kmsg_dumper dump; |
59 | 59 | ||
60 | int mtd_index; | 60 | int mtd_index; |
61 | struct work_struct work_erase; | 61 | struct work_struct work_erase; |
62 | struct work_struct work_write; | 62 | struct work_struct work_write; |
63 | struct mtd_info *mtd; | 63 | struct mtd_info *mtd; |
64 | int oops_pages; | 64 | int oops_pages; |
65 | int nextpage; | 65 | int nextpage; |
66 | int nextcount; | 66 | int nextcount; |
67 | unsigned long *oops_page_used; | 67 | unsigned long *oops_page_used; |
68 | 68 | ||
69 | void *oops_buf; | 69 | void *oops_buf; |
70 | } oops_cxt; | 70 | } oops_cxt; |
71 | 71 | ||
72 | static void mark_page_used(struct mtdoops_context *cxt, int page) | 72 | static void mark_page_used(struct mtdoops_context *cxt, int page) |
73 | { | 73 | { |
74 | set_bit(page, cxt->oops_page_used); | 74 | set_bit(page, cxt->oops_page_used); |
75 | } | 75 | } |
76 | 76 | ||
77 | static void mark_page_unused(struct mtdoops_context *cxt, int page) | 77 | static void mark_page_unused(struct mtdoops_context *cxt, int page) |
78 | { | 78 | { |
79 | clear_bit(page, cxt->oops_page_used); | 79 | clear_bit(page, cxt->oops_page_used); |
80 | } | 80 | } |
81 | 81 | ||
82 | static int page_is_used(struct mtdoops_context *cxt, int page) | 82 | static int page_is_used(struct mtdoops_context *cxt, int page) |
83 | { | 83 | { |
84 | return test_bit(page, cxt->oops_page_used); | 84 | return test_bit(page, cxt->oops_page_used); |
85 | } | 85 | } |
86 | 86 | ||
87 | static void mtdoops_erase_callback(struct erase_info *done) | 87 | static void mtdoops_erase_callback(struct erase_info *done) |
88 | { | 88 | { |
89 | wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv; | 89 | wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv; |
90 | wake_up(wait_q); | 90 | wake_up(wait_q); |
91 | } | 91 | } |
92 | 92 | ||
93 | static int mtdoops_erase_block(struct mtdoops_context *cxt, int offset) | 93 | static int mtdoops_erase_block(struct mtdoops_context *cxt, int offset) |
94 | { | 94 | { |
95 | struct mtd_info *mtd = cxt->mtd; | 95 | struct mtd_info *mtd = cxt->mtd; |
96 | u32 start_page_offset = mtd_div_by_eb(offset, mtd) * mtd->erasesize; | 96 | u32 start_page_offset = mtd_div_by_eb(offset, mtd) * mtd->erasesize; |
97 | u32 start_page = start_page_offset / record_size; | 97 | u32 start_page = start_page_offset / record_size; |
98 | u32 erase_pages = mtd->erasesize / record_size; | 98 | u32 erase_pages = mtd->erasesize / record_size; |
99 | struct erase_info erase; | 99 | struct erase_info erase; |
100 | DECLARE_WAITQUEUE(wait, current); | 100 | DECLARE_WAITQUEUE(wait, current); |
101 | wait_queue_head_t wait_q; | 101 | wait_queue_head_t wait_q; |
102 | int ret; | 102 | int ret; |
103 | int page; | 103 | int page; |
104 | 104 | ||
105 | init_waitqueue_head(&wait_q); | 105 | init_waitqueue_head(&wait_q); |
106 | erase.mtd = mtd; | 106 | erase.mtd = mtd; |
107 | erase.callback = mtdoops_erase_callback; | 107 | erase.callback = mtdoops_erase_callback; |
108 | erase.addr = offset; | 108 | erase.addr = offset; |
109 | erase.len = mtd->erasesize; | 109 | erase.len = mtd->erasesize; |
110 | erase.priv = (u_long)&wait_q; | 110 | erase.priv = (u_long)&wait_q; |
111 | 111 | ||
112 | set_current_state(TASK_INTERRUPTIBLE); | 112 | set_current_state(TASK_INTERRUPTIBLE); |
113 | add_wait_queue(&wait_q, &wait); | 113 | add_wait_queue(&wait_q, &wait); |
114 | 114 | ||
115 | ret = mtd_erase(mtd, &erase); | 115 | ret = mtd_erase(mtd, &erase); |
116 | if (ret) { | 116 | if (ret) { |
117 | set_current_state(TASK_RUNNING); | 117 | set_current_state(TASK_RUNNING); |
118 | remove_wait_queue(&wait_q, &wait); | 118 | remove_wait_queue(&wait_q, &wait); |
119 | printk(KERN_WARNING "mtdoops: erase of region [0x%llx, 0x%llx] on \"%s\" failed\n", | 119 | printk(KERN_WARNING "mtdoops: erase of region [0x%llx, 0x%llx] on \"%s\" failed\n", |
120 | (unsigned long long)erase.addr, | 120 | (unsigned long long)erase.addr, |
121 | (unsigned long long)erase.len, mtddev); | 121 | (unsigned long long)erase.len, mtddev); |
122 | return ret; | 122 | return ret; |
123 | } | 123 | } |
124 | 124 | ||
125 | schedule(); /* Wait for erase to finish. */ | 125 | schedule(); /* Wait for erase to finish. */ |
126 | remove_wait_queue(&wait_q, &wait); | 126 | remove_wait_queue(&wait_q, &wait); |
127 | 127 | ||
128 | /* Mark pages as unused */ | 128 | /* Mark pages as unused */ |
129 | for (page = start_page; page < start_page + erase_pages; page++) | 129 | for (page = start_page; page < start_page + erase_pages; page++) |
130 | mark_page_unused(cxt, page); | 130 | mark_page_unused(cxt, page); |
131 | 131 | ||
132 | return 0; | 132 | return 0; |
133 | } | 133 | } |
134 | 134 | ||
135 | static void mtdoops_inc_counter(struct mtdoops_context *cxt) | 135 | static void mtdoops_inc_counter(struct mtdoops_context *cxt) |
136 | { | 136 | { |
137 | cxt->nextpage++; | 137 | cxt->nextpage++; |
138 | if (cxt->nextpage >= cxt->oops_pages) | 138 | if (cxt->nextpage >= cxt->oops_pages) |
139 | cxt->nextpage = 0; | 139 | cxt->nextpage = 0; |
140 | cxt->nextcount++; | 140 | cxt->nextcount++; |
141 | if (cxt->nextcount == 0xffffffff) | 141 | if (cxt->nextcount == 0xffffffff) |
142 | cxt->nextcount = 0; | 142 | cxt->nextcount = 0; |
143 | 143 | ||
144 | if (page_is_used(cxt, cxt->nextpage)) { | 144 | if (page_is_used(cxt, cxt->nextpage)) { |
145 | schedule_work(&cxt->work_erase); | 145 | schedule_work(&cxt->work_erase); |
146 | return; | 146 | return; |
147 | } | 147 | } |
148 | 148 | ||
149 | printk(KERN_DEBUG "mtdoops: ready %d, %d (no erase)\n", | 149 | printk(KERN_DEBUG "mtdoops: ready %d, %d (no erase)\n", |
150 | cxt->nextpage, cxt->nextcount); | 150 | cxt->nextpage, cxt->nextcount); |
151 | } | 151 | } |
152 | 152 | ||
153 | /* Scheduled work - when we can't proceed without erasing a block */ | 153 | /* Scheduled work - when we can't proceed without erasing a block */ |
154 | static void mtdoops_workfunc_erase(struct work_struct *work) | 154 | static void mtdoops_workfunc_erase(struct work_struct *work) |
155 | { | 155 | { |
156 | struct mtdoops_context *cxt = | 156 | struct mtdoops_context *cxt = |
157 | container_of(work, struct mtdoops_context, work_erase); | 157 | container_of(work, struct mtdoops_context, work_erase); |
158 | struct mtd_info *mtd = cxt->mtd; | 158 | struct mtd_info *mtd = cxt->mtd; |
159 | int i = 0, j, ret, mod; | 159 | int i = 0, j, ret, mod; |
160 | 160 | ||
161 | /* We were unregistered */ | 161 | /* We were unregistered */ |
162 | if (!mtd) | 162 | if (!mtd) |
163 | return; | 163 | return; |
164 | 164 | ||
165 | mod = (cxt->nextpage * record_size) % mtd->erasesize; | 165 | mod = (cxt->nextpage * record_size) % mtd->erasesize; |
166 | if (mod != 0) { | 166 | if (mod != 0) { |
167 | cxt->nextpage = cxt->nextpage + ((mtd->erasesize - mod) / record_size); | 167 | cxt->nextpage = cxt->nextpage + ((mtd->erasesize - mod) / record_size); |
168 | if (cxt->nextpage >= cxt->oops_pages) | 168 | if (cxt->nextpage >= cxt->oops_pages) |
169 | cxt->nextpage = 0; | 169 | cxt->nextpage = 0; |
170 | } | 170 | } |
171 | 171 | ||
172 | while (mtd_can_have_bb(mtd)) { | 172 | while (mtd_can_have_bb(mtd)) { |
173 | ret = mtd_block_isbad(mtd, cxt->nextpage * record_size); | 173 | ret = mtd_block_isbad(mtd, cxt->nextpage * record_size); |
174 | if (!ret) | 174 | if (!ret) |
175 | break; | 175 | break; |
176 | if (ret < 0) { | 176 | if (ret < 0) { |
177 | printk(KERN_ERR "mtdoops: block_isbad failed, aborting\n"); | 177 | printk(KERN_ERR "mtdoops: block_isbad failed, aborting\n"); |
178 | return; | 178 | return; |
179 | } | 179 | } |
180 | badblock: | 180 | badblock: |
181 | printk(KERN_WARNING "mtdoops: bad block at %08lx\n", | 181 | printk(KERN_WARNING "mtdoops: bad block at %08lx\n", |
182 | cxt->nextpage * record_size); | 182 | cxt->nextpage * record_size); |
183 | i++; | 183 | i++; |
184 | cxt->nextpage = cxt->nextpage + (mtd->erasesize / record_size); | 184 | cxt->nextpage = cxt->nextpage + (mtd->erasesize / record_size); |
185 | if (cxt->nextpage >= cxt->oops_pages) | 185 | if (cxt->nextpage >= cxt->oops_pages) |
186 | cxt->nextpage = 0; | 186 | cxt->nextpage = 0; |
187 | if (i == cxt->oops_pages / (mtd->erasesize / record_size)) { | 187 | if (i == cxt->oops_pages / (mtd->erasesize / record_size)) { |
188 | printk(KERN_ERR "mtdoops: all blocks bad!\n"); | 188 | printk(KERN_ERR "mtdoops: all blocks bad!\n"); |
189 | return; | 189 | return; |
190 | } | 190 | } |
191 | } | 191 | } |
192 | 192 | ||
193 | for (j = 0, ret = -1; (j < 3) && (ret < 0); j++) | 193 | for (j = 0, ret = -1; (j < 3) && (ret < 0); j++) |
194 | ret = mtdoops_erase_block(cxt, cxt->nextpage * record_size); | 194 | ret = mtdoops_erase_block(cxt, cxt->nextpage * record_size); |
195 | 195 | ||
196 | if (ret >= 0) { | 196 | if (ret >= 0) { |
197 | printk(KERN_DEBUG "mtdoops: ready %d, %d\n", | 197 | printk(KERN_DEBUG "mtdoops: ready %d, %d\n", |
198 | cxt->nextpage, cxt->nextcount); | 198 | cxt->nextpage, cxt->nextcount); |
199 | return; | 199 | return; |
200 | } | 200 | } |
201 | 201 | ||
202 | if (mtd_can_have_bb(mtd) && ret == -EIO) { | 202 | if (mtd_can_have_bb(mtd) && ret == -EIO) { |
203 | ret = mtd_block_markbad(mtd, cxt->nextpage * record_size); | 203 | ret = mtd_block_markbad(mtd, cxt->nextpage * record_size); |
204 | if (ret < 0) { | 204 | if (ret < 0) { |
205 | printk(KERN_ERR "mtdoops: block_markbad failed, aborting\n"); | 205 | printk(KERN_ERR "mtdoops: block_markbad failed, aborting\n"); |
206 | return; | 206 | return; |
207 | } | 207 | } |
208 | } | 208 | } |
209 | goto badblock; | 209 | goto badblock; |
210 | } | 210 | } |
211 | 211 | ||
212 | static void mtdoops_write(struct mtdoops_context *cxt, int panic) | 212 | static void mtdoops_write(struct mtdoops_context *cxt, int panic) |
213 | { | 213 | { |
214 | struct mtd_info *mtd = cxt->mtd; | 214 | struct mtd_info *mtd = cxt->mtd; |
215 | size_t retlen; | 215 | size_t retlen; |
216 | u32 *hdr; | 216 | u32 *hdr; |
217 | int ret; | 217 | int ret; |
218 | 218 | ||
219 | /* Add mtdoops header to the buffer */ | 219 | /* Add mtdoops header to the buffer */ |
220 | hdr = cxt->oops_buf; | 220 | hdr = cxt->oops_buf; |
221 | hdr[0] = cxt->nextcount; | 221 | hdr[0] = cxt->nextcount; |
222 | hdr[1] = MTDOOPS_KERNMSG_MAGIC; | 222 | hdr[1] = MTDOOPS_KERNMSG_MAGIC; |
223 | 223 | ||
224 | if (panic) { | 224 | if (panic) { |
225 | ret = mtd_panic_write(mtd, cxt->nextpage * record_size, | 225 | ret = mtd_panic_write(mtd, cxt->nextpage * record_size, |
226 | record_size, &retlen, cxt->oops_buf); | 226 | record_size, &retlen, cxt->oops_buf); |
227 | if (ret == -EOPNOTSUPP) { | 227 | if (ret == -EOPNOTSUPP) { |
228 | printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n"); | 228 | printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n"); |
229 | return; | 229 | return; |
230 | } | 230 | } |
231 | } else | 231 | } else |
232 | ret = mtd_write(mtd, cxt->nextpage * record_size, | 232 | ret = mtd_write(mtd, cxt->nextpage * record_size, |
233 | record_size, &retlen, cxt->oops_buf); | 233 | record_size, &retlen, cxt->oops_buf); |
234 | 234 | ||
235 | if (retlen != record_size || ret < 0) | 235 | if (retlen != record_size || ret < 0) |
236 | printk(KERN_ERR "mtdoops: write failure at %ld (%td of %ld written), error %d\n", | 236 | printk(KERN_ERR "mtdoops: write failure at %ld (%td of %ld written), error %d\n", |
237 | cxt->nextpage * record_size, retlen, record_size, ret); | 237 | cxt->nextpage * record_size, retlen, record_size, ret); |
238 | mark_page_used(cxt, cxt->nextpage); | 238 | mark_page_used(cxt, cxt->nextpage); |
239 | memset(cxt->oops_buf, 0xff, record_size); | 239 | memset(cxt->oops_buf, 0xff, record_size); |
240 | 240 | ||
241 | mtdoops_inc_counter(cxt); | 241 | mtdoops_inc_counter(cxt); |
242 | } | 242 | } |
243 | 243 | ||
244 | static void mtdoops_workfunc_write(struct work_struct *work) | 244 | static void mtdoops_workfunc_write(struct work_struct *work) |
245 | { | 245 | { |
246 | struct mtdoops_context *cxt = | 246 | struct mtdoops_context *cxt = |
247 | container_of(work, struct mtdoops_context, work_write); | 247 | container_of(work, struct mtdoops_context, work_write); |
248 | 248 | ||
249 | mtdoops_write(cxt, 0); | 249 | mtdoops_write(cxt, 0); |
250 | } | 250 | } |
251 | 251 | ||
252 | static void find_next_position(struct mtdoops_context *cxt) | 252 | static void find_next_position(struct mtdoops_context *cxt) |
253 | { | 253 | { |
254 | struct mtd_info *mtd = cxt->mtd; | 254 | struct mtd_info *mtd = cxt->mtd; |
255 | int ret, page, maxpos = 0; | 255 | int ret, page, maxpos = 0; |
256 | u32 count[2], maxcount = 0xffffffff; | 256 | u32 count[2], maxcount = 0xffffffff; |
257 | size_t retlen; | 257 | size_t retlen; |
258 | 258 | ||
259 | for (page = 0; page < cxt->oops_pages; page++) { | 259 | for (page = 0; page < cxt->oops_pages; page++) { |
260 | if (mtd_can_have_bb(mtd) && | 260 | if (mtd_can_have_bb(mtd) && |
261 | mtd_block_isbad(mtd, page * record_size)) | 261 | mtd_block_isbad(mtd, page * record_size)) |
262 | continue; | 262 | continue; |
263 | /* Assume the page is used */ | 263 | /* Assume the page is used */ |
264 | mark_page_used(cxt, page); | 264 | mark_page_used(cxt, page); |
265 | ret = mtd_read(mtd, page * record_size, MTDOOPS_HEADER_SIZE, | 265 | ret = mtd_read(mtd, page * record_size, MTDOOPS_HEADER_SIZE, |
266 | &retlen, (u_char *)&count[0]); | 266 | &retlen, (u_char *)&count[0]); |
267 | if (retlen != MTDOOPS_HEADER_SIZE || | 267 | if (retlen != MTDOOPS_HEADER_SIZE || |
268 | (ret < 0 && !mtd_is_bitflip(ret))) { | 268 | (ret < 0 && !mtd_is_bitflip(ret))) { |
269 | printk(KERN_ERR "mtdoops: read failure at %ld (%td of %d read), err %d\n", | 269 | printk(KERN_ERR "mtdoops: read failure at %ld (%td of %d read), err %d\n", |
270 | page * record_size, retlen, | 270 | page * record_size, retlen, |
271 | MTDOOPS_HEADER_SIZE, ret); | 271 | MTDOOPS_HEADER_SIZE, ret); |
272 | continue; | 272 | continue; |
273 | } | 273 | } |
274 | 274 | ||
275 | if (count[0] == 0xffffffff && count[1] == 0xffffffff) | 275 | if (count[0] == 0xffffffff && count[1] == 0xffffffff) |
276 | mark_page_unused(cxt, page); | 276 | mark_page_unused(cxt, page); |
277 | if (count[0] == 0xffffffff) | 277 | if (count[0] == 0xffffffff) |
278 | continue; | 278 | continue; |
279 | if (maxcount == 0xffffffff) { | 279 | if (maxcount == 0xffffffff) { |
280 | maxcount = count[0]; | 280 | maxcount = count[0]; |
281 | maxpos = page; | 281 | maxpos = page; |
282 | } else if (count[0] < 0x40000000 && maxcount > 0xc0000000) { | 282 | } else if (count[0] < 0x40000000 && maxcount > 0xc0000000) { |
283 | maxcount = count[0]; | 283 | maxcount = count[0]; |
284 | maxpos = page; | 284 | maxpos = page; |
285 | } else if (count[0] > maxcount && count[0] < 0xc0000000) { | 285 | } else if (count[0] > maxcount && count[0] < 0xc0000000) { |
286 | maxcount = count[0]; | 286 | maxcount = count[0]; |
287 | maxpos = page; | 287 | maxpos = page; |
288 | } else if (count[0] > maxcount && count[0] > 0xc0000000 | 288 | } else if (count[0] > maxcount && count[0] > 0xc0000000 |
289 | && maxcount > 0x80000000) { | 289 | && maxcount > 0x80000000) { |
290 | maxcount = count[0]; | 290 | maxcount = count[0]; |
291 | maxpos = page; | 291 | maxpos = page; |
292 | } | 292 | } |
293 | } | 293 | } |
294 | if (maxcount == 0xffffffff) { | 294 | if (maxcount == 0xffffffff) { |
295 | cxt->nextpage = 0; | 295 | cxt->nextpage = 0; |
296 | cxt->nextcount = 1; | 296 | cxt->nextcount = 1; |
297 | schedule_work(&cxt->work_erase); | 297 | schedule_work(&cxt->work_erase); |
298 | return; | 298 | return; |
299 | } | 299 | } |
300 | 300 | ||
301 | cxt->nextpage = maxpos; | 301 | cxt->nextpage = maxpos; |
302 | cxt->nextcount = maxcount; | 302 | cxt->nextcount = maxcount; |
303 | 303 | ||
304 | mtdoops_inc_counter(cxt); | 304 | mtdoops_inc_counter(cxt); |
305 | } | 305 | } |
306 | 306 | ||
307 | static void mtdoops_do_dump(struct kmsg_dumper *dumper, | 307 | static void mtdoops_do_dump(struct kmsg_dumper *dumper, |
308 | enum kmsg_dump_reason reason, const char *s1, unsigned long l1, | 308 | enum kmsg_dump_reason reason, const char *s1, unsigned long l1, |
309 | const char *s2, unsigned long l2) | 309 | const char *s2, unsigned long l2) |
310 | { | 310 | { |
311 | struct mtdoops_context *cxt = container_of(dumper, | 311 | struct mtdoops_context *cxt = container_of(dumper, |
312 | struct mtdoops_context, dump); | 312 | struct mtdoops_context, dump); |
313 | unsigned long s1_start, s2_start; | 313 | unsigned long s1_start, s2_start; |
314 | unsigned long l1_cpy, l2_cpy; | 314 | unsigned long l1_cpy, l2_cpy; |
315 | char *dst; | 315 | char *dst; |
316 | 316 | ||
317 | if (reason != KMSG_DUMP_OOPS && | 317 | if (reason != KMSG_DUMP_OOPS && |
318 | reason != KMSG_DUMP_PANIC && | 318 | reason != KMSG_DUMP_PANIC) |
319 | reason != KMSG_DUMP_KEXEC) | ||
320 | return; | 319 | return; |
321 | 320 | ||
322 | /* Only dump oopses if dump_oops is set */ | 321 | /* Only dump oopses if dump_oops is set */ |
323 | if (reason == KMSG_DUMP_OOPS && !dump_oops) | 322 | if (reason == KMSG_DUMP_OOPS && !dump_oops) |
324 | return; | 323 | return; |
325 | 324 | ||
326 | dst = cxt->oops_buf + MTDOOPS_HEADER_SIZE; /* Skip the header */ | 325 | dst = cxt->oops_buf + MTDOOPS_HEADER_SIZE; /* Skip the header */ |
327 | l2_cpy = min(l2, record_size - MTDOOPS_HEADER_SIZE); | 326 | l2_cpy = min(l2, record_size - MTDOOPS_HEADER_SIZE); |
328 | l1_cpy = min(l1, record_size - MTDOOPS_HEADER_SIZE - l2_cpy); | 327 | l1_cpy = min(l1, record_size - MTDOOPS_HEADER_SIZE - l2_cpy); |
329 | 328 | ||
330 | s2_start = l2 - l2_cpy; | 329 | s2_start = l2 - l2_cpy; |
331 | s1_start = l1 - l1_cpy; | 330 | s1_start = l1 - l1_cpy; |
332 | 331 | ||
333 | memcpy(dst, s1 + s1_start, l1_cpy); | 332 | memcpy(dst, s1 + s1_start, l1_cpy); |
334 | memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); | 333 | memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); |
335 | 334 | ||
336 | /* Panics must be written immediately */ | 335 | /* Panics must be written immediately */ |
337 | if (reason != KMSG_DUMP_OOPS) | 336 | if (reason != KMSG_DUMP_OOPS) |
338 | mtdoops_write(cxt, 1); | 337 | mtdoops_write(cxt, 1); |
339 | 338 | ||
340 | /* For other cases, schedule work to write it "nicely" */ | 339 | /* For other cases, schedule work to write it "nicely" */ |
341 | schedule_work(&cxt->work_write); | 340 | schedule_work(&cxt->work_write); |
342 | } | 341 | } |
343 | 342 | ||
344 | static void mtdoops_notify_add(struct mtd_info *mtd) | 343 | static void mtdoops_notify_add(struct mtd_info *mtd) |
345 | { | 344 | { |
346 | struct mtdoops_context *cxt = &oops_cxt; | 345 | struct mtdoops_context *cxt = &oops_cxt; |
347 | u64 mtdoops_pages = div_u64(mtd->size, record_size); | 346 | u64 mtdoops_pages = div_u64(mtd->size, record_size); |
348 | int err; | 347 | int err; |
349 | 348 | ||
350 | if (!strcmp(mtd->name, mtddev)) | 349 | if (!strcmp(mtd->name, mtddev)) |
351 | cxt->mtd_index = mtd->index; | 350 | cxt->mtd_index = mtd->index; |
352 | 351 | ||
353 | if (mtd->index != cxt->mtd_index || cxt->mtd_index < 0) | 352 | if (mtd->index != cxt->mtd_index || cxt->mtd_index < 0) |
354 | return; | 353 | return; |
355 | 354 | ||
356 | if (mtd->size < mtd->erasesize * 2) { | 355 | if (mtd->size < mtd->erasesize * 2) { |
357 | printk(KERN_ERR "mtdoops: MTD partition %d not big enough for mtdoops\n", | 356 | printk(KERN_ERR "mtdoops: MTD partition %d not big enough for mtdoops\n", |
358 | mtd->index); | 357 | mtd->index); |
359 | return; | 358 | return; |
360 | } | 359 | } |
361 | if (mtd->erasesize < record_size) { | 360 | if (mtd->erasesize < record_size) { |
362 | printk(KERN_ERR "mtdoops: eraseblock size of MTD partition %d too small\n", | 361 | printk(KERN_ERR "mtdoops: eraseblock size of MTD partition %d too small\n", |
363 | mtd->index); | 362 | mtd->index); |
364 | return; | 363 | return; |
365 | } | 364 | } |
366 | if (mtd->size > MTDOOPS_MAX_MTD_SIZE) { | 365 | if (mtd->size > MTDOOPS_MAX_MTD_SIZE) { |
367 | printk(KERN_ERR "mtdoops: mtd%d is too large (limit is %d MiB)\n", | 366 | printk(KERN_ERR "mtdoops: mtd%d is too large (limit is %d MiB)\n", |
368 | mtd->index, MTDOOPS_MAX_MTD_SIZE / 1024 / 1024); | 367 | mtd->index, MTDOOPS_MAX_MTD_SIZE / 1024 / 1024); |
369 | return; | 368 | return; |
370 | } | 369 | } |
371 | 370 | ||
372 | /* oops_page_used is a bit field */ | 371 | /* oops_page_used is a bit field */ |
373 | cxt->oops_page_used = vmalloc(DIV_ROUND_UP(mtdoops_pages, | 372 | cxt->oops_page_used = vmalloc(DIV_ROUND_UP(mtdoops_pages, |
374 | BITS_PER_LONG) * sizeof(unsigned long)); | 373 | BITS_PER_LONG) * sizeof(unsigned long)); |
375 | if (!cxt->oops_page_used) { | 374 | if (!cxt->oops_page_used) { |
376 | printk(KERN_ERR "mtdoops: could not allocate page array\n"); | 375 | printk(KERN_ERR "mtdoops: could not allocate page array\n"); |
377 | return; | 376 | return; |
378 | } | 377 | } |
379 | 378 | ||
380 | cxt->dump.dump = mtdoops_do_dump; | 379 | cxt->dump.dump = mtdoops_do_dump; |
381 | err = kmsg_dump_register(&cxt->dump); | 380 | err = kmsg_dump_register(&cxt->dump); |
382 | if (err) { | 381 | if (err) { |
383 | printk(KERN_ERR "mtdoops: registering kmsg dumper failed, error %d\n", err); | 382 | printk(KERN_ERR "mtdoops: registering kmsg dumper failed, error %d\n", err); |
384 | vfree(cxt->oops_page_used); | 383 | vfree(cxt->oops_page_used); |
385 | cxt->oops_page_used = NULL; | 384 | cxt->oops_page_used = NULL; |
386 | return; | 385 | return; |
387 | } | 386 | } |
388 | 387 | ||
389 | cxt->mtd = mtd; | 388 | cxt->mtd = mtd; |
390 | cxt->oops_pages = (int)mtd->size / record_size; | 389 | cxt->oops_pages = (int)mtd->size / record_size; |
391 | find_next_position(cxt); | 390 | find_next_position(cxt); |
392 | printk(KERN_INFO "mtdoops: Attached to MTD device %d\n", mtd->index); | 391 | printk(KERN_INFO "mtdoops: Attached to MTD device %d\n", mtd->index); |
393 | } | 392 | } |
394 | 393 | ||
395 | static void mtdoops_notify_remove(struct mtd_info *mtd) | 394 | static void mtdoops_notify_remove(struct mtd_info *mtd) |
396 | { | 395 | { |
397 | struct mtdoops_context *cxt = &oops_cxt; | 396 | struct mtdoops_context *cxt = &oops_cxt; |
398 | 397 | ||
399 | if (mtd->index != cxt->mtd_index || cxt->mtd_index < 0) | 398 | if (mtd->index != cxt->mtd_index || cxt->mtd_index < 0) |
400 | return; | 399 | return; |
401 | 400 | ||
402 | if (kmsg_dump_unregister(&cxt->dump) < 0) | 401 | if (kmsg_dump_unregister(&cxt->dump) < 0) |
403 | printk(KERN_WARNING "mtdoops: could not unregister kmsg_dumper\n"); | 402 | printk(KERN_WARNING "mtdoops: could not unregister kmsg_dumper\n"); |
404 | 403 | ||
405 | cxt->mtd = NULL; | 404 | cxt->mtd = NULL; |
406 | flush_work_sync(&cxt->work_erase); | 405 | flush_work_sync(&cxt->work_erase); |
407 | flush_work_sync(&cxt->work_write); | 406 | flush_work_sync(&cxt->work_write); |
408 | } | 407 | } |
409 | 408 | ||
410 | 409 | ||
411 | static struct mtd_notifier mtdoops_notifier = { | 410 | static struct mtd_notifier mtdoops_notifier = { |
412 | .add = mtdoops_notify_add, | 411 | .add = mtdoops_notify_add, |
413 | .remove = mtdoops_notify_remove, | 412 | .remove = mtdoops_notify_remove, |
414 | }; | 413 | }; |
415 | 414 | ||
416 | static int __init mtdoops_init(void) | 415 | static int __init mtdoops_init(void) |
417 | { | 416 | { |
418 | struct mtdoops_context *cxt = &oops_cxt; | 417 | struct mtdoops_context *cxt = &oops_cxt; |
419 | int mtd_index; | 418 | int mtd_index; |
420 | char *endp; | 419 | char *endp; |
421 | 420 | ||
422 | if (strlen(mtddev) == 0) { | 421 | if (strlen(mtddev) == 0) { |
423 | printk(KERN_ERR "mtdoops: mtd device (mtddev=name/number) must be supplied\n"); | 422 | printk(KERN_ERR "mtdoops: mtd device (mtddev=name/number) must be supplied\n"); |
424 | return -EINVAL; | 423 | return -EINVAL; |
425 | } | 424 | } |
426 | if ((record_size & 4095) != 0) { | 425 | if ((record_size & 4095) != 0) { |
427 | printk(KERN_ERR "mtdoops: record_size must be a multiple of 4096\n"); | 426 | printk(KERN_ERR "mtdoops: record_size must be a multiple of 4096\n"); |
428 | return -EINVAL; | 427 | return -EINVAL; |
429 | } | 428 | } |
430 | if (record_size < 4096) { | 429 | if (record_size < 4096) { |
431 | printk(KERN_ERR "mtdoops: record_size must be over 4096 bytes\n"); | 430 | printk(KERN_ERR "mtdoops: record_size must be over 4096 bytes\n"); |
432 | return -EINVAL; | 431 | return -EINVAL; |
433 | } | 432 | } |
434 | 433 | ||
435 | /* Setup the MTD device to use */ | 434 | /* Setup the MTD device to use */ |
436 | cxt->mtd_index = -1; | 435 | cxt->mtd_index = -1; |
437 | mtd_index = simple_strtoul(mtddev, &endp, 0); | 436 | mtd_index = simple_strtoul(mtddev, &endp, 0); |
438 | if (*endp == '\0') | 437 | if (*endp == '\0') |
439 | cxt->mtd_index = mtd_index; | 438 | cxt->mtd_index = mtd_index; |
440 | 439 | ||
441 | cxt->oops_buf = vmalloc(record_size); | 440 | cxt->oops_buf = vmalloc(record_size); |
442 | if (!cxt->oops_buf) { | 441 | if (!cxt->oops_buf) { |
443 | printk(KERN_ERR "mtdoops: failed to allocate buffer workspace\n"); | 442 | printk(KERN_ERR "mtdoops: failed to allocate buffer workspace\n"); |
444 | return -ENOMEM; | 443 | return -ENOMEM; |
445 | } | 444 | } |
446 | memset(cxt->oops_buf, 0xff, record_size); | 445 | memset(cxt->oops_buf, 0xff, record_size); |
447 | 446 | ||
448 | INIT_WORK(&cxt->work_erase, mtdoops_workfunc_erase); | 447 | INIT_WORK(&cxt->work_erase, mtdoops_workfunc_erase); |
449 | INIT_WORK(&cxt->work_write, mtdoops_workfunc_write); | 448 | INIT_WORK(&cxt->work_write, mtdoops_workfunc_write); |
450 | 449 | ||
451 | register_mtd_user(&mtdoops_notifier); | 450 | register_mtd_user(&mtdoops_notifier); |
452 | return 0; | 451 | return 0; |
453 | } | 452 | } |
454 | 453 | ||
455 | static void __exit mtdoops_exit(void) | 454 | static void __exit mtdoops_exit(void) |
456 | { | 455 | { |
457 | struct mtdoops_context *cxt = &oops_cxt; | 456 | struct mtdoops_context *cxt = &oops_cxt; |
458 | 457 | ||
459 | unregister_mtd_user(&mtdoops_notifier); | 458 | unregister_mtd_user(&mtdoops_notifier); |
460 | vfree(cxt->oops_buf); | 459 | vfree(cxt->oops_buf); |
461 | vfree(cxt->oops_page_used); | 460 | vfree(cxt->oops_page_used); |
462 | } | 461 | } |
463 | 462 | ||
464 | 463 | ||
465 | module_init(mtdoops_init); | 464 | module_init(mtdoops_init); |
466 | module_exit(mtdoops_exit); | 465 | module_exit(mtdoops_exit); |
467 | 466 | ||
468 | MODULE_LICENSE("GPL"); | 467 | MODULE_LICENSE("GPL"); |
469 | MODULE_AUTHOR("Richard Purdie <rpurdie@openedhand.com>"); | 468 | MODULE_AUTHOR("Richard Purdie <rpurdie@openedhand.com>"); |
470 | MODULE_DESCRIPTION("MTD Oops/Panic console logger/driver"); | 469 | MODULE_DESCRIPTION("MTD Oops/Panic console logger/driver"); |
471 | 470 |
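One subtlety worth calling out in the mtdoops code above: find_next_position() treats the 32-bit per-record counter as circular, so a small count that has just wrapped must rank as newer than a huge count written before the wrap. The chain of comparisons is easier to read pulled out into a predicate; the helper name below is invented here as a restatement, not something the driver defines.

#include <linux/types.h>

/* Returns true if `count` should replace `maxcount` as the newest
 * record seen so far; 0xffffffff means "nothing seen yet". Mirrors
 * the branch chain in find_next_position() one-for-one. */
static bool mtdoops_count_newer(u32 count, u32 maxcount)
{
	if (maxcount == 0xffffffff)
		return true;	/* first valid page wins */
	if (count < 0x40000000 && maxcount > 0xc0000000)
		return true;	/* count wrapped around past maxcount */
	if (count > maxcount && count < 0xc0000000)
		return true;	/* plain ordering, safely away from the wrap */
	if (count > maxcount && count > 0xc0000000 && maxcount > 0x80000000)
		return true;	/* both near the top, no wrap in between */
	return false;
}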
1 | /* | 1 | /* |
2 | * linux/include/kmsg_dump.h | 2 | * linux/include/kmsg_dump.h |
3 | * | 3 | * |
4 | * Copyright (C) 2009 Net Insight AB | 4 | * Copyright (C) 2009 Net Insight AB |
5 | * | 5 | * |
6 | * Author: Simon Kagstrom <simon.kagstrom@netinsight.net> | 6 | * Author: Simon Kagstrom <simon.kagstrom@netinsight.net> |
7 | * | 7 | * |
8 | * This file is subject to the terms and conditions of the GNU General Public | 8 | * This file is subject to the terms and conditions of the GNU General Public |
9 | * License. See the file COPYING in the main directory of this archive | 9 | * License. See the file COPYING in the main directory of this archive |
10 | * for more details. | 10 | * for more details. |
11 | */ | 11 | */ |
12 | #ifndef _LINUX_KMSG_DUMP_H | 12 | #ifndef _LINUX_KMSG_DUMP_H |
13 | #define _LINUX_KMSG_DUMP_H | 13 | #define _LINUX_KMSG_DUMP_H |
14 | 14 | ||
15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
16 | #include <linux/list.h> | 16 | #include <linux/list.h> |
17 | 17 | ||
18 | enum kmsg_dump_reason { | 18 | enum kmsg_dump_reason { |
19 | KMSG_DUMP_OOPS, | 19 | KMSG_DUMP_OOPS, |
20 | KMSG_DUMP_PANIC, | 20 | KMSG_DUMP_PANIC, |
21 | KMSG_DUMP_KEXEC, | ||
22 | KMSG_DUMP_RESTART, | 21 | KMSG_DUMP_RESTART, |
23 | KMSG_DUMP_HALT, | 22 | KMSG_DUMP_HALT, |
24 | KMSG_DUMP_POWEROFF, | 23 | KMSG_DUMP_POWEROFF, |
25 | KMSG_DUMP_EMERG, | 24 | KMSG_DUMP_EMERG, |
26 | }; | 25 | }; |
27 | 26 | ||
28 | /** | 27 | /** |
29 | * struct kmsg_dumper - kernel crash message dumper structure | 28 | * struct kmsg_dumper - kernel crash message dumper structure |
30 | * @dump: The callback which gets called on crashes. The buffer is passed | 29 | * @dump: The callback which gets called on crashes. The buffer is passed |
31 | * as two sections, where s1 (length l1) contains the older | 30 | * as two sections, where s1 (length l1) contains the older |
32 | * messages and s2 (length l2) contains the newer. | 31 | * messages and s2 (length l2) contains the newer. |
33 | * @list: Entry in the dumper list (private) | 32 | * @list: Entry in the dumper list (private) |
34 | * @registered: Flag that specifies if this is already registered | 33 | * @registered: Flag that specifies if this is already registered |
35 | */ | 34 | */ |
36 | struct kmsg_dumper { | 35 | struct kmsg_dumper { |
37 | void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason, | 36 | void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason, |
38 | const char *s1, unsigned long l1, | 37 | const char *s1, unsigned long l1, |
39 | const char *s2, unsigned long l2); | 38 | const char *s2, unsigned long l2); |
40 | struct list_head list; | 39 | struct list_head list; |
41 | int registered; | 40 | int registered; |
42 | }; | 41 | }; |
43 | 42 | ||
44 | #ifdef CONFIG_PRINTK | 43 | #ifdef CONFIG_PRINTK |
45 | void kmsg_dump(enum kmsg_dump_reason reason); | 44 | void kmsg_dump(enum kmsg_dump_reason reason); |
46 | 45 | ||
47 | int kmsg_dump_register(struct kmsg_dumper *dumper); | 46 | int kmsg_dump_register(struct kmsg_dumper *dumper); |
48 | 47 | ||
49 | int kmsg_dump_unregister(struct kmsg_dumper *dumper); | 48 | int kmsg_dump_unregister(struct kmsg_dumper *dumper); |
50 | #else | 49 | #else |
51 | static inline void kmsg_dump(enum kmsg_dump_reason reason) | 50 | static inline void kmsg_dump(enum kmsg_dump_reason reason) |
52 | { | 51 | { |
53 | } | 52 | } |
54 | 53 | ||
55 | static inline int kmsg_dump_register(struct kmsg_dumper *dumper) | 54 | static inline int kmsg_dump_register(struct kmsg_dumper *dumper) |
56 | { | 55 | { |
57 | return -EINVAL; | 56 | return -EINVAL; |
58 | } | 57 | } |
59 | 58 | ||
60 | static inline int kmsg_dump_unregister(struct kmsg_dumper *dumper) | 59 | static inline int kmsg_dump_unregister(struct kmsg_dumper *dumper) |
61 | { | 60 | { |
62 | return -EINVAL; | 61 | return -EINVAL; |
63 | } | 62 | } |
64 | #endif | 63 | #endif |
65 | 64 | ||
66 | #endif /* _LINUX_KMSG_DUMP_H */ | 65 | #endif /* _LINUX_KMSG_DUMP_H */ |
67 | 66 |
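With KMSG_DUMP_KEXEC gone, the header above is the complete contract a dumper honors: one callback, two buffer halves (s1 older, s2 newer), and a reason code. A minimal client built only on that contract might look like the sketch below; example_dump and the 4 KiB static buffer are illustrative, not from any real driver.

#include <linux/kernel.h>	/* min_t() */
#include <linux/kmsg_dump.h>
#include <linux/string.h>

static char example_buf[4096];

static void example_dump(struct kmsg_dumper *dumper,
		enum kmsg_dump_reason reason,
		const char *s1, unsigned long l1,
		const char *s2, unsigned long l2)
{
	unsigned long l1_cpy, l2_cpy;

	/* Crash-time dumpers now only see OOPS or PANIC; ignore the
	 * restart/halt/poweroff/emergency notifications. */
	if (reason != KMSG_DUMP_OOPS && reason != KMSG_DUMP_PANIC)
		return;

	/* Keep the newest bytes: all of s2 that fits, then the tail
	 * of s1 -- the same scheme mtdoops_do_dump() uses above. */
	l2_cpy = min_t(unsigned long, l2, sizeof(example_buf));
	l1_cpy = min_t(unsigned long, l1, sizeof(example_buf) - l2_cpy);
	memcpy(example_buf, s1 + (l1 - l1_cpy), l1_cpy);
	memcpy(example_buf + l1_cpy, s2 + (l2 - l2_cpy), l2_cpy);
}

static struct kmsg_dumper example_dumper = {
	.dump	= example_dump,
};

/* Pairing with the drivers above:
 *	kmsg_dump_register(&example_dumper);   on init
 *	kmsg_dump_unregister(&example_dumper); on teardown
 */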
1 | /* | 1 | /* |
2 | * kexec.c - kexec system call | 2 | * kexec.c - kexec system call |
3 | * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> | 3 | * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> |
4 | * | 4 | * |
5 | * This source code is licensed under the GNU General Public License, | 5 | * This source code is licensed under the GNU General Public License, |
6 | * Version 2. See the file COPYING for more details. | 6 | * Version 2. See the file COPYING for more details. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/capability.h> | 9 | #include <linux/capability.h> |
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/file.h> | 11 | #include <linux/file.h> |
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/kexec.h> | 14 | #include <linux/kexec.h> |
15 | #include <linux/mutex.h> | 15 | #include <linux/mutex.h> |
16 | #include <linux/list.h> | 16 | #include <linux/list.h> |
17 | #include <linux/highmem.h> | 17 | #include <linux/highmem.h> |
18 | #include <linux/syscalls.h> | 18 | #include <linux/syscalls.h> |
19 | #include <linux/reboot.h> | 19 | #include <linux/reboot.h> |
20 | #include <linux/ioport.h> | 20 | #include <linux/ioport.h> |
21 | #include <linux/hardirq.h> | 21 | #include <linux/hardirq.h> |
22 | #include <linux/elf.h> | 22 | #include <linux/elf.h> |
23 | #include <linux/elfcore.h> | 23 | #include <linux/elfcore.h> |
24 | #include <generated/utsrelease.h> | 24 | #include <generated/utsrelease.h> |
25 | #include <linux/utsname.h> | 25 | #include <linux/utsname.h> |
26 | #include <linux/numa.h> | 26 | #include <linux/numa.h> |
27 | #include <linux/suspend.h> | 27 | #include <linux/suspend.h> |
28 | #include <linux/device.h> | 28 | #include <linux/device.h> |
29 | #include <linux/freezer.h> | 29 | #include <linux/freezer.h> |
30 | #include <linux/pm.h> | 30 | #include <linux/pm.h> |
31 | #include <linux/cpu.h> | 31 | #include <linux/cpu.h> |
32 | #include <linux/console.h> | 32 | #include <linux/console.h> |
33 | #include <linux/vmalloc.h> | 33 | #include <linux/vmalloc.h> |
34 | #include <linux/swap.h> | 34 | #include <linux/swap.h> |
35 | #include <linux/kmsg_dump.h> | ||
36 | #include <linux/syscore_ops.h> | 35 | #include <linux/syscore_ops.h> |
37 | 36 | ||
38 | #include <asm/page.h> | 37 | #include <asm/page.h> |
39 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
40 | #include <asm/io.h> | 39 | #include <asm/io.h> |
41 | #include <asm/system.h> | 40 | #include <asm/system.h> |
42 | #include <asm/sections.h> | 41 | #include <asm/sections.h> |
43 | 42 | ||
44 | /* Per cpu memory for storing cpu states in case of system crash. */ | 43 | /* Per cpu memory for storing cpu states in case of system crash. */ |
45 | note_buf_t __percpu *crash_notes; | 44 | note_buf_t __percpu *crash_notes; |
46 | 45 | ||
47 | /* vmcoreinfo stuff */ | 46 | /* vmcoreinfo stuff */ |
48 | static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; | 47 | static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; |
49 | u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; | 48 | u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; |
50 | size_t vmcoreinfo_size; | 49 | size_t vmcoreinfo_size; |
51 | size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); | 50 | size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); |
52 | 51 | ||
53 | /* Location of the reserved area for the crash kernel */ | 52 | /* Location of the reserved area for the crash kernel */ |
54 | struct resource crashk_res = { | 53 | struct resource crashk_res = { |
55 | .name = "Crash kernel", | 54 | .name = "Crash kernel", |
56 | .start = 0, | 55 | .start = 0, |
57 | .end = 0, | 56 | .end = 0, |
58 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | 57 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
59 | }; | 58 | }; |
60 | 59 | ||
61 | int kexec_should_crash(struct task_struct *p) | 60 | int kexec_should_crash(struct task_struct *p) |
62 | { | 61 | { |
63 | if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) | 62 | if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) |
64 | return 1; | 63 | return 1; |
65 | return 0; | 64 | return 0; |
66 | } | 65 | } |
67 | 66 | ||
68 | /* | 67 | /* |
69 | * When kexec transitions to the new kernel there is a one-to-one | 68 | * When kexec transitions to the new kernel there is a one-to-one |
70 | * mapping between physical and virtual addresses. On processors | 69 | * mapping between physical and virtual addresses. On processors |
71 | * where you can disable the MMU this is trivial, and easy. For | 70 | * where you can disable the MMU this is trivial, and easy. For |
72 | * others it is still a simple predictable page table to setup. | 71 | * others it is still a simple predictable page table to setup. |
73 | * | 72 | * |
74 | * In that environment kexec copies the new kernel to its final | 73 | * In that environment kexec copies the new kernel to its final |
75 | * resting place. This means I can only support memory whose | 74 | * resting place. This means I can only support memory whose |
76 | * physical address can fit in an unsigned long. In particular | 75 | * physical address can fit in an unsigned long. In particular |
77 | * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. | 76 | * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. |
78 | * If the assembly stub has more restrictive requirements | 77 | * If the assembly stub has more restrictive requirements |
79 | * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be | 78 | * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be |
80 | * defined more restrictively in <asm/kexec.h>. | 79 | * defined more restrictively in <asm/kexec.h>. |
81 | * | 80 | * |
82 | * The code for the transition from the current kernel to the | 81 | * The code for the transition from the current kernel to the |
83 | * new kernel is placed in the control_code_buffer, whose size | 82 | * new kernel is placed in the control_code_buffer, whose size |
84 | * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single | 83 | * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single |
85 | * page of memory is necessary, but some architectures require more. | 84 | * page of memory is necessary, but some architectures require more. |
86 | * Because this memory must be identity mapped in the transition from | 85 | * Because this memory must be identity mapped in the transition from |
87 | * virtual to physical addresses it must live in the range | 86 | * virtual to physical addresses it must live in the range |
88 | * 0 - TASK_SIZE, as only the user space mappings are arbitrarily | 87 | * 0 - TASK_SIZE, as only the user space mappings are arbitrarily |
89 | * modifiable. | 88 | * modifiable. |
90 | * | 89 | * |
91 | * The assembly stub in the control code buffer is passed a linked list | 90 | * The assembly stub in the control code buffer is passed a linked list |
92 | * of descriptor pages detailing the source pages of the new kernel, | 91 | * of descriptor pages detailing the source pages of the new kernel, |
93 | * and the destination addresses of those source pages. As this data | 92 | * and the destination addresses of those source pages. As this data |
94 | * structure is not used in the context of the current OS, it must | 93 | * structure is not used in the context of the current OS, it must |
95 | * be self-contained. | 94 | * be self-contained. |
96 | * | 95 | * |
97 | * The code has been made to work with highmem pages and will use a | 96 | * The code has been made to work with highmem pages and will use a |
98 | * destination page in its final resting place (if it happens | 97 | * destination page in its final resting place (if it happens |
99 | * to allocate it). The end product of this is that most of the | 98 | * to allocate it). The end product of this is that most of the |
100 | * physical address space, and most of RAM can be used. | 99 | * physical address space, and most of RAM can be used. |
101 | * | 100 | * |
102 | * Future directions include: | 101 | * Future directions include: |
103 | * - allocating a page table with the control code buffer identity | 102 | * - allocating a page table with the control code buffer identity |
104 | * mapped, to simplify machine_kexec and make kexec_on_panic more | 103 | * mapped, to simplify machine_kexec and make kexec_on_panic more |
105 | * reliable. | 104 | * reliable. |
106 | */ | 105 | */ |
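The descriptor list described in this comment is a flat run of tagged physical addresses, chained through indirection pages and terminated by IND_DONE. As a rough C rendering of the walk the relocation stub performs, modeled on the for_each_kimage_entry() helper defined later in this file (the real stub is per-architecture assembly, and the 1:1 mapping is assumed to already be in place):

unsigned long *ptr, entry;
unsigned long dest = 0;

for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
     ptr = (entry & IND_INDIRECTION) ?
		phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
	if (entry & IND_DESTINATION)
		dest = entry & PAGE_MASK;	/* target of the next copy */
	else if (entry & IND_SOURCE) {
		/* under the identity map, physical == virtual */
		copy_page((void *)dest, phys_to_virt(entry & PAGE_MASK));
		dest += PAGE_SIZE;
	}
}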
107 | 106 | ||
108 | /* | 107 | /* |
109 | * KIMAGE_NO_DEST is an impossible destination address..., for | 108 | * KIMAGE_NO_DEST is an impossible destination address..., for |
110 | * allocating pages whose destination address we do not care about. | 109 | * allocating pages whose destination address we do not care about. |
111 | */ | 110 | */ |
112 | #define KIMAGE_NO_DEST (-1UL) | 111 | #define KIMAGE_NO_DEST (-1UL) |
113 | 112 | ||
114 | static int kimage_is_destination_range(struct kimage *image, | 113 | static int kimage_is_destination_range(struct kimage *image, |
115 | unsigned long start, unsigned long end); | 114 | unsigned long start, unsigned long end); |
116 | static struct page *kimage_alloc_page(struct kimage *image, | 115 | static struct page *kimage_alloc_page(struct kimage *image, |
117 | gfp_t gfp_mask, | 116 | gfp_t gfp_mask, |
118 | unsigned long dest); | 117 | unsigned long dest); |
119 | 118 | ||
120 | static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, | 119 | static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, |
121 | unsigned long nr_segments, | 120 | unsigned long nr_segments, |
122 | struct kexec_segment __user *segments) | 121 | struct kexec_segment __user *segments) |
123 | { | 122 | { |
124 | size_t segment_bytes; | 123 | size_t segment_bytes; |
125 | struct kimage *image; | 124 | struct kimage *image; |
126 | unsigned long i; | 125 | unsigned long i; |
127 | int result; | 126 | int result; |
128 | 127 | ||
129 | /* Allocate a controlling structure */ | 128 | /* Allocate a controlling structure */ |
130 | result = -ENOMEM; | 129 | result = -ENOMEM; |
131 | image = kzalloc(sizeof(*image), GFP_KERNEL); | 130 | image = kzalloc(sizeof(*image), GFP_KERNEL); |
132 | if (!image) | 131 | if (!image) |
133 | goto out; | 132 | goto out; |
134 | 133 | ||
135 | image->head = 0; | 134 | image->head = 0; |
136 | image->entry = &image->head; | 135 | image->entry = &image->head; |
137 | image->last_entry = &image->head; | 136 | image->last_entry = &image->head; |
138 | image->control_page = ~0; /* By default this does not apply */ | 137 | image->control_page = ~0; /* By default this does not apply */ |
139 | image->start = entry; | 138 | image->start = entry; |
140 | image->type = KEXEC_TYPE_DEFAULT; | 139 | image->type = KEXEC_TYPE_DEFAULT; |
141 | 140 | ||
142 | /* Initialize the list of control pages */ | 141 | /* Initialize the list of control pages */ |
143 | INIT_LIST_HEAD(&image->control_pages); | 142 | INIT_LIST_HEAD(&image->control_pages); |
144 | 143 | ||
145 | /* Initialize the list of destination pages */ | 144 | /* Initialize the list of destination pages */ |
146 | INIT_LIST_HEAD(&image->dest_pages); | 145 | INIT_LIST_HEAD(&image->dest_pages); |
147 | 146 | ||
148 | /* Initialize the list of unusable pages */ | 147 | /* Initialize the list of unusable pages */ |
149 | INIT_LIST_HEAD(&image->unuseable_pages); | 148 | INIT_LIST_HEAD(&image->unuseable_pages); |
150 | 149 | ||
151 | /* Read in the segments */ | 150 | /* Read in the segments */ |
152 | image->nr_segments = nr_segments; | 151 | image->nr_segments = nr_segments; |
153 | segment_bytes = nr_segments * sizeof(*segments); | 152 | segment_bytes = nr_segments * sizeof(*segments); |
154 | result = copy_from_user(image->segment, segments, segment_bytes); | 153 | result = copy_from_user(image->segment, segments, segment_bytes); |
155 | if (result) { | 154 | if (result) { |
156 | result = -EFAULT; | 155 | result = -EFAULT; |
157 | goto out; | 156 | goto out; |
158 | } | 157 | } |
159 | 158 | ||
160 | /* | 159 | /* |
161 | * Verify we have good destination addresses. The caller is | 160 | * Verify we have good destination addresses. The caller is |
162 | * responsible for making certain we don't attempt to load | 161 | * responsible for making certain we don't attempt to load |
163 | * the new image into invalid or reserved areas of RAM. This | 162 | * the new image into invalid or reserved areas of RAM. This |
164 | * just verifies it is an address we can use. | 163 | * just verifies it is an address we can use. |
165 | * | 164 | * |
166 | * Since the kernel does everything in page size chunks ensure | 165 | * Since the kernel does everything in page size chunks ensure |
167 | * the destination addresses are page aligned. Too many | 166 | * the destination addresses are page aligned. Too many |
168 | * special cases crop up when we don't do this. The most | 167 | * special cases crop up when we don't do this. The most |
169 | * insidious is getting overlapping destination addresses | 168 | * insidious is getting overlapping destination addresses |
170 | * simply because addresses are changed to page size | 169 | * simply because addresses are changed to page size |
171 | * granularity. | 170 | * granularity. |
172 | */ | 171 | */ |
173 | result = -EADDRNOTAVAIL; | 172 | result = -EADDRNOTAVAIL; |
174 | for (i = 0; i < nr_segments; i++) { | 173 | for (i = 0; i < nr_segments; i++) { |
175 | unsigned long mstart, mend; | 174 | unsigned long mstart, mend; |
176 | 175 | ||
177 | mstart = image->segment[i].mem; | 176 | mstart = image->segment[i].mem; |
178 | mend = mstart + image->segment[i].memsz; | 177 | mend = mstart + image->segment[i].memsz; |
179 | if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) | 178 | if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) |
180 | goto out; | 179 | goto out; |
181 | if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) | 180 | if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) |
182 | goto out; | 181 | goto out; |
183 | } | 182 | } |
184 | 183 | ||
185 | /* Verify our destination addresses do not overlap. | 184 | /* Verify our destination addresses do not overlap. |
186 | * If we allowed overlapping destination addresses | 185 | * If we allowed overlapping destination addresses |
187 | * through, very weird things can happen with no | 186 | * through, very weird things can happen with no |
188 | * easy explanation as one segment stomps on another. | 187 | * easy explanation as one segment stomps on another. |
189 | */ | 188 | */ |
190 | result = -EINVAL; | 189 | result = -EINVAL; |
191 | for (i = 0; i < nr_segments; i++) { | 190 | for (i = 0; i < nr_segments; i++) { |
192 | unsigned long mstart, mend; | 191 | unsigned long mstart, mend; |
193 | unsigned long j; | 192 | unsigned long j; |
194 | 193 | ||
195 | mstart = image->segment[i].mem; | 194 | mstart = image->segment[i].mem; |
196 | mend = mstart + image->segment[i].memsz; | 195 | mend = mstart + image->segment[i].memsz; |
197 | for (j = 0; j < i; j++) { | 196 | for (j = 0; j < i; j++) { |
198 | unsigned long pstart, pend; | 197 | unsigned long pstart, pend; |
199 | pstart = image->segment[j].mem; | 198 | pstart = image->segment[j].mem; |
200 | pend = pstart + image->segment[j].memsz; | 199 | pend = pstart + image->segment[j].memsz; |
201 | /* Do the segments overlap ? */ | 200 | /* Do the segments overlap ? */ |
202 | if ((mend > pstart) && (mstart < pend)) | 201 | if ((mend > pstart) && (mstart < pend)) |
203 | goto out; | 202 | goto out; |
204 | } | 203 | } |
205 | } | 204 | } |
206 | 205 | ||
207 | /* Ensure our buffer sizes are no larger than | 206 | /* Ensure our buffer sizes are no larger than |
208 | * our memory sizes. This should always be the case, | 207 | * our memory sizes. This should always be the case, |
209 | * and it is easier to check up front than to be surprised | 208 | * and it is easier to check up front than to be surprised |
210 | * later on. | 209 | * later on. |
211 | */ | 210 | */ |
212 | result = -EINVAL; | 211 | result = -EINVAL; |
213 | for (i = 0; i < nr_segments; i++) { | 212 | for (i = 0; i < nr_segments; i++) { |
214 | if (image->segment[i].bufsz > image->segment[i].memsz) | 213 | if (image->segment[i].bufsz > image->segment[i].memsz) |
215 | goto out; | 214 | goto out; |
216 | } | 215 | } |
217 | 216 | ||
218 | result = 0; | 217 | result = 0; |
219 | out: | 218 | out: |
220 | if (result == 0) | 219 | if (result == 0) |
221 | *rimage = image; | 220 | *rimage = image; |
222 | else | 221 | else |
223 | kfree(image); | 222 | kfree(image); |
224 | 223 | ||
225 | return result; | 224 | return result; |
226 | 225 | ||
227 | } | 226 | } |
228 | 227 | ||
229 | static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, | 228 | static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, |
230 | unsigned long nr_segments, | 229 | unsigned long nr_segments, |
231 | struct kexec_segment __user *segments) | 230 | struct kexec_segment __user *segments) |
232 | { | 231 | { |
233 | int result; | 232 | int result; |
234 | struct kimage *image; | 233 | struct kimage *image; |
235 | 234 | ||
236 | /* Allocate and initialize a controlling structure */ | 235 | /* Allocate and initialize a controlling structure */ |
237 | image = NULL; | 236 | image = NULL; |
238 | result = do_kimage_alloc(&image, entry, nr_segments, segments); | 237 | result = do_kimage_alloc(&image, entry, nr_segments, segments); |
239 | if (result) | 238 | if (result) |
240 | goto out; | 239 | goto out; |
241 | 240 | ||
242 | *rimage = image; | 241 | *rimage = image; |
243 | 242 | ||
244 | /* | 243 | /* |
245 | * Find a location for the control code buffer, and add it to | 244 | * Find a location for the control code buffer, and add it to |
246 | * the vector of segments so that its pages will also be | 245 | * the vector of segments so that its pages will also be |
247 | * counted as destination pages. | 246 | * counted as destination pages. |
248 | */ | 247 | */ |
249 | result = -ENOMEM; | 248 | result = -ENOMEM; |
250 | image->control_code_page = kimage_alloc_control_pages(image, | 249 | image->control_code_page = kimage_alloc_control_pages(image, |
251 | get_order(KEXEC_CONTROL_PAGE_SIZE)); | 250 | get_order(KEXEC_CONTROL_PAGE_SIZE)); |
252 | if (!image->control_code_page) { | 251 | if (!image->control_code_page) { |
253 | printk(KERN_ERR "Could not allocate control_code_buffer\n"); | 252 | printk(KERN_ERR "Could not allocate control_code_buffer\n"); |
254 | goto out; | 253 | goto out; |
255 | } | 254 | } |
256 | 255 | ||
257 | image->swap_page = kimage_alloc_control_pages(image, 0); | 256 | image->swap_page = kimage_alloc_control_pages(image, 0); |
258 | if (!image->swap_page) { | 257 | if (!image->swap_page) { |
259 | printk(KERN_ERR "Could not allocate swap buffer\n"); | 258 | printk(KERN_ERR "Could not allocate swap buffer\n"); |
260 | goto out; | 259 | goto out; |
261 | } | 260 | } |
262 | 261 | ||
263 | result = 0; | 262 | result = 0; |
264 | out: | 263 | out: |
265 | if (result == 0) | 264 | if (result == 0) |
266 | *rimage = image; | 265 | *rimage = image; |
267 | else | 266 | else |
268 | kfree(image); | 267 | kfree(image); |
269 | 268 | ||
270 | return result; | 269 | return result; |
271 | } | 270 | } |
272 | 271 | ||
273 | static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, | 272 | static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, |
274 | unsigned long nr_segments, | 273 | unsigned long nr_segments, |
275 | struct kexec_segment __user *segments) | 274 | struct kexec_segment __user *segments) |
276 | { | 275 | { |
277 | int result; | 276 | int result; |
278 | struct kimage *image; | 277 | struct kimage *image; |
279 | unsigned long i; | 278 | unsigned long i; |
280 | 279 | ||
281 | image = NULL; | 280 | image = NULL; |
282 | /* Verify we have a valid entry point */ | 281 | /* Verify we have a valid entry point */ |
283 | if ((entry < crashk_res.start) || (entry > crashk_res.end)) { | 282 | if ((entry < crashk_res.start) || (entry > crashk_res.end)) { |
284 | result = -EADDRNOTAVAIL; | 283 | result = -EADDRNOTAVAIL; |
285 | goto out; | 284 | goto out; |
286 | } | 285 | } |
287 | 286 | ||
288 | /* Allocate and initialize a controlling structure */ | 287 | /* Allocate and initialize a controlling structure */ |
289 | result = do_kimage_alloc(&image, entry, nr_segments, segments); | 288 | result = do_kimage_alloc(&image, entry, nr_segments, segments); |
290 | if (result) | 289 | if (result) |
291 | goto out; | 290 | goto out; |
292 | 291 | ||
293 | /* Enable the special crash kernel control page | 292 | /* Enable the special crash kernel control page |
294 | * allocation policy. | 293 | * allocation policy. |
295 | */ | 294 | */ |
296 | image->control_page = crashk_res.start; | 295 | image->control_page = crashk_res.start; |
297 | image->type = KEXEC_TYPE_CRASH; | 296 | image->type = KEXEC_TYPE_CRASH; |
298 | 297 | ||
299 | /* | 298 | /* |
300 | * Verify we have good destination addresses. Normally | 299 | * Verify we have good destination addresses. Normally |
301 | * the caller is responsible for making certain we don't | 300 | * the caller is responsible for making certain we don't |
302 | * attempt to load the new image into invalid or reserved | 301 | * attempt to load the new image into invalid or reserved |
303 | * areas of RAM. But crash kernels are preloaded into a | 302 | * areas of RAM. But crash kernels are preloaded into a |
304 | * reserved area of RAM. We must ensure the addresses | 303 | * reserved area of RAM. We must ensure the addresses |
305 | * are in the reserved area, otherwise preloading the | 304 | * are in the reserved area, otherwise preloading the |
306 | * kernel could corrupt things. | 305 | * kernel could corrupt things. |
307 | */ | 306 | */ |
308 | result = -EADDRNOTAVAIL; | 307 | result = -EADDRNOTAVAIL; |
309 | for (i = 0; i < nr_segments; i++) { | 308 | for (i = 0; i < nr_segments; i++) { |
310 | unsigned long mstart, mend; | 309 | unsigned long mstart, mend; |
311 | 310 | ||
312 | mstart = image->segment[i].mem; | 311 | mstart = image->segment[i].mem; |
313 | mend = mstart + image->segment[i].memsz - 1; | 312 | mend = mstart + image->segment[i].memsz - 1; |
314 | /* Ensure we are within the crash kernel limits */ | 313 | /* Ensure we are within the crash kernel limits */ |
315 | if ((mstart < crashk_res.start) || (mend > crashk_res.end)) | 314 | if ((mstart < crashk_res.start) || (mend > crashk_res.end)) |
316 | goto out; | 315 | goto out; |
317 | } | 316 | } |
318 | 317 | ||
319 | /* | 318 | /* |
320 | * Find a location for the control code buffer, and add it to | 319 | * Find a location for the control code buffer, and add it to |
321 | * the vector of segments so that its pages will also be | 320 | * the vector of segments so that its pages will also be |
322 | * counted as destination pages. | 321 | * counted as destination pages. |
323 | */ | 322 | */ |
324 | result = -ENOMEM; | 323 | result = -ENOMEM; |
325 | image->control_code_page = kimage_alloc_control_pages(image, | 324 | image->control_code_page = kimage_alloc_control_pages(image, |
326 | get_order(KEXEC_CONTROL_PAGE_SIZE)); | 325 | get_order(KEXEC_CONTROL_PAGE_SIZE)); |
327 | if (!image->control_code_page) { | 326 | if (!image->control_code_page) { |
328 | printk(KERN_ERR "Could not allocate control_code_buffer\n"); | 327 | printk(KERN_ERR "Could not allocate control_code_buffer\n"); |
329 | goto out; | 328 | goto out; |
330 | } | 329 | } |
331 | 330 | ||
332 | result = 0; | 331 | result = 0; |
333 | out: | 332 | out: |
334 | if (result == 0) | 333 | if (result == 0) |
335 | *rimage = image; | 334 | *rimage = image; |
336 | else | 335 | else |
337 | kfree(image); | 336 | kfree(image); |
338 | 337 | ||
339 | return result; | 338 | return result; |
340 | } | 339 | } |
341 | 340 | ||
342 | static int kimage_is_destination_range(struct kimage *image, | 341 | static int kimage_is_destination_range(struct kimage *image, |
343 | unsigned long start, | 342 | unsigned long start, |
344 | unsigned long end) | 343 | unsigned long end) |
345 | { | 344 | { |
346 | unsigned long i; | 345 | unsigned long i; |
347 | 346 | ||
348 | for (i = 0; i < image->nr_segments; i++) { | 347 | for (i = 0; i < image->nr_segments; i++) { |
349 | unsigned long mstart, mend; | 348 | unsigned long mstart, mend; |
350 | 349 | ||
351 | mstart = image->segment[i].mem; | 350 | mstart = image->segment[i].mem; |
352 | mend = mstart + image->segment[i].memsz; | 351 | mend = mstart + image->segment[i].memsz; |
353 | if ((end > mstart) && (start < mend)) | 352 | if ((end > mstart) && (start < mend)) |
354 | return 1; | 353 | return 1; |
355 | } | 354 | } |
356 | 355 | ||
357 | return 0; | 356 | return 0; |
358 | } | 357 | } |
359 | 358 | ||
360 | static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) | 359 | static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) |
361 | { | 360 | { |
362 | struct page *pages; | 361 | struct page *pages; |
363 | 362 | ||
364 | pages = alloc_pages(gfp_mask, order); | 363 | pages = alloc_pages(gfp_mask, order); |
365 | if (pages) { | 364 | if (pages) { |
366 | unsigned int count, i; | 365 | unsigned int count, i; |
367 | pages->mapping = NULL; | 366 | pages->mapping = NULL; |
368 | set_page_private(pages, order); | 367 | set_page_private(pages, order); |
369 | count = 1 << order; | 368 | count = 1 << order; |
370 | for (i = 0; i < count; i++) | 369 | for (i = 0; i < count; i++) |
371 | SetPageReserved(pages + i); | 370 | SetPageReserved(pages + i); |
372 | } | 371 | } |
373 | 372 | ||
374 | return pages; | 373 | return pages; |
375 | } | 374 | } |
376 | 375 | ||
377 | static void kimage_free_pages(struct page *page) | 376 | static void kimage_free_pages(struct page *page) |
378 | { | 377 | { |
379 | unsigned int order, count, i; | 378 | unsigned int order, count, i; |
380 | 379 | ||
381 | order = page_private(page); | 380 | order = page_private(page); |
382 | count = 1 << order; | 381 | count = 1 << order; |
383 | for (i = 0; i < count; i++) | 382 | for (i = 0; i < count; i++) |
384 | ClearPageReserved(page + i); | 383 | ClearPageReserved(page + i); |
385 | __free_pages(page, order); | 384 | __free_pages(page, order); |
386 | } | 385 | } |
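kimage_alloc_pages() and kimage_free_pages() above form a pair: the allocator stashes the allocation order in page_private so the free side can release the whole run without the caller tracking the order. A minimal sketch of the same pattern using only stock page-allocator helpers (my_alloc/my_free are hypothetical names):

    #include <linux/gfp.h>
    #include <linux/mm.h>

    /* Remember the order inside the page itself, as above. */
    static struct page *my_alloc(gfp_t gfp_mask, unsigned int order)
    {
        struct page *p = alloc_pages(gfp_mask, order);

        if (p)
            set_page_private(p, order);   /* read back on free */
        return p;
    }

    static void my_free(struct page *p)
    {
        __free_pages(p, page_private(p)); /* no order argument needed */
    }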
387 | 386 | ||
388 | static void kimage_free_page_list(struct list_head *list) | 387 | static void kimage_free_page_list(struct list_head *list) |
389 | { | 388 | { |
390 | struct list_head *pos, *next; | 389 | struct list_head *pos, *next; |
391 | 390 | ||
392 | list_for_each_safe(pos, next, list) { | 391 | list_for_each_safe(pos, next, list) { |
393 | struct page *page; | 392 | struct page *page; |
394 | 393 | ||
395 | page = list_entry(pos, struct page, lru); | 394 | page = list_entry(pos, struct page, lru); |
396 | list_del(&page->lru); | 395 | list_del(&page->lru); |
397 | kimage_free_pages(page); | 396 | kimage_free_pages(page); |
398 | } | 397 | } |
399 | } | 398 | } |
400 | 399 | ||
401 | static struct page *kimage_alloc_normal_control_pages(struct kimage *image, | 400 | static struct page *kimage_alloc_normal_control_pages(struct kimage *image, |
402 | unsigned int order) | 401 | unsigned int order) |
403 | { | 402 | { |
404 | /* Control pages are special, they are the intermediaries | 403 | /* Control pages are special, they are the intermediaries |
405 | * that are needed while we copy the rest of the pages | 404 | * that are needed while we copy the rest of the pages |
406 | * to their final resting place. As such they must | 405 | * to their final resting place. As such they must |
407 | * not conflict with either the destination addresses | 406 | * not conflict with either the destination addresses |
408 | * or memory the kernel is already using. | 407 | * or memory the kernel is already using. |
409 | * | 408 | * |
410 | * The only case where we really need more than one of | 409 | * The only case where we really need more than one of |
411 | * these is for architectures where we cannot disable | 410 | * these is for architectures where we cannot disable |
412 | * the MMU and must instead generate an identity mapped | 411 | * the MMU and must instead generate an identity mapped |
413 | * page table for all of the memory. | 412 | * page table for all of the memory. |
414 | * | 413 | * |
415 | * At worst this runs in O(N) of the image size. | 414 | * At worst this runs in O(N) of the image size. |
416 | */ | 415 | */ |
417 | struct list_head extra_pages; | 416 | struct list_head extra_pages; |
418 | struct page *pages; | 417 | struct page *pages; |
419 | unsigned int count; | 418 | unsigned int count; |
420 | 419 | ||
421 | count = 1 << order; | 420 | count = 1 << order; |
422 | INIT_LIST_HEAD(&extra_pages); | 421 | INIT_LIST_HEAD(&extra_pages); |
423 | 422 | ||
424 | /* Loop while I can allocate a page and the page allocated | 423 | /* Loop while I can allocate a page and the page allocated |
425 | * is a destination page. | 424 | * is a destination page. |
426 | */ | 425 | */ |
427 | do { | 426 | do { |
428 | unsigned long pfn, epfn, addr, eaddr; | 427 | unsigned long pfn, epfn, addr, eaddr; |
429 | 428 | ||
430 | pages = kimage_alloc_pages(GFP_KERNEL, order); | 429 | pages = kimage_alloc_pages(GFP_KERNEL, order); |
431 | if (!pages) | 430 | if (!pages) |
432 | break; | 431 | break; |
433 | pfn = page_to_pfn(pages); | 432 | pfn = page_to_pfn(pages); |
434 | epfn = pfn + count; | 433 | epfn = pfn + count; |
435 | addr = pfn << PAGE_SHIFT; | 434 | addr = pfn << PAGE_SHIFT; |
436 | eaddr = epfn << PAGE_SHIFT; | 435 | eaddr = epfn << PAGE_SHIFT; |
437 | if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || | 436 | if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || |
438 | kimage_is_destination_range(image, addr, eaddr)) { | 437 | kimage_is_destination_range(image, addr, eaddr)) { |
439 | list_add(&pages->lru, &extra_pages); | 438 | list_add(&pages->lru, &extra_pages); |
440 | pages = NULL; | 439 | pages = NULL; |
441 | } | 440 | } |
442 | } while (!pages); | 441 | } while (!pages); |
443 | 442 | ||
444 | if (pages) { | 443 | if (pages) { |
445 | /* Remember the allocated page... */ | 444 | /* Remember the allocated page... */ |
446 | list_add(&pages->lru, &image->control_pages); | 445 | list_add(&pages->lru, &image->control_pages); |
447 | 446 | ||
448 | /* Because the page is already in its destination | 447 | /* Because the page is already in its destination |
449 | * location we will never allocate another page at | 448 | * location we will never allocate another page at |
450 | * that address. Therefore kimage_alloc_pages | 449 | * that address. Therefore kimage_alloc_pages |
451 | * will not return it (again) and we don't need | 450 | * will not return it (again) and we don't need |
452 | * to give it an entry in image->segment[]. | 451 | * to give it an entry in image->segment[]. |
453 | */ | 452 | */ |
454 | } | 453 | } |
455 | /* Deal with the destination pages I have inadvertently allocated. | 454 | /* Deal with the destination pages I have inadvertently allocated. |
456 | * | 455 | * |
457 | * Ideally I would convert multi-page allocations into single | 456 | * Ideally I would convert multi-page allocations into single |
458 | * page allocations, and add everything to image->dest_pages. | 457 | * page allocations, and add everything to image->dest_pages. |
459 | * | 458 | * |
460 | * For now it is simpler to just free the pages. | 459 | * For now it is simpler to just free the pages. |
461 | */ | 460 | */ |
462 | kimage_free_page_list(&extra_pages); | 461 | kimage_free_page_list(&extra_pages); |
463 | 462 | ||
464 | return pages; | 463 | return pages; |
465 | } | 464 | } |
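The do/while loop above only terminates with a usable run because every rejected allocation is parked on extra_pages, so the page allocator cannot hand the same conflicting pages straight back. Reduced to its shape (acceptable() is a hypothetical stand-in for the limit and overlap checks), the retry pattern is:

    LIST_HEAD(rejected);
    struct page *pages;

    do {
        pages = kimage_alloc_pages(GFP_KERNEL, order);
        if (!pages)
            break;                  /* out of memory: give up */
        if (!acceptable(pages)) {
            /* Park it so the next attempt cannot return it. */
            list_add(&pages->lru, &rejected);
            pages = NULL;
        }
    } while (!pages);

    kimage_free_page_list(&rejected);   /* release the parked pages */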
466 | 465 | ||
467 | static struct page *kimage_alloc_crash_control_pages(struct kimage *image, | 466 | static struct page *kimage_alloc_crash_control_pages(struct kimage *image, |
468 | unsigned int order) | 467 | unsigned int order) |
469 | { | 468 | { |
470 | /* Control pages are special, they are the intermediaries | 469 | /* Control pages are special, they are the intermediaries |
471 | * that are needed while we copy the rest of the pages | 470 | * that are needed while we copy the rest of the pages |
472 | * to their final resting place. As such they must | 471 | * to their final resting place. As such they must |
473 | * not conflict with either the destination addresses | 472 | * not conflict with either the destination addresses |
474 | * or memory the kernel is already using. | 473 | * or memory the kernel is already using. |
475 | * | 474 | * |
476 | * Control pages are also the only pages we must allocate | 475 | * Control pages are also the only pages we must allocate |
477 | * when loading a crash kernel. All of the other pages | 476 | * when loading a crash kernel. All of the other pages |
478 | * are specified by the segments and we just memcpy | 477 | * are specified by the segments and we just memcpy |
479 | * into them directly. | 478 | * into them directly. |
480 | * | 479 | * |
481 | * The only case where we really need more than one of | 480 | * The only case where we really need more than one of |
482 | * these is for architectures where we cannot disable | 481 | * these is for architectures where we cannot disable |
483 | * the MMU and must instead generate an identity mapped | 482 | * the MMU and must instead generate an identity mapped |
484 | * page table for all of the memory. | 483 | * page table for all of the memory. |
485 | * | 484 | * |
486 | * Given the low demand this implements a very simple | 485 | * Given the low demand this implements a very simple |
487 | * allocator that finds the first hole of the appropriate | 486 | * allocator that finds the first hole of the appropriate |
488 | * size in the reserved memory region, and allocates all | 487 | * size in the reserved memory region, and allocates all |
489 | * of the memory up to and including the hole. | 488 | * of the memory up to and including the hole. |
490 | */ | 489 | */ |
491 | unsigned long hole_start, hole_end, size; | 490 | unsigned long hole_start, hole_end, size; |
492 | struct page *pages; | 491 | struct page *pages; |
493 | 492 | ||
494 | pages = NULL; | 493 | pages = NULL; |
495 | size = (1 << order) << PAGE_SHIFT; | 494 | size = (1 << order) << PAGE_SHIFT; |
496 | hole_start = (image->control_page + (size - 1)) & ~(size - 1); | 495 | hole_start = (image->control_page + (size - 1)) & ~(size - 1); |
497 | hole_end = hole_start + size - 1; | 496 | hole_end = hole_start + size - 1; |
498 | while (hole_end <= crashk_res.end) { | 497 | while (hole_end <= crashk_res.end) { |
499 | unsigned long i; | 498 | unsigned long i; |
500 | 499 | ||
501 | if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) | 500 | if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) |
502 | break; | 501 | break; |
503 | if (hole_end > crashk_res.end) | 502 | if (hole_end > crashk_res.end) |
504 | break; | 503 | break; |
505 | /* See if I overlap any of the segments */ | 504 | /* See if I overlap any of the segments */ |
506 | for (i = 0; i < image->nr_segments; i++) { | 505 | for (i = 0; i < image->nr_segments; i++) { |
507 | unsigned long mstart, mend; | 506 | unsigned long mstart, mend; |
508 | 507 | ||
509 | mstart = image->segment[i].mem; | 508 | mstart = image->segment[i].mem; |
510 | mend = mstart + image->segment[i].memsz - 1; | 509 | mend = mstart + image->segment[i].memsz - 1; |
511 | if ((hole_end >= mstart) && (hole_start <= mend)) { | 510 | if ((hole_end >= mstart) && (hole_start <= mend)) { |
512 | /* Advance the hole to the end of the segment */ | 511 | /* Advance the hole to the end of the segment */ |
513 | hole_start = (mend + (size - 1)) & ~(size - 1); | 512 | hole_start = (mend + (size - 1)) & ~(size - 1); |
514 | hole_end = hole_start + size - 1; | 513 | hole_end = hole_start + size - 1; |
515 | break; | 514 | break; |
516 | } | 515 | } |
517 | } | 516 | } |
518 | /* If I don't overlap any segments I have found my hole! */ | 517 | /* If I don't overlap any segments I have found my hole! */ |
519 | if (i == image->nr_segments) { | 518 | if (i == image->nr_segments) { |
520 | pages = pfn_to_page(hole_start >> PAGE_SHIFT); | 519 | pages = pfn_to_page(hole_start >> PAGE_SHIFT); |
521 | break; | 520 | break; |
522 | } | 521 | } |
523 | } | 522 | } |
524 | if (pages) | 523 | if (pages) |
525 | image->control_page = hole_end; | 524 | image->control_page = hole_end; |
526 | 525 | ||
527 | return pages; | 526 | return pages; |
528 | } | 527 | } |
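The hole search above relies on the usual power-of-two round-up: for size a power of two, (x + size - 1) & ~(size - 1) is the smallest multiple of size that is >= x. A quick worked check with size = 0x4000 (an order-2 allocation with 4 KiB pages; align_up is just the expression used for hole_start, given a name):

    /*   align_up(0x12345, 0x4000) = (0x12345 + 0x3fff) & ~0x3fff
     *                             = 0x16344 & ~0x3fff = 0x14000
     *   align_up(0x14000, 0x4000) = 0x14000   (already aligned)
     */
    static unsigned long align_up(unsigned long x, unsigned long size)
    {
        return (x + size - 1) & ~(size - 1);
    }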
529 | 528 | ||
530 | 529 | ||
531 | struct page *kimage_alloc_control_pages(struct kimage *image, | 530 | struct page *kimage_alloc_control_pages(struct kimage *image, |
532 | unsigned int order) | 531 | unsigned int order) |
533 | { | 532 | { |
534 | struct page *pages = NULL; | 533 | struct page *pages = NULL; |
535 | 534 | ||
536 | switch (image->type) { | 535 | switch (image->type) { |
537 | case KEXEC_TYPE_DEFAULT: | 536 | case KEXEC_TYPE_DEFAULT: |
538 | pages = kimage_alloc_normal_control_pages(image, order); | 537 | pages = kimage_alloc_normal_control_pages(image, order); |
539 | break; | 538 | break; |
540 | case KEXEC_TYPE_CRASH: | 539 | case KEXEC_TYPE_CRASH: |
541 | pages = kimage_alloc_crash_control_pages(image, order); | 540 | pages = kimage_alloc_crash_control_pages(image, order); |
542 | break; | 541 | break; |
543 | } | 542 | } |
544 | 543 | ||
545 | return pages; | 544 | return pages; |
546 | } | 545 | } |
547 | 546 | ||
548 | static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) | 547 | static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) |
549 | { | 548 | { |
550 | if (*image->entry != 0) | 549 | if (*image->entry != 0) |
551 | image->entry++; | 550 | image->entry++; |
552 | 551 | ||
553 | if (image->entry == image->last_entry) { | 552 | if (image->entry == image->last_entry) { |
554 | kimage_entry_t *ind_page; | 553 | kimage_entry_t *ind_page; |
555 | struct page *page; | 554 | struct page *page; |
556 | 555 | ||
557 | page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); | 556 | page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); |
558 | if (!page) | 557 | if (!page) |
559 | return -ENOMEM; | 558 | return -ENOMEM; |
560 | 559 | ||
561 | ind_page = page_address(page); | 560 | ind_page = page_address(page); |
562 | *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; | 561 | *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; |
563 | image->entry = ind_page; | 562 | image->entry = ind_page; |
564 | image->last_entry = ind_page + | 563 | image->last_entry = ind_page + |
565 | ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); | 564 | ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); |
566 | } | 565 | } |
567 | *image->entry = entry; | 566 | *image->entry = entry; |
568 | image->entry++; | 567 | image->entry++; |
569 | *image->entry = 0; | 568 | *image->entry = 0; |
570 | 569 | ||
571 | return 0; | 570 | return 0; |
572 | } | 571 | } |
573 | 572 | ||
574 | static int kimage_set_destination(struct kimage *image, | 573 | static int kimage_set_destination(struct kimage *image, |
575 | unsigned long destination) | 574 | unsigned long destination) |
576 | { | 575 | { |
577 | int result; | 576 | int result; |
578 | 577 | ||
579 | destination &= PAGE_MASK; | 578 | destination &= PAGE_MASK; |
580 | result = kimage_add_entry(image, destination | IND_DESTINATION); | 579 | result = kimage_add_entry(image, destination | IND_DESTINATION); |
581 | if (result == 0) | 580 | if (result == 0) |
582 | image->destination = destination; | 581 | image->destination = destination; |
583 | 582 | ||
584 | return result; | 583 | return result; |
585 | } | 584 | } |
586 | 585 | ||
587 | 586 | ||
588 | static int kimage_add_page(struct kimage *image, unsigned long page) | 587 | static int kimage_add_page(struct kimage *image, unsigned long page) |
589 | { | 588 | { |
590 | int result; | 589 | int result; |
591 | 590 | ||
592 | page &= PAGE_MASK; | 591 | page &= PAGE_MASK; |
593 | result = kimage_add_entry(image, page | IND_SOURCE); | 592 | result = kimage_add_entry(image, page | IND_SOURCE); |
594 | if (result == 0) | 593 | if (result == 0) |
595 | image->destination += PAGE_SIZE; | 594 | image->destination += PAGE_SIZE; |
596 | 595 | ||
597 | return result; | 596 | return result; |
598 | } | 597 | } |
599 | 598 | ||
600 | 599 | ||
601 | static void kimage_free_extra_pages(struct kimage *image) | 600 | static void kimage_free_extra_pages(struct kimage *image) |
602 | { | 601 | { |
603 | /* Walk through and free any extra destination pages I may have */ | 602 | /* Walk through and free any extra destination pages I may have */ |
604 | kimage_free_page_list(&image->dest_pages); | 603 | kimage_free_page_list(&image->dest_pages); |
605 | 604 | ||
606 | /* Walk through and free any unusable pages I have cached */ | 605 | /* Walk through and free any unusable pages I have cached */ |
607 | kimage_free_page_list(&image->unuseable_pages); | 606 | kimage_free_page_list(&image->unuseable_pages); |
608 | 607 | ||
609 | } | 608 | } |
610 | static void kimage_terminate(struct kimage *image) | 609 | static void kimage_terminate(struct kimage *image) |
611 | { | 610 | { |
612 | if (*image->entry != 0) | 611 | if (*image->entry != 0) |
613 | image->entry++; | 612 | image->entry++; |
614 | 613 | ||
615 | *image->entry = IND_DONE; | 614 | *image->entry = IND_DONE; |
616 | } | 615 | } |
617 | 616 | ||
618 | #define for_each_kimage_entry(image, ptr, entry) \ | 617 | #define for_each_kimage_entry(image, ptr, entry) \ |
619 | for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ | 618 | for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ |
620 | ptr = (entry & IND_INDIRECTION) ? \ | 619 | ptr = (entry & IND_INDIRECTION) ? \ |
621 | phys_to_virt(entry & PAGE_MASK) : ptr + 1) | 620 | phys_to_virt(entry & PAGE_MASK) : ptr + 1) |
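Each kimage_entry_t packs a page-aligned physical address in the high bits with IND_* flag bits in the low bits: IND_DESTINATION sets where the following source pages will land, IND_SOURCE names one page of payload, IND_INDIRECTION chains to the next page of entries, and IND_DONE ends the list. A sketch of how a walker decodes that encoding (kernel-context pseudocode; the real flag definitions live in the kexec headers):

    static void walk_entries(kimage_entry_t *ptr)
    {
        kimage_entry_t entry;

        while ((entry = *ptr) && !(entry & IND_DONE)) {
            unsigned long phys = entry & PAGE_MASK;

            if (entry & IND_DESTINATION)
                ;   /* phys: base address for the next sources */
            else if (entry & IND_SOURCE)
                ;   /* phys: one page of payload to copy */

            if (entry & IND_INDIRECTION)
                ptr = phys_to_virt(phys);  /* jump to next entry page */
            else
                ptr++;                     /* next slot in this page */
        }
    }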
622 | 621 | ||
623 | static void kimage_free_entry(kimage_entry_t entry) | 622 | static void kimage_free_entry(kimage_entry_t entry) |
624 | { | 623 | { |
625 | struct page *page; | 624 | struct page *page; |
626 | 625 | ||
627 | page = pfn_to_page(entry >> PAGE_SHIFT); | 626 | page = pfn_to_page(entry >> PAGE_SHIFT); |
628 | kimage_free_pages(page); | 627 | kimage_free_pages(page); |
629 | } | 628 | } |
630 | 629 | ||
631 | static void kimage_free(struct kimage *image) | 630 | static void kimage_free(struct kimage *image) |
632 | { | 631 | { |
633 | kimage_entry_t *ptr, entry; | 632 | kimage_entry_t *ptr, entry; |
634 | kimage_entry_t ind = 0; | 633 | kimage_entry_t ind = 0; |
635 | 634 | ||
636 | if (!image) | 635 | if (!image) |
637 | return; | 636 | return; |
638 | 637 | ||
639 | kimage_free_extra_pages(image); | 638 | kimage_free_extra_pages(image); |
640 | for_each_kimage_entry(image, ptr, entry) { | 639 | for_each_kimage_entry(image, ptr, entry) { |
641 | if (entry & IND_INDIRECTION) { | 640 | if (entry & IND_INDIRECTION) { |
642 | /* Free the previous indirection page */ | 641 | /* Free the previous indirection page */ |
643 | if (ind & IND_INDIRECTION) | 642 | if (ind & IND_INDIRECTION) |
644 | kimage_free_entry(ind); | 643 | kimage_free_entry(ind); |
645 | /* Save this indirection page until we are | 644 | /* Save this indirection page until we are |
646 | * done with it. | 645 | * done with it. |
647 | */ | 646 | */ |
648 | ind = entry; | 647 | ind = entry; |
649 | } | 648 | } |
650 | else if (entry & IND_SOURCE) | 649 | else if (entry & IND_SOURCE) |
651 | kimage_free_entry(entry); | 650 | kimage_free_entry(entry); |
652 | } | 651 | } |
653 | /* Free the final indirection page */ | 652 | /* Free the final indirection page */ |
654 | if (ind & IND_INDIRECTION) | 653 | if (ind & IND_INDIRECTION) |
655 | kimage_free_entry(ind); | 654 | kimage_free_entry(ind); |
656 | 655 | ||
657 | /* Handle any machine specific cleanup */ | 656 | /* Handle any machine specific cleanup */ |
658 | machine_kexec_cleanup(image); | 657 | machine_kexec_cleanup(image); |
659 | 658 | ||
660 | /* Free the kexec control pages... */ | 659 | /* Free the kexec control pages... */ |
661 | kimage_free_page_list(&image->control_pages); | 660 | kimage_free_page_list(&image->control_pages); |
662 | kfree(image); | 661 | kfree(image); |
663 | } | 662 | } |
664 | 663 | ||
665 | static kimage_entry_t *kimage_dst_used(struct kimage *image, | 664 | static kimage_entry_t *kimage_dst_used(struct kimage *image, |
666 | unsigned long page) | 665 | unsigned long page) |
667 | { | 666 | { |
668 | kimage_entry_t *ptr, entry; | 667 | kimage_entry_t *ptr, entry; |
669 | unsigned long destination = 0; | 668 | unsigned long destination = 0; |
670 | 669 | ||
671 | for_each_kimage_entry(image, ptr, entry) { | 670 | for_each_kimage_entry(image, ptr, entry) { |
672 | if (entry & IND_DESTINATION) | 671 | if (entry & IND_DESTINATION) |
673 | destination = entry & PAGE_MASK; | 672 | destination = entry & PAGE_MASK; |
674 | else if (entry & IND_SOURCE) { | 673 | else if (entry & IND_SOURCE) { |
675 | if (page == destination) | 674 | if (page == destination) |
676 | return ptr; | 675 | return ptr; |
677 | destination += PAGE_SIZE; | 676 | destination += PAGE_SIZE; |
678 | } | 677 | } |
679 | } | 678 | } |
680 | 679 | ||
681 | return NULL; | 680 | return NULL; |
682 | } | 681 | } |
683 | 682 | ||
684 | static struct page *kimage_alloc_page(struct kimage *image, | 683 | static struct page *kimage_alloc_page(struct kimage *image, |
685 | gfp_t gfp_mask, | 684 | gfp_t gfp_mask, |
686 | unsigned long destination) | 685 | unsigned long destination) |
687 | { | 686 | { |
688 | /* | 687 | /* |
689 | * Here we implement safeguards to ensure that a source page | 688 | * Here we implement safeguards to ensure that a source page |
690 | * is not copied to its destination page before the data on | 689 | * is not copied to its destination page before the data on |
691 | * the destination page is no longer useful. | 690 | * the destination page is no longer useful. |
692 | * | 691 | * |
693 | * To do this we maintain the invariant that a source page is | 692 | * To do this we maintain the invariant that a source page is |
694 | * either its own destination page, or it is not a | 693 | * either its own destination page, or it is not a |
695 | * destination page at all. | 694 | * destination page at all. |
696 | * | 695 | * |
697 | * That is slightly stronger than required, but the proof | 696 | * That is slightly stronger than required, but the proof |
698 | * that no problems will occur is trivial, and the | 697 | * that no problems will occur is trivial, and the |
699 | * implementation is simple to verify. | 698 | * implementation is simple to verify. |
700 | * | 699 | * |
701 | * When allocating all pages normally this algorithm will run | 700 | * When allocating all pages normally this algorithm will run |
702 | * in O(N) time, but in the worst case it will run in O(N^2) | 701 | * in O(N) time, but in the worst case it will run in O(N^2) |
703 | * time. If the runtime is a problem the data structures can | 702 | * time. If the runtime is a problem the data structures can |
704 | * be fixed. | 703 | * be fixed. |
705 | */ | 704 | */ |
706 | struct page *page; | 705 | struct page *page; |
707 | unsigned long addr; | 706 | unsigned long addr; |
708 | 707 | ||
709 | /* | 708 | /* |
710 | * Walk through the list of destination pages, and see if I | 709 | * Walk through the list of destination pages, and see if I |
711 | * have a match. | 710 | * have a match. |
712 | */ | 711 | */ |
713 | list_for_each_entry(page, &image->dest_pages, lru) { | 712 | list_for_each_entry(page, &image->dest_pages, lru) { |
714 | addr = page_to_pfn(page) << PAGE_SHIFT; | 713 | addr = page_to_pfn(page) << PAGE_SHIFT; |
715 | if (addr == destination) { | 714 | if (addr == destination) { |
716 | list_del(&page->lru); | 715 | list_del(&page->lru); |
717 | return page; | 716 | return page; |
718 | } | 717 | } |
719 | } | 718 | } |
720 | page = NULL; | 719 | page = NULL; |
721 | while (1) { | 720 | while (1) { |
722 | kimage_entry_t *old; | 721 | kimage_entry_t *old; |
723 | 722 | ||
724 | /* Allocate a page; if we run out of memory, give up */ | 723 | /* Allocate a page; if we run out of memory, give up */ |
725 | page = kimage_alloc_pages(gfp_mask, 0); | 724 | page = kimage_alloc_pages(gfp_mask, 0); |
726 | if (!page) | 725 | if (!page) |
727 | return NULL; | 726 | return NULL; |
728 | /* If the page cannot be used, file it away */ | 727 | /* If the page cannot be used, file it away */ |
729 | if (page_to_pfn(page) > | 728 | if (page_to_pfn(page) > |
730 | (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { | 729 | (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { |
731 | list_add(&page->lru, &image->unuseable_pages); | 730 | list_add(&page->lru, &image->unuseable_pages); |
732 | continue; | 731 | continue; |
733 | } | 732 | } |
734 | addr = page_to_pfn(page) << PAGE_SHIFT; | 733 | addr = page_to_pfn(page) << PAGE_SHIFT; |
735 | 734 | ||
736 | /* If it is the destination page we want, use it */ | 735 | /* If it is the destination page we want, use it */ |
737 | if (addr == destination) | 736 | if (addr == destination) |
738 | break; | 737 | break; |
739 | 738 | ||
740 | /* If the page is not a destination page use it */ | 739 | /* If the page is not a destination page use it */ |
741 | if (!kimage_is_destination_range(image, addr, | 740 | if (!kimage_is_destination_range(image, addr, |
742 | addr + PAGE_SIZE)) | 741 | addr + PAGE_SIZE)) |
743 | break; | 742 | break; |
744 | 743 | ||
745 | /* | 744 | /* |
746 | * I know that the page is someone's destination page. | 745 | * I know that the page is someone's destination page. |
747 | * See if there is already a source page for this | 746 | * See if there is already a source page for this |
748 | * destination page. And if so swap the source pages. | 747 | * destination page. And if so swap the source pages. |
749 | */ | 748 | */ |
750 | old = kimage_dst_used(image, addr); | 749 | old = kimage_dst_used(image, addr); |
751 | if (old) { | 750 | if (old) { |
752 | /* If so move it */ | 751 | /* If so move it */ |
753 | unsigned long old_addr; | 752 | unsigned long old_addr; |
754 | struct page *old_page; | 753 | struct page *old_page; |
755 | 754 | ||
756 | old_addr = *old & PAGE_MASK; | 755 | old_addr = *old & PAGE_MASK; |
757 | old_page = pfn_to_page(old_addr >> PAGE_SHIFT); | 756 | old_page = pfn_to_page(old_addr >> PAGE_SHIFT); |
758 | copy_highpage(page, old_page); | 757 | copy_highpage(page, old_page); |
759 | *old = addr | (*old & ~PAGE_MASK); | 758 | *old = addr | (*old & ~PAGE_MASK); |
760 | 759 | ||
761 | /* The old page I have found cannot be a | 760 | /* The old page I have found cannot be a |
762 | * destination page, so return it if its | 761 | * destination page, so return it if its |
763 | * gfp_flags honor the ones passed in. | 762 | * gfp_flags honor the ones passed in. |
764 | */ | 763 | */ |
765 | if (!(gfp_mask & __GFP_HIGHMEM) && | 764 | if (!(gfp_mask & __GFP_HIGHMEM) && |
766 | PageHighMem(old_page)) { | 765 | PageHighMem(old_page)) { |
767 | kimage_free_pages(old_page); | 766 | kimage_free_pages(old_page); |
768 | continue; | 767 | continue; |
769 | } | 768 | } |
770 | addr = old_addr; | 769 | addr = old_addr; |
771 | page = old_page; | 770 | page = old_page; |
772 | break; | 771 | break; |
773 | } | 772 | } |
774 | else { | 773 | else { |
775 | /* Place the page on the destination list; I | 774 | /* Place the page on the destination list; I |
776 | * will use it later. | 775 | * will use it later. |
777 | */ | 776 | */ |
778 | list_add(&page->lru, &image->dest_pages); | 777 | list_add(&page->lru, &image->dest_pages); |
779 | } | 778 | } |
780 | } | 779 | } |
781 | 780 | ||
782 | return page; | 781 | return page; |
783 | } | 782 | } |
784 | 783 | ||
785 | static int kimage_load_normal_segment(struct kimage *image, | 784 | static int kimage_load_normal_segment(struct kimage *image, |
786 | struct kexec_segment *segment) | 785 | struct kexec_segment *segment) |
787 | { | 786 | { |
788 | unsigned long maddr; | 787 | unsigned long maddr; |
789 | unsigned long ubytes, mbytes; | 788 | unsigned long ubytes, mbytes; |
790 | int result; | 789 | int result; |
791 | unsigned char __user *buf; | 790 | unsigned char __user *buf; |
792 | 791 | ||
793 | result = 0; | 792 | result = 0; |
794 | buf = segment->buf; | 793 | buf = segment->buf; |
795 | ubytes = segment->bufsz; | 794 | ubytes = segment->bufsz; |
796 | mbytes = segment->memsz; | 795 | mbytes = segment->memsz; |
797 | maddr = segment->mem; | 796 | maddr = segment->mem; |
798 | 797 | ||
799 | result = kimage_set_destination(image, maddr); | 798 | result = kimage_set_destination(image, maddr); |
800 | if (result < 0) | 799 | if (result < 0) |
801 | goto out; | 800 | goto out; |
802 | 801 | ||
803 | while (mbytes) { | 802 | while (mbytes) { |
804 | struct page *page; | 803 | struct page *page; |
805 | char *ptr; | 804 | char *ptr; |
806 | size_t uchunk, mchunk; | 805 | size_t uchunk, mchunk; |
807 | 806 | ||
808 | page = kimage_alloc_page(image, GFP_HIGHUSER, maddr); | 807 | page = kimage_alloc_page(image, GFP_HIGHUSER, maddr); |
809 | if (!page) { | 808 | if (!page) { |
810 | result = -ENOMEM; | 809 | result = -ENOMEM; |
811 | goto out; | 810 | goto out; |
812 | } | 811 | } |
813 | result = kimage_add_page(image, page_to_pfn(page) | 812 | result = kimage_add_page(image, page_to_pfn(page) |
814 | << PAGE_SHIFT); | 813 | << PAGE_SHIFT); |
815 | if (result < 0) | 814 | if (result < 0) |
816 | goto out; | 815 | goto out; |
817 | 816 | ||
818 | ptr = kmap(page); | 817 | ptr = kmap(page); |
819 | /* Start with a clear page */ | 818 | /* Start with a clear page */ |
820 | clear_page(ptr); | 819 | clear_page(ptr); |
821 | ptr += maddr & ~PAGE_MASK; | 820 | ptr += maddr & ~PAGE_MASK; |
822 | mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); | 821 | mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); |
823 | if (mchunk > mbytes) | 822 | if (mchunk > mbytes) |
824 | mchunk = mbytes; | 823 | mchunk = mbytes; |
825 | 824 | ||
826 | uchunk = mchunk; | 825 | uchunk = mchunk; |
827 | if (uchunk > ubytes) | 826 | if (uchunk > ubytes) |
828 | uchunk = ubytes; | 827 | uchunk = ubytes; |
829 | 828 | ||
830 | result = copy_from_user(ptr, buf, uchunk); | 829 | result = copy_from_user(ptr, buf, uchunk); |
831 | kunmap(page); | 830 | kunmap(page); |
832 | if (result) { | 831 | if (result) { |
833 | result = -EFAULT; | 832 | result = -EFAULT; |
834 | goto out; | 833 | goto out; |
835 | } | 834 | } |
836 | ubytes -= uchunk; | 835 | ubytes -= uchunk; |
837 | maddr += mchunk; | 836 | maddr += mchunk; |
838 | buf += mchunk; | 837 | buf += mchunk; |
839 | mbytes -= mchunk; | 838 | mbytes -= mchunk; |
840 | } | 839 | } |
841 | out: | 840 | out: |
842 | return result; | 841 | return result; |
843 | } | 842 | } |
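In the loop above, mchunk caps each copy at the current page boundary and uchunk further caps it at the user bytes left; once ubytes is exhausted, uchunk is 0 and the remaining memsz is covered by pages that clear_page() already zeroed. A worked example with 4 KiB pages, maddr = 0x10c00, mbytes = 0x1400, ubytes = 0x900:

    /* Iteration 1: maddr = 0x10c00 (offset 0xc00 into its page)
     *   mchunk = 0x1000 - 0xc00 = 0x400
     *   uchunk = min(0x400, 0x900) = 0x400
     * Iteration 2: maddr = 0x11000 (page aligned), mbytes = 0x1000
     *   mchunk = 0x1000
     *   uchunk = min(0x1000, 0x500) = 0x500
     *   -> the final 0xb00 bytes of that page stay zero-filled.
     */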
844 | 843 | ||
845 | static int kimage_load_crash_segment(struct kimage *image, | 844 | static int kimage_load_crash_segment(struct kimage *image, |
846 | struct kexec_segment *segment) | 845 | struct kexec_segment *segment) |
847 | { | 846 | { |
848 | /* For crash dump kernels we simply copy the data from | 847 | /* For crash dump kernels we simply copy the data from |
849 | * user space to its destination. | 848 | * user space to its destination. |
850 | * We do things a page at a time for the sake of kmap. | 849 | * We do things a page at a time for the sake of kmap. |
851 | */ | 850 | */ |
852 | unsigned long maddr; | 851 | unsigned long maddr; |
853 | unsigned long ubytes, mbytes; | 852 | unsigned long ubytes, mbytes; |
854 | int result; | 853 | int result; |
855 | unsigned char __user *buf; | 854 | unsigned char __user *buf; |
856 | 855 | ||
857 | result = 0; | 856 | result = 0; |
858 | buf = segment->buf; | 857 | buf = segment->buf; |
859 | ubytes = segment->bufsz; | 858 | ubytes = segment->bufsz; |
860 | mbytes = segment->memsz; | 859 | mbytes = segment->memsz; |
861 | maddr = segment->mem; | 860 | maddr = segment->mem; |
862 | while (mbytes) { | 861 | while (mbytes) { |
863 | struct page *page; | 862 | struct page *page; |
864 | char *ptr; | 863 | char *ptr; |
865 | size_t uchunk, mchunk; | 864 | size_t uchunk, mchunk; |
866 | 865 | ||
867 | page = pfn_to_page(maddr >> PAGE_SHIFT); | 866 | page = pfn_to_page(maddr >> PAGE_SHIFT); |
868 | if (!page) { | 867 | if (!page) { |
869 | result = -ENOMEM; | 868 | result = -ENOMEM; |
870 | goto out; | 869 | goto out; |
871 | } | 870 | } |
872 | ptr = kmap(page); | 871 | ptr = kmap(page); |
873 | ptr += maddr & ~PAGE_MASK; | 872 | ptr += maddr & ~PAGE_MASK; |
874 | mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); | 873 | mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); |
875 | if (mchunk > mbytes) | 874 | if (mchunk > mbytes) |
876 | mchunk = mbytes; | 875 | mchunk = mbytes; |
877 | 876 | ||
878 | uchunk = mchunk; | 877 | uchunk = mchunk; |
879 | if (uchunk > ubytes) { | 878 | if (uchunk > ubytes) { |
880 | uchunk = ubytes; | 879 | uchunk = ubytes; |
881 | /* Zero the trailing part of the page */ | 880 | /* Zero the trailing part of the page */ |
882 | memset(ptr + uchunk, 0, mchunk - uchunk); | 881 | memset(ptr + uchunk, 0, mchunk - uchunk); |
883 | } | 882 | } |
884 | result = copy_from_user(ptr, buf, uchunk); | 883 | result = copy_from_user(ptr, buf, uchunk); |
885 | kexec_flush_icache_page(page); | 884 | kexec_flush_icache_page(page); |
886 | kunmap(page); | 885 | kunmap(page); |
887 | if (result) { | 886 | if (result) { |
888 | result = -EFAULT; | 887 | result = -EFAULT; |
889 | goto out; | 888 | goto out; |
890 | } | 889 | } |
891 | ubytes -= uchunk; | 890 | ubytes -= uchunk; |
892 | maddr += mchunk; | 891 | maddr += mchunk; |
893 | buf += mchunk; | 892 | buf += mchunk; |
894 | mbytes -= mchunk; | 893 | mbytes -= mchunk; |
895 | } | 894 | } |
896 | out: | 895 | out: |
897 | return result; | 896 | return result; |
898 | } | 897 | } |
899 | 898 | ||
900 | static int kimage_load_segment(struct kimage *image, | 899 | static int kimage_load_segment(struct kimage *image, |
901 | struct kexec_segment *segment) | 900 | struct kexec_segment *segment) |
902 | { | 901 | { |
903 | int result = -ENOMEM; | 902 | int result = -ENOMEM; |
904 | 903 | ||
905 | switch (image->type) { | 904 | switch (image->type) { |
906 | case KEXEC_TYPE_DEFAULT: | 905 | case KEXEC_TYPE_DEFAULT: |
907 | result = kimage_load_normal_segment(image, segment); | 906 | result = kimage_load_normal_segment(image, segment); |
908 | break; | 907 | break; |
909 | case KEXEC_TYPE_CRASH: | 908 | case KEXEC_TYPE_CRASH: |
910 | result = kimage_load_crash_segment(image, segment); | 909 | result = kimage_load_crash_segment(image, segment); |
911 | break; | 910 | break; |
912 | } | 911 | } |
913 | 912 | ||
914 | return result; | 913 | return result; |
915 | } | 914 | } |
916 | 915 | ||
917 | /* | 916 | /* |
918 | * Exec Kernel system call: for obvious reasons only root may call it. | 917 | * Exec Kernel system call: for obvious reasons only root may call it. |
919 | * | 918 | * |
920 | * This call breaks up into three pieces. | 919 | * This call breaks up into three pieces. |
921 | * - A generic part which loads the new kernel from the current | 920 | * - A generic part which loads the new kernel from the current |
922 | * address space, and very carefully places the data in the | 921 | * address space, and very carefully places the data in the |
923 | * allocated pages. | 922 | * allocated pages. |
924 | * | 923 | * |
925 | * - A generic part that interacts with the kernel and tells all of | 924 | * - A generic part that interacts with the kernel and tells all of |
926 | * the devices to shut down, preventing ongoing DMAs and placing | 925 | * the devices to shut down, preventing ongoing DMAs and placing |
927 | * the devices in a consistent state so a later kernel can | 926 | * the devices in a consistent state so a later kernel can |
928 | * reinitialize them. | 927 | * reinitialize them. |
929 | * | 928 | * |
930 | * - A machine specific part that includes the syscall number | 929 | * - A machine specific part that includes the syscall number |
931 | * and then copies the image to its final destination and | 930 | * and then copies the image to its final destination and |
932 | * jumps into the image at entry. | 931 | * jumps into the image at entry. |
933 | * | 932 | * |
934 | * kexec does not sync or unmount filesystems, so if you need | 933 | * kexec does not sync or unmount filesystems, so if you need |
935 | * that to happen you must do it yourself. | 934 | * that to happen you must do it yourself. |
936 | */ | 935 | */ |
937 | struct kimage *kexec_image; | 936 | struct kimage *kexec_image; |
938 | struct kimage *kexec_crash_image; | 937 | struct kimage *kexec_crash_image; |
939 | 938 | ||
940 | static DEFINE_MUTEX(kexec_mutex); | 939 | static DEFINE_MUTEX(kexec_mutex); |
941 | 940 | ||
942 | SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, | 941 | SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, |
943 | struct kexec_segment __user *, segments, unsigned long, flags) | 942 | struct kexec_segment __user *, segments, unsigned long, flags) |
944 | { | 943 | { |
945 | struct kimage **dest_image, *image; | 944 | struct kimage **dest_image, *image; |
946 | int result; | 945 | int result; |
947 | 946 | ||
948 | /* We only trust the superuser with rebooting the system. */ | 947 | /* We only trust the superuser with rebooting the system. */ |
949 | if (!capable(CAP_SYS_BOOT)) | 948 | if (!capable(CAP_SYS_BOOT)) |
950 | return -EPERM; | 949 | return -EPERM; |
951 | 950 | ||
952 | /* | 951 | /* |
953 | * Verify we have a legal set of flags | 952 | * Verify we have a legal set of flags |
954 | * This leaves us room for future extensions. | 953 | * This leaves us room for future extensions. |
955 | */ | 954 | */ |
956 | if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK)) | 955 | if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK)) |
957 | return -EINVAL; | 956 | return -EINVAL; |
958 | 957 | ||
959 | /* Verify we are on the appropriate architecture */ | 958 | /* Verify we are on the appropriate architecture */ |
960 | if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) && | 959 | if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) && |
961 | ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)) | 960 | ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)) |
962 | return -EINVAL; | 961 | return -EINVAL; |
963 | 962 | ||
964 | /* Put an artificial cap on the number | 963 | /* Put an artificial cap on the number |
965 | * of segments passed to kexec_load. | 964 | * of segments passed to kexec_load. |
966 | */ | 965 | */ |
967 | if (nr_segments > KEXEC_SEGMENT_MAX) | 966 | if (nr_segments > KEXEC_SEGMENT_MAX) |
968 | return -EINVAL; | 967 | return -EINVAL; |
969 | 968 | ||
970 | image = NULL; | 969 | image = NULL; |
971 | result = 0; | 970 | result = 0; |
972 | 971 | ||
973 | /* Because we write directly to the reserved memory | 972 | /* Because we write directly to the reserved memory |
974 | * region when loading crash kernels we need a mutex here to | 973 | * region when loading crash kernels we need a mutex here to |
975 | * prevent multiple crash kernels from attempting to load | 974 | * prevent multiple crash kernels from attempting to load |
976 | * simultaneously, and to prevent a crash kernel from loading | 975 | * simultaneously, and to prevent a crash kernel from loading |
977 | * over the top of an in-use crash kernel. | 976 | * over the top of an in-use crash kernel. |
978 | * | 977 | * |
979 | * KISS: always take the mutex. | 978 | * KISS: always take the mutex. |
980 | */ | 979 | */ |
981 | if (!mutex_trylock(&kexec_mutex)) | 980 | if (!mutex_trylock(&kexec_mutex)) |
982 | return -EBUSY; | 981 | return -EBUSY; |
983 | 982 | ||
984 | dest_image = &kexec_image; | 983 | dest_image = &kexec_image; |
985 | if (flags & KEXEC_ON_CRASH) | 984 | if (flags & KEXEC_ON_CRASH) |
986 | dest_image = &kexec_crash_image; | 985 | dest_image = &kexec_crash_image; |
987 | if (nr_segments > 0) { | 986 | if (nr_segments > 0) { |
988 | unsigned long i; | 987 | unsigned long i; |
989 | 988 | ||
990 | /* Loading another kernel to reboot into */ | 989 | /* Loading another kernel to reboot into */ |
991 | if ((flags & KEXEC_ON_CRASH) == 0) | 990 | if ((flags & KEXEC_ON_CRASH) == 0) |
992 | result = kimage_normal_alloc(&image, entry, | 991 | result = kimage_normal_alloc(&image, entry, |
993 | nr_segments, segments); | 992 | nr_segments, segments); |
994 | /* Loading another kernel to switch to if this one crashes */ | 993 | /* Loading another kernel to switch to if this one crashes */ |
995 | else if (flags & KEXEC_ON_CRASH) { | 994 | else if (flags & KEXEC_ON_CRASH) { |
996 | /* Free any current crash dump kernel before | 995 | /* Free any current crash dump kernel before |
997 | * we corrupt it. | 996 | * we corrupt it. |
998 | */ | 997 | */ |
999 | kimage_free(xchg(&kexec_crash_image, NULL)); | 998 | kimage_free(xchg(&kexec_crash_image, NULL)); |
1000 | result = kimage_crash_alloc(&image, entry, | 999 | result = kimage_crash_alloc(&image, entry, |
1001 | nr_segments, segments); | 1000 | nr_segments, segments); |
1002 | crash_map_reserved_pages(); | 1001 | crash_map_reserved_pages(); |
1003 | } | 1002 | } |
1004 | if (result) | 1003 | if (result) |
1005 | goto out; | 1004 | goto out; |
1006 | 1005 | ||
1007 | if (flags & KEXEC_PRESERVE_CONTEXT) | 1006 | if (flags & KEXEC_PRESERVE_CONTEXT) |
1008 | image->preserve_context = 1; | 1007 | image->preserve_context = 1; |
1009 | result = machine_kexec_prepare(image); | 1008 | result = machine_kexec_prepare(image); |
1010 | if (result) | 1009 | if (result) |
1011 | goto out; | 1010 | goto out; |
1012 | 1011 | ||
1013 | for (i = 0; i < nr_segments; i++) { | 1012 | for (i = 0; i < nr_segments; i++) { |
1014 | result = kimage_load_segment(image, &image->segment[i]); | 1013 | result = kimage_load_segment(image, &image->segment[i]); |
1015 | if (result) | 1014 | if (result) |
1016 | goto out; | 1015 | goto out; |
1017 | } | 1016 | } |
1018 | kimage_terminate(image); | 1017 | kimage_terminate(image); |
1019 | if (flags & KEXEC_ON_CRASH) | 1018 | if (flags & KEXEC_ON_CRASH) |
1020 | crash_unmap_reserved_pages(); | 1019 | crash_unmap_reserved_pages(); |
1021 | } | 1020 | } |
1022 | /* Install the new kernel and uninstall the old */ | 1021 | /* Install the new kernel and uninstall the old */ |
1023 | image = xchg(dest_image, image); | 1022 | image = xchg(dest_image, image); |
1024 | 1023 | ||
1025 | out: | 1024 | out: |
1026 | mutex_unlock(&kexec_mutex); | 1025 | mutex_unlock(&kexec_mutex); |
1027 | kimage_free(image); | 1026 | kimage_free(image); |
1028 | 1027 | ||
1029 | return result; | 1028 | return result; |
1030 | } | 1029 | } |
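For reference, user space normally reaches this entry point through kexec-tools, but a direct call looks roughly like the sketch below. This is a rough single-segment illustration, assuming <linux/kexec.h> exposes struct kexec_segment and KEXEC_ARCH_DEFAULT; real loaders build several segments from the kernel image layout, and mem/memsz must be page aligned to pass the checks in do_kimage_alloc().

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/kexec.h>

    /* Minimal single-segment kexec_load(2) sketch; there is no glibc
     * wrapper, so the call goes through syscall(2). Requires
     * CAP_SYS_BOOT, matching the capable() check above. */
    static int load_one_segment(const void *buf, size_t bufsz,
                                unsigned long dest, size_t memsz,
                                unsigned long entry)
    {
        struct kexec_segment seg = {
            .buf   = buf,           /* user-space source */
            .bufsz = bufsz,
            .mem   = (void *)dest,  /* physical destination */
            .memsz = memsz,         /* page aligned, >= bufsz */
        };

        return syscall(SYS_kexec_load, entry, 1UL, &seg,
                       (unsigned long)KEXEC_ARCH_DEFAULT);
    }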
1031 | 1030 | ||
1032 | /* | 1031 | /* |
1033 | * Add and remove page tables for crashkernel memory | 1032 | * Add and remove page tables for crashkernel memory |
1034 | * | 1033 | * |
1035 | * Provide an empty default implementation here -- architecture | 1034 | * Provide an empty default implementation here -- architecture |
1036 | * code may override this | 1035 | * code may override this |
1037 | */ | 1036 | */ |
1038 | void __weak crash_map_reserved_pages(void) | 1037 | void __weak crash_map_reserved_pages(void) |
1039 | {} | 1038 | {} |
1040 | 1039 | ||
1041 | void __weak crash_unmap_reserved_pages(void) | 1040 | void __weak crash_unmap_reserved_pages(void) |
1042 | {} | 1041 | {} |
1043 | 1042 | ||
1044 | #ifdef CONFIG_COMPAT | 1043 | #ifdef CONFIG_COMPAT |
1045 | asmlinkage long compat_sys_kexec_load(unsigned long entry, | 1044 | asmlinkage long compat_sys_kexec_load(unsigned long entry, |
1046 | unsigned long nr_segments, | 1045 | unsigned long nr_segments, |
1047 | struct compat_kexec_segment __user *segments, | 1046 | struct compat_kexec_segment __user *segments, |
1048 | unsigned long flags) | 1047 | unsigned long flags) |
1049 | { | 1048 | { |
1050 | struct compat_kexec_segment in; | 1049 | struct compat_kexec_segment in; |
1051 | struct kexec_segment out, __user *ksegments; | 1050 | struct kexec_segment out, __user *ksegments; |
1052 | unsigned long i, result; | 1051 | unsigned long i, result; |
1053 | 1052 | ||
1054 | /* Don't allow clients that don't understand the native | 1053 | /* Don't allow clients that don't understand the native |
1055 | * architecture to do anything. | 1054 | * architecture to do anything. |
1056 | */ | 1055 | */ |
1057 | if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT) | 1056 | if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT) |
1058 | return -EINVAL; | 1057 | return -EINVAL; |
1059 | 1058 | ||
1060 | if (nr_segments > KEXEC_SEGMENT_MAX) | 1059 | if (nr_segments > KEXEC_SEGMENT_MAX) |
1061 | return -EINVAL; | 1060 | return -EINVAL; |
1062 | 1061 | ||
1063 | ksegments = compat_alloc_user_space(nr_segments * sizeof(out)); | 1062 | ksegments = compat_alloc_user_space(nr_segments * sizeof(out)); |
1064 | for (i = 0; i < nr_segments; i++) { | 1063 | for (i = 0; i < nr_segments; i++) { |
1065 | result = copy_from_user(&in, &segments[i], sizeof(in)); | 1064 | result = copy_from_user(&in, &segments[i], sizeof(in)); |
1066 | if (result) | 1065 | if (result) |
1067 | return -EFAULT; | 1066 | return -EFAULT; |
1068 | 1067 | ||
1069 | out.buf = compat_ptr(in.buf); | 1068 | out.buf = compat_ptr(in.buf); |
1070 | out.bufsz = in.bufsz; | 1069 | out.bufsz = in.bufsz; |
1071 | out.mem = in.mem; | 1070 | out.mem = in.mem; |
1072 | out.memsz = in.memsz; | 1071 | out.memsz = in.memsz; |
1073 | 1072 | ||
1074 | result = copy_to_user(&ksegments[i], &out, sizeof(out)); | 1073 | result = copy_to_user(&ksegments[i], &out, sizeof(out)); |
1075 | if (result) | 1074 | if (result) |
1076 | return -EFAULT; | 1075 | return -EFAULT; |
1077 | } | 1076 | } |
1078 | 1077 | ||
1079 | return sys_kexec_load(entry, nr_segments, ksegments, flags); | 1078 | return sys_kexec_load(entry, nr_segments, ksegments, flags); |
1080 | } | 1079 | } |
1081 | #endif | 1080 | #endif |
1082 | 1081 | ||
1083 | void crash_kexec(struct pt_regs *regs) | 1082 | void crash_kexec(struct pt_regs *regs) |
1084 | { | 1083 | { |
1085 | /* Take the kexec_mutex here to prevent sys_kexec_load | 1084 | /* Take the kexec_mutex here to prevent sys_kexec_load |
1086 | * running on one cpu from replacing the crash kernel | 1085 | * running on one cpu from replacing the crash kernel |
1087 | * we are using after a panic on a different cpu. | 1086 | * we are using after a panic on a different cpu. |
1088 | * | 1087 | * |
1089 | * If the crash kernel was not located in a fixed area | 1088 | * If the crash kernel was not located in a fixed area |
1090 | * of memory the xchg(&kexec_crash_image) would be | 1089 | * of memory the xchg(&kexec_crash_image) would be |
1091 | * sufficient. But since I reuse the memory... | 1090 | * sufficient. But since I reuse the memory... |
1092 | */ | 1091 | */ |
1093 | if (mutex_trylock(&kexec_mutex)) { | 1092 | if (mutex_trylock(&kexec_mutex)) { |
1094 | if (kexec_crash_image) { | 1093 | if (kexec_crash_image) { |
1095 | struct pt_regs fixed_regs; | 1094 | struct pt_regs fixed_regs; |
1096 | |||
1097 | kmsg_dump(KMSG_DUMP_KEXEC); | ||
1098 | 1095 | ||
1099 | crash_setup_regs(&fixed_regs, regs); | 1096 | crash_setup_regs(&fixed_regs, regs); |
1100 | crash_save_vmcoreinfo(); | 1097 | crash_save_vmcoreinfo(); |
1101 | machine_crash_shutdown(&fixed_regs); | 1098 | machine_crash_shutdown(&fixed_regs); |
1102 | machine_kexec(kexec_crash_image); | 1099 | machine_kexec(kexec_crash_image); |
1103 | } | 1100 | } |
1104 | mutex_unlock(&kexec_mutex); | 1101 | mutex_unlock(&kexec_mutex); |
1105 | } | 1102 | } |
1106 | } | 1103 | } |
1107 | 1104 | ||
1108 | size_t crash_get_memory_size(void) | 1105 | size_t crash_get_memory_size(void) |
1109 | { | 1106 | { |
1110 | size_t size = 0; | 1107 | size_t size = 0; |
1111 | mutex_lock(&kexec_mutex); | 1108 | mutex_lock(&kexec_mutex); |
1112 | if (crashk_res.end != crashk_res.start) | 1109 | if (crashk_res.end != crashk_res.start) |
1113 | size = resource_size(&crashk_res); | 1110 | size = resource_size(&crashk_res); |
1114 | mutex_unlock(&kexec_mutex); | 1111 | mutex_unlock(&kexec_mutex); |
1115 | return size; | 1112 | return size; |
1116 | } | 1113 | } |
1117 | 1114 | ||
1118 | void __weak crash_free_reserved_phys_range(unsigned long begin, | 1115 | void __weak crash_free_reserved_phys_range(unsigned long begin, |
1119 | unsigned long end) | 1116 | unsigned long end) |
1120 | { | 1117 | { |
1121 | unsigned long addr; | 1118 | unsigned long addr; |
1122 | 1119 | ||
1123 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | 1120 | for (addr = begin; addr < end; addr += PAGE_SIZE) { |
1124 | ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); | 1121 | ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); |
1125 | init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); | 1122 | init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); |
1126 | free_page((unsigned long)__va(addr)); | 1123 | free_page((unsigned long)__va(addr)); |
1127 | totalram_pages++; | 1124 | totalram_pages++; |
1128 | } | 1125 | } |
1129 | } | 1126 | } |
1130 | 1127 | ||
1131 | int crash_shrink_memory(unsigned long new_size) | 1128 | int crash_shrink_memory(unsigned long new_size) |
1132 | { | 1129 | { |
1133 | int ret = 0; | 1130 | int ret = 0; |
1134 | unsigned long start, end; | 1131 | unsigned long start, end; |
1135 | 1132 | ||
1136 | mutex_lock(&kexec_mutex); | 1133 | mutex_lock(&kexec_mutex); |
1137 | 1134 | ||
1138 | if (kexec_crash_image) { | 1135 | if (kexec_crash_image) { |
1139 | ret = -ENOENT; | 1136 | ret = -ENOENT; |
1140 | goto unlock; | 1137 | goto unlock; |
1141 | } | 1138 | } |
1142 | start = crashk_res.start; | 1139 | start = crashk_res.start; |
1143 | end = crashk_res.end; | 1140 | end = crashk_res.end; |
1144 | 1141 | ||
1145 | if (new_size >= end - start + 1) { | 1142 | if (new_size >= end - start + 1) { |
1146 | ret = -EINVAL; | 1143 | ret = -EINVAL; |
1147 | if (new_size == end - start + 1) | 1144 | if (new_size == end - start + 1) |
1148 | ret = 0; | 1145 | ret = 0; |
1149 | goto unlock; | 1146 | goto unlock; |
1150 | } | 1147 | } |
1151 | 1148 | ||
1152 | start = roundup(start, KEXEC_CRASH_MEM_ALIGN); | 1149 | start = roundup(start, KEXEC_CRASH_MEM_ALIGN); |
1153 | end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); | 1150 | end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); |
1154 | 1151 | ||
1155 | crash_map_reserved_pages(); | 1152 | crash_map_reserved_pages(); |
1156 | crash_free_reserved_phys_range(end, crashk_res.end); | 1153 | crash_free_reserved_phys_range(end, crashk_res.end); |
1157 | 1154 | ||
1158 | if ((start == end) && (crashk_res.parent != NULL)) | 1155 | if ((start == end) && (crashk_res.parent != NULL)) |
1159 | release_resource(&crashk_res); | 1156 | release_resource(&crashk_res); |
1160 | crashk_res.end = end - 1; | 1157 | crashk_res.end = end - 1; |
1161 | crash_unmap_reserved_pages(); | 1158 | crash_unmap_reserved_pages(); |
1162 | 1159 | ||
1163 | unlock: | 1160 | unlock: |
1164 | mutex_unlock(&kexec_mutex); | 1161 | mutex_unlock(&kexec_mutex); |
1165 | return ret; | 1162 | return ret; |
1166 | } | 1163 | } |
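The roundup() pair above means a shrink request is granted in whole KEXEC_CRASH_MEM_ALIGN units. A minimal userspace sketch of the arithmetic, assuming a 16 MiB alignment and an invented start address (the real alignment is architecture-specific):

#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long long align = 16ULL << 20;		/* assumed KEXEC_CRASH_MEM_ALIGN */
	unsigned long long start = 0x30000000ULL;	/* invented crashk_res.start */
	unsigned long long new_size = 20ULL << 20;	/* shrink request: 20 MiB */
	unsigned long long end;

	start = ALIGN_UP(start, align);			/* already aligned here */
	end = ALIGN_UP(start + new_size, align);	/* 20 MiB rounds up to 32 MiB */

	/* the kernel keeps [start, end) and frees [end, old crashk_res.end] */
	printf("kept: %#llx-%#llx (%llu MiB)\n",
	       start, end - 1, (end - start) >> 20);
	return 0;
}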
1167 | 1164 | ||
1168 | static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, | 1165 | static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, |
1169 | size_t data_len) | 1166 | size_t data_len) |
1170 | { | 1167 | { |
1171 | struct elf_note note; | 1168 | struct elf_note note; |
1172 | 1169 | ||
1173 | note.n_namesz = strlen(name) + 1; | 1170 | note.n_namesz = strlen(name) + 1; |
1174 | note.n_descsz = data_len; | 1171 | note.n_descsz = data_len; |
1175 | note.n_type = type; | 1172 | note.n_type = type; |
1176 | memcpy(buf, ¬e, sizeof(note)); | 1173 | memcpy(buf, ¬e, sizeof(note)); |
1177 | buf += (sizeof(note) + 3)/4; | 1174 | buf += (sizeof(note) + 3)/4; |
1178 | memcpy(buf, name, note.n_namesz); | 1175 | memcpy(buf, name, note.n_namesz); |
1179 | buf += (note.n_namesz + 3)/4; | 1176 | buf += (note.n_namesz + 3)/4; |
1180 | memcpy(buf, data, note.n_descsz); | 1177 | memcpy(buf, data, note.n_descsz); |
1181 | buf += (note.n_descsz + 3)/4; | 1178 | buf += (note.n_descsz + 3)/4; |
1182 | 1179 | ||
1183 | return buf; | 1180 | return buf; |
1184 | } | 1181 | } |
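append_elf_note() advances buf in 32-bit words, so the name and the payload are each padded to a 4-byte boundary behind the 12-byte header. A userspace sketch of the size arithmetic; the 336-byte payload is an assumed stand-in for sizeof(struct elf_prstatus):

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *name = "CORE";		/* e.g. KEXEC_CORE_NOTE_NAME */
	size_t namesz = strlen(name) + 1;	/* 5, counting the NUL */
	size_t descsz = 336;			/* assumed payload size */
	size_t hdr = 12;			/* n_namesz, n_descsz, n_type */
	size_t total = hdr + ((namesz + 3) & ~(size_t)3)
			   + ((descsz + 3) & ~(size_t)3);

	/* 12 + 8 + 336 = 356 bytes for this note */
	printf("note occupies %zu bytes\n", total);
	return 0;
}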
1185 | 1182 | ||
1186 | static void final_note(u32 *buf) | 1183 | static void final_note(u32 *buf) |
1187 | { | 1184 | { |
1188 | struct elf_note note; | 1185 | struct elf_note note; |
1189 | 1186 | ||
1190 | note.n_namesz = 0; | 1187 | note.n_namesz = 0; |
1191 | note.n_descsz = 0; | 1188 | note.n_descsz = 0; |
1192 | note.n_type = 0; | 1189 | note.n_type = 0; |
1193 | memcpy(buf, ¬e, sizeof(note)); | 1190 | memcpy(buf, ¬e, sizeof(note)); |
1194 | } | 1191 | } |
1195 | 1192 | ||
1196 | void crash_save_cpu(struct pt_regs *regs, int cpu) | 1193 | void crash_save_cpu(struct pt_regs *regs, int cpu) |
1197 | { | 1194 | { |
1198 | struct elf_prstatus prstatus; | 1195 | struct elf_prstatus prstatus; |
1199 | u32 *buf; | 1196 | u32 *buf; |
1200 | 1197 | ||
1201 | if ((cpu < 0) || (cpu >= nr_cpu_ids)) | 1198 | if ((cpu < 0) || (cpu >= nr_cpu_ids)) |
1202 | return; | 1199 | return; |
1203 | 1200 | ||
1204 | /* Using ELF notes here is opportunistic. | 1201 | /* Using ELF notes here is opportunistic. |
1205 | * I need a well defined structure format | 1202 | * I need a well defined structure format |
1206 | * for the data I pass, and I need tags | 1203 | * for the data I pass, and I need tags |
1207 | * on the data to indicate what information I have | 1204 | * on the data to indicate what information I have |
1208 | * squirrelled away. ELF notes happen to provide | 1205 | * squirrelled away. ELF notes happen to provide |
1209 | * all of that, so there is no need to invent something new. | 1206 | * all of that, so there is no need to invent something new. |
1210 | */ | 1207 | */ |
1211 | buf = (u32*)per_cpu_ptr(crash_notes, cpu); | 1208 | buf = (u32*)per_cpu_ptr(crash_notes, cpu); |
1212 | if (!buf) | 1209 | if (!buf) |
1213 | return; | 1210 | return; |
1214 | memset(&prstatus, 0, sizeof(prstatus)); | 1211 | memset(&prstatus, 0, sizeof(prstatus)); |
1215 | prstatus.pr_pid = current->pid; | 1212 | prstatus.pr_pid = current->pid; |
1216 | elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); | 1213 | elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); |
1217 | buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, | 1214 | buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, |
1218 | &prstatus, sizeof(prstatus)); | 1215 | &prstatus, sizeof(prstatus)); |
1219 | final_note(buf); | 1216 | final_note(buf); |
1220 | } | 1217 | } |
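The per-cpu buffer written here is the one kdump userspace finds through sysfs, so the dump kernel can locate every CPU's register note without help from the crashed kernel:

/*
 * /sys/devices/system/cpu/cpuN/crash_notes exports the physical address
 * of cpuN's note buffer; kexec-tools copies those addresses into the
 * PT_NOTE headers of the /proc/vmcore ELF layout it preloads.
 */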
1221 | 1218 | ||
1222 | static int __init crash_notes_memory_init(void) | 1219 | static int __init crash_notes_memory_init(void) |
1223 | { | 1220 | { |
1224 | /* Allocate memory for saving cpu registers. */ | 1221 | /* Allocate memory for saving cpu registers. */ |
1225 | crash_notes = alloc_percpu(note_buf_t); | 1222 | crash_notes = alloc_percpu(note_buf_t); |
1226 | if (!crash_notes) { | 1223 | if (!crash_notes) { |
1227 | printk("Kexec: Memory allocation for saving cpu register" | 1224 | printk("Kexec: Memory allocation for saving cpu register" |
1228 | " states failed\n"); | 1225 | " states failed\n"); |
1229 | return -ENOMEM; | 1226 | return -ENOMEM; |
1230 | } | 1227 | } |
1231 | return 0; | 1228 | return 0; |
1232 | } | 1229 | } |
1233 | module_init(crash_notes_memory_init) | 1230 | module_init(crash_notes_memory_init) |
1234 | 1231 | ||
1235 | 1232 | ||
1236 | /* | 1233 | /* |
1237 | * parsing the "crashkernel" commandline | 1234 | * parsing the "crashkernel" commandline |
1238 | * | 1235 | * |
1239 | * this code is intended to be called from architecture specific code | 1236 | * this code is intended to be called from architecture specific code |
1240 | */ | 1237 | */ |
1241 | 1238 | ||
1242 | 1239 | ||
1243 | /* | 1240 | /* |
1244 | * This function parses command lines in the format | 1241 | * This function parses command lines in the format |
1245 | * | 1242 | * |
1246 | * crashkernel=ramsize-range:size[,...][@offset] | 1243 | * crashkernel=ramsize-range:size[,...][@offset] |
1247 | * | 1244 | * |
1248 | * The function returns 0 on success and -EINVAL on failure. | 1245 | * The function returns 0 on success and -EINVAL on failure. |
1249 | */ | 1246 | */ |
1250 | static int __init parse_crashkernel_mem(char *cmdline, | 1247 | static int __init parse_crashkernel_mem(char *cmdline, |
1251 | unsigned long long system_ram, | 1248 | unsigned long long system_ram, |
1252 | unsigned long long *crash_size, | 1249 | unsigned long long *crash_size, |
1253 | unsigned long long *crash_base) | 1250 | unsigned long long *crash_base) |
1254 | { | 1251 | { |
1255 | char *cur = cmdline, *tmp; | 1252 | char *cur = cmdline, *tmp; |
1256 | 1253 | ||
1257 | /* for each entry of the comma-separated list */ | 1254 | /* for each entry of the comma-separated list */ |
1258 | do { | 1255 | do { |
1259 | unsigned long long start, end = ULLONG_MAX, size; | 1256 | unsigned long long start, end = ULLONG_MAX, size; |
1260 | 1257 | ||
1261 | /* get the start of the range */ | 1258 | /* get the start of the range */ |
1262 | start = memparse(cur, &tmp); | 1259 | start = memparse(cur, &tmp); |
1263 | if (cur == tmp) { | 1260 | if (cur == tmp) { |
1264 | pr_warning("crashkernel: Memory value expected\n"); | 1261 | pr_warning("crashkernel: Memory value expected\n"); |
1265 | return -EINVAL; | 1262 | return -EINVAL; |
1266 | } | 1263 | } |
1267 | cur = tmp; | 1264 | cur = tmp; |
1268 | if (*cur != '-') { | 1265 | if (*cur != '-') { |
1269 | pr_warning("crashkernel: '-' expected\n"); | 1266 | pr_warning("crashkernel: '-' expected\n"); |
1270 | return -EINVAL; | 1267 | return -EINVAL; |
1271 | } | 1268 | } |
1272 | cur++; | 1269 | cur++; |
1273 | 1270 | ||
1274 | 		/* if no ':' is here, then we read the end */ | 1271 | 		/* if no ':' is here, then we read the end */ |
1275 | if (*cur != ':') { | 1272 | if (*cur != ':') { |
1276 | end = memparse(cur, &tmp); | 1273 | end = memparse(cur, &tmp); |
1277 | if (cur == tmp) { | 1274 | if (cur == tmp) { |
1278 | pr_warning("crashkernel: Memory " | 1275 | pr_warning("crashkernel: Memory " |
1279 | "value expected\n"); | 1276 | "value expected\n"); |
1280 | return -EINVAL; | 1277 | return -EINVAL; |
1281 | } | 1278 | } |
1282 | cur = tmp; | 1279 | cur = tmp; |
1283 | if (end <= start) { | 1280 | if (end <= start) { |
1284 | pr_warning("crashkernel: end <= start\n"); | 1281 | pr_warning("crashkernel: end <= start\n"); |
1285 | return -EINVAL; | 1282 | return -EINVAL; |
1286 | } | 1283 | } |
1287 | } | 1284 | } |
1288 | 1285 | ||
1289 | if (*cur != ':') { | 1286 | if (*cur != ':') { |
1290 | pr_warning("crashkernel: ':' expected\n"); | 1287 | pr_warning("crashkernel: ':' expected\n"); |
1291 | return -EINVAL; | 1288 | return -EINVAL; |
1292 | } | 1289 | } |
1293 | cur++; | 1290 | cur++; |
1294 | 1291 | ||
1295 | size = memparse(cur, &tmp); | 1292 | size = memparse(cur, &tmp); |
1296 | if (cur == tmp) { | 1293 | if (cur == tmp) { |
1297 | pr_warning("Memory value expected\n"); | 1294 | pr_warning("Memory value expected\n"); |
1298 | return -EINVAL; | 1295 | return -EINVAL; |
1299 | } | 1296 | } |
1300 | cur = tmp; | 1297 | cur = tmp; |
1301 | if (size >= system_ram) { | 1298 | if (size >= system_ram) { |
1302 | pr_warning("crashkernel: invalid size\n"); | 1299 | pr_warning("crashkernel: invalid size\n"); |
1303 | return -EINVAL; | 1300 | return -EINVAL; |
1304 | } | 1301 | } |
1305 | 1302 | ||
1306 | 		/* does system_ram fall into this range? */ | 1303 | 		/* does system_ram fall into this range? */ |
1307 | if (system_ram >= start && system_ram < end) { | 1304 | if (system_ram >= start && system_ram < end) { |
1308 | *crash_size = size; | 1305 | *crash_size = size; |
1309 | break; | 1306 | break; |
1310 | } | 1307 | } |
1311 | } while (*cur++ == ','); | 1308 | } while (*cur++ == ','); |
1312 | 1309 | ||
1313 | if (*crash_size > 0) { | 1310 | if (*crash_size > 0) { |
1314 | while (*cur && *cur != ' ' && *cur != '@') | 1311 | while (*cur && *cur != ' ' && *cur != '@') |
1315 | cur++; | 1312 | cur++; |
1316 | if (*cur == '@') { | 1313 | if (*cur == '@') { |
1317 | cur++; | 1314 | cur++; |
1318 | *crash_base = memparse(cur, &tmp); | 1315 | *crash_base = memparse(cur, &tmp); |
1319 | if (cur == tmp) { | 1316 | if (cur == tmp) { |
1320 | pr_warning("Memory value expected " | 1317 | pr_warning("Memory value expected " |
1321 | "after '@'\n"); | 1318 | "after '@'\n"); |
1322 | return -EINVAL; | 1319 | return -EINVAL; |
1323 | } | 1320 | } |
1324 | } | 1321 | } |
1325 | } | 1322 | } |
1326 | 1323 | ||
1327 | return 0; | 1324 | return 0; |
1328 | } | 1325 | } |
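A worked example of the extended syntax handled above (sizes assumed for illustration):

/*
 *	crashkernel=512M-2G:64M,2G-:128M
 *
 * On a machine with 1 GiB of RAM the first range matches
 * (512M <= 1G < 2G) and 64 MiB are reserved; with 4 GiB the open-ended
 * "2G-" entry matches (end stays ULLONG_MAX) and 128 MiB are reserved.
 * With no trailing "@offset", *crash_base stays 0 and the architecture
 * picks the base address.
 */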
1329 | 1326 | ||
1330 | /* | 1327 | /* |
1331 | * This function parses "simple" (old) crashkernel command lines like | 1328 | * This function parses "simple" (old) crashkernel command lines like |
1332 | * | 1329 | * |
1333 | * crashkernel=size[@offset] | 1330 | * crashkernel=size[@offset] |
1334 | * | 1331 | * |
1335 | * It returns 0 on success and -EINVAL on failure. | 1332 | * It returns 0 on success and -EINVAL on failure. |
1336 | */ | 1333 | */ |
1337 | static int __init parse_crashkernel_simple(char *cmdline, | 1334 | static int __init parse_crashkernel_simple(char *cmdline, |
1338 | unsigned long long *crash_size, | 1335 | unsigned long long *crash_size, |
1339 | unsigned long long *crash_base) | 1336 | unsigned long long *crash_base) |
1340 | { | 1337 | { |
1341 | char *cur = cmdline; | 1338 | char *cur = cmdline; |
1342 | 1339 | ||
1343 | *crash_size = memparse(cmdline, &cur); | 1340 | *crash_size = memparse(cmdline, &cur); |
1344 | if (cmdline == cur) { | 1341 | if (cmdline == cur) { |
1345 | pr_warning("crashkernel: memory value expected\n"); | 1342 | pr_warning("crashkernel: memory value expected\n"); |
1346 | return -EINVAL; | 1343 | return -EINVAL; |
1347 | } | 1344 | } |
1348 | 1345 | ||
1349 | if (*cur == '@') | 1346 | if (*cur == '@') |
1350 | *crash_base = memparse(cur+1, &cur); | 1347 | *crash_base = memparse(cur+1, &cur); |
1351 | 1348 | ||
1352 | return 0; | 1349 | return 0; |
1353 | } | 1350 | } |
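A userspace sketch of the simple syntax; memparse() is mimicked here with strtoull() plus a suffix shift (the real kernel helper also accepts lowercase suffixes):

#include <stdio.h>
#include <stdlib.h>

/* toy stand-in for the kernel's memparse(); uppercase suffixes only */
static unsigned long long memparse_demo(const char *s, char **retp)
{
	unsigned long long v = strtoull(s, retp, 0);

	switch (**retp) {
	case 'G': v <<= 10;	/* fall through */
	case 'M': v <<= 10;	/* fall through */
	case 'K': v <<= 10; (*retp)++;
	}
	return v;
}

int main(void)
{
	char *cur;
	unsigned long long size = memparse_demo("128M@16M", &cur);
	unsigned long long base = (*cur == '@') ? memparse_demo(cur + 1, &cur) : 0;

	/* prints: size=134217728 base=16777216 */
	printf("size=%llu base=%llu\n", size, base);
	return 0;
}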
1354 | 1351 | ||
1355 | /* | 1352 | /* |
1356 | * This function is the entry point for command line parsing and should be | 1353 | * This function is the entry point for command line parsing and should be |
1357 | * called from the arch-specific code. | 1354 | * called from the arch-specific code. |
1358 | */ | 1355 | */ |
1359 | int __init parse_crashkernel(char *cmdline, | 1356 | int __init parse_crashkernel(char *cmdline, |
1360 | unsigned long long system_ram, | 1357 | unsigned long long system_ram, |
1361 | unsigned long long *crash_size, | 1358 | unsigned long long *crash_size, |
1362 | unsigned long long *crash_base) | 1359 | unsigned long long *crash_base) |
1363 | { | 1360 | { |
1364 | char *p = cmdline, *ck_cmdline = NULL; | 1361 | char *p = cmdline, *ck_cmdline = NULL; |
1365 | char *first_colon, *first_space; | 1362 | char *first_colon, *first_space; |
1366 | 1363 | ||
1367 | BUG_ON(!crash_size || !crash_base); | 1364 | BUG_ON(!crash_size || !crash_base); |
1368 | *crash_size = 0; | 1365 | *crash_size = 0; |
1369 | *crash_base = 0; | 1366 | *crash_base = 0; |
1370 | 1367 | ||
1371 | /* find crashkernel and use the last one if there are more */ | 1368 | /* find crashkernel and use the last one if there are more */ |
1372 | p = strstr(p, "crashkernel="); | 1369 | p = strstr(p, "crashkernel="); |
1373 | while (p) { | 1370 | while (p) { |
1374 | ck_cmdline = p; | 1371 | ck_cmdline = p; |
1375 | p = strstr(p+1, "crashkernel="); | 1372 | p = strstr(p+1, "crashkernel="); |
1376 | } | 1373 | } |
1377 | 1374 | ||
1378 | if (!ck_cmdline) | 1375 | if (!ck_cmdline) |
1379 | return -EINVAL; | 1376 | return -EINVAL; |
1380 | 1377 | ||
1381 | ck_cmdline += 12; /* strlen("crashkernel=") */ | 1378 | ck_cmdline += 12; /* strlen("crashkernel=") */ |
1382 | 1379 | ||
1383 | /* | 1380 | /* |
1384 | * if the commandline contains a ':', then that's the extended | 1381 | * if the commandline contains a ':', then that's the extended |
1385 | * syntax -- if not, it must be the classic syntax | 1382 | * syntax -- if not, it must be the classic syntax |
1386 | */ | 1383 | */ |
1387 | first_colon = strchr(ck_cmdline, ':'); | 1384 | first_colon = strchr(ck_cmdline, ':'); |
1388 | first_space = strchr(ck_cmdline, ' '); | 1385 | first_space = strchr(ck_cmdline, ' '); |
1389 | if (first_colon && (!first_space || first_colon < first_space)) | 1386 | if (first_colon && (!first_space || first_colon < first_space)) |
1390 | return parse_crashkernel_mem(ck_cmdline, system_ram, | 1387 | return parse_crashkernel_mem(ck_cmdline, system_ram, |
1391 | crash_size, crash_base); | 1388 | crash_size, crash_base); |
1392 | else | 1389 | else |
1393 | return parse_crashkernel_simple(ck_cmdline, crash_size, | 1390 | return parse_crashkernel_simple(ck_cmdline, crash_size, |
1394 | crash_base); | 1391 | crash_base); |
1395 | 1392 | ||
1396 | return 0; | 1393 | return 0; |
1397 | } | 1394 | } |
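The colon-versus-space test above is easiest to see with examples:

/*
 * "crashkernel=64M@16M" has no ':' before the next space, so it goes
 * through parse_crashkernel_simple(); in
 * "crashkernel=512M-2G:64M ro root=..." the first ':' precedes the
 * first space, so parse_crashkernel_mem() handles it. A ':' occurring
 * only in a later parameter never selects the extended parser.
 */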
1398 | 1395 | ||
1399 | 1396 | ||
1400 | static void update_vmcoreinfo_note(void) | 1397 | static void update_vmcoreinfo_note(void) |
1401 | { | 1398 | { |
1402 | u32 *buf = vmcoreinfo_note; | 1399 | u32 *buf = vmcoreinfo_note; |
1403 | 1400 | ||
1404 | if (!vmcoreinfo_size) | 1401 | if (!vmcoreinfo_size) |
1405 | return; | 1402 | return; |
1406 | buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, | 1403 | buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, |
1407 | vmcoreinfo_size); | 1404 | vmcoreinfo_size); |
1408 | final_note(buf); | 1405 | final_note(buf); |
1409 | } | 1406 | } |
1410 | 1407 | ||
1411 | void crash_save_vmcoreinfo(void) | 1408 | void crash_save_vmcoreinfo(void) |
1412 | { | 1409 | { |
1413 | vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds()); | 1410 | vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds()); |
1414 | update_vmcoreinfo_note(); | 1411 | update_vmcoreinfo_note(); |
1415 | } | 1412 | } |
1416 | 1413 | ||
1417 | void vmcoreinfo_append_str(const char *fmt, ...) | 1414 | void vmcoreinfo_append_str(const char *fmt, ...) |
1418 | { | 1415 | { |
1419 | va_list args; | 1416 | va_list args; |
1420 | char buf[0x50]; | 1417 | char buf[0x50]; |
1421 | int r; | 1418 | int r; |
1422 | 1419 | ||
1423 | va_start(args, fmt); | 1420 | va_start(args, fmt); |
1424 | r = vsnprintf(buf, sizeof(buf), fmt, args); | 1421 | r = vsnprintf(buf, sizeof(buf), fmt, args); |
1425 | va_end(args); | 1422 | va_end(args); |
1426 | 1423 | ||
1427 | if (r + vmcoreinfo_size > vmcoreinfo_max_size) | 1424 | if (r + vmcoreinfo_size > vmcoreinfo_max_size) |
1428 | r = vmcoreinfo_max_size - vmcoreinfo_size; | 1425 | r = vmcoreinfo_max_size - vmcoreinfo_size; |
1429 | 1426 | ||
1430 | memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); | 1427 | memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); |
1431 | 1428 | ||
1432 | vmcoreinfo_size += r; | 1429 | vmcoreinfo_size += r; |
1433 | } | 1430 | } |
1434 | 1431 | ||
1435 | /* | 1432 | /* |
1436 | * provide an empty default implementation here -- architecture | 1433 | * provide an empty default implementation here -- architecture |
1437 | * code may override this | 1434 | * code may override this |
1438 | */ | 1435 | */ |
1439 | void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void) | 1436 | void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void) |
1440 | {} | 1437 | {} |
1441 | 1438 | ||
1442 | unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void) | 1439 | unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void) |
1443 | { | 1440 | { |
1444 | return __pa((unsigned long)(char *)&vmcoreinfo_note); | 1441 | return __pa((unsigned long)(char *)&vmcoreinfo_note); |
1445 | } | 1442 | } |
1446 | 1443 | ||
1447 | static int __init crash_save_vmcoreinfo_init(void) | 1444 | static int __init crash_save_vmcoreinfo_init(void) |
1448 | { | 1445 | { |
1449 | VMCOREINFO_OSRELEASE(init_uts_ns.name.release); | 1446 | VMCOREINFO_OSRELEASE(init_uts_ns.name.release); |
1450 | VMCOREINFO_PAGESIZE(PAGE_SIZE); | 1447 | VMCOREINFO_PAGESIZE(PAGE_SIZE); |
1451 | 1448 | ||
1452 | VMCOREINFO_SYMBOL(init_uts_ns); | 1449 | VMCOREINFO_SYMBOL(init_uts_ns); |
1453 | VMCOREINFO_SYMBOL(node_online_map); | 1450 | VMCOREINFO_SYMBOL(node_online_map); |
1454 | VMCOREINFO_SYMBOL(swapper_pg_dir); | 1451 | VMCOREINFO_SYMBOL(swapper_pg_dir); |
1455 | VMCOREINFO_SYMBOL(_stext); | 1452 | VMCOREINFO_SYMBOL(_stext); |
1456 | VMCOREINFO_SYMBOL(vmlist); | 1453 | VMCOREINFO_SYMBOL(vmlist); |
1457 | 1454 | ||
1458 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 1455 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
1459 | VMCOREINFO_SYMBOL(mem_map); | 1456 | VMCOREINFO_SYMBOL(mem_map); |
1460 | VMCOREINFO_SYMBOL(contig_page_data); | 1457 | VMCOREINFO_SYMBOL(contig_page_data); |
1461 | #endif | 1458 | #endif |
1462 | #ifdef CONFIG_SPARSEMEM | 1459 | #ifdef CONFIG_SPARSEMEM |
1463 | VMCOREINFO_SYMBOL(mem_section); | 1460 | VMCOREINFO_SYMBOL(mem_section); |
1464 | VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); | 1461 | VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); |
1465 | VMCOREINFO_STRUCT_SIZE(mem_section); | 1462 | VMCOREINFO_STRUCT_SIZE(mem_section); |
1466 | VMCOREINFO_OFFSET(mem_section, section_mem_map); | 1463 | VMCOREINFO_OFFSET(mem_section, section_mem_map); |
1467 | #endif | 1464 | #endif |
1468 | VMCOREINFO_STRUCT_SIZE(page); | 1465 | VMCOREINFO_STRUCT_SIZE(page); |
1469 | VMCOREINFO_STRUCT_SIZE(pglist_data); | 1466 | VMCOREINFO_STRUCT_SIZE(pglist_data); |
1470 | VMCOREINFO_STRUCT_SIZE(zone); | 1467 | VMCOREINFO_STRUCT_SIZE(zone); |
1471 | VMCOREINFO_STRUCT_SIZE(free_area); | 1468 | VMCOREINFO_STRUCT_SIZE(free_area); |
1472 | VMCOREINFO_STRUCT_SIZE(list_head); | 1469 | VMCOREINFO_STRUCT_SIZE(list_head); |
1473 | VMCOREINFO_SIZE(nodemask_t); | 1470 | VMCOREINFO_SIZE(nodemask_t); |
1474 | VMCOREINFO_OFFSET(page, flags); | 1471 | VMCOREINFO_OFFSET(page, flags); |
1475 | VMCOREINFO_OFFSET(page, _count); | 1472 | VMCOREINFO_OFFSET(page, _count); |
1476 | VMCOREINFO_OFFSET(page, mapping); | 1473 | VMCOREINFO_OFFSET(page, mapping); |
1477 | VMCOREINFO_OFFSET(page, lru); | 1474 | VMCOREINFO_OFFSET(page, lru); |
1478 | VMCOREINFO_OFFSET(pglist_data, node_zones); | 1475 | VMCOREINFO_OFFSET(pglist_data, node_zones); |
1479 | VMCOREINFO_OFFSET(pglist_data, nr_zones); | 1476 | VMCOREINFO_OFFSET(pglist_data, nr_zones); |
1480 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | 1477 | #ifdef CONFIG_FLAT_NODE_MEM_MAP |
1481 | VMCOREINFO_OFFSET(pglist_data, node_mem_map); | 1478 | VMCOREINFO_OFFSET(pglist_data, node_mem_map); |
1482 | #endif | 1479 | #endif |
1483 | VMCOREINFO_OFFSET(pglist_data, node_start_pfn); | 1480 | VMCOREINFO_OFFSET(pglist_data, node_start_pfn); |
1484 | VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); | 1481 | VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); |
1485 | VMCOREINFO_OFFSET(pglist_data, node_id); | 1482 | VMCOREINFO_OFFSET(pglist_data, node_id); |
1486 | VMCOREINFO_OFFSET(zone, free_area); | 1483 | VMCOREINFO_OFFSET(zone, free_area); |
1487 | VMCOREINFO_OFFSET(zone, vm_stat); | 1484 | VMCOREINFO_OFFSET(zone, vm_stat); |
1488 | VMCOREINFO_OFFSET(zone, spanned_pages); | 1485 | VMCOREINFO_OFFSET(zone, spanned_pages); |
1489 | VMCOREINFO_OFFSET(free_area, free_list); | 1486 | VMCOREINFO_OFFSET(free_area, free_list); |
1490 | VMCOREINFO_OFFSET(list_head, next); | 1487 | VMCOREINFO_OFFSET(list_head, next); |
1491 | VMCOREINFO_OFFSET(list_head, prev); | 1488 | VMCOREINFO_OFFSET(list_head, prev); |
1492 | VMCOREINFO_OFFSET(vm_struct, addr); | 1489 | VMCOREINFO_OFFSET(vm_struct, addr); |
1493 | VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); | 1490 | VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); |
1494 | log_buf_kexec_setup(); | 1491 | log_buf_kexec_setup(); |
1495 | VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); | 1492 | VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); |
1496 | VMCOREINFO_NUMBER(NR_FREE_PAGES); | 1493 | VMCOREINFO_NUMBER(NR_FREE_PAGES); |
1497 | VMCOREINFO_NUMBER(PG_lru); | 1494 | VMCOREINFO_NUMBER(PG_lru); |
1498 | VMCOREINFO_NUMBER(PG_private); | 1495 | VMCOREINFO_NUMBER(PG_private); |
1499 | VMCOREINFO_NUMBER(PG_swapcache); | 1496 | VMCOREINFO_NUMBER(PG_swapcache); |
1500 | 1497 | ||
1501 | arch_crash_save_vmcoreinfo(); | 1498 | arch_crash_save_vmcoreinfo(); |
1502 | update_vmcoreinfo_note(); | 1499 | update_vmcoreinfo_note(); |
1503 | 1500 | ||
1504 | return 0; | 1501 | return 0; |
1505 | } | 1502 | } |
1506 | 1503 | ||
1507 | module_init(crash_save_vmcoreinfo_init) | 1504 | module_init(crash_save_vmcoreinfo_init) |
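Each VMCOREINFO_* macro registered above appends one flat "KEY=value" line to vmcoreinfo_data, which makedumpfile later reads out of /proc/vmcore. A fragment of the note might look like this (all values invented for illustration):

/*
 *	OSRELEASE=3.2.0
 *	PAGESIZE=4096
 *	SYMBOL(swapper_pg_dir)=ffffffff81c04000
 *	SIZE(page)=56
 *	OFFSET(page.flags)=0
 *	LENGTH(zone.free_area)=11
 *	NUMBER(PG_lru)=5
 *	CRASHTIME=1326458954
 */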
1508 | 1505 | ||
1509 | /* | 1506 | /* |
1510 | * Move into place and start executing a preloaded standalone | 1507 | * Move into place and start executing a preloaded standalone |
1511 | * executable. If nothing was preloaded return an error. | 1508 | * executable. If nothing was preloaded return an error. |
1512 | */ | 1509 | */ |
1513 | int kernel_kexec(void) | 1510 | int kernel_kexec(void) |
1514 | { | 1511 | { |
1515 | int error = 0; | 1512 | int error = 0; |
1516 | 1513 | ||
1517 | if (!mutex_trylock(&kexec_mutex)) | 1514 | if (!mutex_trylock(&kexec_mutex)) |
1518 | return -EBUSY; | 1515 | return -EBUSY; |
1519 | if (!kexec_image) { | 1516 | if (!kexec_image) { |
1520 | error = -EINVAL; | 1517 | error = -EINVAL; |
1521 | goto Unlock; | 1518 | goto Unlock; |
1522 | } | 1519 | } |
1523 | 1520 | ||
1524 | #ifdef CONFIG_KEXEC_JUMP | 1521 | #ifdef CONFIG_KEXEC_JUMP |
1525 | if (kexec_image->preserve_context) { | 1522 | if (kexec_image->preserve_context) { |
1526 | lock_system_sleep(); | 1523 | lock_system_sleep(); |
1527 | pm_prepare_console(); | 1524 | pm_prepare_console(); |
1528 | error = freeze_processes(); | 1525 | error = freeze_processes(); |
1529 | if (error) { | 1526 | if (error) { |
1530 | error = -EBUSY; | 1527 | error = -EBUSY; |
1531 | goto Restore_console; | 1528 | goto Restore_console; |
1532 | } | 1529 | } |
1533 | suspend_console(); | 1530 | suspend_console(); |
1534 | error = dpm_suspend_start(PMSG_FREEZE); | 1531 | error = dpm_suspend_start(PMSG_FREEZE); |
1535 | if (error) | 1532 | if (error) |
1536 | goto Resume_console; | 1533 | goto Resume_console; |
1537 | /* At this point, dpm_suspend_start() has been called, | 1534 | /* At this point, dpm_suspend_start() has been called, |
1538 | * but *not* dpm_suspend_noirq(). We *must* call | 1535 | * but *not* dpm_suspend_noirq(). We *must* call |
1539 | * dpm_suspend_noirq() now. Otherwise, drivers for | 1536 | * dpm_suspend_noirq() now. Otherwise, drivers for |
1540 | * some devices (e.g. interrupt controllers) become | 1537 | * some devices (e.g. interrupt controllers) become |
1541 | * desynchronized with the actual state of the | 1538 | * desynchronized with the actual state of the |
1542 | * hardware at resume time, and evil weirdness ensues. | 1539 | * hardware at resume time, and evil weirdness ensues. |
1543 | */ | 1540 | */ |
1544 | error = dpm_suspend_noirq(PMSG_FREEZE); | 1541 | error = dpm_suspend_noirq(PMSG_FREEZE); |
1545 | if (error) | 1542 | if (error) |
1546 | goto Resume_devices; | 1543 | goto Resume_devices; |
1547 | error = disable_nonboot_cpus(); | 1544 | error = disable_nonboot_cpus(); |
1548 | if (error) | 1545 | if (error) |
1549 | goto Enable_cpus; | 1546 | goto Enable_cpus; |
1550 | local_irq_disable(); | 1547 | local_irq_disable(); |
1551 | error = syscore_suspend(); | 1548 | error = syscore_suspend(); |
1552 | if (error) | 1549 | if (error) |
1553 | goto Enable_irqs; | 1550 | goto Enable_irqs; |
1554 | } else | 1551 | } else |
1555 | #endif | 1552 | #endif |
1556 | { | 1553 | { |
1557 | kernel_restart_prepare(NULL); | 1554 | kernel_restart_prepare(NULL); |
1558 | printk(KERN_EMERG "Starting new kernel\n"); | 1555 | printk(KERN_EMERG "Starting new kernel\n"); |
1559 | machine_shutdown(); | 1556 | machine_shutdown(); |
1560 | } | 1557 | } |
1561 | 1558 | ||
1562 | machine_kexec(kexec_image); | 1559 | machine_kexec(kexec_image); |
1563 | 1560 | ||
1564 | #ifdef CONFIG_KEXEC_JUMP | 1561 | #ifdef CONFIG_KEXEC_JUMP |
1565 | if (kexec_image->preserve_context) { | 1562 | if (kexec_image->preserve_context) { |
1566 | syscore_resume(); | 1563 | syscore_resume(); |
1567 | Enable_irqs: | 1564 | Enable_irqs: |
1568 | local_irq_enable(); | 1565 | local_irq_enable(); |
1569 | Enable_cpus: | 1566 | Enable_cpus: |
1570 | enable_nonboot_cpus(); | 1567 | enable_nonboot_cpus(); |
1571 | dpm_resume_noirq(PMSG_RESTORE); | 1568 | dpm_resume_noirq(PMSG_RESTORE); |
1572 | Resume_devices: | 1569 | Resume_devices: |
1573 | dpm_resume_end(PMSG_RESTORE); | 1570 | dpm_resume_end(PMSG_RESTORE); |
1574 | Resume_console: | 1571 | Resume_console: |
1575 | resume_console(); | 1572 | resume_console(); |
1576 | thaw_processes(); | 1573 | thaw_processes(); |
1577 | Restore_console: | 1574 | Restore_console: |
1578 | pm_restore_console(); | 1575 | pm_restore_console(); |
1579 | unlock_system_sleep(); | 1576 | unlock_system_sleep(); |
1580 | } | 1577 | } |
1581 | #endif | 1578 | #endif |
1582 | 1579 | ||
1583 | Unlock: | 1580 | Unlock: |
1584 | mutex_unlock(&kexec_mutex); | 1581 | mutex_unlock(&kexec_mutex); |
1585 | return error; | 1582 | return error; |
1586 | } | 1583 | } |
1587 | 1584 |
mentioned in commit 381b87