Commit a3dd3323058d281abd584b15ad4c5b65064d7a61

Authored by WANG Cong
Committed by Linus Torvalds
1 parent 9512938b88

kexec: remove KMSG_DUMP_KEXEC

KMSG_DUMP_KEXEC is useless because we already save kernel messages inside
/proc/vmcore, and it is unsafe to allow modules to do other things in a
crash dump scenario.

[akpm@linux-foundation.org: fix powerpc build]
Signed-off-by: WANG Cong <xiyou.wangcong@gmail.com>
Reported-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Jarod Wilson <jarod@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 5 changed files with 2 additions and 9 deletions Inline Diff

arch/powerpc/platforms/pseries/nvram.c
1 /* 1 /*
2 * c 2001 PPC 64 Team, IBM Corp 2 * c 2001 PPC 64 Team, IBM Corp
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License 5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version. 7 * 2 of the License, or (at your option) any later version.
8 * 8 *
9 * /dev/nvram driver for PPC64 9 * /dev/nvram driver for PPC64
10 * 10 *
11 * This perhaps should live in drivers/char 11 * This perhaps should live in drivers/char
12 */ 12 */
13 13
14 14
15 #include <linux/types.h> 15 #include <linux/types.h>
16 #include <linux/errno.h> 16 #include <linux/errno.h>
17 #include <linux/init.h> 17 #include <linux/init.h>
18 #include <linux/spinlock.h> 18 #include <linux/spinlock.h>
19 #include <linux/slab.h> 19 #include <linux/slab.h>
20 #include <linux/kmsg_dump.h> 20 #include <linux/kmsg_dump.h>
21 #include <linux/ctype.h> 21 #include <linux/ctype.h>
22 #include <linux/zlib.h> 22 #include <linux/zlib.h>
23 #include <asm/uaccess.h> 23 #include <asm/uaccess.h>
24 #include <asm/nvram.h> 24 #include <asm/nvram.h>
25 #include <asm/rtas.h> 25 #include <asm/rtas.h>
26 #include <asm/prom.h> 26 #include <asm/prom.h>
27 #include <asm/machdep.h> 27 #include <asm/machdep.h>
28 28
29 /* Max bytes to read/write in one go */ 29 /* Max bytes to read/write in one go */
30 #define NVRW_CNT 0x20 30 #define NVRW_CNT 0x20
31 31
32 static unsigned int nvram_size; 32 static unsigned int nvram_size;
33 static int nvram_fetch, nvram_store; 33 static int nvram_fetch, nvram_store;
34 static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ 34 static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
35 static DEFINE_SPINLOCK(nvram_lock); 35 static DEFINE_SPINLOCK(nvram_lock);
36 36
37 struct err_log_info { 37 struct err_log_info {
38 int error_type; 38 int error_type;
39 unsigned int seq_num; 39 unsigned int seq_num;
40 }; 40 };
41 41
42 struct nvram_os_partition { 42 struct nvram_os_partition {
43 const char *name; 43 const char *name;
44 int req_size; /* desired size, in bytes */ 44 int req_size; /* desired size, in bytes */
45 int min_size; /* minimum acceptable size (0 means req_size) */ 45 int min_size; /* minimum acceptable size (0 means req_size) */
46 long size; /* size of data portion (excluding err_log_info) */ 46 long size; /* size of data portion (excluding err_log_info) */
47 long index; /* offset of data portion of partition */ 47 long index; /* offset of data portion of partition */
48 }; 48 };
49 49
50 static struct nvram_os_partition rtas_log_partition = { 50 static struct nvram_os_partition rtas_log_partition = {
51 .name = "ibm,rtas-log", 51 .name = "ibm,rtas-log",
52 .req_size = 2079, 52 .req_size = 2079,
53 .min_size = 1055, 53 .min_size = 1055,
54 .index = -1 54 .index = -1
55 }; 55 };
56 56
57 static struct nvram_os_partition oops_log_partition = { 57 static struct nvram_os_partition oops_log_partition = {
58 .name = "lnx,oops-log", 58 .name = "lnx,oops-log",
59 .req_size = 4000, 59 .req_size = 4000,
60 .min_size = 2000, 60 .min_size = 2000,
61 .index = -1 61 .index = -1
62 }; 62 };
63 63
64 static const char *pseries_nvram_os_partitions[] = { 64 static const char *pseries_nvram_os_partitions[] = {
65 "ibm,rtas-log", 65 "ibm,rtas-log",
66 "lnx,oops-log", 66 "lnx,oops-log",
67 NULL 67 NULL
68 }; 68 };
69 69
70 static void oops_to_nvram(struct kmsg_dumper *dumper, 70 static void oops_to_nvram(struct kmsg_dumper *dumper,
71 enum kmsg_dump_reason reason, 71 enum kmsg_dump_reason reason,
72 const char *old_msgs, unsigned long old_len, 72 const char *old_msgs, unsigned long old_len,
73 const char *new_msgs, unsigned long new_len); 73 const char *new_msgs, unsigned long new_len);
74 74
75 static struct kmsg_dumper nvram_kmsg_dumper = { 75 static struct kmsg_dumper nvram_kmsg_dumper = {
76 .dump = oops_to_nvram 76 .dump = oops_to_nvram
77 }; 77 };
78 78
79 /* See clobbering_unread_rtas_event() */ 79 /* See clobbering_unread_rtas_event() */
80 #define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */ 80 #define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */
81 static unsigned long last_unread_rtas_event; /* timestamp */ 81 static unsigned long last_unread_rtas_event; /* timestamp */
82 82
83 /* 83 /*
84 * For capturing and compressing an oops or panic report... 84 * For capturing and compressing an oops or panic report...
85 85
86 * big_oops_buf[] holds the uncompressed text we're capturing. 86 * big_oops_buf[] holds the uncompressed text we're capturing.
87 * 87 *
88 * oops_buf[] holds the compressed text, preceded by a prefix. 88 * oops_buf[] holds the compressed text, preceded by a prefix.
89 * The prefix is just a u16 holding the length of the compressed* text. 89 * The prefix is just a u16 holding the length of the compressed* text.
90 * (*Or uncompressed, if compression fails.) oops_buf[] gets written 90 * (*Or uncompressed, if compression fails.) oops_buf[] gets written
91 * to NVRAM. 91 * to NVRAM.
92 * 92 *
93 * oops_len points to the prefix. oops_data points to the compressed text. 93 * oops_len points to the prefix. oops_data points to the compressed text.
94 * 94 *
95 * +- oops_buf 95 * +- oops_buf
96 * | +- oops_data 96 * | +- oops_data
97 * v v 97 * v v
98 * +------------+-----------------------------------------------+ 98 * +------------+-----------------------------------------------+
99 * | length | text | 99 * | length | text |
100 * | (2 bytes) | (oops_data_sz bytes) | 100 * | (2 bytes) | (oops_data_sz bytes) |
101 * +------------+-----------------------------------------------+ 101 * +------------+-----------------------------------------------+
102 * ^ 102 * ^
103 * +- oops_len 103 * +- oops_len
104 * 104 *
105 * We preallocate these buffers during init to avoid kmalloc during oops/panic. 105 * We preallocate these buffers during init to avoid kmalloc during oops/panic.
106 */ 106 */
107 static size_t big_oops_buf_sz; 107 static size_t big_oops_buf_sz;
108 static char *big_oops_buf, *oops_buf; 108 static char *big_oops_buf, *oops_buf;
109 static u16 *oops_len; 109 static u16 *oops_len;
110 static char *oops_data; 110 static char *oops_data;
111 static size_t oops_data_sz; 111 static size_t oops_data_sz;
112 112
113 /* Compression parameters */ 113 /* Compression parameters */
114 #define COMPR_LEVEL 6 114 #define COMPR_LEVEL 6
115 #define WINDOW_BITS 12 115 #define WINDOW_BITS 12
116 #define MEM_LEVEL 4 116 #define MEM_LEVEL 4
117 static struct z_stream_s stream; 117 static struct z_stream_s stream;
118 118
119 static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) 119 static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
120 { 120 {
121 unsigned int i; 121 unsigned int i;
122 unsigned long len; 122 unsigned long len;
123 int done; 123 int done;
124 unsigned long flags; 124 unsigned long flags;
125 char *p = buf; 125 char *p = buf;
126 126
127 127
128 if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE) 128 if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE)
129 return -ENODEV; 129 return -ENODEV;
130 130
131 if (*index >= nvram_size) 131 if (*index >= nvram_size)
132 return 0; 132 return 0;
133 133
134 i = *index; 134 i = *index;
135 if (i + count > nvram_size) 135 if (i + count > nvram_size)
136 count = nvram_size - i; 136 count = nvram_size - i;
137 137
138 spin_lock_irqsave(&nvram_lock, flags); 138 spin_lock_irqsave(&nvram_lock, flags);
139 139
140 for (; count != 0; count -= len) { 140 for (; count != 0; count -= len) {
141 len = count; 141 len = count;
142 if (len > NVRW_CNT) 142 if (len > NVRW_CNT)
143 len = NVRW_CNT; 143 len = NVRW_CNT;
144 144
145 if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf), 145 if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf),
146 len) != 0) || len != done) { 146 len) != 0) || len != done) {
147 spin_unlock_irqrestore(&nvram_lock, flags); 147 spin_unlock_irqrestore(&nvram_lock, flags);
148 return -EIO; 148 return -EIO;
149 } 149 }
150 150
151 memcpy(p, nvram_buf, len); 151 memcpy(p, nvram_buf, len);
152 152
153 p += len; 153 p += len;
154 i += len; 154 i += len;
155 } 155 }
156 156
157 spin_unlock_irqrestore(&nvram_lock, flags); 157 spin_unlock_irqrestore(&nvram_lock, flags);
158 158
159 *index = i; 159 *index = i;
160 return p - buf; 160 return p - buf;
161 } 161 }
162 162
163 static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index) 163 static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index)
164 { 164 {
165 unsigned int i; 165 unsigned int i;
166 unsigned long len; 166 unsigned long len;
167 int done; 167 int done;
168 unsigned long flags; 168 unsigned long flags;
169 const char *p = buf; 169 const char *p = buf;
170 170
171 if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE) 171 if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE)
172 return -ENODEV; 172 return -ENODEV;
173 173
174 if (*index >= nvram_size) 174 if (*index >= nvram_size)
175 return 0; 175 return 0;
176 176
177 i = *index; 177 i = *index;
178 if (i + count > nvram_size) 178 if (i + count > nvram_size)
179 count = nvram_size - i; 179 count = nvram_size - i;
180 180
181 spin_lock_irqsave(&nvram_lock, flags); 181 spin_lock_irqsave(&nvram_lock, flags);
182 182
183 for (; count != 0; count -= len) { 183 for (; count != 0; count -= len) {
184 len = count; 184 len = count;
185 if (len > NVRW_CNT) 185 if (len > NVRW_CNT)
186 len = NVRW_CNT; 186 len = NVRW_CNT;
187 187
188 memcpy(nvram_buf, p, len); 188 memcpy(nvram_buf, p, len);
189 189
190 if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf), 190 if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf),
191 len) != 0) || len != done) { 191 len) != 0) || len != done) {
192 spin_unlock_irqrestore(&nvram_lock, flags); 192 spin_unlock_irqrestore(&nvram_lock, flags);
193 return -EIO; 193 return -EIO;
194 } 194 }
195 195
196 p += len; 196 p += len;
197 i += len; 197 i += len;
198 } 198 }
199 spin_unlock_irqrestore(&nvram_lock, flags); 199 spin_unlock_irqrestore(&nvram_lock, flags);
200 200
201 *index = i; 201 *index = i;
202 return p - buf; 202 return p - buf;
203 } 203 }
204 204
205 static ssize_t pSeries_nvram_get_size(void) 205 static ssize_t pSeries_nvram_get_size(void)
206 { 206 {
207 return nvram_size ? nvram_size : -ENODEV; 207 return nvram_size ? nvram_size : -ENODEV;
208 } 208 }
209 209
210 210
211 /* nvram_write_os_partition, nvram_write_error_log 211 /* nvram_write_os_partition, nvram_write_error_log
212 * 212 *
213 * We need to buffer the error logs into nvram to ensure that we have 213 * We need to buffer the error logs into nvram to ensure that we have
214 * the failure information to decode. If we have a severe error there 214 * the failure information to decode. If we have a severe error there
215 * is no way to guarantee that the OS or the machine is in a state to 215 * is no way to guarantee that the OS or the machine is in a state to
216 * get back to user land and write the error to disk. For example if 216 * get back to user land and write the error to disk. For example if
217 * the SCSI device driver causes a Machine Check by writing to a bad 217 * the SCSI device driver causes a Machine Check by writing to a bad
218 * IO address, there is no way of guaranteeing that the device driver 218 * IO address, there is no way of guaranteeing that the device driver
219 * is in any state that it would also be able to write the error data 219 * is in any state that it would also be able to write the error data
220 * captured to disk, thus we buffer it in NVRAM for analysis on the 220 * captured to disk, thus we buffer it in NVRAM for analysis on the
221 * next boot. 221 * next boot.
222 * 222 *
223 * In NVRAM the partition containing the error log buffer will look like: 223 * In NVRAM the partition containing the error log buffer will look like:
224 * Header (in bytes): 224 * Header (in bytes):
225 * +-----------+----------+--------+------------+------------------+ 225 * +-----------+----------+--------+------------+------------------+
226 * | signature | checksum | length | name | data | 226 * | signature | checksum | length | name | data |
227 * |0 |1 |2 3|4 15|16 length-1| 227 * |0 |1 |2 3|4 15|16 length-1|
228 * +-----------+----------+--------+------------+------------------+ 228 * +-----------+----------+--------+------------+------------------+
229 * 229 *
230 * The 'data' section would look like (in bytes): 230 * The 'data' section would look like (in bytes):
231 * +--------------+------------+-----------------------------------+ 231 * +--------------+------------+-----------------------------------+
232 * | event_logged | sequence # | error log | 232 * | event_logged | sequence # | error log |
233 * |0 3|4 7|8 error_log_size-1| 233 * |0 3|4 7|8 error_log_size-1|
234 * +--------------+------------+-----------------------------------+ 234 * +--------------+------------+-----------------------------------+
235 * 235 *
236 * event_logged: 0 if event has not been logged to syslog, 1 if it has 236 * event_logged: 0 if event has not been logged to syslog, 1 if it has
237 * sequence #: The unique sequence # for each event. (until it wraps) 237 * sequence #: The unique sequence # for each event. (until it wraps)
238 * error log: The error log from event_scan 238 * error log: The error log from event_scan
239 */ 239 */
240 int nvram_write_os_partition(struct nvram_os_partition *part, char * buff, 240 int nvram_write_os_partition(struct nvram_os_partition *part, char * buff,
241 int length, unsigned int err_type, unsigned int error_log_cnt) 241 int length, unsigned int err_type, unsigned int error_log_cnt)
242 { 242 {
243 int rc; 243 int rc;
244 loff_t tmp_index; 244 loff_t tmp_index;
245 struct err_log_info info; 245 struct err_log_info info;
246 246
247 if (part->index == -1) { 247 if (part->index == -1) {
248 return -ESPIPE; 248 return -ESPIPE;
249 } 249 }
250 250
251 if (length > part->size) { 251 if (length > part->size) {
252 length = part->size; 252 length = part->size;
253 } 253 }
254 254
255 info.error_type = err_type; 255 info.error_type = err_type;
256 info.seq_num = error_log_cnt; 256 info.seq_num = error_log_cnt;
257 257
258 tmp_index = part->index; 258 tmp_index = part->index;
259 259
260 rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index); 260 rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
261 if (rc <= 0) { 261 if (rc <= 0) {
262 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc); 262 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
263 return rc; 263 return rc;
264 } 264 }
265 265
266 rc = ppc_md.nvram_write(buff, length, &tmp_index); 266 rc = ppc_md.nvram_write(buff, length, &tmp_index);
267 if (rc <= 0) { 267 if (rc <= 0) {
268 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc); 268 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
269 return rc; 269 return rc;
270 } 270 }
271 271
272 return 0; 272 return 0;
273 } 273 }
274 274
275 int nvram_write_error_log(char * buff, int length, 275 int nvram_write_error_log(char * buff, int length,
276 unsigned int err_type, unsigned int error_log_cnt) 276 unsigned int err_type, unsigned int error_log_cnt)
277 { 277 {
278 int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, 278 int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
279 err_type, error_log_cnt); 279 err_type, error_log_cnt);
280 if (!rc) 280 if (!rc)
281 last_unread_rtas_event = get_seconds(); 281 last_unread_rtas_event = get_seconds();
282 return rc; 282 return rc;
283 } 283 }
284 284
285 /* nvram_read_error_log 285 /* nvram_read_error_log
286 * 286 *
287 * Reads nvram for error log for at most 'length' 287 * Reads nvram for error log for at most 'length'
288 */ 288 */
289 int nvram_read_error_log(char * buff, int length, 289 int nvram_read_error_log(char * buff, int length,
290 unsigned int * err_type, unsigned int * error_log_cnt) 290 unsigned int * err_type, unsigned int * error_log_cnt)
291 { 291 {
292 int rc; 292 int rc;
293 loff_t tmp_index; 293 loff_t tmp_index;
294 struct err_log_info info; 294 struct err_log_info info;
295 295
296 if (rtas_log_partition.index == -1) 296 if (rtas_log_partition.index == -1)
297 return -1; 297 return -1;
298 298
299 if (length > rtas_log_partition.size) 299 if (length > rtas_log_partition.size)
300 length = rtas_log_partition.size; 300 length = rtas_log_partition.size;
301 301
302 tmp_index = rtas_log_partition.index; 302 tmp_index = rtas_log_partition.index;
303 303
304 rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); 304 rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
305 if (rc <= 0) { 305 if (rc <= 0) {
306 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); 306 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
307 return rc; 307 return rc;
308 } 308 }
309 309
310 rc = ppc_md.nvram_read(buff, length, &tmp_index); 310 rc = ppc_md.nvram_read(buff, length, &tmp_index);
311 if (rc <= 0) { 311 if (rc <= 0) {
312 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); 312 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
313 return rc; 313 return rc;
314 } 314 }
315 315
316 *error_log_cnt = info.seq_num; 316 *error_log_cnt = info.seq_num;
317 *err_type = info.error_type; 317 *err_type = info.error_type;
318 318
319 return 0; 319 return 0;
320 } 320 }
321 321
322 /* This doesn't actually zero anything, but it sets the event_logged 322 /* This doesn't actually zero anything, but it sets the event_logged
323 * word to tell that this event is safely in syslog. 323 * word to tell that this event is safely in syslog.
324 */ 324 */
325 int nvram_clear_error_log(void) 325 int nvram_clear_error_log(void)
326 { 326 {
327 loff_t tmp_index; 327 loff_t tmp_index;
328 int clear_word = ERR_FLAG_ALREADY_LOGGED; 328 int clear_word = ERR_FLAG_ALREADY_LOGGED;
329 int rc; 329 int rc;
330 330
331 if (rtas_log_partition.index == -1) 331 if (rtas_log_partition.index == -1)
332 return -1; 332 return -1;
333 333
334 tmp_index = rtas_log_partition.index; 334 tmp_index = rtas_log_partition.index;
335 335
336 rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index); 336 rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
337 if (rc <= 0) { 337 if (rc <= 0) {
338 printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc); 338 printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
339 return rc; 339 return rc;
340 } 340 }
341 last_unread_rtas_event = 0; 341 last_unread_rtas_event = 0;
342 342
343 return 0; 343 return 0;
344 } 344 }
345 345
346 /* pseries_nvram_init_os_partition 346 /* pseries_nvram_init_os_partition
347 * 347 *
348 * This sets up a partition with an "OS" signature. 348 * This sets up a partition with an "OS" signature.
349 * 349 *
350 * The general strategy is the following: 350 * The general strategy is the following:
351 * 1.) If a partition with the indicated name already exists... 351 * 1.) If a partition with the indicated name already exists...
352 * - If it's large enough, use it. 352 * - If it's large enough, use it.
353 * - Otherwise, recycle it and keep going. 353 * - Otherwise, recycle it and keep going.
354 * 2.) Search for a free partition that is large enough. 354 * 2.) Search for a free partition that is large enough.
355 * 3.) If there's not a free partition large enough, recycle any obsolete 355 * 3.) If there's not a free partition large enough, recycle any obsolete
356 * OS partitions and try again. 356 * OS partitions and try again.
357 * 4.) Will first try getting a chunk that will satisfy the requested size. 357 * 4.) Will first try getting a chunk that will satisfy the requested size.
358 * 5.) If a chunk of the requested size cannot be allocated, then try finding 358 * 5.) If a chunk of the requested size cannot be allocated, then try finding
359 * a chunk that will satisfy the minimum needed. 359 * a chunk that will satisfy the minimum needed.
360 * 360 *
361 * Returns 0 on success, else -1. 361 * Returns 0 on success, else -1.
362 */ 362 */
363 static int __init pseries_nvram_init_os_partition(struct nvram_os_partition 363 static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
364 *part) 364 *part)
365 { 365 {
366 loff_t p; 366 loff_t p;
367 int size; 367 int size;
368 368
369 /* Scan nvram for partitions */ 369 /* Scan nvram for partitions */
370 nvram_scan_partitions(); 370 nvram_scan_partitions();
371 371
372 /* Look for ours */ 372 /* Look for ours */
373 p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size); 373 p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size);
374 374
375 /* Found one but too small, remove it */ 375 /* Found one but too small, remove it */
376 if (p && size < part->min_size) { 376 if (p && size < part->min_size) {
377 pr_info("nvram: Found too small %s partition," 377 pr_info("nvram: Found too small %s partition,"
378 " removing it...\n", part->name); 378 " removing it...\n", part->name);
379 nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL); 379 nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL);
380 p = 0; 380 p = 0;
381 } 381 }
382 382
383 /* Create one if we didn't find */ 383 /* Create one if we didn't find */
384 if (!p) { 384 if (!p) {
385 p = nvram_create_partition(part->name, NVRAM_SIG_OS, 385 p = nvram_create_partition(part->name, NVRAM_SIG_OS,
386 part->req_size, part->min_size); 386 part->req_size, part->min_size);
387 if (p == -ENOSPC) { 387 if (p == -ENOSPC) {
388 pr_info("nvram: No room to create %s partition, " 388 pr_info("nvram: No room to create %s partition, "
389 "deleting any obsolete OS partitions...\n", 389 "deleting any obsolete OS partitions...\n",
390 part->name); 390 part->name);
391 nvram_remove_partition(NULL, NVRAM_SIG_OS, 391 nvram_remove_partition(NULL, NVRAM_SIG_OS,
392 pseries_nvram_os_partitions); 392 pseries_nvram_os_partitions);
393 p = nvram_create_partition(part->name, NVRAM_SIG_OS, 393 p = nvram_create_partition(part->name, NVRAM_SIG_OS,
394 part->req_size, part->min_size); 394 part->req_size, part->min_size);
395 } 395 }
396 } 396 }
397 397
398 if (p <= 0) { 398 if (p <= 0) {
399 pr_err("nvram: Failed to find or create %s" 399 pr_err("nvram: Failed to find or create %s"
400 " partition, err %d\n", part->name, (int)p); 400 " partition, err %d\n", part->name, (int)p);
401 return -1; 401 return -1;
402 } 402 }
403 403
404 part->index = p; 404 part->index = p;
405 part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info); 405 part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info);
406 406
407 return 0; 407 return 0;
408 } 408 }
409 409
410 static void __init nvram_init_oops_partition(int rtas_partition_exists) 410 static void __init nvram_init_oops_partition(int rtas_partition_exists)
411 { 411 {
412 int rc; 412 int rc;
413 413
414 rc = pseries_nvram_init_os_partition(&oops_log_partition); 414 rc = pseries_nvram_init_os_partition(&oops_log_partition);
415 if (rc != 0) { 415 if (rc != 0) {
416 if (!rtas_partition_exists) 416 if (!rtas_partition_exists)
417 return; 417 return;
418 pr_notice("nvram: Using %s partition to log both" 418 pr_notice("nvram: Using %s partition to log both"
419 " RTAS errors and oops/panic reports\n", 419 " RTAS errors and oops/panic reports\n",
420 rtas_log_partition.name); 420 rtas_log_partition.name);
421 memcpy(&oops_log_partition, &rtas_log_partition, 421 memcpy(&oops_log_partition, &rtas_log_partition,
422 sizeof(rtas_log_partition)); 422 sizeof(rtas_log_partition));
423 } 423 }
424 oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL); 424 oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL);
425 if (!oops_buf) { 425 if (!oops_buf) {
426 pr_err("nvram: No memory for %s partition\n", 426 pr_err("nvram: No memory for %s partition\n",
427 oops_log_partition.name); 427 oops_log_partition.name);
428 return; 428 return;
429 } 429 }
430 oops_len = (u16*) oops_buf; 430 oops_len = (u16*) oops_buf;
431 oops_data = oops_buf + sizeof(u16); 431 oops_data = oops_buf + sizeof(u16);
432 oops_data_sz = oops_log_partition.size - sizeof(u16); 432 oops_data_sz = oops_log_partition.size - sizeof(u16);
433 433
434 /* 434 /*
435 * Figure compression (preceded by elimination of each line's <n> 435 * Figure compression (preceded by elimination of each line's <n>
436 * severity prefix) will reduce the oops/panic report to at most 436 * severity prefix) will reduce the oops/panic report to at most
437 * 45% of its original size. 437 * 45% of its original size.
438 */ 438 */
439 big_oops_buf_sz = (oops_data_sz * 100) / 45; 439 big_oops_buf_sz = (oops_data_sz * 100) / 45;
440 big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); 440 big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
441 if (big_oops_buf) { 441 if (big_oops_buf) {
442 stream.workspace = kmalloc(zlib_deflate_workspacesize( 442 stream.workspace = kmalloc(zlib_deflate_workspacesize(
443 WINDOW_BITS, MEM_LEVEL), GFP_KERNEL); 443 WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
444 if (!stream.workspace) { 444 if (!stream.workspace) {
445 pr_err("nvram: No memory for compression workspace; " 445 pr_err("nvram: No memory for compression workspace; "
446 "skipping compression of %s partition data\n", 446 "skipping compression of %s partition data\n",
447 oops_log_partition.name); 447 oops_log_partition.name);
448 kfree(big_oops_buf); 448 kfree(big_oops_buf);
449 big_oops_buf = NULL; 449 big_oops_buf = NULL;
450 } 450 }
451 } else { 451 } else {
452 pr_err("No memory for uncompressed %s data; " 452 pr_err("No memory for uncompressed %s data; "
453 "skipping compression\n", oops_log_partition.name); 453 "skipping compression\n", oops_log_partition.name);
454 stream.workspace = NULL; 454 stream.workspace = NULL;
455 } 455 }
456 456
457 rc = kmsg_dump_register(&nvram_kmsg_dumper); 457 rc = kmsg_dump_register(&nvram_kmsg_dumper);
458 if (rc != 0) { 458 if (rc != 0) {
459 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); 459 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
460 kfree(oops_buf); 460 kfree(oops_buf);
461 kfree(big_oops_buf); 461 kfree(big_oops_buf);
462 kfree(stream.workspace); 462 kfree(stream.workspace);
463 } 463 }
464 } 464 }
465 465
466 static int __init pseries_nvram_init_log_partitions(void) 466 static int __init pseries_nvram_init_log_partitions(void)
467 { 467 {
468 int rc; 468 int rc;
469 469
470 rc = pseries_nvram_init_os_partition(&rtas_log_partition); 470 rc = pseries_nvram_init_os_partition(&rtas_log_partition);
471 nvram_init_oops_partition(rc == 0); 471 nvram_init_oops_partition(rc == 0);
472 return 0; 472 return 0;
473 } 473 }
474 machine_arch_initcall(pseries, pseries_nvram_init_log_partitions); 474 machine_arch_initcall(pseries, pseries_nvram_init_log_partitions);
475 475
476 int __init pSeries_nvram_init(void) 476 int __init pSeries_nvram_init(void)
477 { 477 {
478 struct device_node *nvram; 478 struct device_node *nvram;
479 const unsigned int *nbytes_p; 479 const unsigned int *nbytes_p;
480 unsigned int proplen; 480 unsigned int proplen;
481 481
482 nvram = of_find_node_by_type(NULL, "nvram"); 482 nvram = of_find_node_by_type(NULL, "nvram");
483 if (nvram == NULL) 483 if (nvram == NULL)
484 return -ENODEV; 484 return -ENODEV;
485 485
486 nbytes_p = of_get_property(nvram, "#bytes", &proplen); 486 nbytes_p = of_get_property(nvram, "#bytes", &proplen);
487 if (nbytes_p == NULL || proplen != sizeof(unsigned int)) { 487 if (nbytes_p == NULL || proplen != sizeof(unsigned int)) {
488 of_node_put(nvram); 488 of_node_put(nvram);
489 return -EIO; 489 return -EIO;
490 } 490 }
491 491
492 nvram_size = *nbytes_p; 492 nvram_size = *nbytes_p;
493 493
494 nvram_fetch = rtas_token("nvram-fetch"); 494 nvram_fetch = rtas_token("nvram-fetch");
495 nvram_store = rtas_token("nvram-store"); 495 nvram_store = rtas_token("nvram-store");
496 printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size); 496 printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size);
497 of_node_put(nvram); 497 of_node_put(nvram);
498 498
499 ppc_md.nvram_read = pSeries_nvram_read; 499 ppc_md.nvram_read = pSeries_nvram_read;
500 ppc_md.nvram_write = pSeries_nvram_write; 500 ppc_md.nvram_write = pSeries_nvram_write;
501 ppc_md.nvram_size = pSeries_nvram_get_size; 501 ppc_md.nvram_size = pSeries_nvram_get_size;
502 502
503 return 0; 503 return 0;
504 } 504 }
505 505
506 /* 506 /*
507 * Try to capture the last capture_len bytes of the printk buffer. Return 507 * Try to capture the last capture_len bytes of the printk buffer. Return
508 * the amount actually captured. 508 * the amount actually captured.
509 */ 509 */
510 static size_t capture_last_msgs(const char *old_msgs, size_t old_len, 510 static size_t capture_last_msgs(const char *old_msgs, size_t old_len,
511 const char *new_msgs, size_t new_len, 511 const char *new_msgs, size_t new_len,
512 char *captured, size_t capture_len) 512 char *captured, size_t capture_len)
513 { 513 {
514 if (new_len >= capture_len) { 514 if (new_len >= capture_len) {
515 memcpy(captured, new_msgs + (new_len - capture_len), 515 memcpy(captured, new_msgs + (new_len - capture_len),
516 capture_len); 516 capture_len);
517 return capture_len; 517 return capture_len;
518 } else { 518 } else {
519 /* Grab the end of old_msgs. */ 519 /* Grab the end of old_msgs. */
520 size_t old_tail_len = min(old_len, capture_len - new_len); 520 size_t old_tail_len = min(old_len, capture_len - new_len);
521 memcpy(captured, old_msgs + (old_len - old_tail_len), 521 memcpy(captured, old_msgs + (old_len - old_tail_len),
522 old_tail_len); 522 old_tail_len);
523 memcpy(captured + old_tail_len, new_msgs, new_len); 523 memcpy(captured + old_tail_len, new_msgs, new_len);
524 return old_tail_len + new_len; 524 return old_tail_len + new_len;
525 } 525 }
526 } 526 }
527 527
528 /* 528 /*
529 * Are we using the ibm,rtas-log for oops/panic reports? And if so, 529 * Are we using the ibm,rtas-log for oops/panic reports? And if so,
530 * would logging this oops/panic overwrite an RTAS event that rtas_errd 530 * would logging this oops/panic overwrite an RTAS event that rtas_errd
531 * hasn't had a chance to read and process? Return 1 if so, else 0. 531 * hasn't had a chance to read and process? Return 1 if so, else 0.
532 * 532 *
533 * We assume that if rtas_errd hasn't read the RTAS event in 533 * We assume that if rtas_errd hasn't read the RTAS event in
534 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. 534 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
535 */ 535 */
536 static int clobbering_unread_rtas_event(void) 536 static int clobbering_unread_rtas_event(void)
537 { 537 {
538 return (oops_log_partition.index == rtas_log_partition.index 538 return (oops_log_partition.index == rtas_log_partition.index
539 && last_unread_rtas_event 539 && last_unread_rtas_event
540 && get_seconds() - last_unread_rtas_event <= 540 && get_seconds() - last_unread_rtas_event <=
541 NVRAM_RTAS_READ_TIMEOUT); 541 NVRAM_RTAS_READ_TIMEOUT);
542 } 542 }
543 543
/*
 * Remove the "<n>" severity tag ('<', one digit, '>') found at the start
 * of each line in buf[0..len), compacting the text in place.
 *
 * The first byte of the buffer is treated as the start of a line.  A tag
 * is only recognised immediately after a '\n' (or at the very start) and
 * only when all three of its bytes lie inside the buffer.
 *
 * Returns the number of bytes left in buf after compaction.
 */
static size_t elide_severities(char *buf, size_t len)
{
	const char *src = buf;
	const char *end = buf + len;
	char *dst = buf;
	int at_line_start = 1;

	while (src < end) {
		int is_tag = at_line_start && src + 3 <= end &&
			     src[0] == '<' && isdigit(src[1]) && src[2] == '>';

		if (is_tag) {
			/* Drop the tag; what follows is mid-line text. */
			src += 3;
			at_line_start = 0;
			continue;
		}
		at_line_start = (*src == '\n');
		*dst++ = *src++;
	}
	return dst - buf;
}
564 564
565 /* Derived from logfs_compress() */ 565 /* Derived from logfs_compress() */
566 static int nvram_compress(const void *in, void *out, size_t inlen, 566 static int nvram_compress(const void *in, void *out, size_t inlen,
567 size_t outlen) 567 size_t outlen)
568 { 568 {
569 int err, ret; 569 int err, ret;
570 570
571 ret = -EIO; 571 ret = -EIO;
572 err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, 572 err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
573 MEM_LEVEL, Z_DEFAULT_STRATEGY); 573 MEM_LEVEL, Z_DEFAULT_STRATEGY);
574 if (err != Z_OK) 574 if (err != Z_OK)
575 goto error; 575 goto error;
576 576
577 stream.next_in = in; 577 stream.next_in = in;
578 stream.avail_in = inlen; 578 stream.avail_in = inlen;
579 stream.total_in = 0; 579 stream.total_in = 0;
580 stream.next_out = out; 580 stream.next_out = out;
581 stream.avail_out = outlen; 581 stream.avail_out = outlen;
582 stream.total_out = 0; 582 stream.total_out = 0;
583 583
584 err = zlib_deflate(&stream, Z_FINISH); 584 err = zlib_deflate(&stream, Z_FINISH);
585 if (err != Z_STREAM_END) 585 if (err != Z_STREAM_END)
586 goto error; 586 goto error;
587 587
588 err = zlib_deflateEnd(&stream); 588 err = zlib_deflateEnd(&stream);
589 if (err != Z_OK) 589 if (err != Z_OK)
590 goto error; 590 goto error;
591 591
592 if (stream.total_out >= stream.total_in) 592 if (stream.total_out >= stream.total_in)
593 goto error; 593 goto error;
594 594
595 ret = stream.total_out; 595 ret = stream.total_out;
596 error: 596 error:
597 return ret; 597 return ret;
598 } 598 }
599 599
600 /* Compress the text from big_oops_buf into oops_buf. */ 600 /* Compress the text from big_oops_buf into oops_buf. */
601 static int zip_oops(size_t text_len) 601 static int zip_oops(size_t text_len)
602 { 602 {
603 int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, 603 int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
604 oops_data_sz); 604 oops_data_sz);
605 if (zipped_len < 0) { 605 if (zipped_len < 0) {
606 pr_err("nvram: compression failed; returned %d\n", zipped_len); 606 pr_err("nvram: compression failed; returned %d\n", zipped_len);
607 pr_err("nvram: logging uncompressed oops/panic report\n"); 607 pr_err("nvram: logging uncompressed oops/panic report\n");
608 return -1; 608 return -1;
609 } 609 }
610 *oops_len = (u16) zipped_len; 610 *oops_len = (u16) zipped_len;
611 return 0; 611 return 0;
612 } 612 }
613 613
614 /* 614 /*
615 * This is our kmsg_dump callback, called after an oops or panic report 615 * This is our kmsg_dump callback, called after an oops or panic report
616 * has been written to the printk buffer. We want to capture as much 616 * has been written to the printk buffer. We want to capture as much
617 * of the printk buffer as possible. First, capture as much as we can 617 * of the printk buffer as possible. First, capture as much as we can
618 * that we think will compress sufficiently to fit in the lnx,oops-log 618 * that we think will compress sufficiently to fit in the lnx,oops-log
619 * partition. If that's too much, go back and capture uncompressed text. 619 * partition. If that's too much, go back and capture uncompressed text.
620 */ 620 */
621 static void oops_to_nvram(struct kmsg_dumper *dumper, 621 static void oops_to_nvram(struct kmsg_dumper *dumper,
622 enum kmsg_dump_reason reason, 622 enum kmsg_dump_reason reason,
623 const char *old_msgs, unsigned long old_len, 623 const char *old_msgs, unsigned long old_len,
624 const char *new_msgs, unsigned long new_len) 624 const char *new_msgs, unsigned long new_len)
625 { 625 {
626 static unsigned int oops_count = 0; 626 static unsigned int oops_count = 0;
627 static bool panicking = false; 627 static bool panicking = false;
628 static DEFINE_SPINLOCK(lock); 628 static DEFINE_SPINLOCK(lock);
629 unsigned long flags; 629 unsigned long flags;
630 size_t text_len; 630 size_t text_len;
631 unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ; 631 unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ;
632 int rc = -1; 632 int rc = -1;
633 633
634 switch (reason) { 634 switch (reason) {
635 case KMSG_DUMP_RESTART: 635 case KMSG_DUMP_RESTART:
636 case KMSG_DUMP_HALT: 636 case KMSG_DUMP_HALT:
637 case KMSG_DUMP_POWEROFF: 637 case KMSG_DUMP_POWEROFF:
638 /* These are almost always orderly shutdowns. */ 638 /* These are almost always orderly shutdowns. */
639 return; 639 return;
640 case KMSG_DUMP_OOPS: 640 case KMSG_DUMP_OOPS:
641 case KMSG_DUMP_KEXEC:
642 break; 641 break;
643 case KMSG_DUMP_PANIC: 642 case KMSG_DUMP_PANIC:
644 panicking = true; 643 panicking = true;
645 break; 644 break;
646 case KMSG_DUMP_EMERG: 645 case KMSG_DUMP_EMERG:
647 if (panicking) 646 if (panicking)
648 /* Panic report already captured. */ 647 /* Panic report already captured. */
649 return; 648 return;
650 break; 649 break;
651 default: 650 default:
652 pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n", 651 pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
653 __FUNCTION__, (int) reason); 652 __FUNCTION__, (int) reason);
654 return; 653 return;
655 } 654 }
656 655
657 if (clobbering_unread_rtas_event()) 656 if (clobbering_unread_rtas_event())
658 return; 657 return;
659 658
660 if (!spin_trylock_irqsave(&lock, flags)) 659 if (!spin_trylock_irqsave(&lock, flags))
661 return; 660 return;
662 661
663 if (big_oops_buf) { 662 if (big_oops_buf) {
664 text_len = capture_last_msgs(old_msgs, old_len, 663 text_len = capture_last_msgs(old_msgs, old_len,
665 new_msgs, new_len, big_oops_buf, big_oops_buf_sz); 664 new_msgs, new_len, big_oops_buf, big_oops_buf_sz);
666 text_len = elide_severities(big_oops_buf, text_len); 665 text_len = elide_severities(big_oops_buf, text_len);
667 rc = zip_oops(text_len); 666 rc = zip_oops(text_len);
668 } 667 }
669 if (rc != 0) { 668 if (rc != 0) {
670 text_len = capture_last_msgs(old_msgs, old_len, 669 text_len = capture_last_msgs(old_msgs, old_len,
671 new_msgs, new_len, oops_data, oops_data_sz); 670 new_msgs, new_len, oops_data, oops_data_sz);
672 err_type = ERR_TYPE_KERNEL_PANIC; 671 err_type = ERR_TYPE_KERNEL_PANIC;
673 *oops_len = (u16) text_len; 672 *oops_len = (u16) text_len;
674 } 673 }
675 674
676 (void) nvram_write_os_partition(&oops_log_partition, oops_buf, 675 (void) nvram_write_os_partition(&oops_log_partition, oops_buf,
677 (int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count); 676 (int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count);
678 677
679 spin_unlock_irqrestore(&lock, flags); 678 spin_unlock_irqrestore(&lock, flags);
680 } 679 }
681 680
drivers/char/ramoops.c
1 /* 1 /*
2 * RAM Oops/Panic logger 2 * RAM Oops/Panic logger
3 * 3 *
4 * Copyright (C) 2010 Marco Stornelli <marco.stornelli@gmail.com> 4 * Copyright (C) 2010 Marco Stornelli <marco.stornelli@gmail.com>
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation. 8 * version 2 as published by the Free Software Foundation.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, but 10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of 11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details. 13 * General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18 * 02110-1301 USA 18 * 02110-1301 USA
19 * 19 *
20 */ 20 */
21 21
22 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 22 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
23 23
24 #include <linux/kernel.h> 24 #include <linux/kernel.h>
25 #include <linux/err.h> 25 #include <linux/err.h>
26 #include <linux/module.h> 26 #include <linux/module.h>
27 #include <linux/kmsg_dump.h> 27 #include <linux/kmsg_dump.h>
28 #include <linux/time.h> 28 #include <linux/time.h>
29 #include <linux/err.h> 29 #include <linux/err.h>
30 #include <linux/io.h> 30 #include <linux/io.h>
31 #include <linux/ioport.h> 31 #include <linux/ioport.h>
32 #include <linux/platform_device.h> 32 #include <linux/platform_device.h>
33 #include <linux/slab.h> 33 #include <linux/slab.h>
34 #include <linux/ramoops.h> 34 #include <linux/ramoops.h>
35 35
36 #define RAMOOPS_KERNMSG_HDR "====" 36 #define RAMOOPS_KERNMSG_HDR "===="
37 #define MIN_MEM_SIZE 4096UL 37 #define MIN_MEM_SIZE 4096UL
38 38
39 static ulong record_size = MIN_MEM_SIZE; 39 static ulong record_size = MIN_MEM_SIZE;
40 module_param(record_size, ulong, 0400); 40 module_param(record_size, ulong, 0400);
41 MODULE_PARM_DESC(record_size, 41 MODULE_PARM_DESC(record_size,
42 "size of each dump done on oops/panic"); 42 "size of each dump done on oops/panic");
43 43
44 static ulong mem_address; 44 static ulong mem_address;
45 module_param(mem_address, ulong, 0400); 45 module_param(mem_address, ulong, 0400);
46 MODULE_PARM_DESC(mem_address, 46 MODULE_PARM_DESC(mem_address,
47 "start of reserved RAM used to store oops/panic logs"); 47 "start of reserved RAM used to store oops/panic logs");
48 48
49 static ulong mem_size; 49 static ulong mem_size;
50 module_param(mem_size, ulong, 0400); 50 module_param(mem_size, ulong, 0400);
51 MODULE_PARM_DESC(mem_size, 51 MODULE_PARM_DESC(mem_size,
52 "size of reserved RAM used to store oops/panic logs"); 52 "size of reserved RAM used to store oops/panic logs");
53 53
54 static int dump_oops = 1; 54 static int dump_oops = 1;
55 module_param(dump_oops, int, 0600); 55 module_param(dump_oops, int, 0600);
56 MODULE_PARM_DESC(dump_oops, 56 MODULE_PARM_DESC(dump_oops,
57 "set to 1 to dump oopses, 0 to only dump panics (default 1)"); 57 "set to 1 to dump oopses, 0 to only dump panics (default 1)");
58 58
59 static struct ramoops_context { 59 static struct ramoops_context {
60 struct kmsg_dumper dump; 60 struct kmsg_dumper dump;
61 void *virt_addr; 61 void *virt_addr;
62 phys_addr_t phys_addr; 62 phys_addr_t phys_addr;
63 unsigned long size; 63 unsigned long size;
64 unsigned long record_size; 64 unsigned long record_size;
65 int dump_oops; 65 int dump_oops;
66 int count; 66 int count;
67 int max_count; 67 int max_count;
68 } oops_cxt; 68 } oops_cxt;
69 69
70 static struct platform_device *dummy; 70 static struct platform_device *dummy;
71 static struct ramoops_platform_data *dummy_data; 71 static struct ramoops_platform_data *dummy_data;
72 72
73 static void ramoops_do_dump(struct kmsg_dumper *dumper, 73 static void ramoops_do_dump(struct kmsg_dumper *dumper,
74 enum kmsg_dump_reason reason, const char *s1, unsigned long l1, 74 enum kmsg_dump_reason reason, const char *s1, unsigned long l1,
75 const char *s2, unsigned long l2) 75 const char *s2, unsigned long l2)
76 { 76 {
77 struct ramoops_context *cxt = container_of(dumper, 77 struct ramoops_context *cxt = container_of(dumper,
78 struct ramoops_context, dump); 78 struct ramoops_context, dump);
79 unsigned long s1_start, s2_start; 79 unsigned long s1_start, s2_start;
80 unsigned long l1_cpy, l2_cpy; 80 unsigned long l1_cpy, l2_cpy;
81 int res, hdr_size; 81 int res, hdr_size;
82 char *buf, *buf_orig; 82 char *buf, *buf_orig;
83 struct timeval timestamp; 83 struct timeval timestamp;
84 84
85 if (reason != KMSG_DUMP_OOPS && 85 if (reason != KMSG_DUMP_OOPS &&
86 reason != KMSG_DUMP_PANIC && 86 reason != KMSG_DUMP_PANIC)
87 reason != KMSG_DUMP_KEXEC)
88 return; 87 return;
89 88
90 /* Only dump oopses if dump_oops is set */ 89 /* Only dump oopses if dump_oops is set */
91 if (reason == KMSG_DUMP_OOPS && !cxt->dump_oops) 90 if (reason == KMSG_DUMP_OOPS && !cxt->dump_oops)
92 return; 91 return;
93 92
94 buf = cxt->virt_addr + (cxt->count * cxt->record_size); 93 buf = cxt->virt_addr + (cxt->count * cxt->record_size);
95 buf_orig = buf; 94 buf_orig = buf;
96 95
97 memset(buf, '\0', cxt->record_size); 96 memset(buf, '\0', cxt->record_size);
98 res = sprintf(buf, "%s", RAMOOPS_KERNMSG_HDR); 97 res = sprintf(buf, "%s", RAMOOPS_KERNMSG_HDR);
99 buf += res; 98 buf += res;
100 do_gettimeofday(&timestamp); 99 do_gettimeofday(&timestamp);
101 res = sprintf(buf, "%lu.%lu\n", (long)timestamp.tv_sec, (long)timestamp.tv_usec); 100 res = sprintf(buf, "%lu.%lu\n", (long)timestamp.tv_sec, (long)timestamp.tv_usec);
102 buf += res; 101 buf += res;
103 102
104 hdr_size = buf - buf_orig; 103 hdr_size = buf - buf_orig;
105 l2_cpy = min(l2, cxt->record_size - hdr_size); 104 l2_cpy = min(l2, cxt->record_size - hdr_size);
106 l1_cpy = min(l1, cxt->record_size - hdr_size - l2_cpy); 105 l1_cpy = min(l1, cxt->record_size - hdr_size - l2_cpy);
107 106
108 s2_start = l2 - l2_cpy; 107 s2_start = l2 - l2_cpy;
109 s1_start = l1 - l1_cpy; 108 s1_start = l1 - l1_cpy;
110 109
111 memcpy(buf, s1 + s1_start, l1_cpy); 110 memcpy(buf, s1 + s1_start, l1_cpy);
112 memcpy(buf + l1_cpy, s2 + s2_start, l2_cpy); 111 memcpy(buf + l1_cpy, s2 + s2_start, l2_cpy);
113 112
114 cxt->count = (cxt->count + 1) % cxt->max_count; 113 cxt->count = (cxt->count + 1) % cxt->max_count;
115 } 114 }
116 115
117 static int __init ramoops_probe(struct platform_device *pdev) 116 static int __init ramoops_probe(struct platform_device *pdev)
118 { 117 {
119 struct ramoops_platform_data *pdata = pdev->dev.platform_data; 118 struct ramoops_platform_data *pdata = pdev->dev.platform_data;
120 struct ramoops_context *cxt = &oops_cxt; 119 struct ramoops_context *cxt = &oops_cxt;
121 int err = -EINVAL; 120 int err = -EINVAL;
122 121
123 if (!pdata->mem_size || !pdata->record_size) { 122 if (!pdata->mem_size || !pdata->record_size) {
124 pr_err("The memory size and the record size must be " 123 pr_err("The memory size and the record size must be "
125 "non-zero\n"); 124 "non-zero\n");
126 goto fail3; 125 goto fail3;
127 } 126 }
128 127
129 rounddown_pow_of_two(pdata->mem_size); 128 rounddown_pow_of_two(pdata->mem_size);
130 rounddown_pow_of_two(pdata->record_size); 129 rounddown_pow_of_two(pdata->record_size);
131 130
132 /* Check for the minimum memory size */ 131 /* Check for the minimum memory size */
133 if (pdata->mem_size < MIN_MEM_SIZE && 132 if (pdata->mem_size < MIN_MEM_SIZE &&
134 pdata->record_size < MIN_MEM_SIZE) { 133 pdata->record_size < MIN_MEM_SIZE) {
135 pr_err("memory size too small, minium is %lu\n", MIN_MEM_SIZE); 134 pr_err("memory size too small, minium is %lu\n", MIN_MEM_SIZE);
136 goto fail3; 135 goto fail3;
137 } 136 }
138 137
139 if (pdata->mem_size < pdata->record_size) { 138 if (pdata->mem_size < pdata->record_size) {
140 pr_err("The memory size must be larger than the " 139 pr_err("The memory size must be larger than the "
141 "records size\n"); 140 "records size\n");
142 goto fail3; 141 goto fail3;
143 } 142 }
144 143
145 cxt->max_count = pdata->mem_size / pdata->record_size; 144 cxt->max_count = pdata->mem_size / pdata->record_size;
146 cxt->count = 0; 145 cxt->count = 0;
147 cxt->size = pdata->mem_size; 146 cxt->size = pdata->mem_size;
148 cxt->phys_addr = pdata->mem_address; 147 cxt->phys_addr = pdata->mem_address;
149 cxt->record_size = pdata->record_size; 148 cxt->record_size = pdata->record_size;
150 cxt->dump_oops = pdata->dump_oops; 149 cxt->dump_oops = pdata->dump_oops;
151 /* 150 /*
152 * Update the module parameter variables as well so they are visible 151 * Update the module parameter variables as well so they are visible
153 * through /sys/module/ramoops/parameters/ 152 * through /sys/module/ramoops/parameters/
154 */ 153 */
155 mem_size = pdata->mem_size; 154 mem_size = pdata->mem_size;
156 mem_address = pdata->mem_address; 155 mem_address = pdata->mem_address;
157 record_size = pdata->record_size; 156 record_size = pdata->record_size;
158 dump_oops = pdata->dump_oops; 157 dump_oops = pdata->dump_oops;
159 158
160 if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) { 159 if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) {
161 pr_err("request mem region failed\n"); 160 pr_err("request mem region failed\n");
162 err = -EINVAL; 161 err = -EINVAL;
163 goto fail3; 162 goto fail3;
164 } 163 }
165 164
166 cxt->virt_addr = ioremap(cxt->phys_addr, cxt->size); 165 cxt->virt_addr = ioremap(cxt->phys_addr, cxt->size);
167 if (!cxt->virt_addr) { 166 if (!cxt->virt_addr) {
168 pr_err("ioremap failed\n"); 167 pr_err("ioremap failed\n");
169 goto fail2; 168 goto fail2;
170 } 169 }
171 170
172 cxt->dump.dump = ramoops_do_dump; 171 cxt->dump.dump = ramoops_do_dump;
173 err = kmsg_dump_register(&cxt->dump); 172 err = kmsg_dump_register(&cxt->dump);
174 if (err) { 173 if (err) {
175 pr_err("registering kmsg dumper failed\n"); 174 pr_err("registering kmsg dumper failed\n");
176 goto fail1; 175 goto fail1;
177 } 176 }
178 177
179 return 0; 178 return 0;
180 179
181 fail1: 180 fail1:
182 iounmap(cxt->virt_addr); 181 iounmap(cxt->virt_addr);
183 fail2: 182 fail2:
184 release_mem_region(cxt->phys_addr, cxt->size); 183 release_mem_region(cxt->phys_addr, cxt->size);
185 fail3: 184 fail3:
186 return err; 185 return err;
187 } 186 }
188 187
189 static int __exit ramoops_remove(struct platform_device *pdev) 188 static int __exit ramoops_remove(struct platform_device *pdev)
190 { 189 {
191 struct ramoops_context *cxt = &oops_cxt; 190 struct ramoops_context *cxt = &oops_cxt;
192 191
193 if (kmsg_dump_unregister(&cxt->dump) < 0) 192 if (kmsg_dump_unregister(&cxt->dump) < 0)
194 pr_warn("could not unregister kmsg_dumper\n"); 193 pr_warn("could not unregister kmsg_dumper\n");
195 194
196 iounmap(cxt->virt_addr); 195 iounmap(cxt->virt_addr);
197 release_mem_region(cxt->phys_addr, cxt->size); 196 release_mem_region(cxt->phys_addr, cxt->size);
198 return 0; 197 return 0;
199 } 198 }
200 199
201 static struct platform_driver ramoops_driver = { 200 static struct platform_driver ramoops_driver = {
202 .remove = __exit_p(ramoops_remove), 201 .remove = __exit_p(ramoops_remove),
203 .driver = { 202 .driver = {
204 .name = "ramoops", 203 .name = "ramoops",
205 .owner = THIS_MODULE, 204 .owner = THIS_MODULE,
206 }, 205 },
207 }; 206 };
208 207
209 static int __init ramoops_init(void) 208 static int __init ramoops_init(void)
210 { 209 {
211 int ret; 210 int ret;
212 ret = platform_driver_probe(&ramoops_driver, ramoops_probe); 211 ret = platform_driver_probe(&ramoops_driver, ramoops_probe);
213 if (ret == -ENODEV) { 212 if (ret == -ENODEV) {
214 /* 213 /*
215 * If we didn't find a platform device, we use module parameters 214 * If we didn't find a platform device, we use module parameters
216 * building platform data on the fly. 215 * building platform data on the fly.
217 */ 216 */
218 pr_info("platform device not found, using module parameters\n"); 217 pr_info("platform device not found, using module parameters\n");
219 dummy_data = kzalloc(sizeof(struct ramoops_platform_data), 218 dummy_data = kzalloc(sizeof(struct ramoops_platform_data),
220 GFP_KERNEL); 219 GFP_KERNEL);
221 if (!dummy_data) 220 if (!dummy_data)
222 return -ENOMEM; 221 return -ENOMEM;
223 dummy_data->mem_size = mem_size; 222 dummy_data->mem_size = mem_size;
224 dummy_data->mem_address = mem_address; 223 dummy_data->mem_address = mem_address;
225 dummy_data->record_size = record_size; 224 dummy_data->record_size = record_size;
226 dummy_data->dump_oops = dump_oops; 225 dummy_data->dump_oops = dump_oops;
227 dummy = platform_create_bundle(&ramoops_driver, ramoops_probe, 226 dummy = platform_create_bundle(&ramoops_driver, ramoops_probe,
228 NULL, 0, dummy_data, 227 NULL, 0, dummy_data,
229 sizeof(struct ramoops_platform_data)); 228 sizeof(struct ramoops_platform_data));
230 229
231 if (IS_ERR(dummy)) 230 if (IS_ERR(dummy))
232 ret = PTR_ERR(dummy); 231 ret = PTR_ERR(dummy);
233 else 232 else
234 ret = 0; 233 ret = 0;
235 } 234 }
236 235
237 return ret; 236 return ret;
238 } 237 }
239 238
240 static void __exit ramoops_exit(void) 239 static void __exit ramoops_exit(void)
241 { 240 {
242 platform_driver_unregister(&ramoops_driver); 241 platform_driver_unregister(&ramoops_driver);
243 kfree(dummy_data); 242 kfree(dummy_data);
244 } 243 }
245 244
246 module_init(ramoops_init); 245 module_init(ramoops_init);
247 module_exit(ramoops_exit); 246 module_exit(ramoops_exit);
248 247
249 MODULE_LICENSE("GPL"); 248 MODULE_LICENSE("GPL");
250 MODULE_AUTHOR("Marco Stornelli <marco.stornelli@gmail.com>"); 249 MODULE_AUTHOR("Marco Stornelli <marco.stornelli@gmail.com>");
251 MODULE_DESCRIPTION("RAM Oops/Panic logger/driver"); 250 MODULE_DESCRIPTION("RAM Oops/Panic logger/driver");
252 251
drivers/mtd/mtdoops.c
1 /* 1 /*
2 * MTD Oops/Panic logger 2 * MTD Oops/Panic logger
3 * 3 *
4 * Copyright © 2007 Nokia Corporation. All rights reserved. 4 * Copyright © 2007 Nokia Corporation. All rights reserved.
5 * 5 *
6 * Author: Richard Purdie <rpurdie@openedhand.com> 6 * Author: Richard Purdie <rpurdie@openedhand.com>
7 * 7 *
8 * This program is free software; you can redistribute it and/or 8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License 9 * modify it under the terms of the GNU General Public License
10 * version 2 as published by the Free Software Foundation. 10 * version 2 as published by the Free Software Foundation.
11 * 11 *
12 * This program is distributed in the hope that it will be useful, but 12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of 13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details. 15 * General Public License for more details.
16 * 16 *
17 * You should have received a copy of the GNU General Public License 17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software 18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
20 * 02110-1301 USA 20 * 02110-1301 USA
21 * 21 *
22 */ 22 */
23 23
24 #include <linux/kernel.h> 24 #include <linux/kernel.h>
25 #include <linux/module.h> 25 #include <linux/module.h>
26 #include <linux/console.h> 26 #include <linux/console.h>
27 #include <linux/vmalloc.h> 27 #include <linux/vmalloc.h>
28 #include <linux/workqueue.h> 28 #include <linux/workqueue.h>
29 #include <linux/sched.h> 29 #include <linux/sched.h>
30 #include <linux/wait.h> 30 #include <linux/wait.h>
31 #include <linux/delay.h> 31 #include <linux/delay.h>
32 #include <linux/interrupt.h> 32 #include <linux/interrupt.h>
33 #include <linux/mtd/mtd.h> 33 #include <linux/mtd/mtd.h>
34 #include <linux/kmsg_dump.h> 34 #include <linux/kmsg_dump.h>
35 35
36 /* Maximum MTD partition size */ 36 /* Maximum MTD partition size */
37 #define MTDOOPS_MAX_MTD_SIZE (8 * 1024 * 1024) 37 #define MTDOOPS_MAX_MTD_SIZE (8 * 1024 * 1024)
38 38
39 #define MTDOOPS_KERNMSG_MAGIC 0x5d005d00 39 #define MTDOOPS_KERNMSG_MAGIC 0x5d005d00
40 #define MTDOOPS_HEADER_SIZE 8 40 #define MTDOOPS_HEADER_SIZE 8
41 41
42 static unsigned long record_size = 4096; 42 static unsigned long record_size = 4096;
43 module_param(record_size, ulong, 0400); 43 module_param(record_size, ulong, 0400);
44 MODULE_PARM_DESC(record_size, 44 MODULE_PARM_DESC(record_size,
45 "record size for MTD OOPS pages in bytes (default 4096)"); 45 "record size for MTD OOPS pages in bytes (default 4096)");
46 46
47 static char mtddev[80]; 47 static char mtddev[80];
48 module_param_string(mtddev, mtddev, 80, 0400); 48 module_param_string(mtddev, mtddev, 80, 0400);
49 MODULE_PARM_DESC(mtddev, 49 MODULE_PARM_DESC(mtddev,
50 "name or index number of the MTD device to use"); 50 "name or index number of the MTD device to use");
51 51
52 static int dump_oops = 1; 52 static int dump_oops = 1;
53 module_param(dump_oops, int, 0600); 53 module_param(dump_oops, int, 0600);
54 MODULE_PARM_DESC(dump_oops, 54 MODULE_PARM_DESC(dump_oops,
55 "set to 1 to dump oopses, 0 to only dump panics (default 1)"); 55 "set to 1 to dump oopses, 0 to only dump panics (default 1)");
56 56
57 static struct mtdoops_context { 57 static struct mtdoops_context {
58 struct kmsg_dumper dump; 58 struct kmsg_dumper dump;
59 59
60 int mtd_index; 60 int mtd_index;
61 struct work_struct work_erase; 61 struct work_struct work_erase;
62 struct work_struct work_write; 62 struct work_struct work_write;
63 struct mtd_info *mtd; 63 struct mtd_info *mtd;
64 int oops_pages; 64 int oops_pages;
65 int nextpage; 65 int nextpage;
66 int nextcount; 66 int nextcount;
67 unsigned long *oops_page_used; 67 unsigned long *oops_page_used;
68 68
69 void *oops_buf; 69 void *oops_buf;
70 } oops_cxt; 70 } oops_cxt;
71 71
72 static void mark_page_used(struct mtdoops_context *cxt, int page) 72 static void mark_page_used(struct mtdoops_context *cxt, int page)
73 { 73 {
74 set_bit(page, cxt->oops_page_used); 74 set_bit(page, cxt->oops_page_used);
75 } 75 }
76 76
77 static void mark_page_unused(struct mtdoops_context *cxt, int page) 77 static void mark_page_unused(struct mtdoops_context *cxt, int page)
78 { 78 {
79 clear_bit(page, cxt->oops_page_used); 79 clear_bit(page, cxt->oops_page_used);
80 } 80 }
81 81
82 static int page_is_used(struct mtdoops_context *cxt, int page) 82 static int page_is_used(struct mtdoops_context *cxt, int page)
83 { 83 {
84 return test_bit(page, cxt->oops_page_used); 84 return test_bit(page, cxt->oops_page_used);
85 } 85 }
86 86
87 static void mtdoops_erase_callback(struct erase_info *done) 87 static void mtdoops_erase_callback(struct erase_info *done)
88 { 88 {
89 wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv; 89 wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv;
90 wake_up(wait_q); 90 wake_up(wait_q);
91 } 91 }
92 92
93 static int mtdoops_erase_block(struct mtdoops_context *cxt, int offset) 93 static int mtdoops_erase_block(struct mtdoops_context *cxt, int offset)
94 { 94 {
95 struct mtd_info *mtd = cxt->mtd; 95 struct mtd_info *mtd = cxt->mtd;
96 u32 start_page_offset = mtd_div_by_eb(offset, mtd) * mtd->erasesize; 96 u32 start_page_offset = mtd_div_by_eb(offset, mtd) * mtd->erasesize;
97 u32 start_page = start_page_offset / record_size; 97 u32 start_page = start_page_offset / record_size;
98 u32 erase_pages = mtd->erasesize / record_size; 98 u32 erase_pages = mtd->erasesize / record_size;
99 struct erase_info erase; 99 struct erase_info erase;
100 DECLARE_WAITQUEUE(wait, current); 100 DECLARE_WAITQUEUE(wait, current);
101 wait_queue_head_t wait_q; 101 wait_queue_head_t wait_q;
102 int ret; 102 int ret;
103 int page; 103 int page;
104 104
105 init_waitqueue_head(&wait_q); 105 init_waitqueue_head(&wait_q);
106 erase.mtd = mtd; 106 erase.mtd = mtd;
107 erase.callback = mtdoops_erase_callback; 107 erase.callback = mtdoops_erase_callback;
108 erase.addr = offset; 108 erase.addr = offset;
109 erase.len = mtd->erasesize; 109 erase.len = mtd->erasesize;
110 erase.priv = (u_long)&wait_q; 110 erase.priv = (u_long)&wait_q;
111 111
112 set_current_state(TASK_INTERRUPTIBLE); 112 set_current_state(TASK_INTERRUPTIBLE);
113 add_wait_queue(&wait_q, &wait); 113 add_wait_queue(&wait_q, &wait);
114 114
115 ret = mtd_erase(mtd, &erase); 115 ret = mtd_erase(mtd, &erase);
116 if (ret) { 116 if (ret) {
117 set_current_state(TASK_RUNNING); 117 set_current_state(TASK_RUNNING);
118 remove_wait_queue(&wait_q, &wait); 118 remove_wait_queue(&wait_q, &wait);
119 printk(KERN_WARNING "mtdoops: erase of region [0x%llx, 0x%llx] on \"%s\" failed\n", 119 printk(KERN_WARNING "mtdoops: erase of region [0x%llx, 0x%llx] on \"%s\" failed\n",
120 (unsigned long long)erase.addr, 120 (unsigned long long)erase.addr,
121 (unsigned long long)erase.len, mtddev); 121 (unsigned long long)erase.len, mtddev);
122 return ret; 122 return ret;
123 } 123 }
124 124
125 schedule(); /* Wait for erase to finish. */ 125 schedule(); /* Wait for erase to finish. */
126 remove_wait_queue(&wait_q, &wait); 126 remove_wait_queue(&wait_q, &wait);
127 127
128 /* Mark pages as unused */ 128 /* Mark pages as unused */
129 for (page = start_page; page < start_page + erase_pages; page++) 129 for (page = start_page; page < start_page + erase_pages; page++)
130 mark_page_unused(cxt, page); 130 mark_page_unused(cxt, page);
131 131
132 return 0; 132 return 0;
133 } 133 }
134 134
135 static void mtdoops_inc_counter(struct mtdoops_context *cxt) 135 static void mtdoops_inc_counter(struct mtdoops_context *cxt)
136 { 136 {
137 cxt->nextpage++; 137 cxt->nextpage++;
138 if (cxt->nextpage >= cxt->oops_pages) 138 if (cxt->nextpage >= cxt->oops_pages)
139 cxt->nextpage = 0; 139 cxt->nextpage = 0;
140 cxt->nextcount++; 140 cxt->nextcount++;
141 if (cxt->nextcount == 0xffffffff) 141 if (cxt->nextcount == 0xffffffff)
142 cxt->nextcount = 0; 142 cxt->nextcount = 0;
143 143
144 if (page_is_used(cxt, cxt->nextpage)) { 144 if (page_is_used(cxt, cxt->nextpage)) {
145 schedule_work(&cxt->work_erase); 145 schedule_work(&cxt->work_erase);
146 return; 146 return;
147 } 147 }
148 148
149 printk(KERN_DEBUG "mtdoops: ready %d, %d (no erase)\n", 149 printk(KERN_DEBUG "mtdoops: ready %d, %d (no erase)\n",
150 cxt->nextpage, cxt->nextcount); 150 cxt->nextpage, cxt->nextcount);
151 } 151 }
152 152
153 /* Scheduled work - when we can't proceed without erasing a block */ 153 /* Scheduled work - when we can't proceed without erasing a block */
154 static void mtdoops_workfunc_erase(struct work_struct *work) 154 static void mtdoops_workfunc_erase(struct work_struct *work)
155 { 155 {
156 struct mtdoops_context *cxt = 156 struct mtdoops_context *cxt =
157 container_of(work, struct mtdoops_context, work_erase); 157 container_of(work, struct mtdoops_context, work_erase);
158 struct mtd_info *mtd = cxt->mtd; 158 struct mtd_info *mtd = cxt->mtd;
159 int i = 0, j, ret, mod; 159 int i = 0, j, ret, mod;
160 160
161 /* We were unregistered */ 161 /* We were unregistered */
162 if (!mtd) 162 if (!mtd)
163 return; 163 return;
164 164
165 mod = (cxt->nextpage * record_size) % mtd->erasesize; 165 mod = (cxt->nextpage * record_size) % mtd->erasesize;
166 if (mod != 0) { 166 if (mod != 0) {
167 cxt->nextpage = cxt->nextpage + ((mtd->erasesize - mod) / record_size); 167 cxt->nextpage = cxt->nextpage + ((mtd->erasesize - mod) / record_size);
168 if (cxt->nextpage >= cxt->oops_pages) 168 if (cxt->nextpage >= cxt->oops_pages)
169 cxt->nextpage = 0; 169 cxt->nextpage = 0;
170 } 170 }
171 171
172 while (mtd_can_have_bb(mtd)) { 172 while (mtd_can_have_bb(mtd)) {
173 ret = mtd_block_isbad(mtd, cxt->nextpage * record_size); 173 ret = mtd_block_isbad(mtd, cxt->nextpage * record_size);
174 if (!ret) 174 if (!ret)
175 break; 175 break;
176 if (ret < 0) { 176 if (ret < 0) {
177 printk(KERN_ERR "mtdoops: block_isbad failed, aborting\n"); 177 printk(KERN_ERR "mtdoops: block_isbad failed, aborting\n");
178 return; 178 return;
179 } 179 }
180 badblock: 180 badblock:
181 printk(KERN_WARNING "mtdoops: bad block at %08lx\n", 181 printk(KERN_WARNING "mtdoops: bad block at %08lx\n",
182 cxt->nextpage * record_size); 182 cxt->nextpage * record_size);
183 i++; 183 i++;
184 cxt->nextpage = cxt->nextpage + (mtd->erasesize / record_size); 184 cxt->nextpage = cxt->nextpage + (mtd->erasesize / record_size);
185 if (cxt->nextpage >= cxt->oops_pages) 185 if (cxt->nextpage >= cxt->oops_pages)
186 cxt->nextpage = 0; 186 cxt->nextpage = 0;
187 if (i == cxt->oops_pages / (mtd->erasesize / record_size)) { 187 if (i == cxt->oops_pages / (mtd->erasesize / record_size)) {
188 printk(KERN_ERR "mtdoops: all blocks bad!\n"); 188 printk(KERN_ERR "mtdoops: all blocks bad!\n");
189 return; 189 return;
190 } 190 }
191 } 191 }
192 192
193 for (j = 0, ret = -1; (j < 3) && (ret < 0); j++) 193 for (j = 0, ret = -1; (j < 3) && (ret < 0); j++)
194 ret = mtdoops_erase_block(cxt, cxt->nextpage * record_size); 194 ret = mtdoops_erase_block(cxt, cxt->nextpage * record_size);
195 195
196 if (ret >= 0) { 196 if (ret >= 0) {
197 printk(KERN_DEBUG "mtdoops: ready %d, %d\n", 197 printk(KERN_DEBUG "mtdoops: ready %d, %d\n",
198 cxt->nextpage, cxt->nextcount); 198 cxt->nextpage, cxt->nextcount);
199 return; 199 return;
200 } 200 }
201 201
202 if (mtd_can_have_bb(mtd) && ret == -EIO) { 202 if (mtd_can_have_bb(mtd) && ret == -EIO) {
203 ret = mtd_block_markbad(mtd, cxt->nextpage * record_size); 203 ret = mtd_block_markbad(mtd, cxt->nextpage * record_size);
204 if (ret < 0) { 204 if (ret < 0) {
205 printk(KERN_ERR "mtdoops: block_markbad failed, aborting\n"); 205 printk(KERN_ERR "mtdoops: block_markbad failed, aborting\n");
206 return; 206 return;
207 } 207 }
208 } 208 }
209 goto badblock; 209 goto badblock;
210 } 210 }
211 211
212 static void mtdoops_write(struct mtdoops_context *cxt, int panic) 212 static void mtdoops_write(struct mtdoops_context *cxt, int panic)
213 { 213 {
214 struct mtd_info *mtd = cxt->mtd; 214 struct mtd_info *mtd = cxt->mtd;
215 size_t retlen; 215 size_t retlen;
216 u32 *hdr; 216 u32 *hdr;
217 int ret; 217 int ret;
218 218
219 /* Add mtdoops header to the buffer */ 219 /* Add mtdoops header to the buffer */
220 hdr = cxt->oops_buf; 220 hdr = cxt->oops_buf;
221 hdr[0] = cxt->nextcount; 221 hdr[0] = cxt->nextcount;
222 hdr[1] = MTDOOPS_KERNMSG_MAGIC; 222 hdr[1] = MTDOOPS_KERNMSG_MAGIC;
223 223
224 if (panic) { 224 if (panic) {
225 ret = mtd_panic_write(mtd, cxt->nextpage * record_size, 225 ret = mtd_panic_write(mtd, cxt->nextpage * record_size,
226 record_size, &retlen, cxt->oops_buf); 226 record_size, &retlen, cxt->oops_buf);
227 if (ret == -EOPNOTSUPP) { 227 if (ret == -EOPNOTSUPP) {
228 printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n"); 228 printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n");
229 return; 229 return;
230 } 230 }
231 } else 231 } else
232 ret = mtd_write(mtd, cxt->nextpage * record_size, 232 ret = mtd_write(mtd, cxt->nextpage * record_size,
233 record_size, &retlen, cxt->oops_buf); 233 record_size, &retlen, cxt->oops_buf);
234 234
235 if (retlen != record_size || ret < 0) 235 if (retlen != record_size || ret < 0)
236 printk(KERN_ERR "mtdoops: write failure at %ld (%td of %ld written), error %d\n", 236 printk(KERN_ERR "mtdoops: write failure at %ld (%td of %ld written), error %d\n",
237 cxt->nextpage * record_size, retlen, record_size, ret); 237 cxt->nextpage * record_size, retlen, record_size, ret);
238 mark_page_used(cxt, cxt->nextpage); 238 mark_page_used(cxt, cxt->nextpage);
239 memset(cxt->oops_buf, 0xff, record_size); 239 memset(cxt->oops_buf, 0xff, record_size);
240 240
241 mtdoops_inc_counter(cxt); 241 mtdoops_inc_counter(cxt);
242 } 242 }
243 243
244 static void mtdoops_workfunc_write(struct work_struct *work) 244 static void mtdoops_workfunc_write(struct work_struct *work)
245 { 245 {
246 struct mtdoops_context *cxt = 246 struct mtdoops_context *cxt =
247 container_of(work, struct mtdoops_context, work_write); 247 container_of(work, struct mtdoops_context, work_write);
248 248
249 mtdoops_write(cxt, 0); 249 mtdoops_write(cxt, 0);
250 } 250 }
251 251
252 static void find_next_position(struct mtdoops_context *cxt) 252 static void find_next_position(struct mtdoops_context *cxt)
253 { 253 {
254 struct mtd_info *mtd = cxt->mtd; 254 struct mtd_info *mtd = cxt->mtd;
255 int ret, page, maxpos = 0; 255 int ret, page, maxpos = 0;
256 u32 count[2], maxcount = 0xffffffff; 256 u32 count[2], maxcount = 0xffffffff;
257 size_t retlen; 257 size_t retlen;
258 258
259 for (page = 0; page < cxt->oops_pages; page++) { 259 for (page = 0; page < cxt->oops_pages; page++) {
260 if (mtd_can_have_bb(mtd) && 260 if (mtd_can_have_bb(mtd) &&
261 mtd_block_isbad(mtd, page * record_size)) 261 mtd_block_isbad(mtd, page * record_size))
262 continue; 262 continue;
263 /* Assume the page is used */ 263 /* Assume the page is used */
264 mark_page_used(cxt, page); 264 mark_page_used(cxt, page);
265 ret = mtd_read(mtd, page * record_size, MTDOOPS_HEADER_SIZE, 265 ret = mtd_read(mtd, page * record_size, MTDOOPS_HEADER_SIZE,
266 &retlen, (u_char *)&count[0]); 266 &retlen, (u_char *)&count[0]);
267 if (retlen != MTDOOPS_HEADER_SIZE || 267 if (retlen != MTDOOPS_HEADER_SIZE ||
268 (ret < 0 && !mtd_is_bitflip(ret))) { 268 (ret < 0 && !mtd_is_bitflip(ret))) {
269 printk(KERN_ERR "mtdoops: read failure at %ld (%td of %d read), err %d\n", 269 printk(KERN_ERR "mtdoops: read failure at %ld (%td of %d read), err %d\n",
270 page * record_size, retlen, 270 page * record_size, retlen,
271 MTDOOPS_HEADER_SIZE, ret); 271 MTDOOPS_HEADER_SIZE, ret);
272 continue; 272 continue;
273 } 273 }
274 274
275 if (count[0] == 0xffffffff && count[1] == 0xffffffff) 275 if (count[0] == 0xffffffff && count[1] == 0xffffffff)
276 mark_page_unused(cxt, page); 276 mark_page_unused(cxt, page);
277 if (count[0] == 0xffffffff) 277 if (count[0] == 0xffffffff)
278 continue; 278 continue;
279 if (maxcount == 0xffffffff) { 279 if (maxcount == 0xffffffff) {
280 maxcount = count[0]; 280 maxcount = count[0];
281 maxpos = page; 281 maxpos = page;
282 } else if (count[0] < 0x40000000 && maxcount > 0xc0000000) { 282 } else if (count[0] < 0x40000000 && maxcount > 0xc0000000) {
283 maxcount = count[0]; 283 maxcount = count[0];
284 maxpos = page; 284 maxpos = page;
285 } else if (count[0] > maxcount && count[0] < 0xc0000000) { 285 } else if (count[0] > maxcount && count[0] < 0xc0000000) {
286 maxcount = count[0]; 286 maxcount = count[0];
287 maxpos = page; 287 maxpos = page;
288 } else if (count[0] > maxcount && count[0] > 0xc0000000 288 } else if (count[0] > maxcount && count[0] > 0xc0000000
289 && maxcount > 0x80000000) { 289 && maxcount > 0x80000000) {
290 maxcount = count[0]; 290 maxcount = count[0];
291 maxpos = page; 291 maxpos = page;
292 } 292 }
293 } 293 }
294 if (maxcount == 0xffffffff) { 294 if (maxcount == 0xffffffff) {
295 cxt->nextpage = 0; 295 cxt->nextpage = 0;
296 cxt->nextcount = 1; 296 cxt->nextcount = 1;
297 schedule_work(&cxt->work_erase); 297 schedule_work(&cxt->work_erase);
298 return; 298 return;
299 } 299 }
300 300
301 cxt->nextpage = maxpos; 301 cxt->nextpage = maxpos;
302 cxt->nextcount = maxcount; 302 cxt->nextcount = maxcount;
303 303
304 mtdoops_inc_counter(cxt); 304 mtdoops_inc_counter(cxt);
305 } 305 }
306 306
307 static void mtdoops_do_dump(struct kmsg_dumper *dumper, 307 static void mtdoops_do_dump(struct kmsg_dumper *dumper,
308 enum kmsg_dump_reason reason, const char *s1, unsigned long l1, 308 enum kmsg_dump_reason reason, const char *s1, unsigned long l1,
309 const char *s2, unsigned long l2) 309 const char *s2, unsigned long l2)
310 { 310 {
311 struct mtdoops_context *cxt = container_of(dumper, 311 struct mtdoops_context *cxt = container_of(dumper,
312 struct mtdoops_context, dump); 312 struct mtdoops_context, dump);
313 unsigned long s1_start, s2_start; 313 unsigned long s1_start, s2_start;
314 unsigned long l1_cpy, l2_cpy; 314 unsigned long l1_cpy, l2_cpy;
315 char *dst; 315 char *dst;
316 316
317 if (reason != KMSG_DUMP_OOPS && 317 if (reason != KMSG_DUMP_OOPS &&
318 reason != KMSG_DUMP_PANIC && 318 reason != KMSG_DUMP_PANIC)
319 reason != KMSG_DUMP_KEXEC)
320 return; 319 return;
321 320
322 /* Only dump oopses if dump_oops is set */ 321 /* Only dump oopses if dump_oops is set */
323 if (reason == KMSG_DUMP_OOPS && !dump_oops) 322 if (reason == KMSG_DUMP_OOPS && !dump_oops)
324 return; 323 return;
325 324
326 dst = cxt->oops_buf + MTDOOPS_HEADER_SIZE; /* Skip the header */ 325 dst = cxt->oops_buf + MTDOOPS_HEADER_SIZE; /* Skip the header */
327 l2_cpy = min(l2, record_size - MTDOOPS_HEADER_SIZE); 326 l2_cpy = min(l2, record_size - MTDOOPS_HEADER_SIZE);
328 l1_cpy = min(l1, record_size - MTDOOPS_HEADER_SIZE - l2_cpy); 327 l1_cpy = min(l1, record_size - MTDOOPS_HEADER_SIZE - l2_cpy);
329 328
330 s2_start = l2 - l2_cpy; 329 s2_start = l2 - l2_cpy;
331 s1_start = l1 - l1_cpy; 330 s1_start = l1 - l1_cpy;
332 331
333 memcpy(dst, s1 + s1_start, l1_cpy); 332 memcpy(dst, s1 + s1_start, l1_cpy);
334 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); 333 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
335 334
336 /* Panics must be written immediately */ 335 /* Panics must be written immediately */
337 if (reason != KMSG_DUMP_OOPS) 336 if (reason != KMSG_DUMP_OOPS)
338 mtdoops_write(cxt, 1); 337 mtdoops_write(cxt, 1);
339 338
340 /* For other cases, schedule work to write it "nicely" */ 339 /* For other cases, schedule work to write it "nicely" */
341 schedule_work(&cxt->work_write); 340 schedule_work(&cxt->work_write);
342 } 341 }
343 342
344 static void mtdoops_notify_add(struct mtd_info *mtd) 343 static void mtdoops_notify_add(struct mtd_info *mtd)
345 { 344 {
346 struct mtdoops_context *cxt = &oops_cxt; 345 struct mtdoops_context *cxt = &oops_cxt;
347 u64 mtdoops_pages = div_u64(mtd->size, record_size); 346 u64 mtdoops_pages = div_u64(mtd->size, record_size);
348 int err; 347 int err;
349 348
350 if (!strcmp(mtd->name, mtddev)) 349 if (!strcmp(mtd->name, mtddev))
351 cxt->mtd_index = mtd->index; 350 cxt->mtd_index = mtd->index;
352 351
353 if (mtd->index != cxt->mtd_index || cxt->mtd_index < 0) 352 if (mtd->index != cxt->mtd_index || cxt->mtd_index < 0)
354 return; 353 return;
355 354
356 if (mtd->size < mtd->erasesize * 2) { 355 if (mtd->size < mtd->erasesize * 2) {
357 printk(KERN_ERR "mtdoops: MTD partition %d not big enough for mtdoops\n", 356 printk(KERN_ERR "mtdoops: MTD partition %d not big enough for mtdoops\n",
358 mtd->index); 357 mtd->index);
359 return; 358 return;
360 } 359 }
361 if (mtd->erasesize < record_size) { 360 if (mtd->erasesize < record_size) {
362 printk(KERN_ERR "mtdoops: eraseblock size of MTD partition %d too small\n", 361 printk(KERN_ERR "mtdoops: eraseblock size of MTD partition %d too small\n",
363 mtd->index); 362 mtd->index);
364 return; 363 return;
365 } 364 }
366 if (mtd->size > MTDOOPS_MAX_MTD_SIZE) { 365 if (mtd->size > MTDOOPS_MAX_MTD_SIZE) {
367 printk(KERN_ERR "mtdoops: mtd%d is too large (limit is %d MiB)\n", 366 printk(KERN_ERR "mtdoops: mtd%d is too large (limit is %d MiB)\n",
368 mtd->index, MTDOOPS_MAX_MTD_SIZE / 1024 / 1024); 367 mtd->index, MTDOOPS_MAX_MTD_SIZE / 1024 / 1024);
369 return; 368 return;
370 } 369 }
371 370
372 /* oops_page_used is a bit field */ 371 /* oops_page_used is a bit field */
373 cxt->oops_page_used = vmalloc(DIV_ROUND_UP(mtdoops_pages, 372 cxt->oops_page_used = vmalloc(DIV_ROUND_UP(mtdoops_pages,
374 BITS_PER_LONG) * sizeof(unsigned long)); 373 BITS_PER_LONG) * sizeof(unsigned long));
375 if (!cxt->oops_page_used) { 374 if (!cxt->oops_page_used) {
376 printk(KERN_ERR "mtdoops: could not allocate page array\n"); 375 printk(KERN_ERR "mtdoops: could not allocate page array\n");
377 return; 376 return;
378 } 377 }
379 378
380 cxt->dump.dump = mtdoops_do_dump; 379 cxt->dump.dump = mtdoops_do_dump;
381 err = kmsg_dump_register(&cxt->dump); 380 err = kmsg_dump_register(&cxt->dump);
382 if (err) { 381 if (err) {
383 printk(KERN_ERR "mtdoops: registering kmsg dumper failed, error %d\n", err); 382 printk(KERN_ERR "mtdoops: registering kmsg dumper failed, error %d\n", err);
384 vfree(cxt->oops_page_used); 383 vfree(cxt->oops_page_used);
385 cxt->oops_page_used = NULL; 384 cxt->oops_page_used = NULL;
386 return; 385 return;
387 } 386 }
388 387
389 cxt->mtd = mtd; 388 cxt->mtd = mtd;
390 cxt->oops_pages = (int)mtd->size / record_size; 389 cxt->oops_pages = (int)mtd->size / record_size;
391 find_next_position(cxt); 390 find_next_position(cxt);
392 printk(KERN_INFO "mtdoops: Attached to MTD device %d\n", mtd->index); 391 printk(KERN_INFO "mtdoops: Attached to MTD device %d\n", mtd->index);
393 } 392 }
394 393
395 static void mtdoops_notify_remove(struct mtd_info *mtd) 394 static void mtdoops_notify_remove(struct mtd_info *mtd)
396 { 395 {
397 struct mtdoops_context *cxt = &oops_cxt; 396 struct mtdoops_context *cxt = &oops_cxt;
398 397
399 if (mtd->index != cxt->mtd_index || cxt->mtd_index < 0) 398 if (mtd->index != cxt->mtd_index || cxt->mtd_index < 0)
400 return; 399 return;
401 400
402 if (kmsg_dump_unregister(&cxt->dump) < 0) 401 if (kmsg_dump_unregister(&cxt->dump) < 0)
403 printk(KERN_WARNING "mtdoops: could not unregister kmsg_dumper\n"); 402 printk(KERN_WARNING "mtdoops: could not unregister kmsg_dumper\n");
404 403
405 cxt->mtd = NULL; 404 cxt->mtd = NULL;
406 flush_work_sync(&cxt->work_erase); 405 flush_work_sync(&cxt->work_erase);
407 flush_work_sync(&cxt->work_write); 406 flush_work_sync(&cxt->work_write);
408 } 407 }
409 408
410 409
411 static struct mtd_notifier mtdoops_notifier = { 410 static struct mtd_notifier mtdoops_notifier = {
412 .add = mtdoops_notify_add, 411 .add = mtdoops_notify_add,
413 .remove = mtdoops_notify_remove, 412 .remove = mtdoops_notify_remove,
414 }; 413 };
415 414
416 static int __init mtdoops_init(void) 415 static int __init mtdoops_init(void)
417 { 416 {
418 struct mtdoops_context *cxt = &oops_cxt; 417 struct mtdoops_context *cxt = &oops_cxt;
419 int mtd_index; 418 int mtd_index;
420 char *endp; 419 char *endp;
421 420
422 if (strlen(mtddev) == 0) { 421 if (strlen(mtddev) == 0) {
423 printk(KERN_ERR "mtdoops: mtd device (mtddev=name/number) must be supplied\n"); 422 printk(KERN_ERR "mtdoops: mtd device (mtddev=name/number) must be supplied\n");
424 return -EINVAL; 423 return -EINVAL;
425 } 424 }
426 if ((record_size & 4095) != 0) { 425 if ((record_size & 4095) != 0) {
427 printk(KERN_ERR "mtdoops: record_size must be a multiple of 4096\n"); 426 printk(KERN_ERR "mtdoops: record_size must be a multiple of 4096\n");
428 return -EINVAL; 427 return -EINVAL;
429 } 428 }
430 if (record_size < 4096) { 429 if (record_size < 4096) {
431 printk(KERN_ERR "mtdoops: record_size must be over 4096 bytes\n"); 430 printk(KERN_ERR "mtdoops: record_size must be over 4096 bytes\n");
432 return -EINVAL; 431 return -EINVAL;
433 } 432 }
434 433
435 /* Setup the MTD device to use */ 434 /* Setup the MTD device to use */
436 cxt->mtd_index = -1; 435 cxt->mtd_index = -1;
437 mtd_index = simple_strtoul(mtddev, &endp, 0); 436 mtd_index = simple_strtoul(mtddev, &endp, 0);
438 if (*endp == '\0') 437 if (*endp == '\0')
439 cxt->mtd_index = mtd_index; 438 cxt->mtd_index = mtd_index;
440 439
441 cxt->oops_buf = vmalloc(record_size); 440 cxt->oops_buf = vmalloc(record_size);
442 if (!cxt->oops_buf) { 441 if (!cxt->oops_buf) {
443 printk(KERN_ERR "mtdoops: failed to allocate buffer workspace\n"); 442 printk(KERN_ERR "mtdoops: failed to allocate buffer workspace\n");
444 return -ENOMEM; 443 return -ENOMEM;
445 } 444 }
446 memset(cxt->oops_buf, 0xff, record_size); 445 memset(cxt->oops_buf, 0xff, record_size);
447 446
448 INIT_WORK(&cxt->work_erase, mtdoops_workfunc_erase); 447 INIT_WORK(&cxt->work_erase, mtdoops_workfunc_erase);
449 INIT_WORK(&cxt->work_write, mtdoops_workfunc_write); 448 INIT_WORK(&cxt->work_write, mtdoops_workfunc_write);
450 449
451 register_mtd_user(&mtdoops_notifier); 450 register_mtd_user(&mtdoops_notifier);
452 return 0; 451 return 0;
453 } 452 }
454 453
455 static void __exit mtdoops_exit(void) 454 static void __exit mtdoops_exit(void)
456 { 455 {
457 struct mtdoops_context *cxt = &oops_cxt; 456 struct mtdoops_context *cxt = &oops_cxt;
458 457
459 unregister_mtd_user(&mtdoops_notifier); 458 unregister_mtd_user(&mtdoops_notifier);
460 vfree(cxt->oops_buf); 459 vfree(cxt->oops_buf);
461 vfree(cxt->oops_page_used); 460 vfree(cxt->oops_page_used);
462 } 461 }
463 462
464 463
465 module_init(mtdoops_init); 464 module_init(mtdoops_init);
466 module_exit(mtdoops_exit); 465 module_exit(mtdoops_exit);
467 466
468 MODULE_LICENSE("GPL"); 467 MODULE_LICENSE("GPL");
469 MODULE_AUTHOR("Richard Purdie <rpurdie@openedhand.com>"); 468 MODULE_AUTHOR("Richard Purdie <rpurdie@openedhand.com>");
470 MODULE_DESCRIPTION("MTD Oops/Panic console logger/driver"); 469 MODULE_DESCRIPTION("MTD Oops/Panic console logger/driver");
471 470
include/linux/kmsg_dump.h
1 /* 1 /*
2 * linux/include/kmsg_dump.h 2 * linux/include/kmsg_dump.h
3 * 3 *
4 * Copyright (C) 2009 Net Insight AB 4 * Copyright (C) 2009 Net Insight AB
5 * 5 *
6 * Author: Simon Kagstrom <simon.kagstrom@netinsight.net> 6 * Author: Simon Kagstrom <simon.kagstrom@netinsight.net>
7 * 7 *
8 * This file is subject to the terms and conditions of the GNU General Public 8 * This file is subject to the terms and conditions of the GNU General Public
9 * License. See the file COPYING in the main directory of this archive 9 * License. See the file COPYING in the main directory of this archive
10 * for more details. 10 * for more details.
11 */ 11 */
12 #ifndef _LINUX_KMSG_DUMP_H 12 #ifndef _LINUX_KMSG_DUMP_H
13 #define _LINUX_KMSG_DUMP_H 13 #define _LINUX_KMSG_DUMP_H
14 14
15 #include <linux/errno.h> 15 #include <linux/errno.h>
16 #include <linux/list.h> 16 #include <linux/list.h>
17 17
/*
 * Reason codes passed to kmsg_dump() and on to each dumper's callback.
 * There is no kexec reason: dumpers are not invoked on the kexec crash path.
 */
enum kmsg_dump_reason {
	KMSG_DUMP_OOPS,
	KMSG_DUMP_PANIC,
	KMSG_DUMP_RESTART,
	KMSG_DUMP_HALT,
	KMSG_DUMP_POWEROFF,
	KMSG_DUMP_EMERG,
};
27 26
28 /** 27 /**
29 * struct kmsg_dumper - kernel crash message dumper structure 28 * struct kmsg_dumper - kernel crash message dumper structure
30 * @dump: The callback which gets called on crashes. The buffer is passed 29 * @dump: The callback which gets called on crashes. The buffer is passed
31 * as two sections, where s1 (length l1) contains the older 30 * as two sections, where s1 (length l1) contains the older
32 * messages and s2 (length l2) contains the newer. 31 * messages and s2 (length l2) contains the newer.
33 * @list: Entry in the dumper list (private) 32 * @list: Entry in the dumper list (private)
34 * @registered: Flag that specifies if this is already registered 33 * @registered: Flag that specifies if this is already registered
35 */ 34 */
36 struct kmsg_dumper { 35 struct kmsg_dumper {
37 void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason, 36 void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason,
38 const char *s1, unsigned long l1, 37 const char *s1, unsigned long l1,
39 const char *s2, unsigned long l2); 38 const char *s2, unsigned long l2);
40 struct list_head list; 39 struct list_head list;
41 int registered; 40 int registered;
42 }; 41 };
43 42
44 #ifdef CONFIG_PRINTK 43 #ifdef CONFIG_PRINTK
45 void kmsg_dump(enum kmsg_dump_reason reason); 44 void kmsg_dump(enum kmsg_dump_reason reason);
46 45
47 int kmsg_dump_register(struct kmsg_dumper *dumper); 46 int kmsg_dump_register(struct kmsg_dumper *dumper);
48 47
49 int kmsg_dump_unregister(struct kmsg_dumper *dumper); 48 int kmsg_dump_unregister(struct kmsg_dumper *dumper);
50 #else 49 #else
51 static inline void kmsg_dump(enum kmsg_dump_reason reason) 50 static inline void kmsg_dump(enum kmsg_dump_reason reason)
52 { 51 {
53 } 52 }
54 53
55 static inline int kmsg_dump_register(struct kmsg_dumper *dumper) 54 static inline int kmsg_dump_register(struct kmsg_dumper *dumper)
56 { 55 {
57 return -EINVAL; 56 return -EINVAL;
58 } 57 }
59 58
60 static inline int kmsg_dump_unregister(struct kmsg_dumper *dumper) 59 static inline int kmsg_dump_unregister(struct kmsg_dumper *dumper)
61 { 60 {
62 return -EINVAL; 61 return -EINVAL;
63 } 62 }
64 #endif 63 #endif
65 64
66 #endif /* _LINUX_KMSG_DUMP_H */ 65 #endif /* _LINUX_KMSG_DUMP_H */
67 66
1 /* 1 /*
2 * kexec.c - kexec system call 2 * kexec.c - kexec system call
3 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> 3 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
4 * 4 *
5 * This source code is licensed under the GNU General Public License, 5 * This source code is licensed under the GNU General Public License,
6 * Version 2. See the file COPYING for more details. 6 * Version 2. See the file COPYING for more details.
7 */ 7 */
8 8
9 #include <linux/capability.h> 9 #include <linux/capability.h>
10 #include <linux/mm.h> 10 #include <linux/mm.h>
11 #include <linux/file.h> 11 #include <linux/file.h>
12 #include <linux/slab.h> 12 #include <linux/slab.h>
13 #include <linux/fs.h> 13 #include <linux/fs.h>
14 #include <linux/kexec.h> 14 #include <linux/kexec.h>
15 #include <linux/mutex.h> 15 #include <linux/mutex.h>
16 #include <linux/list.h> 16 #include <linux/list.h>
17 #include <linux/highmem.h> 17 #include <linux/highmem.h>
18 #include <linux/syscalls.h> 18 #include <linux/syscalls.h>
19 #include <linux/reboot.h> 19 #include <linux/reboot.h>
20 #include <linux/ioport.h> 20 #include <linux/ioport.h>
21 #include <linux/hardirq.h> 21 #include <linux/hardirq.h>
22 #include <linux/elf.h> 22 #include <linux/elf.h>
23 #include <linux/elfcore.h> 23 #include <linux/elfcore.h>
24 #include <generated/utsrelease.h> 24 #include <generated/utsrelease.h>
25 #include <linux/utsname.h> 25 #include <linux/utsname.h>
26 #include <linux/numa.h> 26 #include <linux/numa.h>
27 #include <linux/suspend.h> 27 #include <linux/suspend.h>
28 #include <linux/device.h> 28 #include <linux/device.h>
29 #include <linux/freezer.h> 29 #include <linux/freezer.h>
30 #include <linux/pm.h> 30 #include <linux/pm.h>
31 #include <linux/cpu.h> 31 #include <linux/cpu.h>
32 #include <linux/console.h> 32 #include <linux/console.h>
33 #include <linux/vmalloc.h> 33 #include <linux/vmalloc.h>
34 #include <linux/swap.h> 34 #include <linux/swap.h>
35 #include <linux/kmsg_dump.h>
36 #include <linux/syscore_ops.h> 35 #include <linux/syscore_ops.h>
37 36
38 #include <asm/page.h> 37 #include <asm/page.h>
39 #include <asm/uaccess.h> 38 #include <asm/uaccess.h>
40 #include <asm/io.h> 39 #include <asm/io.h>
41 #include <asm/system.h> 40 #include <asm/system.h>
42 #include <asm/sections.h> 41 #include <asm/sections.h>
43 42
44 /* Per cpu memory for storing cpu states in case of system crash. */ 43 /* Per cpu memory for storing cpu states in case of system crash. */
45 note_buf_t __percpu *crash_notes; 44 note_buf_t __percpu *crash_notes;
46 45
47 /* vmcoreinfo stuff */ 46 /* vmcoreinfo stuff */
48 static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; 47 static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
49 u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; 48 u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
50 size_t vmcoreinfo_size; 49 size_t vmcoreinfo_size;
51 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); 50 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
52 51
53 /* Location of the reserved area for the crash kernel */ 52 /* Location of the reserved area for the crash kernel */
54 struct resource crashk_res = { 53 struct resource crashk_res = {
55 .name = "Crash kernel", 54 .name = "Crash kernel",
56 .start = 0, 55 .start = 0,
57 .end = 0, 56 .end = 0,
58 .flags = IORESOURCE_BUSY | IORESOURCE_MEM 57 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
59 }; 58 };
60 59
61 int kexec_should_crash(struct task_struct *p) 60 int kexec_should_crash(struct task_struct *p)
62 { 61 {
63 if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) 62 if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
64 return 1; 63 return 1;
65 return 0; 64 return 0;
66 } 65 }
67 66
68 /* 67 /*
69 * When kexec transitions to the new kernel there is a one-to-one 68 * When kexec transitions to the new kernel there is a one-to-one
70 * mapping between physical and virtual addresses. On processors 69 * mapping between physical and virtual addresses. On processors
71 * where you can disable the MMU this is trivial, and easy. For 70 * where you can disable the MMU this is trivial, and easy. For
72 * others it is still a simple predictable page table to setup. 71 * others it is still a simple predictable page table to setup.
73 * 72 *
74 * In that environment kexec copies the new kernel to its final 73 * In that environment kexec copies the new kernel to its final
75 * resting place. This means I can only support memory whose 74 * resting place. This means I can only support memory whose
76 * physical address can fit in an unsigned long. In particular 75 * physical address can fit in an unsigned long. In particular
77 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. 76 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
78 * If the assembly stub has more restrictive requirements 77 * If the assembly stub has more restrictive requirements
79 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be 78 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
80 * defined more restrictively in <asm/kexec.h>. 79 * defined more restrictively in <asm/kexec.h>.
81 * 80 *
82 * The code for the transition from the current kernel to the 81 * The code for the transition from the current kernel to the
83 * new kernel is placed in the control_code_buffer, whose size 82 * new kernel is placed in the control_code_buffer, whose size
84 * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single 83 * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
85 * page of memory is necessary, but some architectures require more. 84 * page of memory is necessary, but some architectures require more.
86 * Because this memory must be identity mapped in the transition from 85 * Because this memory must be identity mapped in the transition from
87 * virtual to physical addresses it must live in the range 86 * virtual to physical addresses it must live in the range
88 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily 87 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
89 * modifiable. 88 * modifiable.
90 * 89 *
91 * The assembly stub in the control code buffer is passed a linked list 90 * The assembly stub in the control code buffer is passed a linked list
92 * of descriptor pages detailing the source pages of the new kernel, 91 * of descriptor pages detailing the source pages of the new kernel,
93 * and the destination addresses of those source pages. As this data 92 * and the destination addresses of those source pages. As this data
94 * structure is not used in the context of the current OS, it must 93 * structure is not used in the context of the current OS, it must
95 * be self-contained. 94 * be self-contained.
96 * 95 *
97 * The code has been made to work with highmem pages and will use a 96 * The code has been made to work with highmem pages and will use a
98 * destination page in its final resting place (if it happens 97 * destination page in its final resting place (if it happens
99 * to allocate it). The end product of this is that most of the 98 * to allocate it). The end product of this is that most of the
100 * physical address space, and most of RAM can be used. 99 * physical address space, and most of RAM can be used.
101 * 100 *
102 * Future directions include: 101 * Future directions include:
103 * - allocating a page table with the control code buffer identity 102 * - allocating a page table with the control code buffer identity
104 * mapped, to simplify machine_kexec and make kexec_on_panic more 103 * mapped, to simplify machine_kexec and make kexec_on_panic more
105 * reliable. 104 * reliable.
106 */ 105 */
107 106
/*
 * KIMAGE_NO_DEST is a destination address that can never be valid. It is
 * passed when allocating pages whose destination we do not care about.
 */
#define KIMAGE_NO_DEST (~0UL)
113 112
114 static int kimage_is_destination_range(struct kimage *image, 113 static int kimage_is_destination_range(struct kimage *image,
115 unsigned long start, unsigned long end); 114 unsigned long start, unsigned long end);
116 static struct page *kimage_alloc_page(struct kimage *image, 115 static struct page *kimage_alloc_page(struct kimage *image,
117 gfp_t gfp_mask, 116 gfp_t gfp_mask,
118 unsigned long dest); 117 unsigned long dest);
119 118
120 static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, 119 static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
121 unsigned long nr_segments, 120 unsigned long nr_segments,
122 struct kexec_segment __user *segments) 121 struct kexec_segment __user *segments)
123 { 122 {
124 size_t segment_bytes; 123 size_t segment_bytes;
125 struct kimage *image; 124 struct kimage *image;
126 unsigned long i; 125 unsigned long i;
127 int result; 126 int result;
128 127
129 /* Allocate a controlling structure */ 128 /* Allocate a controlling structure */
130 result = -ENOMEM; 129 result = -ENOMEM;
131 image = kzalloc(sizeof(*image), GFP_KERNEL); 130 image = kzalloc(sizeof(*image), GFP_KERNEL);
132 if (!image) 131 if (!image)
133 goto out; 132 goto out;
134 133
135 image->head = 0; 134 image->head = 0;
136 image->entry = &image->head; 135 image->entry = &image->head;
137 image->last_entry = &image->head; 136 image->last_entry = &image->head;
138 image->control_page = ~0; /* By default this does not apply */ 137 image->control_page = ~0; /* By default this does not apply */
139 image->start = entry; 138 image->start = entry;
140 image->type = KEXEC_TYPE_DEFAULT; 139 image->type = KEXEC_TYPE_DEFAULT;
141 140
142 /* Initialize the list of control pages */ 141 /* Initialize the list of control pages */
143 INIT_LIST_HEAD(&image->control_pages); 142 INIT_LIST_HEAD(&image->control_pages);
144 143
145 /* Initialize the list of destination pages */ 144 /* Initialize the list of destination pages */
146 INIT_LIST_HEAD(&image->dest_pages); 145 INIT_LIST_HEAD(&image->dest_pages);
147 146
148 /* Initialize the list of unusable pages */ 147 /* Initialize the list of unusable pages */
149 INIT_LIST_HEAD(&image->unuseable_pages); 148 INIT_LIST_HEAD(&image->unuseable_pages);
150 149
151 /* Read in the segments */ 150 /* Read in the segments */
152 image->nr_segments = nr_segments; 151 image->nr_segments = nr_segments;
153 segment_bytes = nr_segments * sizeof(*segments); 152 segment_bytes = nr_segments * sizeof(*segments);
154 result = copy_from_user(image->segment, segments, segment_bytes); 153 result = copy_from_user(image->segment, segments, segment_bytes);
155 if (result) { 154 if (result) {
156 result = -EFAULT; 155 result = -EFAULT;
157 goto out; 156 goto out;
158 } 157 }
159 158
160 /* 159 /*
161 * Verify we have good destination addresses. The caller is 160 * Verify we have good destination addresses. The caller is
162 * responsible for making certain we don't attempt to load 161 * responsible for making certain we don't attempt to load
163 * the new image into invalid or reserved areas of RAM. This 162 * the new image into invalid or reserved areas of RAM. This
164 * just verifies it is an address we can use. 163 * just verifies it is an address we can use.
165 * 164 *
166 * Since the kernel does everything in page size chunks ensure 165 * Since the kernel does everything in page size chunks ensure
167 * the destination addresses are page aligned. Too many 166 * the destination addresses are page aligned. Too many
168 * special cases crop of when we don't do this. The most 167 * special cases crop of when we don't do this. The most
169 * insidious is getting overlapping destination addresses 168 * insidious is getting overlapping destination addresses
170 * simply because addresses are changed to page size 169 * simply because addresses are changed to page size
171 * granularity. 170 * granularity.
172 */ 171 */
173 result = -EADDRNOTAVAIL; 172 result = -EADDRNOTAVAIL;
174 for (i = 0; i < nr_segments; i++) { 173 for (i = 0; i < nr_segments; i++) {
175 unsigned long mstart, mend; 174 unsigned long mstart, mend;
176 175
177 mstart = image->segment[i].mem; 176 mstart = image->segment[i].mem;
178 mend = mstart + image->segment[i].memsz; 177 mend = mstart + image->segment[i].memsz;
179 if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) 178 if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
180 goto out; 179 goto out;
181 if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) 180 if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
182 goto out; 181 goto out;
183 } 182 }
184 183
185 /* Verify our destination addresses do not overlap. 184 /* Verify our destination addresses do not overlap.
186 * If we alloed overlapping destination addresses 185 * If we alloed overlapping destination addresses
187 * through very weird things can happen with no 186 * through very weird things can happen with no
188 * easy explanation as one segment stops on another. 187 * easy explanation as one segment stops on another.
189 */ 188 */
190 result = -EINVAL; 189 result = -EINVAL;
191 for (i = 0; i < nr_segments; i++) { 190 for (i = 0; i < nr_segments; i++) {
192 unsigned long mstart, mend; 191 unsigned long mstart, mend;
193 unsigned long j; 192 unsigned long j;
194 193
195 mstart = image->segment[i].mem; 194 mstart = image->segment[i].mem;
196 mend = mstart + image->segment[i].memsz; 195 mend = mstart + image->segment[i].memsz;
197 for (j = 0; j < i; j++) { 196 for (j = 0; j < i; j++) {
198 unsigned long pstart, pend; 197 unsigned long pstart, pend;
199 pstart = image->segment[j].mem; 198 pstart = image->segment[j].mem;
200 pend = pstart + image->segment[j].memsz; 199 pend = pstart + image->segment[j].memsz;
201 /* Do the segments overlap ? */ 200 /* Do the segments overlap ? */
202 if ((mend > pstart) && (mstart < pend)) 201 if ((mend > pstart) && (mstart < pend))
203 goto out; 202 goto out;
204 } 203 }
205 } 204 }
206 205
207 /* Ensure our buffer sizes are strictly less than 206 /* Ensure our buffer sizes are strictly less than
208 * our memory sizes. This should always be the case, 207 * our memory sizes. This should always be the case,
209 * and it is easier to check up front than to be surprised 208 * and it is easier to check up front than to be surprised
210 * later on. 209 * later on.
211 */ 210 */
212 result = -EINVAL; 211 result = -EINVAL;
213 for (i = 0; i < nr_segments; i++) { 212 for (i = 0; i < nr_segments; i++) {
214 if (image->segment[i].bufsz > image->segment[i].memsz) 213 if (image->segment[i].bufsz > image->segment[i].memsz)
215 goto out; 214 goto out;
216 } 215 }
217 216
218 result = 0; 217 result = 0;
219 out: 218 out:
220 if (result == 0) 219 if (result == 0)
221 *rimage = image; 220 *rimage = image;
222 else 221 else
223 kfree(image); 222 kfree(image);
224 223
225 return result; 224 return result;
226 225
227 } 226 }
228 227
229 static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, 228 static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
230 unsigned long nr_segments, 229 unsigned long nr_segments,
231 struct kexec_segment __user *segments) 230 struct kexec_segment __user *segments)
232 { 231 {
233 int result; 232 int result;
234 struct kimage *image; 233 struct kimage *image;
235 234
236 /* Allocate and initialize a controlling structure */ 235 /* Allocate and initialize a controlling structure */
237 image = NULL; 236 image = NULL;
238 result = do_kimage_alloc(&image, entry, nr_segments, segments); 237 result = do_kimage_alloc(&image, entry, nr_segments, segments);
239 if (result) 238 if (result)
240 goto out; 239 goto out;
241 240
242 *rimage = image; 241 *rimage = image;
243 242
244 /* 243 /*
245 * Find a location for the control code buffer, and add it 244 * Find a location for the control code buffer, and add it
246 * the vector of segments so that it's pages will also be 245 * the vector of segments so that it's pages will also be
247 * counted as destination pages. 246 * counted as destination pages.
248 */ 247 */
249 result = -ENOMEM; 248 result = -ENOMEM;
250 image->control_code_page = kimage_alloc_control_pages(image, 249 image->control_code_page = kimage_alloc_control_pages(image,
251 get_order(KEXEC_CONTROL_PAGE_SIZE)); 250 get_order(KEXEC_CONTROL_PAGE_SIZE));
252 if (!image->control_code_page) { 251 if (!image->control_code_page) {
253 printk(KERN_ERR "Could not allocate control_code_buffer\n"); 252 printk(KERN_ERR "Could not allocate control_code_buffer\n");
254 goto out; 253 goto out;
255 } 254 }
256 255
257 image->swap_page = kimage_alloc_control_pages(image, 0); 256 image->swap_page = kimage_alloc_control_pages(image, 0);
258 if (!image->swap_page) { 257 if (!image->swap_page) {
259 printk(KERN_ERR "Could not allocate swap buffer\n"); 258 printk(KERN_ERR "Could not allocate swap buffer\n");
260 goto out; 259 goto out;
261 } 260 }
262 261
263 result = 0; 262 result = 0;
264 out: 263 out:
265 if (result == 0) 264 if (result == 0)
266 *rimage = image; 265 *rimage = image;
267 else 266 else
268 kfree(image); 267 kfree(image);
269 268
270 return result; 269 return result;
271 } 270 }
272 271
273 static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, 272 static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
274 unsigned long nr_segments, 273 unsigned long nr_segments,
275 struct kexec_segment __user *segments) 274 struct kexec_segment __user *segments)
276 { 275 {
277 int result; 276 int result;
278 struct kimage *image; 277 struct kimage *image;
279 unsigned long i; 278 unsigned long i;
280 279
281 image = NULL; 280 image = NULL;
282 /* Verify we have a valid entry point */ 281 /* Verify we have a valid entry point */
283 if ((entry < crashk_res.start) || (entry > crashk_res.end)) { 282 if ((entry < crashk_res.start) || (entry > crashk_res.end)) {
284 result = -EADDRNOTAVAIL; 283 result = -EADDRNOTAVAIL;
285 goto out; 284 goto out;
286 } 285 }
287 286
288 /* Allocate and initialize a controlling structure */ 287 /* Allocate and initialize a controlling structure */
289 result = do_kimage_alloc(&image, entry, nr_segments, segments); 288 result = do_kimage_alloc(&image, entry, nr_segments, segments);
290 if (result) 289 if (result)
291 goto out; 290 goto out;
292 291
293 /* Enable the special crash kernel control page 292 /* Enable the special crash kernel control page
294 * allocation policy. 293 * allocation policy.
295 */ 294 */
296 image->control_page = crashk_res.start; 295 image->control_page = crashk_res.start;
297 image->type = KEXEC_TYPE_CRASH; 296 image->type = KEXEC_TYPE_CRASH;
298 297
299 /* 298 /*
300 * Verify we have good destination addresses. Normally 299 * Verify we have good destination addresses. Normally
301 * the caller is responsible for making certain we don't 300 * the caller is responsible for making certain we don't
302 * attempt to load the new image into invalid or reserved 301 * attempt to load the new image into invalid or reserved
303 * areas of RAM. But crash kernels are preloaded into a 302 * areas of RAM. But crash kernels are preloaded into a
304 * reserved area of ram. We must ensure the addresses 303 * reserved area of ram. We must ensure the addresses
305 * are in the reserved area otherwise preloading the 304 * are in the reserved area otherwise preloading the
306 * kernel could corrupt things. 305 * kernel could corrupt things.
307 */ 306 */
308 result = -EADDRNOTAVAIL; 307 result = -EADDRNOTAVAIL;
309 for (i = 0; i < nr_segments; i++) { 308 for (i = 0; i < nr_segments; i++) {
310 unsigned long mstart, mend; 309 unsigned long mstart, mend;
311 310
312 mstart = image->segment[i].mem; 311 mstart = image->segment[i].mem;
313 mend = mstart + image->segment[i].memsz - 1; 312 mend = mstart + image->segment[i].memsz - 1;
314 /* Ensure we are within the crash kernel limits */ 313 /* Ensure we are within the crash kernel limits */
315 if ((mstart < crashk_res.start) || (mend > crashk_res.end)) 314 if ((mstart < crashk_res.start) || (mend > crashk_res.end))
316 goto out; 315 goto out;
317 } 316 }
318 317
319 /* 318 /*
320 * Find a location for the control code buffer, and add 319 * Find a location for the control code buffer, and add
321 * the vector of segments so that it's pages will also be 320 * the vector of segments so that it's pages will also be
322 * counted as destination pages. 321 * counted as destination pages.
323 */ 322 */
324 result = -ENOMEM; 323 result = -ENOMEM;
325 image->control_code_page = kimage_alloc_control_pages(image, 324 image->control_code_page = kimage_alloc_control_pages(image,
326 get_order(KEXEC_CONTROL_PAGE_SIZE)); 325 get_order(KEXEC_CONTROL_PAGE_SIZE));
327 if (!image->control_code_page) { 326 if (!image->control_code_page) {
328 printk(KERN_ERR "Could not allocate control_code_buffer\n"); 327 printk(KERN_ERR "Could not allocate control_code_buffer\n");
329 goto out; 328 goto out;
330 } 329 }
331 330
332 result = 0; 331 result = 0;
333 out: 332 out:
334 if (result == 0) 333 if (result == 0)
335 *rimage = image; 334 *rimage = image;
336 else 335 else
337 kfree(image); 336 kfree(image);
338 337
339 return result; 338 return result;
340 } 339 }
341 340
342 static int kimage_is_destination_range(struct kimage *image, 341 static int kimage_is_destination_range(struct kimage *image,
343 unsigned long start, 342 unsigned long start,
344 unsigned long end) 343 unsigned long end)
345 { 344 {
346 unsigned long i; 345 unsigned long i;
347 346
348 for (i = 0; i < image->nr_segments; i++) { 347 for (i = 0; i < image->nr_segments; i++) {
349 unsigned long mstart, mend; 348 unsigned long mstart, mend;
350 349
351 mstart = image->segment[i].mem; 350 mstart = image->segment[i].mem;
352 mend = mstart + image->segment[i].memsz; 351 mend = mstart + image->segment[i].memsz;
353 if ((end > mstart) && (start < mend)) 352 if ((end > mstart) && (start < mend))
354 return 1; 353 return 1;
355 } 354 }
356 355
357 return 0; 356 return 0;
358 } 357 }
359 358
360 static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) 359 static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
361 { 360 {
362 struct page *pages; 361 struct page *pages;
363 362
364 pages = alloc_pages(gfp_mask, order); 363 pages = alloc_pages(gfp_mask, order);
365 if (pages) { 364 if (pages) {
366 unsigned int count, i; 365 unsigned int count, i;
367 pages->mapping = NULL; 366 pages->mapping = NULL;
368 set_page_private(pages, order); 367 set_page_private(pages, order);
369 count = 1 << order; 368 count = 1 << order;
370 for (i = 0; i < count; i++) 369 for (i = 0; i < count; i++)
371 SetPageReserved(pages + i); 370 SetPageReserved(pages + i);
372 } 371 }
373 372
374 return pages; 373 return pages;
375 } 374 }
376 375
377 static void kimage_free_pages(struct page *page) 376 static void kimage_free_pages(struct page *page)
378 { 377 {
379 unsigned int order, count, i; 378 unsigned int order, count, i;
380 379
381 order = page_private(page); 380 order = page_private(page);
382 count = 1 << order; 381 count = 1 << order;
383 for (i = 0; i < count; i++) 382 for (i = 0; i < count; i++)
384 ClearPageReserved(page + i); 383 ClearPageReserved(page + i);
385 __free_pages(page, order); 384 __free_pages(page, order);
386 } 385 }
387 386
388 static void kimage_free_page_list(struct list_head *list) 387 static void kimage_free_page_list(struct list_head *list)
389 { 388 {
390 struct list_head *pos, *next; 389 struct list_head *pos, *next;
391 390
392 list_for_each_safe(pos, next, list) { 391 list_for_each_safe(pos, next, list) {
393 struct page *page; 392 struct page *page;
394 393
395 page = list_entry(pos, struct page, lru); 394 page = list_entry(pos, struct page, lru);
396 list_del(&page->lru); 395 list_del(&page->lru);
397 kimage_free_pages(page); 396 kimage_free_pages(page);
398 } 397 }
399 } 398 }
400 399
401 static struct page *kimage_alloc_normal_control_pages(struct kimage *image, 400 static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
402 unsigned int order) 401 unsigned int order)
403 { 402 {
404 /* Control pages are special, they are the intermediaries 403 /* Control pages are special, they are the intermediaries
405 * that are needed while we copy the rest of the pages 404 * that are needed while we copy the rest of the pages
406 * to their final resting place. As such they must 405 * to their final resting place. As such they must
407 * not conflict with either the destination addresses 406 * not conflict with either the destination addresses
408 * or memory the kernel is already using. 407 * or memory the kernel is already using.
409 * 408 *
410 * The only case where we really need more than one of 409 * The only case where we really need more than one of
411 * these are for architectures where we cannot disable 410 * these are for architectures where we cannot disable
412 * the MMU and must instead generate an identity mapped 411 * the MMU and must instead generate an identity mapped
413 * page table for all of the memory. 412 * page table for all of the memory.
414 * 413 *
415 * At worst this runs in O(N) of the image size. 414 * At worst this runs in O(N) of the image size.
416 */ 415 */
417 struct list_head extra_pages; 416 struct list_head extra_pages;
418 struct page *pages; 417 struct page *pages;
419 unsigned int count; 418 unsigned int count;
420 419
421 count = 1 << order; 420 count = 1 << order;
422 INIT_LIST_HEAD(&extra_pages); 421 INIT_LIST_HEAD(&extra_pages);
423 422
424 /* Loop while I can allocate a page and the page allocated 423 /* Loop while I can allocate a page and the page allocated
425 * is a destination page. 424 * is a destination page.
426 */ 425 */
427 do { 426 do {
428 unsigned long pfn, epfn, addr, eaddr; 427 unsigned long pfn, epfn, addr, eaddr;
429 428
430 pages = kimage_alloc_pages(GFP_KERNEL, order); 429 pages = kimage_alloc_pages(GFP_KERNEL, order);
431 if (!pages) 430 if (!pages)
432 break; 431 break;
433 pfn = page_to_pfn(pages); 432 pfn = page_to_pfn(pages);
434 epfn = pfn + count; 433 epfn = pfn + count;
435 addr = pfn << PAGE_SHIFT; 434 addr = pfn << PAGE_SHIFT;
436 eaddr = epfn << PAGE_SHIFT; 435 eaddr = epfn << PAGE_SHIFT;
437 if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || 436 if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
438 kimage_is_destination_range(image, addr, eaddr)) { 437 kimage_is_destination_range(image, addr, eaddr)) {
439 list_add(&pages->lru, &extra_pages); 438 list_add(&pages->lru, &extra_pages);
440 pages = NULL; 439 pages = NULL;
441 } 440 }
442 } while (!pages); 441 } while (!pages);
443 442
444 if (pages) { 443 if (pages) {
445 /* Remember the allocated page... */ 444 /* Remember the allocated page... */
446 list_add(&pages->lru, &image->control_pages); 445 list_add(&pages->lru, &image->control_pages);
447 446
448 /* Because the page is already in it's destination 447 /* Because the page is already in it's destination
449 * location we will never allocate another page at 448 * location we will never allocate another page at
450 * that address. Therefore kimage_alloc_pages 449 * that address. Therefore kimage_alloc_pages
451 * will not return it (again) and we don't need 450 * will not return it (again) and we don't need
452 * to give it an entry in image->segment[]. 451 * to give it an entry in image->segment[].
453 */ 452 */
454 } 453 }
455 /* Deal with the destination pages I have inadvertently allocated. 454 /* Deal with the destination pages I have inadvertently allocated.
456 * 455 *
457 * Ideally I would convert multi-page allocations into single 456 * Ideally I would convert multi-page allocations into single
458 * page allocations, and add everything to image->dest_pages. 457 * page allocations, and add everything to image->dest_pages.
459 * 458 *
460 * For now it is simpler to just free the pages. 459 * For now it is simpler to just free the pages.
461 */ 460 */
462 kimage_free_page_list(&extra_pages); 461 kimage_free_page_list(&extra_pages);
463 462
464 return pages; 463 return pages;
465 } 464 }
466 465
467 static struct page *kimage_alloc_crash_control_pages(struct kimage *image, 466 static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
468 unsigned int order) 467 unsigned int order)
469 { 468 {
470 /* Control pages are special, they are the intermediaries 469 /* Control pages are special, they are the intermediaries
471 * that are needed while we copy the rest of the pages 470 * that are needed while we copy the rest of the pages
472 * to their final resting place. As such they must 471 * to their final resting place. As such they must
473 * not conflict with either the destination addresses 472 * not conflict with either the destination addresses
474 * or memory the kernel is already using. 473 * or memory the kernel is already using.
475 * 474 *
476 * Control pages are also the only pags we must allocate 475 * Control pages are also the only pags we must allocate
477 * when loading a crash kernel. All of the other pages 476 * when loading a crash kernel. All of the other pages
478 * are specified by the segments and we just memcpy 477 * are specified by the segments and we just memcpy
479 * into them directly. 478 * into them directly.
480 * 479 *
481 * The only case where we really need more than one of 480 * The only case where we really need more than one of
482 * these are for architectures where we cannot disable 481 * these are for architectures where we cannot disable
483 * the MMU and must instead generate an identity mapped 482 * the MMU and must instead generate an identity mapped
484 * page table for all of the memory. 483 * page table for all of the memory.
485 * 484 *
486 * Given the low demand this implements a very simple 485 * Given the low demand this implements a very simple
487 * allocator that finds the first hole of the appropriate 486 * allocator that finds the first hole of the appropriate
488 * size in the reserved memory region, and allocates all 487 * size in the reserved memory region, and allocates all
489 * of the memory up to and including the hole. 488 * of the memory up to and including the hole.
490 */ 489 */
491 unsigned long hole_start, hole_end, size; 490 unsigned long hole_start, hole_end, size;
492 struct page *pages; 491 struct page *pages;
493 492
494 pages = NULL; 493 pages = NULL;
495 size = (1 << order) << PAGE_SHIFT; 494 size = (1 << order) << PAGE_SHIFT;
496 hole_start = (image->control_page + (size - 1)) & ~(size - 1); 495 hole_start = (image->control_page + (size - 1)) & ~(size - 1);
497 hole_end = hole_start + size - 1; 496 hole_end = hole_start + size - 1;
498 while (hole_end <= crashk_res.end) { 497 while (hole_end <= crashk_res.end) {
499 unsigned long i; 498 unsigned long i;
500 499
501 if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) 500 if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
502 break; 501 break;
503 if (hole_end > crashk_res.end) 502 if (hole_end > crashk_res.end)
504 break; 503 break;
505 /* See if I overlap any of the segments */ 504 /* See if I overlap any of the segments */
506 for (i = 0; i < image->nr_segments; i++) { 505 for (i = 0; i < image->nr_segments; i++) {
507 unsigned long mstart, mend; 506 unsigned long mstart, mend;
508 507
509 mstart = image->segment[i].mem; 508 mstart = image->segment[i].mem;
510 mend = mstart + image->segment[i].memsz - 1; 509 mend = mstart + image->segment[i].memsz - 1;
511 if ((hole_end >= mstart) && (hole_start <= mend)) { 510 if ((hole_end >= mstart) && (hole_start <= mend)) {
512 /* Advance the hole to the end of the segment */ 511 /* Advance the hole to the end of the segment */
513 hole_start = (mend + (size - 1)) & ~(size - 1); 512 hole_start = (mend + (size - 1)) & ~(size - 1);
514 hole_end = hole_start + size - 1; 513 hole_end = hole_start + size - 1;
515 break; 514 break;
516 } 515 }
517 } 516 }
518 /* If I don't overlap any segments I have found my hole! */ 517 /* If I don't overlap any segments I have found my hole! */
519 if (i == image->nr_segments) { 518 if (i == image->nr_segments) {
520 pages = pfn_to_page(hole_start >> PAGE_SHIFT); 519 pages = pfn_to_page(hole_start >> PAGE_SHIFT);
521 break; 520 break;
522 } 521 }
523 } 522 }
524 if (pages) 523 if (pages)
525 image->control_page = hole_end; 524 image->control_page = hole_end;
526 525
527 return pages; 526 return pages;
528 } 527 }
529 528
530 529
531 struct page *kimage_alloc_control_pages(struct kimage *image, 530 struct page *kimage_alloc_control_pages(struct kimage *image,
532 unsigned int order) 531 unsigned int order)
533 { 532 {
534 struct page *pages = NULL; 533 struct page *pages = NULL;
535 534
536 switch (image->type) { 535 switch (image->type) {
537 case KEXEC_TYPE_DEFAULT: 536 case KEXEC_TYPE_DEFAULT:
538 pages = kimage_alloc_normal_control_pages(image, order); 537 pages = kimage_alloc_normal_control_pages(image, order);
539 break; 538 break;
540 case KEXEC_TYPE_CRASH: 539 case KEXEC_TYPE_CRASH:
541 pages = kimage_alloc_crash_control_pages(image, order); 540 pages = kimage_alloc_crash_control_pages(image, order);
542 break; 541 break;
543 } 542 }
544 543
545 return pages; 544 return pages;
546 } 545 }
547 546
548 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) 547 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
549 { 548 {
550 if (*image->entry != 0) 549 if (*image->entry != 0)
551 image->entry++; 550 image->entry++;
552 551
553 if (image->entry == image->last_entry) { 552 if (image->entry == image->last_entry) {
554 kimage_entry_t *ind_page; 553 kimage_entry_t *ind_page;
555 struct page *page; 554 struct page *page;
556 555
557 page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); 556 page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
558 if (!page) 557 if (!page)
559 return -ENOMEM; 558 return -ENOMEM;
560 559
561 ind_page = page_address(page); 560 ind_page = page_address(page);
562 *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; 561 *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
563 image->entry = ind_page; 562 image->entry = ind_page;
564 image->last_entry = ind_page + 563 image->last_entry = ind_page +
565 ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); 564 ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
566 } 565 }
567 *image->entry = entry; 566 *image->entry = entry;
568 image->entry++; 567 image->entry++;
569 *image->entry = 0; 568 *image->entry = 0;
570 569
571 return 0; 570 return 0;
572 } 571 }
573 572
574 static int kimage_set_destination(struct kimage *image, 573 static int kimage_set_destination(struct kimage *image,
575 unsigned long destination) 574 unsigned long destination)
576 { 575 {
577 int result; 576 int result;
578 577
579 destination &= PAGE_MASK; 578 destination &= PAGE_MASK;
580 result = kimage_add_entry(image, destination | IND_DESTINATION); 579 result = kimage_add_entry(image, destination | IND_DESTINATION);
581 if (result == 0) 580 if (result == 0)
582 image->destination = destination; 581 image->destination = destination;
583 582
584 return result; 583 return result;
585 } 584 }
586 585
587 586
588 static int kimage_add_page(struct kimage *image, unsigned long page) 587 static int kimage_add_page(struct kimage *image, unsigned long page)
589 { 588 {
590 int result; 589 int result;
591 590
592 page &= PAGE_MASK; 591 page &= PAGE_MASK;
593 result = kimage_add_entry(image, page | IND_SOURCE); 592 result = kimage_add_entry(image, page | IND_SOURCE);
594 if (result == 0) 593 if (result == 0)
595 image->destination += PAGE_SIZE; 594 image->destination += PAGE_SIZE;
596 595
597 return result; 596 return result;
598 } 597 }
599 598
600 599
601 static void kimage_free_extra_pages(struct kimage *image) 600 static void kimage_free_extra_pages(struct kimage *image)
602 { 601 {
603 /* Walk through and free any extra destination pages I may have */ 602 /* Walk through and free any extra destination pages I may have */
604 kimage_free_page_list(&image->dest_pages); 603 kimage_free_page_list(&image->dest_pages);
605 604
606 /* Walk through and free any unusable pages I have cached */ 605 /* Walk through and free any unusable pages I have cached */
607 kimage_free_page_list(&image->unuseable_pages); 606 kimage_free_page_list(&image->unuseable_pages);
608 607
609 } 608 }
610 static void kimage_terminate(struct kimage *image) 609 static void kimage_terminate(struct kimage *image)
611 { 610 {
612 if (*image->entry != 0) 611 if (*image->entry != 0)
613 image->entry++; 612 image->entry++;
614 613
615 *image->entry = IND_DONE; 614 *image->entry = IND_DONE;
616 } 615 }
617 616
/*
 * for_each_kimage_entry - iterate over the entry list of a kimage.
 * @image: pointer to the image; iteration starts at its head member
 * @ptr:   cursor, points at the slot currently being visited
 * @entry: receives the value of that slot
 *
 * The walk stops at a zero entry or at an entry with IND_DONE set.
 * After an IND_INDIRECTION entry has been visited the cursor continues
 * at the page that entry refers to; otherwise it moves to the next slot.
 *
 * Every argument is parenthesized so that expressions such as &img can
 * be passed for @image.
 */
#define for_each_kimage_entry(image, ptr, entry) \
	for ((ptr) = &(image)->head; \
	     ((entry) = *(ptr)) && !((entry) & IND_DONE); \
	     (ptr) = ((entry) & IND_INDIRECTION) ? \
			phys_to_virt((entry) & PAGE_MASK) : (ptr) + 1)
622 621
623 static void kimage_free_entry(kimage_entry_t entry) 622 static void kimage_free_entry(kimage_entry_t entry)
624 { 623 {
625 struct page *page; 624 struct page *page;
626 625
627 page = pfn_to_page(entry >> PAGE_SHIFT); 626 page = pfn_to_page(entry >> PAGE_SHIFT);
628 kimage_free_pages(page); 627 kimage_free_pages(page);
629 } 628 }
630 629
631 static void kimage_free(struct kimage *image) 630 static void kimage_free(struct kimage *image)
632 { 631 {
633 kimage_entry_t *ptr, entry; 632 kimage_entry_t *ptr, entry;
634 kimage_entry_t ind = 0; 633 kimage_entry_t ind = 0;
635 634
636 if (!image) 635 if (!image)
637 return; 636 return;
638 637
639 kimage_free_extra_pages(image); 638 kimage_free_extra_pages(image);
640 for_each_kimage_entry(image, ptr, entry) { 639 for_each_kimage_entry(image, ptr, entry) {
641 if (entry & IND_INDIRECTION) { 640 if (entry & IND_INDIRECTION) {
642 /* Free the previous indirection page */ 641 /* Free the previous indirection page */
643 if (ind & IND_INDIRECTION) 642 if (ind & IND_INDIRECTION)
644 kimage_free_entry(ind); 643 kimage_free_entry(ind);
645 /* Save this indirection page until we are 644 /* Save this indirection page until we are
646 * done with it. 645 * done with it.
647 */ 646 */
648 ind = entry; 647 ind = entry;
649 } 648 }
650 else if (entry & IND_SOURCE) 649 else if (entry & IND_SOURCE)
651 kimage_free_entry(entry); 650 kimage_free_entry(entry);
652 } 651 }
653 /* Free the final indirection page */ 652 /* Free the final indirection page */
654 if (ind & IND_INDIRECTION) 653 if (ind & IND_INDIRECTION)
655 kimage_free_entry(ind); 654 kimage_free_entry(ind);
656 655
657 /* Handle any machine specific cleanup */ 656 /* Handle any machine specific cleanup */
658 machine_kexec_cleanup(image); 657 machine_kexec_cleanup(image);
659 658
660 /* Free the kexec control pages... */ 659 /* Free the kexec control pages... */
661 kimage_free_page_list(&image->control_pages); 660 kimage_free_page_list(&image->control_pages);
662 kfree(image); 661 kfree(image);
663 } 662 }
664 663
665 static kimage_entry_t *kimage_dst_used(struct kimage *image, 664 static kimage_entry_t *kimage_dst_used(struct kimage *image,
666 unsigned long page) 665 unsigned long page)
667 { 666 {
668 kimage_entry_t *ptr, entry; 667 kimage_entry_t *ptr, entry;
669 unsigned long destination = 0; 668 unsigned long destination = 0;
670 669
671 for_each_kimage_entry(image, ptr, entry) { 670 for_each_kimage_entry(image, ptr, entry) {
672 if (entry & IND_DESTINATION) 671 if (entry & IND_DESTINATION)
673 destination = entry & PAGE_MASK; 672 destination = entry & PAGE_MASK;
674 else if (entry & IND_SOURCE) { 673 else if (entry & IND_SOURCE) {
675 if (page == destination) 674 if (page == destination)
676 return ptr; 675 return ptr;
677 destination += PAGE_SIZE; 676 destination += PAGE_SIZE;
678 } 677 }
679 } 678 }
680 679
681 return NULL; 680 return NULL;
682 } 681 }
683 682
684 static struct page *kimage_alloc_page(struct kimage *image, 683 static struct page *kimage_alloc_page(struct kimage *image,
685 gfp_t gfp_mask, 684 gfp_t gfp_mask,
686 unsigned long destination) 685 unsigned long destination)
687 { 686 {
688 /* 687 /*
689 * Here we implement safeguards to ensure that a source page 688 * Here we implement safeguards to ensure that a source page
690 * is not copied to its destination page before the data on 689 * is not copied to its destination page before the data on
691 * the destination page is no longer useful. 690 * the destination page is no longer useful.
692 * 691 *
693 * To do this we maintain the invariant that a source page is 692 * To do this we maintain the invariant that a source page is
694 * either its own destination page, or it is not a 693 * either its own destination page, or it is not a
695 * destination page at all. 694 * destination page at all.
696 * 695 *
697 * That is slightly stronger than required, but the proof 696 * That is slightly stronger than required, but the proof
698 * that no problems will not occur is trivial, and the 697 * that no problems will not occur is trivial, and the
699 * implementation is simply to verify. 698 * implementation is simply to verify.
700 * 699 *
701 * When allocating all pages normally this algorithm will run 700 * When allocating all pages normally this algorithm will run
702 * in O(N) time, but in the worst case it will run in O(N^2) 701 * in O(N) time, but in the worst case it will run in O(N^2)
703 * time. If the runtime is a problem the data structures can 702 * time. If the runtime is a problem the data structures can
704 * be fixed. 703 * be fixed.
705 */ 704 */
706 struct page *page; 705 struct page *page;
707 unsigned long addr; 706 unsigned long addr;
708 707
709 /* 708 /*
710 * Walk through the list of destination pages, and see if I 709 * Walk through the list of destination pages, and see if I
711 * have a match. 710 * have a match.
712 */ 711 */
713 list_for_each_entry(page, &image->dest_pages, lru) { 712 list_for_each_entry(page, &image->dest_pages, lru) {
714 addr = page_to_pfn(page) << PAGE_SHIFT; 713 addr = page_to_pfn(page) << PAGE_SHIFT;
715 if (addr == destination) { 714 if (addr == destination) {
716 list_del(&page->lru); 715 list_del(&page->lru);
717 return page; 716 return page;
718 } 717 }
719 } 718 }
720 page = NULL; 719 page = NULL;
721 while (1) { 720 while (1) {
722 kimage_entry_t *old; 721 kimage_entry_t *old;
723 722
724 /* Allocate a page, if we run out of memory give up */ 723 /* Allocate a page, if we run out of memory give up */
725 page = kimage_alloc_pages(gfp_mask, 0); 724 page = kimage_alloc_pages(gfp_mask, 0);
726 if (!page) 725 if (!page)
727 return NULL; 726 return NULL;
728 /* If the page cannot be used file it away */ 727 /* If the page cannot be used file it away */
729 if (page_to_pfn(page) > 728 if (page_to_pfn(page) >
730 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { 729 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
731 list_add(&page->lru, &image->unuseable_pages); 730 list_add(&page->lru, &image->unuseable_pages);
732 continue; 731 continue;
733 } 732 }
734 addr = page_to_pfn(page) << PAGE_SHIFT; 733 addr = page_to_pfn(page) << PAGE_SHIFT;
735 734
736 /* If it is the destination page we want use it */ 735 /* If it is the destination page we want use it */
737 if (addr == destination) 736 if (addr == destination)
738 break; 737 break;
739 738
740 /* If the page is not a destination page use it */ 739 /* If the page is not a destination page use it */
741 if (!kimage_is_destination_range(image, addr, 740 if (!kimage_is_destination_range(image, addr,
742 addr + PAGE_SIZE)) 741 addr + PAGE_SIZE))
743 break; 742 break;
744 743
745 /* 744 /*
746 * I know that the page is someones destination page. 745 * I know that the page is someones destination page.
747 * See if there is already a source page for this 746 * See if there is already a source page for this
748 * destination page. And if so swap the source pages. 747 * destination page. And if so swap the source pages.
749 */ 748 */
750 old = kimage_dst_used(image, addr); 749 old = kimage_dst_used(image, addr);
751 if (old) { 750 if (old) {
752 /* If so move it */ 751 /* If so move it */
753 unsigned long old_addr; 752 unsigned long old_addr;
754 struct page *old_page; 753 struct page *old_page;
755 754
756 old_addr = *old & PAGE_MASK; 755 old_addr = *old & PAGE_MASK;
757 old_page = pfn_to_page(old_addr >> PAGE_SHIFT); 756 old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
758 copy_highpage(page, old_page); 757 copy_highpage(page, old_page);
759 *old = addr | (*old & ~PAGE_MASK); 758 *old = addr | (*old & ~PAGE_MASK);
760 759
761 /* The old page I have found cannot be a 760 /* The old page I have found cannot be a
762 * destination page, so return it if it's 761 * destination page, so return it if it's
763 * gfp_flags honor the ones passed in. 762 * gfp_flags honor the ones passed in.
764 */ 763 */
765 if (!(gfp_mask & __GFP_HIGHMEM) && 764 if (!(gfp_mask & __GFP_HIGHMEM) &&
766 PageHighMem(old_page)) { 765 PageHighMem(old_page)) {
767 kimage_free_pages(old_page); 766 kimage_free_pages(old_page);
768 continue; 767 continue;
769 } 768 }
770 addr = old_addr; 769 addr = old_addr;
771 page = old_page; 770 page = old_page;
772 break; 771 break;
773 } 772 }
774 else { 773 else {
775 /* Place the page on the destination list I 774 /* Place the page on the destination list I
776 * will use it later. 775 * will use it later.
777 */ 776 */
778 list_add(&page->lru, &image->dest_pages); 777 list_add(&page->lru, &image->dest_pages);
779 } 778 }
780 } 779 }
781 780
782 return page; 781 return page;
783 } 782 }
784 783
785 static int kimage_load_normal_segment(struct kimage *image, 784 static int kimage_load_normal_segment(struct kimage *image,
786 struct kexec_segment *segment) 785 struct kexec_segment *segment)
787 { 786 {
788 unsigned long maddr; 787 unsigned long maddr;
789 unsigned long ubytes, mbytes; 788 unsigned long ubytes, mbytes;
790 int result; 789 int result;
791 unsigned char __user *buf; 790 unsigned char __user *buf;
792 791
793 result = 0; 792 result = 0;
794 buf = segment->buf; 793 buf = segment->buf;
795 ubytes = segment->bufsz; 794 ubytes = segment->bufsz;
796 mbytes = segment->memsz; 795 mbytes = segment->memsz;
797 maddr = segment->mem; 796 maddr = segment->mem;
798 797
799 result = kimage_set_destination(image, maddr); 798 result = kimage_set_destination(image, maddr);
800 if (result < 0) 799 if (result < 0)
801 goto out; 800 goto out;
802 801
803 while (mbytes) { 802 while (mbytes) {
804 struct page *page; 803 struct page *page;
805 char *ptr; 804 char *ptr;
806 size_t uchunk, mchunk; 805 size_t uchunk, mchunk;
807 806
808 page = kimage_alloc_page(image, GFP_HIGHUSER, maddr); 807 page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
809 if (!page) { 808 if (!page) {
810 result = -ENOMEM; 809 result = -ENOMEM;
811 goto out; 810 goto out;
812 } 811 }
813 result = kimage_add_page(image, page_to_pfn(page) 812 result = kimage_add_page(image, page_to_pfn(page)
814 << PAGE_SHIFT); 813 << PAGE_SHIFT);
815 if (result < 0) 814 if (result < 0)
816 goto out; 815 goto out;
817 816
818 ptr = kmap(page); 817 ptr = kmap(page);
819 /* Start with a clear page */ 818 /* Start with a clear page */
820 clear_page(ptr); 819 clear_page(ptr);
821 ptr += maddr & ~PAGE_MASK; 820 ptr += maddr & ~PAGE_MASK;
822 mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); 821 mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
823 if (mchunk > mbytes) 822 if (mchunk > mbytes)
824 mchunk = mbytes; 823 mchunk = mbytes;
825 824
826 uchunk = mchunk; 825 uchunk = mchunk;
827 if (uchunk > ubytes) 826 if (uchunk > ubytes)
828 uchunk = ubytes; 827 uchunk = ubytes;
829 828
830 result = copy_from_user(ptr, buf, uchunk); 829 result = copy_from_user(ptr, buf, uchunk);
831 kunmap(page); 830 kunmap(page);
832 if (result) { 831 if (result) {
833 result = -EFAULT; 832 result = -EFAULT;
834 goto out; 833 goto out;
835 } 834 }
836 ubytes -= uchunk; 835 ubytes -= uchunk;
837 maddr += mchunk; 836 maddr += mchunk;
838 buf += mchunk; 837 buf += mchunk;
839 mbytes -= mchunk; 838 mbytes -= mchunk;
840 } 839 }
841 out: 840 out:
842 return result; 841 return result;
843 } 842 }
844 843
845 static int kimage_load_crash_segment(struct kimage *image, 844 static int kimage_load_crash_segment(struct kimage *image,
846 struct kexec_segment *segment) 845 struct kexec_segment *segment)
847 { 846 {
848 /* For crash dumps kernels we simply copy the data from 847 /* For crash dumps kernels we simply copy the data from
849 * user space to it's destination. 848 * user space to it's destination.
850 * We do things a page at a time for the sake of kmap. 849 * We do things a page at a time for the sake of kmap.
851 */ 850 */
852 unsigned long maddr; 851 unsigned long maddr;
853 unsigned long ubytes, mbytes; 852 unsigned long ubytes, mbytes;
854 int result; 853 int result;
855 unsigned char __user *buf; 854 unsigned char __user *buf;
856 855
857 result = 0; 856 result = 0;
858 buf = segment->buf; 857 buf = segment->buf;
859 ubytes = segment->bufsz; 858 ubytes = segment->bufsz;
860 mbytes = segment->memsz; 859 mbytes = segment->memsz;
861 maddr = segment->mem; 860 maddr = segment->mem;
862 while (mbytes) { 861 while (mbytes) {
863 struct page *page; 862 struct page *page;
864 char *ptr; 863 char *ptr;
865 size_t uchunk, mchunk; 864 size_t uchunk, mchunk;
866 865
867 page = pfn_to_page(maddr >> PAGE_SHIFT); 866 page = pfn_to_page(maddr >> PAGE_SHIFT);
868 if (!page) { 867 if (!page) {
869 result = -ENOMEM; 868 result = -ENOMEM;
870 goto out; 869 goto out;
871 } 870 }
872 ptr = kmap(page); 871 ptr = kmap(page);
873 ptr += maddr & ~PAGE_MASK; 872 ptr += maddr & ~PAGE_MASK;
874 mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); 873 mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
875 if (mchunk > mbytes) 874 if (mchunk > mbytes)
876 mchunk = mbytes; 875 mchunk = mbytes;
877 876
878 uchunk = mchunk; 877 uchunk = mchunk;
879 if (uchunk > ubytes) { 878 if (uchunk > ubytes) {
880 uchunk = ubytes; 879 uchunk = ubytes;
881 /* Zero the trailing part of the page */ 880 /* Zero the trailing part of the page */
882 memset(ptr + uchunk, 0, mchunk - uchunk); 881 memset(ptr + uchunk, 0, mchunk - uchunk);
883 } 882 }
884 result = copy_from_user(ptr, buf, uchunk); 883 result = copy_from_user(ptr, buf, uchunk);
885 kexec_flush_icache_page(page); 884 kexec_flush_icache_page(page);
886 kunmap(page); 885 kunmap(page);
887 if (result) { 886 if (result) {
888 result = -EFAULT; 887 result = -EFAULT;
889 goto out; 888 goto out;
890 } 889 }
891 ubytes -= uchunk; 890 ubytes -= uchunk;
892 maddr += mchunk; 891 maddr += mchunk;
893 buf += mchunk; 892 buf += mchunk;
894 mbytes -= mchunk; 893 mbytes -= mchunk;
895 } 894 }
896 out: 895 out:
897 return result; 896 return result;
898 } 897 }
899 898
900 static int kimage_load_segment(struct kimage *image, 899 static int kimage_load_segment(struct kimage *image,
901 struct kexec_segment *segment) 900 struct kexec_segment *segment)
902 { 901 {
903 int result = -ENOMEM; 902 int result = -ENOMEM;
904 903
905 switch (image->type) { 904 switch (image->type) {
906 case KEXEC_TYPE_DEFAULT: 905 case KEXEC_TYPE_DEFAULT:
907 result = kimage_load_normal_segment(image, segment); 906 result = kimage_load_normal_segment(image, segment);
908 break; 907 break;
909 case KEXEC_TYPE_CRASH: 908 case KEXEC_TYPE_CRASH:
910 result = kimage_load_crash_segment(image, segment); 909 result = kimage_load_crash_segment(image, segment);
911 break; 910 break;
912 } 911 }
913 912
914 return result; 913 return result;
915 } 914 }
916 915
/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * The work breaks up into three pieces:
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down, preventing ongoing DMA and placing
 *   the devices in a consistent state so that a later kernel can
 *   reinitialize them.
 *
 * - A machine-specific part that includes the syscall number,
 *   copies the image to its final destination, and jumps into
 *   the image at its entry point.
 *
 * kexec does not sync or unmount filesystems, so if you need
 * that to happen you have to do it yourself.
 */

/* Image installed by kexec_load() without KEXEC_ON_CRASH */
struct kimage *kexec_image;
/* Image installed with KEXEC_ON_CRASH; started by crash_kexec() */
struct kimage *kexec_crash_image;

/* Taken by every path that loads, launches or resizes a kexec image */
static DEFINE_MUTEX(kexec_mutex);
941 940
942 SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, 941 SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
943 struct kexec_segment __user *, segments, unsigned long, flags) 942 struct kexec_segment __user *, segments, unsigned long, flags)
944 { 943 {
945 struct kimage **dest_image, *image; 944 struct kimage **dest_image, *image;
946 int result; 945 int result;
947 946
948 /* We only trust the superuser with rebooting the system. */ 947 /* We only trust the superuser with rebooting the system. */
949 if (!capable(CAP_SYS_BOOT)) 948 if (!capable(CAP_SYS_BOOT))
950 return -EPERM; 949 return -EPERM;
951 950
952 /* 951 /*
953 * Verify we have a legal set of flags 952 * Verify we have a legal set of flags
954 * This leaves us room for future extensions. 953 * This leaves us room for future extensions.
955 */ 954 */
956 if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK)) 955 if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
957 return -EINVAL; 956 return -EINVAL;
958 957
959 /* Verify we are on the appropriate architecture */ 958 /* Verify we are on the appropriate architecture */
960 if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) && 959 if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
961 ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)) 960 ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
962 return -EINVAL; 961 return -EINVAL;
963 962
964 /* Put an artificial cap on the number 963 /* Put an artificial cap on the number
965 * of segments passed to kexec_load. 964 * of segments passed to kexec_load.
966 */ 965 */
967 if (nr_segments > KEXEC_SEGMENT_MAX) 966 if (nr_segments > KEXEC_SEGMENT_MAX)
968 return -EINVAL; 967 return -EINVAL;
969 968
970 image = NULL; 969 image = NULL;
971 result = 0; 970 result = 0;
972 971
973 /* Because we write directly to the reserved memory 972 /* Because we write directly to the reserved memory
974 * region when loading crash kernels we need a mutex here to 973 * region when loading crash kernels we need a mutex here to
975 * prevent multiple crash kernels from attempting to load 974 * prevent multiple crash kernels from attempting to load
976 * simultaneously, and to prevent a crash kernel from loading 975 * simultaneously, and to prevent a crash kernel from loading
977 * over the top of a in use crash kernel. 976 * over the top of a in use crash kernel.
978 * 977 *
979 * KISS: always take the mutex. 978 * KISS: always take the mutex.
980 */ 979 */
981 if (!mutex_trylock(&kexec_mutex)) 980 if (!mutex_trylock(&kexec_mutex))
982 return -EBUSY; 981 return -EBUSY;
983 982
984 dest_image = &kexec_image; 983 dest_image = &kexec_image;
985 if (flags & KEXEC_ON_CRASH) 984 if (flags & KEXEC_ON_CRASH)
986 dest_image = &kexec_crash_image; 985 dest_image = &kexec_crash_image;
987 if (nr_segments > 0) { 986 if (nr_segments > 0) {
988 unsigned long i; 987 unsigned long i;
989 988
990 /* Loading another kernel to reboot into */ 989 /* Loading another kernel to reboot into */
991 if ((flags & KEXEC_ON_CRASH) == 0) 990 if ((flags & KEXEC_ON_CRASH) == 0)
992 result = kimage_normal_alloc(&image, entry, 991 result = kimage_normal_alloc(&image, entry,
993 nr_segments, segments); 992 nr_segments, segments);
994 /* Loading another kernel to switch to if this one crashes */ 993 /* Loading another kernel to switch to if this one crashes */
995 else if (flags & KEXEC_ON_CRASH) { 994 else if (flags & KEXEC_ON_CRASH) {
996 /* Free any current crash dump kernel before 995 /* Free any current crash dump kernel before
997 * we corrupt it. 996 * we corrupt it.
998 */ 997 */
999 kimage_free(xchg(&kexec_crash_image, NULL)); 998 kimage_free(xchg(&kexec_crash_image, NULL));
1000 result = kimage_crash_alloc(&image, entry, 999 result = kimage_crash_alloc(&image, entry,
1001 nr_segments, segments); 1000 nr_segments, segments);
1002 crash_map_reserved_pages(); 1001 crash_map_reserved_pages();
1003 } 1002 }
1004 if (result) 1003 if (result)
1005 goto out; 1004 goto out;
1006 1005
1007 if (flags & KEXEC_PRESERVE_CONTEXT) 1006 if (flags & KEXEC_PRESERVE_CONTEXT)
1008 image->preserve_context = 1; 1007 image->preserve_context = 1;
1009 result = machine_kexec_prepare(image); 1008 result = machine_kexec_prepare(image);
1010 if (result) 1009 if (result)
1011 goto out; 1010 goto out;
1012 1011
1013 for (i = 0; i < nr_segments; i++) { 1012 for (i = 0; i < nr_segments; i++) {
1014 result = kimage_load_segment(image, &image->segment[i]); 1013 result = kimage_load_segment(image, &image->segment[i]);
1015 if (result) 1014 if (result)
1016 goto out; 1015 goto out;
1017 } 1016 }
1018 kimage_terminate(image); 1017 kimage_terminate(image);
1019 if (flags & KEXEC_ON_CRASH) 1018 if (flags & KEXEC_ON_CRASH)
1020 crash_unmap_reserved_pages(); 1019 crash_unmap_reserved_pages();
1021 } 1020 }
1022 /* Install the new kernel, and Uninstall the old */ 1021 /* Install the new kernel, and Uninstall the old */
1023 image = xchg(dest_image, image); 1022 image = xchg(dest_image, image);
1024 1023
1025 out: 1024 out:
1026 mutex_unlock(&kexec_mutex); 1025 mutex_unlock(&kexec_mutex);
1027 kimage_free(image); 1026 kimage_free(image);
1028 1027
1029 return result; 1028 return result;
1030 } 1029 }
1031 1030
1032 /* 1031 /*
1033 * Add and remove page tables for crashkernel memory 1032 * Add and remove page tables for crashkernel memory
1034 * 1033 *
1035 * Provide an empty default implementation here -- architecture 1034 * Provide an empty default implementation here -- architecture
1036 * code may override this 1035 * code may override this
1037 */ 1036 */
1038 void __weak crash_map_reserved_pages(void) 1037 void __weak crash_map_reserved_pages(void)
1039 {} 1038 {}
1040 1039
1041 void __weak crash_unmap_reserved_pages(void) 1040 void __weak crash_unmap_reserved_pages(void)
1042 {} 1041 {}
1043 1042
#ifdef CONFIG_COMPAT
/*
 * Compat entry point for kexec_load.
 *
 * Each compat_kexec_segment supplied by the caller is converted into a
 * native kexec_segment stored in an area obtained from
 * compat_alloc_user_space(); the converted array is then handed to
 * sys_kexec_load().
 *
 * Returns -EINVAL for KEXEC_ARCH_DEFAULT or too many segments, -EFAULT
 * when a segment cannot be copied, otherwise the result of
 * sys_kexec_load().
 */
asmlinkage long compat_sys_kexec_load(unsigned long entry,
				unsigned long nr_segments,
				struct compat_kexec_segment __user *segments,
				unsigned long flags)
{
	struct kexec_segment __user *native_segs;
	unsigned long idx;

	/*
	 * Don't allow clients that don't understand the native
	 * architecture to do anything.
	 */
	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
		return -EINVAL;

	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	native_segs = compat_alloc_user_space(nr_segments *
					      sizeof(struct kexec_segment));

	for (idx = 0; idx < nr_segments; idx++) {
		struct compat_kexec_segment cseg;
		struct kexec_segment seg;

		if (copy_from_user(&cseg, &segments[idx], sizeof(cseg)))
			return -EFAULT;

		/* Widen the compat fields to their native counterparts */
		seg.buf = compat_ptr(cseg.buf);
		seg.bufsz = cseg.bufsz;
		seg.mem = cseg.mem;
		seg.memsz = cseg.memsz;

		if (copy_to_user(&native_segs[idx], &seg, sizeof(seg)))
			return -EFAULT;
	}

	return sys_kexec_load(entry, nr_segments, native_segs, flags);
}
#endif
1082 1081
1083 void crash_kexec(struct pt_regs *regs) 1082 void crash_kexec(struct pt_regs *regs)
1084 { 1083 {
1085 /* Take the kexec_mutex here to prevent sys_kexec_load 1084 /* Take the kexec_mutex here to prevent sys_kexec_load
1086 * running on one cpu from replacing the crash kernel 1085 * running on one cpu from replacing the crash kernel
1087 * we are using after a panic on a different cpu. 1086 * we are using after a panic on a different cpu.
1088 * 1087 *
1089 * If the crash kernel was not located in a fixed area 1088 * If the crash kernel was not located in a fixed area
1090 * of memory the xchg(&kexec_crash_image) would be 1089 * of memory the xchg(&kexec_crash_image) would be
1091 * sufficient. But since I reuse the memory... 1090 * sufficient. But since I reuse the memory...
1092 */ 1091 */
1093 if (mutex_trylock(&kexec_mutex)) { 1092 if (mutex_trylock(&kexec_mutex)) {
1094 if (kexec_crash_image) { 1093 if (kexec_crash_image) {
1095 struct pt_regs fixed_regs; 1094 struct pt_regs fixed_regs;
1096
1097 kmsg_dump(KMSG_DUMP_KEXEC);
1098 1095
1099 crash_setup_regs(&fixed_regs, regs); 1096 crash_setup_regs(&fixed_regs, regs);
1100 crash_save_vmcoreinfo(); 1097 crash_save_vmcoreinfo();
1101 machine_crash_shutdown(&fixed_regs); 1098 machine_crash_shutdown(&fixed_regs);
1102 machine_kexec(kexec_crash_image); 1099 machine_kexec(kexec_crash_image);
1103 } 1100 }
1104 mutex_unlock(&kexec_mutex); 1101 mutex_unlock(&kexec_mutex);
1105 } 1102 }
1106 } 1103 }
1107 1104
1108 size_t crash_get_memory_size(void) 1105 size_t crash_get_memory_size(void)
1109 { 1106 {
1110 size_t size = 0; 1107 size_t size = 0;
1111 mutex_lock(&kexec_mutex); 1108 mutex_lock(&kexec_mutex);
1112 if (crashk_res.end != crashk_res.start) 1109 if (crashk_res.end != crashk_res.start)
1113 size = resource_size(&crashk_res); 1110 size = resource_size(&crashk_res);
1114 mutex_unlock(&kexec_mutex); 1111 mutex_unlock(&kexec_mutex);
1115 return size; 1112 return size;
1116 } 1113 }
1117 1114
1118 void __weak crash_free_reserved_phys_range(unsigned long begin, 1115 void __weak crash_free_reserved_phys_range(unsigned long begin,
1119 unsigned long end) 1116 unsigned long end)
1120 { 1117 {
1121 unsigned long addr; 1118 unsigned long addr;
1122 1119
1123 for (addr = begin; addr < end; addr += PAGE_SIZE) { 1120 for (addr = begin; addr < end; addr += PAGE_SIZE) {
1124 ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); 1121 ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
1125 init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); 1122 init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
1126 free_page((unsigned long)__va(addr)); 1123 free_page((unsigned long)__va(addr));
1127 totalram_pages++; 1124 totalram_pages++;
1128 } 1125 }
1129 } 1126 }
1130 1127
1131 int crash_shrink_memory(unsigned long new_size) 1128 int crash_shrink_memory(unsigned long new_size)
1132 { 1129 {
1133 int ret = 0; 1130 int ret = 0;
1134 unsigned long start, end; 1131 unsigned long start, end;
1135 1132
1136 mutex_lock(&kexec_mutex); 1133 mutex_lock(&kexec_mutex);
1137 1134
1138 if (kexec_crash_image) { 1135 if (kexec_crash_image) {
1139 ret = -ENOENT; 1136 ret = -ENOENT;
1140 goto unlock; 1137 goto unlock;
1141 } 1138 }
1142 start = crashk_res.start; 1139 start = crashk_res.start;
1143 end = crashk_res.end; 1140 end = crashk_res.end;
1144 1141
1145 if (new_size >= end - start + 1) { 1142 if (new_size >= end - start + 1) {
1146 ret = -EINVAL; 1143 ret = -EINVAL;
1147 if (new_size == end - start + 1) 1144 if (new_size == end - start + 1)
1148 ret = 0; 1145 ret = 0;
1149 goto unlock; 1146 goto unlock;
1150 } 1147 }
1151 1148
1152 start = roundup(start, KEXEC_CRASH_MEM_ALIGN); 1149 start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
1153 end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); 1150 end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
1154 1151
1155 crash_map_reserved_pages(); 1152 crash_map_reserved_pages();
1156 crash_free_reserved_phys_range(end, crashk_res.end); 1153 crash_free_reserved_phys_range(end, crashk_res.end);
1157 1154
1158 if ((start == end) && (crashk_res.parent != NULL)) 1155 if ((start == end) && (crashk_res.parent != NULL))
1159 release_resource(&crashk_res); 1156 release_resource(&crashk_res);
1160 crashk_res.end = end - 1; 1157 crashk_res.end = end - 1;
1161 crash_unmap_reserved_pages(); 1158 crash_unmap_reserved_pages();
1162 1159
1163 unlock: 1160 unlock:
1164 mutex_unlock(&kexec_mutex); 1161 mutex_unlock(&kexec_mutex);
1165 return ret; 1162 return ret;
1166 } 1163 }
1167 1164
1168 static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, 1165 static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
1169 size_t data_len) 1166 size_t data_len)
1170 { 1167 {
1171 struct elf_note note; 1168 struct elf_note note;
1172 1169
1173 note.n_namesz = strlen(name) + 1; 1170 note.n_namesz = strlen(name) + 1;
1174 note.n_descsz = data_len; 1171 note.n_descsz = data_len;
1175 note.n_type = type; 1172 note.n_type = type;
1176 memcpy(buf, &note, sizeof(note)); 1173 memcpy(buf, &note, sizeof(note));
1177 buf += (sizeof(note) + 3)/4; 1174 buf += (sizeof(note) + 3)/4;
1178 memcpy(buf, name, note.n_namesz); 1175 memcpy(buf, name, note.n_namesz);
1179 buf += (note.n_namesz + 3)/4; 1176 buf += (note.n_namesz + 3)/4;
1180 memcpy(buf, data, note.n_descsz); 1177 memcpy(buf, data, note.n_descsz);
1181 buf += (note.n_descsz + 3)/4; 1178 buf += (note.n_descsz + 3)/4;
1182 1179
1183 return buf; 1180 return buf;
1184 } 1181 }
1185 1182
1186 static void final_note(u32 *buf) 1183 static void final_note(u32 *buf)
1187 { 1184 {
1188 struct elf_note note; 1185 struct elf_note note;
1189 1186
1190 note.n_namesz = 0; 1187 note.n_namesz = 0;
1191 note.n_descsz = 0; 1188 note.n_descsz = 0;
1192 note.n_type = 0; 1189 note.n_type = 0;
1193 memcpy(buf, &note, sizeof(note)); 1190 memcpy(buf, &note, sizeof(note));
1194 } 1191 }
1195 1192
1196 void crash_save_cpu(struct pt_regs *regs, int cpu) 1193 void crash_save_cpu(struct pt_regs *regs, int cpu)
1197 { 1194 {
1198 struct elf_prstatus prstatus; 1195 struct elf_prstatus prstatus;
1199 u32 *buf; 1196 u32 *buf;
1200 1197
1201 if ((cpu < 0) || (cpu >= nr_cpu_ids)) 1198 if ((cpu < 0) || (cpu >= nr_cpu_ids))
1202 return; 1199 return;
1203 1200
1204 /* Using ELF notes here is opportunistic. 1201 /* Using ELF notes here is opportunistic.
1205 * I need a well defined structure format 1202 * I need a well defined structure format
1206 * for the data I pass, and I need tags 1203 * for the data I pass, and I need tags
1207 * on the data to indicate what information I have 1204 * on the data to indicate what information I have
1208 * squirrelled away. ELF notes happen to provide 1205 * squirrelled away. ELF notes happen to provide
1209 * all of that, so there is no need to invent something new. 1206 * all of that, so there is no need to invent something new.
1210 */ 1207 */
1211 buf = (u32*)per_cpu_ptr(crash_notes, cpu); 1208 buf = (u32*)per_cpu_ptr(crash_notes, cpu);
1212 if (!buf) 1209 if (!buf)
1213 return; 1210 return;
1214 memset(&prstatus, 0, sizeof(prstatus)); 1211 memset(&prstatus, 0, sizeof(prstatus));
1215 prstatus.pr_pid = current->pid; 1212 prstatus.pr_pid = current->pid;
1216 elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); 1213 elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
1217 buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, 1214 buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
1218 &prstatus, sizeof(prstatus)); 1215 &prstatus, sizeof(prstatus));
1219 final_note(buf); 1216 final_note(buf);
1220 } 1217 }
1221 1218
1222 static int __init crash_notes_memory_init(void) 1219 static int __init crash_notes_memory_init(void)
1223 { 1220 {
1224 /* Allocate memory for saving cpu registers. */ 1221 /* Allocate memory for saving cpu registers. */
1225 crash_notes = alloc_percpu(note_buf_t); 1222 crash_notes = alloc_percpu(note_buf_t);
1226 if (!crash_notes) { 1223 if (!crash_notes) {
1227 printk("Kexec: Memory allocation for saving cpu register" 1224 printk("Kexec: Memory allocation for saving cpu register"
1228 " states failed\n"); 1225 " states failed\n");
1229 return -ENOMEM; 1226 return -ENOMEM;
1230 } 1227 }
1231 return 0; 1228 return 0;
1232 } 1229 }
1233 module_init(crash_notes_memory_init) 1230 module_init(crash_notes_memory_init)
1234 1231
1235 1232
1236 /* 1233 /*
1237 * parsing the "crashkernel" commandline 1234 * parsing the "crashkernel" commandline
1238 * 1235 *
1239 * this code is intended to be called from architecture specific code 1236 * this code is intended to be called from architecture specific code
1240 */ 1237 */
1241 1238
1242 1239
1243 /* 1240 /*
1244 * This function parses command lines in the format 1241 * This function parses command lines in the format
1245 * 1242 *
1246 * crashkernel=ramsize-range:size[,...][@offset] 1243 * crashkernel=ramsize-range:size[,...][@offset]
1247 * 1244 *
1248 * The function returns 0 on success and -EINVAL on failure. 1245 * The function returns 0 on success and -EINVAL on failure.
1249 */ 1246 */
1250 static int __init parse_crashkernel_mem(char *cmdline, 1247 static int __init parse_crashkernel_mem(char *cmdline,
1251 unsigned long long system_ram, 1248 unsigned long long system_ram,
1252 unsigned long long *crash_size, 1249 unsigned long long *crash_size,
1253 unsigned long long *crash_base) 1250 unsigned long long *crash_base)
1254 { 1251 {
1255 char *cur = cmdline, *tmp; 1252 char *cur = cmdline, *tmp;
1256 1253
1257 /* for each entry of the comma-separated list */ 1254 /* for each entry of the comma-separated list */
1258 do { 1255 do {
1259 unsigned long long start, end = ULLONG_MAX, size; 1256 unsigned long long start, end = ULLONG_MAX, size;
1260 1257
1261 /* get the start of the range */ 1258 /* get the start of the range */
1262 start = memparse(cur, &tmp); 1259 start = memparse(cur, &tmp);
1263 if (cur == tmp) { 1260 if (cur == tmp) {
1264 pr_warning("crashkernel: Memory value expected\n"); 1261 pr_warning("crashkernel: Memory value expected\n");
1265 return -EINVAL; 1262 return -EINVAL;
1266 } 1263 }
1267 cur = tmp; 1264 cur = tmp;
1268 if (*cur != '-') { 1265 if (*cur != '-') {
1269 pr_warning("crashkernel: '-' expected\n"); 1266 pr_warning("crashkernel: '-' expected\n");
1270 return -EINVAL; 1267 return -EINVAL;
1271 } 1268 }
1272 cur++; 1269 cur++;
1273 1270
1274 /* if no ':' is here, than we read the end */ 1271 /* if no ':' is here, than we read the end */
1275 if (*cur != ':') { 1272 if (*cur != ':') {
1276 end = memparse(cur, &tmp); 1273 end = memparse(cur, &tmp);
1277 if (cur == tmp) { 1274 if (cur == tmp) {
1278 pr_warning("crashkernel: Memory " 1275 pr_warning("crashkernel: Memory "
1279 "value expected\n"); 1276 "value expected\n");
1280 return -EINVAL; 1277 return -EINVAL;
1281 } 1278 }
1282 cur = tmp; 1279 cur = tmp;
1283 if (end <= start) { 1280 if (end <= start) {
1284 pr_warning("crashkernel: end <= start\n"); 1281 pr_warning("crashkernel: end <= start\n");
1285 return -EINVAL; 1282 return -EINVAL;
1286 } 1283 }
1287 } 1284 }
1288 1285
1289 if (*cur != ':') { 1286 if (*cur != ':') {
1290 pr_warning("crashkernel: ':' expected\n"); 1287 pr_warning("crashkernel: ':' expected\n");
1291 return -EINVAL; 1288 return -EINVAL;
1292 } 1289 }
1293 cur++; 1290 cur++;
1294 1291
1295 size = memparse(cur, &tmp); 1292 size = memparse(cur, &tmp);
1296 if (cur == tmp) { 1293 if (cur == tmp) {
1297 pr_warning("Memory value expected\n"); 1294 pr_warning("Memory value expected\n");
1298 return -EINVAL; 1295 return -EINVAL;
1299 } 1296 }
1300 cur = tmp; 1297 cur = tmp;
1301 if (size >= system_ram) { 1298 if (size >= system_ram) {
1302 pr_warning("crashkernel: invalid size\n"); 1299 pr_warning("crashkernel: invalid size\n");
1303 return -EINVAL; 1300 return -EINVAL;
1304 } 1301 }
1305 1302
1306 /* match ? */ 1303 /* match ? */
1307 if (system_ram >= start && system_ram < end) { 1304 if (system_ram >= start && system_ram < end) {
1308 *crash_size = size; 1305 *crash_size = size;
1309 break; 1306 break;
1310 } 1307 }
1311 } while (*cur++ == ','); 1308 } while (*cur++ == ',');
1312 1309
1313 if (*crash_size > 0) { 1310 if (*crash_size > 0) {
1314 while (*cur && *cur != ' ' && *cur != '@') 1311 while (*cur && *cur != ' ' && *cur != '@')
1315 cur++; 1312 cur++;
1316 if (*cur == '@') { 1313 if (*cur == '@') {
1317 cur++; 1314 cur++;
1318 *crash_base = memparse(cur, &tmp); 1315 *crash_base = memparse(cur, &tmp);
1319 if (cur == tmp) { 1316 if (cur == tmp) {
1320 pr_warning("Memory value expected " 1317 pr_warning("Memory value expected "
1321 "after '@'\n"); 1318 "after '@'\n");
1322 return -EINVAL; 1319 return -EINVAL;
1323 } 1320 }
1324 } 1321 }
1325 } 1322 }
1326 1323
1327 return 0; 1324 return 0;
1328 } 1325 }
1329 1326
1330 /* 1327 /*
1331 * That function parses "simple" (old) crashkernel command lines like 1328 * That function parses "simple" (old) crashkernel command lines like
1332 * 1329 *
1333 * crashkernel=size[@offset] 1330 * crashkernel=size[@offset]
1334 * 1331 *
1335 * It returns 0 on success and -EINVAL on failure. 1332 * It returns 0 on success and -EINVAL on failure.
1336 */ 1333 */
1337 static int __init parse_crashkernel_simple(char *cmdline, 1334 static int __init parse_crashkernel_simple(char *cmdline,
1338 unsigned long long *crash_size, 1335 unsigned long long *crash_size,
1339 unsigned long long *crash_base) 1336 unsigned long long *crash_base)
1340 { 1337 {
1341 char *cur = cmdline; 1338 char *cur = cmdline;
1342 1339
1343 *crash_size = memparse(cmdline, &cur); 1340 *crash_size = memparse(cmdline, &cur);
1344 if (cmdline == cur) { 1341 if (cmdline == cur) {
1345 pr_warning("crashkernel: memory value expected\n"); 1342 pr_warning("crashkernel: memory value expected\n");
1346 return -EINVAL; 1343 return -EINVAL;
1347 } 1344 }
1348 1345
1349 if (*cur == '@') 1346 if (*cur == '@')
1350 *crash_base = memparse(cur+1, &cur); 1347 *crash_base = memparse(cur+1, &cur);
1351 1348
1352 return 0; 1349 return 0;
1353 } 1350 }
1354 1351
1355 /* 1352 /*
1356 * That function is the entry point for command line parsing and should be 1353 * That function is the entry point for command line parsing and should be
1357 * called from the arch-specific code. 1354 * called from the arch-specific code.
1358 */ 1355 */
1359 int __init parse_crashkernel(char *cmdline, 1356 int __init parse_crashkernel(char *cmdline,
1360 unsigned long long system_ram, 1357 unsigned long long system_ram,
1361 unsigned long long *crash_size, 1358 unsigned long long *crash_size,
1362 unsigned long long *crash_base) 1359 unsigned long long *crash_base)
1363 { 1360 {
1364 char *p = cmdline, *ck_cmdline = NULL; 1361 char *p = cmdline, *ck_cmdline = NULL;
1365 char *first_colon, *first_space; 1362 char *first_colon, *first_space;
1366 1363
1367 BUG_ON(!crash_size || !crash_base); 1364 BUG_ON(!crash_size || !crash_base);
1368 *crash_size = 0; 1365 *crash_size = 0;
1369 *crash_base = 0; 1366 *crash_base = 0;
1370 1367
1371 /* find crashkernel and use the last one if there are more */ 1368 /* find crashkernel and use the last one if there are more */
1372 p = strstr(p, "crashkernel="); 1369 p = strstr(p, "crashkernel=");
1373 while (p) { 1370 while (p) {
1374 ck_cmdline = p; 1371 ck_cmdline = p;
1375 p = strstr(p+1, "crashkernel="); 1372 p = strstr(p+1, "crashkernel=");
1376 } 1373 }
1377 1374
1378 if (!ck_cmdline) 1375 if (!ck_cmdline)
1379 return -EINVAL; 1376 return -EINVAL;
1380 1377
1381 ck_cmdline += 12; /* strlen("crashkernel=") */ 1378 ck_cmdline += 12; /* strlen("crashkernel=") */
1382 1379
1383 /* 1380 /*
1384 * if the commandline contains a ':', then that's the extended 1381 * if the commandline contains a ':', then that's the extended
1385 * syntax -- if not, it must be the classic syntax 1382 * syntax -- if not, it must be the classic syntax
1386 */ 1383 */
1387 first_colon = strchr(ck_cmdline, ':'); 1384 first_colon = strchr(ck_cmdline, ':');
1388 first_space = strchr(ck_cmdline, ' '); 1385 first_space = strchr(ck_cmdline, ' ');
1389 if (first_colon && (!first_space || first_colon < first_space)) 1386 if (first_colon && (!first_space || first_colon < first_space))
1390 return parse_crashkernel_mem(ck_cmdline, system_ram, 1387 return parse_crashkernel_mem(ck_cmdline, system_ram,
1391 crash_size, crash_base); 1388 crash_size, crash_base);
1392 else 1389 else
1393 return parse_crashkernel_simple(ck_cmdline, crash_size, 1390 return parse_crashkernel_simple(ck_cmdline, crash_size,
1394 crash_base); 1391 crash_base);
1395 1392
1396 return 0; 1393 return 0;
1397 } 1394 }
1398 1395
1399 1396
1400 static void update_vmcoreinfo_note(void) 1397 static void update_vmcoreinfo_note(void)
1401 { 1398 {
1402 u32 *buf = vmcoreinfo_note; 1399 u32 *buf = vmcoreinfo_note;
1403 1400
1404 if (!vmcoreinfo_size) 1401 if (!vmcoreinfo_size)
1405 return; 1402 return;
1406 buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, 1403 buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
1407 vmcoreinfo_size); 1404 vmcoreinfo_size);
1408 final_note(buf); 1405 final_note(buf);
1409 } 1406 }
1410 1407
/*
 * Append a "CRASHTIME=<seconds>" entry, taken from get_seconds(), to the
 * vmcoreinfo data and then rebuild the vmcoreinfo note so that it includes
 * the new entry.
 */
void crash_save_vmcoreinfo(void)
{
	vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());

	update_vmcoreinfo_note();
}
1416 1413
1417 void vmcoreinfo_append_str(const char *fmt, ...) 1414 void vmcoreinfo_append_str(const char *fmt, ...)
1418 { 1415 {
1419 va_list args; 1416 va_list args;
1420 char buf[0x50]; 1417 char buf[0x50];
1421 int r; 1418 int r;
1422 1419
1423 va_start(args, fmt); 1420 va_start(args, fmt);
1424 r = vsnprintf(buf, sizeof(buf), fmt, args); 1421 r = vsnprintf(buf, sizeof(buf), fmt, args);
1425 va_end(args); 1422 va_end(args);
1426 1423
1427 if (r + vmcoreinfo_size > vmcoreinfo_max_size) 1424 if (r + vmcoreinfo_size > vmcoreinfo_max_size)
1428 r = vmcoreinfo_max_size - vmcoreinfo_size; 1425 r = vmcoreinfo_max_size - vmcoreinfo_size;
1429 1426
1430 memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); 1427 memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
1431 1428
1432 vmcoreinfo_size += r; 1429 vmcoreinfo_size += r;
1433 } 1430 }
1434 1431
/*
 * Default, do-nothing implementation.  The symbol is weak so that
 * architecture code may supply its own version instead.
 */
void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void)
{
}
1441 1438
1442 unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void) 1439 unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
1443 { 1440 {
1444 return __pa((unsigned long)(char *)&vmcoreinfo_note); 1441 return __pa((unsigned long)(char *)&vmcoreinfo_note);
1445 } 1442 }
1446 1443
/*
 * Initcall (registered through module_init() below) that records the
 * generic vmcoreinfo entries, lets the architecture add its own through
 * arch_crash_save_vmcoreinfo(), and finally rebuilds the vmcoreinfo note.
 * Always returns 0.
 *
 * NOTE(review): the VMCOREINFO_* macros are defined outside this chunk;
 * they presumably append one entry each via vmcoreinfo_append_str(), in
 * which case the statement order below is the order of entries in the
 * data -- confirm before reordering anything.
 */
static int __init crash_save_vmcoreinfo_init(void)
{
	/* kernel release string and page size */
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	/* global symbols */
	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
	VMCOREINFO_SYMBOL(swapper_pg_dir);
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmlist);

	/* memory-model dependent symbols */
#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	/* sizes of core memory-management types */
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	/* member offsets within those types */
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _count);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vm_struct, addr);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	/*
	 * NOTE(review): log_buf_kexec_setup() is defined elsewhere;
	 * presumably it adds the kernel log buffer entries -- confirm.
	 */
	log_buf_kexec_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	/* enum/constant values */
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);

	/* architecture hook, then rebuild the note from the collected data */
	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}

module_init(crash_save_vmcoreinfo_init)
1508 1505
/*
 * Move into place and start executing a preloaded standalone
 * executable.  If nothing was preloaded return an error.
 *
 * Returns -EBUSY when kexec_mutex cannot be taken without blocking (or,
 * with CONFIG_KEXEC_JUMP, when freeze_processes() fails), -EINVAL when no
 * image is loaded, and otherwise the error of whichever suspend step
 * failed.  When control comes back here and nothing failed, 0 is returned.
 *
 * With CONFIG_KEXEC_JUMP and an image that has preserve_context set, the
 * system is suspended step by step before machine_kexec() and resumed in
 * the reverse order afterwards.  The error labels sit inside that resume
 * sequence so that a failing suspend step undoes only what was already
 * done.
 */
int kernel_kexec(void)
{
	int error = 0;

	/* trylock: fail with -EBUSY rather than wait for the mutex */
	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
		goto Unlock;
	}

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		lock_system_sleep();
		pm_prepare_console();
		error = freeze_processes();
		if (error) {
			/* whatever freeze_processes() returned is reported as -EBUSY */
			error = -EBUSY;
			goto Restore_console;
		}
		suspend_console();
		error = dpm_suspend_start(PMSG_FREEZE);
		if (error)
			goto Resume_console;
		/* At this point, dpm_suspend_start() has been called,
		 * but *not* dpm_suspend_noirq(). We *must* call
		 * dpm_suspend_noirq() now.  Otherwise, drivers for
		 * some devices (e.g. interrupt controllers) become
		 * desynchronized with the actual state of the
		 * hardware at resume time, and evil weirdness ensues.
		 */
		error = dpm_suspend_noirq(PMSG_FREEZE);
		if (error)
			goto Resume_devices;
		error = disable_nonboot_cpus();
		if (error)
			goto Enable_cpus;
		local_irq_disable();
		error = syscore_suspend();
		if (error)
			goto Enable_irqs;
	} else
#endif
	{
		/* no context to preserve: prepare for restart and shut down */
		kernel_restart_prepare(NULL);
		printk(KERN_EMERG "Starting new kernel\n");
		machine_shutdown();
	}

	machine_kexec(kexec_image);

#ifdef CONFIG_KEXEC_JUMP
	/*
	 * Resume path: the mirror image of the suspend sequence above.
	 * Each label is the entry point for the matching failed step.
	 */
	if (kexec_image->preserve_context) {
		syscore_resume();
 Enable_irqs:
		local_irq_enable();
 Enable_cpus:
		enable_nonboot_cpus();
		dpm_resume_noirq(PMSG_RESTORE);
 Resume_devices:
		dpm_resume_end(PMSG_RESTORE);
 Resume_console:
		resume_console();
		thaw_processes();
 Restore_console:
		pm_restore_console();
		unlock_system_sleep();
	}
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	return error;
}
1587 1584