Commit 7fccf0326536c1b245b98740d489abb9aab69a12

Authored by WANG Cong
Committed by Linus Torvalds
1 parent f1d8269802

kernel/kexec.c: make 'kimage_terminate' void

Since kimage_terminate() always returns 0, make it void.

Signed-off-by: WANG Cong <wangcong@zeuux.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
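
The rationale generalizes: when a function can only ever report success, its int return value and the error handling at every call site are dead code. Below is a minimal standalone sketch of the before/after shape this commit applies (illustrative only; the names are hypothetical and not taken from the kernel tree):

/* Before: the function cannot fail, yet callers must still
 * check a return value that is always 0. */
static int list_terminate_old(unsigned long *entry)
{
	*entry = 0;	/* mark the end of the list */
	return 0;	/* always succeeds */
}

/* After: a void return lets every caller drop its dead
 * "if (result) goto out;" branch. */
static void list_terminate(unsigned long *entry)
{
	*entry = 0;	/* mark the end of the list */
}

static int load_image(unsigned long *entry)
{
	/* old call site:
	 *	result = list_terminate_old(entry);
	 *	if (result)
	 *		goto out;
	 * new call site: no error path to handle */
	list_terminate(entry);
	return 0;
}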

Showing 1 changed file with 2 additions and 6 deletions

1 /* 1 /*
2 * kexec.c - kexec system call 2 * kexec.c - kexec system call
3 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> 3 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
4 * 4 *
5 * This source code is licensed under the GNU General Public License, 5 * This source code is licensed under the GNU General Public License,
6 * Version 2. See the file COPYING for more details. 6 * Version 2. See the file COPYING for more details.
7 */ 7 */
8 8
9 #include <linux/capability.h> 9 #include <linux/capability.h>
10 #include <linux/mm.h> 10 #include <linux/mm.h>
11 #include <linux/file.h> 11 #include <linux/file.h>
12 #include <linux/slab.h> 12 #include <linux/slab.h>
13 #include <linux/fs.h> 13 #include <linux/fs.h>
14 #include <linux/kexec.h> 14 #include <linux/kexec.h>
15 #include <linux/spinlock.h> 15 #include <linux/spinlock.h>
16 #include <linux/list.h> 16 #include <linux/list.h>
17 #include <linux/highmem.h> 17 #include <linux/highmem.h>
18 #include <linux/syscalls.h> 18 #include <linux/syscalls.h>
19 #include <linux/reboot.h> 19 #include <linux/reboot.h>
20 #include <linux/ioport.h> 20 #include <linux/ioport.h>
21 #include <linux/hardirq.h> 21 #include <linux/hardirq.h>
22 #include <linux/elf.h> 22 #include <linux/elf.h>
23 #include <linux/elfcore.h> 23 #include <linux/elfcore.h>
24 #include <linux/utsrelease.h> 24 #include <linux/utsrelease.h>
25 #include <linux/utsname.h> 25 #include <linux/utsname.h>
26 #include <linux/numa.h> 26 #include <linux/numa.h>
27 27
28 #include <asm/page.h> 28 #include <asm/page.h>
29 #include <asm/uaccess.h> 29 #include <asm/uaccess.h>
30 #include <asm/io.h> 30 #include <asm/io.h>
31 #include <asm/system.h> 31 #include <asm/system.h>
32 #include <asm/sections.h> 32 #include <asm/sections.h>
33 33
34 /* Per cpu memory for storing cpu states in case of system crash. */ 34 /* Per cpu memory for storing cpu states in case of system crash. */
35 note_buf_t* crash_notes; 35 note_buf_t* crash_notes;
36 36
37 /* vmcoreinfo stuff */ 37 /* vmcoreinfo stuff */
38 unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; 38 unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
39 u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; 39 u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
40 size_t vmcoreinfo_size; 40 size_t vmcoreinfo_size;
41 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); 41 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
42 42
43 /* Location of the reserved area for the crash kernel */ 43 /* Location of the reserved area for the crash kernel */
44 struct resource crashk_res = { 44 struct resource crashk_res = {
45 .name = "Crash kernel", 45 .name = "Crash kernel",
46 .start = 0, 46 .start = 0,
47 .end = 0, 47 .end = 0,
48 .flags = IORESOURCE_BUSY | IORESOURCE_MEM 48 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
49 }; 49 };
50 50
51 int kexec_should_crash(struct task_struct *p) 51 int kexec_should_crash(struct task_struct *p)
52 { 52 {
53 if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) 53 if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
54 return 1; 54 return 1;
55 return 0; 55 return 0;
56 } 56 }
57 57
58 /* 58 /*
59 * When kexec transitions to the new kernel there is a one-to-one 59 * When kexec transitions to the new kernel there is a one-to-one
60 * mapping between physical and virtual addresses. On processors 60 * mapping between physical and virtual addresses. On processors
61 * where you can disable the MMU this is trivial, and easy. For 61 * where you can disable the MMU this is trivial, and easy. For
62 * others it is still a simple predictable page table to setup. 62 * others it is still a simple predictable page table to setup.
63 * 63 *
64 * In that environment kexec copies the new kernel to its final 64 * In that environment kexec copies the new kernel to its final
65 * resting place. This means I can only support memory whose 65 * resting place. This means I can only support memory whose
66 * physical address can fit in an unsigned long. In particular 66 * physical address can fit in an unsigned long. In particular
67 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. 67 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
68 * If the assembly stub has more restrictive requirements 68 * If the assembly stub has more restrictive requirements
69 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be 69 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
70 * defined more restrictively in <asm/kexec.h>. 70 * defined more restrictively in <asm/kexec.h>.
71 * 71 *
72 * The code for the transition from the current kernel to the 72 * The code for the transition from the current kernel to the
73 * the new kernel is placed in the control_code_buffer, whose size 73 * the new kernel is placed in the control_code_buffer, whose size
74 * is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single 74 * is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single
75 * page of memory is necessary, but some architectures require more. 75 * page of memory is necessary, but some architectures require more.
76 * Because this memory must be identity mapped in the transition from 76 * Because this memory must be identity mapped in the transition from
77 * virtual to physical addresses it must live in the range 77 * virtual to physical addresses it must live in the range
78 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily 78 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
79 * modifiable. 79 * modifiable.
80 * 80 *
81 * The assembly stub in the control code buffer is passed a linked list 81 * The assembly stub in the control code buffer is passed a linked list
82 * of descriptor pages detailing the source pages of the new kernel, 82 * of descriptor pages detailing the source pages of the new kernel,
83 * and the destination addresses of those source pages. As this data 83 * and the destination addresses of those source pages. As this data
84 * structure is not used in the context of the current OS, it must 84 * structure is not used in the context of the current OS, it must
85 * be self-contained. 85 * be self-contained.
86 * 86 *
87 * The code has been made to work with highmem pages and will use a 87 * The code has been made to work with highmem pages and will use a
88 * destination page in its final resting place (if it happens 88 * destination page in its final resting place (if it happens
89 * to allocate it). The end product of this is that most of the 89 * to allocate it). The end product of this is that most of the
90 * physical address space, and most of RAM can be used. 90 * physical address space, and most of RAM can be used.
91 * 91 *
92 * Future directions include: 92 * Future directions include:
93 * - allocating a page table with the control code buffer identity 93 * - allocating a page table with the control code buffer identity
94 * mapped, to simplify machine_kexec and make kexec_on_panic more 94 * mapped, to simplify machine_kexec and make kexec_on_panic more
95 * reliable. 95 * reliable.
96 */ 96 */
97 97
98 /* 98 /*
99 * KIMAGE_NO_DEST is an impossible destination address..., for 99 * KIMAGE_NO_DEST is an impossible destination address..., for
100 * allocating pages whose destination address we do not care about. 100 * allocating pages whose destination address we do not care about.
101 */ 101 */
102 #define KIMAGE_NO_DEST (-1UL) 102 #define KIMAGE_NO_DEST (-1UL)
103 103
104 static int kimage_is_destination_range(struct kimage *image, 104 static int kimage_is_destination_range(struct kimage *image,
105 unsigned long start, unsigned long end); 105 unsigned long start, unsigned long end);
106 static struct page *kimage_alloc_page(struct kimage *image, 106 static struct page *kimage_alloc_page(struct kimage *image,
107 gfp_t gfp_mask, 107 gfp_t gfp_mask,
108 unsigned long dest); 108 unsigned long dest);
109 109
110 static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, 110 static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
111 unsigned long nr_segments, 111 unsigned long nr_segments,
112 struct kexec_segment __user *segments) 112 struct kexec_segment __user *segments)
113 { 113 {
114 size_t segment_bytes; 114 size_t segment_bytes;
115 struct kimage *image; 115 struct kimage *image;
116 unsigned long i; 116 unsigned long i;
117 int result; 117 int result;
118 118
119 /* Allocate a controlling structure */ 119 /* Allocate a controlling structure */
120 result = -ENOMEM; 120 result = -ENOMEM;
121 image = kzalloc(sizeof(*image), GFP_KERNEL); 121 image = kzalloc(sizeof(*image), GFP_KERNEL);
122 if (!image) 122 if (!image)
123 goto out; 123 goto out;
124 124
125 image->head = 0; 125 image->head = 0;
126 image->entry = &image->head; 126 image->entry = &image->head;
127 image->last_entry = &image->head; 127 image->last_entry = &image->head;
128 image->control_page = ~0; /* By default this does not apply */ 128 image->control_page = ~0; /* By default this does not apply */
129 image->start = entry; 129 image->start = entry;
130 image->type = KEXEC_TYPE_DEFAULT; 130 image->type = KEXEC_TYPE_DEFAULT;
131 131
132 /* Initialize the list of control pages */ 132 /* Initialize the list of control pages */
133 INIT_LIST_HEAD(&image->control_pages); 133 INIT_LIST_HEAD(&image->control_pages);
134 134
135 /* Initialize the list of destination pages */ 135 /* Initialize the list of destination pages */
136 INIT_LIST_HEAD(&image->dest_pages); 136 INIT_LIST_HEAD(&image->dest_pages);
137 137
138 /* Initialize the list of unuseable pages */ 138 /* Initialize the list of unuseable pages */
139 INIT_LIST_HEAD(&image->unuseable_pages); 139 INIT_LIST_HEAD(&image->unuseable_pages);
140 140
141 /* Read in the segments */ 141 /* Read in the segments */
142 image->nr_segments = nr_segments; 142 image->nr_segments = nr_segments;
143 segment_bytes = nr_segments * sizeof(*segments); 143 segment_bytes = nr_segments * sizeof(*segments);
144 result = copy_from_user(image->segment, segments, segment_bytes); 144 result = copy_from_user(image->segment, segments, segment_bytes);
145 if (result) 145 if (result)
146 goto out; 146 goto out;
147 147
148 /* 148 /*
149 * Verify we have good destination addresses. The caller is 149 * Verify we have good destination addresses. The caller is
150 * responsible for making certain we don't attempt to load 150 * responsible for making certain we don't attempt to load
151 * the new image into invalid or reserved areas of RAM. This 151 * the new image into invalid or reserved areas of RAM. This
152 * just verifies it is an address we can use. 152 * just verifies it is an address we can use.
153 * 153 *
154 * Since the kernel does everything in page size chunks ensure 154 * Since the kernel does everything in page size chunks ensure
155 * the destination addreses are page aligned. Too many 155 * the destination addreses are page aligned. Too many
156 * special cases crop of when we don't do this. The most 156 * special cases crop of when we don't do this. The most
157 * insidious is getting overlapping destination addresses 157 * insidious is getting overlapping destination addresses
158 * simply because addresses are changed to page size 158 * simply because addresses are changed to page size
159 * granularity. 159 * granularity.
160 */ 160 */
161 result = -EADDRNOTAVAIL; 161 result = -EADDRNOTAVAIL;
162 for (i = 0; i < nr_segments; i++) { 162 for (i = 0; i < nr_segments; i++) {
163 unsigned long mstart, mend; 163 unsigned long mstart, mend;
164 164
165 mstart = image->segment[i].mem; 165 mstart = image->segment[i].mem;
166 mend = mstart + image->segment[i].memsz; 166 mend = mstart + image->segment[i].memsz;
167 if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) 167 if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
168 goto out; 168 goto out;
169 if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) 169 if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
170 goto out; 170 goto out;
171 } 171 }
172 172
173 /* Verify our destination addresses do not overlap. 173 /* Verify our destination addresses do not overlap.
174 * If we alloed overlapping destination addresses 174 * If we alloed overlapping destination addresses
175 * through very weird things can happen with no 175 * through very weird things can happen with no
176 * easy explanation as one segment stops on another. 176 * easy explanation as one segment stops on another.
177 */ 177 */
178 result = -EINVAL; 178 result = -EINVAL;
179 for (i = 0; i < nr_segments; i++) { 179 for (i = 0; i < nr_segments; i++) {
180 unsigned long mstart, mend; 180 unsigned long mstart, mend;
181 unsigned long j; 181 unsigned long j;
182 182
183 mstart = image->segment[i].mem; 183 mstart = image->segment[i].mem;
184 mend = mstart + image->segment[i].memsz; 184 mend = mstart + image->segment[i].memsz;
185 for (j = 0; j < i; j++) { 185 for (j = 0; j < i; j++) {
186 unsigned long pstart, pend; 186 unsigned long pstart, pend;
187 pstart = image->segment[j].mem; 187 pstart = image->segment[j].mem;
188 pend = pstart + image->segment[j].memsz; 188 pend = pstart + image->segment[j].memsz;
189 /* Do the segments overlap ? */ 189 /* Do the segments overlap ? */
190 if ((mend > pstart) && (mstart < pend)) 190 if ((mend > pstart) && (mstart < pend))
191 goto out; 191 goto out;
192 } 192 }
193 } 193 }
194 194
195 /* Ensure our buffer sizes are strictly less than 195 /* Ensure our buffer sizes are strictly less than
196 * our memory sizes. This should always be the case, 196 * our memory sizes. This should always be the case,
197 * and it is easier to check up front than to be surprised 197 * and it is easier to check up front than to be surprised
198 * later on. 198 * later on.
199 */ 199 */
200 result = -EINVAL; 200 result = -EINVAL;
201 for (i = 0; i < nr_segments; i++) { 201 for (i = 0; i < nr_segments; i++) {
202 if (image->segment[i].bufsz > image->segment[i].memsz) 202 if (image->segment[i].bufsz > image->segment[i].memsz)
203 goto out; 203 goto out;
204 } 204 }
205 205
206 result = 0; 206 result = 0;
207 out: 207 out:
208 if (result == 0) 208 if (result == 0)
209 *rimage = image; 209 *rimage = image;
210 else 210 else
211 kfree(image); 211 kfree(image);
212 212
213 return result; 213 return result;
214 214
215 } 215 }
216 216
217 static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, 217 static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
218 unsigned long nr_segments, 218 unsigned long nr_segments,
219 struct kexec_segment __user *segments) 219 struct kexec_segment __user *segments)
220 { 220 {
221 int result; 221 int result;
222 struct kimage *image; 222 struct kimage *image;
223 223
224 /* Allocate and initialize a controlling structure */ 224 /* Allocate and initialize a controlling structure */
225 image = NULL; 225 image = NULL;
226 result = do_kimage_alloc(&image, entry, nr_segments, segments); 226 result = do_kimage_alloc(&image, entry, nr_segments, segments);
227 if (result) 227 if (result)
228 goto out; 228 goto out;
229 229
230 *rimage = image; 230 *rimage = image;
231 231
232 /* 232 /*
233 * Find a location for the control code buffer, and add it 233 * Find a location for the control code buffer, and add it
234 * the vector of segments so that it's pages will also be 234 * the vector of segments so that it's pages will also be
235 * counted as destination pages. 235 * counted as destination pages.
236 */ 236 */
237 result = -ENOMEM; 237 result = -ENOMEM;
238 image->control_code_page = kimage_alloc_control_pages(image, 238 image->control_code_page = kimage_alloc_control_pages(image,
239 get_order(KEXEC_CONTROL_CODE_SIZE)); 239 get_order(KEXEC_CONTROL_CODE_SIZE));
240 if (!image->control_code_page) { 240 if (!image->control_code_page) {
241 printk(KERN_ERR "Could not allocate control_code_buffer\n"); 241 printk(KERN_ERR "Could not allocate control_code_buffer\n");
242 goto out; 242 goto out;
243 } 243 }
244 244
245 result = 0; 245 result = 0;
246 out: 246 out:
247 if (result == 0) 247 if (result == 0)
248 *rimage = image; 248 *rimage = image;
249 else 249 else
250 kfree(image); 250 kfree(image);
251 251
252 return result; 252 return result;
253 } 253 }
254 254
255 static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, 255 static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
256 unsigned long nr_segments, 256 unsigned long nr_segments,
257 struct kexec_segment __user *segments) 257 struct kexec_segment __user *segments)
258 { 258 {
259 int result; 259 int result;
260 struct kimage *image; 260 struct kimage *image;
261 unsigned long i; 261 unsigned long i;
262 262
263 image = NULL; 263 image = NULL;
264 /* Verify we have a valid entry point */ 264 /* Verify we have a valid entry point */
265 if ((entry < crashk_res.start) || (entry > crashk_res.end)) { 265 if ((entry < crashk_res.start) || (entry > crashk_res.end)) {
266 result = -EADDRNOTAVAIL; 266 result = -EADDRNOTAVAIL;
267 goto out; 267 goto out;
268 } 268 }
269 269
270 /* Allocate and initialize a controlling structure */ 270 /* Allocate and initialize a controlling structure */
271 result = do_kimage_alloc(&image, entry, nr_segments, segments); 271 result = do_kimage_alloc(&image, entry, nr_segments, segments);
272 if (result) 272 if (result)
273 goto out; 273 goto out;
274 274
275 /* Enable the special crash kernel control page 275 /* Enable the special crash kernel control page
276 * allocation policy. 276 * allocation policy.
277 */ 277 */
278 image->control_page = crashk_res.start; 278 image->control_page = crashk_res.start;
279 image->type = KEXEC_TYPE_CRASH; 279 image->type = KEXEC_TYPE_CRASH;
280 280
281 /* 281 /*
282 * Verify we have good destination addresses. Normally 282 * Verify we have good destination addresses. Normally
283 * the caller is responsible for making certain we don't 283 * the caller is responsible for making certain we don't
284 * attempt to load the new image into invalid or reserved 284 * attempt to load the new image into invalid or reserved
285 * areas of RAM. But crash kernels are preloaded into a 285 * areas of RAM. But crash kernels are preloaded into a
286 * reserved area of ram. We must ensure the addresses 286 * reserved area of ram. We must ensure the addresses
287 * are in the reserved area otherwise preloading the 287 * are in the reserved area otherwise preloading the
288 * kernel could corrupt things. 288 * kernel could corrupt things.
289 */ 289 */
290 result = -EADDRNOTAVAIL; 290 result = -EADDRNOTAVAIL;
291 for (i = 0; i < nr_segments; i++) { 291 for (i = 0; i < nr_segments; i++) {
292 unsigned long mstart, mend; 292 unsigned long mstart, mend;
293 293
294 mstart = image->segment[i].mem; 294 mstart = image->segment[i].mem;
295 mend = mstart + image->segment[i].memsz - 1; 295 mend = mstart + image->segment[i].memsz - 1;
296 /* Ensure we are within the crash kernel limits */ 296 /* Ensure we are within the crash kernel limits */
297 if ((mstart < crashk_res.start) || (mend > crashk_res.end)) 297 if ((mstart < crashk_res.start) || (mend > crashk_res.end))
298 goto out; 298 goto out;
299 } 299 }
300 300
301 /* 301 /*
302 * Find a location for the control code buffer, and add 302 * Find a location for the control code buffer, and add
303 * the vector of segments so that it's pages will also be 303 * the vector of segments so that it's pages will also be
304 * counted as destination pages. 304 * counted as destination pages.
305 */ 305 */
306 result = -ENOMEM; 306 result = -ENOMEM;
307 image->control_code_page = kimage_alloc_control_pages(image, 307 image->control_code_page = kimage_alloc_control_pages(image,
308 get_order(KEXEC_CONTROL_CODE_SIZE)); 308 get_order(KEXEC_CONTROL_CODE_SIZE));
309 if (!image->control_code_page) { 309 if (!image->control_code_page) {
310 printk(KERN_ERR "Could not allocate control_code_buffer\n"); 310 printk(KERN_ERR "Could not allocate control_code_buffer\n");
311 goto out; 311 goto out;
312 } 312 }
313 313
314 result = 0; 314 result = 0;
315 out: 315 out:
316 if (result == 0) 316 if (result == 0)
317 *rimage = image; 317 *rimage = image;
318 else 318 else
319 kfree(image); 319 kfree(image);
320 320
321 return result; 321 return result;
322 } 322 }
323 323
324 static int kimage_is_destination_range(struct kimage *image, 324 static int kimage_is_destination_range(struct kimage *image,
325 unsigned long start, 325 unsigned long start,
326 unsigned long end) 326 unsigned long end)
327 { 327 {
328 unsigned long i; 328 unsigned long i;
329 329
330 for (i = 0; i < image->nr_segments; i++) { 330 for (i = 0; i < image->nr_segments; i++) {
331 unsigned long mstart, mend; 331 unsigned long mstart, mend;
332 332
333 mstart = image->segment[i].mem; 333 mstart = image->segment[i].mem;
334 mend = mstart + image->segment[i].memsz; 334 mend = mstart + image->segment[i].memsz;
335 if ((end > mstart) && (start < mend)) 335 if ((end > mstart) && (start < mend))
336 return 1; 336 return 1;
337 } 337 }
338 338
339 return 0; 339 return 0;
340 } 340 }
341 341
342 static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) 342 static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
343 { 343 {
344 struct page *pages; 344 struct page *pages;
345 345
346 pages = alloc_pages(gfp_mask, order); 346 pages = alloc_pages(gfp_mask, order);
347 if (pages) { 347 if (pages) {
348 unsigned int count, i; 348 unsigned int count, i;
349 pages->mapping = NULL; 349 pages->mapping = NULL;
350 set_page_private(pages, order); 350 set_page_private(pages, order);
351 count = 1 << order; 351 count = 1 << order;
352 for (i = 0; i < count; i++) 352 for (i = 0; i < count; i++)
353 SetPageReserved(pages + i); 353 SetPageReserved(pages + i);
354 } 354 }
355 355
356 return pages; 356 return pages;
357 } 357 }
358 358
359 static void kimage_free_pages(struct page *page) 359 static void kimage_free_pages(struct page *page)
360 { 360 {
361 unsigned int order, count, i; 361 unsigned int order, count, i;
362 362
363 order = page_private(page); 363 order = page_private(page);
364 count = 1 << order; 364 count = 1 << order;
365 for (i = 0; i < count; i++) 365 for (i = 0; i < count; i++)
366 ClearPageReserved(page + i); 366 ClearPageReserved(page + i);
367 __free_pages(page, order); 367 __free_pages(page, order);
368 } 368 }
369 369
370 static void kimage_free_page_list(struct list_head *list) 370 static void kimage_free_page_list(struct list_head *list)
371 { 371 {
372 struct list_head *pos, *next; 372 struct list_head *pos, *next;
373 373
374 list_for_each_safe(pos, next, list) { 374 list_for_each_safe(pos, next, list) {
375 struct page *page; 375 struct page *page;
376 376
377 page = list_entry(pos, struct page, lru); 377 page = list_entry(pos, struct page, lru);
378 list_del(&page->lru); 378 list_del(&page->lru);
379 kimage_free_pages(page); 379 kimage_free_pages(page);
380 } 380 }
381 } 381 }
382 382
383 static struct page *kimage_alloc_normal_control_pages(struct kimage *image, 383 static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
384 unsigned int order) 384 unsigned int order)
385 { 385 {
386 /* Control pages are special, they are the intermediaries 386 /* Control pages are special, they are the intermediaries
387 * that are needed while we copy the rest of the pages 387 * that are needed while we copy the rest of the pages
388 * to their final resting place. As such they must 388 * to their final resting place. As such they must
389 * not conflict with either the destination addresses 389 * not conflict with either the destination addresses
390 * or memory the kernel is already using. 390 * or memory the kernel is already using.
391 * 391 *
392 * The only case where we really need more than one of 392 * The only case where we really need more than one of
393 * these are for architectures where we cannot disable 393 * these are for architectures where we cannot disable
394 * the MMU and must instead generate an identity mapped 394 * the MMU and must instead generate an identity mapped
395 * page table for all of the memory. 395 * page table for all of the memory.
396 * 396 *
397 * At worst this runs in O(N) of the image size. 397 * At worst this runs in O(N) of the image size.
398 */ 398 */
399 struct list_head extra_pages; 399 struct list_head extra_pages;
400 struct page *pages; 400 struct page *pages;
401 unsigned int count; 401 unsigned int count;
402 402
403 count = 1 << order; 403 count = 1 << order;
404 INIT_LIST_HEAD(&extra_pages); 404 INIT_LIST_HEAD(&extra_pages);
405 405
406 /* Loop while I can allocate a page and the page allocated 406 /* Loop while I can allocate a page and the page allocated
407 * is a destination page. 407 * is a destination page.
408 */ 408 */
409 do { 409 do {
410 unsigned long pfn, epfn, addr, eaddr; 410 unsigned long pfn, epfn, addr, eaddr;
411 411
412 pages = kimage_alloc_pages(GFP_KERNEL, order); 412 pages = kimage_alloc_pages(GFP_KERNEL, order);
413 if (!pages) 413 if (!pages)
414 break; 414 break;
415 pfn = page_to_pfn(pages); 415 pfn = page_to_pfn(pages);
416 epfn = pfn + count; 416 epfn = pfn + count;
417 addr = pfn << PAGE_SHIFT; 417 addr = pfn << PAGE_SHIFT;
418 eaddr = epfn << PAGE_SHIFT; 418 eaddr = epfn << PAGE_SHIFT;
419 if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || 419 if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
420 kimage_is_destination_range(image, addr, eaddr)) { 420 kimage_is_destination_range(image, addr, eaddr)) {
421 list_add(&pages->lru, &extra_pages); 421 list_add(&pages->lru, &extra_pages);
422 pages = NULL; 422 pages = NULL;
423 } 423 }
424 } while (!pages); 424 } while (!pages);
425 425
426 if (pages) { 426 if (pages) {
427 /* Remember the allocated page... */ 427 /* Remember the allocated page... */
428 list_add(&pages->lru, &image->control_pages); 428 list_add(&pages->lru, &image->control_pages);
429 429
430 /* Because the page is already in it's destination 430 /* Because the page is already in it's destination
431 * location we will never allocate another page at 431 * location we will never allocate another page at
432 * that address. Therefore kimage_alloc_pages 432 * that address. Therefore kimage_alloc_pages
433 * will not return it (again) and we don't need 433 * will not return it (again) and we don't need
434 * to give it an entry in image->segment[]. 434 * to give it an entry in image->segment[].
435 */ 435 */
436 } 436 }
437 /* Deal with the destination pages I have inadvertently allocated. 437 /* Deal with the destination pages I have inadvertently allocated.
438 * 438 *
439 * Ideally I would convert multi-page allocations into single 439 * Ideally I would convert multi-page allocations into single
440 * page allocations, and add everyting to image->dest_pages. 440 * page allocations, and add everyting to image->dest_pages.
441 * 441 *
442 * For now it is simpler to just free the pages. 442 * For now it is simpler to just free the pages.
443 */ 443 */
444 kimage_free_page_list(&extra_pages); 444 kimage_free_page_list(&extra_pages);
445 445
446 return pages; 446 return pages;
447 } 447 }
448 448
449 static struct page *kimage_alloc_crash_control_pages(struct kimage *image, 449 static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
450 unsigned int order) 450 unsigned int order)
451 { 451 {
452 /* Control pages are special, they are the intermediaries 452 /* Control pages are special, they are the intermediaries
453 * that are needed while we copy the rest of the pages 453 * that are needed while we copy the rest of the pages
454 * to their final resting place. As such they must 454 * to their final resting place. As such they must
455 * not conflict with either the destination addresses 455 * not conflict with either the destination addresses
456 * or memory the kernel is already using. 456 * or memory the kernel is already using.
457 * 457 *
458 * Control pages are also the only pags we must allocate 458 * Control pages are also the only pags we must allocate
459 * when loading a crash kernel. All of the other pages 459 * when loading a crash kernel. All of the other pages
460 * are specified by the segments and we just memcpy 460 * are specified by the segments and we just memcpy
461 * into them directly. 461 * into them directly.
462 * 462 *
463 * The only case where we really need more than one of 463 * The only case where we really need more than one of
464 * these are for architectures where we cannot disable 464 * these are for architectures where we cannot disable
465 * the MMU and must instead generate an identity mapped 465 * the MMU and must instead generate an identity mapped
466 * page table for all of the memory. 466 * page table for all of the memory.
467 * 467 *
468 * Given the low demand this implements a very simple 468 * Given the low demand this implements a very simple
469 * allocator that finds the first hole of the appropriate 469 * allocator that finds the first hole of the appropriate
470 * size in the reserved memory region, and allocates all 470 * size in the reserved memory region, and allocates all
471 * of the memory up to and including the hole. 471 * of the memory up to and including the hole.
472 */ 472 */
473 unsigned long hole_start, hole_end, size; 473 unsigned long hole_start, hole_end, size;
474 struct page *pages; 474 struct page *pages;
475 475
476 pages = NULL; 476 pages = NULL;
477 size = (1 << order) << PAGE_SHIFT; 477 size = (1 << order) << PAGE_SHIFT;
478 hole_start = (image->control_page + (size - 1)) & ~(size - 1); 478 hole_start = (image->control_page + (size - 1)) & ~(size - 1);
479 hole_end = hole_start + size - 1; 479 hole_end = hole_start + size - 1;
480 while (hole_end <= crashk_res.end) { 480 while (hole_end <= crashk_res.end) {
481 unsigned long i; 481 unsigned long i;
482 482
483 if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT) 483 if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT)
484 break; 484 break;
485 if (hole_end > crashk_res.end) 485 if (hole_end > crashk_res.end)
486 break; 486 break;
487 /* See if I overlap any of the segments */ 487 /* See if I overlap any of the segments */
488 for (i = 0; i < image->nr_segments; i++) { 488 for (i = 0; i < image->nr_segments; i++) {
489 unsigned long mstart, mend; 489 unsigned long mstart, mend;
490 490
491 mstart = image->segment[i].mem; 491 mstart = image->segment[i].mem;
492 mend = mstart + image->segment[i].memsz - 1; 492 mend = mstart + image->segment[i].memsz - 1;
493 if ((hole_end >= mstart) && (hole_start <= mend)) { 493 if ((hole_end >= mstart) && (hole_start <= mend)) {
494 /* Advance the hole to the end of the segment */ 494 /* Advance the hole to the end of the segment */
495 hole_start = (mend + (size - 1)) & ~(size - 1); 495 hole_start = (mend + (size - 1)) & ~(size - 1);
496 hole_end = hole_start + size - 1; 496 hole_end = hole_start + size - 1;
497 break; 497 break;
498 } 498 }
499 } 499 }
500 /* If I don't overlap any segments I have found my hole! */ 500 /* If I don't overlap any segments I have found my hole! */
501 if (i == image->nr_segments) { 501 if (i == image->nr_segments) {
502 pages = pfn_to_page(hole_start >> PAGE_SHIFT); 502 pages = pfn_to_page(hole_start >> PAGE_SHIFT);
503 break; 503 break;
504 } 504 }
505 } 505 }
506 if (pages) 506 if (pages)
507 image->control_page = hole_end; 507 image->control_page = hole_end;
508 508
509 return pages; 509 return pages;
510 } 510 }
511 511
512 512
513 struct page *kimage_alloc_control_pages(struct kimage *image, 513 struct page *kimage_alloc_control_pages(struct kimage *image,
514 unsigned int order) 514 unsigned int order)
515 { 515 {
516 struct page *pages = NULL; 516 struct page *pages = NULL;
517 517
518 switch (image->type) { 518 switch (image->type) {
519 case KEXEC_TYPE_DEFAULT: 519 case KEXEC_TYPE_DEFAULT:
520 pages = kimage_alloc_normal_control_pages(image, order); 520 pages = kimage_alloc_normal_control_pages(image, order);
521 break; 521 break;
522 case KEXEC_TYPE_CRASH: 522 case KEXEC_TYPE_CRASH:
523 pages = kimage_alloc_crash_control_pages(image, order); 523 pages = kimage_alloc_crash_control_pages(image, order);
524 break; 524 break;
525 } 525 }
526 526
527 return pages; 527 return pages;
528 } 528 }
529 529
530 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) 530 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
531 { 531 {
532 if (*image->entry != 0) 532 if (*image->entry != 0)
533 image->entry++; 533 image->entry++;
534 534
535 if (image->entry == image->last_entry) { 535 if (image->entry == image->last_entry) {
536 kimage_entry_t *ind_page; 536 kimage_entry_t *ind_page;
537 struct page *page; 537 struct page *page;
538 538
539 page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); 539 page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
540 if (!page) 540 if (!page)
541 return -ENOMEM; 541 return -ENOMEM;
542 542
543 ind_page = page_address(page); 543 ind_page = page_address(page);
544 *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; 544 *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
545 image->entry = ind_page; 545 image->entry = ind_page;
546 image->last_entry = ind_page + 546 image->last_entry = ind_page +
547 ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); 547 ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
548 } 548 }
549 *image->entry = entry; 549 *image->entry = entry;
550 image->entry++; 550 image->entry++;
551 *image->entry = 0; 551 *image->entry = 0;
552 552
553 return 0; 553 return 0;
554 } 554 }
555 555
556 static int kimage_set_destination(struct kimage *image, 556 static int kimage_set_destination(struct kimage *image,
557 unsigned long destination) 557 unsigned long destination)
558 { 558 {
559 int result; 559 int result;
560 560
561 destination &= PAGE_MASK; 561 destination &= PAGE_MASK;
562 result = kimage_add_entry(image, destination | IND_DESTINATION); 562 result = kimage_add_entry(image, destination | IND_DESTINATION);
563 if (result == 0) 563 if (result == 0)
564 image->destination = destination; 564 image->destination = destination;
565 565
566 return result; 566 return result;
567 } 567 }
568 568
569 569
570 static int kimage_add_page(struct kimage *image, unsigned long page) 570 static int kimage_add_page(struct kimage *image, unsigned long page)
571 { 571 {
572 int result; 572 int result;
573 573
574 page &= PAGE_MASK; 574 page &= PAGE_MASK;
575 result = kimage_add_entry(image, page | IND_SOURCE); 575 result = kimage_add_entry(image, page | IND_SOURCE);
576 if (result == 0) 576 if (result == 0)
577 image->destination += PAGE_SIZE; 577 image->destination += PAGE_SIZE;
578 578
579 return result; 579 return result;
580 } 580 }
581 581
582 582
583 static void kimage_free_extra_pages(struct kimage *image) 583 static void kimage_free_extra_pages(struct kimage *image)
584 { 584 {
585 /* Walk through and free any extra destination pages I may have */ 585 /* Walk through and free any extra destination pages I may have */
586 kimage_free_page_list(&image->dest_pages); 586 kimage_free_page_list(&image->dest_pages);
587 587
588 /* Walk through and free any unuseable pages I have cached */ 588 /* Walk through and free any unuseable pages I have cached */
589 kimage_free_page_list(&image->unuseable_pages); 589 kimage_free_page_list(&image->unuseable_pages);
590 590
591 } 591 }
592 static int kimage_terminate(struct kimage *image) 592 static void kimage_terminate(struct kimage *image)
593 { 593 {
594 if (*image->entry != 0) 594 if (*image->entry != 0)
595 image->entry++; 595 image->entry++;
596 596
597 *image->entry = IND_DONE; 597 *image->entry = IND_DONE;
598
599 return 0;
600 } 598 }
601 599
602 #define for_each_kimage_entry(image, ptr, entry) \ 600 #define for_each_kimage_entry(image, ptr, entry) \
603 for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ 601 for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
604 ptr = (entry & IND_INDIRECTION)? \ 602 ptr = (entry & IND_INDIRECTION)? \
605 phys_to_virt((entry & PAGE_MASK)): ptr +1) 603 phys_to_virt((entry & PAGE_MASK)): ptr +1)
606 604
607 static void kimage_free_entry(kimage_entry_t entry) 605 static void kimage_free_entry(kimage_entry_t entry)
608 { 606 {
609 struct page *page; 607 struct page *page;
610 608
611 page = pfn_to_page(entry >> PAGE_SHIFT); 609 page = pfn_to_page(entry >> PAGE_SHIFT);
612 kimage_free_pages(page); 610 kimage_free_pages(page);
613 } 611 }
614 612
615 static void kimage_free(struct kimage *image) 613 static void kimage_free(struct kimage *image)
616 { 614 {
617 kimage_entry_t *ptr, entry; 615 kimage_entry_t *ptr, entry;
618 kimage_entry_t ind = 0; 616 kimage_entry_t ind = 0;
619 617
620 if (!image) 618 if (!image)
621 return; 619 return;
622 620
623 kimage_free_extra_pages(image); 621 kimage_free_extra_pages(image);
624 for_each_kimage_entry(image, ptr, entry) { 622 for_each_kimage_entry(image, ptr, entry) {
625 if (entry & IND_INDIRECTION) { 623 if (entry & IND_INDIRECTION) {
626 /* Free the previous indirection page */ 624 /* Free the previous indirection page */
627 if (ind & IND_INDIRECTION) 625 if (ind & IND_INDIRECTION)
628 kimage_free_entry(ind); 626 kimage_free_entry(ind);
629 /* Save this indirection page until we are 627 /* Save this indirection page until we are
630 * done with it. 628 * done with it.
631 */ 629 */
632 ind = entry; 630 ind = entry;
633 } 631 }
634 else if (entry & IND_SOURCE) 632 else if (entry & IND_SOURCE)
635 kimage_free_entry(entry); 633 kimage_free_entry(entry);
636 } 634 }
637 /* Free the final indirection page */ 635 /* Free the final indirection page */
638 if (ind & IND_INDIRECTION) 636 if (ind & IND_INDIRECTION)
639 kimage_free_entry(ind); 637 kimage_free_entry(ind);
640 638
641 /* Handle any machine specific cleanup */ 639 /* Handle any machine specific cleanup */
642 machine_kexec_cleanup(image); 640 machine_kexec_cleanup(image);
643 641
644 /* Free the kexec control pages... */ 642 /* Free the kexec control pages... */
645 kimage_free_page_list(&image->control_pages); 643 kimage_free_page_list(&image->control_pages);
646 kfree(image); 644 kfree(image);
647 } 645 }
648 646
649 static kimage_entry_t *kimage_dst_used(struct kimage *image, 647 static kimage_entry_t *kimage_dst_used(struct kimage *image,
650 unsigned long page) 648 unsigned long page)
651 { 649 {
652 kimage_entry_t *ptr, entry; 650 kimage_entry_t *ptr, entry;
653 unsigned long destination = 0; 651 unsigned long destination = 0;
654 652
655 for_each_kimage_entry(image, ptr, entry) { 653 for_each_kimage_entry(image, ptr, entry) {
656 if (entry & IND_DESTINATION) 654 if (entry & IND_DESTINATION)
657 destination = entry & PAGE_MASK; 655 destination = entry & PAGE_MASK;
658 else if (entry & IND_SOURCE) { 656 else if (entry & IND_SOURCE) {
659 if (page == destination) 657 if (page == destination)
660 return ptr; 658 return ptr;
661 destination += PAGE_SIZE; 659 destination += PAGE_SIZE;
662 } 660 }
663 } 661 }
664 662
665 return NULL; 663 return NULL;
666 } 664 }
667 665
668 static struct page *kimage_alloc_page(struct kimage *image, 666 static struct page *kimage_alloc_page(struct kimage *image,
669 gfp_t gfp_mask, 667 gfp_t gfp_mask,
670 unsigned long destination) 668 unsigned long destination)
671 { 669 {
672 /* 670 /*
673 * Here we implement safeguards to ensure that a source page 671 * Here we implement safeguards to ensure that a source page
674 * is not copied to its destination page before the data on 672 * is not copied to its destination page before the data on
675 * the destination page is no longer useful. 673 * the destination page is no longer useful.
676 * 674 *
677 * To do this we maintain the invariant that a source page is 675 * To do this we maintain the invariant that a source page is
678 * either its own destination page, or it is not a 676 * either its own destination page, or it is not a
679 * destination page at all. 677 * destination page at all.
680 * 678 *
681 * That is slightly stronger than required, but the proof 679 * That is slightly stronger than required, but the proof
682 * that no problems will not occur is trivial, and the 680 * that no problems will not occur is trivial, and the
683 * implementation is simply to verify. 681 * implementation is simply to verify.
684 * 682 *
685 * When allocating all pages normally this algorithm will run 683 * When allocating all pages normally this algorithm will run
686 * in O(N) time, but in the worst case it will run in O(N^2) 684 * in O(N) time, but in the worst case it will run in O(N^2)
687 * time. If the runtime is a problem the data structures can 685 * time. If the runtime is a problem the data structures can
688 * be fixed. 686 * be fixed.
689 */ 687 */
690 struct page *page; 688 struct page *page;
691 unsigned long addr; 689 unsigned long addr;
692 690
693 /* 691 /*
694 * Walk through the list of destination pages, and see if I 692 * Walk through the list of destination pages, and see if I
695 * have a match. 693 * have a match.
696 */ 694 */
697 list_for_each_entry(page, &image->dest_pages, lru) { 695 list_for_each_entry(page, &image->dest_pages, lru) {
698 addr = page_to_pfn(page) << PAGE_SHIFT; 696 addr = page_to_pfn(page) << PAGE_SHIFT;
699 if (addr == destination) { 697 if (addr == destination) {
700 list_del(&page->lru); 698 list_del(&page->lru);
701 return page; 699 return page;
702 } 700 }
703 } 701 }
704 page = NULL; 702 page = NULL;
705 while (1) { 703 while (1) {
706 kimage_entry_t *old; 704 kimage_entry_t *old;
707 705
708 /* Allocate a page, if we run out of memory give up */ 706 /* Allocate a page, if we run out of memory give up */
709 page = kimage_alloc_pages(gfp_mask, 0); 707 page = kimage_alloc_pages(gfp_mask, 0);
710 if (!page) 708 if (!page)
711 return NULL; 709 return NULL;
712 /* If the page cannot be used file it away */ 710 /* If the page cannot be used file it away */
713 if (page_to_pfn(page) > 711 if (page_to_pfn(page) >
714 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { 712 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
715 list_add(&page->lru, &image->unuseable_pages); 713 list_add(&page->lru, &image->unuseable_pages);
716 continue; 714 continue;
717 } 715 }
718 addr = page_to_pfn(page) << PAGE_SHIFT; 716 addr = page_to_pfn(page) << PAGE_SHIFT;
719 717
720 /* If it is the destination page we want use it */ 718 /* If it is the destination page we want use it */
721 if (addr == destination) 719 if (addr == destination)
722 break; 720 break;
723 721
724 /* If the page is not a destination page use it */ 722 /* If the page is not a destination page use it */
725 if (!kimage_is_destination_range(image, addr, 723 if (!kimage_is_destination_range(image, addr,
726 addr + PAGE_SIZE)) 724 addr + PAGE_SIZE))
727 break; 725 break;
728 726
729 /* 727 /*
730 * I know that the page is someones destination page. 728 * I know that the page is someones destination page.
731 * See if there is already a source page for this 729 * See if there is already a source page for this
732 * destination page. And if so swap the source pages. 730 * destination page. And if so swap the source pages.
733 */ 731 */
734 old = kimage_dst_used(image, addr); 732 old = kimage_dst_used(image, addr);
735 if (old) { 733 if (old) {
736 /* If so move it */ 734 /* If so move it */
737 unsigned long old_addr; 735 unsigned long old_addr;
738 struct page *old_page; 736 struct page *old_page;
739 737
740 old_addr = *old & PAGE_MASK; 738 old_addr = *old & PAGE_MASK;
741 old_page = pfn_to_page(old_addr >> PAGE_SHIFT); 739 old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
742 copy_highpage(page, old_page); 740 copy_highpage(page, old_page);
743 *old = addr | (*old & ~PAGE_MASK); 741 *old = addr | (*old & ~PAGE_MASK);
744 742
745 /* The old page I have found cannot be a 743 /* The old page I have found cannot be a
746 * destination page, so return it. 744 * destination page, so return it.
747 */ 745 */
748 addr = old_addr; 746 addr = old_addr;
749 page = old_page; 747 page = old_page;
750 break; 748 break;
751 } 749 }
752 else { 750 else {
753 /* Place the page on the destination list I 751 /* Place the page on the destination list I
754 * will use it later. 752 * will use it later.
755 */ 753 */
756 list_add(&page->lru, &image->dest_pages); 754 list_add(&page->lru, &image->dest_pages);
757 } 755 }
758 } 756 }
759 757
760 return page; 758 return page;
761 } 759 }
762 760
763 static int kimage_load_normal_segment(struct kimage *image, 761 static int kimage_load_normal_segment(struct kimage *image,
764 struct kexec_segment *segment) 762 struct kexec_segment *segment)
765 { 763 {
766 unsigned long maddr; 764 unsigned long maddr;
767 unsigned long ubytes, mbytes; 765 unsigned long ubytes, mbytes;
768 int result; 766 int result;
769 unsigned char __user *buf; 767 unsigned char __user *buf;
770 768
771 result = 0; 769 result = 0;
772 buf = segment->buf; 770 buf = segment->buf;
773 ubytes = segment->bufsz; 771 ubytes = segment->bufsz;
774 mbytes = segment->memsz; 772 mbytes = segment->memsz;
775 maddr = segment->mem; 773 maddr = segment->mem;
776 774
777 result = kimage_set_destination(image, maddr); 775 result = kimage_set_destination(image, maddr);
778 if (result < 0) 776 if (result < 0)
779 goto out; 777 goto out;
780 778
781 while (mbytes) { 779 while (mbytes) {
782 struct page *page; 780 struct page *page;
783 char *ptr; 781 char *ptr;
784 size_t uchunk, mchunk; 782 size_t uchunk, mchunk;
785 783
786 page = kimage_alloc_page(image, GFP_HIGHUSER, maddr); 784 page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
787 if (!page) { 785 if (!page) {
788 result = -ENOMEM; 786 result = -ENOMEM;
789 goto out; 787 goto out;
790 } 788 }
791 result = kimage_add_page(image, page_to_pfn(page) 789 result = kimage_add_page(image, page_to_pfn(page)
792 << PAGE_SHIFT); 790 << PAGE_SHIFT);
793 if (result < 0) 791 if (result < 0)
794 goto out; 792 goto out;
795 793
796 ptr = kmap(page); 794 ptr = kmap(page);
797 /* Start with a clear page */ 795 /* Start with a clear page */
798 memset(ptr, 0, PAGE_SIZE); 796 memset(ptr, 0, PAGE_SIZE);
799 ptr += maddr & ~PAGE_MASK; 797 ptr += maddr & ~PAGE_MASK;
800 mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); 798 mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
801 if (mchunk > mbytes) 799 if (mchunk > mbytes)
802 mchunk = mbytes; 800 mchunk = mbytes;
803 801
804 uchunk = mchunk; 802 uchunk = mchunk;
805 if (uchunk > ubytes) 803 if (uchunk > ubytes)
806 uchunk = ubytes; 804 uchunk = ubytes;
807 805
808 result = copy_from_user(ptr, buf, uchunk); 806 result = copy_from_user(ptr, buf, uchunk);
809 kunmap(page); 807 kunmap(page);
810 if (result) { 808 if (result) {
811 result = (result < 0) ? result : -EIO; 809 result = (result < 0) ? result : -EIO;
812 goto out; 810 goto out;
813 } 811 }
814 ubytes -= uchunk; 812 ubytes -= uchunk;
815 maddr += mchunk; 813 maddr += mchunk;
816 buf += mchunk; 814 buf += mchunk;
817 mbytes -= mchunk; 815 mbytes -= mchunk;
818 } 816 }
819 out: 817 out:
820 return result; 818 return result;
821 } 819 }
822 820
823 static int kimage_load_crash_segment(struct kimage *image, 821 static int kimage_load_crash_segment(struct kimage *image,
824 struct kexec_segment *segment) 822 struct kexec_segment *segment)
825 { 823 {
826 /* For crash dumps kernels we simply copy the data from 824 /* For crash dumps kernels we simply copy the data from
827 * user space to it's destination. 825 * user space to it's destination.
828 * We do things a page at a time for the sake of kmap. 826 * We do things a page at a time for the sake of kmap.
829 */ 827 */
830 unsigned long maddr; 828 unsigned long maddr;
831 unsigned long ubytes, mbytes; 829 unsigned long ubytes, mbytes;
832 int result; 830 int result;
833 unsigned char __user *buf; 831 unsigned char __user *buf;
834 832
835 result = 0; 833 result = 0;
836 buf = segment->buf; 834 buf = segment->buf;
837 ubytes = segment->bufsz; 835 ubytes = segment->bufsz;
838 mbytes = segment->memsz; 836 mbytes = segment->memsz;
839 maddr = segment->mem; 837 maddr = segment->mem;
840 while (mbytes) { 838 while (mbytes) {
841 struct page *page; 839 struct page *page;
842 char *ptr; 840 char *ptr;
843 size_t uchunk, mchunk; 841 size_t uchunk, mchunk;
844 842
845 page = pfn_to_page(maddr >> PAGE_SHIFT); 843 page = pfn_to_page(maddr >> PAGE_SHIFT);
846 if (!page) { 844 if (!page) {
847 result = -ENOMEM; 845 result = -ENOMEM;
848 goto out; 846 goto out;
849 } 847 }
850 ptr = kmap(page); 848 ptr = kmap(page);
851 ptr += maddr & ~PAGE_MASK; 849 ptr += maddr & ~PAGE_MASK;
852 mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); 850 mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
853 if (mchunk > mbytes) 851 if (mchunk > mbytes)
854 mchunk = mbytes; 852 mchunk = mbytes;
855 853
856 uchunk = mchunk; 854 uchunk = mchunk;
857 if (uchunk > ubytes) { 855 if (uchunk > ubytes) {
858 uchunk = ubytes; 856 uchunk = ubytes;
859 /* Zero the trailing part of the page */ 857 /* Zero the trailing part of the page */
860 memset(ptr + uchunk, 0, mchunk - uchunk); 858 memset(ptr + uchunk, 0, mchunk - uchunk);
861 } 859 }
862 result = copy_from_user(ptr, buf, uchunk); 860 result = copy_from_user(ptr, buf, uchunk);
863 kexec_flush_icache_page(page); 861 kexec_flush_icache_page(page);
864 kunmap(page); 862 kunmap(page);
865 if (result) { 863 if (result) {
866 result = (result < 0) ? result : -EIO; 864 result = (result < 0) ? result : -EIO;
867 goto out; 865 goto out;
868 } 866 }
869 ubytes -= uchunk; 867 ubytes -= uchunk;
870 maddr += mchunk; 868 maddr += mchunk;
871 buf += mchunk; 869 buf += mchunk;
872 mbytes -= mchunk; 870 mbytes -= mchunk;
873 } 871 }
874 out: 872 out:
875 return result; 873 return result;
876 } 874 }
877 875
878 static int kimage_load_segment(struct kimage *image, 876 static int kimage_load_segment(struct kimage *image,
879 struct kexec_segment *segment) 877 struct kexec_segment *segment)
880 { 878 {
881 int result = -ENOMEM; 879 int result = -ENOMEM;
882 880
883 switch (image->type) { 881 switch (image->type) {
884 case KEXEC_TYPE_DEFAULT: 882 case KEXEC_TYPE_DEFAULT:
885 result = kimage_load_normal_segment(image, segment); 883 result = kimage_load_normal_segment(image, segment);
886 break; 884 break;
887 case KEXEC_TYPE_CRASH: 885 case KEXEC_TYPE_CRASH:
888 result = kimage_load_crash_segment(image, segment); 886 result = kimage_load_crash_segment(image, segment);
889 break; 887 break;
890 } 888 }
891 889
892 return result; 890 return result;
893 } 891 }
894 892
895 /* 893 /*
896 * Exec Kernel system call: for obvious reasons only root may call it. 894 * Exec Kernel system call: for obvious reasons only root may call it.
897 * 895 *
898 * This call breaks up into three pieces. 896 * This call breaks up into three pieces.
899 * - A generic part which loads the new kernel from the current 897 * - A generic part which loads the new kernel from the current
900 * address space, and very carefully places the data in the 898 * address space, and very carefully places the data in the
901 * allocated pages. 899 * allocated pages.
902 * 900 *
903 * - A generic part that interacts with the kernel and tells all of 901 * - A generic part that interacts with the kernel and tells all of
904 * the devices to shut down. Preventing on-going dmas, and placing 902 * the devices to shut down. Preventing on-going dmas, and placing
905 * the devices in a consistent state so a later kernel can 903 * the devices in a consistent state so a later kernel can
906 * reinitialize them. 904 * reinitialize them.
907 * 905 *
908 * - A machine specific part that includes the syscall number 906 * - A machine specific part that includes the syscall number
909 * and the copies the image to it's final destination. And 907 * and the copies the image to it's final destination. And
910 * jumps into the image at entry. 908 * jumps into the image at entry.
911 * 909 *
912 * kexec does not sync, or unmount filesystems so if you need 910 * kexec does not sync, or unmount filesystems so if you need
913 * that to happen you need to do that yourself. 911 * that to happen you need to do that yourself.
914 */ 912 */
915 struct kimage *kexec_image; 913 struct kimage *kexec_image;
916 struct kimage *kexec_crash_image; 914 struct kimage *kexec_crash_image;
917 /* 915 /*
918 * A home grown binary mutex. 916 * A home grown binary mutex.
919 * Nothing can wait so this mutex is safe to use 917 * Nothing can wait so this mutex is safe to use
920 * in interrupt context :) 918 * in interrupt context :)
921 */ 919 */
922 static int kexec_lock; 920 static int kexec_lock;
923 921
924 asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, 922 asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
925 struct kexec_segment __user *segments, 923 struct kexec_segment __user *segments,
926 unsigned long flags) 924 unsigned long flags)
927 { 925 {
928 struct kimage **dest_image, *image; 926 struct kimage **dest_image, *image;
929 int locked; 927 int locked;
930 int result; 928 int result;
931 929
932 /* We only trust the superuser with rebooting the system. */ 930 /* We only trust the superuser with rebooting the system. */
933 if (!capable(CAP_SYS_BOOT)) 931 if (!capable(CAP_SYS_BOOT))
934 return -EPERM; 932 return -EPERM;
935 933
936 /* 934 /*
937 * Verify we have a legal set of flags 935 * Verify we have a legal set of flags
938 * This leaves us room for future extensions. 936 * This leaves us room for future extensions.
939 */ 937 */
940 if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK)) 938 if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
941 return -EINVAL; 939 return -EINVAL;
942 940
943 /* Verify we are on the appropriate architecture */ 941 /* Verify we are on the appropriate architecture */
944 if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) && 942 if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
945 ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)) 943 ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
946 return -EINVAL; 944 return -EINVAL;
947 945
948 /* Put an artificial cap on the number 946 /* Put an artificial cap on the number
949 * of segments passed to kexec_load. 947 * of segments passed to kexec_load.
950 */ 948 */
951 if (nr_segments > KEXEC_SEGMENT_MAX) 949 if (nr_segments > KEXEC_SEGMENT_MAX)
952 return -EINVAL; 950 return -EINVAL;
953 951
954 image = NULL; 952 image = NULL;
955 result = 0; 953 result = 0;
956 954
957 /* Because we write directly to the reserved memory 955 /* Because we write directly to the reserved memory
958 * region when loading crash kernels we need a mutex here to 956 * region when loading crash kernels we need a mutex here to
959 * prevent multiple crash kernels from attempting to load 957 * prevent multiple crash kernels from attempting to load
960 * simultaneously, and to prevent a crash kernel from loading 958 * simultaneously, and to prevent a crash kernel from loading
961 * over the top of a in use crash kernel. 959 * over the top of a in use crash kernel.
962 * 960 *
963 * KISS: always take the mutex. 961 * KISS: always take the mutex.
964 */ 962 */
965 locked = xchg(&kexec_lock, 1); 963 locked = xchg(&kexec_lock, 1);
966 if (locked) 964 if (locked)
967 return -EBUSY; 965 return -EBUSY;
968 966
969 dest_image = &kexec_image; 967 dest_image = &kexec_image;
970 if (flags & KEXEC_ON_CRASH) 968 if (flags & KEXEC_ON_CRASH)
971 dest_image = &kexec_crash_image; 969 dest_image = &kexec_crash_image;
972 if (nr_segments > 0) { 970 if (nr_segments > 0) {
973 unsigned long i; 971 unsigned long i;
974 972
975 /* Loading another kernel to reboot into */ 973 /* Loading another kernel to reboot into */
976 if ((flags & KEXEC_ON_CRASH) == 0) 974 if ((flags & KEXEC_ON_CRASH) == 0)
977 result = kimage_normal_alloc(&image, entry, 975 result = kimage_normal_alloc(&image, entry,
978 nr_segments, segments); 976 nr_segments, segments);
979 /* Loading another kernel to switch to if this one crashes */ 977 /* Loading another kernel to switch to if this one crashes */
980 else if (flags & KEXEC_ON_CRASH) { 978 else if (flags & KEXEC_ON_CRASH) {
981 /* Free any current crash dump kernel before 979 /* Free any current crash dump kernel before
982 * we corrupt it. 980 * we corrupt it.
983 */ 981 */
984 kimage_free(xchg(&kexec_crash_image, NULL)); 982 kimage_free(xchg(&kexec_crash_image, NULL));
985 result = kimage_crash_alloc(&image, entry, 983 result = kimage_crash_alloc(&image, entry,
986 nr_segments, segments); 984 nr_segments, segments);
987 } 985 }
988 if (result) 986 if (result)
989 goto out; 987 goto out;
990 988
991 result = machine_kexec_prepare(image); 989 result = machine_kexec_prepare(image);
992 if (result) 990 if (result)
993 goto out; 991 goto out;
994 992
995 for (i = 0; i < nr_segments; i++) { 993 for (i = 0; i < nr_segments; i++) {
996 result = kimage_load_segment(image, &image->segment[i]); 994 result = kimage_load_segment(image, &image->segment[i]);
997 if (result) 995 if (result)
998 goto out; 996 goto out;
999 } 997 }
1000 result = kimage_terminate(image); 998 kimage_terminate(image);
1001 if (result)
1002 goto out;
1003 } 999 }
1004 /* Install the new kernel and uninstall the old */ 1000 /* Install the new kernel and uninstall the old */
1005 image = xchg(dest_image, image); 1001 image = xchg(dest_image, image);
1006 1002
1007 out: 1003 out:
1008 locked = xchg(&kexec_lock, 0); /* Release the mutex */ 1004 locked = xchg(&kexec_lock, 0); /* Release the mutex */
1009 BUG_ON(!locked); 1005 BUG_ON(!locked);
1010 kimage_free(image); 1006 kimage_free(image);
1011 1007
1012 return result; 1008 return result;
1013 } 1009 }
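/*
 * Illustrative sketch only (not part of kexec.c): a minimal user-space
 * caller of the syscall implemented above.  As the code shows, passing
 * nr_segments == 0 leaves 'image' NULL, so the xchg() installs NULL and
 * any previously loaded kexec kernel is freed -- i.e. this unloads it.
 * Requires CAP_SYS_BOOT; the constants come from <linux/kexec.h>.
 */
#include <stdio.h>
#include <stddef.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/kexec.h>

int main(void)
{
	/* entry and segments are ignored when nr_segments is 0 */
	long ret = syscall(SYS_kexec_load, 0UL, 0UL, NULL, KEXEC_ARCH_DEFAULT);

	if (ret != 0) {
		perror("kexec_load");
		return 1;
	}
	return 0;
}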
1014 1010
1015 #ifdef CONFIG_COMPAT 1011 #ifdef CONFIG_COMPAT
1016 asmlinkage long compat_sys_kexec_load(unsigned long entry, 1012 asmlinkage long compat_sys_kexec_load(unsigned long entry,
1017 unsigned long nr_segments, 1013 unsigned long nr_segments,
1018 struct compat_kexec_segment __user *segments, 1014 struct compat_kexec_segment __user *segments,
1019 unsigned long flags) 1015 unsigned long flags)
1020 { 1016 {
1021 struct compat_kexec_segment in; 1017 struct compat_kexec_segment in;
1022 struct kexec_segment out, __user *ksegments; 1018 struct kexec_segment out, __user *ksegments;
1023 unsigned long i, result; 1019 unsigned long i, result;
1024 1020
1025 /* Don't allow clients that don't understand the native 1021 /* Don't allow clients that don't understand the native
1026 * architecture to do anything. 1022 * architecture to do anything.
1027 */ 1023 */
1028 if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT) 1024 if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
1029 return -EINVAL; 1025 return -EINVAL;
1030 1026
1031 if (nr_segments > KEXEC_SEGMENT_MAX) 1027 if (nr_segments > KEXEC_SEGMENT_MAX)
1032 return -EINVAL; 1028 return -EINVAL;
1033 1029
1034 ksegments = compat_alloc_user_space(nr_segments * sizeof(out)); 1030 ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
1035 for (i=0; i < nr_segments; i++) { 1031 for (i=0; i < nr_segments; i++) {
1036 result = copy_from_user(&in, &segments[i], sizeof(in)); 1032 result = copy_from_user(&in, &segments[i], sizeof(in));
1037 if (result) 1033 if (result)
1038 return -EFAULT; 1034 return -EFAULT;
1039 1035
1040 out.buf = compat_ptr(in.buf); 1036 out.buf = compat_ptr(in.buf);
1041 out.bufsz = in.bufsz; 1037 out.bufsz = in.bufsz;
1042 out.mem = in.mem; 1038 out.mem = in.mem;
1043 out.memsz = in.memsz; 1039 out.memsz = in.memsz;
1044 1040
1045 result = copy_to_user(&ksegments[i], &out, sizeof(out)); 1041 result = copy_to_user(&ksegments[i], &out, sizeof(out));
1046 if (result) 1042 if (result)
1047 return -EFAULT; 1043 return -EFAULT;
1048 } 1044 }
1049 1045
1050 return sys_kexec_load(entry, nr_segments, ksegments, flags); 1046 return sys_kexec_load(entry, nr_segments, ksegments, flags);
1051 } 1047 }
1052 #endif 1048 #endif
1053 1049
1054 void crash_kexec(struct pt_regs *regs) 1050 void crash_kexec(struct pt_regs *regs)
1055 { 1051 {
1056 int locked; 1052 int locked;
1057 1053
1058 1054
1059 /* Take the kexec_lock here to prevent sys_kexec_load 1055 /* Take the kexec_lock here to prevent sys_kexec_load
1060 * running on one cpu from replacing the crash kernel 1056 * running on one cpu from replacing the crash kernel
1061 * we are using after a panic on a different cpu. 1057 * we are using after a panic on a different cpu.
1062 * 1058 *
1063 * If the crash kernel was not located in a fixed area 1059 * If the crash kernel was not located in a fixed area
1064 * of memory the xchg(&kexec_crash_image) would be 1060 * of memory the xchg(&kexec_crash_image) would be
1065 * sufficient. But since I reuse the memory... 1061 * sufficient. But since I reuse the memory...
1066 */ 1062 */
1067 locked = xchg(&kexec_lock, 1); 1063 locked = xchg(&kexec_lock, 1);
1068 if (!locked) { 1064 if (!locked) {
1069 if (kexec_crash_image) { 1065 if (kexec_crash_image) {
1070 struct pt_regs fixed_regs; 1066 struct pt_regs fixed_regs;
1071 crash_setup_regs(&fixed_regs, regs); 1067 crash_setup_regs(&fixed_regs, regs);
1072 crash_save_vmcoreinfo(); 1068 crash_save_vmcoreinfo();
1073 machine_crash_shutdown(&fixed_regs); 1069 machine_crash_shutdown(&fixed_regs);
1074 machine_kexec(kexec_crash_image); 1070 machine_kexec(kexec_crash_image);
1075 } 1071 }
1076 locked = xchg(&kexec_lock, 0); 1072 locked = xchg(&kexec_lock, 0);
1077 BUG_ON(!locked); 1073 BUG_ON(!locked);
1078 } 1074 }
1079 } 1075 }
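/*
 * Illustrative sketch only: the xchg()-based try-lock idiom used by both
 * sys_kexec_load() and crash_kexec() above, written with C11 atomics.
 * The property that matters on the panic path is that it never blocks:
 * if the exchange returns 1, another CPU already owns kexec_lock and
 * crash_kexec() simply falls through.
 */
#include <stdatomic.h>

static atomic_int sketch_lock;	/* stands in for kexec_lock */

static void crash_path_sketch(void)
{
	if (atomic_exchange(&sketch_lock, 1) == 0) {
		/* ... save registers, shut down, jump to the crash kernel ... */
		atomic_exchange(&sketch_lock, 0);
	}
	/* else: another CPU is already handling the crash; do nothing */
}

int main(void)
{
	crash_path_sketch();
	return 0;
}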
1080 1076
1081 static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, 1077 static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
1082 size_t data_len) 1078 size_t data_len)
1083 { 1079 {
1084 struct elf_note note; 1080 struct elf_note note;
1085 1081
1086 note.n_namesz = strlen(name) + 1; 1082 note.n_namesz = strlen(name) + 1;
1087 note.n_descsz = data_len; 1083 note.n_descsz = data_len;
1088 note.n_type = type; 1084 note.n_type = type;
1089 memcpy(buf, &note, sizeof(note)); 1085 memcpy(buf, &note, sizeof(note));
1090 buf += (sizeof(note) + 3)/4; 1086 buf += (sizeof(note) + 3)/4;
1091 memcpy(buf, name, note.n_namesz); 1087 memcpy(buf, name, note.n_namesz);
1092 buf += (note.n_namesz + 3)/4; 1088 buf += (note.n_namesz + 3)/4;
1093 memcpy(buf, data, note.n_descsz); 1089 memcpy(buf, data, note.n_descsz);
1094 buf += (note.n_descsz + 3)/4; 1090 buf += (note.n_descsz + 3)/4;
1095 1091
1096 return buf; 1092 return buf;
1097 } 1093 }
1098 1094
1099 static void final_note(u32 *buf) 1095 static void final_note(u32 *buf)
1100 { 1096 {
1101 struct elf_note note; 1097 struct elf_note note;
1102 1098
1103 note.n_namesz = 0; 1099 note.n_namesz = 0;
1104 note.n_descsz = 0; 1100 note.n_descsz = 0;
1105 note.n_type = 0; 1101 note.n_type = 0;
1106 memcpy(buf, &note, sizeof(note)); 1102 memcpy(buf, &note, sizeof(note));
1107 } 1103 }
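/*
 * Illustrative sketch only: the same 4-byte-word note layout that
 * append_elf_note()/final_note() build, reproduced in user space with
 * <elf.h>.  The name "CORE" and the 16-byte payload are example values;
 * in the kernel the payload is an elf_prstatus and the list ends with
 * the all-zero header written by final_note().
 */
#include <elf.h>
#include <stdio.h>
#include <string.h>

static unsigned int *emit_note(unsigned int *buf, const char *name,
			       unsigned int type, const void *data,
			       size_t len)
{
	Elf64_Nhdr hdr = {
		.n_namesz = strlen(name) + 1,
		.n_descsz = len,
		.n_type   = type,
	};

	memcpy(buf, &hdr, sizeof(hdr));
	buf += (sizeof(hdr) + 3) / 4;		/* 12-byte header -> 3 words */
	memcpy(buf, name, hdr.n_namesz);
	buf += (hdr.n_namesz + 3) / 4;		/* name, padded to 4 bytes */
	memcpy(buf, data, len);
	buf += (len + 3) / 4;			/* payload, padded to 4 bytes */
	return buf;
}

int main(void)
{
	unsigned int buf[64] = { 0 };
	unsigned char payload[16] = { 0 };
	unsigned int *end;

	end = emit_note(buf, "CORE", NT_PRSTATUS, payload, sizeof(payload));
	/* 3 header words + 2 name words ("CORE\0" padded) + 4 payload words */
	printf("note occupies %zu words\n", (size_t)(end - buf));
	return 0;
}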
1108 1104
1109 void crash_save_cpu(struct pt_regs *regs, int cpu) 1105 void crash_save_cpu(struct pt_regs *regs, int cpu)
1110 { 1106 {
1111 struct elf_prstatus prstatus; 1107 struct elf_prstatus prstatus;
1112 u32 *buf; 1108 u32 *buf;
1113 1109
1114 if ((cpu < 0) || (cpu >= NR_CPUS)) 1110 if ((cpu < 0) || (cpu >= NR_CPUS))
1115 return; 1111 return;
1116 1112
1117 /* Using ELF notes here is opportunistic. 1113 /* Using ELF notes here is opportunistic.
1118 * I need a well defined structure format 1114 * I need a well defined structure format
1119 * for the data I pass, and I need tags 1115 * for the data I pass, and I need tags
1120 * on the data to indicate what information I have 1116 * on the data to indicate what information I have
1121 * squirrelled away. ELF notes happen to provide 1117 * squirrelled away. ELF notes happen to provide
1122 * all of that, so there is no need to invent something new. 1118 * all of that, so there is no need to invent something new.
1123 */ 1119 */
1124 buf = (u32*)per_cpu_ptr(crash_notes, cpu); 1120 buf = (u32*)per_cpu_ptr(crash_notes, cpu);
1125 if (!buf) 1121 if (!buf)
1126 return; 1122 return;
1127 memset(&prstatus, 0, sizeof(prstatus)); 1123 memset(&prstatus, 0, sizeof(prstatus));
1128 prstatus.pr_pid = current->pid; 1124 prstatus.pr_pid = current->pid;
1129 elf_core_copy_regs(&prstatus.pr_reg, regs); 1125 elf_core_copy_regs(&prstatus.pr_reg, regs);
1130 buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, 1126 buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
1131 &prstatus, sizeof(prstatus)); 1127 &prstatus, sizeof(prstatus));
1132 final_note(buf); 1128 final_note(buf);
1133 } 1129 }
1134 1130
1135 static int __init crash_notes_memory_init(void) 1131 static int __init crash_notes_memory_init(void)
1136 { 1132 {
1137 /* Allocate memory for saving cpu registers. */ 1133 /* Allocate memory for saving cpu registers. */
1138 crash_notes = alloc_percpu(note_buf_t); 1134 crash_notes = alloc_percpu(note_buf_t);
1139 if (!crash_notes) { 1135 if (!crash_notes) {
1140 printk("Kexec: Memory allocation for saving cpu register" 1136 printk("Kexec: Memory allocation for saving cpu register"
1141 " states failed\n"); 1137 " states failed\n");
1142 return -ENOMEM; 1138 return -ENOMEM;
1143 } 1139 }
1144 return 0; 1140 return 0;
1145 } 1141 }
1146 module_init(crash_notes_memory_init) 1142 module_init(crash_notes_memory_init)
1147 1143
1148 1144
1149 /* 1145 /*
1150 * parsing the "crashkernel" commandline 1146 * parsing the "crashkernel" commandline
1151 * 1147 *
1152 * this code is intended to be called from architecture specific code 1148 * this code is intended to be called from architecture specific code
1153 */ 1149 */
1154 1150
1155 1151
1156 /* 1152 /*
1157 * This function parses command lines in the format 1153 * This function parses command lines in the format
1158 * 1154 *
1159 * crashkernel=ramsize-range:size[,...][@offset] 1155 * crashkernel=ramsize-range:size[,...][@offset]
1160 * 1156 *
1161 * The function returns 0 on success and -EINVAL on failure. 1157 * The function returns 0 on success and -EINVAL on failure.
1162 */ 1158 */
1163 static int __init parse_crashkernel_mem(char *cmdline, 1159 static int __init parse_crashkernel_mem(char *cmdline,
1164 unsigned long long system_ram, 1160 unsigned long long system_ram,
1165 unsigned long long *crash_size, 1161 unsigned long long *crash_size,
1166 unsigned long long *crash_base) 1162 unsigned long long *crash_base)
1167 { 1163 {
1168 char *cur = cmdline, *tmp; 1164 char *cur = cmdline, *tmp;
1169 1165
1170 /* for each entry of the comma-separated list */ 1166 /* for each entry of the comma-separated list */
1171 do { 1167 do {
1172 unsigned long long start, end = ULLONG_MAX, size; 1168 unsigned long long start, end = ULLONG_MAX, size;
1173 1169
1174 /* get the start of the range */ 1170 /* get the start of the range */
1175 start = memparse(cur, &tmp); 1171 start = memparse(cur, &tmp);
1176 if (cur == tmp) { 1172 if (cur == tmp) {
1177 pr_warning("crashkernel: Memory value expected\n"); 1173 pr_warning("crashkernel: Memory value expected\n");
1178 return -EINVAL; 1174 return -EINVAL;
1179 } 1175 }
1180 cur = tmp; 1176 cur = tmp;
1181 if (*cur != '-') { 1177 if (*cur != '-') {
1182 pr_warning("crashkernel: '-' expected\n"); 1178 pr_warning("crashkernel: '-' expected\n");
1183 return -EINVAL; 1179 return -EINVAL;
1184 } 1180 }
1185 cur++; 1181 cur++;
1186 1182
1187 /* if no ':' is here, then we read the end */ 1183 /* if no ':' is here, then we read the end */
1188 if (*cur != ':') { 1184 if (*cur != ':') {
1189 end = memparse(cur, &tmp); 1185 end = memparse(cur, &tmp);
1190 if (cur == tmp) { 1186 if (cur == tmp) {
1191 pr_warning("crashkernel: Memory " 1187 pr_warning("crashkernel: Memory "
1192 "value expected\n"); 1188 "value expected\n");
1193 return -EINVAL; 1189 return -EINVAL;
1194 } 1190 }
1195 cur = tmp; 1191 cur = tmp;
1196 if (end <= start) { 1192 if (end <= start) {
1197 pr_warning("crashkernel: end <= start\n"); 1193 pr_warning("crashkernel: end <= start\n");
1198 return -EINVAL; 1194 return -EINVAL;
1199 } 1195 }
1200 } 1196 }
1201 1197
1202 if (*cur != ':') { 1198 if (*cur != ':') {
1203 pr_warning("crashkernel: ':' expected\n"); 1199 pr_warning("crashkernel: ':' expected\n");
1204 return -EINVAL; 1200 return -EINVAL;
1205 } 1201 }
1206 cur++; 1202 cur++;
1207 1203
1208 size = memparse(cur, &tmp); 1204 size = memparse(cur, &tmp);
1209 if (cur == tmp) { 1205 if (cur == tmp) {
1210 pr_warning("Memory value expected\n"); 1206 pr_warning("Memory value expected\n");
1211 return -EINVAL; 1207 return -EINVAL;
1212 } 1208 }
1213 cur = tmp; 1209 cur = tmp;
1214 if (size >= system_ram) { 1210 if (size >= system_ram) {
1215 pr_warning("crashkernel: invalid size\n"); 1211 pr_warning("crashkernel: invalid size\n");
1216 return -EINVAL; 1212 return -EINVAL;
1217 } 1213 }
1218 1214
1219 /* match ? */ 1215 /* match ? */
1220 if (system_ram >= start && system_ram < end) { 1216 if (system_ram >= start && system_ram < end) {
1221 *crash_size = size; 1217 *crash_size = size;
1222 break; 1218 break;
1223 } 1219 }
1224 } while (*cur++ == ','); 1220 } while (*cur++ == ',');
1225 1221
1226 if (*crash_size > 0) { 1222 if (*crash_size > 0) {
1227 while (*cur != ' ' && *cur != '@') 1223 while (*cur != ' ' && *cur != '@')
1228 cur++; 1224 cur++;
1229 if (*cur == '@') { 1225 if (*cur == '@') {
1230 cur++; 1226 cur++;
1231 *crash_base = memparse(cur, &tmp); 1227 *crash_base = memparse(cur, &tmp);
1232 if (cur == tmp) { 1228 if (cur == tmp) {
1233 pr_warning("Memory value expected " 1229 pr_warning("Memory value expected "
1234 "after '@'\n"); 1230 "after '@'\n");
1235 return -EINVAL; 1231 return -EINVAL;
1236 } 1232 }
1237 } 1233 }
1238 } 1234 }
1239 1235
1240 return 0; 1236 return 0;
1241 } 1237 }
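/*
 * Worked example (illustrative values) for the parser above:
 *
 *     crashkernel=512M-2G:64M,2G-:128M@16M
 *
 * With system_ram = 1G the first range matches (512M <= 1G < 2G) and
 * *crash_size becomes 64M; with system_ram = 4G the open-ended "2G-"
 * range matches and 128M is used.  In either case the trailing "@16M"
 * sets *crash_base to 16M.
 */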
1242 1238
1243 /* 1239 /*
1244 * This function parses "simple" (old) crashkernel command lines like 1240 * This function parses "simple" (old) crashkernel command lines like
1245 * 1241 *
1246 * crashkernel=size[@offset] 1242 * crashkernel=size[@offset]
1247 * 1243 *
1248 * It returns 0 on success and -EINVAL on failure. 1244 * It returns 0 on success and -EINVAL on failure.
1249 */ 1245 */
1250 static int __init parse_crashkernel_simple(char *cmdline, 1246 static int __init parse_crashkernel_simple(char *cmdline,
1251 unsigned long long *crash_size, 1247 unsigned long long *crash_size,
1252 unsigned long long *crash_base) 1248 unsigned long long *crash_base)
1253 { 1249 {
1254 char *cur = cmdline; 1250 char *cur = cmdline;
1255 1251
1256 *crash_size = memparse(cmdline, &cur); 1252 *crash_size = memparse(cmdline, &cur);
1257 if (cmdline == cur) { 1253 if (cmdline == cur) {
1258 pr_warning("crashkernel: memory value expected\n"); 1254 pr_warning("crashkernel: memory value expected\n");
1259 return -EINVAL; 1255 return -EINVAL;
1260 } 1256 }
1261 1257
1262 if (*cur == '@') 1258 if (*cur == '@')
1263 *crash_base = memparse(cur+1, &cur); 1259 *crash_base = memparse(cur+1, &cur);
1264 1260
1265 return 0; 1261 return 0;
1266 } 1262 }
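/*
 * Example (illustrative): for "crashkernel=128M@16M" the function above
 * sets *crash_size = 128M and *crash_base = 16M; for plain
 * "crashkernel=128M", *crash_base is simply left at the 0 the caller
 * initialised it to.
 */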
1267 1263
1268 /* 1264 /*
1269 * This function is the entry point for command line parsing and should be 1265 * This function is the entry point for command line parsing and should be
1270 * called from the arch-specific code. 1266 * called from the arch-specific code.
1271 */ 1267 */
1272 int __init parse_crashkernel(char *cmdline, 1268 int __init parse_crashkernel(char *cmdline,
1273 unsigned long long system_ram, 1269 unsigned long long system_ram,
1274 unsigned long long *crash_size, 1270 unsigned long long *crash_size,
1275 unsigned long long *crash_base) 1271 unsigned long long *crash_base)
1276 { 1272 {
1277 char *p = cmdline, *ck_cmdline = NULL; 1273 char *p = cmdline, *ck_cmdline = NULL;
1278 char *first_colon, *first_space; 1274 char *first_colon, *first_space;
1279 1275
1280 BUG_ON(!crash_size || !crash_base); 1276 BUG_ON(!crash_size || !crash_base);
1281 *crash_size = 0; 1277 *crash_size = 0;
1282 *crash_base = 0; 1278 *crash_base = 0;
1283 1279
1284 /* find crashkernel and use the last one if there are more */ 1280 /* find crashkernel and use the last one if there are more */
1285 p = strstr(p, "crashkernel="); 1281 p = strstr(p, "crashkernel=");
1286 while (p) { 1282 while (p) {
1287 ck_cmdline = p; 1283 ck_cmdline = p;
1288 p = strstr(p+1, "crashkernel="); 1284 p = strstr(p+1, "crashkernel=");
1289 } 1285 }
1290 1286
1291 if (!ck_cmdline) 1287 if (!ck_cmdline)
1292 return -EINVAL; 1288 return -EINVAL;
1293 1289
1294 ck_cmdline += 12; /* strlen("crashkernel=") */ 1290 ck_cmdline += 12; /* strlen("crashkernel=") */
1295 1291
1296 /* 1292 /*
1297 * if the commandline contains a ':', then that's the extended 1293 * if the commandline contains a ':', then that's the extended
1298 * syntax -- if not, it must be the classic syntax 1294 * syntax -- if not, it must be the classic syntax
1299 */ 1295 */
1300 first_colon = strchr(ck_cmdline, ':'); 1296 first_colon = strchr(ck_cmdline, ':');
1301 first_space = strchr(ck_cmdline, ' '); 1297 first_space = strchr(ck_cmdline, ' ');
1302 if (first_colon && (!first_space || first_colon < first_space)) 1298 if (first_colon && (!first_space || first_colon < first_space))
1303 return parse_crashkernel_mem(ck_cmdline, system_ram, 1299 return parse_crashkernel_mem(ck_cmdline, system_ram,
1304 crash_size, crash_base); 1300 crash_size, crash_base);
1305 else 1301 else
1306 return parse_crashkernel_simple(ck_cmdline, crash_size, 1302 return parse_crashkernel_simple(ck_cmdline, crash_size,
1307 crash_base); 1303 crash_base);
1308 1304
1309 return 0; 1305 return 0;
1310 } 1306 }
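/*
 * Illustrative sketch only: roughly how architecture setup code is
 * expected to call the entry point above.  The name
 * reserve_crashkernel_sketch and the total_mem parameter are
 * placeholders; real callers (e.g. the x86 reserve_crashkernel()) add
 * their own alignment and availability checks before touching crashk_res.
 */
static void __init reserve_crashkernel_sketch(unsigned long long total_mem)
{
	unsigned long long crash_size, crash_base;
	int ret;

	ret = parse_crashkernel(boot_command_line, total_mem,
				&crash_size, &crash_base);
	if (ret != 0 || crash_size == 0)
		return;

	/* the arch would reserve this region and publish it via crashk_res */
	crashk_res.start = crash_base;
	crashk_res.end   = crash_base + crash_size - 1;
}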
1311 1307
1312 1308
1313 1309
1314 void crash_save_vmcoreinfo(void) 1310 void crash_save_vmcoreinfo(void)
1315 { 1311 {
1316 u32 *buf; 1312 u32 *buf;
1317 1313
1318 if (!vmcoreinfo_size) 1314 if (!vmcoreinfo_size)
1319 return; 1315 return;
1320 1316
1321 vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds()); 1317 vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
1322 1318
1323 buf = (u32 *)vmcoreinfo_note; 1319 buf = (u32 *)vmcoreinfo_note;
1324 1320
1325 buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, 1321 buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
1326 vmcoreinfo_size); 1322 vmcoreinfo_size);
1327 1323
1328 final_note(buf); 1324 final_note(buf);
1329 } 1325 }
1330 1326
1331 void vmcoreinfo_append_str(const char *fmt, ...) 1327 void vmcoreinfo_append_str(const char *fmt, ...)
1332 { 1328 {
1333 va_list args; 1329 va_list args;
1334 char buf[0x50]; 1330 char buf[0x50];
1335 int r; 1331 int r;
1336 1332
1337 va_start(args, fmt); 1333 va_start(args, fmt);
1338 r = vsnprintf(buf, sizeof(buf), fmt, args); 1334 r = vsnprintf(buf, sizeof(buf), fmt, args);
1339 va_end(args); 1335 va_end(args);
1340 1336
1341 if (r + vmcoreinfo_size > vmcoreinfo_max_size) 1337 if (r + vmcoreinfo_size > vmcoreinfo_max_size)
1342 r = vmcoreinfo_max_size - vmcoreinfo_size; 1338 r = vmcoreinfo_max_size - vmcoreinfo_size;
1343 1339
1344 memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); 1340 memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
1345 1341
1346 vmcoreinfo_size += r; 1342 vmcoreinfo_size += r;
1347 } 1343 }
1348 1344
1349 /* 1345 /*
1350 * provide an empty default implementation here -- architecture 1346 * provide an empty default implementation here -- architecture
1351 * code may override this 1347 * code may override this
1352 */ 1348 */
1353 void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void) 1349 void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void)
1354 {} 1350 {}
1355 1351
1356 unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void) 1352 unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
1357 { 1353 {
1358 return __pa((unsigned long)(char *)&vmcoreinfo_note); 1354 return __pa((unsigned long)(char *)&vmcoreinfo_note);
1359 } 1355 }
1360 1356
1361 static int __init crash_save_vmcoreinfo_init(void) 1357 static int __init crash_save_vmcoreinfo_init(void)
1362 { 1358 {
1363 VMCOREINFO_OSRELEASE(init_uts_ns.name.release); 1359 VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
1364 VMCOREINFO_PAGESIZE(PAGE_SIZE); 1360 VMCOREINFO_PAGESIZE(PAGE_SIZE);
1365 1361
1366 VMCOREINFO_SYMBOL(init_uts_ns); 1362 VMCOREINFO_SYMBOL(init_uts_ns);
1367 VMCOREINFO_SYMBOL(node_online_map); 1363 VMCOREINFO_SYMBOL(node_online_map);
1368 VMCOREINFO_SYMBOL(swapper_pg_dir); 1364 VMCOREINFO_SYMBOL(swapper_pg_dir);
1369 VMCOREINFO_SYMBOL(_stext); 1365 VMCOREINFO_SYMBOL(_stext);
1370 1366
1371 #ifndef CONFIG_NEED_MULTIPLE_NODES 1367 #ifndef CONFIG_NEED_MULTIPLE_NODES
1372 VMCOREINFO_SYMBOL(mem_map); 1368 VMCOREINFO_SYMBOL(mem_map);
1373 VMCOREINFO_SYMBOL(contig_page_data); 1369 VMCOREINFO_SYMBOL(contig_page_data);
1374 #endif 1370 #endif
1375 #ifdef CONFIG_SPARSEMEM 1371 #ifdef CONFIG_SPARSEMEM
1376 VMCOREINFO_SYMBOL(mem_section); 1372 VMCOREINFO_SYMBOL(mem_section);
1377 VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); 1373 VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
1378 VMCOREINFO_STRUCT_SIZE(mem_section); 1374 VMCOREINFO_STRUCT_SIZE(mem_section);
1379 VMCOREINFO_OFFSET(mem_section, section_mem_map); 1375 VMCOREINFO_OFFSET(mem_section, section_mem_map);
1380 #endif 1376 #endif
1381 VMCOREINFO_STRUCT_SIZE(page); 1377 VMCOREINFO_STRUCT_SIZE(page);
1382 VMCOREINFO_STRUCT_SIZE(pglist_data); 1378 VMCOREINFO_STRUCT_SIZE(pglist_data);
1383 VMCOREINFO_STRUCT_SIZE(zone); 1379 VMCOREINFO_STRUCT_SIZE(zone);
1384 VMCOREINFO_STRUCT_SIZE(free_area); 1380 VMCOREINFO_STRUCT_SIZE(free_area);
1385 VMCOREINFO_STRUCT_SIZE(list_head); 1381 VMCOREINFO_STRUCT_SIZE(list_head);
1386 VMCOREINFO_SIZE(nodemask_t); 1382 VMCOREINFO_SIZE(nodemask_t);
1387 VMCOREINFO_OFFSET(page, flags); 1383 VMCOREINFO_OFFSET(page, flags);
1388 VMCOREINFO_OFFSET(page, _count); 1384 VMCOREINFO_OFFSET(page, _count);
1389 VMCOREINFO_OFFSET(page, mapping); 1385 VMCOREINFO_OFFSET(page, mapping);
1390 VMCOREINFO_OFFSET(page, lru); 1386 VMCOREINFO_OFFSET(page, lru);
1391 VMCOREINFO_OFFSET(pglist_data, node_zones); 1387 VMCOREINFO_OFFSET(pglist_data, node_zones);
1392 VMCOREINFO_OFFSET(pglist_data, nr_zones); 1388 VMCOREINFO_OFFSET(pglist_data, nr_zones);
1393 #ifdef CONFIG_FLAT_NODE_MEM_MAP 1389 #ifdef CONFIG_FLAT_NODE_MEM_MAP
1394 VMCOREINFO_OFFSET(pglist_data, node_mem_map); 1390 VMCOREINFO_OFFSET(pglist_data, node_mem_map);
1395 #endif 1391 #endif
1396 VMCOREINFO_OFFSET(pglist_data, node_start_pfn); 1392 VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
1397 VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); 1393 VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
1398 VMCOREINFO_OFFSET(pglist_data, node_id); 1394 VMCOREINFO_OFFSET(pglist_data, node_id);
1399 VMCOREINFO_OFFSET(zone, free_area); 1395 VMCOREINFO_OFFSET(zone, free_area);
1400 VMCOREINFO_OFFSET(zone, vm_stat); 1396 VMCOREINFO_OFFSET(zone, vm_stat);
1401 VMCOREINFO_OFFSET(zone, spanned_pages); 1397 VMCOREINFO_OFFSET(zone, spanned_pages);
1402 VMCOREINFO_OFFSET(free_area, free_list); 1398 VMCOREINFO_OFFSET(free_area, free_list);
1403 VMCOREINFO_OFFSET(list_head, next); 1399 VMCOREINFO_OFFSET(list_head, next);
1404 VMCOREINFO_OFFSET(list_head, prev); 1400 VMCOREINFO_OFFSET(list_head, prev);
1405 VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); 1401 VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
1406 VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); 1402 VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
1407 VMCOREINFO_NUMBER(NR_FREE_PAGES); 1403 VMCOREINFO_NUMBER(NR_FREE_PAGES);
1408 VMCOREINFO_NUMBER(PG_lru); 1404 VMCOREINFO_NUMBER(PG_lru);
1409 VMCOREINFO_NUMBER(PG_private); 1405 VMCOREINFO_NUMBER(PG_private);
1410 VMCOREINFO_NUMBER(PG_swapcache); 1406 VMCOREINFO_NUMBER(PG_swapcache);
1411 1407
1412 arch_crash_save_vmcoreinfo(); 1408 arch_crash_save_vmcoreinfo();
1413 1409
1414 return 0; 1410 return 0;
1415 } 1411 }
1416 1412
1417 module_init(crash_save_vmcoreinfo_init) 1413 module_init(crash_save_vmcoreinfo_init)
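/*
 * Illustrative only: a few representative lines of the vmcoreinfo text
 * that the VMCOREINFO_* calls above append to vmcoreinfo_data (values
 * vary by build; the key spellings follow the macro definitions in
 * <linux/kexec.h>):
 *
 *     OSRELEASE=2.6.27-rc1
 *     PAGESIZE=4096
 *     SYMBOL(swapper_pg_dir)=ffffffff80201000
 *     SYMBOL(_stext)=ffffffff80200000
 *
 * crash_save_vmcoreinfo() later appends CRASHTIME=<seconds> and wraps
 * the whole buffer in a single ELF note via append_elf_note().
 */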
1418 1414