Commit 74dfd666de861c97d47bdbd892f6d21b801d0247

Authored by Rafael J. Wysocki
Committed by Linus Torvalds
1 parent 7be9823491

swsusp: do not use page flags

Make swsusp use memory bitmaps instead of page flags for marking 'nosave' and
free pages.  This allows us to 'recycle' two page flags that can be used for
other purposes.  Also, the memory needed to store the bitmaps is allocated
when necessary (i.e. before the suspend) and freed after the resume, which is
more reasonable.

The patch is designed to minimize the number of changes, and there are some
nice simplifications and optimizations possible on top of it.  I am going to
implement them separately in the future.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 7 changed files with 283 additions and 82 deletions Side-by-side Diff

arch/x86_64/kernel/e820.c
... ... @@ -17,6 +17,8 @@
17 17 #include <linux/kexec.h>
18 18 #include <linux/module.h>
19 19 #include <linux/mm.h>
  20 +#include <linux/suspend.h>
  21 +#include <linux/pfn.h>
20 22  
21 23 #include <asm/pgtable.h>
22 24 #include <asm/page.h>
... ... @@ -256,22 +258,6 @@
256 258 }
257 259 }
258 260  
259   -/* Mark pages corresponding to given address range as nosave */
260   -static void __init
261   -e820_mark_nosave_range(unsigned long start, unsigned long end)
262   -{
263   - unsigned long pfn, max_pfn;
264   -
265   - if (start >= end)
266   - return;
267   -
268   - printk("Nosave address range: %016lx - %016lx\n", start, end);
269   - max_pfn = end >> PAGE_SHIFT;
270   - for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
271   - if (pfn_valid(pfn))
272   - SetPageNosave(pfn_to_page(pfn));
273   -}
274   -
275 261 /*
276 262 * Find the ranges of physical addresses that do not correspond to
277 263 * e820 RAM areas and mark the corresponding pages as nosave for software
278 264  
... ... @@ -290,13 +276,13 @@
290 276 struct e820entry *ei = &e820.map[i];
291 277  
292 278 if (paddr < ei->addr)
293   - e820_mark_nosave_range(paddr,
294   - round_up(ei->addr, PAGE_SIZE));
  279 + register_nosave_region(PFN_DOWN(paddr),
  280 + PFN_UP(ei->addr));
295 281  
296 282 paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
297 283 if (ei->type != E820_RAM)
298   - e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
299   - paddr);
  284 + register_nosave_region(PFN_UP(ei->addr),
  285 + PFN_DOWN(paddr));
300 286  
301 287 if (paddr >= (end_pfn << PAGE_SHIFT))
302 288 break;
include/linux/suspend.h
... ... @@ -24,63 +24,41 @@
24 24 extern void drain_local_pages(void);
25 25 extern void mark_free_pages(struct zone *zone);
26 26  
27   -#ifdef CONFIG_PM
28   -/* kernel/power/swsusp.c */
29   -extern int software_suspend(void);
30   -
31   -#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
  27 +#if defined(CONFIG_PM) && defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
32 28 extern int pm_prepare_console(void);
33 29 extern void pm_restore_console(void);
34 30 #else
35 31 static inline int pm_prepare_console(void) { return 0; }
36 32 static inline void pm_restore_console(void) {}
37   -#endif /* defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) */
  33 +#endif
  34 +
  35 +#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
  36 +/* kernel/power/swsusp.c */
  37 +extern int software_suspend(void);
  38 +/* kernel/power/snapshot.c */
  39 +extern void __init register_nosave_region(unsigned long, unsigned long);
  40 +extern int swsusp_page_is_forbidden(struct page *);
  41 +extern void swsusp_set_page_free(struct page *);
  42 +extern void swsusp_unset_page_free(struct page *);
  43 +extern unsigned long get_safe_page(gfp_t gfp_mask);
38 44 #else
39 45 static inline int software_suspend(void)
40 46 {
41 47 printk("Warning: fake suspend called\n");
42 48 return -ENOSYS;
43 49 }
44   -#endif /* CONFIG_PM */
45 50  
  51 +static inline void register_nosave_region(unsigned long b, unsigned long e) {}
  52 +static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
  53 +static inline void swsusp_set_page_free(struct page *p) {}
  54 +static inline void swsusp_unset_page_free(struct page *p) {}
  55 +#endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */
  56 +
46 57 void save_processor_state(void);
47 58 void restore_processor_state(void);
48 59 struct saved_context;
49 60 void __save_processor_state(struct saved_context *ctxt);
50 61 void __restore_processor_state(struct saved_context *ctxt);
51   -unsigned long get_safe_page(gfp_t gfp_mask);
52   -
53   -/* Page management functions for the software suspend (swsusp) */
54   -
55   -static inline void swsusp_set_page_forbidden(struct page *page)
56   -{
57   - SetPageNosave(page);
58   -}
59   -
60   -static inline int swsusp_page_is_forbidden(struct page *page)
61   -{
62   - return PageNosave(page);
63   -}
64   -
65   -static inline void swsusp_unset_page_forbidden(struct page *page)
66   -{
67   - ClearPageNosave(page);
68   -}
69   -
70   -static inline void swsusp_set_page_free(struct page *page)
71   -{
72   - SetPageNosaveFree(page);
73   -}
74   -
75   -static inline int swsusp_page_is_free(struct page *page)
76   -{
77   - return PageNosaveFree(page);
78   -}
79   -
80   -static inline void swsusp_unset_page_free(struct page *page)
81   -{
82   - ClearPageNosaveFree(page);
83   -}
84 62  
85 63 /*
86 64 * XXX: We try to keep some more pages free so that I/O operations succeed
... ... @@ -139,14 +139,19 @@
139 139 mdelay(5000);
140 140 goto Thaw;
141 141 }
  142 + /* Allocate memory management structures */
  143 + error = create_basic_memory_bitmaps();
  144 + if (error)
  145 + goto Thaw;
  146 +
142 147 /* Free memory before shutting down devices. */
143 148 error = swsusp_shrink_memory();
144 149 if (error)
145   - goto Thaw;
  150 + goto Finish;
146 151  
147 152 error = platform_prepare();
148 153 if (error)
149   - goto Thaw;
  154 + goto Finish;
150 155  
151 156 suspend_console();
152 157 error = device_suspend(PMSG_FREEZE);
... ... @@ -181,7 +186,7 @@
181 186 power_down();
182 187 else {
183 188 swsusp_free();
184   - goto Thaw;
  189 + goto Finish;
185 190 }
186 191 } else {
187 192 pr_debug("PM: Image restored successfully.\n");
... ... @@ -194,6 +199,8 @@
194 199 platform_finish();
195 200 device_resume();
196 201 resume_console();
  202 + Finish:
  203 + free_basic_memory_bitmaps();
197 204 Thaw:
198 205 unprepare_processes();
199 206 return error;
200 207  
201 208  
202 209  
... ... @@ -239,13 +246,15 @@
239 246 }
240 247  
241 248 pr_debug("PM: Checking swsusp image.\n");
242   -
243 249 error = swsusp_check();
244 250 if (error)
245   - goto Done;
  251 + goto Unlock;
246 252  
247   - pr_debug("PM: Preparing processes for restore.\n");
  253 + error = create_basic_memory_bitmaps();
  254 + if (error)
  255 + goto Unlock;
248 256  
  257 + pr_debug("PM: Preparing processes for restore.\n");
249 258 error = prepare_processes();
250 259 if (error) {
251 260 swsusp_close();
252 261  
... ... @@ -280,7 +289,9 @@
280 289 printk(KERN_ERR "PM: Restore failed, recovering.\n");
281 290 unprepare_processes();
282 291 Done:
  292 + free_basic_memory_bitmaps();
283 293 /* For success case, the suspend path will release the lock */
  294 + Unlock:
284 295 mutex_unlock(&pm_mutex);
285 296 pr_debug("PM: Resume from disk failed.\n");
286 297 return 0;
... ... @@ -244,6 +244,7 @@
244 244 return error;
245 245 }
246 246  
  247 +#ifdef CONFIG_SOFTWARE_SUSPEND
247 248 /*
248 249 * This is main interface to the outside world. It needs to be
249 250 * called from process context.
... ... @@ -252,6 +253,7 @@
252 253 {
253 254 return enter_state(PM_SUSPEND_DISK);
254 255 }
  256 +#endif
255 257  
256 258  
257 259 /**
kernel/power/power.h
... ... @@ -48,6 +48,8 @@
48 48 extern asmlinkage int swsusp_arch_suspend(void);
49 49 extern asmlinkage int swsusp_arch_resume(void);
50 50  
  51 +extern int create_basic_memory_bitmaps(void);
  52 +extern void free_basic_memory_bitmaps(void);
51 53 extern unsigned int count_data_pages(void);
52 54  
53 55 /**
kernel/power/snapshot.c
... ... @@ -21,6 +21,7 @@
21 21 #include <linux/kernel.h>
22 22 #include <linux/pm.h>
23 23 #include <linux/device.h>
  24 +#include <linux/init.h>
24 25 #include <linux/bootmem.h>
25 26 #include <linux/syscalls.h>
26 27 #include <linux/console.h>
... ... @@ -34,6 +35,10 @@
34 35  
35 36 #include "power.h"
36 37  
  38 +static int swsusp_page_is_free(struct page *);
  39 +static void swsusp_set_page_forbidden(struct page *);
  40 +static void swsusp_unset_page_forbidden(struct page *);
  41 +
37 42 /* List of PBEs needed for restoring the pages that were allocated before
38 43 * the suspend and included in the suspend image, but have also been
39 44 * allocated by the "resume" kernel, so their contents cannot be written
... ... @@ -224,11 +229,6 @@
224 229 * of type unsigned long each). It also contains the pfns that
225 230 * correspond to the start and end of the represented memory area and
226 231 * the number of bit chunks in the block.
227   - *
228   - * NOTE: Memory bitmaps are used for two types of operations only:
229   - * "set a bit" and "find the next bit set". Moreover, the searching
230   - * is always carried out after all of the "set a bit" operations
231   - * on given bitmap.
232 232 */
233 233  
234 234 #define BM_END_OF_MAP (~0UL)
235 235  
236 236  
... ... @@ -443,15 +443,13 @@
443 443 }
444 444  
445 445 /**
446   - * memory_bm_set_bit - set the bit in the bitmap @bm that corresponds
  446 + * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
447 447 * to given pfn. The cur_zone_bm member of @bm and the cur_block member
448 448 * of @bm->cur_zone_bm are updated.
449   - *
450   - * If the bit cannot be set, the function returns -EINVAL .
451 449 */
452 450  
453   -static int
454   -memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
  451 +static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
  452 + void **addr, unsigned int *bit_nr)
455 453 {
456 454 struct zone_bitmap *zone_bm;
457 455 struct bm_block *bb;
... ... @@ -463,8 +461,8 @@
463 461 /* We don't assume that the zones are sorted by pfns */
464 462 while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
465 463 zone_bm = zone_bm->next;
466   - if (unlikely(!zone_bm))
467   - return -EINVAL;
  464 +
  465 + BUG_ON(!zone_bm);
468 466 }
469 467 bm->cur.zone_bm = zone_bm;
470 468 }
471 469  
472 470  
... ... @@ -475,15 +473,42 @@
475 473  
476 474 while (pfn >= bb->end_pfn) {
477 475 bb = bb->next;
478   - if (unlikely(!bb))
479   - return -EINVAL;
  476 +
  477 + BUG_ON(!bb);
480 478 }
481 479 zone_bm->cur_block = bb;
482 480 pfn -= bb->start_pfn;
483   - set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK);
484   - return 0;
  481 + *bit_nr = pfn % BM_BITS_PER_CHUNK;
  482 + *addr = bb->data + pfn / BM_BITS_PER_CHUNK;
485 483 }
486 484  
  485 +static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
  486 +{
  487 + void *addr;
  488 + unsigned int bit;
  489 +
  490 + memory_bm_find_bit(bm, pfn, &addr, &bit);
  491 + set_bit(bit, addr);
  492 +}
  493 +
  494 +static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
  495 +{
  496 + void *addr;
  497 + unsigned int bit;
  498 +
  499 + memory_bm_find_bit(bm, pfn, &addr, &bit);
  500 + clear_bit(bit, addr);
  501 +}
  502 +
  503 +static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
  504 +{
  505 + void *addr;
  506 + unsigned int bit;
  507 +
  508 + memory_bm_find_bit(bm, pfn, &addr, &bit);
  509 + return test_bit(bit, addr);
  510 +}
  511 +
487 512 /* Two auxiliary functions for memory_bm_next_pfn */
488 513  
489 514 /* Find the first set bit in the given chunk, if there is one */
... ... @@ -561,6 +586,199 @@
561 586 bm->cur.chunk = chunk;
562 587 bm->cur.bit = bit;
563 588 return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit;
  589 +}
  590 +
  591 +/**
  592 + * This structure represents a range of page frames the contents of which
  593 + * should not be saved during the suspend.
  594 + */
  595 +
  596 +struct nosave_region {
  597 + struct list_head list;
  598 + unsigned long start_pfn;
  599 + unsigned long end_pfn;
  600 +};
  601 +
  602 +static LIST_HEAD(nosave_regions);
  603 +
  604 +/**
  605 + * register_nosave_region - register a range of page frames the contents
  606 + * of which should not be saved during the suspend (to be used in the early
  607 + * initialization code)
  608 + */
  609 +
  610 +void __init
  611 +register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
  612 +{
  613 + struct nosave_region *region;
  614 +
  615 + if (start_pfn >= end_pfn)
  616 + return;
  617 +
  618 + if (!list_empty(&nosave_regions)) {
  619 + /* Try to extend the previous region (they should be sorted) */
  620 + region = list_entry(nosave_regions.prev,
  621 + struct nosave_region, list);
  622 + if (region->end_pfn == start_pfn) {
  623 + region->end_pfn = end_pfn;
  624 + goto Report;
  625 + }
  626 + }
  627 + /* This allocation cannot fail */
  628 + region = alloc_bootmem_low(sizeof(struct nosave_region));
  629 + region->start_pfn = start_pfn;
  630 + region->end_pfn = end_pfn;
  631 + list_add_tail(&region->list, &nosave_regions);
  632 + Report:
  633 + printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
  634 + start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
  635 +}
  636 +
  637 +/*
  638 + * Set bits in this map correspond to the page frames the contents of which
  639 + * should not be saved during the suspend.
  640 + */
  641 +static struct memory_bitmap *forbidden_pages_map;
  642 +
  643 +/* Set bits in this map correspond to free page frames. */
  644 +static struct memory_bitmap *free_pages_map;
  645 +
  646 +/*
  647 + * Each page frame allocated for creating the image is marked by setting the
  648 + * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
  649 + */
  650 +
  651 +void swsusp_set_page_free(struct page *page)
  652 +{
  653 + if (free_pages_map)
  654 + memory_bm_set_bit(free_pages_map, page_to_pfn(page));
  655 +}
  656 +
  657 +static int swsusp_page_is_free(struct page *page)
  658 +{
  659 + return free_pages_map ?
  660 + memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
  661 +}
  662 +
  663 +void swsusp_unset_page_free(struct page *page)
  664 +{
  665 + if (free_pages_map)
  666 + memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
  667 +}
  668 +
  669 +static void swsusp_set_page_forbidden(struct page *page)
  670 +{
  671 + if (forbidden_pages_map)
  672 + memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
  673 +}
  674 +
  675 +int swsusp_page_is_forbidden(struct page *page)
  676 +{
  677 + return forbidden_pages_map ?
  678 + memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
  679 +}
  680 +
  681 +static void swsusp_unset_page_forbidden(struct page *page)
  682 +{
  683 + if (forbidden_pages_map)
  684 + memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
  685 +}
  686 +
  687 +/**
  688 + * mark_nosave_pages - set bits corresponding to the page frames the
  689 + * contents of which should not be saved in a given bitmap.
  690 + */
  691 +
  692 +static void mark_nosave_pages(struct memory_bitmap *bm)
  693 +{
  694 + struct nosave_region *region;
  695 +
  696 + if (list_empty(&nosave_regions))
  697 + return;
  698 +
  699 + list_for_each_entry(region, &nosave_regions, list) {
  700 + unsigned long pfn;
  701 +
  702 + printk("swsusp: Marking nosave pages: %016lx - %016lx\n",
  703 + region->start_pfn << PAGE_SHIFT,
  704 + region->end_pfn << PAGE_SHIFT);
  705 +
  706 + for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
  707 + memory_bm_set_bit(bm, pfn);
  708 + }
  709 +}
  710 +
  711 +/**
  712 + * create_basic_memory_bitmaps - create bitmaps needed for marking page
  713 + * frames that should not be saved and free page frames. The pointers
  714 + * forbidden_pages_map and free_pages_map are only modified if everything
  715 + * goes well, because we don't want the bits to be used before both bitmaps
  716 + * are set up.
  717 + */
  718 +
  719 +int create_basic_memory_bitmaps(void)
  720 +{
  721 + struct memory_bitmap *bm1, *bm2;
  722 + int error = 0;
  723 +
  724 + BUG_ON(forbidden_pages_map || free_pages_map);
  725 +
  726 + bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC);
  727 + if (!bm1)
  728 + return -ENOMEM;
  729 +
  730 + error = memory_bm_create(bm1, GFP_ATOMIC | __GFP_COLD, PG_ANY);
  731 + if (error)
  732 + goto Free_first_object;
  733 +
  734 + bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_ATOMIC);
  735 + if (!bm2)
  736 + goto Free_first_bitmap;
  737 +
  738 + error = memory_bm_create(bm2, GFP_ATOMIC | __GFP_COLD, PG_ANY);
  739 + if (error)
  740 + goto Free_second_object;
  741 +
  742 + forbidden_pages_map = bm1;
  743 + free_pages_map = bm2;
  744 + mark_nosave_pages(forbidden_pages_map);
  745 +
  746 + printk("swsusp: Basic memory bitmaps created\n");
  747 +
  748 + return 0;
  749 +
  750 + Free_second_object:
  751 + kfree(bm2);
  752 + Free_first_bitmap:
  753 + memory_bm_free(bm1, PG_UNSAFE_CLEAR);
  754 + Free_first_object:
  755 + kfree(bm1);
  756 + return -ENOMEM;
  757 +}
  758 +
  759 +/**
  760 + * free_basic_memory_bitmaps - free memory bitmaps allocated by
  761 + * create_basic_memory_bitmaps(). The auxiliary pointers are necessary
  762 + * so that the bitmaps themselves are not referred to while they are being
  763 + * freed.
  764 + */
  765 +
  766 +void free_basic_memory_bitmaps(void)
  767 +{
  768 + struct memory_bitmap *bm1, *bm2;
  769 +
  770 + BUG_ON(!(forbidden_pages_map && free_pages_map));
  771 +
  772 + bm1 = forbidden_pages_map;
  773 + bm2 = free_pages_map;
  774 + forbidden_pages_map = NULL;
  775 + free_pages_map = NULL;
  776 + memory_bm_free(bm1, PG_UNSAFE_CLEAR);
  777 + kfree(bm1);
  778 + memory_bm_free(bm2, PG_UNSAFE_CLEAR);
  779 + kfree(bm2);
  780 +
  781 + printk("swsusp: Basic memory bitmaps freed\n");
564 782 }
565 783  
566 784 /**
... ... @@ -52,6 +52,9 @@
52 52 if ((filp->f_flags & O_ACCMODE) == O_RDWR)
53 53 return -ENOSYS;
54 54  
  55 + if(create_basic_memory_bitmaps())
  56 + return -ENOMEM;
  57 +
55 58 nonseekable_open(inode, filp);
56 59 data = &snapshot_state;
57 60 filp->private_data = data;
... ... @@ -77,6 +80,7 @@
77 80 struct snapshot_data *data;
78 81  
79 82 swsusp_free();
  83 + free_basic_memory_bitmaps();
80 84 data = filp->private_data;
81 85 free_all_swap_pages(data->swap, data->bitmap);
82 86 free_bitmap(data->bitmap);