Commit 208d54e5513c0c02d85af0990901354c74364d5c

Authored by Dave Hansen
Committed by Linus Torvalds
1 parent c6a57e19e4

[PATCH] memory hotplug locking: node_size_lock

pgdat->node_size_lock is basically only needed in one place in the normal
code: show_mem(), which is the arch-specific sysrq-m printing function.

Strictly speaking, the architectures not doing memory hotplug do not need this
locking in show_mem().  However, they are all included for completeness.  This
should also make any future consolidation of all of the implementations a
little more straightforward.

This lock is also held in the sparsemem code during a memory removal, as
sections are invalidated.  This is the place where pfn_valid() is made false
for a memory area that's being removed.  The lock is only required when doing
pfn_valid() operations on memory which the user does not already have a
reference on the page, such as in show_mem().

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 9 changed files with 76 additions and 2 deletions Side-by-side Diff

arch/alpha/mm/numa.c
... ... @@ -371,6 +371,8 @@
371 371 show_free_areas();
372 372 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
373 373 for_each_online_node(nid) {
  374 + unsigned long flags;
  375 + pgdat_resize_lock(NODE_DATA(nid), &flags);
374 376 i = node_spanned_pages(nid);
375 377 while (i-- > 0) {
376 378 struct page *page = nid_page_nr(nid, i);
... ... @@ -384,6 +386,7 @@
384 386 else
385 387 shared += page_count(page) - 1;
386 388 }
  389 + pgdat_resize_unlock(NODE_DATA(nid), &flags);
387 390 }
388 391 printk("%ld pages of RAM\n",total);
389 392 printk("%ld free pages\n",free);
arch/i386/mm/pgtable.c
... ... @@ -31,11 +31,13 @@
31 31 pg_data_t *pgdat;
32 32 unsigned long i;
33 33 struct page_state ps;
  34 + unsigned long flags;
34 35  
35 36 printk(KERN_INFO "Mem-info:\n");
36 37 show_free_areas();
37 38 printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
38 39 for_each_pgdat(pgdat) {
  40 + pgdat_resize_lock(pgdat, &flags);
39 41 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
40 42 page = pgdat_page_nr(pgdat, i);
41 43 total++;
... ... @@ -48,6 +50,7 @@
48 50 else if (page_count(page))
49 51 shared += page_count(page) - 1;
50 52 }
  53 + pgdat_resize_unlock(pgdat, &flags);
51 54 }
52 55 printk(KERN_INFO "%d pages of RAM\n", total);
53 56 printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
arch/ia64/mm/discontig.c
... ... @@ -555,9 +555,13 @@
555 555 show_free_areas();
556 556 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
557 557 for_each_pgdat(pgdat) {
558   - unsigned long present = pgdat->node_present_pages;
  558 + unsigned long present;
  559 + unsigned long flags;
559 560 int shared = 0, cached = 0, reserved = 0;
  561 +
560 562 printk("Node ID: %d\n", pgdat->node_id);
  563 + pgdat_resize_lock(pgdat, &flags);
  564 + present = pgdat->node_present_pages;
561 565 for(i = 0; i < pgdat->node_spanned_pages; i++) {
562 566 struct page *page;
563 567 if (pfn_valid(pgdat->node_start_pfn + i))
... ... @@ -571,6 +575,7 @@
571 575 else if (page_count(page))
572 576 shared += page_count(page)-1;
573 577 }
  578 + pgdat_resize_unlock(pgdat, &flags);
574 579 total_present += present;
575 580 total_reserved += reserved;
576 581 total_cached += cached;
arch/m32r/mm/init.c
... ... @@ -48,6 +48,8 @@
48 48 show_free_areas();
49 49 printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
50 50 for_each_pgdat(pgdat) {
  51 + unsigned long flags;
  52 + pgdat_resize_lock(pgdat, &flags);
51 53 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
52 54 page = pgdat_page_nr(pgdat, i);
53 55 total++;
... ... @@ -60,6 +62,7 @@
60 62 else if (page_count(page))
61 63 shared += page_count(page) - 1;
62 64 }
  65 + pgdat_resize_unlock(pgdat, &flags);
63 66 }
64 67 printk("%d pages of RAM\n", total);
65 68 printk("%d pages of HIGHMEM\n",highmem);
66 69  
... ... @@ -150,10 +153,14 @@
150 153 int reservedpages, nid, i;
151 154  
152 155 reservedpages = 0;
153   - for_each_online_node(nid)
  156 + for_each_online_node(nid) {
  157 + unsigned long flags;
  158 + pgdat_resize_lock(NODE_DATA(nid), &flags);
154 159 for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++)
155 160 if (PageReserved(nid_page_nr(nid, i)))
156 161 reservedpages++;
  162 + pgdat_resize_unlock(NODE_DATA(nid), &flags);
  163 + }
157 164  
158 165 return reservedpages;
159 166 }
arch/parisc/mm/init.c
... ... @@ -505,7 +505,9 @@
505 505  
506 506 for (j = node_start_pfn(i); j < node_end_pfn(i); j++) {
507 507 struct page *p;
  508 + unsigned long flags;
508 509  
  510 + pgdat_resize_lock(NODE_DATA(i), &flags);
509 511 p = nid_page_nr(i, j) - node_start_pfn(i);
510 512  
511 513 total++;
... ... @@ -517,6 +519,7 @@
517 519 free++;
518 520 else
519 521 shared += page_count(p) - 1;
  522 + pgdat_resize_unlock(NODE_DATA(i), &flags);
520 523 }
521 524 }
522 525 #endif
arch/ppc64/mm/init.c
... ... @@ -104,6 +104,8 @@
104 104 show_free_areas();
105 105 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
106 106 for_each_pgdat(pgdat) {
  107 + unsigned long flags;
  108 + pgdat_resize_lock(pgdat, &flags);
107 109 for (i = 0; i < pgdat->node_spanned_pages; i++) {
108 110 page = pgdat_page_nr(pgdat, i);
109 111 total++;
... ... @@ -114,6 +116,7 @@
114 116 else if (page_count(page))
115 117 shared += page_count(page) - 1;
116 118 }
  119 + pgdat_resize_unlock(pgdat, &flags);
117 120 }
118 121 printk("%ld pages of RAM\n", total);
119 122 printk("%ld reserved pages\n", reserved);
120 123  
... ... @@ -647,11 +650,14 @@
647 650 #endif
648 651  
649 652 for_each_pgdat(pgdat) {
  653 + unsigned long flags;
  654 + pgdat_resize_lock(pgdat, &flags);
650 655 for (i = 0; i < pgdat->node_spanned_pages; i++) {
651 656 page = pgdat_page_nr(pgdat, i);
652 657 if (PageReserved(page))
653 658 reservedpages++;
654 659 }
  660 + pgdat_resize_unlock(pgdat, &flags);
655 661 }
656 662  
657 663 codesize = (unsigned long)&_etext - (unsigned long)&_stext;
include/linux/memory_hotplug.h
  1 +#ifndef __LINUX_MEMORY_HOTPLUG_H
  2 +#define __LINUX_MEMORY_HOTPLUG_H
  3 +
  4 +#include <linux/mmzone.h>
  5 +#include <linux/spinlock.h>
  6 +
  7 +#ifdef CONFIG_MEMORY_HOTPLUG
  8 +/*
  9 + * pgdat resizing functions
  10 + */
  11 +static inline
  12 +void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
  13 +{
  14 + spin_lock_irqsave(&pgdat->node_size_lock, *flags); /* take this node's node_size_lock; callers hand the same flags pointer to pgdat_resize_unlock() */
  15 +}
  16 +static inline
  17 +void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
  18 +{
  19 + spin_lock_irqrestore(&pgdat->node_size_lock, *flags);
  20 +}
  21 +static inline
  22 +void pgdat_resize_init(struct pglist_data *pgdat)
  23 +{
  24 + spin_lock_init(&pgdat->node_size_lock); /* set up this node's node_size_lock; the last hunk of this patch calls it next to the other pgdat field setup */
  25 +}
  26 +#else /* ! CONFIG_MEMORY_HOTPLUG */
  27 +/*
  28 + * Stub functions for when hotplug is off: empty bodies, so callers need no #ifdef of their own
  29 + */
  30 +static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
  31 +static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
  32 +static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
  33 +#endif
  34 +#endif /* __LINUX_MEMORY_HOTPLUG_H */
include/linux/mmzone.h
... ... @@ -273,6 +273,16 @@
273 273 struct page *node_mem_map;
274 274 #endif
275 275 struct bootmem_data *bdata;
  276 +#ifdef CONFIG_MEMORY_HOTPLUG
  277 + /*
  278 + * Must be held any time you expect node_start_pfn, node_present_pages
  279 + * or node_spanned_pages stay constant. Holding this will also
  280 + * guarantee that any pfn_valid() stays that way.
  281 + *
  282 + * Nests above zone->lock and zone->size_seqlock.
  283 + */
  284 + spinlock_t node_size_lock;
  285 +#endif
276 286 unsigned long node_start_pfn;
277 287 unsigned long node_present_pages; /* total number of physical pages */
278 288 unsigned long node_spanned_pages; /* total size of physical page
... ... @@ -292,6 +302,8 @@
292 302 #define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr))
293 303 #endif
294 304 #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr))
  305 +
  306 +#include <linux/memory_hotplug.h>
295 307  
296 308 extern struct pglist_data *pgdat_list;
297 309  
mm/page_alloc.c
... ... @@ -1958,6 +1958,7 @@
1958 1958 int nid = pgdat->node_id;
1959 1959 unsigned long zone_start_pfn = pgdat->node_start_pfn;
1960 1960  
  1961 + pgdat_resize_init(pgdat);
1961 1962 pgdat->nr_zones = 0;
1962 1963 init_waitqueue_head(&pgdat->kswapd_wait);
1963 1964 pgdat->kswapd_max_order = 0;