Commit 208d54e5513c0c02d85af0990901354c74364d5c
Committed by
Linus Torvalds
1 parent
c6a57e19e4
Exists in
master
and in
7 other branches
[PATCH] memory hotplug locking: node_size_lock
pgdat->node_size_lock is basically only neeeded in one place in the normal code: show_mem(), which is the arch-specific sysrq-m printing function. Strictly speaking, the architectures not doing memory hotplug do no need this locking in show_mem(). However, they are all included for completeness. This should also make any future consolidation of all of the implementations a little more straightforward. This lock is also held in the sparsemem code during a memory removal, as sections are invalidated. This is the place there pfn_valid() is made false for a memory area that's being removed. The lock is only required when doing pfn_valid() operations on memory which the user does not already have a reference on the page, such as in show_mem(). Signed-off-by: Dave Hansen <haveblue@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 9 changed files with 76 additions and 2 deletions Side-by-side Diff
arch/alpha/mm/numa.c
... | ... | @@ -371,6 +371,8 @@ |
371 | 371 | show_free_areas(); |
372 | 372 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); |
373 | 373 | for_each_online_node(nid) { |
374 | + unsigned long flags; | |
375 | + pgdat_resize_lock(NODE_DATA(nid), &flags); | |
374 | 376 | i = node_spanned_pages(nid); |
375 | 377 | while (i-- > 0) { |
376 | 378 | struct page *page = nid_page_nr(nid, i); |
... | ... | @@ -384,6 +386,7 @@ |
384 | 386 | else |
385 | 387 | shared += page_count(page) - 1; |
386 | 388 | } |
389 | + pgdat_resize_unlock(NODE_DATA(nid), &flags); | |
387 | 390 | } |
388 | 391 | printk("%ld pages of RAM\n",total); |
389 | 392 | printk("%ld free pages\n",free); |
arch/i386/mm/pgtable.c
... | ... | @@ -31,11 +31,13 @@ |
31 | 31 | pg_data_t *pgdat; |
32 | 32 | unsigned long i; |
33 | 33 | struct page_state ps; |
34 | + unsigned long flags; | |
34 | 35 | |
35 | 36 | printk(KERN_INFO "Mem-info:\n"); |
36 | 37 | show_free_areas(); |
37 | 38 | printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); |
38 | 39 | for_each_pgdat(pgdat) { |
40 | + pgdat_resize_lock(pgdat, &flags); | |
39 | 41 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { |
40 | 42 | page = pgdat_page_nr(pgdat, i); |
41 | 43 | total++; |
... | ... | @@ -48,6 +50,7 @@ |
48 | 50 | else if (page_count(page)) |
49 | 51 | shared += page_count(page) - 1; |
50 | 52 | } |
53 | + pgdat_resize_unlock(pgdat, &flags); | |
51 | 54 | } |
52 | 55 | printk(KERN_INFO "%d pages of RAM\n", total); |
53 | 56 | printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); |
arch/ia64/mm/discontig.c
... | ... | @@ -555,9 +555,13 @@ |
555 | 555 | show_free_areas(); |
556 | 556 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); |
557 | 557 | for_each_pgdat(pgdat) { |
558 | - unsigned long present = pgdat->node_present_pages; | |
558 | + unsigned long present; | |
559 | + unsigned long flags; | |
559 | 560 | int shared = 0, cached = 0, reserved = 0; |
561 | + | |
560 | 562 | printk("Node ID: %d\n", pgdat->node_id); |
563 | + pgdat_resize_lock(pgdat, &flags); | |
564 | + present = pgdat->node_present_pages; | |
561 | 565 | for(i = 0; i < pgdat->node_spanned_pages; i++) { |
562 | 566 | struct page *page; |
563 | 567 | if (pfn_valid(pgdat->node_start_pfn + i)) |
... | ... | @@ -571,6 +575,7 @@ |
571 | 575 | else if (page_count(page)) |
572 | 576 | shared += page_count(page)-1; |
573 | 577 | } |
578 | + pgdat_resize_unlock(pgdat, &flags); | |
574 | 579 | total_present += present; |
575 | 580 | total_reserved += reserved; |
576 | 581 | total_cached += cached; |
arch/m32r/mm/init.c
... | ... | @@ -48,6 +48,8 @@ |
48 | 48 | show_free_areas(); |
49 | 49 | printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); |
50 | 50 | for_each_pgdat(pgdat) { |
51 | + unsigned long flags; | |
52 | + pgdat_resize_lock(pgdat, &flags); | |
51 | 53 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { |
52 | 54 | page = pgdat_page_nr(pgdat, i); |
53 | 55 | total++; |
... | ... | @@ -60,6 +62,7 @@ |
60 | 62 | else if (page_count(page)) |
61 | 63 | shared += page_count(page) - 1; |
62 | 64 | } |
65 | + pgdat_resize_unlock(pgdat, &flags); | |
63 | 66 | } |
64 | 67 | printk("%d pages of RAM\n", total); |
65 | 68 | printk("%d pages of HIGHMEM\n",highmem); |
66 | 69 | |
... | ... | @@ -150,10 +153,14 @@ |
150 | 153 | int reservedpages, nid, i; |
151 | 154 | |
152 | 155 | reservedpages = 0; |
153 | - for_each_online_node(nid) | |
156 | + for_each_online_node(nid) { | |
157 | + unsigned long flags; | |
158 | + pgdat_resize_lock(NODE_DATA(nid), &flags); | |
154 | 159 | for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++) |
155 | 160 | if (PageReserved(nid_page_nr(nid, i))) |
156 | 161 | reservedpages++; |
162 | + pgdat_resize_unlock(NODE_DATA(nid), &flags); | |
163 | + } | |
157 | 164 | |
158 | 165 | return reservedpages; |
159 | 166 | } |
arch/parisc/mm/init.c
... | ... | @@ -505,7 +505,9 @@ |
505 | 505 | |
506 | 506 | for (j = node_start_pfn(i); j < node_end_pfn(i); j++) { |
507 | 507 | struct page *p; |
508 | + unsigned long flags; | |
508 | 509 | |
510 | + pgdat_resize_lock(NODE_DATA(i), &flags); | |
509 | 511 | p = nid_page_nr(i, j) - node_start_pfn(i); |
510 | 512 | |
511 | 513 | total++; |
... | ... | @@ -517,6 +519,7 @@ |
517 | 519 | free++; |
518 | 520 | else |
519 | 521 | shared += page_count(p) - 1; |
522 | + pgdat_resize_unlock(NODE_DATA(i), &flags); | |
520 | 523 | } |
521 | 524 | } |
522 | 525 | #endif |
arch/ppc64/mm/init.c
... | ... | @@ -104,6 +104,8 @@ |
104 | 104 | show_free_areas(); |
105 | 105 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); |
106 | 106 | for_each_pgdat(pgdat) { |
107 | + unsigned long flags; | |
108 | + pgdat_resize_lock(pgdat, &flags); | |
107 | 109 | for (i = 0; i < pgdat->node_spanned_pages; i++) { |
108 | 110 | page = pgdat_page_nr(pgdat, i); |
109 | 111 | total++; |
... | ... | @@ -114,6 +116,7 @@ |
114 | 116 | else if (page_count(page)) |
115 | 117 | shared += page_count(page) - 1; |
116 | 118 | } |
119 | + pgdat_resize_unlock(pgdat, &flags); | |
117 | 120 | } |
118 | 121 | printk("%ld pages of RAM\n", total); |
119 | 122 | printk("%ld reserved pages\n", reserved); |
120 | 123 | |
... | ... | @@ -647,11 +650,14 @@ |
647 | 650 | #endif |
648 | 651 | |
649 | 652 | for_each_pgdat(pgdat) { |
653 | + unsigned long flags; | |
654 | + pgdat_resize_lock(pgdat, &flags); | |
650 | 655 | for (i = 0; i < pgdat->node_spanned_pages; i++) { |
651 | 656 | page = pgdat_page_nr(pgdat, i); |
652 | 657 | if (PageReserved(page)) |
653 | 658 | reservedpages++; |
654 | 659 | } |
660 | + pgdat_resize_unlock(pgdat, &flags); | |
655 | 661 | } |
656 | 662 | |
657 | 663 | codesize = (unsigned long)&_etext - (unsigned long)&_stext; |
include/linux/memory_hotplug.h
1 | +#ifndef __LINUX_MEMORY_HOTPLUG_H | |
2 | +#define __LINUX_MEMORY_HOTPLUG_H | |
3 | + | |
4 | +#include <linux/mmzone.h> | |
5 | +#include <linux/spinlock.h> | |
6 | + | |
7 | +#ifdef CONFIG_MEMORY_HOTPLUG | |
8 | +/* | |
9 | + * pgdat resizing functions | |
10 | + */ | |
11 | +static inline | |
12 | +void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags) | |
13 | +{ | |
14 | + spin_lock_irqsave(&pgdat->node_size_lock, *flags); | |
15 | +} | |
16 | +static inline | |
17 | +void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags) | |
18 | +{ | |
19 | + spin_unlock_irqrestore(&pgdat->node_size_lock, *flags); | |
20 | +} | |
21 | +static inline | |
22 | +void pgdat_resize_init(struct pglist_data *pgdat) | |
23 | +{ | |
24 | + spin_lock_init(&pgdat->node_size_lock); | |
25 | +} | |
26 | +#else /* ! CONFIG_MEMORY_HOTPLUG */ | |
27 | +/* | |
28 | + * Stub functions for when hotplug is off | |
29 | + */ | |
30 | +static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {} | |
31 | +static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {} | |
32 | +static inline void pgdat_resize_init(struct pglist_data *pgdat) {} | |
33 | +#endif | |
34 | +#endif /* __LINUX_MEMORY_HOTPLUG_H */ |
include/linux/mmzone.h
... | ... | @@ -273,6 +273,16 @@ |
273 | 273 | struct page *node_mem_map; |
274 | 274 | #endif |
275 | 275 | struct bootmem_data *bdata; |
276 | +#ifdef CONFIG_MEMORY_HOTPLUG | |
277 | + /* | |
278 | + * Must be held any time you expect node_start_pfn, node_present_pages | |
279 | + * or node_spanned_pages to stay constant. Holding this will also | |
280 | + * guarantee that any pfn_valid() stays that way. | |
281 | + * | |
282 | + * Nests above zone->lock and zone->size_seqlock. | |
283 | + */ | |
284 | + spinlock_t node_size_lock; | |
285 | +#endif | |
276 | 286 | unsigned long node_start_pfn; |
277 | 287 | unsigned long node_present_pages; /* total number of physical pages */ |
278 | 288 | unsigned long node_spanned_pages; /* total size of physical page |
... | ... | @@ -292,6 +302,8 @@ |
292 | 302 | #define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr)) |
293 | 303 | #endif |
294 | 304 | #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) |
305 | + | |
306 | +#include <linux/memory_hotplug.h> | |
295 | 307 | |
296 | 308 | extern struct pglist_data *pgdat_list; |
297 | 309 |
mm/page_alloc.c