Commit 1e8537baacd59e96bbe5f8d3d32feafd11f509fe
Committed by Linus Torvalds
1 parent: 38a76013ad
Exists in master and in 20 other branches
memory-hotplug: build zonelists when offlining pages
online_pages() calls build_all_zonelists() and zone_pcp_update(); I think offline_pages() should do the same. When a zone has no memory left to allocate, remove it from other nodes' zonelists. zone_batchsize() depends on the zone's present pages, so if the zone's present pages change, the zone's pcp should be updated as well.

Signed-off-by: Xishi Qiu <qiuxishi@huawei.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 1 changed file with 6 additions and 1 deletion. Inline Diff
mm/memory_hotplug.c
1 | /* | 1 | /* |
2 | * linux/mm/memory_hotplug.c | 2 | * linux/mm/memory_hotplug.c |
3 | * | 3 | * |
4 | * Copyright (C) | 4 | * Copyright (C) |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/stddef.h> | 7 | #include <linux/stddef.h> |
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/swap.h> | 9 | #include <linux/swap.h> |
10 | #include <linux/interrupt.h> | 10 | #include <linux/interrupt.h> |
11 | #include <linux/pagemap.h> | 11 | #include <linux/pagemap.h> |
12 | #include <linux/bootmem.h> | 12 | #include <linux/bootmem.h> |
13 | #include <linux/compiler.h> | 13 | #include <linux/compiler.h> |
14 | #include <linux/export.h> | 14 | #include <linux/export.h> |
15 | #include <linux/pagevec.h> | 15 | #include <linux/pagevec.h> |
16 | #include <linux/writeback.h> | 16 | #include <linux/writeback.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/sysctl.h> | 18 | #include <linux/sysctl.h> |
19 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
20 | #include <linux/memory.h> | 20 | #include <linux/memory.h> |
21 | #include <linux/memory_hotplug.h> | 21 | #include <linux/memory_hotplug.h> |
22 | #include <linux/highmem.h> | 22 | #include <linux/highmem.h> |
23 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
24 | #include <linux/ioport.h> | 24 | #include <linux/ioport.h> |
25 | #include <linux/delay.h> | 25 | #include <linux/delay.h> |
26 | #include <linux/migrate.h> | 26 | #include <linux/migrate.h> |
27 | #include <linux/page-isolation.h> | 27 | #include <linux/page-isolation.h> |
28 | #include <linux/pfn.h> | 28 | #include <linux/pfn.h> |
29 | #include <linux/suspend.h> | 29 | #include <linux/suspend.h> |
30 | #include <linux/mm_inline.h> | 30 | #include <linux/mm_inline.h> |
31 | #include <linux/firmware-map.h> | 31 | #include <linux/firmware-map.h> |
32 | 32 | ||
33 | #include <asm/tlbflush.h> | 33 | #include <asm/tlbflush.h> |
34 | 34 | ||
35 | #include "internal.h" | 35 | #include "internal.h" |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * online_page_callback holds a pointer to the current page onlining function. | 38 | * online_page_callback holds a pointer to the current page onlining function. |
39 | * Initially it is generic_online_page(). If required, it can be | 39 | * Initially it is generic_online_page(). If required, it can be |
40 | * changed by calling set_online_page_callback() to register a callback | 40 | * changed by calling set_online_page_callback() to register a callback |
41 | * and restore_online_page_callback() to restore the generic callback. | 41 | * and restore_online_page_callback() to restore the generic callback. |
42 | */ | 42 | */ |
43 | 43 | ||
44 | static void generic_online_page(struct page *page); | 44 | static void generic_online_page(struct page *page); |
45 | 45 | ||
46 | static online_page_callback_t online_page_callback = generic_online_page; | 46 | static online_page_callback_t online_page_callback = generic_online_page; |
47 | 47 | ||
48 | DEFINE_MUTEX(mem_hotplug_mutex); | 48 | DEFINE_MUTEX(mem_hotplug_mutex); |
49 | 49 | ||
50 | void lock_memory_hotplug(void) | 50 | void lock_memory_hotplug(void) |
51 | { | 51 | { |
52 | mutex_lock(&mem_hotplug_mutex); | 52 | mutex_lock(&mem_hotplug_mutex); |
53 | 53 | ||
54 | /* for exclusive hibernation if CONFIG_HIBERNATION=y */ | 54 | /* for exclusive hibernation if CONFIG_HIBERNATION=y */ |
55 | lock_system_sleep(); | 55 | lock_system_sleep(); |
56 | } | 56 | } |
57 | 57 | ||
58 | void unlock_memory_hotplug(void) | 58 | void unlock_memory_hotplug(void) |
59 | { | 59 | { |
60 | unlock_system_sleep(); | 60 | unlock_system_sleep(); |
61 | mutex_unlock(&mem_hotplug_mutex); | 61 | mutex_unlock(&mem_hotplug_mutex); |
62 | } | 62 | } |
63 | 63 | ||
64 | 64 | ||
65 | /* add this memory to iomem resource */ | 65 | /* add this memory to iomem resource */ |
66 | static struct resource *register_memory_resource(u64 start, u64 size) | 66 | static struct resource *register_memory_resource(u64 start, u64 size) |
67 | { | 67 | { |
68 | struct resource *res; | 68 | struct resource *res; |
69 | res = kzalloc(sizeof(struct resource), GFP_KERNEL); | 69 | res = kzalloc(sizeof(struct resource), GFP_KERNEL); |
70 | BUG_ON(!res); | 70 | BUG_ON(!res); |
71 | 71 | ||
72 | res->name = "System RAM"; | 72 | res->name = "System RAM"; |
73 | res->start = start; | 73 | res->start = start; |
74 | res->end = start + size - 1; | 74 | res->end = start + size - 1; |
75 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | 75 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; |
76 | if (request_resource(&iomem_resource, res) < 0) { | 76 | if (request_resource(&iomem_resource, res) < 0) { |
77 | printk("System RAM resource %pR cannot be added\n", res); | 77 | printk("System RAM resource %pR cannot be added\n", res); |
78 | kfree(res); | 78 | kfree(res); |
79 | res = NULL; | 79 | res = NULL; |
80 | } | 80 | } |
81 | return res; | 81 | return res; |
82 | } | 82 | } |
83 | 83 | ||
84 | static void release_memory_resource(struct resource *res) | 84 | static void release_memory_resource(struct resource *res) |
85 | { | 85 | { |
86 | if (!res) | 86 | if (!res) |
87 | return; | 87 | return; |
88 | release_resource(res); | 88 | release_resource(res); |
89 | kfree(res); | 89 | kfree(res); |
90 | return; | 90 | return; |
91 | } | 91 | } |
92 | 92 | ||
93 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE | 93 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
94 | #ifndef CONFIG_SPARSEMEM_VMEMMAP | 94 | #ifndef CONFIG_SPARSEMEM_VMEMMAP |
95 | static void get_page_bootmem(unsigned long info, struct page *page, | 95 | static void get_page_bootmem(unsigned long info, struct page *page, |
96 | unsigned long type) | 96 | unsigned long type) |
97 | { | 97 | { |
98 | page->lru.next = (struct list_head *) type; | 98 | page->lru.next = (struct list_head *) type; |
99 | SetPagePrivate(page); | 99 | SetPagePrivate(page); |
100 | set_page_private(page, info); | 100 | set_page_private(page, info); |
101 | atomic_inc(&page->_count); | 101 | atomic_inc(&page->_count); |
102 | } | 102 | } |
103 | 103 | ||
104 | /* reference to __meminit __free_pages_bootmem is valid | 104 | /* reference to __meminit __free_pages_bootmem is valid |
105 | * so use __ref to tell modpost not to generate a warning */ | 105 | * so use __ref to tell modpost not to generate a warning */ |
106 | void __ref put_page_bootmem(struct page *page) | 106 | void __ref put_page_bootmem(struct page *page) |
107 | { | 107 | { |
108 | unsigned long type; | 108 | unsigned long type; |
109 | 109 | ||
110 | type = (unsigned long) page->lru.next; | 110 | type = (unsigned long) page->lru.next; |
111 | BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || | 111 | BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || |
112 | type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE); | 112 | type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE); |
113 | 113 | ||
114 | if (atomic_dec_return(&page->_count) == 1) { | 114 | if (atomic_dec_return(&page->_count) == 1) { |
115 | ClearPagePrivate(page); | 115 | ClearPagePrivate(page); |
116 | set_page_private(page, 0); | 116 | set_page_private(page, 0); |
117 | INIT_LIST_HEAD(&page->lru); | 117 | INIT_LIST_HEAD(&page->lru); |
118 | __free_pages_bootmem(page, 0); | 118 | __free_pages_bootmem(page, 0); |
119 | } | 119 | } |
120 | 120 | ||
121 | } | 121 | } |
122 | 122 | ||
123 | static void register_page_bootmem_info_section(unsigned long start_pfn) | 123 | static void register_page_bootmem_info_section(unsigned long start_pfn) |
124 | { | 124 | { |
125 | unsigned long *usemap, mapsize, section_nr, i; | 125 | unsigned long *usemap, mapsize, section_nr, i; |
126 | struct mem_section *ms; | 126 | struct mem_section *ms; |
127 | struct page *page, *memmap; | 127 | struct page *page, *memmap; |
128 | 128 | ||
129 | section_nr = pfn_to_section_nr(start_pfn); | 129 | section_nr = pfn_to_section_nr(start_pfn); |
130 | ms = __nr_to_section(section_nr); | 130 | ms = __nr_to_section(section_nr); |
131 | 131 | ||
132 | /* Get section's memmap address */ | 132 | /* Get section's memmap address */ |
133 | memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); | 133 | memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); |
134 | 134 | ||
135 | /* | 135 | /* |
136 | * Get page for the memmap's phys address | 136 | * Get page for the memmap's phys address |
137 | * XXX: need more consideration for sparse_vmemmap... | 137 | * XXX: need more consideration for sparse_vmemmap... |
138 | */ | 138 | */ |
139 | page = virt_to_page(memmap); | 139 | page = virt_to_page(memmap); |
140 | mapsize = sizeof(struct page) * PAGES_PER_SECTION; | 140 | mapsize = sizeof(struct page) * PAGES_PER_SECTION; |
141 | mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT; | 141 | mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT; |
142 | 142 | ||
143 | /* remember memmap's page */ | 143 | /* remember memmap's page */ |
144 | for (i = 0; i < mapsize; i++, page++) | 144 | for (i = 0; i < mapsize; i++, page++) |
145 | get_page_bootmem(section_nr, page, SECTION_INFO); | 145 | get_page_bootmem(section_nr, page, SECTION_INFO); |
146 | 146 | ||
147 | usemap = __nr_to_section(section_nr)->pageblock_flags; | 147 | usemap = __nr_to_section(section_nr)->pageblock_flags; |
148 | page = virt_to_page(usemap); | 148 | page = virt_to_page(usemap); |
149 | 149 | ||
150 | mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; | 150 | mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; |
151 | 151 | ||
152 | for (i = 0; i < mapsize; i++, page++) | 152 | for (i = 0; i < mapsize; i++, page++) |
153 | get_page_bootmem(section_nr, page, MIX_SECTION_INFO); | 153 | get_page_bootmem(section_nr, page, MIX_SECTION_INFO); |
154 | 154 | ||
155 | } | 155 | } |
156 | 156 | ||
157 | void register_page_bootmem_info_node(struct pglist_data *pgdat) | 157 | void register_page_bootmem_info_node(struct pglist_data *pgdat) |
158 | { | 158 | { |
159 | unsigned long i, pfn, end_pfn, nr_pages; | 159 | unsigned long i, pfn, end_pfn, nr_pages; |
160 | int node = pgdat->node_id; | 160 | int node = pgdat->node_id; |
161 | struct page *page; | 161 | struct page *page; |
162 | struct zone *zone; | 162 | struct zone *zone; |
163 | 163 | ||
164 | nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT; | 164 | nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT; |
165 | page = virt_to_page(pgdat); | 165 | page = virt_to_page(pgdat); |
166 | 166 | ||
167 | for (i = 0; i < nr_pages; i++, page++) | 167 | for (i = 0; i < nr_pages; i++, page++) |
168 | get_page_bootmem(node, page, NODE_INFO); | 168 | get_page_bootmem(node, page, NODE_INFO); |
169 | 169 | ||
170 | zone = &pgdat->node_zones[0]; | 170 | zone = &pgdat->node_zones[0]; |
171 | for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) { | 171 | for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) { |
172 | if (zone->wait_table) { | 172 | if (zone->wait_table) { |
173 | nr_pages = zone->wait_table_hash_nr_entries | 173 | nr_pages = zone->wait_table_hash_nr_entries |
174 | * sizeof(wait_queue_head_t); | 174 | * sizeof(wait_queue_head_t); |
175 | nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT; | 175 | nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT; |
176 | page = virt_to_page(zone->wait_table); | 176 | page = virt_to_page(zone->wait_table); |
177 | 177 | ||
178 | for (i = 0; i < nr_pages; i++, page++) | 178 | for (i = 0; i < nr_pages; i++, page++) |
179 | get_page_bootmem(node, page, NODE_INFO); | 179 | get_page_bootmem(node, page, NODE_INFO); |
180 | } | 180 | } |
181 | } | 181 | } |
182 | 182 | ||
183 | pfn = pgdat->node_start_pfn; | 183 | pfn = pgdat->node_start_pfn; |
184 | end_pfn = pfn + pgdat->node_spanned_pages; | 184 | end_pfn = pfn + pgdat->node_spanned_pages; |
185 | 185 | ||
186 | /* register_section info */ | 186 | /* register_section info */ |
187 | for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) { | 187 | for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) { |
188 | /* | 188 | /* |
189 | * Some platforms can assign the same pfn to multiple nodes - on | 189 | * Some platforms can assign the same pfn to multiple nodes - on |
190 | * node0 as well as nodeN. To avoid registering a pfn against | 190 | * node0 as well as nodeN. To avoid registering a pfn against |
191 | * multiple nodes we check that this pfn does not already | 191 | * multiple nodes we check that this pfn does not already |
192 | * reside in some other node. | 192 | * reside in some other node. |
193 | */ | 193 | */ |
194 | if (pfn_valid(pfn) && (pfn_to_nid(pfn) == node)) | 194 | if (pfn_valid(pfn) && (pfn_to_nid(pfn) == node)) |
195 | register_page_bootmem_info_section(pfn); | 195 | register_page_bootmem_info_section(pfn); |
196 | } | 196 | } |
197 | } | 197 | } |
198 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | 198 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ |
199 | 199 | ||
200 | static void grow_zone_span(struct zone *zone, unsigned long start_pfn, | 200 | static void grow_zone_span(struct zone *zone, unsigned long start_pfn, |
201 | unsigned long end_pfn) | 201 | unsigned long end_pfn) |
202 | { | 202 | { |
203 | unsigned long old_zone_end_pfn; | 203 | unsigned long old_zone_end_pfn; |
204 | 204 | ||
205 | zone_span_writelock(zone); | 205 | zone_span_writelock(zone); |
206 | 206 | ||
207 | old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; | 207 | old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; |
208 | if (start_pfn < zone->zone_start_pfn) | 208 | if (start_pfn < zone->zone_start_pfn) |
209 | zone->zone_start_pfn = start_pfn; | 209 | zone->zone_start_pfn = start_pfn; |
210 | 210 | ||
211 | zone->spanned_pages = max(old_zone_end_pfn, end_pfn) - | 211 | zone->spanned_pages = max(old_zone_end_pfn, end_pfn) - |
212 | zone->zone_start_pfn; | 212 | zone->zone_start_pfn; |
213 | 213 | ||
214 | zone_span_writeunlock(zone); | 214 | zone_span_writeunlock(zone); |
215 | } | 215 | } |
216 | 216 | ||
217 | static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, | 217 | static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, |
218 | unsigned long end_pfn) | 218 | unsigned long end_pfn) |
219 | { | 219 | { |
220 | unsigned long old_pgdat_end_pfn = | 220 | unsigned long old_pgdat_end_pfn = |
221 | pgdat->node_start_pfn + pgdat->node_spanned_pages; | 221 | pgdat->node_start_pfn + pgdat->node_spanned_pages; |
222 | 222 | ||
223 | if (start_pfn < pgdat->node_start_pfn) | 223 | if (start_pfn < pgdat->node_start_pfn) |
224 | pgdat->node_start_pfn = start_pfn; | 224 | pgdat->node_start_pfn = start_pfn; |
225 | 225 | ||
226 | pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) - | 226 | pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) - |
227 | pgdat->node_start_pfn; | 227 | pgdat->node_start_pfn; |
228 | } | 228 | } |
229 | 229 | ||
230 | static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn) | 230 | static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn) |
231 | { | 231 | { |
232 | struct pglist_data *pgdat = zone->zone_pgdat; | 232 | struct pglist_data *pgdat = zone->zone_pgdat; |
233 | int nr_pages = PAGES_PER_SECTION; | 233 | int nr_pages = PAGES_PER_SECTION; |
234 | int nid = pgdat->node_id; | 234 | int nid = pgdat->node_id; |
235 | int zone_type; | 235 | int zone_type; |
236 | unsigned long flags; | 236 | unsigned long flags; |
237 | 237 | ||
238 | zone_type = zone - pgdat->node_zones; | 238 | zone_type = zone - pgdat->node_zones; |
239 | if (!zone->wait_table) { | 239 | if (!zone->wait_table) { |
240 | int ret; | 240 | int ret; |
241 | 241 | ||
242 | ret = init_currently_empty_zone(zone, phys_start_pfn, | 242 | ret = init_currently_empty_zone(zone, phys_start_pfn, |
243 | nr_pages, MEMMAP_HOTPLUG); | 243 | nr_pages, MEMMAP_HOTPLUG); |
244 | if (ret) | 244 | if (ret) |
245 | return ret; | 245 | return ret; |
246 | } | 246 | } |
247 | pgdat_resize_lock(zone->zone_pgdat, &flags); | 247 | pgdat_resize_lock(zone->zone_pgdat, &flags); |
248 | grow_zone_span(zone, phys_start_pfn, phys_start_pfn + nr_pages); | 248 | grow_zone_span(zone, phys_start_pfn, phys_start_pfn + nr_pages); |
249 | grow_pgdat_span(zone->zone_pgdat, phys_start_pfn, | 249 | grow_pgdat_span(zone->zone_pgdat, phys_start_pfn, |
250 | phys_start_pfn + nr_pages); | 250 | phys_start_pfn + nr_pages); |
251 | pgdat_resize_unlock(zone->zone_pgdat, &flags); | 251 | pgdat_resize_unlock(zone->zone_pgdat, &flags); |
252 | memmap_init_zone(nr_pages, nid, zone_type, | 252 | memmap_init_zone(nr_pages, nid, zone_type, |
253 | phys_start_pfn, MEMMAP_HOTPLUG); | 253 | phys_start_pfn, MEMMAP_HOTPLUG); |
254 | return 0; | 254 | return 0; |
255 | } | 255 | } |
256 | 256 | ||
257 | static int __meminit __add_section(int nid, struct zone *zone, | 257 | static int __meminit __add_section(int nid, struct zone *zone, |
258 | unsigned long phys_start_pfn) | 258 | unsigned long phys_start_pfn) |
259 | { | 259 | { |
260 | int nr_pages = PAGES_PER_SECTION; | 260 | int nr_pages = PAGES_PER_SECTION; |
261 | int ret; | 261 | int ret; |
262 | 262 | ||
263 | if (pfn_valid(phys_start_pfn)) | 263 | if (pfn_valid(phys_start_pfn)) |
264 | return -EEXIST; | 264 | return -EEXIST; |
265 | 265 | ||
266 | ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages); | 266 | ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages); |
267 | 267 | ||
268 | if (ret < 0) | 268 | if (ret < 0) |
269 | return ret; | 269 | return ret; |
270 | 270 | ||
271 | ret = __add_zone(zone, phys_start_pfn); | 271 | ret = __add_zone(zone, phys_start_pfn); |
272 | 272 | ||
273 | if (ret < 0) | 273 | if (ret < 0) |
274 | return ret; | 274 | return ret; |
275 | 275 | ||
276 | return register_new_memory(nid, __pfn_to_section(phys_start_pfn)); | 276 | return register_new_memory(nid, __pfn_to_section(phys_start_pfn)); |
277 | } | 277 | } |
278 | 278 | ||
279 | #ifdef CONFIG_SPARSEMEM_VMEMMAP | 279 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
280 | static int __remove_section(struct zone *zone, struct mem_section *ms) | 280 | static int __remove_section(struct zone *zone, struct mem_section *ms) |
281 | { | 281 | { |
282 | /* | 282 | /* |
283 | * XXX: Freeing memmap with vmemmap is not implemented yet. | 283 | * XXX: Freeing memmap with vmemmap is not implemented yet. |
284 | * This should be removed later. | 284 | * This should be removed later. |
285 | */ | 285 | */ |
286 | return -EBUSY; | 286 | return -EBUSY; |
287 | } | 287 | } |
288 | #else | 288 | #else |
289 | static int __remove_section(struct zone *zone, struct mem_section *ms) | 289 | static int __remove_section(struct zone *zone, struct mem_section *ms) |
290 | { | 290 | { |
291 | unsigned long flags; | 291 | unsigned long flags; |
292 | struct pglist_data *pgdat = zone->zone_pgdat; | 292 | struct pglist_data *pgdat = zone->zone_pgdat; |
293 | int ret = -EINVAL; | 293 | int ret = -EINVAL; |
294 | 294 | ||
295 | if (!valid_section(ms)) | 295 | if (!valid_section(ms)) |
296 | return ret; | 296 | return ret; |
297 | 297 | ||
298 | ret = unregister_memory_section(ms); | 298 | ret = unregister_memory_section(ms); |
299 | if (ret) | 299 | if (ret) |
300 | return ret; | 300 | return ret; |
301 | 301 | ||
302 | pgdat_resize_lock(pgdat, &flags); | 302 | pgdat_resize_lock(pgdat, &flags); |
303 | sparse_remove_one_section(zone, ms); | 303 | sparse_remove_one_section(zone, ms); |
304 | pgdat_resize_unlock(pgdat, &flags); | 304 | pgdat_resize_unlock(pgdat, &flags); |
305 | return 0; | 305 | return 0; |
306 | } | 306 | } |
307 | #endif | 307 | #endif |
308 | 308 | ||
309 | /* | 309 | /* |
310 | * Reasonably generic function for adding memory. It is | 310 | * Reasonably generic function for adding memory. It is |
311 | * expected that archs that support memory hotplug will | 311 | * expected that archs that support memory hotplug will |
312 | * call this function after deciding the zone to which to | 312 | * call this function after deciding the zone to which to |
313 | * add the new pages. | 313 | * add the new pages. |
314 | */ | 314 | */ |
315 | int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, | 315 | int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn, |
316 | unsigned long nr_pages) | 316 | unsigned long nr_pages) |
317 | { | 317 | { |
318 | unsigned long i; | 318 | unsigned long i; |
319 | int err = 0; | 319 | int err = 0; |
320 | int start_sec, end_sec; | 320 | int start_sec, end_sec; |
321 | /* while initializing mem_map, align the hot-added range to section boundaries */ | 321 | /* while initializing mem_map, align the hot-added range to section boundaries */ |
322 | start_sec = pfn_to_section_nr(phys_start_pfn); | 322 | start_sec = pfn_to_section_nr(phys_start_pfn); |
323 | end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); | 323 | end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); |
324 | 324 | ||
325 | for (i = start_sec; i <= end_sec; i++) { | 325 | for (i = start_sec; i <= end_sec; i++) { |
326 | err = __add_section(nid, zone, i << PFN_SECTION_SHIFT); | 326 | err = __add_section(nid, zone, i << PFN_SECTION_SHIFT); |
327 | 327 | ||
328 | /* | 328 | /* |
329 | * EEXIST is ultimately dealt with by the ioresource collision | 329 | * EEXIST is ultimately dealt with by the ioresource collision |
330 | * check; see add_memory() => register_memory_resource(). | 330 | * check; see add_memory() => register_memory_resource(). |
331 | * A warning will be printed if there is a collision. | 331 | * A warning will be printed if there is a collision. |
332 | */ | 332 | */ |
333 | if (err && (err != -EEXIST)) | 333 | if (err && (err != -EEXIST)) |
334 | break; | 334 | break; |
335 | err = 0; | 335 | err = 0; |
336 | } | 336 | } |
337 | 337 | ||
338 | return err; | 338 | return err; |
339 | } | 339 | } |
340 | EXPORT_SYMBOL_GPL(__add_pages); | 340 | EXPORT_SYMBOL_GPL(__add_pages); |
341 | 341 | ||
342 | /** | 342 | /** |
343 | * __remove_pages() - remove sections of pages from a zone | 343 | * __remove_pages() - remove sections of pages from a zone |
344 | * @zone: zone from which pages need to be removed | 344 | * @zone: zone from which pages need to be removed |
345 | * @phys_start_pfn: starting pageframe (must be aligned to start of a section) | 345 | * @phys_start_pfn: starting pageframe (must be aligned to start of a section) |
346 | * @nr_pages: number of pages to remove (must be multiple of section size) | 346 | * @nr_pages: number of pages to remove (must be multiple of section size) |
347 | * | 347 | * |
348 | * Generic helper function to remove section mappings and sysfs entries | 348 | * Generic helper function to remove section mappings and sysfs entries |
349 | * for the section of the memory we are removing. Caller needs to make | 349 | * for the section of the memory we are removing. Caller needs to make |
350 | * sure that pages are marked reserved and zones are adjusted properly by | 350 | * sure that pages are marked reserved and zones are adjusted properly by |
351 | * calling offline_pages(). | 351 | * calling offline_pages(). |
352 | */ | 352 | */ |
353 | int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, | 353 | int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, |
354 | unsigned long nr_pages) | 354 | unsigned long nr_pages) |
355 | { | 355 | { |
356 | unsigned long i, ret = 0; | 356 | unsigned long i, ret = 0; |
357 | int sections_to_remove; | 357 | int sections_to_remove; |
358 | 358 | ||
359 | /* | 359 | /* |
360 | * We can only remove entire sections | 360 | * We can only remove entire sections |
361 | */ | 361 | */ |
362 | BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK); | 362 | BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK); |
363 | BUG_ON(nr_pages % PAGES_PER_SECTION); | 363 | BUG_ON(nr_pages % PAGES_PER_SECTION); |
364 | 364 | ||
365 | sections_to_remove = nr_pages / PAGES_PER_SECTION; | 365 | sections_to_remove = nr_pages / PAGES_PER_SECTION; |
366 | for (i = 0; i < sections_to_remove; i++) { | 366 | for (i = 0; i < sections_to_remove; i++) { |
367 | unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; | 367 | unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; |
368 | release_mem_region(pfn << PAGE_SHIFT, | 368 | release_mem_region(pfn << PAGE_SHIFT, |
369 | PAGES_PER_SECTION << PAGE_SHIFT); | 369 | PAGES_PER_SECTION << PAGE_SHIFT); |
370 | ret = __remove_section(zone, __pfn_to_section(pfn)); | 370 | ret = __remove_section(zone, __pfn_to_section(pfn)); |
371 | if (ret) | 371 | if (ret) |
372 | break; | 372 | break; |
373 | } | 373 | } |
374 | return ret; | 374 | return ret; |
375 | } | 375 | } |
376 | EXPORT_SYMBOL_GPL(__remove_pages); | 376 | EXPORT_SYMBOL_GPL(__remove_pages); |
377 | 377 | ||
378 | int set_online_page_callback(online_page_callback_t callback) | 378 | int set_online_page_callback(online_page_callback_t callback) |
379 | { | 379 | { |
380 | int rc = -EINVAL; | 380 | int rc = -EINVAL; |
381 | 381 | ||
382 | lock_memory_hotplug(); | 382 | lock_memory_hotplug(); |
383 | 383 | ||
384 | if (online_page_callback == generic_online_page) { | 384 | if (online_page_callback == generic_online_page) { |
385 | online_page_callback = callback; | 385 | online_page_callback = callback; |
386 | rc = 0; | 386 | rc = 0; |
387 | } | 387 | } |
388 | 388 | ||
389 | unlock_memory_hotplug(); | 389 | unlock_memory_hotplug(); |
390 | 390 | ||
391 | return rc; | 391 | return rc; |
392 | } | 392 | } |
393 | EXPORT_SYMBOL_GPL(set_online_page_callback); | 393 | EXPORT_SYMBOL_GPL(set_online_page_callback); |
394 | 394 | ||
395 | int restore_online_page_callback(online_page_callback_t callback) | 395 | int restore_online_page_callback(online_page_callback_t callback) |
396 | { | 396 | { |
397 | int rc = -EINVAL; | 397 | int rc = -EINVAL; |
398 | 398 | ||
399 | lock_memory_hotplug(); | 399 | lock_memory_hotplug(); |
400 | 400 | ||
401 | if (online_page_callback == callback) { | 401 | if (online_page_callback == callback) { |
402 | online_page_callback = generic_online_page; | 402 | online_page_callback = generic_online_page; |
403 | rc = 0; | 403 | rc = 0; |
404 | } | 404 | } |
405 | 405 | ||
406 | unlock_memory_hotplug(); | 406 | unlock_memory_hotplug(); |
407 | 407 | ||
408 | return rc; | 408 | return rc; |
409 | } | 409 | } |
410 | EXPORT_SYMBOL_GPL(restore_online_page_callback); | 410 | EXPORT_SYMBOL_GPL(restore_online_page_callback); |
411 | 411 | ||
412 | void __online_page_set_limits(struct page *page) | 412 | void __online_page_set_limits(struct page *page) |
413 | { | 413 | { |
414 | unsigned long pfn = page_to_pfn(page); | 414 | unsigned long pfn = page_to_pfn(page); |
415 | 415 | ||
416 | if (pfn >= num_physpages) | 416 | if (pfn >= num_physpages) |
417 | num_physpages = pfn + 1; | 417 | num_physpages = pfn + 1; |
418 | } | 418 | } |
419 | EXPORT_SYMBOL_GPL(__online_page_set_limits); | 419 | EXPORT_SYMBOL_GPL(__online_page_set_limits); |
420 | 420 | ||
421 | void __online_page_increment_counters(struct page *page) | 421 | void __online_page_increment_counters(struct page *page) |
422 | { | 422 | { |
423 | totalram_pages++; | 423 | totalram_pages++; |
424 | 424 | ||
425 | #ifdef CONFIG_HIGHMEM | 425 | #ifdef CONFIG_HIGHMEM |
426 | if (PageHighMem(page)) | 426 | if (PageHighMem(page)) |
427 | totalhigh_pages++; | 427 | totalhigh_pages++; |
428 | #endif | 428 | #endif |
429 | } | 429 | } |
430 | EXPORT_SYMBOL_GPL(__online_page_increment_counters); | 430 | EXPORT_SYMBOL_GPL(__online_page_increment_counters); |
431 | 431 | ||
432 | void __online_page_free(struct page *page) | 432 | void __online_page_free(struct page *page) |
433 | { | 433 | { |
434 | ClearPageReserved(page); | 434 | ClearPageReserved(page); |
435 | init_page_count(page); | 435 | init_page_count(page); |
436 | __free_page(page); | 436 | __free_page(page); |
437 | } | 437 | } |
438 | EXPORT_SYMBOL_GPL(__online_page_free); | 438 | EXPORT_SYMBOL_GPL(__online_page_free); |
439 | 439 | ||
440 | static void generic_online_page(struct page *page) | 440 | static void generic_online_page(struct page *page) |
441 | { | 441 | { |
442 | __online_page_set_limits(page); | 442 | __online_page_set_limits(page); |
443 | __online_page_increment_counters(page); | 443 | __online_page_increment_counters(page); |
444 | __online_page_free(page); | 444 | __online_page_free(page); |
445 | } | 445 | } |
446 | 446 | ||
447 | static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages, | 447 | static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages, |
448 | void *arg) | 448 | void *arg) |
449 | { | 449 | { |
450 | unsigned long i; | 450 | unsigned long i; |
451 | unsigned long onlined_pages = *(unsigned long *)arg; | 451 | unsigned long onlined_pages = *(unsigned long *)arg; |
452 | struct page *page; | 452 | struct page *page; |
453 | if (PageReserved(pfn_to_page(start_pfn))) | 453 | if (PageReserved(pfn_to_page(start_pfn))) |
454 | for (i = 0; i < nr_pages; i++) { | 454 | for (i = 0; i < nr_pages; i++) { |
455 | page = pfn_to_page(start_pfn + i); | 455 | page = pfn_to_page(start_pfn + i); |
456 | (*online_page_callback)(page); | 456 | (*online_page_callback)(page); |
457 | onlined_pages++; | 457 | onlined_pages++; |
458 | } | 458 | } |
459 | *(unsigned long *)arg = onlined_pages; | 459 | *(unsigned long *)arg = onlined_pages; |
460 | return 0; | 460 | return 0; |
461 | } | 461 | } |
462 | 462 | ||
463 | 463 | ||
464 | int __ref online_pages(unsigned long pfn, unsigned long nr_pages) | 464 | int __ref online_pages(unsigned long pfn, unsigned long nr_pages) |
465 | { | 465 | { |
466 | unsigned long onlined_pages = 0; | 466 | unsigned long onlined_pages = 0; |
467 | struct zone *zone; | 467 | struct zone *zone; |
468 | int need_zonelists_rebuild = 0; | 468 | int need_zonelists_rebuild = 0; |
469 | int nid; | 469 | int nid; |
470 | int ret; | 470 | int ret; |
471 | struct memory_notify arg; | 471 | struct memory_notify arg; |
472 | 472 | ||
473 | lock_memory_hotplug(); | 473 | lock_memory_hotplug(); |
474 | arg.start_pfn = pfn; | 474 | arg.start_pfn = pfn; |
475 | arg.nr_pages = nr_pages; | 475 | arg.nr_pages = nr_pages; |
476 | arg.status_change_nid = -1; | 476 | arg.status_change_nid = -1; |
477 | 477 | ||
478 | nid = page_to_nid(pfn_to_page(pfn)); | 478 | nid = page_to_nid(pfn_to_page(pfn)); |
479 | if (node_present_pages(nid) == 0) | 479 | if (node_present_pages(nid) == 0) |
480 | arg.status_change_nid = nid; | 480 | arg.status_change_nid = nid; |
481 | 481 | ||
482 | ret = memory_notify(MEM_GOING_ONLINE, &arg); | 482 | ret = memory_notify(MEM_GOING_ONLINE, &arg); |
483 | ret = notifier_to_errno(ret); | 483 | ret = notifier_to_errno(ret); |
484 | if (ret) { | 484 | if (ret) { |
485 | memory_notify(MEM_CANCEL_ONLINE, &arg); | 485 | memory_notify(MEM_CANCEL_ONLINE, &arg); |
486 | unlock_memory_hotplug(); | 486 | unlock_memory_hotplug(); |
487 | return ret; | 487 | return ret; |
488 | } | 488 | } |
489 | /* | 489 | /* |
490 | * This doesn't need a lock to do pfn_to_page(). | 490 | * This doesn't need a lock to do pfn_to_page(). |
491 | * The section can't be removed here because of the | 491 | * The section can't be removed here because of the |
492 | * memory_block->state_mutex. | 492 | * memory_block->state_mutex. |
493 | */ | 493 | */ |
494 | zone = page_zone(pfn_to_page(pfn)); | 494 | zone = page_zone(pfn_to_page(pfn)); |
495 | /* | 495 | /* |
496 | * If this zone is not populated, then it is not in zonelist. | 496 | * If this zone is not populated, then it is not in zonelist. |
497 | * This means the page allocator ignores this zone. | 497 | * This means the page allocator ignores this zone. |
498 | * So, zonelist must be updated after online. | 498 | * So, zonelist must be updated after online. |
499 | */ | 499 | */ |
500 | mutex_lock(&zonelists_mutex); | 500 | mutex_lock(&zonelists_mutex); |
501 | if (!populated_zone(zone)) | 501 | if (!populated_zone(zone)) |
502 | need_zonelists_rebuild = 1; | 502 | need_zonelists_rebuild = 1; |
503 | 503 | ||
504 | ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages, | 504 | ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages, |
505 | online_pages_range); | 505 | online_pages_range); |
506 | if (ret) { | 506 | if (ret) { |
507 | mutex_unlock(&zonelists_mutex); | 507 | mutex_unlock(&zonelists_mutex); |
508 | printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] failed\n", | 508 | printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] failed\n", |
509 | (unsigned long long) pfn << PAGE_SHIFT, | 509 | (unsigned long long) pfn << PAGE_SHIFT, |
510 | (((unsigned long long) pfn + nr_pages) | 510 | (((unsigned long long) pfn + nr_pages) |
511 | << PAGE_SHIFT) - 1); | 511 | << PAGE_SHIFT) - 1); |
512 | memory_notify(MEM_CANCEL_ONLINE, &arg); | 512 | memory_notify(MEM_CANCEL_ONLINE, &arg); |
513 | unlock_memory_hotplug(); | 513 | unlock_memory_hotplug(); |
514 | return ret; | 514 | return ret; |
515 | } | 515 | } |
516 | 516 | ||
517 | zone->present_pages += onlined_pages; | 517 | zone->present_pages += onlined_pages; |
518 | zone->zone_pgdat->node_present_pages += onlined_pages; | 518 | zone->zone_pgdat->node_present_pages += onlined_pages; |
519 | if (onlined_pages) { | 519 | if (onlined_pages) { |
520 | node_set_state(zone_to_nid(zone), N_HIGH_MEMORY); | 520 | node_set_state(zone_to_nid(zone), N_HIGH_MEMORY); |
521 | if (need_zonelists_rebuild) | 521 | if (need_zonelists_rebuild) |
522 | build_all_zonelists(NULL, zone); | 522 | build_all_zonelists(NULL, zone); |
523 | else | 523 | else |
524 | zone_pcp_update(zone); | 524 | zone_pcp_update(zone); |
525 | } | 525 | } |
526 | 526 | ||
527 | mutex_unlock(&zonelists_mutex); | 527 | mutex_unlock(&zonelists_mutex); |
528 | 528 | ||
529 | init_per_zone_wmark_min(); | 529 | init_per_zone_wmark_min(); |
530 | 530 | ||
531 | if (onlined_pages) | 531 | if (onlined_pages) |
532 | kswapd_run(zone_to_nid(zone)); | 532 | kswapd_run(zone_to_nid(zone)); |
533 | 533 | ||
534 | vm_total_pages = nr_free_pagecache_pages(); | 534 | vm_total_pages = nr_free_pagecache_pages(); |
535 | 535 | ||
536 | writeback_set_ratelimit(); | 536 | writeback_set_ratelimit(); |
537 | 537 | ||
538 | if (onlined_pages) | 538 | if (onlined_pages) |
539 | memory_notify(MEM_ONLINE, &arg); | 539 | memory_notify(MEM_ONLINE, &arg); |
540 | unlock_memory_hotplug(); | 540 | unlock_memory_hotplug(); |
541 | 541 | ||
542 | return 0; | 542 | return 0; |
543 | } | 543 | } |
544 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ | 544 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ |
545 | 545 | ||
546 | /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ | 546 | /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ |
547 | static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) | 547 | static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) |
548 | { | 548 | { |
549 | struct pglist_data *pgdat; | 549 | struct pglist_data *pgdat; |
550 | unsigned long zones_size[MAX_NR_ZONES] = {0}; | 550 | unsigned long zones_size[MAX_NR_ZONES] = {0}; |
551 | unsigned long zholes_size[MAX_NR_ZONES] = {0}; | 551 | unsigned long zholes_size[MAX_NR_ZONES] = {0}; |
552 | unsigned long start_pfn = start >> PAGE_SHIFT; | 552 | unsigned long start_pfn = start >> PAGE_SHIFT; |
553 | 553 | ||
554 | pgdat = arch_alloc_nodedata(nid); | 554 | pgdat = arch_alloc_nodedata(nid); |
555 | if (!pgdat) | 555 | if (!pgdat) |
556 | return NULL; | 556 | return NULL; |
557 | 557 | ||
558 | arch_refresh_nodedata(nid, pgdat); | 558 | arch_refresh_nodedata(nid, pgdat); |
559 | 559 | ||
560 | /* we can use NODE_DATA(nid) from here */ | 560 | /* we can use NODE_DATA(nid) from here */ |
561 | 561 | ||
562 | /* init node's zones as empty zones, we don't have any present pages.*/ | 562 | /* init node's zones as empty zones, we don't have any present pages.*/ |
563 | free_area_init_node(nid, zones_size, start_pfn, zholes_size); | 563 | free_area_init_node(nid, zones_size, start_pfn, zholes_size); |
564 | 564 | ||
565 | /* | 565 | /* |
566 | * The node we allocated has no zone fallback lists. For avoiding | 566 | * The node we allocated has no zone fallback lists. For avoiding |
567 | * to access not-initialized zonelist, build here. | 567 | * to access not-initialized zonelist, build here. |
568 | */ | 568 | */ |
569 | mutex_lock(&zonelists_mutex); | 569 | mutex_lock(&zonelists_mutex); |
570 | build_all_zonelists(pgdat, NULL); | 570 | build_all_zonelists(pgdat, NULL); |
571 | mutex_unlock(&zonelists_mutex); | 571 | mutex_unlock(&zonelists_mutex); |
572 | 572 | ||
573 | return pgdat; | 573 | return pgdat; |
574 | } | 574 | } |
575 | 575 | ||
576 | static void rollback_node_hotadd(int nid, pg_data_t *pgdat) | 576 | static void rollback_node_hotadd(int nid, pg_data_t *pgdat) |
577 | { | 577 | { |
578 | arch_refresh_nodedata(nid, NULL); | 578 | arch_refresh_nodedata(nid, NULL); |
579 | arch_free_nodedata(pgdat); | 579 | arch_free_nodedata(pgdat); |
580 | return; | 580 | return; |
581 | } | 581 | } |
582 | 582 | ||
583 | 583 | ||
584 | /* | 584 | /* |
585 | * called by cpu_up() to online a node without onlined memory. | 585 | * called by cpu_up() to online a node without onlined memory. |
586 | */ | 586 | */ |
587 | int mem_online_node(int nid) | 587 | int mem_online_node(int nid) |
588 | { | 588 | { |
589 | pg_data_t *pgdat; | 589 | pg_data_t *pgdat; |
590 | int ret; | 590 | int ret; |
591 | 591 | ||
592 | lock_memory_hotplug(); | 592 | lock_memory_hotplug(); |
593 | pgdat = hotadd_new_pgdat(nid, 0); | 593 | pgdat = hotadd_new_pgdat(nid, 0); |
594 | if (!pgdat) { | 594 | if (!pgdat) { |
595 | ret = -ENOMEM; | 595 | ret = -ENOMEM; |
596 | goto out; | 596 | goto out; |
597 | } | 597 | } |
598 | node_set_online(nid); | 598 | node_set_online(nid); |
599 | ret = register_one_node(nid); | 599 | ret = register_one_node(nid); |
600 | BUG_ON(ret); | 600 | BUG_ON(ret); |
601 | 601 | ||
602 | out: | 602 | out: |
603 | unlock_memory_hotplug(); | 603 | unlock_memory_hotplug(); |
604 | return ret; | 604 | return ret; |
605 | } | 605 | } |
606 | 606 | ||
607 | /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ | 607 | /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ |
608 | int __ref add_memory(int nid, u64 start, u64 size) | 608 | int __ref add_memory(int nid, u64 start, u64 size) |
609 | { | 609 | { |
610 | pg_data_t *pgdat = NULL; | 610 | pg_data_t *pgdat = NULL; |
611 | int new_pgdat = 0; | 611 | int new_pgdat = 0; |
612 | struct resource *res; | 612 | struct resource *res; |
613 | int ret; | 613 | int ret; |
614 | 614 | ||
615 | lock_memory_hotplug(); | 615 | lock_memory_hotplug(); |
616 | 616 | ||
617 | res = register_memory_resource(start, size); | 617 | res = register_memory_resource(start, size); |
618 | ret = -EEXIST; | 618 | ret = -EEXIST; |
619 | if (!res) | 619 | if (!res) |
620 | goto out; | 620 | goto out; |
621 | 621 | ||
622 | if (!node_online(nid)) { | 622 | if (!node_online(nid)) { |
623 | pgdat = hotadd_new_pgdat(nid, start); | 623 | pgdat = hotadd_new_pgdat(nid, start); |
624 | ret = -ENOMEM; | 624 | ret = -ENOMEM; |
625 | if (!pgdat) | 625 | if (!pgdat) |
626 | goto error; | 626 | goto error; |
627 | new_pgdat = 1; | 627 | new_pgdat = 1; |
628 | } | 628 | } |
629 | 629 | ||
630 | /* call arch's memory hotadd */ | 630 | /* call arch's memory hotadd */ |
631 | ret = arch_add_memory(nid, start, size); | 631 | ret = arch_add_memory(nid, start, size); |
632 | 632 | ||
633 | if (ret < 0) | 633 | if (ret < 0) |
634 | goto error; | 634 | goto error; |
635 | 635 | ||
636 | /* we online node here. we can't roll back from here. */ | 636 | /* we online node here. we can't roll back from here. */ |
637 | node_set_online(nid); | 637 | node_set_online(nid); |
638 | 638 | ||
639 | if (new_pgdat) { | 639 | if (new_pgdat) { |
640 | ret = register_one_node(nid); | 640 | ret = register_one_node(nid); |
641 | /* | 641 | /* |
642 | * If sysfs file of new node can't create, cpu on the node | 642 | * If sysfs file of new node can't create, cpu on the node |
643 | * can't be hot-added. There is no rollback way now. | 643 | * can't be hot-added. There is no rollback way now. |
644 | * So, check by BUG_ON() to catch it reluctantly.. | 644 | * So, check by BUG_ON() to catch it reluctantly.. |
645 | */ | 645 | */ |
646 | BUG_ON(ret); | 646 | BUG_ON(ret); |
647 | } | 647 | } |
648 | 648 | ||
649 | /* create new memmap entry */ | 649 | /* create new memmap entry */ |
650 | firmware_map_add_hotplug(start, start + size, "System RAM"); | 650 | firmware_map_add_hotplug(start, start + size, "System RAM"); |
651 | 651 | ||
652 | goto out; | 652 | goto out; |
653 | 653 | ||
654 | error: | 654 | error: |
655 | /* rollback pgdat allocation and others */ | 655 | /* rollback pgdat allocation and others */ |
656 | if (new_pgdat) | 656 | if (new_pgdat) |
657 | rollback_node_hotadd(nid, pgdat); | 657 | rollback_node_hotadd(nid, pgdat); |
658 | if (res) | 658 | if (res) |
659 | release_memory_resource(res); | 659 | release_memory_resource(res); |
660 | 660 | ||
661 | out: | 661 | out: |
662 | unlock_memory_hotplug(); | 662 | unlock_memory_hotplug(); |
663 | return ret; | 663 | return ret; |
664 | } | 664 | } |
665 | EXPORT_SYMBOL_GPL(add_memory); | 665 | EXPORT_SYMBOL_GPL(add_memory); |
666 | 666 | ||
667 | #ifdef CONFIG_MEMORY_HOTREMOVE | 667 | #ifdef CONFIG_MEMORY_HOTREMOVE |
668 | /* | 668 | /* |
669 | * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy | 669 | * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy |
670 | * set and the size of the free page is given by page_order(). Using this, | 670 | * set and the size of the free page is given by page_order(). Using this, |
671 | * the function determines if the pageblock contains only free pages. | 671 | * the function determines if the pageblock contains only free pages. |
672 | * Due to buddy contraints, a free page at least the size of a pageblock will | 672 | * Due to buddy contraints, a free page at least the size of a pageblock will |
673 | * be located at the start of the pageblock | 673 | * be located at the start of the pageblock |
674 | */ | 674 | */ |
675 | static inline int pageblock_free(struct page *page) | 675 | static inline int pageblock_free(struct page *page) |
676 | { | 676 | { |
677 | return PageBuddy(page) && page_order(page) >= pageblock_order; | 677 | return PageBuddy(page) && page_order(page) >= pageblock_order; |
678 | } | 678 | } |
679 | 679 | ||
680 | /* Return the start of the next active pageblock after a given page */ | 680 | /* Return the start of the next active pageblock after a given page */ |
681 | static struct page *next_active_pageblock(struct page *page) | 681 | static struct page *next_active_pageblock(struct page *page) |
682 | { | 682 | { |
683 | /* Ensure the starting page is pageblock-aligned */ | 683 | /* Ensure the starting page is pageblock-aligned */ |
684 | BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1)); | 684 | BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1)); |
685 | 685 | ||
686 | /* If the entire pageblock is free, move to the end of free page */ | 686 | /* If the entire pageblock is free, move to the end of free page */ |
687 | if (pageblock_free(page)) { | 687 | if (pageblock_free(page)) { |
688 | int order; | 688 | int order; |
689 | /* be careful. we don't have locks, page_order can be changed.*/ | 689 | /* be careful. we don't have locks, page_order can be changed.*/ |
690 | order = page_order(page); | 690 | order = page_order(page); |
691 | if ((order < MAX_ORDER) && (order >= pageblock_order)) | 691 | if ((order < MAX_ORDER) && (order >= pageblock_order)) |
692 | return page + (1 << order); | 692 | return page + (1 << order); |
693 | } | 693 | } |
694 | 694 | ||
695 | return page + pageblock_nr_pages; | 695 | return page + pageblock_nr_pages; |
696 | } | 696 | } |
697 | 697 | ||
698 | /* Checks if this range of memory is likely to be hot-removable. */ | 698 | /* Checks if this range of memory is likely to be hot-removable. */ |
699 | int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) | 699 | int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) |
700 | { | 700 | { |
701 | struct page *page = pfn_to_page(start_pfn); | 701 | struct page *page = pfn_to_page(start_pfn); |
702 | struct page *end_page = page + nr_pages; | 702 | struct page *end_page = page + nr_pages; |
703 | 703 | ||
704 | /* Check the starting page of each pageblock within the range */ | 704 | /* Check the starting page of each pageblock within the range */ |
705 | for (; page < end_page; page = next_active_pageblock(page)) { | 705 | for (; page < end_page; page = next_active_pageblock(page)) { |
706 | if (!is_pageblock_removable_nolock(page)) | 706 | if (!is_pageblock_removable_nolock(page)) |
707 | return 0; | 707 | return 0; |
708 | cond_resched(); | 708 | cond_resched(); |
709 | } | 709 | } |
710 | 710 | ||
711 | /* All pageblocks in the memory block are likely to be hot-removable */ | 711 | /* All pageblocks in the memory block are likely to be hot-removable */ |
712 | return 1; | 712 | return 1; |
713 | } | 713 | } |
714 | 714 | ||
715 | /* | 715 | /* |
716 | * Confirm all pages in a range [start, end) is belongs to the same zone. | 716 | * Confirm all pages in a range [start, end) is belongs to the same zone. |
717 | */ | 717 | */ |
718 | static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) | 718 | static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) |
719 | { | 719 | { |
720 | unsigned long pfn; | 720 | unsigned long pfn; |
721 | struct zone *zone = NULL; | 721 | struct zone *zone = NULL; |
722 | struct page *page; | 722 | struct page *page; |
723 | int i; | 723 | int i; |
724 | for (pfn = start_pfn; | 724 | for (pfn = start_pfn; |
725 | pfn < end_pfn; | 725 | pfn < end_pfn; |
726 | pfn += MAX_ORDER_NR_PAGES) { | 726 | pfn += MAX_ORDER_NR_PAGES) { |
727 | i = 0; | 727 | i = 0; |
728 | /* This is just a CONFIG_HOLES_IN_ZONE check.*/ | 728 | /* This is just a CONFIG_HOLES_IN_ZONE check.*/ |
729 | while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i)) | 729 | while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i)) |
730 | i++; | 730 | i++; |
731 | if (i == MAX_ORDER_NR_PAGES) | 731 | if (i == MAX_ORDER_NR_PAGES) |
732 | continue; | 732 | continue; |
733 | page = pfn_to_page(pfn + i); | 733 | page = pfn_to_page(pfn + i); |
734 | if (zone && page_zone(page) != zone) | 734 | if (zone && page_zone(page) != zone) |
735 | return 0; | 735 | return 0; |
736 | zone = page_zone(page); | 736 | zone = page_zone(page); |
737 | } | 737 | } |
738 | return 1; | 738 | return 1; |
739 | } | 739 | } |
740 | 740 | ||
/*
 * Scanning pfns is much easier than scanning the LRU lists.
 * Walk [start, end) and return the pfn of the first page that is on an LRU,
 * or 0 if there is none.
 */
static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
{
	unsigned long cur;

	for (cur = start; cur < end; cur++) {
		if (!pfn_valid(cur))
			continue;
		if (PageLRU(pfn_to_page(cur)))
			return cur;
	}

	return 0;
}
758 | 758 | ||
759 | static struct page * | 759 | static struct page * |
760 | hotremove_migrate_alloc(struct page *page, unsigned long private, int **x) | 760 | hotremove_migrate_alloc(struct page *page, unsigned long private, int **x) |
761 | { | 761 | { |
762 | /* This should be improooooved!! */ | 762 | /* This should be improooooved!! */ |
763 | return alloc_page(GFP_HIGHUSER_MOVABLE); | 763 | return alloc_page(GFP_HIGHUSER_MOVABLE); |
764 | } | 764 | } |
765 | 765 | ||
766 | #define NR_OFFLINE_AT_ONCE_PAGES (256) | 766 | #define NR_OFFLINE_AT_ONCE_PAGES (256) |
767 | static int | 767 | static int |
768 | do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) | 768 | do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) |
769 | { | 769 | { |
770 | unsigned long pfn; | 770 | unsigned long pfn; |
771 | struct page *page; | 771 | struct page *page; |
772 | int move_pages = NR_OFFLINE_AT_ONCE_PAGES; | 772 | int move_pages = NR_OFFLINE_AT_ONCE_PAGES; |
773 | int not_managed = 0; | 773 | int not_managed = 0; |
774 | int ret = 0; | 774 | int ret = 0; |
775 | LIST_HEAD(source); | 775 | LIST_HEAD(source); |
776 | 776 | ||
777 | for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) { | 777 | for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) { |
778 | if (!pfn_valid(pfn)) | 778 | if (!pfn_valid(pfn)) |
779 | continue; | 779 | continue; |
780 | page = pfn_to_page(pfn); | 780 | page = pfn_to_page(pfn); |
781 | if (!get_page_unless_zero(page)) | 781 | if (!get_page_unless_zero(page)) |
782 | continue; | 782 | continue; |
783 | /* | 783 | /* |
784 | * We can skip free pages. And we can only deal with pages on | 784 | * We can skip free pages. And we can only deal with pages on |
785 | * LRU. | 785 | * LRU. |
786 | */ | 786 | */ |
787 | ret = isolate_lru_page(page); | 787 | ret = isolate_lru_page(page); |
788 | if (!ret) { /* Success */ | 788 | if (!ret) { /* Success */ |
789 | put_page(page); | 789 | put_page(page); |
790 | list_add_tail(&page->lru, &source); | 790 | list_add_tail(&page->lru, &source); |
791 | move_pages--; | 791 | move_pages--; |
792 | inc_zone_page_state(page, NR_ISOLATED_ANON + | 792 | inc_zone_page_state(page, NR_ISOLATED_ANON + |
793 | page_is_file_cache(page)); | 793 | page_is_file_cache(page)); |
794 | 794 | ||
795 | } else { | 795 | } else { |
796 | #ifdef CONFIG_DEBUG_VM | 796 | #ifdef CONFIG_DEBUG_VM |
797 | printk(KERN_ALERT "removing pfn %lx from LRU failed\n", | 797 | printk(KERN_ALERT "removing pfn %lx from LRU failed\n", |
798 | pfn); | 798 | pfn); |
799 | dump_page(page); | 799 | dump_page(page); |
800 | #endif | 800 | #endif |
801 | put_page(page); | 801 | put_page(page); |
802 | /* Because we don't have big zone->lock. we should | 802 | /* Because we don't have big zone->lock. we should |
803 | check this again here. */ | 803 | check this again here. */ |
804 | if (page_count(page)) { | 804 | if (page_count(page)) { |
805 | not_managed++; | 805 | not_managed++; |
806 | ret = -EBUSY; | 806 | ret = -EBUSY; |
807 | break; | 807 | break; |
808 | } | 808 | } |
809 | } | 809 | } |
810 | } | 810 | } |
811 | if (!list_empty(&source)) { | 811 | if (!list_empty(&source)) { |
812 | if (not_managed) { | 812 | if (not_managed) { |
813 | putback_lru_pages(&source); | 813 | putback_lru_pages(&source); |
814 | goto out; | 814 | goto out; |
815 | } | 815 | } |
816 | /* this function returns # of failed pages */ | 816 | /* this function returns # of failed pages */ |
817 | ret = migrate_pages(&source, hotremove_migrate_alloc, 0, | 817 | ret = migrate_pages(&source, hotremove_migrate_alloc, 0, |
818 | true, MIGRATE_SYNC); | 818 | true, MIGRATE_SYNC); |
819 | if (ret) | 819 | if (ret) |
820 | putback_lru_pages(&source); | 820 | putback_lru_pages(&source); |
821 | } | 821 | } |
822 | out: | 822 | out: |
823 | return ret; | 823 | return ret; |
824 | } | 824 | } |
825 | 825 | ||
/*
 * walk_system_ram_range() callback: remove the pages from free_area[] and
 * mark them all as Reserved.  The data argument is unused.  Always returns 0.
 */
static int
offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
			  void *data)
{
	unsigned long end = start + nr_pages;

	__offline_isolated_pages(start, end);
	return 0;
}
836 | 836 | ||
837 | static void | 837 | static void |
838 | offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) | 838 | offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) |
839 | { | 839 | { |
840 | walk_system_ram_range(start_pfn, end_pfn - start_pfn, NULL, | 840 | walk_system_ram_range(start_pfn, end_pfn - start_pfn, NULL, |
841 | offline_isolated_pages_cb); | 841 | offline_isolated_pages_cb); |
842 | } | 842 | } |
843 | 843 | ||
/*
 * walk_system_ram_range() callback: check that all pages in the range,
 * recorded as memory resource, are isolated.
 *
 * @start_pfn: first pfn of the range
 * @nr_pages:  number of pages in the range
 * @data:      pointer to a long accumulator; nr_pages is added to it when
 *             test_pages_isolated() returns 0 for the range
 *
 * Returns the result of test_pages_isolated().
 */
static int
check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
			void *data)
{
	long *offlined = data;
	int ret;

	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages);
	if (!ret)
		*offlined += (long)nr_pages;

	return ret;
}
859 | 859 | ||
860 | static long | 860 | static long |
861 | check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) | 861 | check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) |
862 | { | 862 | { |
863 | long offlined = 0; | 863 | long offlined = 0; |
864 | int ret; | 864 | int ret; |
865 | 865 | ||
866 | ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, &offlined, | 866 | ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, &offlined, |
867 | check_pages_isolated_cb); | 867 | check_pages_isolated_cb); |
868 | if (ret < 0) | 868 | if (ret < 0) |
869 | offlined = (long)ret; | 869 | offlined = (long)ret; |
870 | return offlined; | 870 | return offlined; |
871 | } | 871 | } |
872 | 872 | ||
873 | static int __ref offline_pages(unsigned long start_pfn, | 873 | static int __ref offline_pages(unsigned long start_pfn, |
874 | unsigned long end_pfn, unsigned long timeout) | 874 | unsigned long end_pfn, unsigned long timeout) |
875 | { | 875 | { |
876 | unsigned long pfn, nr_pages, expire; | 876 | unsigned long pfn, nr_pages, expire; |
877 | long offlined_pages; | 877 | long offlined_pages; |
878 | int ret, drain, retry_max, node; | 878 | int ret, drain, retry_max, node; |
879 | struct zone *zone; | 879 | struct zone *zone; |
880 | struct memory_notify arg; | 880 | struct memory_notify arg; |
881 | 881 | ||
882 | BUG_ON(start_pfn >= end_pfn); | 882 | BUG_ON(start_pfn >= end_pfn); |
883 | /* at least, alignment against pageblock is necessary */ | 883 | /* at least, alignment against pageblock is necessary */ |
884 | if (!IS_ALIGNED(start_pfn, pageblock_nr_pages)) | 884 | if (!IS_ALIGNED(start_pfn, pageblock_nr_pages)) |
885 | return -EINVAL; | 885 | return -EINVAL; |
886 | if (!IS_ALIGNED(end_pfn, pageblock_nr_pages)) | 886 | if (!IS_ALIGNED(end_pfn, pageblock_nr_pages)) |
887 | return -EINVAL; | 887 | return -EINVAL; |
888 | /* This makes hotplug much easier...and readable. | 888 | /* This makes hotplug much easier...and readable. |
889 | we assume this for now. .*/ | 889 | we assume this for now. .*/ |
890 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) | 890 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) |
891 | return -EINVAL; | 891 | return -EINVAL; |
892 | 892 | ||
893 | lock_memory_hotplug(); | 893 | lock_memory_hotplug(); |
894 | 894 | ||
895 | zone = page_zone(pfn_to_page(start_pfn)); | 895 | zone = page_zone(pfn_to_page(start_pfn)); |
896 | node = zone_to_nid(zone); | 896 | node = zone_to_nid(zone); |
897 | nr_pages = end_pfn - start_pfn; | 897 | nr_pages = end_pfn - start_pfn; |
898 | 898 | ||
899 | /* set above range as isolated */ | 899 | /* set above range as isolated */ |
900 | ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); | 900 | ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); |
901 | if (ret) | 901 | if (ret) |
902 | goto out; | 902 | goto out; |
903 | 903 | ||
904 | arg.start_pfn = start_pfn; | 904 | arg.start_pfn = start_pfn; |
905 | arg.nr_pages = nr_pages; | 905 | arg.nr_pages = nr_pages; |
906 | arg.status_change_nid = -1; | 906 | arg.status_change_nid = -1; |
907 | if (nr_pages >= node_present_pages(node)) | 907 | if (nr_pages >= node_present_pages(node)) |
908 | arg.status_change_nid = node; | 908 | arg.status_change_nid = node; |
909 | 909 | ||
910 | ret = memory_notify(MEM_GOING_OFFLINE, &arg); | 910 | ret = memory_notify(MEM_GOING_OFFLINE, &arg); |
911 | ret = notifier_to_errno(ret); | 911 | ret = notifier_to_errno(ret); |
912 | if (ret) | 912 | if (ret) |
913 | goto failed_removal; | 913 | goto failed_removal; |
914 | 914 | ||
915 | pfn = start_pfn; | 915 | pfn = start_pfn; |
916 | expire = jiffies + timeout; | 916 | expire = jiffies + timeout; |
917 | drain = 0; | 917 | drain = 0; |
918 | retry_max = 5; | 918 | retry_max = 5; |
919 | repeat: | 919 | repeat: |
920 | /* start memory hot removal */ | 920 | /* start memory hot removal */ |
921 | ret = -EAGAIN; | 921 | ret = -EAGAIN; |
922 | if (time_after(jiffies, expire)) | 922 | if (time_after(jiffies, expire)) |
923 | goto failed_removal; | 923 | goto failed_removal; |
924 | ret = -EINTR; | 924 | ret = -EINTR; |
925 | if (signal_pending(current)) | 925 | if (signal_pending(current)) |
926 | goto failed_removal; | 926 | goto failed_removal; |
927 | ret = 0; | 927 | ret = 0; |
928 | if (drain) { | 928 | if (drain) { |
929 | lru_add_drain_all(); | 929 | lru_add_drain_all(); |
930 | cond_resched(); | 930 | cond_resched(); |
931 | drain_all_pages(); | 931 | drain_all_pages(); |
932 | } | 932 | } |
933 | 933 | ||
934 | pfn = scan_lru_pages(start_pfn, end_pfn); | 934 | pfn = scan_lru_pages(start_pfn, end_pfn); |
935 | if (pfn) { /* We have page on LRU */ | 935 | if (pfn) { /* We have page on LRU */ |
936 | ret = do_migrate_range(pfn, end_pfn); | 936 | ret = do_migrate_range(pfn, end_pfn); |
937 | if (!ret) { | 937 | if (!ret) { |
938 | drain = 1; | 938 | drain = 1; |
939 | goto repeat; | 939 | goto repeat; |
940 | } else { | 940 | } else { |
941 | if (ret < 0) | 941 | if (ret < 0) |
942 | if (--retry_max == 0) | 942 | if (--retry_max == 0) |
943 | goto failed_removal; | 943 | goto failed_removal; |
944 | yield(); | 944 | yield(); |
945 | drain = 1; | 945 | drain = 1; |
946 | goto repeat; | 946 | goto repeat; |
947 | } | 947 | } |
948 | } | 948 | } |
949 | /* drain all zone's lru pagevec, this is asyncronous... */ | 949 | /* drain all zone's lru pagevec, this is asyncronous... */ |
950 | lru_add_drain_all(); | 950 | lru_add_drain_all(); |
951 | yield(); | 951 | yield(); |
952 | /* drain pcp pages , this is synchrouns. */ | 952 | /* drain pcp pages , this is synchrouns. */ |
953 | drain_all_pages(); | 953 | drain_all_pages(); |
954 | /* check again */ | 954 | /* check again */ |
955 | offlined_pages = check_pages_isolated(start_pfn, end_pfn); | 955 | offlined_pages = check_pages_isolated(start_pfn, end_pfn); |
956 | if (offlined_pages < 0) { | 956 | if (offlined_pages < 0) { |
957 | ret = -EBUSY; | 957 | ret = -EBUSY; |
958 | goto failed_removal; | 958 | goto failed_removal; |
959 | } | 959 | } |
960 | printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages); | 960 | printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages); |
961 | /* Ok, all of our target is islaoted. | 961 | /* Ok, all of our target is islaoted. |
962 | We cannot do rollback at this point. */ | 962 | We cannot do rollback at this point. */ |
963 | offline_isolated_pages(start_pfn, end_pfn); | 963 | offline_isolated_pages(start_pfn, end_pfn); |
964 | /* reset pagetype flags and makes migrate type to be MOVABLE */ | 964 | /* reset pagetype flags and makes migrate type to be MOVABLE */ |
965 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); | 965 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); |
966 | /* removal success */ | 966 | /* removal success */ |
967 | zone->present_pages -= offlined_pages; | 967 | zone->present_pages -= offlined_pages; |
968 | zone->zone_pgdat->node_present_pages -= offlined_pages; | 968 | zone->zone_pgdat->node_present_pages -= offlined_pages; |
969 | totalram_pages -= offlined_pages; | 969 | totalram_pages -= offlined_pages; |
970 | 970 | ||
971 | init_per_zone_wmark_min(); | 971 | init_per_zone_wmark_min(); |
972 | 972 | ||
973 | if (!populated_zone(zone)) | 973 | if (!populated_zone(zone)) { |
974 | zone_pcp_reset(zone); | 974 | zone_pcp_reset(zone); |
975 | mutex_lock(&zonelists_mutex); | ||
976 | build_all_zonelists(NULL, NULL); | ||
977 | mutex_unlock(&zonelists_mutex); | ||
978 | } else | ||
979 | zone_pcp_update(zone); | ||
975 | 980 | ||
976 | if (!node_present_pages(node)) { | 981 | if (!node_present_pages(node)) { |
977 | node_clear_state(node, N_HIGH_MEMORY); | 982 | node_clear_state(node, N_HIGH_MEMORY); |
978 | kswapd_stop(node); | 983 | kswapd_stop(node); |
979 | } | 984 | } |
980 | 985 | ||
981 | vm_total_pages = nr_free_pagecache_pages(); | 986 | vm_total_pages = nr_free_pagecache_pages(); |
982 | writeback_set_ratelimit(); | 987 | writeback_set_ratelimit(); |
983 | 988 | ||
984 | memory_notify(MEM_OFFLINE, &arg); | 989 | memory_notify(MEM_OFFLINE, &arg); |
985 | unlock_memory_hotplug(); | 990 | unlock_memory_hotplug(); |
986 | return 0; | 991 | return 0; |
987 | 992 | ||
988 | failed_removal: | 993 | failed_removal: |
989 | printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] failed\n", | 994 | printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] failed\n", |
990 | (unsigned long long) start_pfn << PAGE_SHIFT, | 995 | (unsigned long long) start_pfn << PAGE_SHIFT, |
991 | ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); | 996 | ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); |
992 | memory_notify(MEM_CANCEL_OFFLINE, &arg); | 997 | memory_notify(MEM_CANCEL_OFFLINE, &arg); |
993 | /* pushback to free area */ | 998 | /* pushback to free area */ |
994 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); | 999 | undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); |
995 | 1000 | ||
996 | out: | 1001 | out: |
997 | unlock_memory_hotplug(); | 1002 | unlock_memory_hotplug(); |
998 | return ret; | 1003 | return ret; |
999 | } | 1004 | } |
1000 | 1005 | ||
1001 | int remove_memory(u64 start, u64 size) | 1006 | int remove_memory(u64 start, u64 size) |
1002 | { | 1007 | { |
1003 | unsigned long start_pfn, end_pfn; | 1008 | unsigned long start_pfn, end_pfn; |
1004 | 1009 | ||
1005 | start_pfn = PFN_DOWN(start); | 1010 | start_pfn = PFN_DOWN(start); |
1006 | end_pfn = start_pfn + PFN_DOWN(size); | 1011 | end_pfn = start_pfn + PFN_DOWN(size); |
1007 | return offline_pages(start_pfn, end_pfn, 120 * HZ); | 1012 | return offline_pages(start_pfn, end_pfn, 120 * HZ); |
1008 | } | 1013 | } |
1009 | #else | 1014 | #else |
1010 | int remove_memory(u64 start, u64 size) | 1015 | int remove_memory(u64 start, u64 size) |
1011 | { | 1016 | { |
1012 | return -EINVAL; | 1017 | return -EINVAL; |
1013 | } | 1018 | } |
1014 | #endif /* CONFIG_MEMORY_HOTREMOVE */ | 1019 | #endif /* CONFIG_MEMORY_HOTREMOVE */ |
1015 | EXPORT_SYMBOL_GPL(remove_memory); | 1020 | EXPORT_SYMBOL_GPL(remove_memory); |
1016 | 1021 |