Blame view

mm/memory_hotplug.c 47.3 KB
457c89965   Thomas Gleixner   treewide: Add SPD...
1
  // SPDX-License-Identifier: GPL-2.0-only
3947be196   Dave Hansen   [PATCH] memory ho...
2
3
4
5
6
  /*
   *  linux/mm/memory_hotplug.c
   *
   *  Copyright (C)
   */
3947be196   Dave Hansen   [PATCH] memory ho...
7
8
  #include <linux/stddef.h>
  #include <linux/mm.h>
174cd4b1e   Ingo Molnar   sched/headers: Pr...
9
  #include <linux/sched/signal.h>
3947be196   Dave Hansen   [PATCH] memory ho...
10
11
12
  #include <linux/swap.h>
  #include <linux/interrupt.h>
  #include <linux/pagemap.h>
3947be196   Dave Hansen   [PATCH] memory ho...
13
  #include <linux/compiler.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
14
  #include <linux/export.h>
3947be196   Dave Hansen   [PATCH] memory ho...
15
  #include <linux/pagevec.h>
2d1d43f6a   Chandra Seetharaman   [PATCH] call mm/p...
16
  #include <linux/writeback.h>
3947be196   Dave Hansen   [PATCH] memory ho...
17
18
19
20
  #include <linux/slab.h>
  #include <linux/sysctl.h>
  #include <linux/cpu.h>
  #include <linux/memory.h>
4b94ffdc4   Dan Williams   x86, mm: introduc...
21
  #include <linux/memremap.h>
3947be196   Dave Hansen   [PATCH] memory ho...
22
23
24
  #include <linux/memory_hotplug.h>
  #include <linux/highmem.h>
  #include <linux/vmalloc.h>
0a5470390   KAMEZAWA Hiroyuki   [PATCH] register ...
25
  #include <linux/ioport.h>
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
26
27
28
  #include <linux/delay.h>
  #include <linux/migrate.h>
  #include <linux/page-isolation.h>
71088785c   Badari Pulavarty   mm: cleanup to ma...
29
  #include <linux/pfn.h>
6ad696d2c   Andi Kleen   mm: allow memory ...
30
  #include <linux/suspend.h>
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
31
  #include <linux/mm_inline.h>
d96ae5309   akpm@linux-foundation.org   memory-hotplug: c...
32
  #include <linux/firmware-map.h>
60a5a19e7   Tang Chen   memory-hotplug: r...
33
  #include <linux/stop_machine.h>
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
34
  #include <linux/hugetlb.h>
c5320926e   Tang Chen   mem-hotplug: intr...
35
  #include <linux/memblock.h>
698b1b306   Vlastimil Babka   mm, compaction: i...
36
  #include <linux/compaction.h>
b15c87263   Michal Hocko   hwpoison, memory_...
37
  #include <linux/rmap.h>
3947be196   Dave Hansen   [PATCH] memory ho...
38
39
  
  #include <asm/tlbflush.h>
1e5ad9a3b   Adrian Bunk   mm/memory_hotplug...
40
  #include "internal.h"
e900a918b   Dan Williams   mm: shuffle initi...
41
  #include "shuffle.h"
1e5ad9a3b   Adrian Bunk   mm/memory_hotplug...
42

9d0ad8ca4   Daniel Kiper   mm: extend memory...
43
44
45
46
47
48
  /*
   * online_page_callback contains pointer to current page onlining function.
   * Initially it is generic_online_page(). If it is required it could be
   * changed by calling set_online_page_callback() for callback registration
   * and restore_online_page_callback() for generic callback restore.
   */
a9cd410a3   Arun KS   mm/page_alloc.c: ...
49
  static void generic_online_page(struct page *page, unsigned int order);
9d0ad8ca4   Daniel Kiper   mm: extend memory...
50
51
  
  static online_page_callback_t online_page_callback = generic_online_page;
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
52
  static DEFINE_MUTEX(online_page_callback_lock);
9d0ad8ca4   Daniel Kiper   mm: extend memory...
53

3f906ba23   Thomas Gleixner   mm/memory-hotplug...
54
  DEFINE_STATIC_PERCPU_RWSEM(mem_hotplug_lock);
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
55

3f906ba23   Thomas Gleixner   mm/memory-hotplug...
56
57
58
59
  /* Take mem_hotplug_lock for reading (percpu_down_read()). */
  void get_online_mems(void)
  {
  	percpu_down_read(&mem_hotplug_lock);
  }
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
60

3f906ba23   Thomas Gleixner   mm/memory-hotplug...
61
62
63
64
  /* Drop the read side of mem_hotplug_lock (percpu_up_read()). */
  void put_online_mems(void)
  {
  	percpu_up_read(&mem_hotplug_lock);
  }
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
65

4932381ee   Michal Hocko   mm, memory_hotplu...
66
  bool movable_node_enabled = false;
8604d9e53   Vitaly Kuznetsov   memory_hotplug: i...
67
  #ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
68
  bool memhp_auto_online;
8604d9e53   Vitaly Kuznetsov   memory_hotplug: i...
69
70
71
  #else
  bool memhp_auto_online = true;
  #endif
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
72
  EXPORT_SYMBOL_GPL(memhp_auto_online);
86dd995d6   Vitaly Kuznetsov   memory_hotplug: i...
73
74
75
76
77
78
79
80
81
82
  /*
   * Handler for the "memhp_default_state=" boot parameter.
   *
   * "online" sets memhp_auto_online, "offline" clears it; any other value
   * leaves the current setting untouched. Always returns 1.
   */
  static int __init setup_memhp_default_state(char *str)
  {
  	if (strcmp(str, "online") == 0) {
  		memhp_auto_online = true;
  		return 1;
  	}

  	if (strcmp(str, "offline") == 0)
  		memhp_auto_online = false;

  	return 1;
  }
  __setup("memhp_default_state=", setup_memhp_default_state);
30467e0b3   David Rientjes   mm, hotplug: fix ...
83
  void mem_hotplug_begin(void)
20d6c96b5   KOSAKI Motohiro   mem-hotplug: intr...
84
  {
3f906ba23   Thomas Gleixner   mm/memory-hotplug...
85
86
  	cpus_read_lock();
  	percpu_down_write(&mem_hotplug_lock);
20d6c96b5   KOSAKI Motohiro   mem-hotplug: intr...
87
  }
30467e0b3   David Rientjes   mm, hotplug: fix ...
88
  void mem_hotplug_done(void)
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
89
  {
3f906ba23   Thomas Gleixner   mm/memory-hotplug...
90
91
  	percpu_up_write(&mem_hotplug_lock);
  	cpus_read_unlock();
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
92
  }
20d6c96b5   KOSAKI Motohiro   mem-hotplug: intr...
93

357b4da50   Juergen Gross   x86: respect memo...
94
  u64 max_mem_size = U64_MAX;
45e0b78b0   Keith Mannthey   [PATCH] hot-add-m...
95
96
97
  /*
   * Add the memory range [start, start + size) to the iomem resource tree
   * as a busy "System RAM" region.
   *
   * Returns the new resource on success, ERR_PTR(-E2BIG) if the end of the
   * range lies above max_mem_size, or ERR_PTR(-EEXIST) if the region could
   * not be requested.
   */
  static struct resource *register_memory_resource(u64 start, u64 size)
  {
  	struct resource *res;
  	unsigned long flags =  IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
  	char *resource_name = "System RAM";

  	if (start + size > max_mem_size)
  		return ERR_PTR(-E2BIG);

  	/*
  	 * Request ownership of the new memory range.  This might be
  	 * a child of an existing resource that was present but
  	 * not marked as busy.
  	 */
  	res = __request_region(&iomem_resource, start, size,
  			       resource_name, flags);

  	if (!res) {
  		pr_debug("Unable to reserve System RAM region: %016llx->%016llx\n",
  				start, start + size);
  		return ERR_PTR(-EEXIST);
  	}
  	return res;
  }
  
  /*
   * Hand @res to release_resource() and then kfree() it.
   * A NULL @res is silently ignored.
   */
  static void release_memory_resource(struct resource *res)
  {
  	if (res) {
  		release_resource(res);
  		kfree(res);
  	}
  }
53947027a   Keith Mannthey   [PATCH] hot-add-m...
128
  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
129
130
  void get_page_bootmem(unsigned long info,  struct page *page,
  		      unsigned long type)
047532787   Yasunori Goto   memory hotplug: r...
131
  {
ddffe98d1   Yasuaki Ishimatsu   mm/memory_hotplug...
132
  	page->freelist = (void *)type;
047532787   Yasunori Goto   memory hotplug: r...
133
134
  	SetPagePrivate(page);
  	set_page_private(page, info);
fe896d187   Joonsoo Kim   mm: introduce pag...
135
  	page_ref_inc(page);
047532787   Yasunori Goto   memory hotplug: r...
136
  }
170a5a7eb   Jiang Liu   mm: make __free_p...
137
  void put_page_bootmem(struct page *page)
047532787   Yasunori Goto   memory hotplug: r...
138
  {
5f24ce5fd   Andrea Arcangeli   thp: remove PG_buddy
139
  	unsigned long type;
047532787   Yasunori Goto   memory hotplug: r...
140

ddffe98d1   Yasuaki Ishimatsu   mm/memory_hotplug...
141
  	type = (unsigned long) page->freelist;
5f24ce5fd   Andrea Arcangeli   thp: remove PG_buddy
142
143
  	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
  	       type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
047532787   Yasunori Goto   memory hotplug: r...
144

fe896d187   Joonsoo Kim   mm: introduce pag...
145
  	if (page_ref_dec_return(page) == 1) {
ddffe98d1   Yasuaki Ishimatsu   mm/memory_hotplug...
146
  		page->freelist = NULL;
047532787   Yasunori Goto   memory hotplug: r...
147
148
  		ClearPagePrivate(page);
  		set_page_private(page, 0);
5f24ce5fd   Andrea Arcangeli   thp: remove PG_buddy
149
  		INIT_LIST_HEAD(&page->lru);
170a5a7eb   Jiang Liu   mm: make __free_p...
150
  		free_reserved_page(page);
047532787   Yasunori Goto   memory hotplug: r...
151
  	}
047532787   Yasunori Goto   memory hotplug: r...
152
  }
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
153
154
  #ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
  #ifndef CONFIG_SPARSEMEM_VMEMMAP
d92bc3185   Adrian Bunk   mm: make register...
155
  static void register_page_bootmem_info_section(unsigned long start_pfn)
047532787   Yasunori Goto   memory hotplug: r...
156
  {
f1eca35a0   Dan Williams   mm/sparsemem: int...
157
  	unsigned long mapsize, section_nr, i;
047532787   Yasunori Goto   memory hotplug: r...
158
159
  	struct mem_section *ms;
  	struct page *page, *memmap;
f1eca35a0   Dan Williams   mm/sparsemem: int...
160
  	struct mem_section_usage *usage;
047532787   Yasunori Goto   memory hotplug: r...
161

047532787   Yasunori Goto   memory hotplug: r...
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
  	section_nr = pfn_to_section_nr(start_pfn);
  	ms = __nr_to_section(section_nr);
  
  	/* Get section's memmap address */
  	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  
  	/*
  	 * Get page for the memmap's phys address
  	 * XXX: need more consideration for sparse_vmemmap...
  	 */
  	page = virt_to_page(memmap);
  	mapsize = sizeof(struct page) * PAGES_PER_SECTION;
  	mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
  
  	/* remember memmap's page */
  	for (i = 0; i < mapsize; i++, page++)
  		get_page_bootmem(section_nr, page, SECTION_INFO);
f1eca35a0   Dan Williams   mm/sparsemem: int...
179
180
  	usage = ms->usage;
  	page = virt_to_page(usage);
047532787   Yasunori Goto   memory hotplug: r...
181

f1eca35a0   Dan Williams   mm/sparsemem: int...
182
  	mapsize = PAGE_ALIGN(mem_section_usage_size()) >> PAGE_SHIFT;
047532787   Yasunori Goto   memory hotplug: r...
183
184
  
  	for (i = 0; i < mapsize; i++, page++)
af370fb8c   Yasunori Goto   memory hotplug: s...
185
  		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
047532787   Yasunori Goto   memory hotplug: r...
186
187
  
  }
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
188
189
190
  #else /* CONFIG_SPARSEMEM_VMEMMAP */
  static void register_page_bootmem_info_section(unsigned long start_pfn)
  {
f1eca35a0   Dan Williams   mm/sparsemem: int...
191
  	unsigned long mapsize, section_nr, i;
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
192
193
  	struct mem_section *ms;
  	struct page *page, *memmap;
f1eca35a0   Dan Williams   mm/sparsemem: int...
194
  	struct mem_section_usage *usage;
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
195

46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
196
197
198
199
200
201
  	section_nr = pfn_to_section_nr(start_pfn);
  	ms = __nr_to_section(section_nr);
  
  	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  
  	register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
f1eca35a0   Dan Williams   mm/sparsemem: int...
202
203
  	usage = ms->usage;
  	page = virt_to_page(usage);
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
204

f1eca35a0   Dan Williams   mm/sparsemem: int...
205
  	mapsize = PAGE_ALIGN(mem_section_usage_size()) >> PAGE_SHIFT;
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
206
207
208
209
210
  
  	for (i = 0; i < mapsize; i++, page++)
  		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
  }
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
047532787   Yasunori Goto   memory hotplug: r...
211

7ded384a1   Linus Torvalds   mm: fix section m...
212
  /*
   * Register bootmem info for a node: first the pages backing @pgdat itself
   * (NODE_INFO), then every section in the node's pfn span that is valid
   * and that early_pfn_to_nid() attributes to this node.
   */
  void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
  {
  	const int node = pgdat->node_id;
  	unsigned long nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
  	struct page *page = virt_to_page(pgdat);
  	unsigned long pfn, end_pfn, i;

  	for (i = 0; i < nr_pages; i++)
  		get_page_bootmem(node, page + i, NODE_INFO);

  	/* register section info */
  	end_pfn = pgdat_end_pfn(pgdat);
  	for (pfn = pgdat->node_start_pfn; pfn < end_pfn;
  	     pfn += PAGES_PER_SECTION) {
  		if (!pfn_valid(pfn))
  			continue;

  		/*
  		 * Some platforms can assign the same pfn to multiple nodes - on
  		 * node0 as well as nodeN.  To avoid registering a pfn against
  		 * multiple nodes we check that this pfn does not already
  		 * reside in some other nodes.
  		 */
  		if (early_pfn_to_nid(pfn) != node)
  			continue;

  		register_page_bootmem_info_section(pfn);
  	}
  }
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
238
  #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
047532787   Yasunori Goto   memory hotplug: r...
239

7ea621604   Dan Williams   mm/sparsemem: pre...
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
  /*
   * Validate the alignment of a pfn range handed to __add_pages() or
   * __remove_pages().
   *
   * @pfn: first pfn of the range
   * @nr_pages: number of pages in the range
   * @reason: "add" or "remove"; only used to build the warning text
   *
   * Returns 0 if both @pfn and @nr_pages are suitably aligned, otherwise
   * emits a WARN and returns -EINVAL.
   */
  static int check_pfn_span(unsigned long pfn, unsigned long nr_pages,
  		const char *reason)
  {
  	/*
  	 * Disallow all operations smaller than a sub-section and only
  	 * allow operations smaller than a section for
  	 * SPARSEMEM_VMEMMAP. Note that check_hotplug_memory_range()
  	 * enforces a larger memory_block_size_bytes() granularity for
  	 * memory that will be marked online, so this check should only
  	 * fire for direct arch_{add,remove}_memory() users outside of
  	 * add_memory_resource().
  	 */
  	unsigned long min_align;

  	if (IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
  		min_align = PAGES_PER_SUBSECTION;
  	else
  		min_align = PAGES_PER_SECTION;
  	if (!IS_ALIGNED(pfn, min_align)
  			|| !IS_ALIGNED(nr_pages, min_align)) {
  		/* both values use the '#' flag so they print with a 0x prefix */
  		WARN(1, "Misaligned __%s_pages start: %#lx end: %#lx\n",
  				reason, pfn, pfn + nr_pages - 1);
  		return -EINVAL;
  	}
  	return 0;
  }
4edd7ceff   David Rientjes   mm, hotplug: avoi...
267
268
269
270
271
272
  /*
   * Reasonably generic function for adding memory.  It is
   * expected that archs that support memory hotplug will
   * call this function after deciding the zone to which to
   * add the new pages.
   */
7ea621604   Dan Williams   mm/sparsemem: pre...
273
274
  int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
  		struct mhp_restrictions *restrictions)
4edd7ceff   David Rientjes   mm, hotplug: avoi...
275
  {
9a8450304   Dan Williams   mm/sparsemem: cle...
276
277
  	int err;
  	unsigned long nr, start_sec, end_sec;
940519f0c   Michal Hocko   mm, memory_hotplu...
278
  	struct vmem_altmap *altmap = restrictions->altmap;
4b94ffdc4   Dan Williams   x86, mm: introduc...
279

4b94ffdc4   Dan Williams   x86, mm: introduc...
280
281
282
283
  	if (altmap) {
  		/*
  		 * Validate altmap is within bounds of the total request
  		 */
7ea621604   Dan Williams   mm/sparsemem: pre...
284
  		if (altmap->base_pfn != pfn
4b94ffdc4   Dan Williams   x86, mm: introduc...
285
286
287
  				|| vmem_altmap_offset(altmap) > nr_pages) {
  			pr_warn_once("memory add fail, invalid altmap
  ");
7ea621604   Dan Williams   mm/sparsemem: pre...
288
  			return -EINVAL;
4b94ffdc4   Dan Williams   x86, mm: introduc...
289
290
291
  		}
  		altmap->alloc = 0;
  	}
7ea621604   Dan Williams   mm/sparsemem: pre...
292
293
294
295
296
297
  	err = check_pfn_span(pfn, nr_pages, "add");
  	if (err)
  		return err;
  
  	start_sec = pfn_to_section_nr(pfn);
  	end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
9a8450304   Dan Williams   mm/sparsemem: cle...
298
  	for (nr = start_sec; nr <= end_sec; nr++) {
7ea621604   Dan Williams   mm/sparsemem: pre...
299
300
301
302
  		unsigned long pfns;
  
  		pfns = min(nr_pages, PAGES_PER_SECTION
  				- (pfn & ~PAGE_SECTION_MASK));
ba72b4c8c   Dan Williams   mm/sparsemem: sup...
303
304
305
  		err = sparse_add_section(nid, pfn, pfns, altmap);
  		if (err)
  			break;
7ea621604   Dan Williams   mm/sparsemem: pre...
306
307
  		pfn += pfns;
  		nr_pages -= pfns;
f64ac5e6e   Michal Hocko   mm, memory_hotplu...
308
  		cond_resched();
4edd7ceff   David Rientjes   mm, hotplug: avoi...
309
  	}
c435a3905   Zhu Guihua   mm/memory hotplug...
310
  	vmemmap_populate_print_last();
4edd7ceff   David Rientjes   mm, hotplug: avoi...
311
312
  	return err;
  }
4edd7ceff   David Rientjes   mm, hotplug: avoi...
313

815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
314
  /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
d09b0137d   YASUAKI ISHIMATSU   mm/memory_hotplug...
315
  static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
316
317
318
  				     unsigned long start_pfn,
  				     unsigned long end_pfn)
  {
49ba3c6b3   Dan Williams   mm/hotplug: prepa...
319
  	for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SUBSECTION) {
7ce700bf1   David Hildenbrand   mm/memory_hotplug...
320
  		if (unlikely(!pfn_to_online_page(start_pfn)))
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
  			continue;
  
  		if (unlikely(pfn_to_nid(start_pfn) != nid))
  			continue;
  
  		if (zone && zone != page_zone(pfn_to_page(start_pfn)))
  			continue;
  
  		return start_pfn;
  	}
  
  	return 0;
  }
  
  /* find the biggest valid pfn in the range [start_pfn, end_pfn). */
d09b0137d   YASUAKI ISHIMATSU   mm/memory_hotplug...
336
  static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
337
338
339
  				    unsigned long start_pfn,
  				    unsigned long end_pfn)
  {
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
340
341
342
343
  	unsigned long pfn;
  
  	/* pfn is the end pfn of a memory section. */
  	pfn = end_pfn - 1;
49ba3c6b3   Dan Williams   mm/hotplug: prepa...
344
  	for (; pfn >= start_pfn; pfn -= PAGES_PER_SUBSECTION) {
7ce700bf1   David Hildenbrand   mm/memory_hotplug...
345
  		if (unlikely(!pfn_to_online_page(pfn)))
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
  			continue;
  
  		if (unlikely(pfn_to_nid(pfn) != nid))
  			continue;
  
  		if (zone && zone != page_zone(pfn_to_page(pfn)))
  			continue;
  
  		return pfn;
  	}
  
  	return 0;
  }
  
  static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
  			     unsigned long end_pfn)
  {
c33bc315f   Xishi Qiu   mm: use zone_end_...
363
364
365
  	unsigned long zone_start_pfn = zone->zone_start_pfn;
  	unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */
  	unsigned long zone_end_pfn = z;
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
366
  	unsigned long pfn;
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
  	int nid = zone_to_nid(zone);
  
  	zone_span_writelock(zone);
  	if (zone_start_pfn == start_pfn) {
  		/*
  		 * If the section is smallest section in the zone, it need
  		 * shrink zone->zone_start_pfn and zone->zone_spanned_pages.
  		 * In this case, we find second smallest valid mem_section
  		 * for shrinking zone.
  		 */
  		pfn = find_smallest_section_pfn(nid, zone, end_pfn,
  						zone_end_pfn);
  		if (pfn) {
  			zone->zone_start_pfn = pfn;
  			zone->spanned_pages = zone_end_pfn - pfn;
  		}
  	} else if (zone_end_pfn == end_pfn) {
  		/*
  		 * If the section is biggest section in the zone, it need
  		 * shrink zone->spanned_pages.
  		 * In this case, we find second biggest valid mem_section for
  		 * shrinking zone.
  		 */
  		pfn = find_biggest_section_pfn(nid, zone, zone_start_pfn,
  					       start_pfn);
  		if (pfn)
  			zone->spanned_pages = pfn - zone_start_pfn + 1;
  	}
  
  	/*
  	 * The section is not biggest or smallest mem_section in the zone, it
  	 * only creates a hole in the zone. So in this case, we need not
  	 * change the zone. But perhaps, the zone has only hole data. Thus
  	 * it check the zone has only hole or not.
  	 */
  	pfn = zone_start_pfn;
49ba3c6b3   Dan Williams   mm/hotplug: prepa...
403
  	for (; pfn < zone_end_pfn; pfn += PAGES_PER_SUBSECTION) {
7ce700bf1   David Hildenbrand   mm/memory_hotplug...
404
  		if (unlikely(!pfn_to_online_page(pfn)))
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
405
406
407
408
  			continue;
  
  		if (page_zone(pfn_to_page(pfn)) != zone)
  			continue;
49ba3c6b3   Dan Williams   mm/hotplug: prepa...
409
410
  		/* Skip range to be removed */
  		if (pfn >= start_pfn && pfn < end_pfn)
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
411
412
413
414
415
416
417
418
419
420
421
422
  			continue;
  
  		/* If we find valid section, we have nothing to do */
  		zone_span_writeunlock(zone);
  		return;
  	}
  
  	/* The zone has no valid section */
  	zone->zone_start_pfn = 0;
  	zone->spanned_pages = 0;
  	zone_span_writeunlock(zone);
  }
00d6c019b   David Hildenbrand   mm/memory_hotplug...
423
  static void update_pgdat_span(struct pglist_data *pgdat)
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
424
  {
00d6c019b   David Hildenbrand   mm/memory_hotplug...
425
426
427
428
429
430
431
432
433
  	unsigned long node_start_pfn = 0, node_end_pfn = 0;
  	struct zone *zone;
  
  	for (zone = pgdat->node_zones;
  	     zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
  		unsigned long zone_end_pfn = zone->zone_start_pfn +
  					     zone->spanned_pages;
  
  		/* No need to lock the zones, they can't change. */
656d57119   David Hildenbrand   mm/memory_hotplug...
434
435
436
437
438
439
440
  		if (!zone->spanned_pages)
  			continue;
  		if (!node_end_pfn) {
  			node_start_pfn = zone->zone_start_pfn;
  			node_end_pfn = zone_end_pfn;
  			continue;
  		}
00d6c019b   David Hildenbrand   mm/memory_hotplug...
441
442
443
444
  		if (zone_end_pfn > node_end_pfn)
  			node_end_pfn = zone_end_pfn;
  		if (zone->zone_start_pfn < node_start_pfn)
  			node_start_pfn = zone->zone_start_pfn;
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
445
  	}
00d6c019b   David Hildenbrand   mm/memory_hotplug...
446
447
  	pgdat->node_start_pfn = node_start_pfn;
  	pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
448
  }
e84c5b761   David Hildenbrand   mm/memory_hotplug...
449
450
451
  /*
   * Remove the pfn range [start_pfn, start_pfn + nr_pages) from @zone's
   * span and refresh the span of the owning node.
   *
   * With CONFIG_ZONE_DEVICE, ZONE_DEVICE zones are left untouched. The
   * zone's contiguous state is cleared before and recomputed after the
   * resize, which itself runs under the pgdat resize lock.
   */
  void __ref remove_pfn_range_from_zone(struct zone *zone,
  				      unsigned long start_pfn,
  				      unsigned long nr_pages)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	const unsigned long end_pfn = start_pfn + nr_pages;
  	unsigned long flags;

  #ifdef CONFIG_ZONE_DEVICE
  	/*
  	 * Zone shrinking code cannot properly deal with ZONE_DEVICE. So
  	 * we will not try to shrink the zones - which is okay as
  	 * set_zone_contiguous() cannot deal with ZONE_DEVICE either way.
  	 */
  	if (zone_idx(zone) == ZONE_DEVICE)
  		return;
  #endif

  	clear_zone_contiguous(zone);

  	pgdat_resize_lock(pgdat, &flags);
  	shrink_zone_span(zone, start_pfn, end_pfn);
  	update_pgdat_span(pgdat);
  	pgdat_resize_unlock(pgdat, &flags);

  	set_zone_contiguous(zone);
  }
e84c5b761   David Hildenbrand   mm/memory_hotplug...
472
473
474
  /*
   * Remove @nr_pages pages starting at @pfn from the section containing
   * @pfn by calling sparse_remove_section(). Warns once and does nothing
   * if that section is not valid.
   */
  static void __remove_section(unsigned long pfn, unsigned long nr_pages,
  			     unsigned long map_offset,
  			     struct vmem_altmap *altmap)
  {
  	unsigned long section_nr = pfn_to_section_nr(pfn);
  	struct mem_section *ms = __nr_to_section(section_nr);

  	if (!WARN_ON_ONCE(!valid_section(ms)))
  		sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap);
  }
ea01ea937   Badari Pulavarty   hotplug memory re...
483
  /**
e84c5b761   David Hildenbrand   mm/memory_hotplug...
484
   * __remove_pages() - remove sections of pages
7ea621604   Dan Williams   mm/sparsemem: pre...
485
   * @pfn: starting pageframe (must be aligned to start of a section)
ea01ea937   Badari Pulavarty   hotplug memory re...
486
   * @nr_pages: number of pages to remove (must be multiple of section size)
e8b098fc5   Mike Rapoport   mm: kernel-doc: a...
487
   * @altmap: alternative device page map or %NULL if default memmap is used
ea01ea937   Badari Pulavarty   hotplug memory re...
488
489
490
491
492
493
   *
   * Generic helper function to remove section mappings and sysfs entries
   * for the section of the memory we are removing. Caller needs to make
   * sure that pages are marked reserved and zones are adjust properly by
   * calling offline_pages().
   */
e84c5b761   David Hildenbrand   mm/memory_hotplug...
494
495
  void __remove_pages(unsigned long pfn, unsigned long nr_pages,
  		    struct vmem_altmap *altmap)
ea01ea937   Badari Pulavarty   hotplug memory re...
496
  {
4b94ffdc4   Dan Williams   x86, mm: introduc...
497
  	unsigned long map_offset = 0;
9a8450304   Dan Williams   mm/sparsemem: cle...
498
  	unsigned long nr, start_sec, end_sec;
4b94ffdc4   Dan Williams   x86, mm: introduc...
499

96da43500   Dan Williams   mm/hotplug: kill ...
500
  	map_offset = vmem_altmap_offset(altmap);
ea01ea937   Badari Pulavarty   hotplug memory re...
501

7ea621604   Dan Williams   mm/sparsemem: pre...
502
503
  	if (check_pfn_span(pfn, nr_pages, "remove"))
  		return;
ea01ea937   Badari Pulavarty   hotplug memory re...
504

7ea621604   Dan Williams   mm/sparsemem: pre...
505
506
  	start_sec = pfn_to_section_nr(pfn);
  	end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
9a8450304   Dan Williams   mm/sparsemem: cle...
507
  	for (nr = start_sec; nr <= end_sec; nr++) {
7ea621604   Dan Williams   mm/sparsemem: pre...
508
  		unsigned long pfns;
4b94ffdc4   Dan Williams   x86, mm: introduc...
509

dd33ad7b2   Michal Hocko   memory_hotplug: c...
510
  		cond_resched();
7ea621604   Dan Williams   mm/sparsemem: pre...
511
512
  		pfns = min(nr_pages, PAGES_PER_SECTION
  				- (pfn & ~PAGE_SECTION_MASK));
e84c5b761   David Hildenbrand   mm/memory_hotplug...
513
  		__remove_section(pfn, pfns, map_offset, altmap);
7ea621604   Dan Williams   mm/sparsemem: pre...
514
515
  		pfn += pfns;
  		nr_pages -= pfns;
4b94ffdc4   Dan Williams   x86, mm: introduc...
516
  		map_offset = 0;
ea01ea937   Badari Pulavarty   hotplug memory re...
517
  	}
ea01ea937   Badari Pulavarty   hotplug memory re...
518
  }
ea01ea937   Badari Pulavarty   hotplug memory re...
519

9d0ad8ca4   Daniel Kiper   mm: extend memory...
520
521
522
  /*
   * Install @callback as the page onlining function.
   *
   * Only succeeds while the generic callback is still in place. Returns 0
   * on success and -EINVAL if a different callback is already registered.
   */
  int set_online_page_callback(online_page_callback_t callback)
  {
  	int rc;

  	get_online_mems();
  	mutex_lock(&online_page_callback_lock);

  	if (online_page_callback == generic_online_page) {
  		online_page_callback = callback;
  		rc = 0;
  	} else {
  		rc = -EINVAL;
  	}

  	mutex_unlock(&online_page_callback_lock);
  	put_online_mems();

  	return rc;
  }
  EXPORT_SYMBOL_GPL(set_online_page_callback);
  
  /*
   * Put the generic page onlining function back in place.
   *
   * Only succeeds if @callback is the one currently registered. Returns 0
   * on success and -EINVAL otherwise.
   */
  int restore_online_page_callback(online_page_callback_t callback)
  {
  	int rc;

  	get_online_mems();
  	mutex_lock(&online_page_callback_lock);

  	if (online_page_callback == callback) {
  		online_page_callback = generic_online_page;
  		rc = 0;
  	} else {
  		rc = -EINVAL;
  	}

  	mutex_unlock(&online_page_callback_lock);
  	put_online_mems();

  	return rc;
  }
  EXPORT_SYMBOL_GPL(restore_online_page_callback);
  
  /* Exported helper with an empty body: @page is not touched. */
  void __online_page_set_limits(struct page *page)
  {
  }
  EXPORT_SYMBOL_GPL(__online_page_set_limits);
  
  /* Account one more managed page for @page via adjust_managed_page_count(). */
  void __online_page_increment_counters(struct page *page)
  {
  	adjust_managed_page_count(page, 1);
  }
  EXPORT_SYMBOL_GPL(__online_page_increment_counters);
180c06efc   Jeremy Fitzhardinge   hotplug-memory: m...
564

9d0ad8ca4   Daniel Kiper   mm: extend memory...
565
566
  /* Free @page by passing it to __free_reserved_page(). */
  void __online_page_free(struct page *page)
  {
  	__free_reserved_page(page);
  }
  EXPORT_SYMBOL_GPL(__online_page_free);
a9cd410a3   Arun KS   mm/page_alloc.c: ...
570
  /*
   * Default page onlining callback: map the 2^@order pages starting at
   * @page, free them through __free_pages_core() and add them to the total
   * RAM count (and to the highmem count for highmem pages when
   * CONFIG_HIGHMEM is set).
   */
  static void generic_online_page(struct page *page, unsigned int order)
  {
  	const unsigned long nr_pages = 1UL << order;

  	kernel_map_pages(page, 1 << order, 1);
  	__free_pages_core(page, order);
  	totalram_pages_add(nr_pages);
  #ifdef CONFIG_HIGHMEM
  	if (PageHighMem(page))
  		totalhigh_pages_add(nr_pages);
  #endif
  }
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
580
581
  static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
  			void *arg)
3947be196   Dave Hansen   [PATCH] memory ho...
582
  {
b2c2ab208   David Hildenbrand   mm/memory_hotplug...
583
584
585
586
587
588
589
590
591
592
593
  	const unsigned long end_pfn = start_pfn + nr_pages;
  	unsigned long pfn;
  	int order;
  
  	/*
  	 * Online the pages. The callback might decide to keep some pages
  	 * PG_reserved (to add them to the buddy later), but we still account
  	 * them as being online/belonging to this zone ("present").
  	 */
  	for (pfn = start_pfn; pfn < end_pfn; pfn += 1ul << order) {
  		order = min(MAX_ORDER - 1, get_order(PFN_PHYS(end_pfn - pfn)));
bd02cc01d   David Hildenbrand   mm/memory_hotplug...
594
595
596
  		/* __free_pages_core() wants pfns to be aligned to the order */
  		if (WARN_ON_ONCE(!IS_ALIGNED(pfn, 1ul << order)))
  			order = 0;
b2c2ab208   David Hildenbrand   mm/memory_hotplug...
597
598
  		(*online_page_callback)(pfn_to_page(pfn), order);
  	}
2d070eab2   Michal Hocko   mm: consider zone...
599

b2c2ab208   David Hildenbrand   mm/memory_hotplug...
600
601
  	/* mark all involved sections as online */
  	online_mem_sections(start_pfn, end_pfn);
2d070eab2   Michal Hocko   mm: consider zone...
602

b2c2ab208   David Hildenbrand   mm/memory_hotplug...
603
  	*(unsigned long *)arg += nr_pages;
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
604
605
  	return 0;
  }
d9713679d   Lai Jiangshan   memory_hotplug: f...
606
607
608
609
610
  /* check which state of node_states will be changed when online memory */
  static void node_states_check_changes_online(unsigned long nr_pages,
  	struct zone *zone, struct memory_notify *arg)
  {
  	int nid = zone_to_nid(zone);
d9713679d   Lai Jiangshan   memory_hotplug: f...
611

98fa15f34   Anshuman Khandual   mm: replace all o...
612
613
614
  	arg->status_change_nid = NUMA_NO_NODE;
  	arg->status_change_nid_normal = NUMA_NO_NODE;
  	arg->status_change_nid_high = NUMA_NO_NODE;
d9713679d   Lai Jiangshan   memory_hotplug: f...
615

8efe33f40   Oscar Salvador   mm/memory_hotplug...
616
617
618
  	if (!node_state(nid, N_MEMORY))
  		arg->status_change_nid = nid;
  	if (zone_idx(zone) <= ZONE_NORMAL && !node_state(nid, N_NORMAL_MEMORY))
d9713679d   Lai Jiangshan   memory_hotplug: f...
619
  		arg->status_change_nid_normal = nid;
6715ddf94   Lai Jiangshan   hotplug: update n...
620
  #ifdef CONFIG_HIGHMEM
d3ba3ae19   Baoquan He   mm/memory_hotplug...
621
  	if (zone_idx(zone) <= ZONE_HIGHMEM && !node_state(nid, N_HIGH_MEMORY))
6715ddf94   Lai Jiangshan   hotplug: update n...
622
  		arg->status_change_nid_high = nid;
6715ddf94   Lai Jiangshan   hotplug: update n...
623
  #endif
d9713679d   Lai Jiangshan   memory_hotplug: f...
624
625
626
627
628
629
  }
  
  static void node_states_set_node(int node, struct memory_notify *arg)
  {
  	if (arg->status_change_nid_normal >= 0)
  		node_set_state(node, N_NORMAL_MEMORY);
6715ddf94   Lai Jiangshan   hotplug: update n...
630
631
  	if (arg->status_change_nid_high >= 0)
  		node_set_state(node, N_HIGH_MEMORY);
83d83612d   Oscar Salvador   mm/memory_hotplug...
632
633
  	if (arg->status_change_nid >= 0)
  		node_set_state(node, N_MEMORY);
d9713679d   Lai Jiangshan   memory_hotplug: f...
634
  }
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
  /*
   * Grow the span of @zone so that it also covers
   * [@start_pfn, @start_pfn + @nr_pages). The span never shrinks.
   */
  static void __meminit resize_zone_range(struct zone *zone, unsigned long start_pfn,
  		unsigned long nr_pages)
  {
  	/* must be computed before zone_start_pfn is possibly moved below */
  	const unsigned long new_end_pfn = max(start_pfn + nr_pages,
  					      zone_end_pfn(zone));

  	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
  		zone->zone_start_pfn = start_pfn;

  	zone->spanned_pages = new_end_pfn - zone->zone_start_pfn;
  }
  
  /*
   * Grow the span of the node described by @pgdat so that it also covers
   * [@start_pfn, @start_pfn + @nr_pages). The span never shrinks.
   */
  static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
                                       unsigned long nr_pages)
  {
  	/* must be computed before node_start_pfn is possibly moved below */
  	const unsigned long new_end_pfn = max(start_pfn + nr_pages,
  					      pgdat_end_pfn(pgdat));

  	if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
  		pgdat->node_start_pfn = start_pfn;

  	pgdat->node_spanned_pages = new_end_pfn - pgdat->node_start_pfn;
  }
  /*
   * Associate the pfn range with the given zone, initializing the memmaps
   * and resizing the pgdat/zone data to span the added pages. After this
   * call, all affected pages are PG_reserved.
   */
a99583e78   Christoph Hellwig   mm: pass the vmem...
662
663
  void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
  		unsigned long nr_pages, struct vmem_altmap *altmap)
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
664
665
666
667
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	int nid = pgdat->node_id;
  	unsigned long flags;
df429ac03   Reza Arbab   memory-hotplug: m...
668

f1dd2cd13   Michal Hocko   mm, memory_hotplu...
669
670
671
672
673
  	clear_zone_contiguous(zone);
  
  	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
  	pgdat_resize_lock(pgdat, &flags);
  	zone_span_writelock(zone);
fa004ab73   Wei Yang   mm, hotplug: move...
674
675
  	if (zone_is_empty(zone))
  		init_currently_empty_zone(zone, start_pfn, nr_pages);
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
676
677
678
679
680
681
682
683
684
685
686
  	resize_zone_range(zone, start_pfn, nr_pages);
  	zone_span_writeunlock(zone);
  	resize_pgdat_range(pgdat, start_pfn, nr_pages);
  	pgdat_resize_unlock(pgdat, &flags);
  
  	/*
  	 * TODO now we have a visible range of pages which are not associated
  	 * with their zone properly. Not nice but set_pfnblock_flags_mask
  	 * expects the zone spans the pfn range. All the pages in the range
  	 * are reserved so nobody should be touching them so we should be safe
  	 */
a99583e78   Christoph Hellwig   mm: pass the vmem...
687
688
  	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
  			MEMMAP_HOTPLUG, altmap);
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
689
690
691
692
693
  
  	set_zone_contiguous(zone);
  }
  
  /*
c246a213f   Michal Hocko   mm, memory_hotplu...
694
695
696
697
   * Returns a default kernel memory zone for the given pfn range.
   * If no kernel zone covers this pfn range it will automatically go
   * to the ZONE_NORMAL.
   */
c6f03e290   Michal Hocko   mm, memory_hotplu...
698
  static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn,
c246a213f   Michal Hocko   mm, memory_hotplu...
699
700
701
702
703
704
705
706
707
708
709
710
711
712
  		unsigned long nr_pages)
  {
  	struct pglist_data *pgdat = NODE_DATA(nid);
  	int zid;
  
  	for (zid = 0; zid <= ZONE_NORMAL; zid++) {
  		struct zone *zone = &pgdat->node_zones[zid];
  
  		if (zone_intersects(zone, start_pfn, nr_pages))
  			return zone;
  	}
  
  	return &pgdat->node_zones[ZONE_NORMAL];
  }
c6f03e290   Michal Hocko   mm, memory_hotplu...
713
714
  static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
  		unsigned long nr_pages)
e5e689302   Michal Hocko   mm, memory_hotplu...
715
  {
c6f03e290   Michal Hocko   mm, memory_hotplu...
716
717
718
719
720
  	struct zone *kernel_zone = default_kernel_zone_for_pfn(nid, start_pfn,
  			nr_pages);
  	struct zone *movable_zone = &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
  	bool in_kernel = zone_intersects(kernel_zone, start_pfn, nr_pages);
  	bool in_movable = zone_intersects(movable_zone, start_pfn, nr_pages);
e5e689302   Michal Hocko   mm, memory_hotplu...
721
722
  
  	/*
c6f03e290   Michal Hocko   mm, memory_hotplu...
723
724
  	 * We inherit the existing zone in a simple case where zones do not
  	 * overlap in the given range
e5e689302   Michal Hocko   mm, memory_hotplu...
725
  	 */
c6f03e290   Michal Hocko   mm, memory_hotplu...
726
727
  	if (in_kernel ^ in_movable)
  		return (in_kernel) ? kernel_zone : movable_zone;
9f123ab54   Michal Hocko   mm, memory_hotplu...
728

c6f03e290   Michal Hocko   mm, memory_hotplu...
729
730
731
732
733
734
  	/*
  	 * If the range doesn't belong to any zone or two zones overlap in the
  	 * given range then we use movable zone only if movable_node is
  	 * enabled because we always online to a kernel zone by default.
  	 */
  	return movable_node_enabled ? movable_zone : kernel_zone;
9f123ab54   Michal Hocko   mm, memory_hotplu...
735
  }
e5e689302   Michal Hocko   mm, memory_hotplu...
736
737
  struct zone * zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
  		unsigned long nr_pages)
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
738
  {
c6f03e290   Michal Hocko   mm, memory_hotplu...
739
740
  	if (online_type == MMOP_ONLINE_KERNEL)
  		return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages);
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
741

c6f03e290   Michal Hocko   mm, memory_hotplu...
742
743
  	if (online_type == MMOP_ONLINE_MOVABLE)
  		return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
df429ac03   Reza Arbab   memory-hotplug: m...
744

c6f03e290   Michal Hocko   mm, memory_hotplu...
745
  	return default_zone_for_pfn(nid, start_pfn, nr_pages);
e5e689302   Michal Hocko   mm, memory_hotplu...
746
  }
511c2aba8   Lai Jiangshan   mm, memory-hotplu...
747
  /*
   * Online the pfn range [@pfn, @pfn + @nr_pages).
   *
   * @online_type selects the target zone (see zone_for_pfn_range()). The
   * range is attached to that zone, MEM_GOING_ONLINE notifiers are called,
   * the pages are onlined via walk_system_ram_range() and the zone/node
   * accounting and related state are updated. MEM_ONLINE notifiers run last.
   * The whole operation runs between mem_hotplug_begin() and
   * mem_hotplug_done().
   *
   * Returns 0 on success. When a MEM_GOING_ONLINE notifier or
   * walk_system_ram_range() fails, MEM_CANCEL_ONLINE notifiers are called,
   * the range is removed from the zone again and the error is returned.
   */
  int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
  {
  	unsigned long flags;
  	unsigned long onlined_pages = 0;
  	struct zone *zone;
  	int need_zonelists_rebuild = 0;
  	int nid;
  	int ret;
  	struct memory_notify arg;
  	struct memory_block *mem;

  	mem_hotplug_begin();

  	/*
  	 * We can't use pfn_to_nid() because nid might be stored in struct page
  	 * which is not yet initialized. Instead, we find nid from memory block.
  	 *
  	 * NOTE(review): the result of find_memory_block() is dereferenced
  	 * without a NULL check - presumably callers guarantee that the block
  	 * exists; confirm.
  	 */
  	mem = find_memory_block(__pfn_to_section(pfn));
  	nid = mem->nid;
  	put_device(&mem->dev);

  	/* associate pfn range with the zone */
  	zone = zone_for_pfn_range(online_type, nid, pfn, nr_pages);
  	move_pfn_range_to_zone(zone, pfn, nr_pages, NULL);

  	arg.start_pfn = pfn;
  	arg.nr_pages = nr_pages;
  	node_states_check_changes_online(nr_pages, zone, &arg);

  	ret = memory_notify(MEM_GOING_ONLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret)
  		goto failed_addition;

  	/*
  	 * If this zone is not populated, then it is not in zonelist.
  	 * This means the page allocator ignores this zone.
  	 * So, zonelist must be updated after online.
  	 */
  	if (!populated_zone(zone)) {
  		need_zonelists_rebuild = 1;
  		setup_zone_pageset(zone);
  	}

  	ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
  		online_pages_range);
  	if (ret) {
  		/* not a single memory resource was applicable */
  		if (need_zonelists_rebuild)
  			zone_pcp_reset(zone);
  		goto failed_addition;
  	}

  	zone->present_pages += onlined_pages;

  	pgdat_resize_lock(zone->zone_pgdat, &flags);
  	zone->zone_pgdat->node_present_pages += onlined_pages;
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);

  	shuffle_zone(zone);

  	node_states_set_node(nid, &arg);
  	if (need_zonelists_rebuild)
  		build_all_zonelists(NULL);
  	else
  		zone_pcp_update(zone);

  	init_per_zone_wmark_min();

  	kswapd_run(nid);
  	kcompactd_run(nid);

  	vm_total_pages = nr_free_pagecache_pages();

  	writeback_set_ratelimit();

  	memory_notify(MEM_ONLINE, &arg);
  	mem_hotplug_done();
  	return 0;

  failed_addition:
  	/* the message must be one literal terminated by an escaped newline */
  	pr_debug("online_pages [mem %#010llx-%#010llx] failed\n",
  		 (unsigned long long) pfn << PAGE_SHIFT,
  		 (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
  	memory_notify(MEM_CANCEL_ONLINE, &arg);
  	remove_pfn_range_from_zone(zone, pfn, nr_pages);
  	mem_hotplug_done();
  	return ret;
  }
53947027a   Keith Mannthey   [PATCH] hot-add-m...
830
  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
831

0bd854200   Tang Chen   mem-hotplug: rese...
832
833
834
835
836
837
838
839
840
  /*
   * Zero the present page counters of the node described by @pgdat: the
   * per-zone present_pages of all MAX_NR_ZONES zones and node_present_pages.
   */
  static void reset_node_present_pages(pg_data_t *pgdat)
  {
  	int zid;

  	for (zid = 0; zid < MAX_NR_ZONES; zid++)
  		pgdat->node_zones[zid].present_pages = 0;

  	pgdat->node_present_pages = 0;
  }
e13193319   Hidetoshi Seto   mm/memory_hotplug...
841
842
  /*
   * Set up the pgdat of node @nid for memory hot-add.
   *
   * If the node has no pgdat yet, one is allocated together with its per-cpu
   * node statistics and published via arch_refresh_nodedata(). An existing
   * pgdat is reused after resetting nr_zones, the kswapd order/classzone
   * index and the per-cpu node statistics of all online CPUs. In both cases
   * the zones are initialized as empty, the zonelists are built and the
   * managed/present page counters are cleared.
   *
   * @start is the start address of the hot-added memory; PFN_DOWN(@start)
   * becomes the node's start pfn.
   *
   * Returns the pgdat, or NULL if the pgdat or its per-cpu statistics could
   * not be allocated.
   *
   * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
   */
  static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
  {
  	struct pglist_data *pgdat;
  	unsigned long start_pfn = PFN_DOWN(start);

  	pgdat = NODE_DATA(nid);
  	if (!pgdat) {
  		pgdat = arch_alloc_nodedata(nid);
  		if (!pgdat)
  			return NULL;

  		pgdat->per_cpu_nodestats =
  			alloc_percpu(struct per_cpu_nodestat);
  		if (!pgdat->per_cpu_nodestats) {
  			/*
  			 * The pgdat has not been published yet, so it can
  			 * simply be freed again instead of leaving a node
  			 * behind whose statistics pointer is NULL.
  			 */
  			arch_free_nodedata(pgdat);
  			return NULL;
  		}
  		arch_refresh_nodedata(nid, pgdat);
  	} else {
  		int cpu;
  		/*
  		 * Reset the nr_zones, order and classzone_idx before reuse.
  		 * Note that kswapd will init kswapd_classzone_idx properly
  		 * when it starts in the near future.
  		 */
  		pgdat->nr_zones = 0;
  		pgdat->kswapd_order = 0;
  		pgdat->kswapd_classzone_idx = 0;
  		for_each_online_cpu(cpu) {
  			struct per_cpu_nodestat *p;

  			p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
  			memset(p, 0, sizeof(*p));
  		}
  	}

  	/* we can use NODE_DATA(nid) from here */
  	pgdat->node_id = nid;
  	pgdat->node_start_pfn = start_pfn;

  	/* init node's zones as empty zones, we don't have any present pages.*/
  	free_area_init_core_hotplug(nid);

  	/*
  	 * The node we allocated has no zone fallback lists. For avoiding
  	 * to access not-initialized zonelist, build here.
  	 */
  	build_all_zonelists(pgdat);

  	/*
  	 * When memory is hot-added, all the memory is in offline state. So
  	 * clear all zones' present_pages because they will be updated in
  	 * online_pages() and offline_pages().
  	 */
  	reset_node_managed_pages(pgdat);
  	reset_node_present_pages(pgdat);

  	return pgdat;
  }
b9ff03608   Oscar Salvador   mm/memory_hotplug...
895
  static void rollback_node_hotadd(int nid)
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
896
  {
b9ff03608   Oscar Salvador   mm/memory_hotplug...
897
  	pg_data_t *pgdat = NODE_DATA(nid);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
898
  	arch_refresh_nodedata(nid, NULL);
5830169f4   Reza Arbab   mm/memory_hotplug...
899
  	free_percpu(pgdat->per_cpu_nodestats);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
900
  	arch_free_nodedata(pgdat);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
901
  }
0a5470390   KAMEZAWA Hiroyuki   [PATCH] register ...
902

01b0f1970   Toshi Kani   cpu/mem hotplug: ...
903
904
  /**
   * try_online_node - online a node if offlined
e8b098fc5   Mike Rapoport   mm: kernel-doc: a...
905
   * @nid: the node ID
b9ff03608   Oscar Salvador   mm/memory_hotplug...
906
907
   * @start: start addr of the node
   * @set_node_online: Whether we want to online the node
cf23422b9   minskey guo   cpu/mem hotplug: ...
908
   * called by cpu_up() to online a node without onlined memory.
b9ff03608   Oscar Salvador   mm/memory_hotplug...
909
910
911
912
913
   *
   * Returns:
   * 1 -> a new node has been allocated
   * 0 -> the node is already online
   * -ENOMEM -> the node could not be allocated
cf23422b9   minskey guo   cpu/mem hotplug: ...
914
   */
b9ff03608   Oscar Salvador   mm/memory_hotplug...
915
  static int __try_online_node(int nid, u64 start, bool set_node_online)
cf23422b9   minskey guo   cpu/mem hotplug: ...
916
  {
b9ff03608   Oscar Salvador   mm/memory_hotplug...
917
918
  	pg_data_t *pgdat;
  	int ret = 1;
cf23422b9   minskey guo   cpu/mem hotplug: ...
919

01b0f1970   Toshi Kani   cpu/mem hotplug: ...
920
921
  	if (node_online(nid))
  		return 0;
b9ff03608   Oscar Salvador   mm/memory_hotplug...
922
  	pgdat = hotadd_new_pgdat(nid, start);
7553e8f2d   David Rientjes   mm, hotplug: fix ...
923
  	if (!pgdat) {
01b0f1970   Toshi Kani   cpu/mem hotplug: ...
924
925
  		pr_err("Cannot online node %d due to NULL pgdat
  ", nid);
cf23422b9   minskey guo   cpu/mem hotplug: ...
926
927
928
  		ret = -ENOMEM;
  		goto out;
  	}
b9ff03608   Oscar Salvador   mm/memory_hotplug...
929
930
931
932
933
934
  
  	if (set_node_online) {
  		node_set_online(nid);
  		ret = register_one_node(nid);
  		BUG_ON(ret);
  	}
cf23422b9   minskey guo   cpu/mem hotplug: ...
935
  out:
b9ff03608   Oscar Salvador   mm/memory_hotplug...
936
937
938
939
940
941
942
943
944
945
946
947
  	return ret;
  }
  
  /*
   * Online and register node @nid unless it is online already. Callers of
   * this function always want the node to be onlined/registered, so
   * __try_online_node() is invoked with set_node_online == true, start
   * address 0, and inside a mem_hotplug_begin()/mem_hotplug_done() section.
   * Returns the result of __try_online_node().
   */
  int try_online_node(int nid)
  {
  	int rc;

  	mem_hotplug_begin();
  	rc = __try_online_node(nid, 0, true);
  	mem_hotplug_done();

  	return rc;
  }
27356f54c   Toshi Kani   mm/hotplug: verif...
951
952
  /*
   * Validate a memory range that is about to be hot-added: @size must be
   * non-zero and both @start and @size must be aligned to the memory block
   * size. Returns 0 when the range is acceptable and -EINVAL (after logging
   * an error) otherwise.
   */
  static int check_hotplug_memory_range(u64 start, u64 size)
  {
  	/* memory range must be block size aligned */
  	if (!size || !IS_ALIGNED(start, memory_block_size_bytes()) ||
  	    !IS_ALIGNED(size, memory_block_size_bytes())) {
  		/* terminate the message so it forms a complete log line */
  		pr_err("Block size [%#lx] unaligned hotplug range: start %#llx, size %#llx\n",
  		       memory_block_size_bytes(), start, size);
  		return -EINVAL;
  	}

  	return 0;
  }
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
963
964
  static int online_memory_block(struct memory_block *mem, void *arg)
  {
dc18d706a   Nathan Fontenot   memory-hotplug: u...
965
  	return device_online(&mem->dev);
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
966
  }
8df1d0e4a   David Hildenbrand   mm/memory_hotplug...
967
968
969
970
971
972
  /*
   * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
   * and online/offline operations (triggered e.g. by sysfs).
   *
   * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
   */
f29d8e9c0   David Hildenbrand   mm/memory_hotplug...
973
  int __ref add_memory_resource(int nid, struct resource *res)
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
974
  {
05f800a0b   David Hildenbrand   mm/memory_hotplug...
975
  	struct mhp_restrictions restrictions = {};
62cedb9f1   David Vrabel   mm: memory hotplu...
976
  	u64 start, size;
b9ff03608   Oscar Salvador   mm/memory_hotplug...
977
  	bool new_node = false;
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
978
  	int ret;
62cedb9f1   David Vrabel   mm: memory hotplu...
979
980
  	start = res->start;
  	size = resource_size(res);
27356f54c   Toshi Kani   mm/hotplug: verif...
981
982
983
  	ret = check_hotplug_memory_range(start, size);
  	if (ret)
  		return ret;
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
984
  	mem_hotplug_begin();
ac13c4622   Nathan Zimmer   mm/memory_hotplug...
985

7f36e3e56   Tang Chen   memory-hotplug: a...
986
987
988
989
990
991
992
  	/*
  	 * Add new range to memblock so that when hotadd_new_pgdat() is called
  	 * to allocate new pgdat, get_pfn_range_for_nid() will be able to find
  	 * this new range and calculate total pages correctly.  The range will
  	 * be removed at hot-remove time.
  	 */
  	memblock_add_node(start, size, nid);
b9ff03608   Oscar Salvador   mm/memory_hotplug...
993
994
995
996
  	ret = __try_online_node(nid, start, false);
  	if (ret < 0)
  		goto error;
  	new_node = ret;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
997

bc02af93d   Yasunori Goto   [PATCH] pgdat all...
998
  	/* call arch's memory hotadd */
940519f0c   Michal Hocko   mm, memory_hotplu...
999
  	ret = arch_add_memory(nid, start, size, &restrictions);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1000
1001
  	if (ret < 0)
  		goto error;
db051a0da   David Hildenbrand   mm/memory_hotplug...
1002
1003
1004
1005
1006
1007
  	/* create memory block devices after memory was added */
  	ret = create_memory_block_devices(start, size);
  	if (ret) {
  		arch_remove_memory(nid, start, size, NULL);
  		goto error;
  	}
a1e565aa3   Tang Chen   memory-hotplug: d...
1008
  	if (new_node) {
d5b6f6a36   Oscar Salvador   mm/memory_hotplug...
1009
  		/* If sysfs file of new node can't be created, cpu on the node
0fc44159b   Yasunori Goto   [PATCH] Register ...
1010
1011
  		 * can't be hot-added. There is no rollback way now.
  		 * So, check by BUG_ON() to catch it reluctantly..
d5b6f6a36   Oscar Salvador   mm/memory_hotplug...
1012
  		 * We online node here. We can't roll back from here.
0fc44159b   Yasunori Goto   [PATCH] Register ...
1013
  		 */
d5b6f6a36   Oscar Salvador   mm/memory_hotplug...
1014
1015
  		node_set_online(nid);
  		ret = __register_one_node(nid);
0fc44159b   Yasunori Goto   [PATCH] Register ...
1016
1017
  		BUG_ON(ret);
  	}
d5b6f6a36   Oscar Salvador   mm/memory_hotplug...
1018
  	/* link memory sections under this node.*/
4fbce6339   Oscar Salvador   mm/memory_hotplug...
1019
  	ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1));
d5b6f6a36   Oscar Salvador   mm/memory_hotplug...
1020
  	BUG_ON(ret);
d96ae5309   akpm@linux-foundation.org   memory-hotplug: c...
1021
1022
  	/* create new memmap entry */
  	firmware_map_add_hotplug(start, start + size, "System RAM");
381eab4a6   David Hildenbrand   mm/memory_hotplug...
1023
1024
  	/* device_online() will take the lock when calling online_pages() */
  	mem_hotplug_done();
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
1025
  	/* online pages if requested */
f29d8e9c0   David Hildenbrand   mm/memory_hotplug...
1026
  	if (memhp_auto_online)
fbcf73ce6   David Hildenbrand   mm/memory_hotplug...
1027
  		walk_memory_blocks(start, size, NULL, online_memory_block);
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
1028

381eab4a6   David Hildenbrand   mm/memory_hotplug...
1029
  	return ret;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1030
1031
  error:
  	/* rollback pgdat allocation and others */
b9ff03608   Oscar Salvador   mm/memory_hotplug...
1032
1033
  	if (new_node)
  		rollback_node_hotadd(nid);
7f36e3e56   Tang Chen   memory-hotplug: a...
1034
  	memblock_remove(start, size);
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1035
  	mem_hotplug_done();
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1036
1037
  	return ret;
  }
62cedb9f1   David Vrabel   mm: memory hotplu...
1038

8df1d0e4a   David Hildenbrand   mm/memory_hotplug...
1039
1040
  /*
   * Register a memory resource for [@start, @start + @size) and hot-add it
   * to node @nid. The resource is released again when adding the memory
   * fails. Returns 0 on success or a negative error code.
   *
   * requires device_hotplug_lock, see add_memory_resource()
   */
  int __ref __add_memory(int nid, u64 start, u64 size)
  {
  	struct resource *res = register_memory_resource(start, size);
  	int ret;

  	if (IS_ERR(res))
  		return PTR_ERR(res);

  	ret = add_memory_resource(nid, res);
  	if (ret < 0)
  		release_memory_resource(res);

  	return ret;
  }
8df1d0e4a   David Hildenbrand   mm/memory_hotplug...
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
  
  /*
   * Locked variant of __add_memory(): takes the device hotplug lock around
   * the call and returns its result. Exported (GPL-only).
   */
  int add_memory(int nid, u64 start, u64 size)
  {
  	int ret;

  	lock_device_hotplug();
  	ret = __add_memory(nid, start, size);
  	unlock_device_hotplug();

  	return ret;
  }
  EXPORT_SYMBOL_GPL(add_memory);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1066
1067
1068
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
  /*
   * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
   * set, and page_order() gives the size of the free page. This is used to
   * decide whether the pageblock contains only free pages: due to buddy
   * constraints, a free page that is at least as large as a pageblock is
   * located at the start of the pageblock.
   *
   * Returns non-zero when @page is such a free page, 0 otherwise.
   */
  static inline int pageblock_free(struct page *page)
  {
  	if (!PageBuddy(page))
  		return 0;

  	return page_order(page) >= pageblock_order;
  }
891cb2a72   Michal Hocko   mm, memory_hotplu...
1079
1080
  /* Return the pfn of the start of the next active pageblock after a given pfn */
  static unsigned long next_active_pageblock(unsigned long pfn)
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1081
  {
891cb2a72   Michal Hocko   mm, memory_hotplu...
1082
  	struct page *page = pfn_to_page(pfn);
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1083
  	/* Ensure the starting page is pageblock-aligned */
891cb2a72   Michal Hocko   mm, memory_hotplu...
1084
  	BUG_ON(pfn & (pageblock_nr_pages - 1));
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1085

5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1086
  	/* If the entire pageblock is free, move to the end of free page */
0dcc48c15   KAMEZAWA Hiroyuki   memory hotplug: f...
1087
1088
1089
1090
1091
  	if (pageblock_free(page)) {
  		int order;
  		/* be careful. we don't have locks, page_order can be changed.*/
  		order = page_order(page);
  		if ((order < MAX_ORDER) && (order >= pageblock_order))
891cb2a72   Michal Hocko   mm, memory_hotplu...
1092
  			return pfn + (1 << order);
0dcc48c15   KAMEZAWA Hiroyuki   memory hotplug: f...
1093
  	}
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1094

891cb2a72   Michal Hocko   mm, memory_hotplu...
1095
  	return pfn + pageblock_nr_pages;
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1096
  }
891cb2a72   Michal Hocko   mm, memory_hotplu...
1097
  static bool is_pageblock_removable_nolock(unsigned long pfn)
fb52bbaee   Mathieu Malaterre   mm: move is_pageb...
1098
  {
891cb2a72   Michal Hocko   mm, memory_hotplu...
1099
  	struct page *page = pfn_to_page(pfn);
fb52bbaee   Mathieu Malaterre   mm: move is_pageb...
1100
  	struct zone *zone;
fb52bbaee   Mathieu Malaterre   mm: move is_pageb...
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
  
  	/*
  	 * We have to be careful here because we are iterating over memory
  	 * sections which are not zone aware so we might end up outside of
  	 * the zone but still within the section.
  	 * We have to take care about the node as well. If the node is offline
  	 * its NODE_DATA will be NULL - see page_zone.
  	 */
  	if (!node_online(page_to_nid(page)))
  		return false;
  
  	zone = page_zone(page);
  	pfn = page_to_pfn(page);
  	if (!zone_spans_pfn(zone, pfn))
  		return false;
d381c5476   Michal Hocko   mm: only report i...
1116
  	return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, SKIP_HWPOISON);
fb52bbaee   Mathieu Malaterre   mm: move is_pageb...
1117
  }
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1118
  /* Checks if this range of memory is likely to be hot-removable. */
c98940f6f   Yaowei Bai   mm/memory_hotplug...
1119
  bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1120
  {
891cb2a72   Michal Hocko   mm, memory_hotplu...
1121
1122
1123
1124
  	unsigned long end_pfn, pfn;
  
  	end_pfn = min(start_pfn + nr_pages,
  			zone_end_pfn(page_zone(pfn_to_page(start_pfn))));
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1125
1126
  
  	/* Check the starting page of each pageblock within the range */
891cb2a72   Michal Hocko   mm, memory_hotplu...
1127
1128
  	for (pfn = start_pfn; pfn < end_pfn; pfn = next_active_pageblock(pfn)) {
  		if (!is_pageblock_removable_nolock(pfn))
c98940f6f   Yaowei Bai   mm/memory_hotplug...
1129
  			return false;
49ac82558   KAMEZAWA Hiroyuki   memory hotplug: u...
1130
  		cond_resched();
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1131
1132
1133
  	}
  
  	/* All pageblocks in the memory block are likely to be hot-removable */
c98940f6f   Yaowei Bai   mm/memory_hotplug...
1134
  	return true;
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1135
1136
1137
  }
  
  /*
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1138
   * Confirm all pages in a range [start, end) belong to the same zone.
a96dfddbc   Toshi Kani   base/memory, hotp...
1139
   * When true, return its valid [start, end).
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1140
   */
a96dfddbc   Toshi Kani   base/memory, hotp...
1141
1142
  int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
  			 unsigned long *valid_start, unsigned long *valid_end)
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1143
  {
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1144
  	unsigned long pfn, sec_end_pfn;
a96dfddbc   Toshi Kani   base/memory, hotp...
1145
  	unsigned long start, end;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1146
1147
1148
  	struct zone *zone = NULL;
  	struct page *page;
  	int i;
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1149
  	for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1150
  	     pfn < end_pfn;
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1151
  	     pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1152
1153
  		/* Make sure the memory section is present first */
  		if (!present_section_nr(pfn_to_section_nr(pfn)))
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1154
  			continue;
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1155
1156
1157
1158
1159
1160
1161
  		for (; pfn < sec_end_pfn && pfn < end_pfn;
  		     pfn += MAX_ORDER_NR_PAGES) {
  			i = 0;
  			/* This is just a CONFIG_HOLES_IN_ZONE check.*/
  			while ((i < MAX_ORDER_NR_PAGES) &&
  				!pfn_valid_within(pfn + i))
  				i++;
d6d8c8a48   zhong jiang   mm/memory_hotplug...
1162
  			if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1163
  				continue;
24feb47c5   Mikhail Zaslonko   mm, memory_hotplu...
1164
1165
1166
  			/* Check if we got outside of the zone */
  			if (zone && !zone_spans_pfn(zone, pfn + i))
  				return 0;
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1167
1168
1169
  			page = pfn_to_page(pfn + i);
  			if (zone && page_zone(page) != zone)
  				return 0;
a96dfddbc   Toshi Kani   base/memory, hotp...
1170
1171
  			if (!zone)
  				start = pfn + i;
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1172
  			zone = page_zone(page);
a96dfddbc   Toshi Kani   base/memory, hotp...
1173
  			end = pfn + MAX_ORDER_NR_PAGES;
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1174
  		}
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1175
  	}
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1176

a96dfddbc   Toshi Kani   base/memory, hotp...
1177
1178
  	if (zone) {
  		*valid_start = start;
d6d8c8a48   zhong jiang   mm/memory_hotplug...
1179
  		*valid_end = min(end, end_pfn);
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1180
  		return 1;
a96dfddbc   Toshi Kani   base/memory, hotp...
1181
  	} else {
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1182
  		return 0;
a96dfddbc   Toshi Kani   base/memory, hotp...
1183
  	}
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1184
1185
1186
  }
  
  /*
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1187
1188
1189
1190
   * Scan pfn range [start,end) to find movable/migratable pages (LRU pages,
   * non-lru movable pages and hugepages). We scan pfn because it's much
   * easier than scanning over linked list. This function returns the pfn
   * of the first found movable page if it's found, otherwise 0.
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1191
   */
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1192
  static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1193
1194
  {
  	unsigned long pfn;
eeb0efd07   Oscar Salvador   mm,memory_hotplug...
1195

0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1196
  	for (pfn = start; pfn < end; pfn++) {
eeb0efd07   Oscar Salvador   mm,memory_hotplug...
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
  		struct page *page, *head;
  		unsigned long skip;
  
  		if (!pfn_valid(pfn))
  			continue;
  		page = pfn_to_page(pfn);
  		if (PageLRU(page))
  			return pfn;
  		if (__PageMovable(page))
  			return pfn;
  
  		if (!PageHuge(page))
  			continue;
  		head = compound_head(page);
39186cbe6   Oscar Salvador   mm,memory_hotplug...
1211
  		if (page_huge_active(head))
eeb0efd07   Oscar Salvador   mm,memory_hotplug...
1212
  			return pfn;
d8c6546b1   Matthew Wilcox (Oracle)   mm: introduce com...
1213
  		skip = compound_nr(head) - (page - head);
eeb0efd07   Oscar Salvador   mm,memory_hotplug...
1214
  		pfn += skip - 1;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1215
1216
1217
  	}
  	return 0;
  }
666feb21a   Michal Hocko   mm, migrate: remo...
1218
  static struct page *new_node_page(struct page *page, unsigned long private)
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1219
  {
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1220
  	int nid = page_to_nid(page);
231e97e2b   Li Zhong   mem-hotplug: use ...
1221
  	nodemask_t nmask = node_states[N_MEMORY];
7f252f277   Michal Hocko   mm, memory_hotplu...
1222
1223
1224
1225
1226
1227
1228
1229
1230
  
  	/*
  	 * try to allocate from a different node but reuse this node if there
  	 * are no other online nodes to be used (e.g. we are offlining a part
  	 * of the only existing node)
  	 */
  	node_clear(nid, nmask);
  	if (nodes_empty(nmask))
  		node_set(nid, nmask);
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1231

8b9132388   Michal Hocko   mm: unify new_nod...
1232
  	return new_page_nodemask(page, nid, &nmask);
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1233
  }
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1234
1235
1236
1237
1238
  static int
  do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pfn;
  	struct page *page;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1239
1240
  	int ret = 0;
  	LIST_HEAD(source);
a85009c37   Michal Hocko   mm, memory_hotplu...
1241
  	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1242
1243
1244
  		if (!pfn_valid(pfn))
  			continue;
  		page = pfn_to_page(pfn);
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1245
1246
1247
  
  		if (PageHuge(page)) {
  			struct page *head = compound_head(page);
d8c6546b1   Matthew Wilcox (Oracle)   mm: introduce com...
1248
  			pfn = page_to_pfn(head) + compound_nr(head) - 1;
daf3538ad   Oscar Salvador   mm,memory_hotplug...
1249
  			isolate_huge_page(head, &source);
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1250
  			continue;
94723aafb   Michal Hocko   mm: unclutter THP...
1251
  		} else if (PageTransHuge(page))
8135d8926   Naoya Horiguchi   mm: memory_hotplu...
1252
1253
  			pfn = page_to_pfn(compound_head(page))
  				+ hpage_nr_pages(page) - 1;
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1254

b15c87263   Michal Hocko   hwpoison, memory_...
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
  		/*
  		 * HWPoison pages have elevated reference counts so the migration would
  		 * fail on them. It also doesn't make any sense to migrate them in the
  		 * first place. Still try to unmap such a page in case it is still mapped
  		 * (e.g. current hwpoison implementation doesn't unmap KSM pages but keep
  		 * the unmap as the catch all safety net).
  		 */
  		if (PageHWPoison(page)) {
  			if (WARN_ON(PageLRU(page)))
  				isolate_lru_page(page);
  			if (page_mapped(page))
  				try_to_unmap(page, TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS);
  			continue;
  		}
700c2a46e   Konstantin Khlebnikov   mem-hotplug: call...
1269
  		if (!get_page_unless_zero(page))
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1270
1271
  			continue;
  		/*
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1272
1273
  		 * We can skip free pages. And we can deal with pages on
  		 * LRU and non-lru movable pages.
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1274
  		 */
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1275
1276
1277
1278
  		if (PageLRU(page))
  			ret = isolate_lru_page(page);
  		else
  			ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1279
  		if (!ret) { /* Success */
62695a84e   Nick Piggin   vmscan: move isol...
1280
  			list_add_tail(&page->lru, &source);
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1281
1282
1283
  			if (!__PageMovable(page))
  				inc_node_page_state(page, NR_ISOLATED_ANON +
  						    page_is_file_cache(page));
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
1284

0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1285
  		} else {
2932c8b05   Michal Hocko   mm, memory_hotplu...
1286
1287
  			pr_warn("failed to isolate pfn %lx
  ", pfn);
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1288
  			dump_page(page, "isolation failed");
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1289
  		}
1723058ea   Oscar Salvador   mm, memory_hotplu...
1290
  		put_page(page);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1291
  	}
f3ab2636c   Bob Liu   mm: do_migrate_ra...
1292
  	if (!list_empty(&source)) {
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1293
1294
  		/* Allocate a new page from the nearest neighbor node */
  		ret = migrate_pages(&source, new_node_page, NULL, 0,
9c620e2bc   Hugh Dickins   mm: remove offlin...
1295
  					MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
2932c8b05   Michal Hocko   mm, memory_hotplu...
1296
1297
1298
1299
1300
1301
  		if (ret) {
  			list_for_each_entry(page, &source, lru) {
  				pr_warn("migrating pfn %lx failed ret:%d ",
  				       page_to_pfn(page), ret);
  				dump_page(page, "migration failure");
  			}
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1302
  			putback_movable_pages(&source);
2932c8b05   Michal Hocko   mm, memory_hotplu...
1303
  		}
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1304
  	}
1723058ea   Oscar Salvador   mm, memory_hotplu...
1305

0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
  	return ret;
  }
  
  /*
   * walk_system_ram_range() callback used by __offline_pages().
   *
   * Offlines the isolated range [start, start + nr_pages) through
   * __offline_isolated_pages() and adds the value it returns to the
   * unsigned long counter that @data points to. Always returns 0.
   */
  static int
  offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
  			void *data)
  {
  	unsigned long *total = data;
  	unsigned long end = start + nr_pages;
  
  	*total += __offline_isolated_pages(start, end);
  	return 0;
  }
  
  /*
   * Check all pages in range, recoreded as memory resource, are isolated.
   */
  static int
  check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
  			void *data)
  {
5557c766a   Michal Hocko   mm, memory_hotplu...
1329
  	return test_pages_isolated(start_pfn, start_pfn + nr_pages, true);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1330
  }
c5320926e   Tang Chen   mem-hotplug: intr...
1331
1332
  /*
   * Handler for the "movable_node" early parameter. Sets
   * movable_node_enabled when CONFIG_HAVE_MEMBLOCK_NODE_MAP is available,
   * otherwise only warns. @p is unused. Always returns 0.
   */
  static int __init cmdline_parse_movable_node(char *p)
  {
  #ifndef CONFIG_HAVE_MEMBLOCK_NODE_MAP
  	pr_warn("movable_node parameter depends on CONFIG_HAVE_MEMBLOCK_NODE_MAP to work properly\n");
  #else
  	movable_node_enabled = true;
  #endif
  	return 0;
  }
  early_param("movable_node", cmdline_parse_movable_node);
d9713679d   Lai Jiangshan   memory_hotplug: f...
1342
1343
1344
1345
1346
1347
  /* check which state of node_states will be changed when offline memory */
  static void node_states_check_changes_offline(unsigned long nr_pages,
  		struct zone *zone, struct memory_notify *arg)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	unsigned long present_pages = 0;
86b27beae   Oscar Salvador   mm/memory_hotplug...
1348
  	enum zone_type zt;
d9713679d   Lai Jiangshan   memory_hotplug: f...
1349

98fa15f34   Anshuman Khandual   mm: replace all o...
1350
1351
1352
  	arg->status_change_nid = NUMA_NO_NODE;
  	arg->status_change_nid_normal = NUMA_NO_NODE;
  	arg->status_change_nid_high = NUMA_NO_NODE;
d9713679d   Lai Jiangshan   memory_hotplug: f...
1353
1354
  
  	/*
86b27beae   Oscar Salvador   mm/memory_hotplug...
1355
1356
1357
1358
1359
1360
  	 * Check whether node_states[N_NORMAL_MEMORY] will be changed.
  	 * If the memory to be offline is within the range
  	 * [0..ZONE_NORMAL], and it is the last present memory there,
  	 * the zones in that range will become empty after the offlining,
  	 * thus we can determine that we need to clear the node from
  	 * node_states[N_NORMAL_MEMORY].
d9713679d   Lai Jiangshan   memory_hotplug: f...
1361
  	 */
86b27beae   Oscar Salvador   mm/memory_hotplug...
1362
  	for (zt = 0; zt <= ZONE_NORMAL; zt++)
d9713679d   Lai Jiangshan   memory_hotplug: f...
1363
  		present_pages += pgdat->node_zones[zt].present_pages;
86b27beae   Oscar Salvador   mm/memory_hotplug...
1364
  	if (zone_idx(zone) <= ZONE_NORMAL && nr_pages >= present_pages)
d9713679d   Lai Jiangshan   memory_hotplug: f...
1365
  		arg->status_change_nid_normal = zone_to_nid(zone);
d9713679d   Lai Jiangshan   memory_hotplug: f...
1366

6715ddf94   Lai Jiangshan   hotplug: update n...
1367
1368
  #ifdef CONFIG_HIGHMEM
  	/*
86b27beae   Oscar Salvador   mm/memory_hotplug...
1369
1370
1371
1372
1373
1374
  	 * node_states[N_HIGH_MEMORY] contains nodes which
  	 * have normal memory or high memory.
  	 * Here we add the present_pages belonging to ZONE_HIGHMEM.
  	 * If the zone is within the range of [0..ZONE_HIGHMEM), and
  	 * we determine that the zones in that range become empty,
  	 * we need to clear the node for N_HIGH_MEMORY.
6715ddf94   Lai Jiangshan   hotplug: update n...
1375
  	 */
86b27beae   Oscar Salvador   mm/memory_hotplug...
1376
1377
  	present_pages += pgdat->node_zones[ZONE_HIGHMEM].present_pages;
  	if (zone_idx(zone) <= ZONE_HIGHMEM && nr_pages >= present_pages)
6715ddf94   Lai Jiangshan   hotplug: update n...
1378
  		arg->status_change_nid_high = zone_to_nid(zone);
6715ddf94   Lai Jiangshan   hotplug: update n...
1379
  #endif
d9713679d   Lai Jiangshan   memory_hotplug: f...
1380
  	/*
86b27beae   Oscar Salvador   mm/memory_hotplug...
1381
1382
1383
1384
1385
1386
1387
1388
  	 * We have accounted the pages from [0..ZONE_NORMAL), and
  	 * in case of CONFIG_HIGHMEM the pages from ZONE_HIGHMEM
  	 * as well.
  	 * Here we count the possible pages from ZONE_MOVABLE.
  	 * If after having accounted all the pages, we see that the nr_pages
  	 * to be offlined is over or equal to the accounted pages,
  	 * we know that the node will become empty, and so, we can clear
  	 * it for N_MEMORY as well.
d9713679d   Lai Jiangshan   memory_hotplug: f...
1389
  	 */
86b27beae   Oscar Salvador   mm/memory_hotplug...
1390
  	present_pages += pgdat->node_zones[ZONE_MOVABLE].present_pages;
d9713679d   Lai Jiangshan   memory_hotplug: f...
1391

d9713679d   Lai Jiangshan   memory_hotplug: f...
1392
1393
  	if (nr_pages >= present_pages)
  		arg->status_change_nid = zone_to_nid(zone);
d9713679d   Lai Jiangshan   memory_hotplug: f...
1394
1395
1396
1397
1398
1399
  }
  
  static void node_states_clear_node(int node, struct memory_notify *arg)
  {
  	if (arg->status_change_nid_normal >= 0)
  		node_clear_state(node, N_NORMAL_MEMORY);
cf01f6f5e   Oscar Salvador   mm/memory_hotplug...
1400
  	if (arg->status_change_nid_high >= 0)
d9713679d   Lai Jiangshan   memory_hotplug: f...
1401
  		node_clear_state(node, N_HIGH_MEMORY);
6715ddf94   Lai Jiangshan   hotplug: update n...
1402

cf01f6f5e   Oscar Salvador   mm/memory_hotplug...
1403
  	if (arg->status_change_nid >= 0)
6715ddf94   Lai Jiangshan   hotplug: update n...
1404
  		node_clear_state(node, N_MEMORY);
d9713679d   Lai Jiangshan   memory_hotplug: f...
1405
  }
a16cee10c   Wen Congyang   memory-hotplug: p...
1406
  /*
   * Offline the pfn range [start_pfn, end_pfn).
   *
   * Runs between mem_hotplug_begin() and mem_hotplug_done(). The steps are:
   *   1. require the range to sit in a single zone,
   *   2. isolate the range and send MEM_GOING_OFFLINE,
   *   3. repeatedly migrate movable pages away and dissolve free hugepages
   *      until check_pages_isolated_cb() reports the whole range isolated,
   *   4. offline the isolated pages, fix up zone/node accounting and send
   *      MEM_OFFLINE.
   *
   * Returns 0 on success. On failure the isolation is undone where it had
   * been set up, MEM_CANCEL_OFFLINE is sent where MEM_GOING_OFFLINE had been
   * sent, and a negative errno is returned (-EINVAL for a multi-zone range,
   * -EINTR when a signal is pending, otherwise the error of the failing
   * step).
   */
  static int __ref __offline_pages(unsigned long start_pfn,
  		  unsigned long end_pfn)
  {
  	unsigned long pfn, nr_pages;
  	unsigned long offlined_pages = 0;
  	int ret, node, nr_isolate_pageblock;
  	unsigned long flags;
  	unsigned long valid_start, valid_end;
  	struct zone *zone;
  	struct memory_notify arg;
  	char *reason;
  
  	mem_hotplug_begin();
  
  	/* This makes hotplug much easier...and readable.
  	   we assume this for now. .*/
  	if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
  				  &valid_end)) {
  		ret = -EINVAL;
  		reason = "multizone range";
  		goto failed_removal;
  	}
  
  	zone = page_zone(pfn_to_page(valid_start));
  	node = zone_to_nid(zone);
  	nr_pages = end_pfn - start_pfn;
  
  	/* set above range as isolated */
  	ret = start_isolate_page_range(start_pfn, end_pfn,
  				       MIGRATE_MOVABLE,
  				       SKIP_HWPOISON | REPORT_FAILURE);
  	if (ret < 0) {
  		reason = "failure to isolate range";
  		goto failed_removal;
  	}
  	/* A non-negative result is kept to adjust the zone counter below. */
  	nr_isolate_pageblock = ret;
  
  	arg.start_pfn = start_pfn;
  	arg.nr_pages = nr_pages;
  	node_states_check_changes_offline(nr_pages, zone, &arg);
  
  	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret) {
  		reason = "notifier failure";
  		goto failed_removal_isolated;
  	}
  
  	do {
  		/* scan_movable_pages() returns 0 once nothing is left. */
  		for (pfn = start_pfn; pfn;) {
  			if (signal_pending(current)) {
  				ret = -EINTR;
  				reason = "signal backoff";
  				goto failed_removal_isolated;
  			}
  
  			cond_resched();
  			lru_add_drain_all();
  
  			pfn = scan_movable_pages(pfn, end_pfn);
  			if (pfn) {
  				/*
  				 * TODO: fatal migration failures should bail
  				 * out
  				 */
  				do_migrate_range(pfn, end_pfn);
  			}
  		}
  
  		/*
  		 * Dissolve free hugepages in the memory block before doing
  		 * offlining actually in order to make hugetlbfs's object
  		 * counting consistent.
  		 */
  		ret = dissolve_free_huge_pages(start_pfn, end_pfn);
  		if (ret) {
  			reason = "failure to dissolve huge pages";
  			goto failed_removal_isolated;
  		}
  		/* check again */
  		ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
  					    NULL, check_pages_isolated_cb);
  	} while (ret);
  
  	/* Ok, all of our target is isolated.
  	   We cannot do rollback at this point. */
  	walk_system_ram_range(start_pfn, end_pfn - start_pfn,
  			      &offlined_pages, offline_isolated_pages_cb);
  	/* offlined_pages is unsigned long, so print it with %lu. */
  	pr_info("Offlined Pages %lu\n", offlined_pages);
  	/*
  	 * Onlining will reset pagetype flags and makes migrate type
  	 * MOVABLE, so just need to decrease the number of isolated
  	 * pageblocks zone counter here.
  	 */
  	spin_lock_irqsave(&zone->lock, flags);
  	zone->nr_isolate_pageblock -= nr_isolate_pageblock;
  	spin_unlock_irqrestore(&zone->lock, flags);
  
  	/* removal success */
  	adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages);
  	zone->present_pages -= offlined_pages;
  
  	pgdat_resize_lock(zone->zone_pgdat, &flags);
  	zone->zone_pgdat->node_present_pages -= offlined_pages;
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);
  
  	init_per_zone_wmark_min();
  
  	if (!populated_zone(zone)) {
  		zone_pcp_reset(zone);
  		build_all_zonelists(NULL);
  	} else
  		zone_pcp_update(zone);
  
  	node_states_clear_node(node, &arg);
  	/* The node lost its last memory: stop its kswapd and kcompactd. */
  	if (arg.status_change_nid >= 0) {
  		kswapd_stop(node);
  		kcompactd_stop(node);
  	}
  
  	vm_total_pages = nr_free_pagecache_pages();
  	writeback_set_ratelimit();
  
  	memory_notify(MEM_OFFLINE, &arg);
  	remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
  	mem_hotplug_done();
  	return 0;
  
  failed_removal_isolated:
  	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
  	memory_notify(MEM_CANCEL_OFFLINE, &arg);
  failed_removal:
  	pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
  		 (unsigned long long) start_pfn << PAGE_SHIFT,
  		 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
  		 reason);
  	/* pushback to free area */
  	mem_hotplug_done();
  	return ret;
  }
71088785c   Badari Pulavarty   mm: cleanup to ma...
1543

a16cee10c   Wen Congyang   memory-hotplug: p...
1544
1545
  /*
   * Offline @nr_pages pages starting at @start_pfn. Thin wrapper that turns
   * the length into an end pfn and returns the result of __offline_pages().
   */
  int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
  {
  	unsigned long end_pfn = start_pfn + nr_pages;
  
  	return __offline_pages(start_pfn, end_pfn);
  }
d6de9d534   Xishi Qiu   mm/memory_hotplug...
1548
  static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
bbc76be67   Wen Congyang   memory-hotplug: r...
1549
1550
  {
  	int ret = !is_memblock_offlined(mem);
349daa0f9   Randy Dunlap   mm: fix memory_ho...
1551
1552
1553
1554
  	if (unlikely(ret)) {
  		phys_addr_t beginpa, endpa;
  
  		beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
b6c88d3b9   David Hildenbrand   drivers/base/memo...
1555
  		endpa = beginpa + memory_block_size_bytes() - 1;
756a025f0   Joe Perches   mm: coalesce spli...
1556
1557
  		pr_warn("removing memory fails, because memory [%pa-%pa] is onlined
  ",
349daa0f9   Randy Dunlap   mm: fix memory_ho...
1558
  			&beginpa, &endpa);
bbc76be67   Wen Congyang   memory-hotplug: r...
1559

eca499ab3   Pavel Tatashin   mm/hotplug: make ...
1560
1561
1562
  		return -EBUSY;
  	}
  	return 0;
bbc76be67   Wen Congyang   memory-hotplug: r...
1563
  }
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1564
  /*
   * Returns -EBUSY if any present CPU belongs to the node described by
   * @pgdat, 0 otherwise.
   */
  static int check_cpu_on_node(pg_data_t *pgdat)
  {
  	int cpu;
  
  	for_each_present_cpu(cpu) {
  		if (cpu_to_node(cpu) != pgdat->node_id)
  			continue;
  
  		/*
  		 * A CPU of this node has not been removed, so the node
  		 * cannot be offlined.
  		 */
  		return -EBUSY;
  	}
  
  	return 0;
  }
2c91f8fc6   David Hildenbrand   mm/memory_hotplug...
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
  /*
   * for_each_memory_block() callback: @arg points to a node id. Returns
   * -EEXIST when @mem is recorded as belonging to that node, 0 otherwise.
   */
  static int check_no_memblock_for_node_cb(struct memory_block *mem, void *arg)
  {
  	const int *wanted = arg;
  
  	/*
  	 * If a memory block belongs to multiple nodes, the stored nid is not
  	 * reliable. However, such blocks are always online (e.g., cannot get
  	 * offlined) and, therefore, are still spanned by the node.
  	 */
  	if (mem->nid == *wanted)
  		return -EEXIST;
  
  	return 0;
  }
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1590
1591
  /**
   * try_offline_node
e8b098fc5   Mike Rapoport   mm: kernel-doc: a...
1592
   * @nid: the node ID
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1593
1594
1595
1596
1597
1598
   *
   * Offline a node if all memory sections and cpus of the node are removed.
   *
   * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
   * and online/offline operations before this call.
   */
90b30cdc1   Wen Congyang   memory-hotplug: e...
1599
  void try_offline_node(int nid)
60a5a19e7   Tang Chen   memory-hotplug: r...
1600
  {
d822b86a9   Wen Congyang   memory-hotplug: f...
1601
  	pg_data_t *pgdat = NODE_DATA(nid);
2c91f8fc6   David Hildenbrand   mm/memory_hotplug...
1602
  	int rc;
60a5a19e7   Tang Chen   memory-hotplug: r...
1603

2c91f8fc6   David Hildenbrand   mm/memory_hotplug...
1604
1605
1606
1607
1608
1609
1610
  	/*
  	 * If the node still spans pages (especially ZONE_DEVICE), don't
  	 * offline it. A node spans memory after move_pfn_range_to_zone(),
  	 * e.g., after the memory block was onlined.
  	 */
  	if (pgdat->node_spanned_pages)
  		return;
60a5a19e7   Tang Chen   memory-hotplug: r...
1611

2c91f8fc6   David Hildenbrand   mm/memory_hotplug...
1612
1613
1614
1615
1616
1617
1618
  	/*
  	 * Especially offline memory blocks might not be spanned by the
  	 * node. They will get spanned by the node once they get onlined.
  	 * However, they link to the node in sysfs and can get onlined later.
  	 */
  	rc = for_each_memory_block(&nid, check_no_memblock_for_node_cb);
  	if (rc)
60a5a19e7   Tang Chen   memory-hotplug: r...
1619
  		return;
60a5a19e7   Tang Chen   memory-hotplug: r...
1620

46a3679b8   Michal Hocko   mm, memory_hotplu...
1621
  	if (check_cpu_on_node(pgdat))
60a5a19e7   Tang Chen   memory-hotplug: r...
1622
1623
1624
1625
1626
1627
1628
1629
1630
  		return;
  
  	/*
  	 * all memory/cpu of this node are removed, we can offline this
  	 * node now.
  	 */
  	node_set_offline(nid);
  	unregister_one_node(nid);
  }
90b30cdc1   Wen Congyang   memory-hotplug: e...
1631
  EXPORT_SYMBOL(try_offline_node);
60a5a19e7   Tang Chen   memory-hotplug: r...
1632

d9eb1417c   David Hildenbrand   mm/memory_hotplug...
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
  /*
   * Release the iomem resource covering [start, start + size). A failure is
   * only logged, never propagated.
   */
  static void __release_memory_resource(resource_size_t start,
  				      resource_size_t size)
  {
  	resource_size_t last;
  	int err;
  
  	/*
  	 * When removing memory in the same granularity as it was added,
  	 * this function never fails. It might only fail if resources
  	 * have to be adjusted or split. We'll ignore the error, as
  	 * removing of memory cannot fail.
  	 */
  	err = release_mem_region_adjustable(&iomem_resource, start, size);
  	if (!err)
  		return;
  
  	last = start + size - 1;
  	pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
  		&start, &last, err);
  }
eca499ab3   Pavel Tatashin   mm/hotplug: make ...
1653
  /*
   * Remove the memory region [start, start + size) that belongs to node
   * @nid.
   *
   * Returns 0 on success, or the error reported by
   * check_memblock_offlined_cb() (-EBUSY) when a memory block in the range
   * is still online; nothing has been removed in that case.
   */
  static int __ref try_remove_memory(int nid, u64 start, u64 size)
  {
  	int rc;
  
  	BUG_ON(check_hotplug_memory_range(start, size));
  
  	/*
  	 * All memory blocks must be offlined before removing memory.  Check
  	 * whether all memory blocks in question are offline and return error
  	 * if this is not the case.
  	 *
  	 * mem_hotplug_begin() has not been called yet, so return directly
  	 * instead of going through mem_hotplug_done(): that would release a
  	 * lock which was never taken.
  	 */
  	rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb);
  	if (rc)
  		return rc;
  
  	/* remove memmap entry */
  	firmware_map_remove(start, start + size, "System RAM");
  	memblock_free(start, size);
  	memblock_remove(start, size);
  
  	/*
  	 * Memory block device removal under the device_hotplug_lock is
  	 * a barrier against racing online attempts.
  	 */
  	remove_memory_block_devices(start, size);
  
  	mem_hotplug_begin();
  
  	arch_remove_memory(nid, start, size, NULL);
  	__release_memory_resource(start, size);
  
  	try_offline_node(nid);
  
  	mem_hotplug_done();
  	return 0;
  }
d15e59260   David Hildenbrand   mm/memory_hotplug...
1687

eca499ab3   Pavel Tatashin   mm/hotplug: make ...
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
  /**
   * __remove_memory
   * @nid: the node ID
   * @start: physical address of the region to remove
   * @size: size of the region to remove
   *
   * Triggers BUG() if try_remove_memory() fails, i.e. if some memory in the
   * region was not offlined before this call.
   *
   * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
   * and online/offline operations before this call, as required by
   * try_offline_node().
   */
  void __remove_memory(int nid, u64 start, u64 size)
  {
  	int rc = try_remove_memory(nid, start, size);
  
  	/* Removal is not allowed to fail here. */
  	if (rc)
  		BUG();
  }
  
  /*
   * Remove memory if every memory block in the region is offline; otherwise
   * return -EBUSY. Takes and releases the device hotplug lock around the
   * removal.
   */
  int remove_memory(int nid, u64 start, u64 size)
  {
  	int ret;
  
  	lock_device_hotplug();
  	ret = try_remove_memory(nid, start, size);
  	unlock_device_hotplug();
  
  	return ret;
  }
  EXPORT_SYMBOL_GPL(remove_memory);
aba6efc47   Rafael J. Wysocki   Memory hotplug: M...
1723
  #endif /* CONFIG_MEMORY_HOTREMOVE */