Blame view

mm/memory_hotplug.c 50 KB
3947be196   Dave Hansen   [PATCH] memory ho...
1
2
3
4
5
  /*
   *  linux/mm/memory_hotplug.c
   *
   *  Copyright (C)
   */
3947be196   Dave Hansen   [PATCH] memory ho...
6
7
  #include <linux/stddef.h>
  #include <linux/mm.h>
174cd4b1e   Ingo Molnar   sched/headers: Pr...
8
  #include <linux/sched/signal.h>
3947be196   Dave Hansen   [PATCH] memory ho...
9
10
11
  #include <linux/swap.h>
  #include <linux/interrupt.h>
  #include <linux/pagemap.h>
3947be196   Dave Hansen   [PATCH] memory ho...
12
  #include <linux/compiler.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
13
  #include <linux/export.h>
3947be196   Dave Hansen   [PATCH] memory ho...
14
  #include <linux/pagevec.h>
2d1d43f6a   Chandra Seetharaman   [PATCH] call mm/p...
15
  #include <linux/writeback.h>
3947be196   Dave Hansen   [PATCH] memory ho...
16
17
18
19
  #include <linux/slab.h>
  #include <linux/sysctl.h>
  #include <linux/cpu.h>
  #include <linux/memory.h>
4b94ffdc4   Dan Williams   x86, mm: introduc...
20
  #include <linux/memremap.h>
3947be196   Dave Hansen   [PATCH] memory ho...
21
22
23
  #include <linux/memory_hotplug.h>
  #include <linux/highmem.h>
  #include <linux/vmalloc.h>
0a5470390   KAMEZAWA Hiroyuki   [PATCH] register ...
24
  #include <linux/ioport.h>
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
25
26
27
  #include <linux/delay.h>
  #include <linux/migrate.h>
  #include <linux/page-isolation.h>
71088785c   Badari Pulavarty   mm: cleanup to ma...
28
  #include <linux/pfn.h>
6ad696d2c   Andi Kleen   mm: allow memory ...
29
  #include <linux/suspend.h>
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
30
  #include <linux/mm_inline.h>
d96ae5309   akpm@linux-foundation.org   memory-hotplug: c...
31
  #include <linux/firmware-map.h>
60a5a19e7   Tang Chen   memory-hotplug: r...
32
  #include <linux/stop_machine.h>
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
33
  #include <linux/hugetlb.h>
c5320926e   Tang Chen   mem-hotplug: intr...
34
  #include <linux/memblock.h>
f784a3f19   Tang Chen   mem-hotplug: rese...
35
  #include <linux/bootmem.h>
698b1b306   Vlastimil Babka   mm, compaction: i...
36
  #include <linux/compaction.h>
2c25071be   Michal Hocko   hwpoison, memory_...
37
  #include <linux/rmap.h>
3947be196   Dave Hansen   [PATCH] memory ho...
38
39
  
  #include <asm/tlbflush.h>
1e5ad9a3b   Adrian Bunk   mm/memory_hotplug...
40
  #include "internal.h"
9d0ad8ca4   Daniel Kiper   mm: extend memory...
41
42
43
44
45
46
47
48
49
50
  /*
   * online_page_callback contains pointer to current page onlining function.
   * Initially it is generic_online_page(). If it is required it could be
   * changed by calling set_online_page_callback() for callback registration
   * and restore_online_page_callback() for generic callback restore.
   */
  
  static void generic_online_page(struct page *page);
  
  static online_page_callback_t online_page_callback = generic_online_page;
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
51
  static DEFINE_MUTEX(online_page_callback_lock);
9d0ad8ca4   Daniel Kiper   mm: extend memory...
52

3f906ba23   Thomas Gleixner   mm/memory-hotplug...
53
  DEFINE_STATIC_PERCPU_RWSEM(mem_hotplug_lock);
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
54

3f906ba23   Thomas Gleixner   mm/memory-hotplug...
55
56
57
58
  /* Acquire mem_hotplug_lock on the read side (percpu_down_read). */
  void get_online_mems(void)
  {
  	percpu_down_read(&mem_hotplug_lock);
  }
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
59

3f906ba23   Thomas Gleixner   mm/memory-hotplug...
60
61
62
63
  /* Release the read side of mem_hotplug_lock (percpu_up_read). */
  void put_online_mems(void)
  {
  	percpu_up_read(&mem_hotplug_lock);
  }
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
64

4932381ee   Michal Hocko   mm, memory_hotplu...
65
  bool movable_node_enabled = false;
8604d9e53   Vitaly Kuznetsov   memory_hotplug: i...
66
  #ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
67
  bool memhp_auto_online;
8604d9e53   Vitaly Kuznetsov   memory_hotplug: i...
68
69
70
  #else
  bool memhp_auto_online = true;
  #endif
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
71
  EXPORT_SYMBOL_GPL(memhp_auto_online);
86dd995d6   Vitaly Kuznetsov   memory_hotplug: i...
72
73
74
75
76
77
78
79
80
81
  /*
   * Parse the "memhp_default_state=" boot parameter.
   *
   * "online" sets memhp_auto_online to true and "offline" sets it to false;
   * any other value leaves the current setting untouched.  Always returns 1.
   */
  static int __init setup_memhp_default_state(char *str)
  {
  	const bool want_online = !strcmp(str, "online");

  	if (want_online || !strcmp(str, "offline"))
  		memhp_auto_online = want_online;

  	return 1;
  }
  __setup("memhp_default_state=", setup_memhp_default_state);
30467e0b3   David Rientjes   mm, hotplug: fix ...
82
  void mem_hotplug_begin(void)
20d6c96b5   KOSAKI Motohiro   mem-hotplug: intr...
83
  {
3f906ba23   Thomas Gleixner   mm/memory-hotplug...
84
85
  	cpus_read_lock();
  	percpu_down_write(&mem_hotplug_lock);
20d6c96b5   KOSAKI Motohiro   mem-hotplug: intr...
86
  }
30467e0b3   David Rientjes   mm, hotplug: fix ...
87
  void mem_hotplug_done(void)
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
88
  {
3f906ba23   Thomas Gleixner   mm/memory-hotplug...
89
90
  	percpu_up_write(&mem_hotplug_lock);
  	cpus_read_unlock();
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
91
  }
20d6c96b5   KOSAKI Motohiro   mem-hotplug: intr...
92

45e0b78b0   Keith Mannthey   [PATCH] hot-add-m...
93
94
95
  /*
   * Add the range [start, start + size) to iomem_resource as a busy
   * "System RAM" resource.
   *
   * Returns the newly allocated resource on success, ERR_PTR(-ENOMEM) when
   * the allocation fails, or ERR_PTR(-EEXIST) when the range conflicts with
   * a resource that is already registered.  On failure nothing is left
   * allocated.
   */
  static struct resource *register_memory_resource(u64 start, u64 size)
  {
  	struct resource *res, *conflict;

  	res = kzalloc(sizeof(*res), GFP_KERNEL);
  	if (!res)
  		return ERR_PTR(-ENOMEM);

  	res->name = "System RAM";
  	res->start = start;
  	res->end = start + size - 1;
  	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;

  	conflict = request_resource_conflict(&iomem_resource, res);
  	if (conflict) {
  		/* Give an extra hint when device private memory is in the way. */
  		if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) {
  			pr_debug("Device unaddressable memory block "
  				 "memory hotplug at %#010llx !\n",
  				 (unsigned long long)start);
  		}
  		pr_debug("System RAM resource %pR cannot be added\n", res);
  		kfree(res);
  		return ERR_PTR(-EEXIST);
  	}
  	return res;
  }
  
  /*
   * Release @res from the resource tree and free it.
   * A NULL @res is silently ignored.
   */
  static void release_memory_resource(struct resource *res)
  {
  	if (res) {
  		release_resource(res);
  		kfree(res);
  	}
  }
53947027a   Keith Mannthey   [PATCH] hot-add-m...
129
  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
130
131
  void get_page_bootmem(unsigned long info,  struct page *page,
  		      unsigned long type)
047532787   Yasunori Goto   memory hotplug: r...
132
  {
ddffe98d1   Yasuaki Ishimatsu   mm/memory_hotplug...
133
  	page->freelist = (void *)type;
047532787   Yasunori Goto   memory hotplug: r...
134
135
  	SetPagePrivate(page);
  	set_page_private(page, info);
fe896d187   Joonsoo Kim   mm: introduce pag...
136
  	page_ref_inc(page);
047532787   Yasunori Goto   memory hotplug: r...
137
  }
170a5a7eb   Jiang Liu   mm: make __free_p...
138
  void put_page_bootmem(struct page *page)
047532787   Yasunori Goto   memory hotplug: r...
139
  {
5f24ce5fd   Andrea Arcangeli   thp: remove PG_buddy
140
  	unsigned long type;
047532787   Yasunori Goto   memory hotplug: r...
141

ddffe98d1   Yasuaki Ishimatsu   mm/memory_hotplug...
142
  	type = (unsigned long) page->freelist;
5f24ce5fd   Andrea Arcangeli   thp: remove PG_buddy
143
144
  	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
  	       type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
047532787   Yasunori Goto   memory hotplug: r...
145

fe896d187   Joonsoo Kim   mm: introduce pag...
146
  	if (page_ref_dec_return(page) == 1) {
ddffe98d1   Yasuaki Ishimatsu   mm/memory_hotplug...
147
  		page->freelist = NULL;
047532787   Yasunori Goto   memory hotplug: r...
148
149
  		ClearPagePrivate(page);
  		set_page_private(page, 0);
5f24ce5fd   Andrea Arcangeli   thp: remove PG_buddy
150
  		INIT_LIST_HEAD(&page->lru);
170a5a7eb   Jiang Liu   mm: make __free_p...
151
  		free_reserved_page(page);
047532787   Yasunori Goto   memory hotplug: r...
152
  	}
047532787   Yasunori Goto   memory hotplug: r...
153
  }
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
154
155
  #ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
  #ifndef CONFIG_SPARSEMEM_VMEMMAP
d92bc3185   Adrian Bunk   mm: make register...
156
  static void register_page_bootmem_info_section(unsigned long start_pfn)
047532787   Yasunori Goto   memory hotplug: r...
157
158
159
160
  {
  	unsigned long *usemap, mapsize, section_nr, i;
  	struct mem_section *ms;
  	struct page *page, *memmap;
047532787   Yasunori Goto   memory hotplug: r...
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
  	section_nr = pfn_to_section_nr(start_pfn);
  	ms = __nr_to_section(section_nr);
  
  	/* Get section's memmap address */
  	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  
  	/*
  	 * Get page for the memmap's phys address
  	 * XXX: need more consideration for sparse_vmemmap...
  	 */
  	page = virt_to_page(memmap);
  	mapsize = sizeof(struct page) * PAGES_PER_SECTION;
  	mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
  
  	/* remember memmap's page */
  	for (i = 0; i < mapsize; i++, page++)
  		get_page_bootmem(section_nr, page, SECTION_INFO);
  
  	usemap = __nr_to_section(section_nr)->pageblock_flags;
  	page = virt_to_page(usemap);
  
  	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
  
  	for (i = 0; i < mapsize; i++, page++)
af370fb8c   Yasunori Goto   memory hotplug: s...
185
  		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
047532787   Yasunori Goto   memory hotplug: r...
186
187
  
  }
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
  #else /* CONFIG_SPARSEMEM_VMEMMAP */
  static void register_page_bootmem_info_section(unsigned long start_pfn)
  {
  	unsigned long *usemap, mapsize, section_nr, i;
  	struct mem_section *ms;
  	struct page *page, *memmap;
  
  	if (!pfn_valid(start_pfn))
  		return;
  
  	section_nr = pfn_to_section_nr(start_pfn);
  	ms = __nr_to_section(section_nr);
  
  	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  
  	register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
  
  	usemap = __nr_to_section(section_nr)->pageblock_flags;
  	page = virt_to_page(usemap);
  
  	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
  
  	for (i = 0; i < mapsize; i++, page++)
  		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
  }
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
047532787   Yasunori Goto   memory hotplug: r...
214

7ded384a1   Linus Torvalds   mm: fix section m...
215
  /*
   * Register bootmem info for node @pgdat: first the pages that hold the
   * pglist_data structure itself (type NODE_INFO), then every section in
   * the node's pfn span that is valid and belongs to this node.
   */
  void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
  {
  	const int node = pgdat->node_id;
  	unsigned long idx, pfn, end_pfn, nr_pages;
  	struct page *page;

  	nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
  	page = virt_to_page(pgdat);

  	for (idx = 0; idx < nr_pages; idx++)
  		get_page_bootmem(node, page + idx, NODE_INFO);

  	end_pfn = pgdat_end_pfn(pgdat);

  	/* register section info */
  	for (pfn = pgdat->node_start_pfn; pfn < end_pfn;
  	     pfn += PAGES_PER_SECTION) {
  		if (!pfn_valid(pfn))
  			continue;

  		/*
  		 * Some platforms can assign the same pfn to multiple nodes - on
  		 * node0 as well as nodeN.  To avoid registering a pfn against
  		 * multiple nodes we check that this pfn does not already
  		 * reside in some other nodes.
  		 */
  		if (early_pfn_to_nid(pfn) != node)
  			continue;

  		register_page_bootmem_info_section(pfn);
  	}
  }
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
241
  #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
047532787   Yasunori Goto   memory hotplug: r...
242

f1dd2cd13   Michal Hocko   mm, memory_hotplu...
243
244
  /*
   * Add the section starting at @phys_start_pfn to node @nid.
   *
   * Returns -EEXIST when @phys_start_pfn is already valid, the negative
   * error from sparse_add_one_section() on failure, 0 when no memory block
   * is wanted, and otherwise the result of register_new_memory().
   */
  static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
  		bool want_memblock)
  {
  	unsigned long offset;
  	int ret;

  	if (pfn_valid(phys_start_pfn))
  		return -EEXIST;

  	ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn);
  	if (ret < 0)
  		return ret;

  	/*
  	 * Make all the pages reserved so that nobody will stumble over half
  	 * initialized state.
  	 * FIXME: We also have to associate it with a node because pfn_to_node
  	 * relies on having page with the proper node.
  	 */
  	for (offset = 0; offset < PAGES_PER_SECTION; offset++) {
  		unsigned long pfn = phys_start_pfn + offset;

  		if (pfn_valid(pfn)) {
  			struct page *page = pfn_to_page(pfn);

  			set_page_node(page, nid);
  			SetPageReserved(page);
  		}
  	}

  	return want_memblock ?
  		register_new_memory(nid, __pfn_to_section(phys_start_pfn)) : 0;
  }
4edd7ceff   David Rientjes   mm, hotplug: avoi...
275
276
277
278
279
280
  /*
   * Reasonably generic function for adding memory.  It is
   * expected that archs that support memory hotplug will
   * call this function after deciding the zone to which to
   * add the new pages.
   */
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
281
  int __ref __add_pages(int nid, unsigned long phys_start_pfn,
1b862aecf   Michal Hocko   mm, memory_hotplu...
282
  			unsigned long nr_pages, bool want_memblock)
4edd7ceff   David Rientjes   mm, hotplug: avoi...
283
284
285
286
  {
  	unsigned long i;
  	int err = 0;
  	int start_sec, end_sec;
4b94ffdc4   Dan Williams   x86, mm: introduc...
287
  	struct vmem_altmap *altmap;
4edd7ceff   David Rientjes   mm, hotplug: avoi...
288
289
290
  	/* during initialize mem_map, align hot-added range to section */
  	start_sec = pfn_to_section_nr(phys_start_pfn);
  	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
4b94ffdc4   Dan Williams   x86, mm: introduc...
291
292
293
294
295
296
297
298
299
  	altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
  	if (altmap) {
  		/*
  		 * Validate altmap is within bounds of the total request
  		 */
  		if (altmap->base_pfn != phys_start_pfn
  				|| vmem_altmap_offset(altmap) > nr_pages) {
  			pr_warn_once("memory add fail, invalid altmap
  ");
7cf91a98e   Joonsoo Kim   mm/compaction: sp...
300
301
  			err = -EINVAL;
  			goto out;
4b94ffdc4   Dan Williams   x86, mm: introduc...
302
303
304
  		}
  		altmap->alloc = 0;
  	}
4edd7ceff   David Rientjes   mm, hotplug: avoi...
305
  	for (i = start_sec; i <= end_sec; i++) {
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
306
  		err = __add_section(nid, section_nr_to_pfn(i), want_memblock);
4edd7ceff   David Rientjes   mm, hotplug: avoi...
307
308
309
310
311
312
313
314
315
  
  		/*
  		 * EEXIST is finally dealt with by ioresource collision
  		 * check. see add_memory() => register_memory_resource()
  		 * Warning will be printed if there is collision.
  		 */
  		if (err && (err != -EEXIST))
  			break;
  		err = 0;
f64ac5e6e   Michal Hocko   mm, memory_hotplu...
316
  		cond_resched();
4edd7ceff   David Rientjes   mm, hotplug: avoi...
317
  	}
c435a3905   Zhu Guihua   mm/memory hotplug...
318
  	vmemmap_populate_print_last();
7cf91a98e   Joonsoo Kim   mm/compaction: sp...
319
  out:
4edd7ceff   David Rientjes   mm, hotplug: avoi...
320
321
322
323
324
  	return err;
  }
  EXPORT_SYMBOL_GPL(__add_pages);
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
325
  /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
d09b0137d   YASUAKI ISHIMATSU   mm/memory_hotplug...
326
  static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
  				     unsigned long start_pfn,
  				     unsigned long end_pfn)
  {
  	struct mem_section *ms;
  
  	for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SECTION) {
  		ms = __pfn_to_section(start_pfn);
  
  		if (unlikely(!valid_section(ms)))
  			continue;
  
  		if (unlikely(pfn_to_nid(start_pfn) != nid))
  			continue;
  
  		if (zone && zone != page_zone(pfn_to_page(start_pfn)))
  			continue;
  
  		return start_pfn;
  	}
  
  	return 0;
  }
  
  /* find the biggest valid pfn in the range [start_pfn, end_pfn). */
d09b0137d   YASUAKI ISHIMATSU   mm/memory_hotplug...
351
  static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
  				    unsigned long start_pfn,
  				    unsigned long end_pfn)
  {
  	struct mem_section *ms;
  	unsigned long pfn;
  
  	/* pfn is the end pfn of a memory section. */
  	pfn = end_pfn - 1;
  	for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) {
  		ms = __pfn_to_section(pfn);
  
  		if (unlikely(!valid_section(ms)))
  			continue;
  
  		if (unlikely(pfn_to_nid(pfn) != nid))
  			continue;
  
  		if (zone && zone != page_zone(pfn_to_page(pfn)))
  			continue;
  
  		return pfn;
  	}
  
  	return 0;
  }
  
  /*
   * Adjust @zone's span after the section [start_pfn, end_pfn) is removed.
   *
   * If the section sat at the start or the end of the zone, the span is
   * trimmed to the next remaining valid section on that side.  If no other
   * valid section of this zone remains at all, the span is reset to empty.
   * The update is done under the zone span write lock.
   */
  static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
  			     unsigned long end_pfn)
  {
  	const unsigned long span_start = zone->zone_start_pfn;
  	const unsigned long span_end = zone_end_pfn(zone);
  	const int nid = zone_to_nid(zone);
  	unsigned long pfn;

  	zone_span_writelock(zone);
  	if (span_start == start_pfn) {
  		/*
  		 * If the section is smallest section in the zone, it need
  		 * shrink zone->zone_start_pfn and zone->zone_spanned_pages.
  		 * In this case, we find second smallest valid mem_section
  		 * for shrinking zone.
  		 */
  		pfn = find_smallest_section_pfn(nid, zone, end_pfn, span_end);
  		if (pfn) {
  			zone->zone_start_pfn = pfn;
  			zone->spanned_pages = span_end - pfn;
  		}
  	} else if (span_end == end_pfn) {
  		/*
  		 * If the section is biggest section in the zone, it need
  		 * shrink zone->spanned_pages.
  		 * In this case, we find second biggest valid mem_section for
  		 * shrinking zone.
  		 */
  		pfn = find_biggest_section_pfn(nid, zone, span_start,
  					       start_pfn);
  		if (pfn)
  			zone->spanned_pages = pfn - span_start + 1;
  	}

  	/*
  	 * The section is not biggest or smallest mem_section in the zone, it
  	 * only creates a hole in the zone. So in this case, we need not
  	 * change the zone. But perhaps, the zone has only hole data. Thus
  	 * it check the zone has only hole or not.
  	 */
  	for (pfn = span_start; pfn < span_end; pfn += PAGES_PER_SECTION) {
  		if (unlikely(!valid_section(__pfn_to_section(pfn))))
  			continue;

  		if (page_zone(pfn_to_page(pfn)) != zone)
  			continue;

  		/* If the section is current section, it continues the loop */
  		if (pfn == start_pfn)
  			continue;

  		/* If we find valid section, we have nothing to do */
  		goto unlock;
  	}

  	/* The zone has no valid section */
  	zone->zone_start_pfn = 0;
  	zone->spanned_pages = 0;
  unlock:
  	zone_span_writeunlock(zone);
  }
  
  /*
   * Adjust @pgdat's span after the section [start_pfn, end_pfn) is removed.
   *
   * If the section sat at the start or the end of the node, the span is
   * trimmed to the next remaining valid section on that side.  If no other
   * valid section of this node remains at all, the span is reset to empty.
   */
  static void shrink_pgdat_span(struct pglist_data *pgdat,
  			      unsigned long start_pfn, unsigned long end_pfn)
  {
  	const unsigned long span_start = pgdat->node_start_pfn;
  	const unsigned long span_end = pgdat_end_pfn(pgdat);
  	const int nid = pgdat->node_id;
  	unsigned long pfn;

  	if (span_start == start_pfn) {
  		/*
  		 * If the section is smallest section in the pgdat, it need
  		 * shrink pgdat->node_start_pfn and pgdat->node_spanned_pages.
  		 * In this case, we find second smallest valid mem_section
  		 * for shrinking zone.
  		 */
  		pfn = find_smallest_section_pfn(nid, NULL, end_pfn, span_end);
  		if (pfn) {
  			pgdat->node_start_pfn = pfn;
  			pgdat->node_spanned_pages = span_end - pfn;
  		}
  	} else if (span_end == end_pfn) {
  		/*
  		 * If the section is biggest section in the pgdat, it need
  		 * shrink pgdat->node_spanned_pages.
  		 * In this case, we find second biggest valid mem_section for
  		 * shrinking zone.
  		 */
  		pfn = find_biggest_section_pfn(nid, NULL, span_start,
  					       start_pfn);
  		if (pfn)
  			pgdat->node_spanned_pages = pfn - span_start + 1;
  	}

  	/*
  	 * If the section is not biggest or smallest mem_section in the pgdat,
  	 * it only creates a hole in the pgdat. So in this case, we need not
  	 * change the pgdat.
  	 * But perhaps, the pgdat has only hole data. Thus it check the pgdat
  	 * has only hole or not.
  	 */
  	for (pfn = span_start; pfn < span_end; pfn += PAGES_PER_SECTION) {
  		if (unlikely(!valid_section(__pfn_to_section(pfn))))
  			continue;

  		if (pfn_to_nid(pfn) != nid)
  			continue;

  		/* If the section is current section, it continues the loop */
  		if (pfn == start_pfn)
  			continue;

  		/* If we find valid section, we have nothing to do */
  		return;
  	}

  	/* The pgdat has no valid section */
  	pgdat->node_start_pfn = 0;
  	pgdat->node_spanned_pages = 0;
  }
  
  /*
   * Shrink the spans of @zone and of its node for the removal of the one
   * section starting at @start_pfn, holding the pgdat resize lock.
   */
  static void __remove_zone(struct zone *zone, unsigned long start_pfn)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	const unsigned long section_end = start_pfn + PAGES_PER_SECTION;
  	unsigned long flags;

  	pgdat_resize_lock(pgdat, &flags);
  	shrink_zone_span(zone, start_pfn, section_end);
  	shrink_pgdat_span(pgdat, start_pfn, section_end);
  	pgdat_resize_unlock(pgdat, &flags);
  }
4b94ffdc4   Dan Williams   x86, mm: introduc...
522
523
  static int __remove_section(struct zone *zone, struct mem_section *ms,
  		unsigned long map_offset)
ea01ea937   Badari Pulavarty   hotplug memory re...
524
  {
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
525
526
  	unsigned long start_pfn;
  	int scn_nr;
ea01ea937   Badari Pulavarty   hotplug memory re...
527
528
529
530
531
532
533
534
  	int ret = -EINVAL;
  
  	if (!valid_section(ms))
  		return ret;
  
  	ret = unregister_memory_section(ms);
  	if (ret)
  		return ret;
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
535
  	scn_nr = __section_nr(ms);
1dd2bfc86   YASUAKI ISHIMATSU   mm/memory_hotplug...
536
  	start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
537
  	__remove_zone(zone, start_pfn);
4b94ffdc4   Dan Williams   x86, mm: introduc...
538
  	sparse_remove_one_section(zone, ms, map_offset);
ea01ea937   Badari Pulavarty   hotplug memory re...
539
540
  	return 0;
  }
ea01ea937   Badari Pulavarty   hotplug memory re...
541
542
543
544
545
546
547
548
549
550
551
552
553
554
  /**
   * __remove_pages() - remove sections of pages from a zone
   * @zone: zone from which pages need to be removed
   * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
   * @nr_pages: number of pages to remove (must be multiple of section size)
   *
   * Generic helper function to remove section mappings and sysfs entries
   * for the section of the memory we are removing. Caller needs to make
   * sure that pages are marked reserved and zones are adjust properly by
   * calling offline_pages().
   */
  int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
  		 unsigned long nr_pages)
  {
fe74ebb10   Toshi Kani   mm: change __remo...
555
  	unsigned long i;
4b94ffdc4   Dan Williams   x86, mm: introduc...
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
  	unsigned long map_offset = 0;
  	int sections_to_remove, ret = 0;
  
  	/* In the ZONE_DEVICE case device driver owns the memory region */
  	if (is_dev_zone(zone)) {
  		struct page *page = pfn_to_page(phys_start_pfn);
  		struct vmem_altmap *altmap;
  
  		altmap = to_vmem_altmap((unsigned long) page);
  		if (altmap)
  			map_offset = vmem_altmap_offset(altmap);
  	} else {
  		resource_size_t start, size;
  
  		start = phys_start_pfn << PAGE_SHIFT;
  		size = nr_pages * PAGE_SIZE;
  
  		ret = release_mem_region_adjustable(&iomem_resource, start,
  					size);
  		if (ret) {
  			resource_size_t endres = start + size - 1;
  
  			pr_warn("Unable to release resource <%pa-%pa> (%d)
  ",
  					&start, &endres, ret);
  		}
  	}
ea01ea937   Badari Pulavarty   hotplug memory re...
583

7cf91a98e   Joonsoo Kim   mm/compaction: sp...
584
  	clear_zone_contiguous(zone);
ea01ea937   Badari Pulavarty   hotplug memory re...
585
586
587
588
589
  	/*
  	 * We can only remove entire sections
  	 */
  	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
  	BUG_ON(nr_pages % PAGES_PER_SECTION);
ea01ea937   Badari Pulavarty   hotplug memory re...
590
591
592
  	sections_to_remove = nr_pages / PAGES_PER_SECTION;
  	for (i = 0; i < sections_to_remove; i++) {
  		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
4b94ffdc4   Dan Williams   x86, mm: introduc...
593
594
595
  
  		ret = __remove_section(zone, __pfn_to_section(pfn), map_offset);
  		map_offset = 0;
ea01ea937   Badari Pulavarty   hotplug memory re...
596
597
598
  		if (ret)
  			break;
  	}
7cf91a98e   Joonsoo Kim   mm/compaction: sp...
599
600
  
  	set_zone_contiguous(zone);
ea01ea937   Badari Pulavarty   hotplug memory re...
601
602
  	return ret;
  }
4edd7ceff   David Rientjes   mm, hotplug: avoi...
603
  #endif /* CONFIG_MEMORY_HOTREMOVE */
ea01ea937   Badari Pulavarty   hotplug memory re...
604

9d0ad8ca4   Daniel Kiper   mm: extend memory...
605
606
607
  /*
   * Install @callback as the page onlining function.
   *
   * Only succeeds while the generic callback is still in place.  Returns 0
   * when @callback was installed and -EINVAL when a non-generic callback is
   * already registered.
   */
  int set_online_page_callback(online_page_callback_t callback)
  {
  	bool installed = false;

  	get_online_mems();
  	mutex_lock(&online_page_callback_lock);

  	if (online_page_callback == generic_online_page) {
  		online_page_callback = callback;
  		installed = true;
  	}

  	mutex_unlock(&online_page_callback_lock);
  	put_online_mems();

  	return installed ? 0 : -EINVAL;
  }
  EXPORT_SYMBOL_GPL(set_online_page_callback);
  
  /*
   * Put the generic page onlining function back in place.
   *
   * Only succeeds when @callback is the currently registered callback.
   * Returns 0 when the generic callback was restored, -EINVAL otherwise.
   */
  int restore_online_page_callback(online_page_callback_t callback)
  {
  	bool restored = false;

  	get_online_mems();
  	mutex_lock(&online_page_callback_lock);

  	if (online_page_callback == callback) {
  		online_page_callback = generic_online_page;
  		restored = true;
  	}

  	mutex_unlock(&online_page_callback_lock);
  	put_online_mems();

  	return restored ? 0 : -EINVAL;
  }
  EXPORT_SYMBOL_GPL(restore_online_page_callback);
  
  /* First step of onlining @page; this implementation does nothing. */
  void __online_page_set_limits(struct page *page)
  {
  }
  EXPORT_SYMBOL_GPL(__online_page_set_limits);
  
  /* Account @page as one additional managed page. */
  void __online_page_increment_counters(struct page *page)
  {
  	adjust_managed_page_count(page, 1);
  }
  EXPORT_SYMBOL_GPL(__online_page_increment_counters);
180c06efc   Jeremy Fitzhardinge   hotplug-memory: m...
649

9d0ad8ca4   Daniel Kiper   mm: extend memory...
650
651
  /* Release @page through __free_reserved_page(). */
  void __online_page_free(struct page *page)
  {
  	__free_reserved_page(page);
  }
9d0ad8ca4   Daniel Kiper   mm: extend memory...
654
655
656
657
658
659
660
661
  EXPORT_SYMBOL_GPL(__online_page_free);
  
  /*
   * Default page onlining callback: run the three onlining steps for @page
   * in order - set limits, bump the managed page counters, free the page.
   */
  static void generic_online_page(struct page *page)
  {
  	__online_page_set_limits(page);
  	__online_page_increment_counters(page);
  	__online_page_free(page);
  }
180c06efc   Jeremy Fitzhardinge   hotplug-memory: m...
662

75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
663
664
  static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
  			void *arg)
3947be196   Dave Hansen   [PATCH] memory ho...
665
666
  {
  	unsigned long i;
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
667
668
  	unsigned long onlined_pages = *(unsigned long *)arg;
  	struct page *page;
2d070eab2   Michal Hocko   mm: consider zone...
669

75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
670
671
672
  	if (PageReserved(pfn_to_page(start_pfn)))
  		for (i = 0; i < nr_pages; i++) {
  			page = pfn_to_page(start_pfn + i);
9d0ad8ca4   Daniel Kiper   mm: extend memory...
673
  			(*online_page_callback)(page);
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
674
675
  			onlined_pages++;
  		}
2d070eab2   Michal Hocko   mm: consider zone...
676
677
  
  	online_mem_sections(start_pfn, start_pfn + nr_pages);
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
678
679
680
  	*(unsigned long *)arg = onlined_pages;
  	return 0;
  }
d9713679d   Lai Jiangshan   memory_hotplug: f...
681
682
683
684
685
686
687
688
  /* check which state of node_states will be changed when online memory */
  static void node_states_check_changes_online(unsigned long nr_pages,
  	struct zone *zone, struct memory_notify *arg)
  {
  	int nid = zone_to_nid(zone);
  	enum zone_type zone_last = ZONE_NORMAL;
  
  	/*
6715ddf94   Lai Jiangshan   hotplug: update n...
689
690
691
  	 * If we have HIGHMEM or movable node, node_states[N_NORMAL_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_NORMAL,
  	 * set zone_last to ZONE_NORMAL.
d9713679d   Lai Jiangshan   memory_hotplug: f...
692
  	 *
6715ddf94   Lai Jiangshan   hotplug: update n...
693
694
695
  	 * If we don't have HIGHMEM nor movable node,
  	 * node_states[N_NORMAL_MEMORY] contains nodes which have zones of
  	 * 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
d9713679d   Lai Jiangshan   memory_hotplug: f...
696
  	 */
6715ddf94   Lai Jiangshan   hotplug: update n...
697
  	if (N_MEMORY == N_NORMAL_MEMORY)
d9713679d   Lai Jiangshan   memory_hotplug: f...
698
699
700
701
702
703
704
705
706
707
708
709
  		zone_last = ZONE_MOVABLE;
  
  	/*
  	 * if the memory to be online is in a zone of 0...zone_last, and
  	 * the zones of 0...zone_last don't have memory before online, we will
  	 * need to set the node to node_states[N_NORMAL_MEMORY] after
  	 * the memory is online.
  	 */
  	if (zone_idx(zone) <= zone_last && !node_state(nid, N_NORMAL_MEMORY))
  		arg->status_change_nid_normal = nid;
  	else
  		arg->status_change_nid_normal = -1;
6715ddf94   Lai Jiangshan   hotplug: update n...
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
  #ifdef CONFIG_HIGHMEM
  	/*
  	 * If we have movable node, node_states[N_HIGH_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_HIGHMEM,
  	 * set zone_last to ZONE_HIGHMEM.
  	 *
  	 * If we don't have movable node, node_states[N_NORMAL_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_MOVABLE,
  	 * set zone_last to ZONE_MOVABLE.
  	 */
  	zone_last = ZONE_HIGHMEM;
  	if (N_MEMORY == N_HIGH_MEMORY)
  		zone_last = ZONE_MOVABLE;
  
  	if (zone_idx(zone) <= zone_last && !node_state(nid, N_HIGH_MEMORY))
  		arg->status_change_nid_high = nid;
  	else
  		arg->status_change_nid_high = -1;
  #else
  	arg->status_change_nid_high = arg->status_change_nid_normal;
  #endif
d9713679d   Lai Jiangshan   memory_hotplug: f...
731
732
  	/*
  	 * if the node don't have memory befor online, we will need to
6715ddf94   Lai Jiangshan   hotplug: update n...
733
  	 * set the node to node_states[N_MEMORY] after the memory
d9713679d   Lai Jiangshan   memory_hotplug: f...
734
735
  	 * is online.
  	 */
6715ddf94   Lai Jiangshan   hotplug: update n...
736
  	if (!node_state(nid, N_MEMORY))
d9713679d   Lai Jiangshan   memory_hotplug: f...
737
738
739
740
741
742
743
744
745
  		arg->status_change_nid = nid;
  	else
  		arg->status_change_nid = -1;
  }
  
  static void node_states_set_node(int node, struct memory_notify *arg)
  {
  	if (arg->status_change_nid_normal >= 0)
  		node_set_state(node, N_NORMAL_MEMORY);
6715ddf94   Lai Jiangshan   hotplug: update n...
746
747
748
749
  	if (arg->status_change_nid_high >= 0)
  		node_set_state(node, N_HIGH_MEMORY);
  
  	node_set_state(node, N_MEMORY);
d9713679d   Lai Jiangshan   memory_hotplug: f...
750
  }
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
  /*
   * Grow @zone's span so that it also covers [start_pfn, start_pfn + nr_pages).
   * The span is never shrunk: the new end is the larger of the previous end
   * and the end of the added range.
   */
  static void __meminit resize_zone_range(struct zone *zone, unsigned long start_pfn,
  		unsigned long nr_pages)
  {
  	/* Sample the current end before zone_start_pfn may be moved below. */
  	unsigned long span_end = zone_end_pfn(zone);
  	unsigned long range_end = start_pfn + nr_pages;

  	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
  		zone->zone_start_pfn = start_pfn;

  	if (range_end > span_end)
  		span_end = range_end;

  	zone->spanned_pages = span_end - zone->zone_start_pfn;
  }
  
  /*
   * Grow @pgdat's span so that it also covers [start_pfn, start_pfn + nr_pages).
   * The span is never shrunk: the new end is the larger of the previous end
   * and the end of the added range.
   */
  static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
                                       unsigned long nr_pages)
  {
  	/* Sample the current end before node_start_pfn may be moved below. */
  	unsigned long span_end = pgdat_end_pfn(pgdat);
  	unsigned long range_end = start_pfn + nr_pages;

  	if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
  		pgdat->node_start_pfn = start_pfn;

  	if (range_end > span_end)
  		span_end = range_end;

  	pgdat->node_spanned_pages = span_end - pgdat->node_start_pfn;
  }
cdf72f250   Michal Hocko   mm, memory_hotplu...
772
  /*
   * Associate the pfn range [start_pfn, start_pfn + nr_pages) with @zone.
   *
   * An empty zone is initialised first. The zone and its node are then
   * resized to span the range, the memmap for the range is initialised for
   * this zone (MEMMAP_HOTPLUG context), and the zone's contiguous state is
   * recomputed.
   */
  void __ref move_pfn_range_to_zone(struct zone *zone,
  		unsigned long start_pfn, unsigned long nr_pages)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	int nid = pgdat->node_id;
  	unsigned long flags;

  	if (zone_is_empty(zone))
  		init_currently_empty_zone(zone, start_pfn, nr_pages);

  	clear_zone_contiguous(zone);

  	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
  	pgdat_resize_lock(pgdat, &flags);
  	zone_span_writelock(zone);
  	resize_zone_range(zone, start_pfn, nr_pages);
  	zone_span_writeunlock(zone);
  	resize_pgdat_range(pgdat, start_pfn, nr_pages);
  	pgdat_resize_unlock(pgdat, &flags);

  	/*
  	 * TODO now we have a visible range of pages which are not associated
  	 * with their zone properly. Not nice but set_pfnblock_flags_mask
  	 * expects the zone spans the pfn range. All the pages in the range
  	 * are reserved so nobody should be touching them so we should be safe
  	 */
  	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG);

  	set_zone_contiguous(zone);
  }
  
  /*
c246a213f   Michal Hocko   mm, memory_hotplu...
804
805
806
807
   * Returns a default kernel memory zone for the given pfn range.
   * If no kernel zone covers this pfn range it will automatically go
   * to the ZONE_NORMAL.
   */
c6f03e290   Michal Hocko   mm, memory_hotplu...
808
  static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn,
c246a213f   Michal Hocko   mm, memory_hotplu...
809
810
811
812
813
814
815
816
817
818
819
820
821
822
  		unsigned long nr_pages)
  {
  	struct pglist_data *pgdat = NODE_DATA(nid);
  	int zid;
  
  	for (zid = 0; zid <= ZONE_NORMAL; zid++) {
  		struct zone *zone = &pgdat->node_zones[zid];
  
  		if (zone_intersects(zone, start_pfn, nr_pages))
  			return zone;
  	}
  
  	return &pgdat->node_zones[ZONE_NORMAL];
  }
c6f03e290   Michal Hocko   mm, memory_hotplu...
823
824
  static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
  		unsigned long nr_pages)
e5e689302   Michal Hocko   mm, memory_hotplu...
825
  {
c6f03e290   Michal Hocko   mm, memory_hotplu...
826
827
828
829
830
  	struct zone *kernel_zone = default_kernel_zone_for_pfn(nid, start_pfn,
  			nr_pages);
  	struct zone *movable_zone = &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
  	bool in_kernel = zone_intersects(kernel_zone, start_pfn, nr_pages);
  	bool in_movable = zone_intersects(movable_zone, start_pfn, nr_pages);
e5e689302   Michal Hocko   mm, memory_hotplu...
831
832
  
  	/*
c6f03e290   Michal Hocko   mm, memory_hotplu...
833
834
  	 * We inherit the existing zone in a simple case where zones do not
  	 * overlap in the given range
e5e689302   Michal Hocko   mm, memory_hotplu...
835
  	 */
c6f03e290   Michal Hocko   mm, memory_hotplu...
836
837
  	if (in_kernel ^ in_movable)
  		return (in_kernel) ? kernel_zone : movable_zone;
9f123ab54   Michal Hocko   mm, memory_hotplu...
838

c6f03e290   Michal Hocko   mm, memory_hotplu...
839
840
841
842
843
844
  	/*
  	 * If the range doesn't belong to any zone or two zones overlap in the
  	 * given range then we use movable zone only if movable_node is
  	 * enabled because we always online to a kernel zone by default.
  	 */
  	return movable_node_enabled ? movable_zone : kernel_zone;
9f123ab54   Michal Hocko   mm, memory_hotplu...
845
  }
e5e689302   Michal Hocko   mm, memory_hotplu...
846
847
  struct zone * zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
  		unsigned long nr_pages)
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
848
  {
c6f03e290   Michal Hocko   mm, memory_hotplu...
849
850
  	if (online_type == MMOP_ONLINE_KERNEL)
  		return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages);
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
851

c6f03e290   Michal Hocko   mm, memory_hotplu...
852
853
  	if (online_type == MMOP_ONLINE_MOVABLE)
  		return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
df429ac03   Reza Arbab   memory-hotplug: m...
854

c6f03e290   Michal Hocko   mm, memory_hotplu...
855
  	return default_zone_for_pfn(nid, start_pfn, nr_pages);
e5e689302   Michal Hocko   mm, memory_hotplu...
856
857
858
859
860
861
862
863
864
865
866
867
  }
  
  /*
   * Associates the given pfn range with the given node and the zone appropriate
   * for the given online type.
   *
   * Returns the zone the range was moved to.
   */
  static struct zone * __meminit move_pfn_range(int online_type, int nid,
  		unsigned long start_pfn, unsigned long nr_pages)
  {
  	struct zone *zone;

  	zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
  	move_pfn_range_to_zone(zone, start_pfn, nr_pages);
  	return zone;
  }
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
871

b93e0f329   Michal Hocko   mm, memory_hotplu...
872
  /* Must be protected by mem_hotplug_begin() or a device_lock */
511c2aba8   Lai Jiangshan   mm, memory-hotplu...
873
  int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
874
  {
aa47228a1   Cody P Schafer   memory_hotplug: u...
875
  	unsigned long flags;
3947be196   Dave Hansen   [PATCH] memory ho...
876
877
  	unsigned long onlined_pages = 0;
  	struct zone *zone;
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
878
  	int need_zonelists_rebuild = 0;
7b78d335a   Yasunori Goto   memory hotplug: r...
879
880
881
  	int nid;
  	int ret;
  	struct memory_notify arg;
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
882
  	nid = pfn_to_nid(pfn);
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
883
884
  	/* associate pfn range with the zone */
  	zone = move_pfn_range(online_type, nid, pfn, nr_pages);
7b78d335a   Yasunori Goto   memory hotplug: r...
885
886
  	arg.start_pfn = pfn;
  	arg.nr_pages = nr_pages;
d9713679d   Lai Jiangshan   memory_hotplug: f...
887
  	node_states_check_changes_online(nr_pages, zone, &arg);
7b78d335a   Yasunori Goto   memory hotplug: r...
888

7b78d335a   Yasunori Goto   memory hotplug: r...
889
890
  	ret = memory_notify(MEM_GOING_ONLINE, &arg);
  	ret = notifier_to_errno(ret);
e33e33b4d   Chen Yucong   mm, memory hotplu...
891
892
  	if (ret)
  		goto failed_addition;
3947be196   Dave Hansen   [PATCH] memory ho...
893
  	/*
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
894
895
896
897
  	 * If this zone is not populated, then it is not in zonelist.
  	 * This means the page allocator ignores this zone.
  	 * So, zonelist must be updated after online.
  	 */
6dcd73d70   Wen Congyang   memory-hotplug: a...
898
  	if (!populated_zone(zone)) {
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
899
  		need_zonelists_rebuild = 1;
72675e131   Michal Hocko   mm, memory_hotplu...
900
  		setup_zone_pageset(zone);
6dcd73d70   Wen Congyang   memory-hotplug: a...
901
  	}
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
902

908eedc61   KAMEZAWA Hiroyuki   walk system ram r...
903
  	ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
904
  		online_pages_range);
fd8a4221a   Geoff Levand   memory_hotplug: c...
905
  	if (ret) {
6dcd73d70   Wen Congyang   memory-hotplug: a...
906
907
  		if (need_zonelists_rebuild)
  			zone_pcp_reset(zone);
e33e33b4d   Chen Yucong   mm, memory hotplu...
908
  		goto failed_addition;
fd8a4221a   Geoff Levand   memory_hotplug: c...
909
  	}
3947be196   Dave Hansen   [PATCH] memory ho...
910
  	zone->present_pages += onlined_pages;
aa47228a1   Cody P Schafer   memory_hotplug: u...
911
912
  
  	pgdat_resize_lock(zone->zone_pgdat, &flags);
f2937be58   Yasunori Goto   [PATCH] memory ho...
913
  	zone->zone_pgdat->node_present_pages += onlined_pages;
aa47228a1   Cody P Schafer   memory_hotplug: u...
914
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);
08dff7b7d   Jiang Liu   mm/hotplug: corre...
915
  	if (onlined_pages) {
e888ca354   Vlastimil Babka   mm, memory hotplu...
916
  		node_states_set_node(nid, &arg);
08dff7b7d   Jiang Liu   mm/hotplug: corre...
917
  		if (need_zonelists_rebuild)
72675e131   Michal Hocko   mm, memory_hotplu...
918
  			build_all_zonelists(NULL);
08dff7b7d   Jiang Liu   mm/hotplug: corre...
919
920
921
  		else
  			zone_pcp_update(zone);
  	}
3947be196   Dave Hansen   [PATCH] memory ho...
922

1b79acc91   KOSAKI Motohiro   mm, mem-hotplug: ...
923
  	init_per_zone_wmark_min();
698b1b306   Vlastimil Babka   mm, compaction: i...
924
  	if (onlined_pages) {
e888ca354   Vlastimil Babka   mm, memory hotplu...
925
  		kswapd_run(nid);
698b1b306   Vlastimil Babka   mm, compaction: i...
926
927
  		kcompactd_run(nid);
  	}
61b13993a   Dave Hansen   [PATCH] memory ho...
928

1f522509c   Haicheng Li   mem-hotplug: avoi...
929
  	vm_total_pages = nr_free_pagecache_pages();
2f7f24eca   Kent Liu   memory-hotplug: d...
930

2d1d43f6a   Chandra Seetharaman   [PATCH] call mm/p...
931
  	writeback_set_ratelimit();
7b78d335a   Yasunori Goto   memory hotplug: r...
932
933
934
  
  	if (onlined_pages)
  		memory_notify(MEM_ONLINE, &arg);
30467e0b3   David Rientjes   mm, hotplug: fix ...
935
  	return 0;
e33e33b4d   Chen Yucong   mm, memory hotplu...
936
937
938
939
940
941
942
943
  
  failed_addition:
  	pr_debug("online_pages [mem %#010llx-%#010llx] failed
  ",
  		 (unsigned long long) pfn << PAGE_SHIFT,
  		 (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
  	memory_notify(MEM_CANCEL_ONLINE, &arg);
  	return ret;
3947be196   Dave Hansen   [PATCH] memory ho...
944
  }
53947027a   Keith Mannthey   [PATCH] hot-add-m...
945
  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
946

0bd854200   Tang Chen   mem-hotplug: rese...
947
948
949
950
951
952
953
954
955
  /*
   * Zero the present_pages count of every zone of @pgdat as well as the
   * node-wide node_present_pages total.
   */
  static void reset_node_present_pages(pg_data_t *pgdat)
  {
  	int zid;

  	for (zid = 0; zid < MAX_NR_ZONES; zid++)
  		pgdat->node_zones[zid].present_pages = 0;

  	pgdat->node_present_pages = 0;
  }
e13193319   Hidetoshi Seto   mm/memory_hotplug...
956
957
  /*
   * Prepare the pgdat of node @nid for hot-added memory starting at physical
   * address @start.
   *
   * A pgdat is allocated and installed if the node has none yet; an existing
   * one has its nr_zones and kswapd fields reset for reuse. All zones are
   * then initialised as empty, the zonelists are built, and the managed and
   * present page counts are cleared.
   *
   * Returns the pgdat, or NULL if a new pgdat could not be allocated.
   *
   * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
   */
  static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
  {
  	struct pglist_data *pgdat;
  	unsigned long zones_size[MAX_NR_ZONES] = {0};
  	unsigned long zholes_size[MAX_NR_ZONES] = {0};
  	unsigned long start_pfn = PFN_DOWN(start);

  	pgdat = NODE_DATA(nid);
  	if (!pgdat) {
  		pgdat = arch_alloc_nodedata(nid);
  		if (!pgdat)
  			return NULL;

  		arch_refresh_nodedata(nid, pgdat);
  	} else {
  		/*
  		 * Reset the nr_zones, order and classzone_idx before reuse.
  		 * Note that kswapd will init kswapd_classzone_idx properly
  		 * when it starts in the near future.
  		 */
  		pgdat->nr_zones = 0;
  		pgdat->kswapd_order = 0;
  		pgdat->kswapd_classzone_idx = 0;
  	}

  	/* we can use NODE_DATA(nid) from here */

  	/* init node's zones as empty zones, we don't have any present pages.*/
  	free_area_init_node(nid, zones_size, start_pfn, zholes_size);
  	/* NOTE(review): the alloc_percpu() result is not checked here. */
  	pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);

  	/*
  	 * The node we allocated has no zone fallback lists. For avoiding
  	 * to access not-initialized zonelist, build here.
  	 */
  	build_all_zonelists(pgdat);

  	/*
  	 * zone->managed_pages is set to an approximate value in
  	 * free_area_init_core(), which will cause
  	 * /sys/devices/system/node/nodeX/meminfo has wrong data.
  	 * So reset it to 0 before any memory is onlined.
  	 */
  	reset_node_managed_pages(pgdat);

  	/*
  	 * When memory is hot-added, all the memory is in offline state. So
  	 * clear all zones' present_pages because they will be updated in
  	 * online_pages() and offline_pages().
  	 */
  	reset_node_present_pages(pgdat);

  	return pgdat;
  }
  
  /*
   * Undo hotadd_new_pgdat() for a freshly allocated pgdat: clear the node's
   * pgdat pointer, free the per-cpu node statistics, and free the pgdat
   * itself.
   */
  static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
  {
  	arch_refresh_nodedata(nid, NULL);
  	free_percpu(pgdat->per_cpu_nodestats);
  	arch_free_nodedata(pgdat);
  }
0a5470390   KAMEZAWA Hiroyuki   [PATCH] register ...
1017

01b0f1970   Toshi Kani   cpu/mem hotplug: ...
1018
1019
1020
  /**
   * try_online_node - online a node if offlined
   *
cf23422b9   minskey guo   cpu/mem hotplug: ...
1021
1022
   * called by cpu_up() to online a node without onlined memory.
   */
01b0f1970   Toshi Kani   cpu/mem hotplug: ...
1023
  int try_online_node(int nid)
cf23422b9   minskey guo   cpu/mem hotplug: ...
1024
1025
1026
  {
  	pg_data_t	*pgdat;
  	int	ret;
01b0f1970   Toshi Kani   cpu/mem hotplug: ...
1027
1028
  	if (node_online(nid))
  		return 0;
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1029
  	mem_hotplug_begin();
cf23422b9   minskey guo   cpu/mem hotplug: ...
1030
  	pgdat = hotadd_new_pgdat(nid, 0);
7553e8f2d   David Rientjes   mm, hotplug: fix ...
1031
  	if (!pgdat) {
01b0f1970   Toshi Kani   cpu/mem hotplug: ...
1032
1033
  		pr_err("Cannot online node %d due to NULL pgdat
  ", nid);
cf23422b9   minskey guo   cpu/mem hotplug: ...
1034
1035
1036
1037
1038
1039
  		ret = -ENOMEM;
  		goto out;
  	}
  	node_set_online(nid);
  	ret = register_one_node(nid);
  	BUG_ON(ret);
cf23422b9   minskey guo   cpu/mem hotplug: ...
1040
  out:
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1041
  	mem_hotplug_done();
cf23422b9   minskey guo   cpu/mem hotplug: ...
1042
1043
  	return ret;
  }
27356f54c   Toshi Kani   mm/hotplug: verif...
1044
1045
  /*
   * Validate a physical memory range given to the hotplug code.
   *
   * @start: physical start address of the range
   * @size:  size of the range in bytes
   *
   * Returns 0 if the range starts on a section boundary and covers a
   * non-zero whole number of sections, -EINVAL (after logging an error)
   * otherwise.
   */
  static int check_hotplug_memory_range(u64 start, u64 size)
  {
  	u64 start_pfn = PFN_DOWN(start);
  	u64 nr_pages = size >> PAGE_SHIFT;

  	/* Memory range must be aligned with section */
  	if ((start_pfn & ~PAGE_SECTION_MASK) ||
  	    (nr_pages % PAGES_PER_SECTION) || (!nr_pages)) {
  		pr_err("Section-unaligned hotplug range: start 0x%llx, size 0x%llx\n",
  				(unsigned long long)start,
  				(unsigned long long)size);
  		return -EINVAL;
  	}

  	return 0;
  }
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
1061
1062
  static int online_memory_block(struct memory_block *mem, void *arg)
  {
dc18d706a   Nathan Fontenot   memory-hotplug: u...
1063
  	return device_online(&mem->dev);
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
1064
  }
31168481c   Al Viro   meminit section w...
1065
  /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
1066
  int __ref add_memory_resource(int nid, struct resource *res, bool online)
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1067
  {
62cedb9f1   David Vrabel   mm: memory hotplu...
1068
  	u64 start, size;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1069
  	pg_data_t *pgdat = NULL;
a1e565aa3   Tang Chen   memory-hotplug: d...
1070
1071
  	bool new_pgdat;
  	bool new_node;
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1072
  	int ret;
62cedb9f1   David Vrabel   mm: memory hotplu...
1073
1074
  	start = res->start;
  	size = resource_size(res);
27356f54c   Toshi Kani   mm/hotplug: verif...
1075
1076
1077
  	ret = check_hotplug_memory_range(start, size);
  	if (ret)
  		return ret;
a1e565aa3   Tang Chen   memory-hotplug: d...
1078
1079
1080
1081
  	{	/* Stupid hack to suppress address-never-null warning */
  		void *p = NODE_DATA(nid);
  		new_pgdat = !p;
  	}
ac13c4622   Nathan Zimmer   mm/memory_hotplug...
1082

bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1083
  	mem_hotplug_begin();
ac13c4622   Nathan Zimmer   mm/memory_hotplug...
1084

7f36e3e56   Tang Chen   memory-hotplug: a...
1085
1086
1087
1088
1089
1090
1091
  	/*
  	 * Add new range to memblock so that when hotadd_new_pgdat() is called
  	 * to allocate new pgdat, get_pfn_range_for_nid() will be able to find
  	 * this new range and calculate total pages correctly.  The range will
  	 * be removed at hot-remove time.
  	 */
  	memblock_add_node(start, size, nid);
a1e565aa3   Tang Chen   memory-hotplug: d...
1092
1093
  	new_node = !node_online(nid);
  	if (new_node) {
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1094
  		pgdat = hotadd_new_pgdat(nid, start);
6ad696d2c   Andi Kleen   mm: allow memory ...
1095
  		ret = -ENOMEM;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1096
  		if (!pgdat)
41b9e2d7e   Wen Congyang   mm/memory_hotplug...
1097
  			goto error;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1098
  	}
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1099
  	/* call arch's memory hotadd */
3d79a728f   Michal Hocko   mm, memory_hotplu...
1100
  	ret = arch_add_memory(nid, start, size, true);
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1101

9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1102
1103
  	if (ret < 0)
  		goto error;
0fc44159b   Yasunori Goto   [PATCH] Register ...
1104
  	/* we online node here. we can't roll back from here. */
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1105
  	node_set_online(nid);
a1e565aa3   Tang Chen   memory-hotplug: d...
1106
  	if (new_node) {
9037a9934   Michal Hocko   mm, memory_hotplu...
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
  		unsigned long start_pfn = start >> PAGE_SHIFT;
  		unsigned long nr_pages = size >> PAGE_SHIFT;
  
  		ret = __register_one_node(nid);
  		if (ret)
  			goto register_fail;
  
  		/*
  		 * link memory sections under this node. This is already
  		 * done when creatig memory section in register_new_memory
  		 * but that depends to have the node registered so offline
  		 * nodes have to go through register_node.
  		 * TODO clean up this mess.
  		 */
  		ret = link_mem_sections(nid, start_pfn, nr_pages);
  register_fail:
0fc44159b   Yasunori Goto   [PATCH] Register ...
1123
1124
1125
1126
1127
1128
1129
  		/*
  		 * If sysfs file of new node can't create, cpu on the node
  		 * can't be hot-added. There is no rollback way now.
  		 * So, check by BUG_ON() to catch it reluctantly..
  		 */
  		BUG_ON(ret);
  	}
d96ae5309   akpm@linux-foundation.org   memory-hotplug: c...
1130
1131
  	/* create new memmap entry */
  	firmware_map_add_hotplug(start, start + size, "System RAM");
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
1132
1133
1134
1135
  	/* online pages if requested */
  	if (online)
  		walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1),
  				  NULL, online_memory_block);
6ad696d2c   Andi Kleen   mm: allow memory ...
1136
  	goto out;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1137
1138
  error:
  	/* rollback pgdat allocation and others */
dbac61a3f   Gustavo A. R. Silva   mm/memory_hotplug...
1139
  	if (new_pgdat && pgdat)
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1140
  		rollback_node_hotadd(nid, pgdat);
7f36e3e56   Tang Chen   memory-hotplug: a...
1141
  	memblock_remove(start, size);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1142

6ad696d2c   Andi Kleen   mm: allow memory ...
1143
  out:
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1144
  	mem_hotplug_done();
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1145
1146
  	return ret;
  }
62cedb9f1   David Vrabel   mm: memory hotplu...
1147
1148
1149
1150
1151
1152
1153
1154
  EXPORT_SYMBOL_GPL(add_memory_resource);
  
  /*
   * Hot-add the physical range [start, start + size) to node @nid.
   *
   * Registers a memory resource for the range and passes it to
   * add_memory_resource(), using memhp_auto_online to decide whether the
   * memory is onlined immediately. The resource is released again if adding
   * the memory fails.
   *
   * Returns the PTR_ERR() of the resource registration on failure there,
   * otherwise the result of add_memory_resource().
   */
  int __ref add_memory(int nid, u64 start, u64 size)
  {
  	struct resource *res;
  	int ret;

  	res = register_memory_resource(start, size);
  	if (IS_ERR(res))
  		return PTR_ERR(res);

  	ret = add_memory_resource(nid, res, memhp_auto_online);
  	if (ret < 0)
  		release_memory_resource(res);
  	return ret;
  }
  EXPORT_SYMBOL_GPL(add_memory);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1164
1165
1166
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
  /*
   * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
   * set and the size of the free page is given by page_order(). Using this,
   * the function determines if the pageblock contains only free pages.
   * Due to buddy constraints, a free page at least the size of a pageblock will
   * be located at the start of the pageblock
   *
   * Returns non-zero when @page is a buddy page of at least pageblock_order.
   */
  static inline int pageblock_free(struct page *page)
  {
  	return PageBuddy(page) && page_order(page) >= pageblock_order;
  }
  
  /* Return the start of the next active pageblock after a given page */
  static struct page *next_active_pageblock(struct page *page)
  {
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1181
1182
  	/* Ensure the starting page is pageblock-aligned */
  	BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1183
  	/* If the entire pageblock is free, move to the end of free page */
0dcc48c15   KAMEZAWA Hiroyuki   memory hotplug: f...
1184
1185
1186
1187
1188
1189
1190
  	if (pageblock_free(page)) {
  		int order;
  		/* be careful. we don't have locks, page_order can be changed.*/
  		order = page_order(page);
  		if ((order < MAX_ORDER) && (order >= pageblock_order))
  			return page + (1 << order);
  	}
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1191

0dcc48c15   KAMEZAWA Hiroyuki   memory hotplug: f...
1192
  	return page + pageblock_nr_pages;
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1193
1194
1195
  }
  
  /* Checks if this range of memory is likely to be hot-removable. */
c98940f6f   Yaowei Bai   mm/memory_hotplug...
1196
  bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1197
  {
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1198
1199
1200
1201
1202
  	struct page *page = pfn_to_page(start_pfn);
  	struct page *end_page = page + nr_pages;
  
  	/* Check the starting page of each pageblock within the range */
  	for (; page < end_page; page = next_active_pageblock(page)) {
49ac82558   KAMEZAWA Hiroyuki   memory hotplug: u...
1203
  		if (!is_pageblock_removable_nolock(page))
c98940f6f   Yaowei Bai   mm/memory_hotplug...
1204
  			return false;
49ac82558   KAMEZAWA Hiroyuki   memory hotplug: u...
1205
  		cond_resched();
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1206
1207
1208
  	}
  
  	/* All pageblocks in the memory block are likely to be hot-removable */
c98940f6f   Yaowei Bai   mm/memory_hotplug...
1209
  	return true;
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1210
1211
1212
  }
  
  /*
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1213
   * Confirm all pages in a range [start, end) belong to the same zone.
a96dfddbc   Toshi Kani   base/memory, hotp...
1214
   * When true, return its valid [start, end).
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1215
   */
a96dfddbc   Toshi Kani   base/memory, hotp...
1216
1217
  int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
  			 unsigned long *valid_start, unsigned long *valid_end)
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1218
  {
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1219
  	unsigned long pfn, sec_end_pfn;
a96dfddbc   Toshi Kani   base/memory, hotp...
1220
  	unsigned long start, end;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1221
1222
1223
  	struct zone *zone = NULL;
  	struct page *page;
  	int i;
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1224
  	for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1225
  	     pfn < end_pfn;
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1226
  	     pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1227
1228
  		/* Make sure the memory section is present first */
  		if (!present_section_nr(pfn_to_section_nr(pfn)))
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1229
  			continue;
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1230
1231
1232
1233
1234
1235
1236
  		for (; pfn < sec_end_pfn && pfn < end_pfn;
  		     pfn += MAX_ORDER_NR_PAGES) {
  			i = 0;
  			/* This is just a CONFIG_HOLES_IN_ZONE check.*/
  			while ((i < MAX_ORDER_NR_PAGES) &&
  				!pfn_valid_within(pfn + i))
  				i++;
d6d8c8a48   zhong jiang   mm/memory_hotplug...
1237
  			if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1238
1239
1240
1241
  				continue;
  			page = pfn_to_page(pfn + i);
  			if (zone && page_zone(page) != zone)
  				return 0;
a96dfddbc   Toshi Kani   base/memory, hotp...
1242
1243
  			if (!zone)
  				start = pfn + i;
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1244
  			zone = page_zone(page);
a96dfddbc   Toshi Kani   base/memory, hotp...
1245
  			end = pfn + MAX_ORDER_NR_PAGES;
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1246
  		}
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1247
  	}
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1248

a96dfddbc   Toshi Kani   base/memory, hotp...
1249
1250
  	if (zone) {
  		*valid_start = start;
d6d8c8a48   zhong jiang   mm/memory_hotplug...
1251
  		*valid_end = min(end, end_pfn);
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1252
  		return 1;
a96dfddbc   Toshi Kani   base/memory, hotp...
1253
  	} else {
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1254
  		return 0;
a96dfddbc   Toshi Kani   base/memory, hotp...
1255
  	}
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1256
1257
1258
  }
  
  /*
   * Scan pfn range [start,end) to find movable/migratable pages (LRU pages,
   * non-lru movable pages and hugepages). We scan pfn because it's much
   * easier than scanning over linked list. This function returns the pfn
   * of the first found movable page if it's found, otherwise 0.
   */
  static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
  {
  	unsigned long pfn;
  	struct page *page;
  	for (pfn = start; pfn < end; pfn++) {
  		if (pfn_valid(pfn)) {
  			page = pfn_to_page(pfn);
  			if (PageLRU(page))
  				return pfn;
  			if (__PageMovable(page))
  				return pfn;
  			if (PageHuge(page)) {
  				if (page_huge_active(page))
  					return pfn;
  				else
  					/*
  					 * Inactive huge page: jump to its last
  					 * pfn, the loop increment then moves
  					 * past it.
  					 */
  					pfn = round_up(pfn + 1,
  						1 << compound_order(page)) - 1;
  			}
  		}
  	}
  	return 0;
  }
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1286
1287
1288
  static struct page *new_node_page(struct page *page, unsigned long private,
  		int **result)
  {
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1289
  	int nid = page_to_nid(page);
231e97e2b   Li Zhong   mem-hotplug: use ...
1290
  	nodemask_t nmask = node_states[N_MEMORY];
7f252f277   Michal Hocko   mm, memory_hotplu...
1291
1292
1293
1294
1295
1296
1297
1298
1299
  
  	/*
  	 * try to allocate from a different node but reuse this node if there
  	 * are no other online nodes to be used (e.g. we are offlining a part
  	 * of the only existing node)
  	 */
  	node_clear(nid, nmask);
  	if (nodes_empty(nmask))
  		node_set(nid, nmask);
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1300

8b9132388   Michal Hocko   mm: unify new_nod...
1301
  	return new_page_nodemask(page, nid, &nmask);
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1302
  }
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
  #define NR_OFFLINE_AT_ONCE_PAGES	(256)
  static int
  do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pfn;
  	struct page *page;
  	int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
  	int not_managed = 0;
  	int ret = 0;
  	LIST_HEAD(source);
  
  	for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
  		if (!pfn_valid(pfn))
  			continue;
  		page = pfn_to_page(pfn);
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
  
  		if (PageHuge(page)) {
  			struct page *head = compound_head(page);
  			pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
  			if (compound_order(head) > PFN_SECTION_SHIFT) {
  				ret = -EBUSY;
  				break;
  			}
  			if (isolate_huge_page(page, &source))
  				move_pages -= 1 << compound_order(head);
  			continue;
8135d8926   Naoya Horiguchi   mm: memory_hotplu...
1329
1330
1331
  		} else if (thp_migration_supported() && PageTransHuge(page))
  			pfn = page_to_pfn(compound_head(page))
  				+ hpage_nr_pages(page) - 1;
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1332

2c25071be   Michal Hocko   hwpoison, memory_...
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
  		/*
  		 * HWPoison pages have elevated reference counts so the migration would
  		 * fail on them. It also doesn't make any sense to migrate them in the
  		 * first place. Still try to unmap such a page in case it is still mapped
  		 * (e.g. current hwpoison implementation doesn't unmap KSM pages but keep
  		 * the unmap as the catch all safety net).
  		 */
  		if (PageHWPoison(page)) {
  			if (WARN_ON(PageLRU(page)))
  				isolate_lru_page(page);
  			if (page_mapped(page))
  				try_to_unmap(page, TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS);
  			continue;
  		}
700c2a46e   Konstantin Khlebnikov   mem-hotplug: call...
1347
  		if (!get_page_unless_zero(page))
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1348
1349
  			continue;
  		/*
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1350
1351
  		 * We can skip free pages. And we can deal with pages on
  		 * LRU and non-lru movable pages.
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1352
  		 */
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1353
1354
1355
1356
  		if (PageLRU(page))
  			ret = isolate_lru_page(page);
  		else
  			ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1357
  		if (!ret) { /* Success */
700c2a46e   Konstantin Khlebnikov   mem-hotplug: call...
1358
  			put_page(page);
62695a84e   Nick Piggin   vmscan: move isol...
1359
  			list_add_tail(&page->lru, &source);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1360
  			move_pages--;
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1361
1362
1363
  			if (!__PageMovable(page))
  				inc_node_page_state(page, NR_ISOLATED_ANON +
  						    page_is_file_cache(page));
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
1364

0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1365
  		} else {
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1366
  #ifdef CONFIG_DEBUG_VM
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1367
1368
1369
  			pr_alert("failed to isolate pfn %lx
  ", pfn);
  			dump_page(page, "isolation failed");
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1370
  #endif
700c2a46e   Konstantin Khlebnikov   mem-hotplug: call...
1371
  			put_page(page);
25985edce   Lucas De Marchi   Fix common misspe...
1372
  			/* Because we don't have big zone->lock. we should
809c44497   Bob Liu   mm: do_migrate_ra...
1373
1374
1375
  			   check this again here. */
  			if (page_count(page)) {
  				not_managed++;
f3ab2636c   Bob Liu   mm: do_migrate_ra...
1376
  				ret = -EBUSY;
809c44497   Bob Liu   mm: do_migrate_ra...
1377
1378
  				break;
  			}
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1379
1380
  		}
  	}
f3ab2636c   Bob Liu   mm: do_migrate_ra...
1381
1382
  	if (!list_empty(&source)) {
  		if (not_managed) {
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1383
  			putback_movable_pages(&source);
f3ab2636c   Bob Liu   mm: do_migrate_ra...
1384
1385
  			goto out;
  		}
74c08f982   Minchan Kim   memory-hotplug: d...
1386

394e31d2c   Xishi Qiu   mem-hotplug: allo...
1387
1388
  		/* Allocate a new page from the nearest neighbor node */
  		ret = migrate_pages(&source, new_node_page, NULL, 0,
9c620e2bc   Hugh Dickins   mm: remove offlin...
1389
  					MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
f3ab2636c   Bob Liu   mm: do_migrate_ra...
1390
  		if (ret)
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1391
  			putback_movable_pages(&source);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1392
  	}
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
  out:
  	return ret;
  }
  
  /*
   * walk_system_ram_range() callback: pass the chunk
   * [start, start + nr_pages) to __offline_isolated_pages()
   * (remove from free_area[] and mark all as Reserved).
   * @data is unused.  Always returns 0.
   */
  static int
  offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
  			void *data)
  {
  	unsigned long chunk_end = start + nr_pages;

  	__offline_isolated_pages(start, chunk_end);
  	return 0;
  }
  
  static void
  offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
  {
908eedc61   KAMEZAWA Hiroyuki   walk system ram r...
1411
  	walk_system_ram_range(start_pfn, end_pfn - start_pfn, NULL,
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
  				offline_isolated_pages_cb);
  }
  
  /*
   * Check all pages in range, recoreded as memory resource, are isolated.
   */
  static int
  check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
  			void *data)
  {
  	int ret;
  	long offlined = *(long *)data;
b023f4681   Wen Congyang   memory-hotplug: s...
1424
  	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages, true);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
  	offlined = nr_pages;
  	if (!ret)
  		*(long *)data += offlined;
  	return ret;
  }
  
  static long
  check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
  {
  	long offlined = 0;
  	int ret;
908eedc61   KAMEZAWA Hiroyuki   walk system ram r...
1436
  	ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, &offlined,
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1437
1438
1439
1440
1441
  			check_pages_isolated_cb);
  	if (ret < 0)
  		offlined = (long)ret;
  	return offlined;
  }
c5320926e   Tang Chen   mem-hotplug: intr...
1442
1443
  /*
   * Handler for the "movable_node" early parameter.  @p is not inspected.
   * Sets movable_node_enabled when CONFIG_HAVE_MEMBLOCK_NODE_MAP is
   * available, otherwise only warns.  Always returns 0.
   */
  static int __init cmdline_parse_movable_node(char *p)
  {
  #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
  	movable_node_enabled = true;
  #else
  	pr_warn("movable_node parameter depends on CONFIG_HAVE_MEMBLOCK_NODE_MAP to work properly\n");
  #endif
  	return 0;
  }
  early_param("movable_node", cmdline_parse_movable_node);
d9713679d   Lai Jiangshan   memory_hotplug: f...
1453
1454
1455
1456
1457
1458
1459
1460
1461
  /* check which state of node_states will be changed when offline memory */
  static void node_states_check_changes_offline(unsigned long nr_pages,
  		struct zone *zone, struct memory_notify *arg)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	unsigned long present_pages = 0;
  	enum zone_type zt, zone_last = ZONE_NORMAL;
  
  	/*
6715ddf94   Lai Jiangshan   hotplug: update n...
1462
1463
1464
  	 * If we have HIGHMEM or movable node, node_states[N_NORMAL_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_NORMAL,
  	 * set zone_last to ZONE_NORMAL.
d9713679d   Lai Jiangshan   memory_hotplug: f...
1465
  	 *
6715ddf94   Lai Jiangshan   hotplug: update n...
1466
1467
1468
  	 * If we don't have HIGHMEM nor movable node,
  	 * node_states[N_NORMAL_MEMORY] contains nodes which have zones of
  	 * 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
d9713679d   Lai Jiangshan   memory_hotplug: f...
1469
  	 */
6715ddf94   Lai Jiangshan   hotplug: update n...
1470
  	if (N_MEMORY == N_NORMAL_MEMORY)
d9713679d   Lai Jiangshan   memory_hotplug: f...
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
  		zone_last = ZONE_MOVABLE;
  
  	/*
  	 * check whether node_states[N_NORMAL_MEMORY] will be changed.
  	 * If the memory to be offline is in a zone of 0...zone_last,
  	 * and it is the last present memory, 0...zone_last will
  	 * become empty after offline , thus we can determind we will
  	 * need to clear the node from node_states[N_NORMAL_MEMORY].
  	 */
  	for (zt = 0; zt <= zone_last; zt++)
  		present_pages += pgdat->node_zones[zt].present_pages;
  	if (zone_idx(zone) <= zone_last && nr_pages >= present_pages)
  		arg->status_change_nid_normal = zone_to_nid(zone);
  	else
  		arg->status_change_nid_normal = -1;
6715ddf94   Lai Jiangshan   hotplug: update n...
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
  #ifdef CONFIG_HIGHMEM
  	/*
  	 * If we have movable node, node_states[N_HIGH_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_HIGHMEM,
  	 * set zone_last to ZONE_HIGHMEM.
  	 *
  	 * If we don't have movable node, node_states[N_NORMAL_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_MOVABLE,
  	 * set zone_last to ZONE_MOVABLE.
  	 */
  	zone_last = ZONE_HIGHMEM;
  	if (N_MEMORY == N_HIGH_MEMORY)
  		zone_last = ZONE_MOVABLE;
  
  	for (; zt <= zone_last; zt++)
  		present_pages += pgdat->node_zones[zt].present_pages;
  	if (zone_idx(zone) <= zone_last && nr_pages >= present_pages)
  		arg->status_change_nid_high = zone_to_nid(zone);
  	else
  		arg->status_change_nid_high = -1;
  #else
  	arg->status_change_nid_high = arg->status_change_nid_normal;
  #endif
d9713679d   Lai Jiangshan   memory_hotplug: f...
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
  	/*
  	 * node_states[N_HIGH_MEMORY] contains nodes which have 0...ZONE_MOVABLE
  	 */
  	zone_last = ZONE_MOVABLE;
  
  	/*
  	 * check whether node_states[N_HIGH_MEMORY] will be changed
  	 * If we try to offline the last present @nr_pages from the node,
  	 * we can determind we will need to clear the node from
  	 * node_states[N_HIGH_MEMORY].
  	 */
  	for (; zt <= zone_last; zt++)
  		present_pages += pgdat->node_zones[zt].present_pages;
  	if (nr_pages >= present_pages)
  		arg->status_change_nid = zone_to_nid(zone);
  	else
  		arg->status_change_nid = -1;
  }
  
  static void node_states_clear_node(int node, struct memory_notify *arg)
  {
  	if (arg->status_change_nid_normal >= 0)
  		node_clear_state(node, N_NORMAL_MEMORY);
6715ddf94   Lai Jiangshan   hotplug: update n...
1532
1533
  	if ((N_MEMORY != N_NORMAL_MEMORY) &&
  	    (arg->status_change_nid_high >= 0))
d9713679d   Lai Jiangshan   memory_hotplug: f...
1534
  		node_clear_state(node, N_HIGH_MEMORY);
6715ddf94   Lai Jiangshan   hotplug: update n...
1535
1536
1537
1538
  
  	if ((N_MEMORY != N_HIGH_MEMORY) &&
  	    (arg->status_change_nid >= 0))
  		node_clear_state(node, N_MEMORY);
d9713679d   Lai Jiangshan   memory_hotplug: f...
1539
  }
a16cee10c   Wen Congyang   memory-hotplug: p...
1540
  /*
   * Offline the pages in [start_pfn, end_pfn).
   *
   * Both ends must be pageblock aligned and the valid pages of the range
   * must belong to a single zone.  The range is isolated, movable pages are
   * migrated away in a retry loop, and once every page is isolated the
   * range is taken offline and zone/node accounting is updated.
   *
   * @timeout: how long, in jiffies, the migration loop may keep retrying.
   *
   * Returns 0 on success, or:
   *   -EINVAL  range not pageblock aligned or not within one zone
   *   -EAGAIN  @timeout expired
   *   -EINTR   a signal is pending for the current task
   *   -EBUSY   pages remained non-isolated after migration
   *   or the error from isolation, the MEM_GOING_OFFLINE notifier chain,
   *   do_migrate_range() (after the retries are used up) or
   *   dissolve_free_huge_pages().
   */
  static int __ref __offline_pages(unsigned long start_pfn,
  		  unsigned long end_pfn, unsigned long timeout)
  {
  	unsigned long pfn, nr_pages, expire;
  	long offlined_pages;
  	int ret, drain, retry_max, node;
  	unsigned long flags;
  	unsigned long valid_start, valid_end;
  	struct zone *zone;
  	struct memory_notify arg;

  	/* at least, alignment against pageblock is necessary */
  	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
  		return -EINVAL;
  	if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
  		return -EINVAL;
  	/* This makes hotplug much easier...and readable.
  	   we assume this for now. .*/
  	if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end))
  		return -EINVAL;

  	zone = page_zone(pfn_to_page(valid_start));
  	node = zone_to_nid(zone);
  	nr_pages = end_pfn - start_pfn;

  	/* set above range as isolated */
  	ret = start_isolate_page_range(start_pfn, end_pfn,
  				       MIGRATE_MOVABLE, true);
  	if (ret)
  		return ret;

  	arg.start_pfn = start_pfn;
  	arg.nr_pages = nr_pages;
  	node_states_check_changes_offline(nr_pages, zone, &arg);

  	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret)
  		goto failed_removal;

  	pfn = start_pfn;
  	expire = jiffies + timeout;
  	drain = 0;
  	retry_max = 5;
  repeat:
  	/* start memory hot removal */
  	ret = -EAGAIN;
  	if (time_after(jiffies, expire))
  		goto failed_removal;
  	ret = -EINTR;
  	if (signal_pending(current))
  		goto failed_removal;
  	ret = 0;
  	if (drain) {
  		lru_add_drain_all_cpuslocked();
  		cond_resched();
  		drain_all_pages(zone);
  	}

  	pfn = scan_movable_pages(start_pfn, end_pfn);
  	if (pfn) { /* We have movable pages */
  		ret = do_migrate_range(pfn, end_pfn);
  		if (!ret) {
  			drain = 1;
  			goto repeat;
  		} else {
  			/* only negative results use up the retry budget */
  			if (ret < 0)
  				if (--retry_max == 0)
  					goto failed_removal;
  			yield();
  			drain = 1;
  			goto repeat;
  		}
  	}
  	/* drain all zone's lru pagevec, this is asynchronous... */
  	lru_add_drain_all_cpuslocked();
  	yield();
  	/* drain pcp pages, this is synchronous. */
  	drain_all_pages(zone);
  	/*
  	 * dissolve free hugepages in the memory block before doing offlining
  	 * actually in order to make hugetlbfs's object counting consistent.
  	 */
  	ret = dissolve_free_huge_pages(start_pfn, end_pfn);
  	if (ret)
  		goto failed_removal;
  	/* check again */
  	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
  	if (offlined_pages < 0) {
  		ret = -EBUSY;
  		goto failed_removal;
  	}
  	pr_info("Offlined Pages %ld\n", offlined_pages);
  	/* Ok, all of our target is isolated.
  	   We cannot do rollback at this point. */
  	offline_isolated_pages(start_pfn, end_pfn);
  	/* reset pagetype flags and makes migrate type to be MOVABLE */
  	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
  	/* removal success */
  	adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages);
  	zone->present_pages -= offlined_pages;

  	pgdat_resize_lock(zone->zone_pgdat, &flags);
  	zone->zone_pgdat->node_present_pages -= offlined_pages;
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);

  	init_per_zone_wmark_min();

  	if (!populated_zone(zone)) {
  		zone_pcp_reset(zone);
  		build_all_zonelists(NULL);
  	} else
  		zone_pcp_update(zone);

  	node_states_clear_node(node, &arg);
  	/* the node is about to lose its N_MEMORY state: stop its kthreads */
  	if (arg.status_change_nid >= 0) {
  		kswapd_stop(node);
  		kcompactd_stop(node);
  	}

  	vm_total_pages = nr_free_pagecache_pages();
  	writeback_set_ratelimit();

  	memory_notify(MEM_OFFLINE, &arg);
  	return 0;

  failed_removal:
  	pr_debug("memory offlining [mem %#010llx-%#010llx] failed\n",
  		 (unsigned long long) start_pfn << PAGE_SHIFT,
  		 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
  	memory_notify(MEM_CANCEL_OFFLINE, &arg);
  	/* pushback to free area */
  	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
  	return ret;
  }
71088785c   Badari Pulavarty   mm: cleanup to ma...
1673

b93e0f329   Michal Hocko   mm, memory_hotplu...
1674
  /* Must be protected by mem_hotplug_begin() or a device_lock */
a16cee10c   Wen Congyang   memory-hotplug: p...
1675
1676
1677
1678
  int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
  {
  	return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ);
  }
e2ff39400   Rafael J. Wysocki   ACPI / memhotplug...
1679
  #endif /* CONFIG_MEMORY_HOTREMOVE */
a16cee10c   Wen Congyang   memory-hotplug: p...
1680

bbc76be67   Wen Congyang   memory-hotplug: r...
1681
1682
1683
  /**
   * walk_memory_range - walks through all mem sections in [start_pfn, end_pfn)
   * @start_pfn: start pfn of the memory range
e05c4bbfa   Toshi Kani   mm: walk_memory_r...
1684
   * @end_pfn: end pfn of the memory range
bbc76be67   Wen Congyang   memory-hotplug: r...
1685
1686
1687
1688
1689
1690
1691
1692
   * @arg: argument passed to func
   * @func: callback for each memory section walked
   *
   * This function walks through all present mem sections in range
   * [start_pfn, end_pfn) and call func on each mem section.
   *
   * Returns the return value of func.
   */
e2ff39400   Rafael J. Wysocki   ACPI / memhotplug...
1693
  int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
bbc76be67   Wen Congyang   memory-hotplug: r...
1694
  		void *arg, int (*func)(struct memory_block *, void *))
71088785c   Badari Pulavarty   mm: cleanup to ma...
1695
  {
e90bdb7f5   Wen Congyang   memory-hotplug: u...
1696
1697
  	struct memory_block *mem = NULL;
  	struct mem_section *section;
e90bdb7f5   Wen Congyang   memory-hotplug: u...
1698
1699
  	unsigned long pfn, section_nr;
  	int ret;
e90bdb7f5   Wen Congyang   memory-hotplug: u...
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
  
  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		section_nr = pfn_to_section_nr(pfn);
  		if (!present_section_nr(section_nr))
  			continue;
  
  		section = __nr_to_section(section_nr);
  		/* same memblock? */
  		if (mem)
  			if ((section_nr >= mem->start_section_nr) &&
  			    (section_nr <= mem->end_section_nr))
  				continue;
  
  		mem = find_memory_block_hinted(section, mem);
  		if (!mem)
  			continue;
bbc76be67   Wen Congyang   memory-hotplug: r...
1716
  		ret = func(mem, arg);
e90bdb7f5   Wen Congyang   memory-hotplug: u...
1717
  		if (ret) {
bbc76be67   Wen Congyang   memory-hotplug: r...
1718
1719
  			kobject_put(&mem->dev.kobj);
  			return ret;
e90bdb7f5   Wen Congyang   memory-hotplug: u...
1720
1721
1722
1723
1724
  		}
  	}
  
  	if (mem)
  		kobject_put(&mem->dev.kobj);
bbc76be67   Wen Congyang   memory-hotplug: r...
1725
1726
  	return 0;
  }
e2ff39400   Rafael J. Wysocki   ACPI / memhotplug...
1727
  #ifdef CONFIG_MEMORY_HOTREMOVE
d6de9d534   Xishi Qiu   mm/memory_hotplug...
1728
  static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
bbc76be67   Wen Congyang   memory-hotplug: r...
1729
1730
  {
  	int ret = !is_memblock_offlined(mem);
349daa0f9   Randy Dunlap   mm: fix memory_ho...
1731
1732
1733
1734
1735
  	if (unlikely(ret)) {
  		phys_addr_t beginpa, endpa;
  
  		beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
  		endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
756a025f0   Joe Perches   mm: coalesce spli...
1736
1737
  		pr_warn("removing memory fails, because memory [%pa-%pa] is onlined
  ",
349daa0f9   Randy Dunlap   mm: fix memory_ho...
1738
1739
  			&beginpa, &endpa);
  	}
bbc76be67   Wen Congyang   memory-hotplug: r...
1740
1741
1742
  
  	return ret;
  }
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1743
  /*
   * Returns -EBUSY if any present cpu maps to the node described by @pgdat,
   * 0 otherwise.
   */
  static int check_cpu_on_node(pg_data_t *pgdat)
  {
  	int cpu;

  	for_each_present_cpu(cpu) {
  		if (cpu_to_node(cpu) != pgdat->node_id)
  			continue;

  		/*
  		 * the cpu on this node isn't removed, and we can't
  		 * offline this node.
  		 */
  		return -EBUSY;
  	}

  	return 0;
  }
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1758
  /*
   * Call numa_clear_node() for every possible cpu that maps to the node
   * described by @pgdat.  Does nothing unless CONFIG_ACPI_NUMA is set.
   */
  static void unmap_cpu_on_node(pg_data_t *pgdat)
  {
  #ifdef CONFIG_ACPI_NUMA
  	int cpu;

  	for_each_possible_cpu(cpu) {
  		if (cpu_to_node(cpu) != pgdat->node_id)
  			continue;
  		numa_clear_node(cpu);
  	}
  #endif
  }
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1768
  /*
   * Unmap the cpus of the node described by @pgdat, but only if
   * check_cpu_on_node() reports no present cpu on it.  Returns that check's
   * result: 0 when the cpus were unmapped, its error code otherwise.
   */
  static int check_and_unmap_cpu_on_node(pg_data_t *pgdat)
  {
  	int busy = check_cpu_on_node(pgdat);

  	/*
  	 * the node will be offlined when we come here, so we can clear
  	 * the cpu_to_node() now.
  	 */
  	if (!busy)
  		unmap_cpu_on_node(pgdat);

  	return busy;
  }
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1783
1784
1785
1786
1787
1788
1789
1790
  /**
   * try_offline_node
   *
   * Offline a node if all memory sections and cpus of the node are removed.
   *
   * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
   * and online/offline operations before this call.
   */
90b30cdc1   Wen Congyang   memory-hotplug: e...
1791
  void try_offline_node(int nid)
60a5a19e7   Tang Chen   memory-hotplug: r...
1792
  {
d822b86a9   Wen Congyang   memory-hotplug: f...
1793
1794
1795
  	pg_data_t *pgdat = NODE_DATA(nid);
  	unsigned long start_pfn = pgdat->node_start_pfn;
  	unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
60a5a19e7   Tang Chen   memory-hotplug: r...
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
  	unsigned long pfn;
  
  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		unsigned long section_nr = pfn_to_section_nr(pfn);
  
  		if (!present_section_nr(section_nr))
  			continue;
  
  		if (pfn_to_nid(pfn) != nid)
  			continue;
  
  		/*
  		 * some memory sections of this node are not removed, and we
  		 * can't offline node now.
  		 */
  		return;
  	}
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1813
  	if (check_and_unmap_cpu_on_node(pgdat))
60a5a19e7   Tang Chen   memory-hotplug: r...
1814
1815
1816
1817
1818
1819
1820
1821
1822
  		return;
  
  	/*
  	 * all memory/cpu of this node are removed, we can offline this
  	 * node now.
  	 */
  	node_set_offline(nid);
  	unregister_one_node(nid);
  }
90b30cdc1   Wen Congyang   memory-hotplug: e...
1823
  EXPORT_SYMBOL(try_offline_node);
60a5a19e7   Tang Chen   memory-hotplug: r...
1824

0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1825
1826
1827
1828
1829
1830
1831
  /**
   * remove_memory
   *
   * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
   * and online/offline operations before this call, as required by
   * try_offline_node().
   */
242831eb1   Rafael J. Wysocki   Memory hotplug / ...
1832
  void __ref remove_memory(int nid, u64 start, u64 size)
bbc76be67   Wen Congyang   memory-hotplug: r...
1833
  {
242831eb1   Rafael J. Wysocki   Memory hotplug / ...
1834
  	int ret;
993c1aad8   Wen Congyang   memory-hotplug: t...
1835

27356f54c   Toshi Kani   mm/hotplug: verif...
1836
  	BUG_ON(check_hotplug_memory_range(start, size));
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1837
  	mem_hotplug_begin();
6677e3eaf   Yasuaki Ishimatsu   memory-hotplug: c...
1838
1839
  
  	/*
242831eb1   Rafael J. Wysocki   Memory hotplug / ...
1840
1841
1842
  	 * All memory blocks must be offlined before removing memory.  Check
  	 * whether all memory blocks in question are offline and trigger a BUG()
  	 * if this is not the case.
6677e3eaf   Yasuaki Ishimatsu   memory-hotplug: c...
1843
  	 */
242831eb1   Rafael J. Wysocki   Memory hotplug / ...
1844
  	ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
d6de9d534   Xishi Qiu   mm/memory_hotplug...
1845
  				check_memblock_offlined_cb);
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1846
  	if (ret)
242831eb1   Rafael J. Wysocki   Memory hotplug / ...
1847
  		BUG();
6677e3eaf   Yasuaki Ishimatsu   memory-hotplug: c...
1848

46c66c4b7   Yasuaki Ishimatsu   memory-hotplug: r...
1849
1850
  	/* remove memmap entry */
  	firmware_map_remove(start, start + size, "System RAM");
f9126ab92   Xishi Qiu   memory-hotplug: f...
1851
1852
  	memblock_free(start, size);
  	memblock_remove(start, size);
46c66c4b7   Yasuaki Ishimatsu   memory-hotplug: r...
1853

24d335ca3   Wen Congyang   memory-hotplug: i...
1854
  	arch_remove_memory(start, size);
60a5a19e7   Tang Chen   memory-hotplug: r...
1855
  	try_offline_node(nid);
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1856
  	mem_hotplug_done();
71088785c   Badari Pulavarty   mm: cleanup to ma...
1857
  }
71088785c   Badari Pulavarty   mm: cleanup to ma...
1858
  EXPORT_SYMBOL_GPL(remove_memory);
aba6efc47   Rafael J. Wysocki   Memory hotplug: M...
1859
  #endif /* CONFIG_MEMORY_HOTREMOVE */