Blame view

mm/compaction.c 83.6 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
748446bb6   Mel Gorman   mm: compaction: m...
2
3
4
5
6
7
8
9
10
  /*
   * linux/mm/compaction.c
   *
   * Memory compaction for the reduction of external fragmentation. Note that
   * this heavily depends upon page migration to do all the real heavy
   * lifting
   *
   * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
   */
698b1b306   Vlastimil Babka   mm, compaction: i...
11
  #include <linux/cpu.h>
748446bb6   Mel Gorman   mm: compaction: m...
12
13
14
15
  #include <linux/swap.h>
  #include <linux/migrate.h>
  #include <linux/compaction.h>
  #include <linux/mm_inline.h>
174cd4b1e   Ingo Molnar   sched/headers: Pr...
16
  #include <linux/sched/signal.h>
748446bb6   Mel Gorman   mm: compaction: m...
17
  #include <linux/backing-dev.h>
76ab0f530   Mel Gorman   mm: compaction: a...
18
  #include <linux/sysctl.h>
ed4a6d7f0   Mel Gorman   mm: compaction: a...
19
  #include <linux/sysfs.h>
194159fbc   Minchan Kim   mm: remove MIGRAT...
20
  #include <linux/page-isolation.h>
b8c73fc24   Andrey Ryabinin   mm: page_alloc: a...
21
  #include <linux/kasan.h>
698b1b306   Vlastimil Babka   mm, compaction: i...
22
23
  #include <linux/kthread.h>
  #include <linux/freezer.h>
83358ece2   Joonsoo Kim   mm/page_owner: in...
24
  #include <linux/page_owner.h>
eb414681d   Johannes Weiner   psi: pressure sta...
25
  #include <linux/psi.h>
748446bb6   Mel Gorman   mm: compaction: m...
26
  #include "internal.h"
010fc29a4   Minchan Kim   compaction: fix b...
27
28
29
30
31
32
33
34
35
36
37
38
39
40
  #ifdef CONFIG_COMPACTION
  static inline void count_compact_event(enum vm_event_item item)
  {
  	count_vm_event(item);
  }
  
  static inline void count_compact_events(enum vm_event_item item, long delta)
  {
  	count_vm_events(item, delta);
  }
  #else
  #define count_compact_event(item) do { } while (0)
  #define count_compact_events(item, delta) do { } while (0)
  #endif
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
41
  #if defined CONFIG_COMPACTION || defined CONFIG_CMA
b7aba6984   Mel Gorman   mm: compaction: a...
42
43
  #define CREATE_TRACE_POINTS
  #include <trace/events/compaction.h>
06b6640a3   Vlastimil Babka   mm, compaction: w...
44
45
46
47
  #define block_start_pfn(pfn, order)	round_down(pfn, 1UL << (order))
  #define block_end_pfn(pfn, order)	ALIGN((pfn) + 1, 1UL << (order))
  #define pageblock_start_pfn(pfn)	block_start_pfn(pfn, pageblock_order)
  #define pageblock_end_pfn(pfn)		block_end_pfn(pfn, pageblock_order)
facdaa917   Nitin Gupta   mm: proactive com...
48
49
50
  /*
   * Fragmentation score check interval for proactive compaction purposes.
   */
d34c0a759   Nitin Gupta   mm: use unsigned ...
51
  static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500;
facdaa917   Nitin Gupta   mm: proactive com...
52
53
54
55
56
57
58
59
  
  /*
   * Page order with-respect-to which proactive compaction
   * calculates external fragmentation, which is used as
   * the "fragmentation score" of a node/zone.
   */
  #if defined CONFIG_TRANSPARENT_HUGEPAGE
  #define COMPACTION_HPAGE_ORDER	HPAGE_PMD_ORDER
25788738e   Nitin Gupta   mm: fix compile e...
60
  #elif defined CONFIG_HUGETLBFS
facdaa917   Nitin Gupta   mm: proactive com...
61
62
63
64
  #define COMPACTION_HPAGE_ORDER	HUGETLB_PAGE_ORDER
  #else
  #define COMPACTION_HPAGE_ORDER	(PMD_SHIFT - PAGE_SHIFT)
  #endif
748446bb6   Mel Gorman   mm: compaction: m...
65
66
67
  static unsigned long release_freepages(struct list_head *freelist)
  {
  	struct page *page, *next;
6bace090a   Vlastimil Babka   mm, compaction: a...
68
  	unsigned long high_pfn = 0;
748446bb6   Mel Gorman   mm: compaction: m...
69
70
  
  	list_for_each_entry_safe(page, next, freelist, lru) {
6bace090a   Vlastimil Babka   mm, compaction: a...
71
  		unsigned long pfn = page_to_pfn(page);
748446bb6   Mel Gorman   mm: compaction: m...
72
73
  		list_del(&page->lru);
  		__free_page(page);
6bace090a   Vlastimil Babka   mm, compaction: a...
74
75
  		if (pfn > high_pfn)
  			high_pfn = pfn;
748446bb6   Mel Gorman   mm: compaction: m...
76
  	}
6bace090a   Vlastimil Babka   mm, compaction: a...
77
  	return high_pfn;
748446bb6   Mel Gorman   mm: compaction: m...
78
  }
4469ab984   Mel Gorman   mm, compaction: r...
79
  static void split_map_pages(struct list_head *list)
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
80
  {
66c64223a   Joonsoo Kim   mm/compaction: sp...
81
82
83
84
85
86
87
88
89
  	unsigned int i, order, nr_pages;
  	struct page *page, *next;
  	LIST_HEAD(tmp_list);
  
  	list_for_each_entry_safe(page, next, list, lru) {
  		list_del(&page->lru);
  
  		order = page_private(page);
  		nr_pages = 1 << order;
66c64223a   Joonsoo Kim   mm/compaction: sp...
90

46f24fd85   Joonsoo Kim   mm/page_alloc: in...
91
  		post_alloc_hook(page, order, __GFP_MOVABLE);
66c64223a   Joonsoo Kim   mm/compaction: sp...
92
93
  		if (order)
  			split_page(page, order);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
94

66c64223a   Joonsoo Kim   mm/compaction: sp...
95
96
97
98
  		for (i = 0; i < nr_pages; i++) {
  			list_add(&page->lru, &tmp_list);
  			page++;
  		}
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
99
  	}
66c64223a   Joonsoo Kim   mm/compaction: sp...
100
101
  
  	list_splice(&tmp_list, list);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
102
  }
bb13ffeb9   Mel Gorman   mm: compaction: c...
103
  #ifdef CONFIG_COMPACTION
24e2716f6   Joonsoo Kim   mm/compaction: ad...
104

bda807d44   Minchan Kim   mm: migrate: supp...
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
  int PageMovable(struct page *page)
  {
  	struct address_space *mapping;
  
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	if (!__PageMovable(page))
  		return 0;
  
  	mapping = page_mapping(page);
  	if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
  		return 1;
  
  	return 0;
  }
  EXPORT_SYMBOL(PageMovable);
  
  void __SetPageMovable(struct page *page, struct address_space *mapping)
  {
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
  	page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
  }
  EXPORT_SYMBOL(__SetPageMovable);
  
  void __ClearPageMovable(struct page *page)
  {
bda807d44   Minchan Kim   mm: migrate: supp...
131
132
133
134
135
136
137
138
139
140
  	VM_BUG_ON_PAGE(!PageMovable(page), page);
  	/*
  	 * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE
  	 * flag so that VM can catch up released page by driver after isolation.
  	 * With it, VM migration doesn't try to put it back.
  	 */
  	page->mapping = (void *)((unsigned long)page->mapping &
  				PAGE_MAPPING_MOVABLE);
  }
  EXPORT_SYMBOL(__ClearPageMovable);
24e2716f6   Joonsoo Kim   mm/compaction: ad...
141
142
143
144
145
  /* Do not skip compaction more than 64 times */
  #define COMPACT_MAX_DEFER_SHIFT 6
  
  /*
   * Compaction is deferred when compaction fails to result in a page
860b32729   Alex Shi   mm/compaction: co...
146
   * allocation success. 1 << compact_defer_shift, compactions are skipped up
24e2716f6   Joonsoo Kim   mm/compaction: ad...
147
148
   * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT
   */
2271b016b   Hui Su   mm/compaction: ma...
149
  static void defer_compaction(struct zone *zone, int order)
24e2716f6   Joonsoo Kim   mm/compaction: ad...
150
151
152
153
154
155
156
157
158
159
160
161
162
163
  {
  	zone->compact_considered = 0;
  	zone->compact_defer_shift++;
  
  	if (order < zone->compact_order_failed)
  		zone->compact_order_failed = order;
  
  	if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT)
  		zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;
  
  	trace_mm_compaction_defer_compaction(zone, order);
  }
  
  /* Returns true if compaction should be skipped this time */
2271b016b   Hui Su   mm/compaction: ma...
164
  static bool compaction_deferred(struct zone *zone, int order)
24e2716f6   Joonsoo Kim   mm/compaction: ad...
165
166
167
168
169
170
171
  {
  	unsigned long defer_limit = 1UL << zone->compact_defer_shift;
  
  	if (order < zone->compact_order_failed)
  		return false;
  
  	/* Avoid possible overflow */
62b35fe0e   Mateusz Nosek   mm/compaction.c: ...
172
  	if (++zone->compact_considered >= defer_limit) {
24e2716f6   Joonsoo Kim   mm/compaction: ad...
173
  		zone->compact_considered = defer_limit;
24e2716f6   Joonsoo Kim   mm/compaction: ad...
174
  		return false;
62b35fe0e   Mateusz Nosek   mm/compaction.c: ...
175
  	}
24e2716f6   Joonsoo Kim   mm/compaction: ad...
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
  
  	trace_mm_compaction_deferred(zone, order);
  
  	return true;
  }
  
  /*
   * Update defer tracking counters after successful compaction of given order,
   * which means an allocation either succeeded (alloc_success == true) or is
   * expected to succeed.
   */
  void compaction_defer_reset(struct zone *zone, int order,
  		bool alloc_success)
  {
  	if (alloc_success) {
  		zone->compact_considered = 0;
  		zone->compact_defer_shift = 0;
  	}
  	if (order >= zone->compact_order_failed)
  		zone->compact_order_failed = order + 1;
  
  	trace_mm_compaction_defer_reset(zone, order);
  }
  
  /* Returns true if restarting compaction after many failures */
2271b016b   Hui Su   mm/compaction: ma...
201
  static bool compaction_restarting(struct zone *zone, int order)
24e2716f6   Joonsoo Kim   mm/compaction: ad...
202
203
204
205
206
207
208
  {
  	if (order < zone->compact_order_failed)
  		return false;
  
  	return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
  		zone->compact_considered >= 1UL << zone->compact_defer_shift;
  }
bb13ffeb9   Mel Gorman   mm: compaction: c...
209
210
211
212
213
214
215
216
217
  /* Returns true if the pageblock should be scanned for pages to isolate. */
  static inline bool isolation_suitable(struct compact_control *cc,
  					struct page *page)
  {
  	if (cc->ignore_skip_hint)
  		return true;
  
  	return !get_pageblock_skip(page);
  }
02333641e   Vlastimil Babka   mm, compaction: e...
218
219
220
221
  static void reset_cached_positions(struct zone *zone)
  {
  	zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
  	zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
623446e4d   Joonsoo Kim   mm/compaction: fi...
222
  	zone->compact_cached_free_pfn =
06b6640a3   Vlastimil Babka   mm, compaction: w...
223
  				pageblock_start_pfn(zone_end_pfn(zone) - 1);
02333641e   Vlastimil Babka   mm, compaction: e...
224
  }
bb13ffeb9   Mel Gorman   mm: compaction: c...
225
  /*
2271b016b   Hui Su   mm/compaction: ma...
226
   * Compound pages of >= pageblock_order should consistently be skipped until
b527cfe5b   Vlastimil Babka   mm, compaction: e...
227
228
   * released. It is always pointless to compact pages of such order (if they are
   * migratable), and the pageblocks they occupy cannot contain any free pages.
21dc7e023   David Rientjes   mm, compaction: p...
229
   */
b527cfe5b   Vlastimil Babka   mm, compaction: e...
230
  static bool pageblock_skip_persistent(struct page *page)
21dc7e023   David Rientjes   mm, compaction: p...
231
  {
b527cfe5b   Vlastimil Babka   mm, compaction: e...
232
  	if (!PageCompound(page))
21dc7e023   David Rientjes   mm, compaction: p...
233
  		return false;
b527cfe5b   Vlastimil Babka   mm, compaction: e...
234
235
236
237
238
239
240
  
  	page = compound_head(page);
  
  	if (compound_order(page) >= pageblock_order)
  		return true;
  
  	return false;
21dc7e023   David Rientjes   mm, compaction: p...
241
  }
e332f741a   Mel Gorman   mm, compaction: b...
242
243
244
245
246
  static bool
  __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
  							bool check_target)
  {
  	struct page *page = pfn_to_online_page(pfn);
6b0868c82   Mel Gorman   mm/compaction.c: ...
247
  	struct page *block_page;
e332f741a   Mel Gorman   mm, compaction: b...
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
  	struct page *end_page;
  	unsigned long block_pfn;
  
  	if (!page)
  		return false;
  	if (zone != page_zone(page))
  		return false;
  	if (pageblock_skip_persistent(page))
  		return false;
  
  	/*
  	 * If skip is already cleared do no further checking once the
  	 * restart points have been set.
  	 */
  	if (check_source && check_target && !get_pageblock_skip(page))
  		return true;
  
  	/*
  	 * If clearing skip for the target scanner, do not select a
  	 * non-movable pageblock as the starting point.
  	 */
  	if (!check_source && check_target &&
  	    get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
  		return false;
6b0868c82   Mel Gorman   mm/compaction.c: ...
272
273
  	/* Ensure the start of the pageblock or zone is online and valid */
  	block_pfn = pageblock_start_pfn(pfn);
a2e9a5afc   Vlastimil Babka   mm, compaction: f...
274
275
  	block_pfn = max(block_pfn, zone->zone_start_pfn);
  	block_page = pfn_to_online_page(block_pfn);
6b0868c82   Mel Gorman   mm/compaction.c: ...
276
277
278
279
280
281
  	if (block_page) {
  		page = block_page;
  		pfn = block_pfn;
  	}
  
  	/* Ensure the end of the pageblock or zone is online and valid */
a2e9a5afc   Vlastimil Babka   mm, compaction: f...
282
  	block_pfn = pageblock_end_pfn(pfn) - 1;
6b0868c82   Mel Gorman   mm/compaction.c: ...
283
284
285
286
  	block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
  	end_page = pfn_to_online_page(block_pfn);
  	if (!end_page)
  		return false;
e332f741a   Mel Gorman   mm, compaction: b...
287
288
289
290
291
  	/*
  	 * Only clear the hint if a sample indicates there is either a
  	 * free page or an LRU page in the block. One or other condition
  	 * is necessary for the block to be a migration source/target.
  	 */
e332f741a   Mel Gorman   mm, compaction: b...
292
  	do {
859a85ddf   Mike Rapoport   mm: remove pfn_va...
293
294
295
296
  		if (check_source && PageLRU(page)) {
  			clear_pageblock_skip(page);
  			return true;
  		}
e332f741a   Mel Gorman   mm, compaction: b...
297

859a85ddf   Mike Rapoport   mm: remove pfn_va...
298
299
300
  		if (check_target && PageBuddy(page)) {
  			clear_pageblock_skip(page);
  			return true;
e332f741a   Mel Gorman   mm, compaction: b...
301
302
303
304
  		}
  
  		page += (1 << PAGE_ALLOC_COSTLY_ORDER);
  		pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
a2e9a5afc   Vlastimil Babka   mm, compaction: f...
305
  	} while (page <= end_page);
e332f741a   Mel Gorman   mm, compaction: b...
306
307
308
  
  	return false;
  }
21dc7e023   David Rientjes   mm, compaction: p...
309
  /*
bb13ffeb9   Mel Gorman   mm: compaction: c...
310
311
312
313
   * This function is called to clear all cached information on pageblocks that
   * should be skipped for page isolation when the migrate and free page scanner
   * meet.
   */
62997027c   Mel Gorman   mm: compaction: c...
314
  static void __reset_isolation_suitable(struct zone *zone)
bb13ffeb9   Mel Gorman   mm: compaction: c...
315
  {
e332f741a   Mel Gorman   mm, compaction: b...
316
  	unsigned long migrate_pfn = zone->zone_start_pfn;
6b0868c82   Mel Gorman   mm/compaction.c: ...
317
  	unsigned long free_pfn = zone_end_pfn(zone) - 1;
e332f741a   Mel Gorman   mm, compaction: b...
318
319
320
321
322
323
324
  	unsigned long reset_migrate = free_pfn;
  	unsigned long reset_free = migrate_pfn;
  	bool source_set = false;
  	bool free_set = false;
  
  	if (!zone->compact_blockskip_flush)
  		return;
bb13ffeb9   Mel Gorman   mm: compaction: c...
325

62997027c   Mel Gorman   mm: compaction: c...
326
  	zone->compact_blockskip_flush = false;
bb13ffeb9   Mel Gorman   mm: compaction: c...
327

e332f741a   Mel Gorman   mm, compaction: b...
328
329
330
331
332
333
334
335
  	/*
  	 * Walk the zone and update pageblock skip information. Source looks
  	 * for PageLRU while target looks for PageBuddy. When the scanner
  	 * is found, both PageBuddy and PageLRU are checked as the pageblock
  	 * is suitable as both source and target.
  	 */
  	for (; migrate_pfn < free_pfn; migrate_pfn += pageblock_nr_pages,
  					free_pfn -= pageblock_nr_pages) {
bb13ffeb9   Mel Gorman   mm: compaction: c...
336
  		cond_resched();
e332f741a   Mel Gorman   mm, compaction: b...
337
338
339
340
341
342
343
344
345
  		/* Update the migrate PFN */
  		if (__reset_isolation_pfn(zone, migrate_pfn, true, source_set) &&
  		    migrate_pfn < reset_migrate) {
  			source_set = true;
  			reset_migrate = migrate_pfn;
  			zone->compact_init_migrate_pfn = reset_migrate;
  			zone->compact_cached_migrate_pfn[0] = reset_migrate;
  			zone->compact_cached_migrate_pfn[1] = reset_migrate;
  		}
bb13ffeb9   Mel Gorman   mm: compaction: c...
346

e332f741a   Mel Gorman   mm, compaction: b...
347
348
349
350
351
352
353
354
  		/* Update the free PFN */
  		if (__reset_isolation_pfn(zone, free_pfn, free_set, true) &&
  		    free_pfn > reset_free) {
  			free_set = true;
  			reset_free = free_pfn;
  			zone->compact_init_free_pfn = reset_free;
  			zone->compact_cached_free_pfn = reset_free;
  		}
bb13ffeb9   Mel Gorman   mm: compaction: c...
355
  	}
02333641e   Vlastimil Babka   mm, compaction: e...
356

e332f741a   Mel Gorman   mm, compaction: b...
357
358
359
360
361
362
  	/* Leave no distance if no suitable block was reset */
  	if (reset_migrate >= reset_free) {
  		zone->compact_cached_migrate_pfn[0] = migrate_pfn;
  		zone->compact_cached_migrate_pfn[1] = migrate_pfn;
  		zone->compact_cached_free_pfn = free_pfn;
  	}
bb13ffeb9   Mel Gorman   mm: compaction: c...
363
  }
62997027c   Mel Gorman   mm: compaction: c...
364
365
366
367
368
369
370
371
372
373
374
375
376
377
  void reset_isolation_suitable(pg_data_t *pgdat)
  {
  	int zoneid;
  
  	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
  		struct zone *zone = &pgdat->node_zones[zoneid];
  		if (!populated_zone(zone))
  			continue;
  
  		/* Only flush if a full compaction finished recently */
  		if (zone->compact_blockskip_flush)
  			__reset_isolation_suitable(zone);
  	}
  }
bb13ffeb9   Mel Gorman   mm: compaction: c...
378
  /*
e380bebe4   Mel Gorman   mm, compaction: k...
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
   * Sets the pageblock skip bit if it was clear. Note that this is a hint as
   * locks are not required for read/writers. Returns true if it was already set.
   */
  static bool test_and_set_skip(struct compact_control *cc, struct page *page,
  							unsigned long pfn)
  {
  	bool skip;
  
  	/* Do no update if skip hint is being ignored */
  	if (cc->ignore_skip_hint)
  		return false;
  
  	if (!IS_ALIGNED(pfn, pageblock_nr_pages))
  		return false;
  
  	skip = get_pageblock_skip(page);
  	if (!skip && !cc->no_set_skip_hint)
  		set_pageblock_skip(page);
  
  	return skip;
  }
  
  static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
  {
  	struct zone *zone = cc->zone;
  
  	pfn = pageblock_end_pfn(pfn);
  
  	/* Set for isolation rather than compaction */
  	if (cc->no_set_skip_hint)
  		return;
  
  	if (pfn > zone->compact_cached_migrate_pfn[0])
  		zone->compact_cached_migrate_pfn[0] = pfn;
  	if (cc->mode != MIGRATE_ASYNC &&
  	    pfn > zone->compact_cached_migrate_pfn[1])
  		zone->compact_cached_migrate_pfn[1] = pfn;
  }
  
  /*
bb13ffeb9   Mel Gorman   mm: compaction: c...
419
   * If no pages were isolated then mark this pageblock to be skipped in the
62997027c   Mel Gorman   mm: compaction: c...
420
   * future. The information is later cleared by __reset_isolation_suitable().
bb13ffeb9   Mel Gorman   mm: compaction: c...
421
   */
c89511ab2   Mel Gorman   mm: compaction: R...
422
  static void update_pageblock_skip(struct compact_control *cc,
d097a6f63   Mel Gorman   mm, compaction: r...
423
  			struct page *page, unsigned long pfn)
bb13ffeb9   Mel Gorman   mm: compaction: c...
424
  {
c89511ab2   Mel Gorman   mm: compaction: R...
425
  	struct zone *zone = cc->zone;
6815bf3f2   Joonsoo Kim   mm/compaction: re...
426

2583d6713   Vlastimil Babka   mm, compaction: s...
427
  	if (cc->no_set_skip_hint)
6815bf3f2   Joonsoo Kim   mm/compaction: re...
428
  		return;
bb13ffeb9   Mel Gorman   mm: compaction: c...
429
430
  	if (!page)
  		return;
edc2ca612   Vlastimil Babka   mm, compaction: m...
431
  	set_pageblock_skip(page);
c89511ab2   Mel Gorman   mm: compaction: R...
432

35979ef33   David Rientjes   mm, compaction: a...
433
  	/* Update where async and sync compaction should restart */
e380bebe4   Mel Gorman   mm, compaction: k...
434
435
  	if (pfn < zone->compact_cached_free_pfn)
  		zone->compact_cached_free_pfn = pfn;
bb13ffeb9   Mel Gorman   mm: compaction: c...
436
437
438
439
440
441
442
  }
  #else
  static inline bool isolation_suitable(struct compact_control *cc,
  					struct page *page)
  {
  	return true;
  }
b527cfe5b   Vlastimil Babka   mm, compaction: e...
443
  static inline bool pageblock_skip_persistent(struct page *page)
21dc7e023   David Rientjes   mm, compaction: p...
444
445
446
447
448
  {
  	return false;
  }
  
  static inline void update_pageblock_skip(struct compact_control *cc,
d097a6f63   Mel Gorman   mm, compaction: r...
449
  			struct page *page, unsigned long pfn)
bb13ffeb9   Mel Gorman   mm: compaction: c...
450
451
  {
  }
e380bebe4   Mel Gorman   mm, compaction: k...
452
453
454
455
456
457
458
459
460
461
  
  static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
  {
  }
  
  static bool test_and_set_skip(struct compact_control *cc, struct page *page,
  							unsigned long pfn)
  {
  	return false;
  }
bb13ffeb9   Mel Gorman   mm: compaction: c...
462
  #endif /* CONFIG_COMPACTION */
8b44d2791   Vlastimil Babka   mm, compaction: p...
463
464
  /*
   * Compaction requires the taking of some coarse locks that are potentially
cb2dcaf02   Mel Gorman   mm, compaction: f...
465
466
467
468
   * very heavily contended. For async compaction, trylock and record if the
   * lock is contended. The lock will still be acquired but compaction will
   * abort when the current block is finished regardless of success rate.
   * Sync compaction acquires the lock.
8b44d2791   Vlastimil Babka   mm, compaction: p...
469
   *
cb2dcaf02   Mel Gorman   mm, compaction: f...
470
   * Always returns true which makes it easier to track lock state in callers.
8b44d2791   Vlastimil Babka   mm, compaction: p...
471
   */
cb2dcaf02   Mel Gorman   mm, compaction: f...
472
  static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
8b44d2791   Vlastimil Babka   mm, compaction: p...
473
  						struct compact_control *cc)
77337edee   Jules Irenge   mm/compaction: ad...
474
  	__acquires(lock)
2a1402aa0   Mel Gorman   mm: compaction: a...
475
  {
cb2dcaf02   Mel Gorman   mm, compaction: f...
476
477
478
479
480
481
  	/* Track if the lock is contended in async mode */
  	if (cc->mode == MIGRATE_ASYNC && !cc->contended) {
  		if (spin_trylock_irqsave(lock, *flags))
  			return true;
  
  		cc->contended = true;
8b44d2791   Vlastimil Babka   mm, compaction: p...
482
  	}
1f9efdef4   Vlastimil Babka   mm, compaction: k...
483

cb2dcaf02   Mel Gorman   mm, compaction: f...
484
  	spin_lock_irqsave(lock, *flags);
8b44d2791   Vlastimil Babka   mm, compaction: p...
485
  	return true;
2a1402aa0   Mel Gorman   mm: compaction: a...
486
  }
85aa125f0   Michal Nazarewicz   mm: compaction: i...
487
  /*
c67fe3752   Mel Gorman   mm: compaction: A...
488
   * Compaction requires the taking of some coarse locks that are potentially
8b44d2791   Vlastimil Babka   mm, compaction: p...
489
490
491
492
493
494
495
   * very heavily contended. The lock should be periodically unlocked to avoid
   * having disabled IRQs for a long time, even when there is nobody waiting on
   * the lock. It might also be that allowing the IRQs will result in
   * need_resched() becoming true. If scheduling is needed, async compaction
   * aborts. Sync compaction schedules.
   * Either compaction type will also abort if a fatal signal is pending.
   * In either case if the lock was locked, it is dropped and not regained.
c67fe3752   Mel Gorman   mm: compaction: A...
496
   *
8b44d2791   Vlastimil Babka   mm, compaction: p...
497
498
499
500
   * Returns true if compaction should abort due to fatal signal pending, or
   *		async compaction due to need_resched()
   * Returns false when compaction can continue (sync compaction might have
   *		scheduled)
c67fe3752   Mel Gorman   mm: compaction: A...
501
   */
8b44d2791   Vlastimil Babka   mm, compaction: p...
502
503
  static bool compact_unlock_should_abort(spinlock_t *lock,
  		unsigned long flags, bool *locked, struct compact_control *cc)
c67fe3752   Mel Gorman   mm: compaction: A...
504
  {
8b44d2791   Vlastimil Babka   mm, compaction: p...
505
506
507
508
  	if (*locked) {
  		spin_unlock_irqrestore(lock, flags);
  		*locked = false;
  	}
1f9efdef4   Vlastimil Babka   mm, compaction: k...
509

8b44d2791   Vlastimil Babka   mm, compaction: p...
510
  	if (fatal_signal_pending(current)) {
c3486f537   Vlastimil Babka   mm, compaction: s...
511
  		cc->contended = true;
8b44d2791   Vlastimil Babka   mm, compaction: p...
512
513
  		return true;
  	}
c67fe3752   Mel Gorman   mm: compaction: A...
514

cf66f0700   Mel Gorman   mm, compaction: d...
515
  	cond_resched();
be9765722   Vlastimil Babka   mm, compaction: p...
516
517
518
  
  	return false;
  }
c67fe3752   Mel Gorman   mm: compaction: A...
519
  /*
9e4be4708   Jerome Marchand   mm/compaction.c: ...
520
521
522
   * Isolate free pages onto a private freelist. If @strict is true, will abort
   * returning 0 on any invalid PFNs or non-free pages inside of the pageblock
   * (even though it may still end up isolating some pages).
85aa125f0   Michal Nazarewicz   mm: compaction: i...
523
   */
f40d1e42b   Mel Gorman   mm: compaction: a...
524
  static unsigned long isolate_freepages_block(struct compact_control *cc,
e14c720ef   Vlastimil Babka   mm, compaction: r...
525
  				unsigned long *start_pfn,
85aa125f0   Michal Nazarewicz   mm: compaction: i...
526
527
  				unsigned long end_pfn,
  				struct list_head *freelist,
4fca9730c   Mel Gorman   mm, compaction: s...
528
  				unsigned int stride,
85aa125f0   Michal Nazarewicz   mm: compaction: i...
529
  				bool strict)
748446bb6   Mel Gorman   mm: compaction: m...
530
  {
b7aba6984   Mel Gorman   mm: compaction: a...
531
  	int nr_scanned = 0, total_isolated = 0;
d097a6f63   Mel Gorman   mm, compaction: r...
532
  	struct page *cursor;
b8b2d8253   Xiubo Li   mm/compaction.c: ...
533
  	unsigned long flags = 0;
f40d1e42b   Mel Gorman   mm: compaction: a...
534
  	bool locked = false;
e14c720ef   Vlastimil Babka   mm, compaction: r...
535
  	unsigned long blockpfn = *start_pfn;
66c64223a   Joonsoo Kim   mm/compaction: sp...
536
  	unsigned int order;
748446bb6   Mel Gorman   mm: compaction: m...
537

4fca9730c   Mel Gorman   mm, compaction: s...
538
539
540
  	/* Strict mode is for isolation, speed is secondary */
  	if (strict)
  		stride = 1;
748446bb6   Mel Gorman   mm: compaction: m...
541
  	cursor = pfn_to_page(blockpfn);
f40d1e42b   Mel Gorman   mm: compaction: a...
542
  	/* Isolate free pages. */
4fca9730c   Mel Gorman   mm, compaction: s...
543
  	for (; blockpfn < end_pfn; blockpfn += stride, cursor += stride) {
66c64223a   Joonsoo Kim   mm/compaction: sp...
544
  		int isolated;
748446bb6   Mel Gorman   mm: compaction: m...
545
  		struct page *page = cursor;
8b44d2791   Vlastimil Babka   mm, compaction: p...
546
547
548
549
550
551
552
553
554
  		/*
  		 * Periodically drop the lock (if held) regardless of its
  		 * contention, to give chance to IRQs. Abort if fatal signal
  		 * pending or async compaction detects need_resched()
  		 */
  		if (!(blockpfn % SWAP_CLUSTER_MAX)
  		    && compact_unlock_should_abort(&cc->zone->lock, flags,
  								&locked, cc))
  			break;
b7aba6984   Mel Gorman   mm: compaction: a...
555
  		nr_scanned++;
2af120bc0   Laura Abbott   mm/compaction: br...
556

9fcd6d2e0   Vlastimil Babka   mm, compaction: s...
557
558
559
560
561
562
563
  		/*
  		 * For compound pages such as THP and hugetlbfs, we can save
  		 * potentially a lot of iterations if we skip them at once.
  		 * The check is racy, but we can consider only valid values
  		 * and the only danger is skipping too much.
  		 */
  		if (PageCompound(page)) {
21dc7e023   David Rientjes   mm, compaction: p...
564
  			const unsigned int order = compound_order(page);
d3c85bad8   Vlastimil Babka   mm, compaction: r...
565
  			if (likely(order < MAX_ORDER)) {
21dc7e023   David Rientjes   mm, compaction: p...
566
567
  				blockpfn += (1UL << order) - 1;
  				cursor += (1UL << order) - 1;
9fcd6d2e0   Vlastimil Babka   mm, compaction: s...
568
  			}
9fcd6d2e0   Vlastimil Babka   mm, compaction: s...
569
570
  			goto isolate_fail;
  		}
f40d1e42b   Mel Gorman   mm: compaction: a...
571
  		if (!PageBuddy(page))
2af120bc0   Laura Abbott   mm/compaction: br...
572
  			goto isolate_fail;
f40d1e42b   Mel Gorman   mm: compaction: a...
573
574
  
  		/*
69b7189f1   Vlastimil Babka   mm, compaction: s...
575
576
577
578
579
  		 * If we already hold the lock, we can skip some rechecking.
  		 * Note that if we hold the lock now, checked_pageblock was
  		 * already set in some previous iteration (or strict is true),
  		 * so it is correct to skip the suitable migration target
  		 * recheck as well.
f40d1e42b   Mel Gorman   mm: compaction: a...
580
  		 */
69b7189f1   Vlastimil Babka   mm, compaction: s...
581
  		if (!locked) {
cb2dcaf02   Mel Gorman   mm, compaction: f...
582
  			locked = compact_lock_irqsave(&cc->zone->lock,
8b44d2791   Vlastimil Babka   mm, compaction: p...
583
  								&flags, cc);
f40d1e42b   Mel Gorman   mm: compaction: a...
584

69b7189f1   Vlastimil Babka   mm, compaction: s...
585
586
587
588
  			/* Recheck this is a buddy page under lock */
  			if (!PageBuddy(page))
  				goto isolate_fail;
  		}
748446bb6   Mel Gorman   mm: compaction: m...
589

66c64223a   Joonsoo Kim   mm/compaction: sp...
590
  		/* Found a free page, will break it into order-0 pages */
ab130f910   Matthew Wilcox (Oracle)   mm: rename page_o...
591
  		order = buddy_order(page);
66c64223a   Joonsoo Kim   mm/compaction: sp...
592
  		isolated = __isolate_free_page(page, order);
a4f04f2c6   David Rientjes   mm, compaction: a...
593
594
  		if (!isolated)
  			break;
66c64223a   Joonsoo Kim   mm/compaction: sp...
595
  		set_page_private(page, order);
a4f04f2c6   David Rientjes   mm, compaction: a...
596

748446bb6   Mel Gorman   mm: compaction: m...
597
  		total_isolated += isolated;
a4f04f2c6   David Rientjes   mm, compaction: a...
598
  		cc->nr_freepages += isolated;
66c64223a   Joonsoo Kim   mm/compaction: sp...
599
  		list_add_tail(&page->lru, freelist);
a4f04f2c6   David Rientjes   mm, compaction: a...
600
601
602
  		if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
  			blockpfn += isolated;
  			break;
748446bb6   Mel Gorman   mm: compaction: m...
603
  		}
a4f04f2c6   David Rientjes   mm, compaction: a...
604
605
606
607
  		/* Advance to the end of split page */
  		blockpfn += isolated - 1;
  		cursor += isolated - 1;
  		continue;
2af120bc0   Laura Abbott   mm/compaction: br...
608
609
610
611
612
613
  
  isolate_fail:
  		if (strict)
  			break;
  		else
  			continue;
748446bb6   Mel Gorman   mm: compaction: m...
614
  	}
a4f04f2c6   David Rientjes   mm, compaction: a...
615
616
  	if (locked)
  		spin_unlock_irqrestore(&cc->zone->lock, flags);
9fcd6d2e0   Vlastimil Babka   mm, compaction: s...
617
618
619
620
621
622
  	/*
  	 * There is a tiny chance that we have read bogus compound_order(),
  	 * so be careful to not go outside of the pageblock.
  	 */
  	if (unlikely(blockpfn > end_pfn))
  		blockpfn = end_pfn;
e34d85f0e   Joonsoo Kim   mm/compaction: pr...
623
624
  	trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn,
  					nr_scanned, total_isolated);
e14c720ef   Vlastimil Babka   mm, compaction: r...
625
626
  	/* Record how far we have got within the block */
  	*start_pfn = blockpfn;
f40d1e42b   Mel Gorman   mm: compaction: a...
627
628
629
630
631
  	/*
  	 * If strict isolation is requested by CMA then check that all the
  	 * pages requested were isolated. If there were any failures, 0 is
  	 * returned and CMA will fail.
  	 */
2af120bc0   Laura Abbott   mm/compaction: br...
632
  	if (strict && blockpfn < end_pfn)
f40d1e42b   Mel Gorman   mm: compaction: a...
633
  		total_isolated = 0;
7f354a548   David Rientjes   mm, compaction: a...
634
  	cc->total_free_scanned += nr_scanned;
397487db6   Mel Gorman   mm: compaction: A...
635
  	if (total_isolated)
010fc29a4   Minchan Kim   compaction: fix b...
636
  		count_compact_events(COMPACTISOLATED, total_isolated);
748446bb6   Mel Gorman   mm: compaction: m...
637
638
  	return total_isolated;
  }
85aa125f0   Michal Nazarewicz   mm: compaction: i...
639
640
  /**
   * isolate_freepages_range() - isolate free pages.
e8b098fc5   Mike Rapoport   mm: kernel-doc: a...
641
   * @cc:        Compaction control structure.
85aa125f0   Michal Nazarewicz   mm: compaction: i...
642
643
644
645
646
647
648
649
650
651
652
   * @start_pfn: The first PFN to start isolating.
   * @end_pfn:   The one-past-last PFN.
   *
   * Non-free pages, invalid PFNs, or zone boundaries within the
   * [start_pfn, end_pfn) range are considered errors, cause function to
   * undo its actions and return zero.
   *
   * Otherwise, function returns one-past-the-last PFN of isolated page
   * (which may be greater then end_pfn if end fell in a middle of
   * a free page).
   */
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
653
  unsigned long
bb13ffeb9   Mel Gorman   mm: compaction: c...
654
655
  isolate_freepages_range(struct compact_control *cc,
  			unsigned long start_pfn, unsigned long end_pfn)
85aa125f0   Michal Nazarewicz   mm: compaction: i...
656
  {
e1409c325   Joonsoo Kim   mm/compaction: pa...
657
  	unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
85aa125f0   Michal Nazarewicz   mm: compaction: i...
658
  	LIST_HEAD(freelist);
7d49d8868   Vlastimil Babka   mm, compaction: r...
659
  	pfn = start_pfn;
06b6640a3   Vlastimil Babka   mm, compaction: w...
660
  	block_start_pfn = pageblock_start_pfn(pfn);
e1409c325   Joonsoo Kim   mm/compaction: pa...
661
662
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
06b6640a3   Vlastimil Babka   mm, compaction: w...
663
  	block_end_pfn = pageblock_end_pfn(pfn);
7d49d8868   Vlastimil Babka   mm, compaction: r...
664
665
  
  	for (; pfn < end_pfn; pfn += isolated,
e1409c325   Joonsoo Kim   mm/compaction: pa...
666
  				block_start_pfn = block_end_pfn,
7d49d8868   Vlastimil Babka   mm, compaction: r...
667
  				block_end_pfn += pageblock_nr_pages) {
e14c720ef   Vlastimil Babka   mm, compaction: r...
668
669
  		/* Protect pfn from changing by isolate_freepages_block */
  		unsigned long isolate_start_pfn = pfn;
85aa125f0   Michal Nazarewicz   mm: compaction: i...
670

85aa125f0   Michal Nazarewicz   mm: compaction: i...
671
  		block_end_pfn = min(block_end_pfn, end_pfn);
584200163   Joonsoo Kim   mm/compaction: sk...
672
673
674
675
676
677
  		/*
  		 * pfn could pass the block_end_pfn if isolated freepage
  		 * is more than pageblock order. In this case, we adjust
  		 * scanning range to right one.
  		 */
  		if (pfn >= block_end_pfn) {
06b6640a3   Vlastimil Babka   mm, compaction: w...
678
679
  			block_start_pfn = pageblock_start_pfn(pfn);
  			block_end_pfn = pageblock_end_pfn(pfn);
584200163   Joonsoo Kim   mm/compaction: sk...
680
681
  			block_end_pfn = min(block_end_pfn, end_pfn);
  		}
e1409c325   Joonsoo Kim   mm/compaction: pa...
682
683
  		if (!pageblock_pfn_to_page(block_start_pfn,
  					block_end_pfn, cc->zone))
7d49d8868   Vlastimil Babka   mm, compaction: r...
684
  			break;
e14c720ef   Vlastimil Babka   mm, compaction: r...
685
  		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
4fca9730c   Mel Gorman   mm, compaction: s...
686
  					block_end_pfn, &freelist, 0, true);
85aa125f0   Michal Nazarewicz   mm: compaction: i...
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
  
  		/*
  		 * In strict mode, isolate_freepages_block() returns 0 if
  		 * there are any holes in the block (ie. invalid PFNs or
  		 * non-free pages).
  		 */
  		if (!isolated)
  			break;
  
  		/*
  		 * If we managed to isolate pages, it is always (1 << n) *
  		 * pageblock_nr_pages for some non-negative n.  (Max order
  		 * page may span two pageblocks).
  		 */
  	}
66c64223a   Joonsoo Kim   mm/compaction: sp...
702
  	/* __isolate_free_page() does not map the pages */
4469ab984   Mel Gorman   mm, compaction: r...
703
  	split_map_pages(&freelist);
85aa125f0   Michal Nazarewicz   mm: compaction: i...
704
705
706
707
708
709
710
711
712
713
  
  	if (pfn < end_pfn) {
  		/* Loop terminated early, cleanup. */
  		release_freepages(&freelist);
  		return 0;
  	}
  
  	/* We don't use freelists for anything. */
  	return pfn;
  }
748446bb6   Mel Gorman   mm: compaction: m...
714
  /* Similar to reclaim, but different enough that they don't share logic */
5f438eee8   Andrey Ryabinin   mm/compaction: pa...
715
  static bool too_many_isolated(pg_data_t *pgdat)
748446bb6   Mel Gorman   mm: compaction: m...
716
  {
bc6930457   Minchan Kim   mm: compaction: h...
717
  	unsigned long active, inactive, isolated;
748446bb6   Mel Gorman   mm: compaction: m...
718

5f438eee8   Andrey Ryabinin   mm/compaction: pa...
719
720
721
722
723
724
  	inactive = node_page_state(pgdat, NR_INACTIVE_FILE) +
  			node_page_state(pgdat, NR_INACTIVE_ANON);
  	active = node_page_state(pgdat, NR_ACTIVE_FILE) +
  			node_page_state(pgdat, NR_ACTIVE_ANON);
  	isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
  			node_page_state(pgdat, NR_ISOLATED_ANON);
748446bb6   Mel Gorman   mm: compaction: m...
725

bc6930457   Minchan Kim   mm: compaction: h...
726
  	return isolated > (inactive + active) / 2;
748446bb6   Mel Gorman   mm: compaction: m...
727
  }
2fe86e000   Michal Nazarewicz   mm: compaction: i...
728
  /**
edc2ca612   Vlastimil Babka   mm, compaction: m...
729
730
   * isolate_migratepages_block() - isolate all migrate-able pages within
   *				  a single pageblock
2fe86e000   Michal Nazarewicz   mm: compaction: i...
731
   * @cc:		Compaction control structure.
edc2ca612   Vlastimil Babka   mm, compaction: m...
732
733
734
   * @low_pfn:	The first PFN to isolate
   * @end_pfn:	The one-past-the-last PFN to isolate, within same pageblock
   * @isolate_mode: Isolation mode to be used.
2fe86e000   Michal Nazarewicz   mm: compaction: i...
735
736
   *
   * Isolate all pages that can be migrated from the range specified by
edc2ca612   Vlastimil Babka   mm, compaction: m...
737
   * [low_pfn, end_pfn). The range is expected to be within same pageblock.
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
738
   * Returns errno, like -EAGAIN or -EINTR in case e.g signal pending or congestion,
369fa227c   Oscar Salvador   mm: make alloc_co...
739
   * -ENOMEM in case we could not allocate a page, or 0.
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
740
   * cc->migrate_pfn will contain the next pfn to scan.
2fe86e000   Michal Nazarewicz   mm: compaction: i...
741
   *
edc2ca612   Vlastimil Babka   mm, compaction: m...
742
   * The pages are isolated on cc->migratepages list (not required to be empty),
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
743
   * and cc->nr_migratepages is updated accordingly.
748446bb6   Mel Gorman   mm: compaction: m...
744
   */
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
745
  static int
edc2ca612   Vlastimil Babka   mm, compaction: m...
746
747
  isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
  			unsigned long end_pfn, isolate_mode_t isolate_mode)
748446bb6   Mel Gorman   mm: compaction: m...
748
  {
5f438eee8   Andrey Ryabinin   mm/compaction: pa...
749
  	pg_data_t *pgdat = cc->zone->zone_pgdat;
b7aba6984   Mel Gorman   mm: compaction: a...
750
  	unsigned long nr_scanned = 0, nr_isolated = 0;
fa9add641   Hugh Dickins   mm/memcg: apply a...
751
  	struct lruvec *lruvec;
b8b2d8253   Xiubo Li   mm/compaction.c: ...
752
  	unsigned long flags = 0;
6168d0da2   Alex Shi   mm/lru: replace p...
753
  	struct lruvec *locked = NULL;
bb13ffeb9   Mel Gorman   mm: compaction: c...
754
  	struct page *page = NULL, *valid_page = NULL;
e34d85f0e   Joonsoo Kim   mm/compaction: pr...
755
  	unsigned long start_pfn = low_pfn;
fdd048e12   Vlastimil Babka   mm, compaction: s...
756
757
  	bool skip_on_failure = false;
  	unsigned long next_skip_pfn = 0;
e380bebe4   Mel Gorman   mm, compaction: k...
758
  	bool skip_updated = false;
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
759
760
761
  	int ret = 0;
  
  	cc->migrate_pfn = low_pfn;
748446bb6   Mel Gorman   mm: compaction: m...
762

748446bb6   Mel Gorman   mm: compaction: m...
763
764
765
766
767
  	/*
  	 * Ensure that there are not too many pages isolated from the LRU
  	 * list by either parallel reclaimers or compaction. If there are,
  	 * delay for some time until fewer pages are isolated
  	 */
5f438eee8   Andrey Ryabinin   mm/compaction: pa...
768
  	while (unlikely(too_many_isolated(pgdat))) {
d20bdd571   Zi Yan   mm/compaction: st...
769
770
  		/* stop isolation if there are still pages not migrated */
  		if (cc->nr_migratepages)
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
771
  			return -EAGAIN;
d20bdd571   Zi Yan   mm/compaction: st...
772

f9e35b3b4   Mel Gorman   mm: compaction: a...
773
  		/* async migration should just abort */
e0b9daeb4   David Rientjes   mm, compaction: e...
774
  		if (cc->mode == MIGRATE_ASYNC)
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
775
  			return -EAGAIN;
f9e35b3b4   Mel Gorman   mm: compaction: a...
776

748446bb6   Mel Gorman   mm: compaction: m...
777
778
779
  		congestion_wait(BLK_RW_ASYNC, HZ/10);
  
  		if (fatal_signal_pending(current))
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
780
  			return -EINTR;
748446bb6   Mel Gorman   mm: compaction: m...
781
  	}
cf66f0700   Mel Gorman   mm, compaction: d...
782
  	cond_resched();
aeef4b838   David Rientjes   mm, compaction: t...
783

fdd048e12   Vlastimil Babka   mm, compaction: s...
784
785
786
787
  	if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
  		skip_on_failure = true;
  		next_skip_pfn = block_end_pfn(low_pfn, cc->order);
  	}
748446bb6   Mel Gorman   mm: compaction: m...
788
  	/* Time to isolate some pages for migration */
748446bb6   Mel Gorman   mm: compaction: m...
789
  	for (; low_pfn < end_pfn; low_pfn++) {
29c0dde83   Vlastimil Babka   mm, compaction: a...
790

fdd048e12   Vlastimil Babka   mm, compaction: s...
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
  		if (skip_on_failure && low_pfn >= next_skip_pfn) {
  			/*
  			 * We have isolated all migration candidates in the
  			 * previous order-aligned block, and did not skip it due
  			 * to failure. We should migrate the pages now and
  			 * hopefully succeed compaction.
  			 */
  			if (nr_isolated)
  				break;
  
  			/*
  			 * We failed to isolate in the previous order-aligned
  			 * block. Set the new boundary to the end of the
  			 * current block. Note we can't simply increase
  			 * next_skip_pfn by 1 << order, as low_pfn might have
  			 * been incremented by a higher number due to skipping
  			 * a compound or a high-order buddy page in the
  			 * previous loop iteration.
  			 */
  			next_skip_pfn = block_end_pfn(low_pfn, cc->order);
  		}
8b44d2791   Vlastimil Babka   mm, compaction: p...
812
813
  		/*
  		 * Periodically drop the lock (if held) regardless of its
670105a25   Mel Gorman   mm: compaction: a...
814
815
  		 * contention, to give chance to IRQs. Abort completely if
  		 * a fatal signal is pending.
8b44d2791   Vlastimil Babka   mm, compaction: p...
816
  		 */
6168d0da2   Alex Shi   mm/lru: replace p...
817
818
819
820
821
822
823
824
  		if (!(low_pfn % SWAP_CLUSTER_MAX)) {
  			if (locked) {
  				unlock_page_lruvec_irqrestore(locked, flags);
  				locked = NULL;
  			}
  
  			if (fatal_signal_pending(current)) {
  				cc->contended = true;
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
825
  				ret = -EINTR;
6168d0da2   Alex Shi   mm/lru: replace p...
826

6168d0da2   Alex Shi   mm/lru: replace p...
827
828
829
830
  				goto fatal_pending;
  			}
  
  			cond_resched();
670105a25   Mel Gorman   mm: compaction: a...
831
  		}
c67fe3752   Mel Gorman   mm: compaction: A...
832

b7aba6984   Mel Gorman   mm: compaction: a...
833
  		nr_scanned++;
748446bb6   Mel Gorman   mm: compaction: m...
834

748446bb6   Mel Gorman   mm: compaction: m...
835
  		page = pfn_to_page(low_pfn);
dc9086004   Mel Gorman   mm: compaction: c...
836

e380bebe4   Mel Gorman   mm, compaction: k...
837
838
839
840
841
842
843
844
845
  		/*
  		 * Check if the pageblock has already been marked skipped.
  		 * Only the aligned PFN is checked as the caller isolates
  		 * COMPACT_CLUSTER_MAX at a time so the second call must
  		 * not falsely conclude that the block should be skipped.
  		 */
  		if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) {
  			if (!cc->ignore_skip_hint && get_pageblock_skip(page)) {
  				low_pfn = end_pfn;
9df413143   Alex Shi   mm/compaction: do...
846
  				page = NULL;
e380bebe4   Mel Gorman   mm, compaction: k...
847
848
  				goto isolate_abort;
  			}
bb13ffeb9   Mel Gorman   mm: compaction: c...
849
  			valid_page = page;
e380bebe4   Mel Gorman   mm, compaction: k...
850
  		}
bb13ffeb9   Mel Gorman   mm: compaction: c...
851

369fa227c   Oscar Salvador   mm: make alloc_co...
852
  		if (PageHuge(page) && cc->alloc_contig) {
ae37c7ff7   Oscar Salvador   mm: make alloc_co...
853
  			ret = isolate_or_dissolve_huge_page(page, &cc->migratepages);
369fa227c   Oscar Salvador   mm: make alloc_co...
854
855
856
857
858
859
860
861
862
863
864
865
  
  			/*
  			 * Fail isolation in case isolate_or_dissolve_huge_page()
  			 * reports an error. In case of -ENOMEM, abort right away.
  			 */
  			if (ret < 0) {
  				 /* Do not report -EBUSY down the chain */
  				if (ret == -EBUSY)
  					ret = 0;
  				low_pfn += (1UL << compound_order(page)) - 1;
  				goto isolate_fail;
  			}
ae37c7ff7   Oscar Salvador   mm: make alloc_co...
866
867
868
869
870
871
872
873
  			if (PageHuge(page)) {
  				/*
  				 * Hugepage was successfully isolated and placed
  				 * on the cc->migratepages list.
  				 */
  				low_pfn += compound_nr(page) - 1;
  				goto isolate_success_no_list;
  			}
369fa227c   Oscar Salvador   mm: make alloc_co...
874
875
876
877
878
879
880
  			/*
  			 * Ok, the hugepage was dissolved. Now these pages are
  			 * Buddy and cannot be re-allocated because they are
  			 * isolated. Fall-through as the check below handles
  			 * Buddy pages.
  			 */
  		}
6c14466cc   Mel Gorman   mm: improve docum...
881
  		/*
99c0fd5e5   Vlastimil Babka   mm, compaction: s...
882
883
884
885
  		 * Skip if free. We read page order here without zone lock
  		 * which is generally unsafe, but the race window is small and
  		 * the worst thing that can happen is that we skip some
  		 * potential isolation targets.
6c14466cc   Mel Gorman   mm: improve docum...
886
  		 */
99c0fd5e5   Vlastimil Babka   mm, compaction: s...
887
  		if (PageBuddy(page)) {
ab130f910   Matthew Wilcox (Oracle)   mm: rename page_o...
888
  			unsigned long freepage_order = buddy_order_unsafe(page);
99c0fd5e5   Vlastimil Babka   mm, compaction: s...
889
890
891
892
893
894
895
896
  
  			/*
  			 * Without lock, we cannot be sure that what we got is
  			 * a valid page order. Consider only values in the
  			 * valid order range to prevent low_pfn overflow.
  			 */
  			if (freepage_order > 0 && freepage_order < MAX_ORDER)
  				low_pfn += (1UL << freepage_order) - 1;
748446bb6   Mel Gorman   mm: compaction: m...
897
  			continue;
99c0fd5e5   Vlastimil Babka   mm, compaction: s...
898
  		}
748446bb6   Mel Gorman   mm: compaction: m...
899

9927af740   Mel Gorman   mm: compaction: p...
900
  		/*
29c0dde83   Vlastimil Babka   mm, compaction: a...
901
  		 * Regardless of being on LRU, compound pages such as THP and
1da2f328f   Rik van Riel   mm,thp,compaction...
902
903
904
905
906
  		 * hugetlbfs are not to be compacted unless we are attempting
  		 * an allocation much larger than the huge page size (eg CMA).
  		 * We can potentially save a lot of iterations if we skip them
  		 * at once. The check is racy, but we can consider only valid
  		 * values and the only danger is skipping too much.
bc835011a   Andrea Arcangeli   thp: transhuge is...
907
  		 */
1da2f328f   Rik van Riel   mm,thp,compaction...
908
  		if (PageCompound(page) && !cc->alloc_contig) {
21dc7e023   David Rientjes   mm, compaction: p...
909
  			const unsigned int order = compound_order(page);
edc2ca612   Vlastimil Babka   mm, compaction: m...
910

d3c85bad8   Vlastimil Babka   mm, compaction: r...
911
  			if (likely(order < MAX_ORDER))
21dc7e023   David Rientjes   mm, compaction: p...
912
  				low_pfn += (1UL << order) - 1;
fdd048e12   Vlastimil Babka   mm, compaction: s...
913
  			goto isolate_fail;
2a1402aa0   Mel Gorman   mm: compaction: a...
914
  		}
bda807d44   Minchan Kim   mm: migrate: supp...
915
916
917
918
919
920
  		/*
  		 * Check may be lockless but that's ok as we recheck later.
  		 * It's possible to migrate LRU and non-lru movable pages.
  		 * Skip any other type of page
  		 */
  		if (!PageLRU(page)) {
bda807d44   Minchan Kim   mm: migrate: supp...
921
922
923
924
925
926
927
  			/*
  			 * __PageMovable can return false positive so we need
  			 * to verify it under page_lock.
  			 */
  			if (unlikely(__PageMovable(page)) &&
  					!PageIsolated(page)) {
  				if (locked) {
6168d0da2   Alex Shi   mm/lru: replace p...
928
929
  					unlock_page_lruvec_irqrestore(locked, flags);
  					locked = NULL;
bda807d44   Minchan Kim   mm: migrate: supp...
930
  				}
9e5bcd610   Yisheng Xie   mm/migration: mak...
931
  				if (!isolate_movable_page(page, isolate_mode))
bda807d44   Minchan Kim   mm: migrate: supp...
932
933
  					goto isolate_success;
  			}
fdd048e12   Vlastimil Babka   mm, compaction: s...
934
  			goto isolate_fail;
bda807d44   Minchan Kim   mm: migrate: supp...
935
  		}
29c0dde83   Vlastimil Babka   mm, compaction: a...
936

119d6d59d   David Rientjes   mm, compaction: a...
937
938
939
940
941
942
943
  		/*
  		 * Migration will fail if an anonymous page is pinned in memory,
  		 * so avoid taking lru_lock and isolating it unnecessarily in an
  		 * admittedly racy check.
  		 */
  		if (!page_mapping(page) &&
  		    page_count(page) > page_mapcount(page))
fdd048e12   Vlastimil Babka   mm, compaction: s...
944
  			goto isolate_fail;
119d6d59d   David Rientjes   mm, compaction: a...
945

73e64c51a   Michal Hocko   mm, compaction: a...
946
947
948
949
950
951
  		/*
  		 * Only allow to migrate anonymous pages in GFP_NOFS context
  		 * because those do not depend on fs locks.
  		 */
  		if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
  			goto isolate_fail;
9df413143   Alex Shi   mm/compaction: do...
952
953
954
955
956
957
958
  		/*
  		 * Be careful not to clear PageLRU until after we're
  		 * sure the page is not being freed elsewhere -- the
  		 * page release code relies on it.
  		 */
  		if (unlikely(!get_page_unless_zero(page)))
  			goto isolate_fail;
c2135f7c5   Alex Shi   mm/vmscan: __isol...
959
  		if (!__isolate_lru_page_prepare(page, isolate_mode))
9df413143   Alex Shi   mm/compaction: do...
960
961
962
963
964
  			goto isolate_fail_put;
  
  		/* Try isolate the page */
  		if (!TestClearPageLRU(page))
  			goto isolate_fail_put;
a984226f4   Muchun Song   mm: memcontrol: r...
965
  		lruvec = mem_cgroup_page_lruvec(page);
6168d0da2   Alex Shi   mm/lru: replace p...
966

69b7189f1   Vlastimil Babka   mm, compaction: s...
967
  		/* If we already hold the lock, we can skip some rechecking */
6168d0da2   Alex Shi   mm/lru: replace p...
968
969
970
971
972
973
  		if (lruvec != locked) {
  			if (locked)
  				unlock_page_lruvec_irqrestore(locked, flags);
  
  			compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
  			locked = lruvec;
6168d0da2   Alex Shi   mm/lru: replace p...
974
975
  
  			lruvec_memcg_debug(lruvec, page);
e380bebe4   Mel Gorman   mm, compaction: k...
976

e380bebe4   Mel Gorman   mm, compaction: k...
977
978
979
980
981
982
  			/* Try get exclusive access under lock */
  			if (!skip_updated) {
  				skip_updated = true;
  				if (test_and_set_skip(cc, page, low_pfn))
  					goto isolate_abort;
  			}
2a1402aa0   Mel Gorman   mm: compaction: a...
983

29c0dde83   Vlastimil Babka   mm, compaction: a...
984
985
986
987
988
  			/*
  			 * Page become compound since the non-locked check,
  			 * and it's on LRU. It can only be a THP so the order
  			 * is safe to read and it's 0 for tail pages.
  			 */
1da2f328f   Rik van Riel   mm,thp,compaction...
989
  			if (unlikely(PageCompound(page) && !cc->alloc_contig)) {
d8c6546b1   Matthew Wilcox (Oracle)   mm: introduce com...
990
  				low_pfn += compound_nr(page) - 1;
9df413143   Alex Shi   mm/compaction: do...
991
992
  				SetPageLRU(page);
  				goto isolate_fail_put;
69b7189f1   Vlastimil Babka   mm, compaction: s...
993
  			}
d99fd5feb   Alex Shi   mm/compaction: re...
994
  		}
fa9add641   Hugh Dickins   mm/memcg: apply a...
995

1da2f328f   Rik van Riel   mm,thp,compaction...
996
997
998
  		/* The whole page is taken off the LRU; skip the tail pages. */
  		if (PageCompound(page))
  			low_pfn += compound_nr(page) - 1;
bc835011a   Andrea Arcangeli   thp: transhuge is...
999

748446bb6   Mel Gorman   mm: compaction: m...
1000
  		/* Successfully isolated */
46ae6b2cc   Yu Zhao   mm/swap.c: don't ...
1001
  		del_page_from_lru_list(page, lruvec);
1da2f328f   Rik van Riel   mm,thp,compaction...
1002
  		mod_node_page_state(page_pgdat(page),
9de4f22a6   Huang Ying   mm: code cleanup ...
1003
  				NR_ISOLATED_ANON + page_is_file_lru(page),
6c357848b   Matthew Wilcox (Oracle)   mm: replace hpage...
1004
  				thp_nr_pages(page));
b6c750163   Joonsoo Kim   mm/compaction: cl...
1005
1006
  
  isolate_success:
fdd048e12   Vlastimil Babka   mm, compaction: s...
1007
  		list_add(&page->lru, &cc->migratepages);
ae37c7ff7   Oscar Salvador   mm: make alloc_co...
1008
  isolate_success_no_list:
38935861d   Zi Yan   mm/compaction: co...
1009
1010
  		cc->nr_migratepages += compound_nr(page);
  		nr_isolated += compound_nr(page);
748446bb6   Mel Gorman   mm: compaction: m...
1011

804d3121b   Mel Gorman   mm, compaction: a...
1012
1013
  		/*
  		 * Avoid isolating too much unless this block is being
cb2dcaf02   Mel Gorman   mm, compaction: f...
1014
1015
1016
  		 * rescanned (e.g. dirty/writeback pages, parallel allocation)
  		 * or a lock is contended. For contention, isolate quickly to
  		 * potentially remove one source of contention.
804d3121b   Mel Gorman   mm, compaction: a...
1017
  		 */
38935861d   Zi Yan   mm/compaction: co...
1018
  		if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX &&
cb2dcaf02   Mel Gorman   mm, compaction: f...
1019
  		    !cc->rescan && !cc->contended) {
31b8384a5   Hillf Danton   mm: compaction: p...
1020
  			++low_pfn;
748446bb6   Mel Gorman   mm: compaction: m...
1021
  			break;
31b8384a5   Hillf Danton   mm: compaction: p...
1022
  		}
fdd048e12   Vlastimil Babka   mm, compaction: s...
1023
1024
  
  		continue;
9df413143   Alex Shi   mm/compaction: do...
1025
1026
1027
1028
  
  isolate_fail_put:
  		/* Avoid potential deadlock in freeing page under lru_lock */
  		if (locked) {
6168d0da2   Alex Shi   mm/lru: replace p...
1029
1030
  			unlock_page_lruvec_irqrestore(locked, flags);
  			locked = NULL;
9df413143   Alex Shi   mm/compaction: do...
1031
1032
  		}
  		put_page(page);
fdd048e12   Vlastimil Babka   mm, compaction: s...
1033
  isolate_fail:
369fa227c   Oscar Salvador   mm: make alloc_co...
1034
  		if (!skip_on_failure && ret != -ENOMEM)
fdd048e12   Vlastimil Babka   mm, compaction: s...
1035
1036
1037
1038
1039
1040
1041
1042
1043
  			continue;
  
  		/*
  		 * We have isolated some pages, but then failed. Release them
  		 * instead of migrating, as we cannot form the cc->order buddy
  		 * page anyway.
  		 */
  		if (nr_isolated) {
  			if (locked) {
6168d0da2   Alex Shi   mm/lru: replace p...
1044
1045
  				unlock_page_lruvec_irqrestore(locked, flags);
  				locked = NULL;
fdd048e12   Vlastimil Babka   mm, compaction: s...
1046
  			}
fdd048e12   Vlastimil Babka   mm, compaction: s...
1047
1048
  			putback_movable_pages(&cc->migratepages);
  			cc->nr_migratepages = 0;
fdd048e12   Vlastimil Babka   mm, compaction: s...
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
  			nr_isolated = 0;
  		}
  
  		if (low_pfn < next_skip_pfn) {
  			low_pfn = next_skip_pfn - 1;
  			/*
  			 * The check near the loop beginning would have updated
  			 * next_skip_pfn too, but this is a bit simpler.
  			 */
  			next_skip_pfn += 1UL << cc->order;
  		}
369fa227c   Oscar Salvador   mm: make alloc_co...
1060
1061
1062
  
  		if (ret == -ENOMEM)
  			break;
748446bb6   Mel Gorman   mm: compaction: m...
1063
  	}
99c0fd5e5   Vlastimil Babka   mm, compaction: s...
1064
1065
1066
1067
1068
1069
  	/*
  	 * The PageBuddy() check could have potentially brought us outside
  	 * the range to be scanned.
  	 */
  	if (unlikely(low_pfn > end_pfn))
  		low_pfn = end_pfn;
9df413143   Alex Shi   mm/compaction: do...
1070
  	page = NULL;
e380bebe4   Mel Gorman   mm, compaction: k...
1071
  isolate_abort:
c67fe3752   Mel Gorman   mm: compaction: A...
1072
  	if (locked)
6168d0da2   Alex Shi   mm/lru: replace p...
1073
  		unlock_page_lruvec_irqrestore(locked, flags);
9df413143   Alex Shi   mm/compaction: do...
1074
1075
1076
1077
  	if (page) {
  		SetPageLRU(page);
  		put_page(page);
  	}
748446bb6   Mel Gorman   mm: compaction: m...
1078

50b5b094e   Vlastimil Babka   mm: compaction: d...
1079
  	/*
804d3121b   Mel Gorman   mm, compaction: a...
1080
1081
1082
1083
1084
1085
  	 * Updated the cached scanner pfn once the pageblock has been scanned
  	 * Pages will either be migrated in which case there is no point
  	 * scanning in the near future or migration failed in which case the
  	 * failure reason may persist. The block is marked for skipping if
  	 * there were no pages isolated in the block or if the block is
  	 * rescanned twice in a row.
50b5b094e   Vlastimil Babka   mm: compaction: d...
1086
  	 */
804d3121b   Mel Gorman   mm, compaction: a...
1087
  	if (low_pfn == end_pfn && (!nr_isolated || cc->rescan)) {
e380bebe4   Mel Gorman   mm, compaction: k...
1088
1089
1090
1091
  		if (valid_page && !skip_updated)
  			set_pageblock_skip(valid_page);
  		update_cached_migrate(cc, low_pfn);
  	}
bb13ffeb9   Mel Gorman   mm: compaction: c...
1092

e34d85f0e   Joonsoo Kim   mm/compaction: pr...
1093
1094
  	trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
  						nr_scanned, nr_isolated);
b7aba6984   Mel Gorman   mm: compaction: a...
1095

670105a25   Mel Gorman   mm: compaction: a...
1096
  fatal_pending:
7f354a548   David Rientjes   mm, compaction: a...
1097
  	cc->total_migrate_scanned += nr_scanned;
397487db6   Mel Gorman   mm: compaction: A...
1098
  	if (nr_isolated)
010fc29a4   Minchan Kim   compaction: fix b...
1099
  		count_compact_events(COMPACTISOLATED, nr_isolated);
397487db6   Mel Gorman   mm: compaction: A...
1100

c2ad7a1ff   Oscar Salvador   mm,compaction: le...
1101
1102
1103
  	cc->migrate_pfn = low_pfn;
  
  	return ret;
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1104
  }
edc2ca612   Vlastimil Babka   mm, compaction: m...
1105
1106
1107
1108
1109
1110
  /**
   * isolate_migratepages_range() - isolate migrate-able pages in a PFN range
   * @cc:        Compaction control structure.
   * @start_pfn: The first PFN to start isolating.
   * @end_pfn:   The one-past-last PFN.
   *
369fa227c   Oscar Salvador   mm: make alloc_co...
1111
1112
   * Returns -EAGAIN when contented, -EINTR in case of a signal pending, -ENOMEM
   * in case we could not allocate a page, or 0.
edc2ca612   Vlastimil Babka   mm, compaction: m...
1113
   */
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
1114
  int
edc2ca612   Vlastimil Babka   mm, compaction: m...
1115
1116
1117
  isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
  							unsigned long end_pfn)
  {
e1409c325   Joonsoo Kim   mm/compaction: pa...
1118
  	unsigned long pfn, block_start_pfn, block_end_pfn;
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
1119
  	int ret = 0;
edc2ca612   Vlastimil Babka   mm, compaction: m...
1120
1121
1122
  
  	/* Scan block by block. First and last block may be incomplete */
  	pfn = start_pfn;
06b6640a3   Vlastimil Babka   mm, compaction: w...
1123
  	block_start_pfn = pageblock_start_pfn(pfn);
e1409c325   Joonsoo Kim   mm/compaction: pa...
1124
1125
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
06b6640a3   Vlastimil Babka   mm, compaction: w...
1126
  	block_end_pfn = pageblock_end_pfn(pfn);
edc2ca612   Vlastimil Babka   mm, compaction: m...
1127
1128
  
  	for (; pfn < end_pfn; pfn = block_end_pfn,
e1409c325   Joonsoo Kim   mm/compaction: pa...
1129
  				block_start_pfn = block_end_pfn,
edc2ca612   Vlastimil Babka   mm, compaction: m...
1130
1131
1132
  				block_end_pfn += pageblock_nr_pages) {
  
  		block_end_pfn = min(block_end_pfn, end_pfn);
e1409c325   Joonsoo Kim   mm/compaction: pa...
1133
1134
  		if (!pageblock_pfn_to_page(block_start_pfn,
  					block_end_pfn, cc->zone))
edc2ca612   Vlastimil Babka   mm, compaction: m...
1135
  			continue;
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
1136
1137
  		ret = isolate_migratepages_block(cc, pfn, block_end_pfn,
  						 ISOLATE_UNEVICTABLE);
edc2ca612   Vlastimil Babka   mm, compaction: m...
1138

c2ad7a1ff   Oscar Salvador   mm,compaction: le...
1139
  		if (ret)
edc2ca612   Vlastimil Babka   mm, compaction: m...
1140
  			break;
6ea41c0c0   Joonsoo Kim   mm/compaction.c: ...
1141

38935861d   Zi Yan   mm/compaction: co...
1142
  		if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX)
6ea41c0c0   Joonsoo Kim   mm/compaction.c: ...
1143
  			break;
edc2ca612   Vlastimil Babka   mm, compaction: m...
1144
  	}
edc2ca612   Vlastimil Babka   mm, compaction: m...
1145

c2ad7a1ff   Oscar Salvador   mm,compaction: le...
1146
  	return ret;
edc2ca612   Vlastimil Babka   mm, compaction: m...
1147
  }
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1148
1149
  #endif /* CONFIG_COMPACTION || CONFIG_CMA */
  #ifdef CONFIG_COMPACTION
018e9a49a   Andrew Morton   mm/compaction.c: ...
1150

b682debd9   Vlastimil Babka   mm, compaction: c...
1151
1152
1153
  static bool suitable_migration_source(struct compact_control *cc,
  							struct page *page)
  {
282722b0d   Vlastimil Babka   mm, compaction: r...
1154
  	int block_mt;
9bebefd59   Mel Gorman   mm, compaction: c...
1155
1156
  	if (pageblock_skip_persistent(page))
  		return false;
282722b0d   Vlastimil Babka   mm, compaction: r...
1157
  	if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction)
b682debd9   Vlastimil Babka   mm, compaction: c...
1158
  		return true;
282722b0d   Vlastimil Babka   mm, compaction: r...
1159
1160
1161
1162
1163
1164
  	block_mt = get_pageblock_migratetype(page);
  
  	if (cc->migratetype == MIGRATE_MOVABLE)
  		return is_migrate_movable(block_mt);
  	else
  		return block_mt == cc->migratetype;
b682debd9   Vlastimil Babka   mm, compaction: c...
1165
  }
018e9a49a   Andrew Morton   mm/compaction.c: ...
1166
  /* Returns true if the page is within a block suitable for migration to */
9f7e33879   Vlastimil Babka   mm, compaction: m...
1167
1168
  static bool suitable_migration_target(struct compact_control *cc,
  							struct page *page)
018e9a49a   Andrew Morton   mm/compaction.c: ...
1169
1170
1171
1172
1173
1174
1175
1176
  {
  	/* If the page is a large free page, then disallow migration */
  	if (PageBuddy(page)) {
  		/*
  		 * We are checking page_order without zone->lock taken. But
  		 * the only small danger is that we skip a potentially suitable
  		 * pageblock, so it's not worth to check order for valid range.
  		 */
ab130f910   Matthew Wilcox (Oracle)   mm: rename page_o...
1177
  		if (buddy_order_unsafe(page) >= pageblock_order)
018e9a49a   Andrew Morton   mm/compaction.c: ...
1178
1179
  			return false;
  	}
1ef36db2a   Yisheng Xie   mm/compaction: ig...
1180
1181
  	if (cc->ignore_block_suitable)
  		return true;
018e9a49a   Andrew Morton   mm/compaction.c: ...
1182
  	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
b682debd9   Vlastimil Babka   mm, compaction: c...
1183
  	if (is_migrate_movable(get_pageblock_migratetype(page)))
018e9a49a   Andrew Morton   mm/compaction.c: ...
1184
1185
1186
1187
1188
  		return true;
  
  	/* Otherwise skip the block */
  	return false;
  }
70b44595e   Mel Gorman   mm, compaction: u...
1189
1190
1191
  static inline unsigned int
  freelist_scan_limit(struct compact_control *cc)
  {
dd7ef7bd1   Qian Cai   mm/compaction.c: ...
1192
1193
1194
  	unsigned short shift = BITS_PER_LONG - 1;
  
  	return (COMPACT_CLUSTER_MAX >> min(shift, cc->fast_search_fail)) + 1;
70b44595e   Mel Gorman   mm, compaction: u...
1195
  }
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1196
  /*
f2849aa09   Vlastimil Babka   mm, compaction: m...
1197
1198
1199
1200
1201
1202
1203
1204
   * Test whether the free scanner has reached the same or lower pageblock than
   * the migration scanner, and compaction should thus terminate.
   */
  static inline bool compact_scanners_met(struct compact_control *cc)
  {
  	return (cc->free_pfn >> pageblock_order)
  		<= (cc->migrate_pfn >> pageblock_order);
  }
5a811889d   Mel Gorman   mm, compaction: u...
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
  /*
   * Used when scanning for a suitable migration target which scans freelists
   * in reverse. Reorders the list such as the unscanned pages are scanned
   * first on the next iteration of the free scanner
   */
  static void
  move_freelist_head(struct list_head *freelist, struct page *freepage)
  {
  	LIST_HEAD(sublist);
  
  	if (!list_is_last(freelist, &freepage->lru)) {
  		list_cut_before(&sublist, freelist, &freepage->lru);
d2155fe54   Liu Xiang   mm: compaction: r...
1217
  		list_splice_tail(&sublist, freelist);
5a811889d   Mel Gorman   mm, compaction: u...
1218
1219
1220
1221
1222
1223
1224
1225
1226
  	}
  }
  
  /*
   * Similar to move_freelist_head except used by the migration scanner
   * when scanning forward. It's possible for these list operations to
   * move against each other if they search the free list exactly in
   * lockstep.
   */
70b44595e   Mel Gorman   mm, compaction: u...
1227
1228
1229
1230
1231
1232
1233
  static void
  move_freelist_tail(struct list_head *freelist, struct page *freepage)
  {
  	LIST_HEAD(sublist);
  
  	if (!list_is_first(freelist, &freepage->lru)) {
  		list_cut_position(&sublist, freelist, &freepage->lru);
d2155fe54   Liu Xiang   mm: compaction: r...
1234
  		list_splice_tail(&sublist, freelist);
70b44595e   Mel Gorman   mm, compaction: u...
1235
1236
  	}
  }
5a811889d   Mel Gorman   mm, compaction: u...
1237
1238
1239
1240
  static void
  fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
  {
  	unsigned long start_pfn, end_pfn;
6e2b7044c   Vlastimil Babka   mm, compaction: m...
1241
  	struct page *page;
5a811889d   Mel Gorman   mm, compaction: u...
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
  
  	/* Do not search around if there are enough pages already */
  	if (cc->nr_freepages >= cc->nr_migratepages)
  		return;
  
  	/* Minimise scanning during async compaction */
  	if (cc->direct_compaction && cc->mode == MIGRATE_ASYNC)
  		return;
  
  	/* Pageblock boundaries */
6e2b7044c   Vlastimil Babka   mm, compaction: m...
1252
1253
1254
1255
1256
1257
  	start_pfn = max(pageblock_start_pfn(pfn), cc->zone->zone_start_pfn);
  	end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone));
  
  	page = pageblock_pfn_to_page(start_pfn, end_pfn, cc->zone);
  	if (!page)
  		return;
5a811889d   Mel Gorman   mm, compaction: u...
1258
1259
1260
  
  	/* Scan before */
  	if (start_pfn != pfn) {
4fca9730c   Mel Gorman   mm, compaction: s...
1261
  		isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false);
5a811889d   Mel Gorman   mm, compaction: u...
1262
1263
1264
1265
1266
1267
  		if (cc->nr_freepages >= cc->nr_migratepages)
  			return;
  	}
  
  	/* Scan after */
  	start_pfn = pfn + nr_isolated;
60fce36af   Mel Gorman   mm/compaction.c: ...
1268
  	if (start_pfn < end_pfn)
4fca9730c   Mel Gorman   mm, compaction: s...
1269
  		isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
5a811889d   Mel Gorman   mm, compaction: u...
1270
1271
1272
1273
1274
  
  	/* Skip this pageblock in the future as it's full or nearly full */
  	if (cc->nr_freepages < cc->nr_migratepages)
  		set_pageblock_skip(page);
  }
dbe2d4e4f   Mel Gorman   mm, compaction: r...
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
  /* Search orders in round-robin fashion */
  static int next_search_order(struct compact_control *cc, int order)
  {
  	order--;
  	if (order < 0)
  		order = cc->order - 1;
  
  	/* Search wrapped around? */
  	if (order == cc->search_order) {
  		cc->search_order--;
  		if (cc->search_order < 0)
  			cc->search_order = cc->order - 1;
  		return -1;
  	}
  
  	return order;
  }
5a811889d   Mel Gorman   mm, compaction: u...
1292
1293
1294
  static unsigned long
  fast_isolate_freepages(struct compact_control *cc)
  {
b55ca5264   Wonhyuk Yang   mm/compaction: fi...
1295
  	unsigned int limit = max(1U, freelist_scan_limit(cc) >> 1);
5a811889d   Mel Gorman   mm, compaction: u...
1296
  	unsigned int nr_scanned = 0;
74e21484e   Rokudo Yan   mm, compaction: m...
1297
  	unsigned long low_pfn, min_pfn, highest = 0;
5a811889d   Mel Gorman   mm, compaction: u...
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
  	unsigned long nr_isolated = 0;
  	unsigned long distance;
  	struct page *page = NULL;
  	bool scan_start = false;
  	int order;
  
  	/* Full compaction passes in a negative order */
  	if (cc->order <= 0)
  		return cc->free_pfn;
  
  	/*
  	 * If starting the scan, use a deeper search and use the highest
  	 * PFN found if a suitable one is not found.
  	 */
e332f741a   Mel Gorman   mm, compaction: b...
1312
  	if (cc->free_pfn >= cc->zone->compact_init_free_pfn) {
5a811889d   Mel Gorman   mm, compaction: u...
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
  		limit = pageblock_nr_pages >> 1;
  		scan_start = true;
  	}
  
  	/*
  	 * Preferred point is in the top quarter of the scan space but take
  	 * a pfn from the top half if the search is problematic.
  	 */
  	distance = (cc->free_pfn - cc->migrate_pfn);
  	low_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 2));
  	min_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 1));
  
  	if (WARN_ON_ONCE(min_pfn > low_pfn))
  		low_pfn = min_pfn;
dbe2d4e4f   Mel Gorman   mm, compaction: r...
1327
1328
1329
1330
1331
1332
1333
1334
1335
  	/*
  	 * Search starts from the last successful isolation order or the next
  	 * order to search after a previous failure
  	 */
  	cc->search_order = min_t(unsigned int, cc->order - 1, cc->search_order);
  
  	for (order = cc->search_order;
  	     !page && order >= 0;
  	     order = next_search_order(cc, order)) {
5a811889d   Mel Gorman   mm, compaction: u...
1336
1337
1338
1339
1340
  		struct free_area *area = &cc->zone->free_area[order];
  		struct list_head *freelist;
  		struct page *freepage;
  		unsigned long flags;
  		unsigned int order_scanned = 0;
74e21484e   Rokudo Yan   mm, compaction: m...
1341
  		unsigned long high_pfn = 0;
5a811889d   Mel Gorman   mm, compaction: u...
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
  
  		if (!area->nr_free)
  			continue;
  
  		spin_lock_irqsave(&cc->zone->lock, flags);
  		freelist = &area->free_list[MIGRATE_MOVABLE];
  		list_for_each_entry_reverse(freepage, freelist, lru) {
  			unsigned long pfn;
  
  			order_scanned++;
  			nr_scanned++;
  			pfn = page_to_pfn(freepage);
  
  			if (pfn >= highest)
6e2b7044c   Vlastimil Babka   mm, compaction: m...
1356
1357
  				highest = max(pageblock_start_pfn(pfn),
  					      cc->zone->zone_start_pfn);
5a811889d   Mel Gorman   mm, compaction: u...
1358
1359
1360
  
  			if (pfn >= low_pfn) {
  				cc->fast_search_fail = 0;
dbe2d4e4f   Mel Gorman   mm, compaction: r...
1361
  				cc->search_order = order;
5a811889d   Mel Gorman   mm, compaction: u...
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
  				page = freepage;
  				break;
  			}
  
  			if (pfn >= min_pfn && pfn > high_pfn) {
  				high_pfn = pfn;
  
  				/* Shorten the scan if a candidate is found */
  				limit >>= 1;
  			}
  
  			if (order_scanned >= limit)
  				break;
  		}
  
  		/* Use a minimum pfn if a preferred one was not found */
  		if (!page && high_pfn) {
  			page = pfn_to_page(high_pfn);
  
  			/* Update freepage for the list reorder below */
  			freepage = page;
  		}
  
  		/* Reorder to so a future search skips recent pages */
  		move_freelist_head(freelist, freepage);
  
  		/* Isolate the page if available */
  		if (page) {
  			if (__isolate_free_page(page, order)) {
  				set_page_private(page, order);
  				nr_isolated = 1 << order;
  				cc->nr_freepages += nr_isolated;
  				list_add_tail(&page->lru, &cc->freepages);
  				count_compact_events(COMPACTISOLATED, nr_isolated);
  			} else {
  				/* If isolation fails, abort the search */
5b56d996d   Qian Cai   mm/compaction.c: ...
1398
  				order = cc->search_order + 1;
5a811889d   Mel Gorman   mm, compaction: u...
1399
1400
1401
1402
1403
1404
1405
  				page = NULL;
  			}
  		}
  
  		spin_unlock_irqrestore(&cc->zone->lock, flags);
  
  		/*
b55ca5264   Wonhyuk Yang   mm/compaction: fi...
1406
  		 * Smaller scan on next order so the total scan is related
5a811889d   Mel Gorman   mm, compaction: u...
1407
1408
1409
  		 * to freelist_scan_limit.
  		 */
  		if (order_scanned >= limit)
b55ca5264   Wonhyuk Yang   mm/compaction: fi...
1410
  			limit = max(1U, limit >> 1);
5a811889d   Mel Gorman   mm, compaction: u...
1411
1412
1413
1414
1415
1416
1417
  	}
  
  	if (!page) {
  		cc->fast_search_fail++;
  		if (scan_start) {
  			/*
  			 * Use the highest PFN found above min. If one was
f38677551   Ethon Paul   mm/compaction: fi...
1418
  			 * not found, be pessimistic for direct compaction
5a811889d   Mel Gorman   mm, compaction: u...
1419
1420
1421
1422
1423
1424
  			 * and use the min mark.
  			 */
  			if (highest) {
  				page = pfn_to_page(highest);
  				cc->free_pfn = highest;
  			} else {
e577c8b64   Suzuki K Poulose   mm, compaction: m...
1425
  				if (cc->direct_compaction && pfn_valid(min_pfn)) {
73a6e474c   Baoquan He   mm: memmap_init: ...
1426
  					page = pageblock_pfn_to_page(min_pfn,
6e2b7044c   Vlastimil Babka   mm, compaction: m...
1427
1428
  						min(pageblock_end_pfn(min_pfn),
  						    zone_end_pfn(cc->zone)),
73a6e474c   Baoquan He   mm: memmap_init: ...
1429
  						cc->zone);
5a811889d   Mel Gorman   mm, compaction: u...
1430
1431
1432
1433
1434
  					cc->free_pfn = min_pfn;
  				}
  			}
  		}
  	}
d097a6f63   Mel Gorman   mm, compaction: r...
1435
1436
  	if (highest && highest >= cc->zone->compact_cached_free_pfn) {
  		highest -= pageblock_nr_pages;
5a811889d   Mel Gorman   mm, compaction: u...
1437
  		cc->zone->compact_cached_free_pfn = highest;
d097a6f63   Mel Gorman   mm, compaction: r...
1438
  	}
5a811889d   Mel Gorman   mm, compaction: u...
1439
1440
1441
1442
1443
1444
1445
1446
1447
  
  	cc->total_free_scanned += nr_scanned;
  	if (!page)
  		return cc->free_pfn;
  
  	low_pfn = page_to_pfn(page);
  	fast_isolate_around(cc, low_pfn, nr_isolated);
  	return low_pfn;
  }
f2849aa09   Vlastimil Babka   mm, compaction: m...
1448
  /*
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1449
1450
   * Based on information in the current compact_control, find blocks
   * suitable for isolating free pages from and then isolate them.
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1451
   */
edc2ca612   Vlastimil Babka   mm, compaction: m...
1452
  static void isolate_freepages(struct compact_control *cc)
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1453
  {
edc2ca612   Vlastimil Babka   mm, compaction: m...
1454
  	struct zone *zone = cc->zone;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1455
  	struct page *page;
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1456
  	unsigned long block_start_pfn;	/* start of current pageblock */
e14c720ef   Vlastimil Babka   mm, compaction: r...
1457
  	unsigned long isolate_start_pfn; /* exact pfn we start at */
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1458
1459
  	unsigned long block_end_pfn;	/* end of current pageblock */
  	unsigned long low_pfn;	     /* lowest pfn scanner is able to scan */
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1460
  	struct list_head *freelist = &cc->freepages;
4fca9730c   Mel Gorman   mm, compaction: s...
1461
  	unsigned int stride;
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1462

5a811889d   Mel Gorman   mm, compaction: u...
1463
1464
1465
1466
  	/* Try a small search of the free lists for a candidate */
  	isolate_start_pfn = fast_isolate_freepages(cc);
  	if (cc->nr_freepages)
  		goto splitmap;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1467
1468
  	/*
  	 * Initialise the free scanner. The starting point is where we last
49e068f0b   Vlastimil Babka   mm/compaction: ma...
1469
  	 * successfully isolated from, zone-cached value, or the end of the
e14c720ef   Vlastimil Babka   mm, compaction: r...
1470
1471
  	 * zone when isolating for the first time. For looping we also need
  	 * this pfn aligned down to the pageblock boundary, because we do
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1472
1473
  	 * block_start_pfn -= pageblock_nr_pages in the for loop.
  	 * For ending point, take care when isolating in last pageblock of a
a1c1dbeb2   Randy Dunlap   mm/compaction.c: ...
1474
  	 * zone which ends in the middle of a pageblock.
49e068f0b   Vlastimil Babka   mm/compaction: ma...
1475
1476
  	 * The low boundary is the end of the pageblock the migration scanner
  	 * is using.
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1477
  	 */
e14c720ef   Vlastimil Babka   mm, compaction: r...
1478
  	isolate_start_pfn = cc->free_pfn;
5a811889d   Mel Gorman   mm, compaction: u...
1479
  	block_start_pfn = pageblock_start_pfn(isolate_start_pfn);
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1480
1481
  	block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
  						zone_end_pfn(zone));
06b6640a3   Vlastimil Babka   mm, compaction: w...
1482
  	low_pfn = pageblock_end_pfn(cc->migrate_pfn);
4fca9730c   Mel Gorman   mm, compaction: s...
1483
  	stride = cc->mode == MIGRATE_ASYNC ? COMPACT_CLUSTER_MAX : 1;
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1484

ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1485
  	/*
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1486
1487
1488
1489
  	 * Isolate free pages until enough are available to migrate the
  	 * pages on cc->migratepages. We stop searching if the migrate
  	 * and free page scanners meet or enough free pages are isolated.
  	 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1490
  	for (; block_start_pfn >= low_pfn;
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1491
  				block_end_pfn = block_start_pfn,
e14c720ef   Vlastimil Babka   mm, compaction: r...
1492
1493
  				block_start_pfn -= pageblock_nr_pages,
  				isolate_start_pfn = block_start_pfn) {
4fca9730c   Mel Gorman   mm, compaction: s...
1494
  		unsigned long nr_isolated;
f6ea3adb7   David Rientjes   mm/compaction.c: ...
1495
1496
  		/*
  		 * This can iterate a massively long zone without finding any
cb810ad29   Mel Gorman   mm, compaction: r...
1497
  		 * suitable migration targets, so periodically check resched.
f6ea3adb7   David Rientjes   mm/compaction.c: ...
1498
  		 */
cb810ad29   Mel Gorman   mm, compaction: r...
1499
  		if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
cf66f0700   Mel Gorman   mm, compaction: d...
1500
  			cond_resched();
f6ea3adb7   David Rientjes   mm/compaction.c: ...
1501

7d49d8868   Vlastimil Babka   mm, compaction: r...
1502
1503
1504
  		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
  									zone);
  		if (!page)
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1505
1506
1507
  			continue;
  
  		/* Check the block is suitable for migration */
9f7e33879   Vlastimil Babka   mm, compaction: m...
1508
  		if (!suitable_migration_target(cc, page))
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1509
  			continue;
68e3e9262   Linus Torvalds   Revert "mm: compa...
1510

bb13ffeb9   Mel Gorman   mm: compaction: c...
1511
1512
1513
  		/* If isolation recently failed, do not retry */
  		if (!isolation_suitable(cc, page))
  			continue;
e14c720ef   Vlastimil Babka   mm, compaction: r...
1514
  		/* Found a block suitable for isolating free pages from. */
4fca9730c   Mel Gorman   mm, compaction: s...
1515
1516
  		nr_isolated = isolate_freepages_block(cc, &isolate_start_pfn,
  					block_end_pfn, freelist, stride, false);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1517

d097a6f63   Mel Gorman   mm, compaction: r...
1518
1519
1520
  		/* Update the skip hint if the full pageblock was scanned */
  		if (isolate_start_pfn == block_end_pfn)
  			update_pageblock_skip(cc, page, block_start_pfn);
cb2dcaf02   Mel Gorman   mm, compaction: f...
1521
1522
  		/* Are enough freepages isolated? */
  		if (cc->nr_freepages >= cc->nr_migratepages) {
a46cbf3bc   David Rientjes   mm, compaction: p...
1523
1524
1525
1526
1527
  			if (isolate_start_pfn >= block_end_pfn) {
  				/*
  				 * Restart at previous pageblock if more
  				 * freepages can be isolated next time.
  				 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1528
1529
  				isolate_start_pfn =
  					block_start_pfn - pageblock_nr_pages;
a46cbf3bc   David Rientjes   mm, compaction: p...
1530
  			}
be9765722   Vlastimil Babka   mm, compaction: p...
1531
  			break;
a46cbf3bc   David Rientjes   mm, compaction: p...
1532
  		} else if (isolate_start_pfn < block_end_pfn) {
f5f61a320   Vlastimil Babka   mm, compaction: s...
1533
  			/*
a46cbf3bc   David Rientjes   mm, compaction: p...
1534
1535
  			 * If isolation failed early, do not continue
  			 * needlessly.
f5f61a320   Vlastimil Babka   mm, compaction: s...
1536
  			 */
a46cbf3bc   David Rientjes   mm, compaction: p...
1537
  			break;
f5f61a320   Vlastimil Babka   mm, compaction: s...
1538
  		}
4fca9730c   Mel Gorman   mm, compaction: s...
1539
1540
1541
1542
1543
1544
1545
  
  		/* Adjust stride depending on isolation */
  		if (nr_isolated) {
  			stride = 1;
  			continue;
  		}
  		stride = min_t(unsigned int, COMPACT_CLUSTER_MAX, stride << 1);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1546
  	}
7ed695e06   Vlastimil Babka   mm: compaction: d...
1547
  	/*
f5f61a320   Vlastimil Babka   mm, compaction: s...
1548
1549
1550
1551
  	 * Record where the free scanner will restart next time. Either we
  	 * broke from the loop and set isolate_start_pfn based on the last
  	 * call to isolate_freepages_block(), or we met the migration scanner
  	 * and the loop terminated due to isolate_start_pfn < low_pfn
7ed695e06   Vlastimil Babka   mm: compaction: d...
1552
  	 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1553
  	cc->free_pfn = isolate_start_pfn;
5a811889d   Mel Gorman   mm, compaction: u...
1554
1555
1556
1557
  
  splitmap:
  	/* __isolate_free_page() does not map the pages */
  	split_map_pages(freelist);
748446bb6   Mel Gorman   mm: compaction: m...
1558
1559
1560
1561
1562
1563
1564
  }
  
  /*
   * This is a migrate-callback that "allocates" freepages by taking pages
   * from the isolated freelists in the block we are migrating to.
   */
  static struct page *compaction_alloc(struct page *migratepage,
666feb21a   Michal Hocko   mm, migrate: remo...
1565
  					unsigned long data)
748446bb6   Mel Gorman   mm: compaction: m...
1566
1567
1568
  {
  	struct compact_control *cc = (struct compact_control *)data;
  	struct page *freepage;
748446bb6   Mel Gorman   mm: compaction: m...
1569
  	if (list_empty(&cc->freepages)) {
cb2dcaf02   Mel Gorman   mm, compaction: f...
1570
  		isolate_freepages(cc);
748446bb6   Mel Gorman   mm: compaction: m...
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
  
  		if (list_empty(&cc->freepages))
  			return NULL;
  	}
  
  	freepage = list_entry(cc->freepages.next, struct page, lru);
  	list_del(&freepage->lru);
  	cc->nr_freepages--;
  
  	return freepage;
  }
  
  /*
d53aea3d4   David Rientjes   mm, compaction: r...
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
   * This is a migrate-callback that "frees" freepages back to the isolated
   * freelist.  All pages on the freelist are from the same zone, so there is no
   * special handling needed for NUMA.
   */
  static void compaction_free(struct page *page, unsigned long data)
  {
  	struct compact_control *cc = (struct compact_control *)data;
  
  	list_add(&page->lru, &cc->freepages);
  	cc->nr_freepages++;
  }
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1595
1596
1597
1598
1599
1600
1601
1602
  /* possible outcome of isolate_migratepages */
  typedef enum {
  	ISOLATE_ABORT,		/* Abort compaction now */
  	ISOLATE_NONE,		/* No pages isolated, continue scanning */
  	ISOLATE_SUCCESS,	/* Pages isolated, migrate */
  } isolate_migrate_t;
  
  /*
5bbe3547a   Eric B Munson   mm: allow compact...
1603
1604
1605
   * Allow userspace to control policy on scanning the unevictable LRU for
   * compactable pages.
   */
6923aa0d8   Sebastian Andrzej Siewior   mm/compaction: Di...
1606
1607
1608
  #ifdef CONFIG_PREEMPT_RT
  int sysctl_compact_unevictable_allowed __read_mostly = 0;
  #else
5bbe3547a   Eric B Munson   mm: allow compact...
1609
  int sysctl_compact_unevictable_allowed __read_mostly = 1;
6923aa0d8   Sebastian Andrzej Siewior   mm/compaction: Di...
1610
  #endif
5bbe3547a   Eric B Munson   mm: allow compact...
1611

70b44595e   Mel Gorman   mm, compaction: u...
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
  static inline void
  update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
  {
  	if (cc->fast_start_pfn == ULONG_MAX)
  		return;
  
  	if (!cc->fast_start_pfn)
  		cc->fast_start_pfn = pfn;
  
  	cc->fast_start_pfn = min(cc->fast_start_pfn, pfn);
  }
  
  static inline unsigned long
  reinit_migrate_pfn(struct compact_control *cc)
  {
  	if (!cc->fast_start_pfn || cc->fast_start_pfn == ULONG_MAX)
  		return cc->migrate_pfn;
  
  	cc->migrate_pfn = cc->fast_start_pfn;
  	cc->fast_start_pfn = ULONG_MAX;
  
  	return cc->migrate_pfn;
  }
  
  /*
   * Briefly search the free lists for a migration source that already has
   * some free pages to reduce the number of pages that need migration
   * before a pageblock is free.
   */
  static unsigned long fast_find_migrateblock(struct compact_control *cc)
  {
  	unsigned int limit = freelist_scan_limit(cc);
  	unsigned int nr_scanned = 0;
  	unsigned long distance;
  	unsigned long pfn = cc->migrate_pfn;
  	unsigned long high_pfn;
  	int order;
15d28d0d1   Wonhyuk Yang   mm/compaction: fi...
1649
  	bool found_block = false;
70b44595e   Mel Gorman   mm, compaction: u...
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
  
  	/* Skip hints are relied on to avoid repeats on the fast search */
  	if (cc->ignore_skip_hint)
  		return pfn;
  
  	/*
  	 * If the migrate_pfn is not at the start of a zone or the start
  	 * of a pageblock then assume this is a continuation of a previous
  	 * scan restarted due to COMPACT_CLUSTER_MAX.
  	 */
  	if (pfn != cc->zone->zone_start_pfn && pfn != pageblock_start_pfn(pfn))
  		return pfn;
  
  	/*
  	 * For smaller orders, just linearly scan as the number of pages
  	 * to migrate should be relatively small and does not necessarily
  	 * justify freeing up a large block for a small allocation.
  	 */
  	if (cc->order <= PAGE_ALLOC_COSTLY_ORDER)
  		return pfn;
  
  	/*
  	 * Only allow kcompactd and direct requests for movable pages to
  	 * quickly clear out a MOVABLE pageblock for allocation. This
  	 * reduces the risk that a large movable pageblock is freed for
  	 * an unmovable/reclaimable small allocation.
  	 */
  	if (cc->direct_compaction && cc->migratetype != MIGRATE_MOVABLE)
  		return pfn;
  
  	/*
  	 * When starting the migration scanner, pick any pageblock within the
  	 * first half of the search space. Otherwise try and pick a pageblock
  	 * within the first eighth to reduce the chances that a migration
  	 * target later becomes a source.
  	 */
  	distance = (cc->free_pfn - cc->migrate_pfn) >> 1;
  	if (cc->migrate_pfn != cc->zone->zone_start_pfn)
  		distance >>= 2;
  	high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance);
  
  	for (order = cc->order - 1;
15d28d0d1   Wonhyuk Yang   mm/compaction: fi...
1692
  	     order >= PAGE_ALLOC_COSTLY_ORDER && !found_block && nr_scanned < limit;
70b44595e   Mel Gorman   mm, compaction: u...
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
  	     order--) {
  		struct free_area *area = &cc->zone->free_area[order];
  		struct list_head *freelist;
  		unsigned long flags;
  		struct page *freepage;
  
  		if (!area->nr_free)
  			continue;
  
  		spin_lock_irqsave(&cc->zone->lock, flags);
  		freelist = &area->free_list[MIGRATE_MOVABLE];
  		list_for_each_entry(freepage, freelist, lru) {
  			unsigned long free_pfn;
15d28d0d1   Wonhyuk Yang   mm/compaction: fi...
1706
1707
1708
1709
  			if (nr_scanned++ >= limit) {
  				move_freelist_tail(freelist, freepage);
  				break;
  			}
70b44595e   Mel Gorman   mm, compaction: u...
1710
1711
  			free_pfn = page_to_pfn(freepage);
  			if (free_pfn < high_pfn) {
70b44595e   Mel Gorman   mm, compaction: u...
1712
1713
1714
1715
1716
1717
  				/*
  				 * Avoid if skipped recently. Ideally it would
  				 * move to the tail but even safe iteration of
  				 * the list assumes an entry is deleted, not
  				 * reordered.
  				 */
15d28d0d1   Wonhyuk Yang   mm/compaction: fi...
1718
  				if (get_pageblock_skip(freepage))
70b44595e   Mel Gorman   mm, compaction: u...
1719
  					continue;
70b44595e   Mel Gorman   mm, compaction: u...
1720
1721
1722
  
  				/* Reorder to so a future search skips recent pages */
  				move_freelist_tail(freelist, freepage);
e380bebe4   Mel Gorman   mm, compaction: k...
1723
  				update_fast_start_pfn(cc, free_pfn);
70b44595e   Mel Gorman   mm, compaction: u...
1724
  				pfn = pageblock_start_pfn(free_pfn);
20e6ec76a   Rei Yamamoto   mm, compaction: f...
1725
1726
  				if (pfn < cc->zone->zone_start_pfn)
  					pfn = cc->zone->zone_start_pfn;
70b44595e   Mel Gorman   mm, compaction: u...
1727
  				cc->fast_search_fail = 0;
15d28d0d1   Wonhyuk Yang   mm/compaction: fi...
1728
  				found_block = true;
70b44595e   Mel Gorman   mm, compaction: u...
1729
1730
1731
  				set_pageblock_skip(freepage);
  				break;
  			}
70b44595e   Mel Gorman   mm, compaction: u...
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
  		}
  		spin_unlock_irqrestore(&cc->zone->lock, flags);
  	}
  
  	cc->total_migrate_scanned += nr_scanned;
  
  	/*
  	 * If fast scanning failed then use a cached entry for a page block
  	 * that had free pages as the basis for starting a linear scan.
  	 */
15d28d0d1   Wonhyuk Yang   mm/compaction: fi...
1742
1743
  	if (!found_block) {
  		cc->fast_search_fail++;
70b44595e   Mel Gorman   mm, compaction: u...
1744
  		pfn = reinit_migrate_pfn(cc);
15d28d0d1   Wonhyuk Yang   mm/compaction: fi...
1745
  	}
70b44595e   Mel Gorman   mm, compaction: u...
1746
1747
  	return pfn;
  }
5bbe3547a   Eric B Munson   mm: allow compact...
1748
  /*
edc2ca612   Vlastimil Babka   mm, compaction: m...
1749
1750
1751
   * Isolate all pages that can be migrated from the first suitable block,
   * starting at the block pointed to by the migrate scanner pfn within
   * compact_control.
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1752
   */
32aaf0553   Pengfei Li   mm/compaction.c: ...
1753
  static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1754
  {
e1409c325   Joonsoo Kim   mm/compaction: pa...
1755
1756
1757
  	unsigned long block_start_pfn;
  	unsigned long block_end_pfn;
  	unsigned long low_pfn;
edc2ca612   Vlastimil Babka   mm, compaction: m...
1758
1759
  	struct page *page;
  	const isolate_mode_t isolate_mode =
5bbe3547a   Eric B Munson   mm: allow compact...
1760
  		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
1d2047fef   Hugh Dickins   mm, compaction: d...
1761
  		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
70b44595e   Mel Gorman   mm, compaction: u...
1762
  	bool fast_find_block;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1763

edc2ca612   Vlastimil Babka   mm, compaction: m...
1764
1765
  	/*
  	 * Start at where we last stopped, or beginning of the zone as
70b44595e   Mel Gorman   mm, compaction: u...
1766
1767
  	 * initialized by compact_zone(). The first failure will use
  	 * the lowest PFN as the starting point for linear scanning.
edc2ca612   Vlastimil Babka   mm, compaction: m...
1768
  	 */
70b44595e   Mel Gorman   mm, compaction: u...
1769
  	low_pfn = fast_find_migrateblock(cc);
06b6640a3   Vlastimil Babka   mm, compaction: w...
1770
  	block_start_pfn = pageblock_start_pfn(low_pfn);
32aaf0553   Pengfei Li   mm/compaction.c: ...
1771
1772
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1773

70b44595e   Mel Gorman   mm, compaction: u...
1774
1775
1776
1777
1778
1779
  	/*
  	 * fast_find_migrateblock marks a pageblock skipped so to avoid
  	 * the isolation_suitable check below, check whether the fast
  	 * search was successful.
  	 */
  	fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1780
  	/* Only scan within a pageblock boundary */
06b6640a3   Vlastimil Babka   mm, compaction: w...
1781
  	block_end_pfn = pageblock_end_pfn(low_pfn);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1782

edc2ca612   Vlastimil Babka   mm, compaction: m...
1783
1784
1785
1786
  	/*
  	 * Iterate over whole pageblocks until we find the first suitable.
  	 * Do not cross the free scanner.
  	 */
e1409c325   Joonsoo Kim   mm/compaction: pa...
1787
  	for (; block_end_pfn <= cc->free_pfn;
70b44595e   Mel Gorman   mm, compaction: u...
1788
  			fast_find_block = false,
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
1789
  			cc->migrate_pfn = low_pfn = block_end_pfn,
e1409c325   Joonsoo Kim   mm/compaction: pa...
1790
1791
  			block_start_pfn = block_end_pfn,
  			block_end_pfn += pageblock_nr_pages) {
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1792

edc2ca612   Vlastimil Babka   mm, compaction: m...
1793
1794
1795
  		/*
  		 * This can potentially iterate a massively long zone with
  		 * many pageblocks unsuitable, so periodically check if we
cb810ad29   Mel Gorman   mm, compaction: r...
1796
  		 * need to schedule.
edc2ca612   Vlastimil Babka   mm, compaction: m...
1797
  		 */
cb810ad29   Mel Gorman   mm, compaction: r...
1798
  		if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
cf66f0700   Mel Gorman   mm, compaction: d...
1799
  			cond_resched();
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1800

32aaf0553   Pengfei Li   mm/compaction.c: ...
1801
1802
  		page = pageblock_pfn_to_page(block_start_pfn,
  						block_end_pfn, cc->zone);
7d49d8868   Vlastimil Babka   mm, compaction: r...
1803
  		if (!page)
edc2ca612   Vlastimil Babka   mm, compaction: m...
1804
  			continue;
e380bebe4   Mel Gorman   mm, compaction: k...
1805
1806
1807
1808
1809
1810
1811
1812
1813
  		/*
  		 * If isolation recently failed, do not retry. Only check the
  		 * pageblock once. COMPACT_CLUSTER_MAX causes a pageblock
  		 * to be visited multiple times. Assume skip was checked
  		 * before making it "skip" so other compaction instances do
  		 * not scan the same block.
  		 */
  		if (IS_ALIGNED(low_pfn, pageblock_nr_pages) &&
  		    !fast_find_block && !isolation_suitable(cc, page))
edc2ca612   Vlastimil Babka   mm, compaction: m...
1814
1815
1816
  			continue;
  
  		/*
9bebefd59   Mel Gorman   mm, compaction: c...
1817
1818
1819
1820
1821
1822
  		 * For async compaction, also only scan in MOVABLE blocks
  		 * without huge pages. Async compaction is optimistic to see
  		 * if the minimum amount of work satisfies the allocation.
  		 * The cached PFN is updated as it's possible that all
  		 * remaining blocks between source and target are unsuitable
  		 * and the compaction scanners fail to meet.
edc2ca612   Vlastimil Babka   mm, compaction: m...
1823
  		 */
9bebefd59   Mel Gorman   mm, compaction: c...
1824
1825
  		if (!suitable_migration_source(cc, page)) {
  			update_cached_migrate(cc, block_end_pfn);
edc2ca612   Vlastimil Babka   mm, compaction: m...
1826
  			continue;
9bebefd59   Mel Gorman   mm, compaction: c...
1827
  		}
edc2ca612   Vlastimil Babka   mm, compaction: m...
1828
1829
  
  		/* Perform the isolation */
c2ad7a1ff   Oscar Salvador   mm,compaction: le...
1830
1831
  		if (isolate_migratepages_block(cc, low_pfn, block_end_pfn,
  						isolate_mode))
edc2ca612   Vlastimil Babka   mm, compaction: m...
1832
1833
1834
1835
1836
1837
1838
1839
1840
  			return ISOLATE_ABORT;
  
  		/*
  		 * Either we isolated something and proceed with migration. Or
  		 * we failed and compact_zone should decide if we should
  		 * continue or not.
  		 */
  		break;
  	}
edc2ca612   Vlastimil Babka   mm, compaction: m...
1841
  	return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1842
  }
21c527a3c   Yaowei Bai   mm/compaction.c: ...
1843
1844
1845
1846
1847
1848
1849
1850
  /*
   * order == -1 is expected when compacting via
   * /proc/sys/vm/compact_memory
   */
  static inline bool is_via_compact_memory(int order)
  {
  	return order == -1;
  }
facdaa917   Nitin Gupta   mm: proactive com...
1851
1852
  static bool kswapd_is_running(pg_data_t *pgdat)
  {
b03fbd4ff   Peter Zijlstra   sched: Introduce ...
1853
  	return pgdat->kswapd && task_is_running(pgdat->kswapd);
facdaa917   Nitin Gupta   mm: proactive com...
1854
1855
1856
1857
  }
  
  /*
   * A zone's fragmentation score is the external fragmentation wrt to the
40d7e2032   Charan Teja Reddy   mm/compaction: co...
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
   * COMPACTION_HPAGE_ORDER. It returns a value in the range [0, 100].
   */
  static unsigned int fragmentation_score_zone(struct zone *zone)
  {
  	return extfrag_for_order(zone, COMPACTION_HPAGE_ORDER);
  }
  
  /*
   * A weighted zone's fragmentation score is the external fragmentation
   * wrt to the COMPACTION_HPAGE_ORDER scaled by the zone's size. It
   * returns a value in the range [0, 100].
facdaa917   Nitin Gupta   mm: proactive com...
1869
1870
1871
1872
1873
1874
   *
   * The scaling factor ensures that proactive compaction focuses on larger
   * zones like ZONE_NORMAL, rather than smaller, specialized zones like
   * ZONE_DMA32. For smaller zones, the score value remains close to zero,
   * and thus never exceeds the high threshold for proactive compaction.
   */
40d7e2032   Charan Teja Reddy   mm/compaction: co...
1875
  static unsigned int fragmentation_score_zone_weighted(struct zone *zone)
facdaa917   Nitin Gupta   mm: proactive com...
1876
1877
  {
  	unsigned long score;
40d7e2032   Charan Teja Reddy   mm/compaction: co...
1878
  	score = zone->present_pages * fragmentation_score_zone(zone);
facdaa917   Nitin Gupta   mm: proactive com...
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
  	return div64_ul(score, zone->zone_pgdat->node_present_pages + 1);
  }
  
  /*
   * The per-node proactive (background) compaction process is started by its
   * corresponding kcompactd thread when the node's fragmentation score
   * exceeds the high threshold. The compaction process remains active till
   * the node's score falls below the low threshold, or one of the back-off
   * conditions is met.
   */
d34c0a759   Nitin Gupta   mm: use unsigned ...
1889
  static unsigned int fragmentation_score_node(pg_data_t *pgdat)
facdaa917   Nitin Gupta   mm: proactive com...
1890
  {
d34c0a759   Nitin Gupta   mm: use unsigned ...
1891
  	unsigned int score = 0;
facdaa917   Nitin Gupta   mm: proactive com...
1892
1893
1894
1895
1896
1897
  	int zoneid;
  
  	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
  		struct zone *zone;
  
  		zone = &pgdat->node_zones[zoneid];
40d7e2032   Charan Teja Reddy   mm/compaction: co...
1898
  		score += fragmentation_score_zone_weighted(zone);
facdaa917   Nitin Gupta   mm: proactive com...
1899
1900
1901
1902
  	}
  
  	return score;
  }
d34c0a759   Nitin Gupta   mm: use unsigned ...
1903
  static unsigned int fragmentation_score_wmark(pg_data_t *pgdat, bool low)
facdaa917   Nitin Gupta   mm: proactive com...
1904
  {
d34c0a759   Nitin Gupta   mm: use unsigned ...
1905
  	unsigned int wmark_low;
facdaa917   Nitin Gupta   mm: proactive com...
1906
1907
  
  	/*
f0953a1bb   Ingo Molnar   mm: fix typos in ...
1908
1909
  	 * Cap the low watermark to avoid excessive compaction
  	 * activity in case a user sets the proactiveness tunable
facdaa917   Nitin Gupta   mm: proactive com...
1910
1911
  	 * close to 100 (maximum).
  	 */
d34c0a759   Nitin Gupta   mm: use unsigned ...
1912
1913
  	wmark_low = max(100U - sysctl_compaction_proactiveness, 5U);
  	return low ? wmark_low : min(wmark_low + 10, 100U);
facdaa917   Nitin Gupta   mm: proactive com...
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
  }
  
  static bool should_proactive_compact_node(pg_data_t *pgdat)
  {
  	int wmark_high;
  
  	if (!sysctl_compaction_proactiveness || kswapd_is_running(pgdat))
  		return false;
  
  	wmark_high = fragmentation_score_wmark(pgdat, false);
  	return fragmentation_score_node(pgdat) > wmark_high;
  }
40cacbcb3   Mel Gorman   mm, compaction: r...
1926
  static enum compact_result __compact_finished(struct compact_control *cc)
748446bb6   Mel Gorman   mm: compaction: m...
1927
  {
8fb74b9fb   Mel Gorman   mm: compaction: p...
1928
  	unsigned int order;
d39773a06   Vlastimil Babka   mm, compaction: a...
1929
  	const int migratetype = cc->migratetype;
cb2dcaf02   Mel Gorman   mm, compaction: f...
1930
  	int ret;
748446bb6   Mel Gorman   mm: compaction: m...
1931

753341a4b   Mel Gorman   revert "mm: have ...
1932
  	/* Compaction run completes if the migrate and free scanner meet */
f2849aa09   Vlastimil Babka   mm, compaction: m...
1933
  	if (compact_scanners_met(cc)) {
55b7c4c99   Vlastimil Babka   mm: compaction: r...
1934
  		/* Let the next compaction start anew. */
40cacbcb3   Mel Gorman   mm, compaction: r...
1935
  		reset_cached_positions(cc->zone);
55b7c4c99   Vlastimil Babka   mm: compaction: r...
1936

62997027c   Mel Gorman   mm: compaction: c...
1937
1938
  		/*
  		 * Mark that the PG_migrate_skip information should be cleared
accf62422   Vlastimil Babka   mm, kswapd: repla...
1939
  		 * by kswapd when it goes to sleep. kcompactd does not set the
62997027c   Mel Gorman   mm: compaction: c...
1940
1941
1942
  		 * flag itself as the decision to be clear should be directly
  		 * based on an allocation request.
  		 */
accf62422   Vlastimil Babka   mm, kswapd: repla...
1943
  		if (cc->direct_compaction)
40cacbcb3   Mel Gorman   mm, compaction: r...
1944
  			cc->zone->compact_blockskip_flush = true;
62997027c   Mel Gorman   mm: compaction: c...
1945

c8f7de0bf   Michal Hocko   mm, compaction: d...
1946
1947
1948
1949
  		if (cc->whole_zone)
  			return COMPACT_COMPLETE;
  		else
  			return COMPACT_PARTIAL_SKIPPED;
bb13ffeb9   Mel Gorman   mm: compaction: c...
1950
  	}
748446bb6   Mel Gorman   mm: compaction: m...
1951

facdaa917   Nitin Gupta   mm: proactive com...
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
  	if (cc->proactive_compaction) {
  		int score, wmark_low;
  		pg_data_t *pgdat;
  
  		pgdat = cc->zone->zone_pgdat;
  		if (kswapd_is_running(pgdat))
  			return COMPACT_PARTIAL_SKIPPED;
  
  		score = fragmentation_score_zone(cc->zone);
  		wmark_low = fragmentation_score_wmark(pgdat, true);
  
  		if (score > wmark_low)
  			ret = COMPACT_CONTINUE;
  		else
  			ret = COMPACT_SUCCESS;
  
  		goto out;
  	}
21c527a3c   Yaowei Bai   mm/compaction.c: ...
1970
  	if (is_via_compact_memory(cc->order))
56de7263f   Mel Gorman   mm: compaction: d...
1971
  		return COMPACT_CONTINUE;
efe771c76   Mel Gorman   mm, compaction: a...
1972
1973
1974
1975
1976
1977
1978
1979
  	/*
  	 * Always finish scanning a pageblock to reduce the possibility of
  	 * fallbacks in the future. This is particularly important when
  	 * migration source is unmovable/reclaimable but it's not worth
  	 * special casing.
  	 */
  	if (!IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
  		return COMPACT_CONTINUE;
baf6a9a1d   Vlastimil Babka   mm, compaction: f...
1980

56de7263f   Mel Gorman   mm: compaction: d...
1981
  	/* Direct compactor: Is a suitable page free? */
cb2dcaf02   Mel Gorman   mm, compaction: f...
1982
  	ret = COMPACT_NO_SUITABLE_PAGE;
8fb74b9fb   Mel Gorman   mm: compaction: p...
1983
  	for (order = cc->order; order < MAX_ORDER; order++) {
40cacbcb3   Mel Gorman   mm, compaction: r...
1984
  		struct free_area *area = &cc->zone->free_area[order];
2149cdaef   Joonsoo Kim   mm/compaction: en...
1985
  		bool can_steal;
8fb74b9fb   Mel Gorman   mm: compaction: p...
1986
1987
  
  		/* Job done if page is free of the right migratetype */
b03641af6   Dan Williams   mm: move buddy li...
1988
  		if (!free_area_empty(area, migratetype))
cf378319d   Vlastimil Babka   mm, compaction: r...
1989
  			return COMPACT_SUCCESS;
8fb74b9fb   Mel Gorman   mm: compaction: p...
1990

2149cdaef   Joonsoo Kim   mm/compaction: en...
1991
1992
1993
  #ifdef CONFIG_CMA
  		/* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
  		if (migratetype == MIGRATE_MOVABLE &&
b03641af6   Dan Williams   mm: move buddy li...
1994
  			!free_area_empty(area, MIGRATE_CMA))
cf378319d   Vlastimil Babka   mm, compaction: r...
1995
  			return COMPACT_SUCCESS;
2149cdaef   Joonsoo Kim   mm/compaction: en...
1996
1997
1998
1999
2000
2001
  #endif
  		/*
  		 * Job done if allocation would steal freepages from
  		 * other migratetype buddy lists.
  		 */
  		if (find_suitable_fallback(area, order, migratetype,
baf6a9a1d   Vlastimil Babka   mm, compaction: f...
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
  						true, &can_steal) != -1) {
  
  			/* movable pages are OK in any pageblock */
  			if (migratetype == MIGRATE_MOVABLE)
  				return COMPACT_SUCCESS;
  
  			/*
  			 * We are stealing for a non-movable allocation. Make
  			 * sure we finish compacting the current pageblock
  			 * first so it is as free as possible and we won't
  			 * have to steal another one soon. This only applies
  			 * to sync compaction, as async compaction operates
  			 * on pageblocks of the same migratetype.
  			 */
  			if (cc->mode == MIGRATE_ASYNC ||
  					IS_ALIGNED(cc->migrate_pfn,
  							pageblock_nr_pages)) {
  				return COMPACT_SUCCESS;
  			}
cb2dcaf02   Mel Gorman   mm, compaction: f...
2021
2022
  			ret = COMPACT_CONTINUE;
  			break;
baf6a9a1d   Vlastimil Babka   mm, compaction: f...
2023
  		}
56de7263f   Mel Gorman   mm: compaction: d...
2024
  	}
facdaa917   Nitin Gupta   mm: proactive com...
2025
  out:
cb2dcaf02   Mel Gorman   mm, compaction: f...
2026
2027
2028
2029
  	if (cc->contended || fatal_signal_pending(current))
  		ret = COMPACT_CONTENDED;
  
  	return ret;
837d026d5   Joonsoo Kim   mm/compaction: mo...
2030
  }
40cacbcb3   Mel Gorman   mm, compaction: r...
2031
  static enum compact_result compact_finished(struct compact_control *cc)
837d026d5   Joonsoo Kim   mm/compaction: mo...
2032
2033
  {
  	int ret;
40cacbcb3   Mel Gorman   mm, compaction: r...
2034
2035
  	ret = __compact_finished(cc);
  	trace_mm_compaction_finished(cc->zone, cc->order, ret);
837d026d5   Joonsoo Kim   mm/compaction: mo...
2036
2037
2038
2039
  	if (ret == COMPACT_NO_SUITABLE_PAGE)
  		ret = COMPACT_CONTINUE;
  
  	return ret;
748446bb6   Mel Gorman   mm: compaction: m...
2040
  }
ea7ab982b   Michal Hocko   mm, compaction: c...
2041
  static enum compact_result __compaction_suitable(struct zone *zone, int order,
c603844bd   Mel Gorman   mm, page_alloc: c...
2042
  					unsigned int alloc_flags,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2043
  					int highest_zoneidx,
86a294a81   Michal Hocko   mm, oom, compacti...
2044
  					unsigned long wmark_target)
3e7d34497   Mel Gorman   mm: vmscan: recla...
2045
  {
3e7d34497   Mel Gorman   mm: vmscan: recla...
2046
  	unsigned long watermark;
21c527a3c   Yaowei Bai   mm/compaction.c: ...
2047
  	if (is_via_compact_memory(order))
3957c7768   Michal Hocko   mm: compaction: f...
2048
  		return COMPACT_CONTINUE;
a92144438   Mel Gorman   mm: move zone wat...
2049
  	watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
ebff39801   Vlastimil Babka   mm, compaction: p...
2050
2051
2052
2053
  	/*
  	 * If watermarks for high-order allocation are already met, there
  	 * should be no need for compaction at all.
  	 */
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2054
  	if (zone_watermark_ok(zone, order, watermark, highest_zoneidx,
ebff39801   Vlastimil Babka   mm, compaction: p...
2055
  								alloc_flags))
cf378319d   Vlastimil Babka   mm, compaction: r...
2056
  		return COMPACT_SUCCESS;
ebff39801   Vlastimil Babka   mm, compaction: p...
2057

3957c7768   Michal Hocko   mm: compaction: f...
2058
  	/*
9861a62c3   Vlastimil Babka   mm, compaction: c...
2059
  	 * Watermarks for order-0 must be met for compaction to be able to
984fdba6a   Vlastimil Babka   mm, compaction: u...
2060
2061
2062
2063
  	 * isolate free pages for migration targets. This means that the
  	 * watermark and alloc_flags have to match, or be more pessimistic than
  	 * the check in __isolate_free_page(). We don't use the direct
  	 * compactor's alloc_flags, as they are not relevant for freepage
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2064
2065
2066
  	 * isolation. We however do use the direct compactor's highest_zoneidx
  	 * to skip over zones where lowmem reserves would prevent allocation
  	 * even if compaction succeeds.
8348faf91   Vlastimil Babka   mm, compaction: r...
2067
2068
  	 * For costly orders, we require low watermark instead of min for
  	 * compaction to proceed to increase its chances.
d883c6cf3   Joonsoo Kim   Revert "mm/cma: m...
2069
2070
  	 * ALLOC_CMA is used, as pages in CMA pageblocks are considered
  	 * suitable migration targets
3e7d34497   Mel Gorman   mm: vmscan: recla...
2071
  	 */
8348faf91   Vlastimil Babka   mm, compaction: r...
2072
2073
2074
  	watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
  				low_wmark_pages(zone) : min_wmark_pages(zone);
  	watermark += compact_gap(order);
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2075
  	if (!__zone_watermark_ok(zone, 0, watermark, highest_zoneidx,
d883c6cf3   Joonsoo Kim   Revert "mm/cma: m...
2076
  						ALLOC_CMA, wmark_target))
3e7d34497   Mel Gorman   mm: vmscan: recla...
2077
  		return COMPACT_SKIPPED;
cc5c9f098   Vlastimil Babka   mm, compaction: i...
2078
2079
  	return COMPACT_CONTINUE;
  }
2b1a20c3a   Hui Su   mm/compaction: mo...
2080
2081
2082
2083
2084
2085
2086
  /*
   * compaction_suitable: Is this suitable to run compaction on this zone now?
   * Returns
   *   COMPACT_SKIPPED  - If there are too few free pages for compaction
   *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
   *   COMPACT_CONTINUE - If compaction should run now
   */
cc5c9f098   Vlastimil Babka   mm, compaction: i...
2087
2088
  enum compact_result compaction_suitable(struct zone *zone, int order,
  					unsigned int alloc_flags,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2089
  					int highest_zoneidx)
cc5c9f098   Vlastimil Babka   mm, compaction: i...
2090
2091
2092
  {
  	enum compact_result ret;
  	int fragindex;
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2093
  	ret = __compaction_suitable(zone, order, alloc_flags, highest_zoneidx,
cc5c9f098   Vlastimil Babka   mm, compaction: i...
2094
  				    zone_page_state(zone, NR_FREE_PAGES));
3e7d34497   Mel Gorman   mm: vmscan: recla...
2095
2096
2097
2098
  	/*
  	 * fragmentation index determines if allocation failures are due to
  	 * low memory or external fragmentation
  	 *
ebff39801   Vlastimil Babka   mm, compaction: p...
2099
2100
  	 * index of -1000 would imply allocations might succeed depending on
  	 * watermarks, but we already failed the high-order watermark check
3e7d34497   Mel Gorman   mm: vmscan: recla...
2101
2102
2103
  	 * index towards 0 implies failure is due to lack of memory
  	 * index towards 1000 implies failure is due to fragmentation
  	 *
203114202   Vlastimil Babka   mm, compaction: r...
2104
2105
2106
2107
2108
2109
  	 * Only compact if a failure would be due to fragmentation. Also
  	 * ignore fragindex for non-costly orders where the alternative to
  	 * a successful reclaim/compaction is OOM. Fragindex and the
  	 * vm.extfrag_threshold sysctl is meant as a heuristic to prevent
  	 * excessive compaction for costly orders, but it should not be at the
  	 * expense of system stability.
3e7d34497   Mel Gorman   mm: vmscan: recla...
2110
  	 */
203114202   Vlastimil Babka   mm, compaction: r...
2111
  	if (ret == COMPACT_CONTINUE && (order > PAGE_ALLOC_COSTLY_ORDER)) {
cc5c9f098   Vlastimil Babka   mm, compaction: i...
2112
2113
2114
2115
  		fragindex = fragmentation_index(zone, order);
  		if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
  			ret = COMPACT_NOT_SUITABLE_ZONE;
  	}
837d026d5   Joonsoo Kim   mm/compaction: mo...
2116

837d026d5   Joonsoo Kim   mm/compaction: mo...
2117
2118
2119
2120
2121
2122
  	trace_mm_compaction_suitable(zone, order, ret);
  	if (ret == COMPACT_NOT_SUITABLE_ZONE)
  		ret = COMPACT_SKIPPED;
  
  	return ret;
  }
86a294a81   Michal Hocko   mm, oom, compacti...
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
  bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
  		int alloc_flags)
  {
  	struct zone *zone;
  	struct zoneref *z;
  
  	/*
  	 * Make sure at least one zone would pass __compaction_suitable if we continue
  	 * retrying the reclaim.
  	 */
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2133
2134
  	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
  				ac->highest_zoneidx, ac->nodemask) {
86a294a81   Michal Hocko   mm, oom, compacti...
2135
2136
2137
2138
2139
2140
2141
2142
2143
  		unsigned long available;
  		enum compact_result compact_result;
  
  		/*
  		 * Do not consider all the reclaimable memory because we do not
  		 * want to trash just for a single high order allocation which
  		 * is even not guaranteed to appear even if __compaction_suitable
  		 * is happy about the watermark check.
  		 */
5a1c84b40   Mel Gorman   mm: remove reclai...
2144
  		available = zone_reclaimable_pages(zone) / order;
86a294a81   Michal Hocko   mm, oom, compacti...
2145
2146
  		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
  		compact_result = __compaction_suitable(zone, order, alloc_flags,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2147
  				ac->highest_zoneidx, available);
cc5c9f098   Vlastimil Babka   mm, compaction: i...
2148
  		if (compact_result != COMPACT_SKIPPED)
86a294a81   Michal Hocko   mm, oom, compacti...
2149
2150
2151
2152
2153
  			return true;
  	}
  
  	return false;
  }
5e1f0f098   Mel Gorman   mm, compaction: c...
2154
2155
  static enum compact_result
  compact_zone(struct compact_control *cc, struct capture_control *capc)
748446bb6   Mel Gorman   mm: compaction: m...
2156
  {
ea7ab982b   Michal Hocko   mm, compaction: c...
2157
  	enum compact_result ret;
40cacbcb3   Mel Gorman   mm, compaction: r...
2158
2159
  	unsigned long start_pfn = cc->zone->zone_start_pfn;
  	unsigned long end_pfn = zone_end_pfn(cc->zone);
566e54e11   Mel Gorman   mm, compaction: r...
2160
  	unsigned long last_migrated_pfn;
e0b9daeb4   David Rientjes   mm, compaction: e...
2161
  	const bool sync = cc->mode != MIGRATE_ASYNC;
8854c55f5   Mel Gorman   mm, compaction: k...
2162
  	bool update_cached;
748446bb6   Mel Gorman   mm: compaction: m...
2163

a94b52524   Yafang Shao   mm/compaction.c: ...
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
  	/*
  	 * These counters track activities during zone compaction.  Initialize
  	 * them before compacting a new zone.
  	 */
  	cc->total_migrate_scanned = 0;
  	cc->total_free_scanned = 0;
  	cc->nr_migratepages = 0;
  	cc->nr_freepages = 0;
  	INIT_LIST_HEAD(&cc->freepages);
  	INIT_LIST_HEAD(&cc->migratepages);
01c0bfe06   Wei Yang   mm: rename gfpfla...
2174
  	cc->migratetype = gfp_migratetype(cc->gfp_mask);
40cacbcb3   Mel Gorman   mm, compaction: r...
2175
  	ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2176
  							cc->highest_zoneidx);
c46649dea   Michal Hocko   mm, compaction: c...
2177
  	/* Compaction is likely to fail */
cf378319d   Vlastimil Babka   mm, compaction: r...
2178
  	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
3e7d34497   Mel Gorman   mm: vmscan: recla...
2179
  		return ret;
c46649dea   Michal Hocko   mm, compaction: c...
2180
2181
2182
  
  	/* huh, compaction_suitable is returning something unexpected */
  	VM_BUG_ON(ret != COMPACT_CONTINUE);
3e7d34497   Mel Gorman   mm: vmscan: recla...
2183

c89511ab2   Mel Gorman   mm: compaction: R...
2184
  	/*
d3132e4b8   Vlastimil Babka   mm: compaction: r...
2185
  	 * Clear pageblock skip if there were failures recently and compaction
accf62422   Vlastimil Babka   mm, kswapd: repla...
2186
  	 * is about to be retried after being deferred.
d3132e4b8   Vlastimil Babka   mm: compaction: r...
2187
  	 */
40cacbcb3   Mel Gorman   mm, compaction: r...
2188
2189
  	if (compaction_restarting(cc->zone, cc->order))
  		__reset_isolation_suitable(cc->zone);
d3132e4b8   Vlastimil Babka   mm: compaction: r...
2190
2191
  
  	/*
c89511ab2   Mel Gorman   mm: compaction: R...
2192
  	 * Setup to move all movable pages to the end of the zone. Used cached
06ed29989   Vlastimil Babka   mm, compaction: m...
2193
2194
2195
  	 * information on where the scanners should start (unless we explicitly
  	 * want to compact the whole zone), but check that it is initialised
  	 * by ensuring the values are within zone boundaries.
c89511ab2   Mel Gorman   mm: compaction: R...
2196
  	 */
70b44595e   Mel Gorman   mm, compaction: u...
2197
  	cc->fast_start_pfn = 0;
06ed29989   Vlastimil Babka   mm, compaction: m...
2198
  	if (cc->whole_zone) {
c89511ab2   Mel Gorman   mm: compaction: R...
2199
  		cc->migrate_pfn = start_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
2200
2201
  		cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
  	} else {
40cacbcb3   Mel Gorman   mm, compaction: r...
2202
2203
  		cc->migrate_pfn = cc->zone->compact_cached_migrate_pfn[sync];
  		cc->free_pfn = cc->zone->compact_cached_free_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
2204
2205
  		if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
  			cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
40cacbcb3   Mel Gorman   mm, compaction: r...
2206
  			cc->zone->compact_cached_free_pfn = cc->free_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
2207
2208
2209
  		}
  		if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
  			cc->migrate_pfn = start_pfn;
40cacbcb3   Mel Gorman   mm, compaction: r...
2210
2211
  			cc->zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
  			cc->zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
2212
  		}
c8f7de0bf   Michal Hocko   mm, compaction: d...
2213

e332f741a   Mel Gorman   mm, compaction: b...
2214
  		if (cc->migrate_pfn <= cc->zone->compact_init_migrate_pfn)
06ed29989   Vlastimil Babka   mm, compaction: m...
2215
2216
  			cc->whole_zone = true;
  	}
c8f7de0bf   Michal Hocko   mm, compaction: d...
2217

566e54e11   Mel Gorman   mm, compaction: r...
2218
  	last_migrated_pfn = 0;
748446bb6   Mel Gorman   mm: compaction: m...
2219

8854c55f5   Mel Gorman   mm, compaction: k...
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
  	/*
  	 * Migrate has separate cached PFNs for ASYNC and SYNC* migration on
  	 * the basis that some migrations will fail in ASYNC mode. However,
  	 * if the cached PFNs match and pageblocks are skipped due to having
  	 * no isolation candidates, then the sync state does not matter.
  	 * Until a pageblock with isolation candidates is found, keep the
  	 * cached PFNs in sync to avoid revisiting the same blocks.
  	 */
  	update_cached = !sync &&
  		cc->zone->compact_cached_migrate_pfn[0] == cc->zone->compact_cached_migrate_pfn[1];
16c4a097a   Joonsoo Kim   mm/compaction: en...
2230
2231
  	trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
  				cc->free_pfn, end_pfn, sync);
0eb927c0a   Mel Gorman   mm: compaction: t...
2232

361a2a229   Minchan Kim   mm: replace migra...
2233
2234
  	/* lru_add_drain_all could be expensive with involving other CPUs */
  	lru_add_drain();
748446bb6   Mel Gorman   mm: compaction: m...
2235

40cacbcb3   Mel Gorman   mm, compaction: r...
2236
  	while ((ret = compact_finished(cc)) == COMPACT_CONTINUE) {
9d502c1c8   Minchan Kim   mm/compaction: ch...
2237
  		int err;
19d3cf9de   Yanfei Xu   mm/compaction: re...
2238
  		unsigned long iteration_start_pfn = cc->migrate_pfn;
748446bb6   Mel Gorman   mm: compaction: m...
2239

804d3121b   Mel Gorman   mm, compaction: a...
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
  		/*
  		 * Avoid multiple rescans which can happen if a page cannot be
  		 * isolated (dirty/writeback in async mode) or if the migrated
  		 * pages are being allocated before the pageblock is cleared.
  		 * The first rescan will capture the entire pageblock for
  		 * migration. If it fails, it'll be marked skip and scanning
  		 * will proceed as normal.
  		 */
  		cc->rescan = false;
  		if (pageblock_start_pfn(last_migrated_pfn) ==
19d3cf9de   Yanfei Xu   mm/compaction: re...
2250
  		    pageblock_start_pfn(iteration_start_pfn)) {
804d3121b   Mel Gorman   mm, compaction: a...
2251
2252
  			cc->rescan = true;
  		}
32aaf0553   Pengfei Li   mm/compaction.c: ...
2253
  		switch (isolate_migratepages(cc)) {
f9e35b3b4   Mel Gorman   mm: compaction: a...
2254
  		case ISOLATE_ABORT:
2d1e10412   Vlastimil Babka   mm, compaction: d...
2255
  			ret = COMPACT_CONTENDED;
5733c7d11   Rafael Aquini   mm: introduce put...
2256
  			putback_movable_pages(&cc->migratepages);
e64c5237c   Shaohua Li   mm: compaction: a...
2257
  			cc->nr_migratepages = 0;
f9e35b3b4   Mel Gorman   mm: compaction: a...
2258
2259
  			goto out;
  		case ISOLATE_NONE:
8854c55f5   Mel Gorman   mm, compaction: k...
2260
2261
2262
2263
  			if (update_cached) {
  				cc->zone->compact_cached_migrate_pfn[1] =
  					cc->zone->compact_cached_migrate_pfn[0];
  			}
fdaf7f5c4   Vlastimil Babka   mm, compaction: m...
2264
2265
2266
2267
2268
2269
  			/*
  			 * We haven't isolated and migrated anything, but
  			 * there might still be unflushed migrations from
  			 * previous cc->order aligned block.
  			 */
  			goto check_drain;
f9e35b3b4   Mel Gorman   mm: compaction: a...
2270
  		case ISOLATE_SUCCESS:
8854c55f5   Mel Gorman   mm, compaction: k...
2271
  			update_cached = false;
19d3cf9de   Yanfei Xu   mm/compaction: re...
2272
  			last_migrated_pfn = iteration_start_pfn;
f9e35b3b4   Mel Gorman   mm: compaction: a...
2273
  		}
748446bb6   Mel Gorman   mm: compaction: m...
2274

d53aea3d4   David Rientjes   mm, compaction: r...
2275
  		err = migrate_pages(&cc->migratepages, compaction_alloc,
e0b9daeb4   David Rientjes   mm, compaction: e...
2276
  				compaction_free, (unsigned long)cc, cc->mode,
5ac95884a   Yang Shi   mm/migrate: enabl...
2277
  				MR_COMPACTION, NULL);
748446bb6   Mel Gorman   mm: compaction: m...
2278

f8c9301fa   Vlastimil Babka   mm/compaction: do...
2279
2280
  		trace_mm_compaction_migratepages(cc->nr_migratepages, err,
  							&cc->migratepages);
748446bb6   Mel Gorman   mm: compaction: m...
2281

f8c9301fa   Vlastimil Babka   mm/compaction: do...
2282
2283
  		/* All pages were either migrated or will be released */
  		cc->nr_migratepages = 0;
9d502c1c8   Minchan Kim   mm/compaction: ch...
2284
  		if (err) {
5733c7d11   Rafael Aquini   mm: introduce put...
2285
  			putback_movable_pages(&cc->migratepages);
7ed695e06   Vlastimil Babka   mm: compaction: d...
2286
2287
2288
2289
  			/*
  			 * migrate_pages() may return -ENOMEM when scanners meet
  			 * and we want compact_finished() to detect it
  			 */
f2849aa09   Vlastimil Babka   mm, compaction: m...
2290
  			if (err == -ENOMEM && !compact_scanners_met(cc)) {
2d1e10412   Vlastimil Babka   mm, compaction: d...
2291
  				ret = COMPACT_CONTENDED;
4bf2bba37   David Rientjes   mm, thp: abort co...
2292
2293
  				goto out;
  			}
fdd048e12   Vlastimil Babka   mm, compaction: s...
2294
2295
2296
2297
2298
2299
2300
2301
2302
  			/*
  			 * We failed to migrate at least one page in the current
  			 * order-aligned block, so skip the rest of it.
  			 */
  			if (cc->direct_compaction &&
  						(cc->mode == MIGRATE_ASYNC)) {
  				cc->migrate_pfn = block_end_pfn(
  						cc->migrate_pfn - 1, cc->order);
  				/* Draining pcplists is useless in this case */
566e54e11   Mel Gorman   mm, compaction: r...
2303
  				last_migrated_pfn = 0;
fdd048e12   Vlastimil Babka   mm, compaction: s...
2304
  			}
748446bb6   Mel Gorman   mm: compaction: m...
2305
  		}
fdaf7f5c4   Vlastimil Babka   mm, compaction: m...
2306

fdaf7f5c4   Vlastimil Babka   mm, compaction: m...
2307
2308
2309
2310
2311
2312
2313
2314
  check_drain:
  		/*
  		 * Has the migration scanner moved away from the previous
  		 * cc->order aligned block where we migrated from? If yes,
  		 * flush the pages that were freed, so that they can merge and
  		 * compact_finished() can detect immediately if allocation
  		 * would succeed.
  		 */
566e54e11   Mel Gorman   mm, compaction: r...
2315
  		if (cc->order > 0 && last_migrated_pfn) {
fdaf7f5c4   Vlastimil Babka   mm, compaction: m...
2316
  			unsigned long current_block_start =
06b6640a3   Vlastimil Babka   mm, compaction: w...
2317
  				block_start_pfn(cc->migrate_pfn, cc->order);
fdaf7f5c4   Vlastimil Babka   mm, compaction: m...
2318

566e54e11   Mel Gorman   mm, compaction: r...
2319
  			if (last_migrated_pfn < current_block_start) {
b01b21419   Ingo Molnar   mm/swap: Use loca...
2320
  				lru_add_drain_cpu_zone(cc->zone);
fdaf7f5c4   Vlastimil Babka   mm, compaction: m...
2321
  				/* No more flushing until we migrate again */
566e54e11   Mel Gorman   mm, compaction: r...
2322
  				last_migrated_pfn = 0;
fdaf7f5c4   Vlastimil Babka   mm, compaction: m...
2323
2324
  			}
  		}
5e1f0f098   Mel Gorman   mm, compaction: c...
2325
2326
2327
2328
2329
  		/* Stop if a page has been captured */
  		if (capc && capc->page) {
  			ret = COMPACT_SUCCESS;
  			break;
  		}
748446bb6   Mel Gorman   mm: compaction: m...
2330
  	}
f9e35b3b4   Mel Gorman   mm: compaction: a...
2331
  out:
6bace090a   Vlastimil Babka   mm, compaction: a...
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
  	/*
  	 * Release free pages and update where the free scanner should restart,
  	 * so we don't leave any returned pages behind in the next attempt.
  	 */
  	if (cc->nr_freepages > 0) {
  		unsigned long free_pfn = release_freepages(&cc->freepages);
  
  		cc->nr_freepages = 0;
  		VM_BUG_ON(free_pfn == 0);
  		/* The cached pfn is always the first in a pageblock */
06b6640a3   Vlastimil Babka   mm, compaction: w...
2342
  		free_pfn = pageblock_start_pfn(free_pfn);
6bace090a   Vlastimil Babka   mm, compaction: a...
2343
2344
2345
2346
  		/*
  		 * Only go back, not forward. The cached pfn might have been
  		 * already reset to zone end in compact_finished()
  		 */
40cacbcb3   Mel Gorman   mm, compaction: r...
2347
2348
  		if (free_pfn > cc->zone->compact_cached_free_pfn)
  			cc->zone->compact_cached_free_pfn = free_pfn;
6bace090a   Vlastimil Babka   mm, compaction: a...
2349
  	}
748446bb6   Mel Gorman   mm: compaction: m...
2350

7f354a548   David Rientjes   mm, compaction: a...
2351
2352
  	count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned);
  	count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned);
16c4a097a   Joonsoo Kim   mm/compaction: en...
2353
2354
  	trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
  				cc->free_pfn, end_pfn, sync, ret);
0eb927c0a   Mel Gorman   mm: compaction: t...
2355

748446bb6   Mel Gorman   mm: compaction: m...
2356
2357
  	return ret;
  }
76ab0f530   Mel Gorman   mm: compaction: a...
2358

ea7ab982b   Michal Hocko   mm, compaction: c...
2359
  static enum compact_result compact_zone_order(struct zone *zone, int order,
c3486f537   Vlastimil Babka   mm, compaction: s...
2360
  		gfp_t gfp_mask, enum compact_priority prio,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2361
  		unsigned int alloc_flags, int highest_zoneidx,
5e1f0f098   Mel Gorman   mm, compaction: c...
2362
  		struct page **capture)
56de7263f   Mel Gorman   mm: compaction: d...
2363
  {
ea7ab982b   Michal Hocko   mm, compaction: c...
2364
  	enum compact_result ret;
56de7263f   Mel Gorman   mm: compaction: d...
2365
  	struct compact_control cc = {
56de7263f   Mel Gorman   mm: compaction: d...
2366
  		.order = order,
dbe2d4e4f   Mel Gorman   mm, compaction: r...
2367
  		.search_order = order,
6d7ce5594   David Rientjes   mm, compaction: p...
2368
  		.gfp_mask = gfp_mask,
56de7263f   Mel Gorman   mm: compaction: d...
2369
  		.zone = zone,
a5508cd83   Vlastimil Babka   mm, compaction: i...
2370
2371
  		.mode = (prio == COMPACT_PRIO_ASYNC) ?
  					MIGRATE_ASYNC :	MIGRATE_SYNC_LIGHT,
ebff39801   Vlastimil Babka   mm, compaction: p...
2372
  		.alloc_flags = alloc_flags,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2373
  		.highest_zoneidx = highest_zoneidx,
accf62422   Vlastimil Babka   mm, kswapd: repla...
2374
  		.direct_compaction = true,
a8e025e55   Vlastimil Babka   mm, compaction: a...
2375
  		.whole_zone = (prio == MIN_COMPACT_PRIORITY),
9f7e33879   Vlastimil Babka   mm, compaction: m...
2376
2377
  		.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
  		.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
56de7263f   Mel Gorman   mm: compaction: d...
2378
  	};
5e1f0f098   Mel Gorman   mm, compaction: c...
2379
2380
2381
2382
  	struct capture_control capc = {
  		.cc = &cc,
  		.page = NULL,
  	};
b9e20f0da   Vlastimil Babka   mm, compaction: m...
2383
2384
2385
2386
2387
2388
2389
  	/*
  	 * Make sure the structs are really initialized before we expose the
  	 * capture control, in case we are interrupted and the interrupt handler
  	 * frees a page.
  	 */
  	barrier();
  	WRITE_ONCE(current->capture_control, &capc);
56de7263f   Mel Gorman   mm: compaction: d...
2390

5e1f0f098   Mel Gorman   mm, compaction: c...
2391
  	ret = compact_zone(&cc, &capc);
e64c5237c   Shaohua Li   mm: compaction: a...
2392
2393
2394
  
  	VM_BUG_ON(!list_empty(&cc.freepages));
  	VM_BUG_ON(!list_empty(&cc.migratepages));
b9e20f0da   Vlastimil Babka   mm, compaction: m...
2395
2396
2397
2398
2399
2400
2401
  	/*
  	 * Make sure we hide capture control first before we read the captured
  	 * page pointer, otherwise an interrupt could free and capture a page
  	 * and we would leak it.
  	 */
  	WRITE_ONCE(current->capture_control, NULL);
  	*capture = READ_ONCE(capc.page);
06dac2f46   Charan Teja Reddy   mm: compaction: u...
2402
2403
2404
2405
2406
2407
2408
2409
  	/*
  	 * Technically, it is also possible that compaction is skipped but
  	 * the page is still captured out of luck(IRQ came and freed the page).
  	 * Returning COMPACT_SUCCESS in such cases helps in properly accounting
  	 * the COMPACT[STALL|FAIL] when compaction is skipped.
  	 */
  	if (*capture)
  		ret = COMPACT_SUCCESS;
5e1f0f098   Mel Gorman   mm, compaction: c...
2410

e64c5237c   Shaohua Li   mm: compaction: a...
2411
  	return ret;
56de7263f   Mel Gorman   mm: compaction: d...
2412
  }
5e7719058   Mel Gorman   mm: compaction: a...
2413
  int sysctl_extfrag_threshold = 500;
56de7263f   Mel Gorman   mm: compaction: d...
2414
2415
  /**
   * try_to_compact_pages - Direct compact to satisfy a high-order allocation
56de7263f   Mel Gorman   mm: compaction: d...
2416
   * @gfp_mask: The GFP mask of the current allocation
1a6d53a10   Vlastimil Babka   mm: reduce try_to...
2417
2418
2419
   * @order: The order of the current allocation
   * @alloc_flags: The allocation flags of the current allocation
   * @ac: The context of current allocation
112d2d29f   Yang Shi   mm/compaction.c: ...
2420
   * @prio: Determines how hard direct compaction should try to succeed
6467552ca   Vlastimil Babka   mm, compaction: f...
2421
   * @capture: Pointer to free page created by compaction will be stored here
56de7263f   Mel Gorman   mm: compaction: d...
2422
2423
2424
   *
   * This is the main entry point for direct page compaction.
   */
ea7ab982b   Michal Hocko   mm, compaction: c...
2425
  enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
c603844bd   Mel Gorman   mm, page_alloc: c...
2426
  		unsigned int alloc_flags, const struct alloc_context *ac,
5e1f0f098   Mel Gorman   mm, compaction: c...
2427
  		enum compact_priority prio, struct page **capture)
56de7263f   Mel Gorman   mm: compaction: d...
2428
  {
56de7263f   Mel Gorman   mm: compaction: d...
2429
  	int may_perform_io = gfp_mask & __GFP_IO;
56de7263f   Mel Gorman   mm: compaction: d...
2430
2431
  	struct zoneref *z;
  	struct zone *zone;
1d4746d39   Michal Hocko   mm, compaction: d...
2432
  	enum compact_result rc = COMPACT_SKIPPED;
56de7263f   Mel Gorman   mm: compaction: d...
2433

73e64c51a   Michal Hocko   mm, compaction: a...
2434
2435
2436
2437
2438
  	/*
  	 * Check if the GFP flags allow compaction - GFP_NOIO is really
  	 * tricky context because the migration might require IO
  	 */
  	if (!may_perform_io)
53853e2d2   Vlastimil Babka   mm, compaction: d...
2439
  		return COMPACT_SKIPPED;
56de7263f   Mel Gorman   mm: compaction: d...
2440

a5508cd83   Vlastimil Babka   mm, compaction: i...
2441
  	trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
837d026d5   Joonsoo Kim   mm/compaction: mo...
2442

56de7263f   Mel Gorman   mm: compaction: d...
2443
  	/* Compact each zone in the list */
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2444
2445
  	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
  					ac->highest_zoneidx, ac->nodemask) {
ea7ab982b   Michal Hocko   mm, compaction: c...
2446
  		enum compact_result status;
56de7263f   Mel Gorman   mm: compaction: d...
2447

a8e025e55   Vlastimil Babka   mm, compaction: a...
2448
2449
  		if (prio > MIN_COMPACT_PRIORITY
  					&& compaction_deferred(zone, order)) {
1d4746d39   Michal Hocko   mm, compaction: d...
2450
  			rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
53853e2d2   Vlastimil Babka   mm, compaction: d...
2451
  			continue;
1d4746d39   Michal Hocko   mm, compaction: d...
2452
  		}
53853e2d2   Vlastimil Babka   mm, compaction: d...
2453

a5508cd83   Vlastimil Babka   mm, compaction: i...
2454
  		status = compact_zone_order(zone, order, gfp_mask, prio,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2455
  				alloc_flags, ac->highest_zoneidx, capture);
56de7263f   Mel Gorman   mm: compaction: d...
2456
  		rc = max(status, rc);
7ceb009a2   Vlastimil Babka   mm, compaction: d...
2457
2458
  		/* The allocation should succeed, stop compacting */
  		if (status == COMPACT_SUCCESS) {
53853e2d2   Vlastimil Babka   mm, compaction: d...
2459
2460
2461
2462
2463
2464
2465
  			/*
  			 * We think the allocation will succeed in this zone,
  			 * but it is not certain, hence the false. The caller
  			 * will repeat this with true if allocation indeed
  			 * succeeds in this zone.
  			 */
  			compaction_defer_reset(zone, order, false);
1f9efdef4   Vlastimil Babka   mm, compaction: k...
2466

c3486f537   Vlastimil Babka   mm, compaction: s...
2467
  			break;
1f9efdef4   Vlastimil Babka   mm, compaction: k...
2468
  		}
a5508cd83   Vlastimil Babka   mm, compaction: i...
2469
  		if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
c3486f537   Vlastimil Babka   mm, compaction: s...
2470
  					status == COMPACT_PARTIAL_SKIPPED))
53853e2d2   Vlastimil Babka   mm, compaction: d...
2471
2472
2473
2474
2475
2476
  			/*
  			 * We think that allocation won't succeed in this zone
  			 * so we defer compaction there. If it ends up
  			 * succeeding after all, it will be reset.
  			 */
  			defer_compaction(zone, order);
1f9efdef4   Vlastimil Babka   mm, compaction: k...
2477
2478
2479
2480
  
  		/*
  		 * We might have stopped compacting due to need_resched() in
  		 * async compaction, or due to a fatal signal detected. In that
c3486f537   Vlastimil Babka   mm, compaction: s...
2481
  		 * case do not try further zones
1f9efdef4   Vlastimil Babka   mm, compaction: k...
2482
  		 */
c3486f537   Vlastimil Babka   mm, compaction: s...
2483
2484
2485
  		if ((prio == COMPACT_PRIO_ASYNC && need_resched())
  					|| fatal_signal_pending(current))
  			break;
56de7263f   Mel Gorman   mm: compaction: d...
2486
2487
2488
2489
  	}
  
  	return rc;
  }
facdaa917   Nitin Gupta   mm: proactive com...
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
  /*
   * Compact all zones within a node till each zone's fragmentation score
   * reaches within proactive compaction thresholds (as determined by the
   * proactiveness tunable).
   *
   * It is possible that the function returns before reaching score targets
   * due to various back-off conditions, such as, contention on per-node or
   * per-zone locks.
   */
  static void proactive_compact_node(pg_data_t *pgdat)
  {
  	int zoneid;
  	struct zone *zone;
  	struct compact_control cc = {
  		.order = -1,
  		.mode = MIGRATE_SYNC_LIGHT,
  		.ignore_skip_hint = true,
  		.whole_zone = true,
  		.gfp_mask = GFP_KERNEL,
  		.proactive_compaction = true,
  	};
  
  	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
  		zone = &pgdat->node_zones[zoneid];
  		if (!populated_zone(zone))
  			continue;
  
  		cc.zone = zone;
  
  		compact_zone(&cc, NULL);
  
  		VM_BUG_ON(!list_empty(&cc.freepages));
  		VM_BUG_ON(!list_empty(&cc.migratepages));
  	}
  }
56de7263f   Mel Gorman   mm: compaction: d...
2525

76ab0f530   Mel Gorman   mm: compaction: a...
2526
  /* Compact all zones within a node */
791cae962   Vlastimil Babka   mm, compaction: c...
2527
  static void compact_node(int nid)
76ab0f530   Mel Gorman   mm: compaction: a...
2528
  {
791cae962   Vlastimil Babka   mm, compaction: c...
2529
  	pg_data_t *pgdat = NODE_DATA(nid);
76ab0f530   Mel Gorman   mm: compaction: a...
2530
  	int zoneid;
76ab0f530   Mel Gorman   mm: compaction: a...
2531
  	struct zone *zone;
791cae962   Vlastimil Babka   mm, compaction: c...
2532
2533
2534
2535
2536
  	struct compact_control cc = {
  		.order = -1,
  		.mode = MIGRATE_SYNC,
  		.ignore_skip_hint = true,
  		.whole_zone = true,
73e64c51a   Michal Hocko   mm, compaction: a...
2537
  		.gfp_mask = GFP_KERNEL,
791cae962   Vlastimil Babka   mm, compaction: c...
2538
  	};
76ab0f530   Mel Gorman   mm: compaction: a...
2539

76ab0f530   Mel Gorman   mm: compaction: a...
2540
  	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
76ab0f530   Mel Gorman   mm: compaction: a...
2541
2542
2543
2544
  
  		zone = &pgdat->node_zones[zoneid];
  		if (!populated_zone(zone))
  			continue;
791cae962   Vlastimil Babka   mm, compaction: c...
2545
  		cc.zone = zone;
76ab0f530   Mel Gorman   mm: compaction: a...
2546

5e1f0f098   Mel Gorman   mm, compaction: c...
2547
  		compact_zone(&cc, NULL);
754693457   Joonsoo Kim   mm/compaction.c: ...
2548

791cae962   Vlastimil Babka   mm, compaction: c...
2549
2550
  		VM_BUG_ON(!list_empty(&cc.freepages));
  		VM_BUG_ON(!list_empty(&cc.migratepages));
76ab0f530   Mel Gorman   mm: compaction: a...
2551
  	}
76ab0f530   Mel Gorman   mm: compaction: a...
2552
2553
2554
  }
  
  /* Compact all nodes in the system */
7964c06d6   Jason Liu   mm: compaction: f...
2555
  static void compact_nodes(void)
76ab0f530   Mel Gorman   mm: compaction: a...
2556
2557
  {
  	int nid;
8575ec29f   Hugh Dickins   compact_pgdat: wo...
2558
2559
  	/* Flush pending updates to the LRU lists */
  	lru_add_drain_all();
76ab0f530   Mel Gorman   mm: compaction: a...
2560
2561
  	for_each_online_node(nid)
  		compact_node(nid);
76ab0f530   Mel Gorman   mm: compaction: a...
2562
  }
fec4eb2c8   Yaowei Bai   mm/compaction: im...
2563
  /*
facdaa917   Nitin Gupta   mm: proactive com...
2564
2565
2566
2567
   * Tunable for proactive compaction. It determines how
   * aggressively the kernel should compact memory in the
   * background. It takes values in the range [0, 100].
   */
d34c0a759   Nitin Gupta   mm: use unsigned ...
2568
  unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
facdaa917   Nitin Gupta   mm: proactive com...
2569

65d759c8f   Charan Teja Reddy   mm: compaction: s...
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
  int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
  		void *buffer, size_t *length, loff_t *ppos)
  {
  	int rc, nid;
  
  	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
  	if (rc)
  		return rc;
  
  	if (write && sysctl_compaction_proactiveness) {
  		for_each_online_node(nid) {
  			pg_data_t *pgdat = NODE_DATA(nid);
  
  			if (pgdat->proactive_compact_trigger)
  				continue;
  
  			pgdat->proactive_compact_trigger = true;
  			wake_up_interruptible(&pgdat->kcompactd_wait);
  		}
  	}
  
  	return 0;
  }
facdaa917   Nitin Gupta   mm: proactive com...
2593
  /*
fec4eb2c8   Yaowei Bai   mm/compaction: im...
2594
2595
2596
   * This is the entry point for compacting all nodes via
   * /proc/sys/vm/compact_memory
   */
76ab0f530   Mel Gorman   mm: compaction: a...
2597
  int sysctl_compaction_handler(struct ctl_table *table, int write,
32927393d   Christoph Hellwig   sysctl: pass kern...
2598
  			void *buffer, size_t *length, loff_t *ppos)
76ab0f530   Mel Gorman   mm: compaction: a...
2599
2600
  {
  	if (write)
7964c06d6   Jason Liu   mm: compaction: f...
2601
  		compact_nodes();
76ab0f530   Mel Gorman   mm: compaction: a...
2602
2603
2604
  
  	return 0;
  }
ed4a6d7f0   Mel Gorman   mm: compaction: a...
2605
2606
  
  #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
17adb230d   YueHaibing   mm/compaction: us...
2607
2608
2609
  static ssize_t compact_store(struct device *dev,
  			     struct device_attribute *attr,
  			     const char *buf, size_t count)
ed4a6d7f0   Mel Gorman   mm: compaction: a...
2610
  {
8575ec29f   Hugh Dickins   compact_pgdat: wo...
2611
2612
2613
2614
2615
2616
2617
2618
  	int nid = dev->id;
  
  	if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
  		/* Flush pending updates to the LRU lists */
  		lru_add_drain_all();
  
  		compact_node(nid);
  	}
ed4a6d7f0   Mel Gorman   mm: compaction: a...
2619
2620
2621
  
  	return count;
  }
17adb230d   YueHaibing   mm/compaction: us...
2622
  static DEVICE_ATTR_WO(compact);
ed4a6d7f0   Mel Gorman   mm: compaction: a...
2623
2624
2625
  
  int compaction_register_node(struct node *node)
  {
10fbcf4c6   Kay Sievers   convert 'memory' ...
2626
  	return device_create_file(&node->dev, &dev_attr_compact);
ed4a6d7f0   Mel Gorman   mm: compaction: a...
2627
2628
2629
2630
  }
  
  void compaction_unregister_node(struct node *node)
  {
10fbcf4c6   Kay Sievers   convert 'memory' ...
2631
  	return device_remove_file(&node->dev, &dev_attr_compact);
ed4a6d7f0   Mel Gorman   mm: compaction: a...
2632
2633
  }
  #endif /* CONFIG_SYSFS && CONFIG_NUMA */
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
2634

698b1b306   Vlastimil Babka   mm, compaction: i...
2635
2636
  static inline bool kcompactd_work_requested(pg_data_t *pgdat)
  {
65d759c8f   Charan Teja Reddy   mm: compaction: s...
2637
2638
  	return pgdat->kcompactd_max_order > 0 || kthread_should_stop() ||
  		pgdat->proactive_compact_trigger;
698b1b306   Vlastimil Babka   mm, compaction: i...
2639
2640
2641
2642
2643
2644
  }
  
  static bool kcompactd_node_suitable(pg_data_t *pgdat)
  {
  	int zoneid;
  	struct zone *zone;
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2645
  	enum zone_type highest_zoneidx = pgdat->kcompactd_highest_zoneidx;
698b1b306   Vlastimil Babka   mm, compaction: i...
2646

97a225e69   Joonsoo Kim   mm/page_alloc: in...
2647
  	for (zoneid = 0; zoneid <= highest_zoneidx; zoneid++) {
698b1b306   Vlastimil Babka   mm, compaction: i...
2648
2649
2650
2651
2652
2653
  		zone = &pgdat->node_zones[zoneid];
  
  		if (!populated_zone(zone))
  			continue;
  
  		if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2654
  					highest_zoneidx) == COMPACT_CONTINUE)
698b1b306   Vlastimil Babka   mm, compaction: i...
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
  			return true;
  	}
  
  	return false;
  }
  
  static void kcompactd_do_work(pg_data_t *pgdat)
  {
  	/*
  	 * With no special task, compact all zones so that a page of requested
  	 * order is allocatable.
  	 */
  	int zoneid;
  	struct zone *zone;
  	struct compact_control cc = {
  		.order = pgdat->kcompactd_max_order,
dbe2d4e4f   Mel Gorman   mm, compaction: r...
2671
  		.search_order = pgdat->kcompactd_max_order,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2672
  		.highest_zoneidx = pgdat->kcompactd_highest_zoneidx,
698b1b306   Vlastimil Babka   mm, compaction: i...
2673
  		.mode = MIGRATE_SYNC_LIGHT,
a0647dc92   David Rientjes   mm, compaction: k...
2674
  		.ignore_skip_hint = false,
73e64c51a   Michal Hocko   mm, compaction: a...
2675
  		.gfp_mask = GFP_KERNEL,
698b1b306   Vlastimil Babka   mm, compaction: i...
2676
  	};
698b1b306   Vlastimil Babka   mm, compaction: i...
2677
  	trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2678
  							cc.highest_zoneidx);
7f354a548   David Rientjes   mm, compaction: a...
2679
  	count_compact_event(KCOMPACTD_WAKE);
698b1b306   Vlastimil Babka   mm, compaction: i...
2680

97a225e69   Joonsoo Kim   mm/page_alloc: in...
2681
  	for (zoneid = 0; zoneid <= cc.highest_zoneidx; zoneid++) {
698b1b306   Vlastimil Babka   mm, compaction: i...
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
  		int status;
  
  		zone = &pgdat->node_zones[zoneid];
  		if (!populated_zone(zone))
  			continue;
  
  		if (compaction_deferred(zone, cc.order))
  			continue;
  
  		if (compaction_suitable(zone, cc.order, 0, zoneid) !=
  							COMPACT_CONTINUE)
  			continue;
172400c69   Vlastimil Babka   mm: fix kcompactd...
2694
2695
  		if (kthread_should_stop())
  			return;
a94b52524   Yafang Shao   mm/compaction.c: ...
2696
2697
  
  		cc.zone = zone;
5e1f0f098   Mel Gorman   mm, compaction: c...
2698
  		status = compact_zone(&cc, NULL);
698b1b306   Vlastimil Babka   mm, compaction: i...
2699

7ceb009a2   Vlastimil Babka   mm, compaction: d...
2700
  		if (status == COMPACT_SUCCESS) {
698b1b306   Vlastimil Babka   mm, compaction: i...
2701
  			compaction_defer_reset(zone, cc.order, false);
c8f7de0bf   Michal Hocko   mm, compaction: d...
2702
  		} else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
698b1b306   Vlastimil Babka   mm, compaction: i...
2703
  			/*
bc3106b26   David Rientjes   mm, compaction: d...
2704
2705
2706
2707
2708
2709
2710
2711
  			 * Buddy pages may become stranded on pcps that could
  			 * otherwise coalesce on the zone's free area for
  			 * order >= cc.order.  This is ratelimited by the
  			 * upcoming deferral.
  			 */
  			drain_all_pages(zone);
  
  			/*
698b1b306   Vlastimil Babka   mm, compaction: i...
2712
2713
2714
2715
2716
  			 * We use sync migration mode here, so we defer like
  			 * sync direct compaction does.
  			 */
  			defer_compaction(zone, cc.order);
  		}
7f354a548   David Rientjes   mm, compaction: a...
2717
2718
2719
2720
  		count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
  				     cc.total_migrate_scanned);
  		count_compact_events(KCOMPACTD_FREE_SCANNED,
  				     cc.total_free_scanned);
698b1b306   Vlastimil Babka   mm, compaction: i...
2721
2722
2723
2724
2725
2726
  		VM_BUG_ON(!list_empty(&cc.freepages));
  		VM_BUG_ON(!list_empty(&cc.migratepages));
  	}
  
  	/*
  	 * Regardless of success, we are done until woken up next. But remember
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2727
2728
  	 * the requested order/highest_zoneidx in case it was higher/tighter
  	 * than our current ones
698b1b306   Vlastimil Babka   mm, compaction: i...
2729
2730
2731
  	 */
  	if (pgdat->kcompactd_max_order <= cc.order)
  		pgdat->kcompactd_max_order = 0;
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2732
2733
  	if (pgdat->kcompactd_highest_zoneidx >= cc.highest_zoneidx)
  		pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;
698b1b306   Vlastimil Babka   mm, compaction: i...
2734
  }
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2735
  void wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx)
698b1b306   Vlastimil Babka   mm, compaction: i...
2736
2737
2738
2739
2740
2741
  {
  	if (!order)
  		return;
  
  	if (pgdat->kcompactd_max_order < order)
  		pgdat->kcompactd_max_order = order;
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2742
2743
  	if (pgdat->kcompactd_highest_zoneidx > highest_zoneidx)
  		pgdat->kcompactd_highest_zoneidx = highest_zoneidx;
698b1b306   Vlastimil Babka   mm, compaction: i...
2744

6818600ff   Davidlohr Bueso   mm,compaction: se...
2745
2746
2747
2748
2749
  	/*
  	 * Pairs with implicit barrier in wait_event_freezable()
  	 * such that wakeups are not missed.
  	 */
  	if (!wq_has_sleeper(&pgdat->kcompactd_wait))
698b1b306   Vlastimil Babka   mm, compaction: i...
2750
2751
2752
2753
2754
2755
  		return;
  
  	if (!kcompactd_node_suitable(pgdat))
  		return;
  
  	trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2756
  							highest_zoneidx);
698b1b306   Vlastimil Babka   mm, compaction: i...
2757
2758
2759
2760
2761
2762
2763
2764
2765
  	wake_up_interruptible(&pgdat->kcompactd_wait);
  }
  
  /*
   * The background compaction daemon, started as a kernel thread
   * from the init process.
   */
  static int kcompactd(void *p)
  {
68d68ff6e   Zhiyuan Dai   mm/mempool: minor...
2766
  	pg_data_t *pgdat = (pg_data_t *)p;
698b1b306   Vlastimil Babka   mm, compaction: i...
2767
  	struct task_struct *tsk = current;
e1e92bfa3   Charan Teja Reddy   mm: compaction: o...
2768
2769
  	long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC);
  	long timeout = default_timeout;
698b1b306   Vlastimil Babka   mm, compaction: i...
2770
2771
2772
2773
2774
2775
2776
2777
2778
  
  	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
  
  	if (!cpumask_empty(cpumask))
  		set_cpus_allowed_ptr(tsk, cpumask);
  
  	set_freezable();
  
  	pgdat->kcompactd_max_order = 0;
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2779
  	pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;
698b1b306   Vlastimil Babka   mm, compaction: i...
2780
2781
  
  	while (!kthread_should_stop()) {
eb414681d   Johannes Weiner   psi: pressure sta...
2782
  		unsigned long pflags;
65d759c8f   Charan Teja Reddy   mm: compaction: s...
2783
2784
2785
2786
2787
2788
  		/*
  		 * Avoid the unnecessary wakeup for proactive compaction
  		 * when it is disabled.
  		 */
  		if (!sysctl_compaction_proactiveness)
  			timeout = MAX_SCHEDULE_TIMEOUT;
698b1b306   Vlastimil Babka   mm, compaction: i...
2789
  		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
facdaa917   Nitin Gupta   mm: proactive com...
2790
  		if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
65d759c8f   Charan Teja Reddy   mm: compaction: s...
2791
2792
  			kcompactd_work_requested(pgdat), timeout) &&
  			!pgdat->proactive_compact_trigger) {
facdaa917   Nitin Gupta   mm: proactive com...
2793
2794
2795
2796
  
  			psi_memstall_enter(&pflags);
  			kcompactd_do_work(pgdat);
  			psi_memstall_leave(&pflags);
e1e92bfa3   Charan Teja Reddy   mm: compaction: o...
2797
2798
2799
2800
2801
2802
2803
2804
  			/*
  			 * Reset the timeout value. The defer timeout from
  			 * proactive compaction is lost here but that is fine
  			 * as the condition of the zone changing substantionally
  			 * then carrying on with the previous defer interval is
  			 * not useful.
  			 */
  			timeout = default_timeout;
facdaa917   Nitin Gupta   mm: proactive com...
2805
2806
  			continue;
  		}
698b1b306   Vlastimil Babka   mm, compaction: i...
2807

e1e92bfa3   Charan Teja Reddy   mm: compaction: o...
2808
2809
2810
2811
2812
  		/*
  		 * Start the proactive work with default timeout. Based
  		 * on the fragmentation score, this timeout is updated.
  		 */
  		timeout = default_timeout;
facdaa917   Nitin Gupta   mm: proactive com...
2813
2814
  		if (should_proactive_compact_node(pgdat)) {
  			unsigned int prev_score, score;
facdaa917   Nitin Gupta   mm: proactive com...
2815
2816
2817
2818
2819
2820
2821
  			prev_score = fragmentation_score_node(pgdat);
  			proactive_compact_node(pgdat);
  			score = fragmentation_score_node(pgdat);
  			/*
  			 * Defer proactive compaction if the fragmentation
  			 * score did not go down i.e. no progress made.
  			 */
e1e92bfa3   Charan Teja Reddy   mm: compaction: o...
2822
2823
2824
  			if (unlikely(score >= prev_score))
  				timeout =
  				   default_timeout << COMPACT_MAX_DEFER_SHIFT;
facdaa917   Nitin Gupta   mm: proactive com...
2825
  		}
65d759c8f   Charan Teja Reddy   mm: compaction: s...
2826
2827
  		if (unlikely(pgdat->proactive_compact_trigger))
  			pgdat->proactive_compact_trigger = false;
698b1b306   Vlastimil Babka   mm, compaction: i...
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
  	}
  
  	return 0;
  }
  
  /*
   * This kcompactd start function will be called by init and node-hot-add.
   * On node-hot-add, kcompactd will moved to proper cpus if cpus are hot-added.
   */
  int kcompactd_run(int nid)
  {
  	pg_data_t *pgdat = NODE_DATA(nid);
  	int ret = 0;
  
  	if (pgdat->kcompactd)
  		return 0;
  
  	pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
  	if (IS_ERR(pgdat->kcompactd)) {
  		pr_err("Failed to start kcompactd on node %d
  ", nid);
  		ret = PTR_ERR(pgdat->kcompactd);
  		pgdat->kcompactd = NULL;
  	}
  	return ret;
  }
  
  /*
   * Called by memory hotplug when all memory in a node is offlined. Caller must
   * hold mem_hotplug_begin/end().
   */
  void kcompactd_stop(int nid)
  {
  	struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;
  
  	if (kcompactd) {
  		kthread_stop(kcompactd);
  		NODE_DATA(nid)->kcompactd = NULL;
  	}
  }
  
  /*
   * It's optimal to keep kcompactd on the same CPUs as their memory, but
   * not required for correctness. So if the last cpu in a node goes
   * away, we get changed to run anywhere: as the first one comes back,
   * restore their cpu bindings.
   */
e46b1db24   Anna-Maria Gleixner   mm/compaction: Co...
2875
  static int kcompactd_cpu_online(unsigned int cpu)
698b1b306   Vlastimil Babka   mm, compaction: i...
2876
2877
  {
  	int nid;
e46b1db24   Anna-Maria Gleixner   mm/compaction: Co...
2878
2879
2880
  	for_each_node_state(nid, N_MEMORY) {
  		pg_data_t *pgdat = NODE_DATA(nid);
  		const struct cpumask *mask;
698b1b306   Vlastimil Babka   mm, compaction: i...
2881

e46b1db24   Anna-Maria Gleixner   mm/compaction: Co...
2882
  		mask = cpumask_of_node(pgdat->node_id);
698b1b306   Vlastimil Babka   mm, compaction: i...
2883

e46b1db24   Anna-Maria Gleixner   mm/compaction: Co...
2884
2885
2886
  		if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
  			/* One of our CPUs online: restore mask */
  			set_cpus_allowed_ptr(pgdat->kcompactd, mask);
698b1b306   Vlastimil Babka   mm, compaction: i...
2887
  	}
e46b1db24   Anna-Maria Gleixner   mm/compaction: Co...
2888
  	return 0;
698b1b306   Vlastimil Babka   mm, compaction: i...
2889
2890
2891
2892
2893
  }
  
  static int __init kcompactd_init(void)
  {
  	int nid;
e46b1db24   Anna-Maria Gleixner   mm/compaction: Co...
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
  	int ret;
  
  	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
  					"mm/compaction:online",
  					kcompactd_cpu_online, NULL);
  	if (ret < 0) {
  		pr_err("kcompactd: failed to register hotplug callbacks.
  ");
  		return ret;
  	}
698b1b306   Vlastimil Babka   mm, compaction: i...
2904
2905
2906
  
  	for_each_node_state(nid, N_MEMORY)
  		kcompactd_run(nid);
698b1b306   Vlastimil Babka   mm, compaction: i...
2907
2908
2909
  	return 0;
  }
  subsys_initcall(kcompactd_init)
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
2910
  #endif /* CONFIG_COMPACTION */