mm/compaction.c 80.4 KB
  // SPDX-License-Identifier: GPL-2.0
  /*
   * linux/mm/compaction.c
   *
   * Memory compaction for the reduction of external fragmentation. Note that
   * this heavily depends upon page migration to do all the real heavy
   * lifting
   *
   * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
   */
  #include <linux/cpu.h>
  #include <linux/swap.h>
  #include <linux/migrate.h>
  #include <linux/compaction.h>
  #include <linux/mm_inline.h>
  #include <linux/sched/signal.h>
  #include <linux/backing-dev.h>
  #include <linux/sysctl.h>
  #include <linux/sysfs.h>
  #include <linux/page-isolation.h>
  #include <linux/kasan.h>
  #include <linux/kthread.h>
  #include <linux/freezer.h>
  #include <linux/page_owner.h>
  #include <linux/psi.h>
  #include "internal.h"

  #ifdef CONFIG_COMPACTION
  static inline void count_compact_event(enum vm_event_item item)
  {
  	count_vm_event(item);
  }
  
  static inline void count_compact_events(enum vm_event_item item, long delta)
  {
  	count_vm_events(item, delta);
  }
  #else
  #define count_compact_event(item) do { } while (0)
  #define count_compact_events(item, delta) do { } while (0)
  #endif

  #if defined CONFIG_COMPACTION || defined CONFIG_CMA

  #define CREATE_TRACE_POINTS
  #include <trace/events/compaction.h>

  #define block_start_pfn(pfn, order)	round_down(pfn, 1UL << (order))
  #define block_end_pfn(pfn, order)	ALIGN((pfn) + 1, 1UL << (order))
  #define pageblock_start_pfn(pfn)	block_start_pfn(pfn, pageblock_order)
  #define pageblock_end_pfn(pfn)		block_end_pfn(pfn, pageblock_order)

  /*
   * Fragmentation score check interval for proactive compaction purposes.
   */
  static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500;
  
  /*
   * Page order with-respect-to which proactive compaction
   * calculates external fragmentation, which is used as
   * the "fragmentation score" of a node/zone.
   */
  #if defined CONFIG_TRANSPARENT_HUGEPAGE
  #define COMPACTION_HPAGE_ORDER	HPAGE_PMD_ORDER
  #elif defined CONFIG_HUGETLBFS
  #define COMPACTION_HPAGE_ORDER	HUGETLB_PAGE_ORDER
  #else
  #define COMPACTION_HPAGE_ORDER	(PMD_SHIFT - PAGE_SHIFT)
  #endif
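
  /*
   * Release a list of isolated free pages back to the page allocator and
   * return the highest PFN that was freed (0 if the list was empty).
   */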
  static unsigned long release_freepages(struct list_head *freelist)
  {
  	struct page *page, *next;
  	unsigned long high_pfn = 0;

  	list_for_each_entry_safe(page, next, freelist, lru) {
  		unsigned long pfn = page_to_pfn(page);

  		list_del(&page->lru);
  		__free_page(page);
  		if (pfn > high_pfn)
  			high_pfn = pfn;
  	}

  	return high_pfn;
  }
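
  /*
   * Split the high-order pages on an isolated freelist into order-0 pages and
   * prepare them for use, leaving the resulting order-0 pages on the same list.
   */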
  static void split_map_pages(struct list_head *list)
  {
  	unsigned int i, order, nr_pages;
  	struct page *page, *next;
  	LIST_HEAD(tmp_list);
  
  	list_for_each_entry_safe(page, next, list, lru) {
  		list_del(&page->lru);
  
  		order = page_private(page);
  		nr_pages = 1 << order;

  		post_alloc_hook(page, order, __GFP_MOVABLE);
  		if (order)
  			split_page(page, order);

  		for (i = 0; i < nr_pages; i++) {
  			list_add(&page->lru, &tmp_list);
  			page++;
  		}
  	}
  
  	list_splice(&tmp_list, list);
  }

  #ifdef CONFIG_COMPACTION

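  /*
   * A locked page is reported as movable when it carries the __PageMovable
   * flag and its address_space provides an isolate_page callback.
   */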
  int PageMovable(struct page *page)
  {
  	struct address_space *mapping;
  
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	if (!__PageMovable(page))
  		return 0;
  
  	mapping = page_mapping(page);
  	if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
  		return 1;
  
  	return 0;
  }
  EXPORT_SYMBOL(PageMovable);
  
  void __SetPageMovable(struct page *page, struct address_space *mapping)
  {
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
  	page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
  }
  EXPORT_SYMBOL(__SetPageMovable);
  
  void __ClearPageMovable(struct page *page)
  {
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_PAGE(!PageMovable(page), page);
  	/*
  	 * Clear the registered address_space value while keeping the
  	 * PAGE_MAPPING_MOVABLE flag, so that the VM can catch a page released
  	 * by the driver after isolation and migration does not try to put it back.
  	 */
  	page->mapping = (void *)((unsigned long)page->mapping &
  				PAGE_MAPPING_MOVABLE);
  }
  EXPORT_SYMBOL(__ClearPageMovable);
  /* Do not skip compaction more than 64 times */
  #define COMPACT_MAX_DEFER_SHIFT 6
  
  /*
   * Compaction is deferred when compaction fails to result in a page
   * allocation success. The following 1 << compact_defer_shift compaction
   * attempts are skipped; compact_defer_shift grows with each failure, up to
   * a limit of 1 << COMPACT_MAX_DEFER_SHIFT.
   */
  void defer_compaction(struct zone *zone, int order)
  {
  	zone->compact_considered = 0;
  	zone->compact_defer_shift++;
  
  	if (order < zone->compact_order_failed)
  		zone->compact_order_failed = order;
  
  	if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT)
  		zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;
  
  	trace_mm_compaction_defer_compaction(zone, order);
  }
  
  /* Returns true if compaction should be skipped this time */
  bool compaction_deferred(struct zone *zone, int order)
  {
  	unsigned long defer_limit = 1UL << zone->compact_defer_shift;
  
  	if (order < zone->compact_order_failed)
  		return false;
  
  	/* Avoid possible overflow */
  	if (++zone->compact_considered >= defer_limit) {
  		zone->compact_considered = defer_limit;
  		return false;
  	}
  
  	trace_mm_compaction_deferred(zone, order);
  
  	return true;
  }
  
  /*
   * Update defer tracking counters after successful compaction of given order,
   * which means an allocation either succeeded (alloc_success == true) or is
   * expected to succeed.
   */
  void compaction_defer_reset(struct zone *zone, int order,
  		bool alloc_success)
  {
  	if (alloc_success) {
  		zone->compact_considered = 0;
  		zone->compact_defer_shift = 0;
  	}
  	if (order >= zone->compact_order_failed)
  		zone->compact_order_failed = order + 1;
  
  	trace_mm_compaction_defer_reset(zone, order);
  }
  
  /* Returns true if restarting compaction after many failures */
  bool compaction_restarting(struct zone *zone, int order)
  {
  	if (order < zone->compact_order_failed)
  		return false;
  
  	return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
  		zone->compact_considered >= 1UL << zone->compact_defer_shift;
  }
  /* Returns true if the pageblock should be scanned for pages to isolate. */
  static inline bool isolation_suitable(struct compact_control *cc,
  					struct page *page)
  {
  	if (cc->ignore_skip_hint)
  		return true;
  
  	return !get_pageblock_skip(page);
  }
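
  /* Reset the cached migrate and free scanner positions to the zone bounds. */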
  static void reset_cached_positions(struct zone *zone)
  {
  	zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
  	zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
  	zone->compact_cached_free_pfn =
  				pageblock_start_pfn(zone_end_pfn(zone) - 1);
  }

  /*
   * Compound pages of >= pageblock_order should consistently be skipped until
   * released. It is always pointless to compact pages of such order (if they are
   * migratable), and the pageblocks they occupy cannot contain any free pages.
   */
  static bool pageblock_skip_persistent(struct page *page)
  {
  	if (!PageCompound(page))
  		return false;

  	page = compound_head(page);

  	if (compound_order(page) >= pageblock_order)
  		return true;

  	return false;
  }
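
  /*
   * Helper for __reset_isolation_suitable(): sample the pageblock around @pfn
   * and clear its skip hint if a free (buddy) or LRU page is found, so that the
   * block can serve as a restart point for the free or migrate scanner.
   */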
  static bool
  __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
  							bool check_target)
  {
  	struct page *page = pfn_to_online_page(pfn);
  	struct page *block_page;
  	struct page *end_page;
  	unsigned long block_pfn;
  
  	if (!page)
  		return false;
  	if (zone != page_zone(page))
  		return false;
  	if (pageblock_skip_persistent(page))
  		return false;
  
  	/*
  	 * If skip is already cleared do no further checking once the
  	 * restart points have been set.
  	 */
  	if (check_source && check_target && !get_pageblock_skip(page))
  		return true;
  
  	/*
  	 * If clearing skip for the target scanner, do not select a
  	 * non-movable pageblock as the starting point.
  	 */
  	if (!check_source && check_target &&
  	    get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
  		return false;

  	/* Ensure the start of the pageblock or zone is online and valid */
  	block_pfn = pageblock_start_pfn(pfn);
  	block_pfn = max(block_pfn, zone->zone_start_pfn);
  	block_page = pfn_to_online_page(block_pfn);
  	if (block_page) {
  		page = block_page;
  		pfn = block_pfn;
  	}
  
  	/* Ensure the end of the pageblock or zone is online and valid */
  	block_pfn = pageblock_end_pfn(pfn) - 1;
  	block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
  	end_page = pfn_to_online_page(block_pfn);
  	if (!end_page)
  		return false;
  	/*
  	 * Only clear the hint if a sample indicates there is either a
  	 * free page or an LRU page in the block. One or other condition
  	 * is necessary for the block to be a migration source/target.
  	 */
  	do {
  		if (pfn_valid_within(pfn)) {
  			if (check_source && PageLRU(page)) {
  				clear_pageblock_skip(page);
  				return true;
  			}
  
  			if (check_target && PageBuddy(page)) {
  				clear_pageblock_skip(page);
  				return true;
  			}
  		}
  
  		page += (1 << PAGE_ALLOC_COSTLY_ORDER);
  		pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
  	} while (page <= end_page);
  
  	return false;
  }

  /*
   * This function is called to clear all cached information on pageblocks that
   * should be skipped for page isolation when the migrate and free page scanner
   * meet.
   */
  static void __reset_isolation_suitable(struct zone *zone)
  {
  	unsigned long migrate_pfn = zone->zone_start_pfn;
  	unsigned long free_pfn = zone_end_pfn(zone) - 1;
  	unsigned long reset_migrate = free_pfn;
  	unsigned long reset_free = migrate_pfn;
  	bool source_set = false;
  	bool free_set = false;
  
  	if (!zone->compact_blockskip_flush)
  		return;

  	zone->compact_blockskip_flush = false;

  	/*
  	 * Walk the zone and update pageblock skip information. Source looks
  	 * for PageLRU while target looks for PageBuddy. When the scanner
  	 * is found, both PageBuddy and PageLRU are checked as the pageblock
  	 * is suitable as both source and target.
  	 */
  	for (; migrate_pfn < free_pfn; migrate_pfn += pageblock_nr_pages,
  					free_pfn -= pageblock_nr_pages) {
  		cond_resched();

  		/* Update the migrate PFN */
  		if (__reset_isolation_pfn(zone, migrate_pfn, true, source_set) &&
  		    migrate_pfn < reset_migrate) {
  			source_set = true;
  			reset_migrate = migrate_pfn;
  			zone->compact_init_migrate_pfn = reset_migrate;
  			zone->compact_cached_migrate_pfn[0] = reset_migrate;
  			zone->compact_cached_migrate_pfn[1] = reset_migrate;
  		}

  		/* Update the free PFN */
  		if (__reset_isolation_pfn(zone, free_pfn, free_set, true) &&
  		    free_pfn > reset_free) {
  			free_set = true;
  			reset_free = free_pfn;
  			zone->compact_init_free_pfn = reset_free;
  			zone->compact_cached_free_pfn = reset_free;
  		}
  	}

  	/* Leave no distance if no suitable block was reset */
  	if (reset_migrate >= reset_free) {
  		zone->compact_cached_migrate_pfn[0] = migrate_pfn;
  		zone->compact_cached_migrate_pfn[1] = migrate_pfn;
  		zone->compact_cached_free_pfn = free_pfn;
  	}
  }
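
  /*
   * Reset the pageblock skip hints in every populated zone of @pgdat where a
   * full compaction pass finished recently.
   */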
  void reset_isolation_suitable(pg_data_t *pgdat)
  {
  	int zoneid;
  
  	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
  		struct zone *zone = &pgdat->node_zones[zoneid];
  		if (!populated_zone(zone))
  			continue;
  
  		/* Only flush if a full compaction finished recently */
  		if (zone->compact_blockskip_flush)
  			__reset_isolation_suitable(zone);
  	}
  }
  /*
   * Sets the pageblock skip bit if it was clear. Note that this is a hint as
   * locks are not required for readers/writers. Returns true if it was already set.
   */
  static bool test_and_set_skip(struct compact_control *cc, struct page *page,
  							unsigned long pfn)
  {
  	bool skip;
  
  	/* Do no update if skip hint is being ignored */
  	if (cc->ignore_skip_hint)
  		return false;
  
  	if (!IS_ALIGNED(pfn, pageblock_nr_pages))
  		return false;
  
  	skip = get_pageblock_skip(page);
  	if (!skip && !cc->no_set_skip_hint)
  		set_pageblock_skip(page);
  
  	return skip;
  }
  
  static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
  {
  	struct zone *zone = cc->zone;
  
  	pfn = pageblock_end_pfn(pfn);
  
  	/* Set for isolation rather than compaction */
  	if (cc->no_set_skip_hint)
  		return;
  
  	if (pfn > zone->compact_cached_migrate_pfn[0])
  		zone->compact_cached_migrate_pfn[0] = pfn;
  	if (cc->mode != MIGRATE_ASYNC &&
  	    pfn > zone->compact_cached_migrate_pfn[1])
  		zone->compact_cached_migrate_pfn[1] = pfn;
  }
  
  /*
   * If no pages were isolated then mark this pageblock to be skipped in the
   * future. The information is later cleared by __reset_isolation_suitable().
   */
  static void update_pageblock_skip(struct compact_control *cc,
  			struct page *page, unsigned long pfn)
  {
  	struct zone *zone = cc->zone;

  	if (cc->no_set_skip_hint)
  		return;

  	if (!page)
  		return;

  	set_pageblock_skip(page);

  	/* Update where async and sync compaction should restart */
  	if (pfn < zone->compact_cached_free_pfn)
  		zone->compact_cached_free_pfn = pfn;
  }
  #else
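  /* No-op stubs: without CONFIG_COMPACTION the pageblock skip hints are unused. */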
  static inline bool isolation_suitable(struct compact_control *cc,
  					struct page *page)
  {
  	return true;
  }
  static inline bool pageblock_skip_persistent(struct page *page)
  {
  	return false;
  }
  
  static inline void update_pageblock_skip(struct compact_control *cc,
  			struct page *page, unsigned long pfn)
  {
  }
  
  static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
  {
  }
  
  static bool test_and_set_skip(struct compact_control *cc, struct page *page,
  							unsigned long pfn)
  {
  	return false;
  }
  #endif /* CONFIG_COMPACTION */

  /*
   * Compaction requires the taking of some coarse locks that are potentially
   * very heavily contended. For async compaction, trylock and record if the
   * lock is contended. The lock will still be acquired but compaction will
   * abort when the current block is finished regardless of success rate.
   * Sync compaction acquires the lock.
   *
   * Always returns true which makes it easier to track lock state in callers.
   */
  static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
  						struct compact_control *cc)
  	__acquires(lock)
  {
  	/* Track if the lock is contended in async mode */
  	if (cc->mode == MIGRATE_ASYNC && !cc->contended) {
  		if (spin_trylock_irqsave(lock, *flags))
  			return true;
  
  		cc->contended = true;
  	}

  	spin_lock_irqsave(lock, *flags);
  	return true;
  }

  /*
   * Compaction requires the taking of some coarse locks that are potentially
   * very heavily contended. The lock should be periodically unlocked to avoid
   * having disabled IRQs for a long time, even when there is nobody waiting on
   * the lock. It might also be that allowing the IRQs will result in
   * need_resched() becoming true. If scheduling is needed, async compaction
   * aborts. Sync compaction schedules.
   * Either compaction type will also abort if a fatal signal is pending.
   * In either case if the lock was locked, it is dropped and not regained.
   *
   * Returns true if compaction should abort due to fatal signal pending, or
   *		async compaction due to need_resched()
   * Returns false when compaction can continue (sync compaction might have
   *		scheduled)
   */
  static bool compact_unlock_should_abort(spinlock_t *lock,
  		unsigned long flags, bool *locked, struct compact_control *cc)
  {
  	if (*locked) {
  		spin_unlock_irqrestore(lock, flags);
  		*locked = false;
  	}

  	if (fatal_signal_pending(current)) {
  		cc->contended = true;
  		return true;
  	}

  	cond_resched();
  
  	return false;
  }

  /*
   * Isolate free pages onto a private freelist. If @strict is true, will abort
   * returning 0 on any invalid PFNs or non-free pages inside of the pageblock
   * (even though it may still end up isolating some pages).
   */
  static unsigned long isolate_freepages_block(struct compact_control *cc,
  				unsigned long *start_pfn,
  				unsigned long end_pfn,
  				struct list_head *freelist,
  				unsigned int stride,
  				bool strict)
  {
  	int nr_scanned = 0, total_isolated = 0;
  	struct page *cursor;
  	unsigned long flags = 0;
  	bool locked = false;
  	unsigned long blockpfn = *start_pfn;
  	unsigned int order;

  	/* Strict mode is for isolation, speed is secondary */
  	if (strict)
  		stride = 1;

  	cursor = pfn_to_page(blockpfn);

  	/* Isolate free pages. */
  	for (; blockpfn < end_pfn; blockpfn += stride, cursor += stride) {
  		int isolated;
  		struct page *page = cursor;

  		/*
  		 * Periodically drop the lock (if held) regardless of its
  		 * contention, to give chance to IRQs. Abort if fatal signal
  		 * pending or async compaction detects need_resched()
  		 */
  		if (!(blockpfn % SWAP_CLUSTER_MAX)
  		    && compact_unlock_should_abort(&cc->zone->lock, flags,
  								&locked, cc))
  			break;
  		nr_scanned++;
  		if (!pfn_valid_within(blockpfn))
  			goto isolate_fail;

  		/*
  		 * For compound pages such as THP and hugetlbfs, we can save
  		 * potentially a lot of iterations if we skip them at once.
  		 * The check is racy, but we can consider only valid values
  		 * and the only danger is skipping too much.
  		 */
  		if (PageCompound(page)) {
  			const unsigned int order = compound_order(page);

  			if (likely(order < MAX_ORDER)) {
  				blockpfn += (1UL << order) - 1;
  				cursor += (1UL << order) - 1;
  			}

  			goto isolate_fail;
  		}

  		if (!PageBuddy(page))
  			goto isolate_fail;

  		/*
  		 * If we already hold the lock, we can skip some rechecking.
  		 * Note that if we hold the lock now, checked_pageblock was
  		 * already set in some previous iteration (or strict is true),
  		 * so it is correct to skip the suitable migration target
  		 * recheck as well.
  		 */
  		if (!locked) {
  			locked = compact_lock_irqsave(&cc->zone->lock,
  								&flags, cc);

  			/* Recheck this is a buddy page under lock */
  			if (!PageBuddy(page))
  				goto isolate_fail;
  		}

  		/* Found a free page, will break it into order-0 pages */
  		order = buddy_order(page);
  		isolated = __isolate_free_page(page, order);
  		if (!isolated)
  			break;
  		set_page_private(page, order);

  		total_isolated += isolated;
  		cc->nr_freepages += isolated;
  		list_add_tail(&page->lru, freelist);
  		if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
  			blockpfn += isolated;
  			break;
  		}

  		/* Advance to the end of split page */
  		blockpfn += isolated - 1;
  		cursor += isolated - 1;
  		continue;
  
  isolate_fail:
  		if (strict)
  			break;
  		else
  			continue;
  	}

  	if (locked)
  		spin_unlock_irqrestore(&cc->zone->lock, flags);

  	/*
  	 * There is a tiny chance that we have read bogus compound_order(),
  	 * so be careful to not go outside of the pageblock.
  	 */
  	if (unlikely(blockpfn > end_pfn))
  		blockpfn = end_pfn;

  	trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn,
  					nr_scanned, total_isolated);

  	/* Record how far we have got within the block */
  	*start_pfn = blockpfn;

  	/*
  	 * If strict isolation is requested by CMA then check that all the
  	 * pages requested were isolated. If there were any failures, 0 is
  	 * returned and CMA will fail.
  	 */
  	if (strict && blockpfn < end_pfn)
  		total_isolated = 0;

  	cc->total_free_scanned += nr_scanned;
  	if (total_isolated)
  		count_compact_events(COMPACTISOLATED, total_isolated);
  	return total_isolated;
  }

  /**
   * isolate_freepages_range() - isolate free pages.
   * @cc:        Compaction control structure.
   * @start_pfn: The first PFN to start isolating.
   * @end_pfn:   The one-past-last PFN.
   *
   * Non-free pages, invalid PFNs, or zone boundaries within the
   * [start_pfn, end_pfn) range are considered errors and cause the function to
   * undo its actions and return zero.
   *
   * Otherwise, function returns one-past-the-last PFN of isolated page
   * (which may be greater than end_pfn if the end fell in the middle of
   * a free page).
   */
  unsigned long
  isolate_freepages_range(struct compact_control *cc,
  			unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
  	LIST_HEAD(freelist);

  	pfn = start_pfn;
  	block_start_pfn = pageblock_start_pfn(pfn);
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
  	block_end_pfn = pageblock_end_pfn(pfn);

  	for (; pfn < end_pfn; pfn += isolated,
  				block_start_pfn = block_end_pfn,
  				block_end_pfn += pageblock_nr_pages) {
  		/* Protect pfn from changing by isolate_freepages_block */
  		unsigned long isolate_start_pfn = pfn;

  		block_end_pfn = min(block_end_pfn, end_pfn);

  		/*
  		 * pfn could pass the block_end_pfn if isolated freepage
  		 * is more than pageblock order. In this case, we adjust
  		 * scanning range to right one.
  		 */
  		if (pfn >= block_end_pfn) {
  			block_start_pfn = pageblock_start_pfn(pfn);
  			block_end_pfn = pageblock_end_pfn(pfn);
  			block_end_pfn = min(block_end_pfn, end_pfn);
  		}

  		if (!pageblock_pfn_to_page(block_start_pfn,
  					block_end_pfn, cc->zone))
  			break;

  		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
  					block_end_pfn, &freelist, 0, true);
  
  		/*
  		 * In strict mode, isolate_freepages_block() returns 0 if
  		 * there are any holes in the block (ie. invalid PFNs or
  		 * non-free pages).
  		 */
  		if (!isolated)
  			break;
  
  		/*
  		 * If we managed to isolate pages, it is always (1 << n) *
  		 * pageblock_nr_pages for some non-negative n.  (Max order
  		 * page may span two pageblocks).
  		 */
  	}
  	/* __isolate_free_page() does not map the pages */
  	split_map_pages(&freelist);
  
  	if (pfn < end_pfn) {
  		/* Loop terminated early, cleanup. */
  		release_freepages(&freelist);
  		return 0;
  	}
  
  	/* We don't use freelists for anything. */
  	return pfn;
  }

  /* Similar to reclaim, but different enough that they don't share logic */
  static bool too_many_isolated(pg_data_t *pgdat)
  {
  	unsigned long active, inactive, isolated;

  	inactive = node_page_state(pgdat, NR_INACTIVE_FILE) +
  			node_page_state(pgdat, NR_INACTIVE_ANON);
  	active = node_page_state(pgdat, NR_ACTIVE_FILE) +
  			node_page_state(pgdat, NR_ACTIVE_ANON);
  	isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
  			node_page_state(pgdat, NR_ISOLATED_ANON);

  	return isolated > (inactive + active) / 2;
  }

  /**
   * isolate_migratepages_block() - isolate all migrate-able pages within
   *				  a single pageblock
   * @cc:		Compaction control structure.
   * @low_pfn:	The first PFN to isolate
   * @end_pfn:	The one-past-the-last PFN to isolate, within same pageblock
   * @isolate_mode: Isolation mode to be used.
   *
   * Isolate all pages that can be migrated from the range specified by
   * [low_pfn, end_pfn). The range is expected to be within the same pageblock.
   * Returns zero if there is a fatal signal pending, otherwise the PFN of the
   * first page that was not scanned (which may be less than, equal to or more
   * than end_pfn).
   *
   * The pages are isolated on cc->migratepages list (not required to be empty),
   * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field
   * is neither read nor updated.
   */
  static unsigned long
  isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
  			unsigned long end_pfn, isolate_mode_t isolate_mode)
  {
  	pg_data_t *pgdat = cc->zone->zone_pgdat;
  	unsigned long nr_scanned = 0, nr_isolated = 0;
  	struct lruvec *lruvec;
  	unsigned long flags = 0;
  	bool locked = false;
  	struct page *page = NULL, *valid_page = NULL;
  	unsigned long start_pfn = low_pfn;
  	bool skip_on_failure = false;
  	unsigned long next_skip_pfn = 0;
  	bool skip_updated = false;

  	/*
  	 * Ensure that there are not too many pages isolated from the LRU
  	 * list by either parallel reclaimers or compaction. If there are,
  	 * delay for some time until fewer pages are isolated
  	 */
  	while (unlikely(too_many_isolated(pgdat))) {
  		/* stop isolation if there are still pages not migrated */
  		if (cc->nr_migratepages)
  			return 0;

  		/* async migration should just abort */
  		if (cc->mode == MIGRATE_ASYNC)
  			return 0;

  		congestion_wait(BLK_RW_ASYNC, HZ/10);

  		if (fatal_signal_pending(current))
  			return 0;
  	}

  	cond_resched();

  	if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
  		skip_on_failure = true;
  		next_skip_pfn = block_end_pfn(low_pfn, cc->order);
  	}

  	/* Time to isolate some pages for migration */
  	for (; low_pfn < end_pfn; low_pfn++) {

  		if (skip_on_failure && low_pfn >= next_skip_pfn) {
  			/*
  			 * We have isolated all migration candidates in the
  			 * previous order-aligned block, and did not skip it due
  			 * to failure. We should migrate the pages now and
  			 * hopefully succeed compaction.
  			 */
  			if (nr_isolated)
  				break;
  
  			/*
  			 * We failed to isolate in the previous order-aligned
  			 * block. Set the new boundary to the end of the
  			 * current block. Note we can't simply increase
  			 * next_skip_pfn by 1 << order, as low_pfn might have
  			 * been incremented by a higher number due to skipping
  			 * a compound or a high-order buddy page in the
  			 * previous loop iteration.
  			 */
  			next_skip_pfn = block_end_pfn(low_pfn, cc->order);
  		}
  		/*
  		 * Periodically drop the lock (if held) regardless of its
  		 * contention, to give chance to IRQs. Abort completely if
  		 * a fatal signal is pending.
  		 */
  		if (!(low_pfn % SWAP_CLUSTER_MAX)
  		    && compact_unlock_should_abort(&pgdat->lru_lock,
  					    flags, &locked, cc)) {
  			low_pfn = 0;
  			goto fatal_pending;
  		}

  		if (!pfn_valid_within(low_pfn))
  			goto isolate_fail;
  		nr_scanned++;

  		page = pfn_to_page(low_pfn);

  		/*
  		 * Check if the pageblock has already been marked skipped.
  		 * Only the aligned PFN is checked as the caller isolates
  		 * COMPACT_CLUSTER_MAX at a time so the second call must
  		 * not falsely conclude that the block should be skipped.
  		 */
  		if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) {
  			if (!cc->ignore_skip_hint && get_pageblock_skip(page)) {
  				low_pfn = end_pfn;
  				goto isolate_abort;
  			}
  			valid_page = page;
  		}

  		/*
  		 * Skip if free. We read page order here without zone lock
  		 * which is generally unsafe, but the race window is small and
  		 * the worst thing that can happen is that we skip some
  		 * potential isolation targets.
  		 */
  		if (PageBuddy(page)) {
  			unsigned long freepage_order = buddy_order_unsafe(page);
  
  			/*
  			 * Without lock, we cannot be sure that what we got is
  			 * a valid page order. Consider only values in the
  			 * valid order range to prevent low_pfn overflow.
  			 */
  			if (freepage_order > 0 && freepage_order < MAX_ORDER)
  				low_pfn += (1UL << freepage_order) - 1;
  			continue;
  		}

  		/*
  		 * Regardless of being on LRU, compound pages such as THP and
  		 * hugetlbfs are not to be compacted unless we are attempting
  		 * an allocation much larger than the huge page size (eg CMA).
  		 * We can potentially save a lot of iterations if we skip them
  		 * at once. The check is racy, but we can consider only valid
  		 * values and the only danger is skipping too much.
  		 */
  		if (PageCompound(page) && !cc->alloc_contig) {
  			const unsigned int order = compound_order(page);

  			if (likely(order < MAX_ORDER))
  				low_pfn += (1UL << order) - 1;
  			goto isolate_fail;
  		}

  		/*
  		 * Check may be lockless but that's ok as we recheck later.
  		 * It's possible to migrate LRU and non-lru movable pages.
  		 * Skip any other type of page
  		 */
  		if (!PageLRU(page)) {
  			/*
  			 * __PageMovable can return false positive so we need
  			 * to verify it under page_lock.
  			 */
  			if (unlikely(__PageMovable(page)) &&
  					!PageIsolated(page)) {
  				if (locked) {
  					spin_unlock_irqrestore(&pgdat->lru_lock,
  									flags);
  					locked = false;
  				}

  				if (!isolate_movable_page(page, isolate_mode))
  					goto isolate_success;
  			}

  			goto isolate_fail;
  		}

  		/*
  		 * Migration will fail if an anonymous page is pinned in memory,
  		 * so avoid taking lru_lock and isolating it unnecessarily in an
  		 * admittedly racy check.
  		 */
  		if (!page_mapping(page) &&
  		    page_count(page) > page_mapcount(page))
  			goto isolate_fail;

  		/*
  		 * Only allow to migrate anonymous pages in GFP_NOFS context
  		 * because those do not depend on fs locks.
  		 */
  		if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
  			goto isolate_fail;

  		/* If we already hold the lock, we can skip some rechecking */
  		if (!locked) {
  			locked = compact_lock_irqsave(&pgdat->lru_lock,
  								&flags, cc);

  			/* Try get exclusive access under lock */
  			if (!skip_updated) {
  				skip_updated = true;
  				if (test_and_set_skip(cc, page, low_pfn))
  					goto isolate_abort;
  			}

  			/* Recheck PageLRU and PageCompound under lock */
  			if (!PageLRU(page))
  				goto isolate_fail;
  
  			/*
  			 * Page became compound since the non-locked check,
  			 * and it's on LRU. It can only be a THP so the order
  			 * is safe to read and it's 0 for tail pages.
  			 */
  			if (unlikely(PageCompound(page) && !cc->alloc_contig)) {
  				low_pfn += compound_nr(page) - 1;
  				goto isolate_fail;
  			}
  		}

  		lruvec = mem_cgroup_page_lruvec(page, pgdat);

  		/* Try isolate the page */
  		if (__isolate_lru_page(page, isolate_mode) != 0)
  			goto isolate_fail;

  		/* The whole page is taken off the LRU; skip the tail pages. */
  		if (PageCompound(page))
  			low_pfn += compound_nr(page) - 1;

  		/* Successfully isolated */
  		del_page_from_lru_list(page, lruvec, page_lru(page));
  		mod_node_page_state(page_pgdat(page),
  				NR_ISOLATED_ANON + page_is_file_lru(page),
  				thp_nr_pages(page));

  isolate_success:
  		list_add(&page->lru, &cc->migratepages);
  		cc->nr_migratepages += compound_nr(page);
  		nr_isolated += compound_nr(page);

  		/*
  		 * Avoid isolating too much unless this block is being
  		 * rescanned (e.g. dirty/writeback pages, parallel allocation)
  		 * or a lock is contended. For contention, isolate quickly to
  		 * potentially remove one source of contention.
  		 */
  		if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX &&
  		    !cc->rescan && !cc->contended) {
  			++low_pfn;
  			break;
  		}
  
  		continue;
  isolate_fail:
  		if (!skip_on_failure)
  			continue;
  
  		/*
  		 * We have isolated some pages, but then failed. Release them
  		 * instead of migrating, as we cannot form the cc->order buddy
  		 * page anyway.
  		 */
  		if (nr_isolated) {
  			if (locked) {
  				spin_unlock_irqrestore(&pgdat->lru_lock, flags);
  				locked = false;
  			}
  			putback_movable_pages(&cc->migratepages);
  			cc->nr_migratepages = 0;
  			nr_isolated = 0;
  		}
  
  		if (low_pfn < next_skip_pfn) {
  			low_pfn = next_skip_pfn - 1;
  			/*
  			 * The check near the loop beginning would have updated
  			 * next_skip_pfn too, but this is a bit simpler.
  			 */
  			next_skip_pfn += 1UL << cc->order;
  		}
  	}

  	/*
  	 * The PageBuddy() check could have potentially brought us outside
  	 * the range to be scanned.
  	 */
  	if (unlikely(low_pfn > end_pfn))
  		low_pfn = end_pfn;

  isolate_abort:
  	if (locked)
  		spin_unlock_irqrestore(&pgdat->lru_lock, flags);

  	/*
  	 * Update the cached scanner pfn once the pageblock has been scanned.
  	 * Pages will either be migrated in which case there is no point
  	 * scanning in the near future or migration failed in which case the
  	 * failure reason may persist. The block is marked for skipping if
  	 * there were no pages isolated in the block or if the block is
  	 * rescanned twice in a row.
  	 */
  	if (low_pfn == end_pfn && (!nr_isolated || cc->rescan)) {
  		if (valid_page && !skip_updated)
  			set_pageblock_skip(valid_page);
  		update_cached_migrate(cc, low_pfn);
  	}

  	trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
  						nr_scanned, nr_isolated);

  fatal_pending:
  	cc->total_migrate_scanned += nr_scanned;
  	if (nr_isolated)
  		count_compact_events(COMPACTISOLATED, nr_isolated);

  	return low_pfn;
  }
  /**
   * isolate_migratepages_range() - isolate migrate-able pages in a PFN range
   * @cc:        Compaction control structure.
   * @start_pfn: The first PFN to start isolating.
   * @end_pfn:   The one-past-last PFN.
   *
   * Returns zero if isolation fails fatally due to e.g. a pending signal.
   * Otherwise, the function returns the one-past-the-last PFN of the
   * isolated page (which may be greater than end_pfn if the range ended in
   * the middle of a THP page).
   */
  unsigned long
  isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
  							unsigned long end_pfn)
  {
e1409c325   Joonsoo Kim   mm/compaction: pa...
1053
  	unsigned long pfn, block_start_pfn, block_end_pfn;
edc2ca612   Vlastimil Babka   mm, compaction: m...
1054
1055
1056
  
  	/* Scan block by block. First and last block may be incomplete */
  	pfn = start_pfn;
06b6640a3   Vlastimil Babka   mm, compaction: w...
1057
  	block_start_pfn = pageblock_start_pfn(pfn);
e1409c325   Joonsoo Kim   mm/compaction: pa...
1058
1059
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
06b6640a3   Vlastimil Babka   mm, compaction: w...
1060
  	block_end_pfn = pageblock_end_pfn(pfn);
edc2ca612   Vlastimil Babka   mm, compaction: m...
1061
1062
  
  	for (; pfn < end_pfn; pfn = block_end_pfn,
e1409c325   Joonsoo Kim   mm/compaction: pa...
1063
  				block_start_pfn = block_end_pfn,
edc2ca612   Vlastimil Babka   mm, compaction: m...
1064
1065
1066
  				block_end_pfn += pageblock_nr_pages) {
  
  		block_end_pfn = min(block_end_pfn, end_pfn);
e1409c325   Joonsoo Kim   mm/compaction: pa...
1067
1068
  		if (!pageblock_pfn_to_page(block_start_pfn,
  					block_end_pfn, cc->zone))
edc2ca612   Vlastimil Babka   mm, compaction: m...
1069
1070
1071
1072
  			continue;
  
  		pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
  							ISOLATE_UNEVICTABLE);
14af4a5e9   Hugh Dickins   mm, cma: prevent ...
1073
  		if (!pfn)
edc2ca612   Vlastimil Babka   mm, compaction: m...
1074
  			break;
6ea41c0c0   Joonsoo Kim   mm/compaction.c: ...
1075

38935861d   Zi Yan   mm/compaction: co...
1076
  		if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX)
6ea41c0c0   Joonsoo Kim   mm/compaction.c: ...
1077
  			break;
edc2ca612   Vlastimil Babka   mm, compaction: m...
1078
  	}
edc2ca612   Vlastimil Babka   mm, compaction: m...
1079
1080
1081
  
  	return pfn;
  }
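
  /*
   * Editorial sketch, not part of mm/compaction.c: the pageblock-by-pageblock
   * walk performed by isolate_migratepages_range() above, with hypothetical
   * stand-ins (pageblock_pages for pageblock_nr_pages, zone_start for
   * cc->zone->zone_start_pfn, visit() for the per-block isolation call).
   * Assumes pageblock_pages is a power of two, as pageblock_nr_pages is.
   */
  static void sketch_walk_pageblocks(unsigned long start_pfn,
  				   unsigned long end_pfn,
  				   unsigned long zone_start,
  				   unsigned long pageblock_pages,
  				   void (*visit)(unsigned long, unsigned long))
  {
  	unsigned long pfn = start_pfn;
  	/* Round down to the pageblock containing start_pfn, clamp to the zone */
  	unsigned long block_start = pfn & ~(pageblock_pages - 1);
  	unsigned long block_end = block_start + pageblock_pages;

  	if (block_start < zone_start)
  		block_start = zone_start;

  	for (; pfn < end_pfn; pfn = block_end,
  				block_start = block_end,
  				block_end += pageblock_pages) {
  		/* The final block may be cut short by end_pfn */
  		unsigned long limit = block_end < end_pfn ? block_end : end_pfn;

  		visit(block_start, limit);
  	}
  }
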
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1082
1083
  #endif /* CONFIG_COMPACTION || CONFIG_CMA */
  #ifdef CONFIG_COMPACTION
018e9a49a   Andrew Morton   mm/compaction.c: ...
1084

b682debd9   Vlastimil Babka   mm, compaction: c...
1085
1086
1087
  static bool suitable_migration_source(struct compact_control *cc,
  							struct page *page)
  {
282722b0d   Vlastimil Babka   mm, compaction: r...
1088
  	int block_mt;
9bebefd59   Mel Gorman   mm, compaction: c...
1089
1090
  	if (pageblock_skip_persistent(page))
  		return false;
282722b0d   Vlastimil Babka   mm, compaction: r...
1091
  	if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction)
b682debd9   Vlastimil Babka   mm, compaction: c...
1092
  		return true;
282722b0d   Vlastimil Babka   mm, compaction: r...
1093
1094
1095
1096
1097
1098
  	block_mt = get_pageblock_migratetype(page);
  
  	if (cc->migratetype == MIGRATE_MOVABLE)
  		return is_migrate_movable(block_mt);
  	else
  		return block_mt == cc->migratetype;
b682debd9   Vlastimil Babka   mm, compaction: c...
1099
  }
018e9a49a   Andrew Morton   mm/compaction.c: ...
1100
  /* Returns true if the page is within a block suitable for migration to */
9f7e33879   Vlastimil Babka   mm, compaction: m...
1101
1102
  static bool suitable_migration_target(struct compact_control *cc,
  							struct page *page)
018e9a49a   Andrew Morton   mm/compaction.c: ...
1103
1104
1105
1106
1107
1108
1109
1110
  {
  	/* If the page is a large free page, then disallow migration */
  	if (PageBuddy(page)) {
  		/*
  		 * We are checking buddy_order() without zone->lock taken. But
  		 * the only small danger is that we skip a potentially suitable
  		 * pageblock, so it's not worth checking that the order is in a
  		 * valid range.
  		 */
ab130f910   Matthew Wilcox (Oracle)   mm: rename page_o...
1111
  		if (buddy_order_unsafe(page) >= pageblock_order)
018e9a49a   Andrew Morton   mm/compaction.c: ...
1112
1113
  			return false;
  	}
1ef36db2a   Yisheng Xie   mm/compaction: ig...
1114
1115
  	if (cc->ignore_block_suitable)
  		return true;
018e9a49a   Andrew Morton   mm/compaction.c: ...
1116
  	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
b682debd9   Vlastimil Babka   mm, compaction: c...
1117
  	if (is_migrate_movable(get_pageblock_migratetype(page)))
018e9a49a   Andrew Morton   mm/compaction.c: ...
1118
1119
1120
1121
1122
  		return true;
  
  	/* Otherwise skip the block */
  	return false;
  }
70b44595e   Mel Gorman   mm, compaction: u...
1123
1124
1125
  static inline unsigned int
  freelist_scan_limit(struct compact_control *cc)
  {
dd7ef7bd1   Qian Cai   mm/compaction.c: ...
1126
1127
1128
  	unsigned short shift = BITS_PER_LONG - 1;
  
  	return (COMPACT_CLUSTER_MAX >> min(shift, cc->fast_search_fail)) + 1;
70b44595e   Mel Gorman   mm, compaction: u...
1129
  }
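
  /*
   * Editorial sketch, not part of mm/compaction.c: how the scan budget
   * returned above decays with cc->fast_search_fail, assuming
   * COMPACT_CLUSTER_MAX is 32 (SWAP_CLUSTER_MAX on common configurations):
   * 0 failures -> 33 entries, 1 -> 17, 2 -> 9, 3 -> 5, 4 -> 3, 5 -> 2 and
   * 1 from then on, so repeated failures quickly shrink the fast search.
   */
  static unsigned int sketch_scan_limit(unsigned int cluster_max,
  				      unsigned int fast_search_fail)
  {
  	/* Once the shift would consume the whole value the budget is just 1 */
  	if (fast_search_fail >= sizeof(cluster_max) * 8)
  		return 1;

  	return (cluster_max >> fast_search_fail) + 1;
  }
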
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1130
  /*
f2849aa09   Vlastimil Babka   mm, compaction: m...
1131
1132
1133
1134
1135
1136
1137
1138
   * Test whether the free scanner has reached the same or lower pageblock than
   * the migration scanner, and compaction should thus terminate.
   */
  static inline bool compact_scanners_met(struct compact_control *cc)
  {
  	return (cc->free_pfn >> pageblock_order)
  		<= (cc->migrate_pfn >> pageblock_order);
  }
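
  /*
   * Editorial sketch, not part of mm/compaction.c: the scanners "meet" once
   * they fall within the same pageblock. Assuming 4K pages and a pageblock
   * order of 9 (512 pages, 2MB), a migrate scanner at pfn 74565 and a free
   * scanner at pfn 74700 both map to pageblock 145, so the comparison below
   * reports that this compaction pass is finished.
   */
  static int sketch_scanners_met(unsigned long free_pfn,
  			       unsigned long migrate_pfn,
  			       unsigned int pb_order)
  {
  	return (free_pfn >> pb_order) <= (migrate_pfn >> pb_order);
  }
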
5a811889d   Mel Gorman   mm, compaction: u...
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
  /*
   * Used when scanning for a suitable migration target which scans freelists
   * in reverse. Reorders the list so that the unscanned pages are scanned
   * first on the next iteration of the free scanner.
   */
  static void
  move_freelist_head(struct list_head *freelist, struct page *freepage)
  {
  	LIST_HEAD(sublist);
  
  	if (!list_is_last(freelist, &freepage->lru)) {
  		list_cut_before(&sublist, freelist, &freepage->lru);
  		if (!list_empty(&sublist))
  			list_splice_tail(&sublist, freelist);
  	}
  }
  
  /*
   * Similar to move_freelist_head except used by the migration scanner
   * when scanning forward. It's possible for these list operations to
   * move against each other if they search the free list exactly in
   * lockstep.
   */
70b44595e   Mel Gorman   mm, compaction: u...
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
  static void
  move_freelist_tail(struct list_head *freelist, struct page *freepage)
  {
  	LIST_HEAD(sublist);
  
  	if (!list_is_first(freelist, &freepage->lru)) {
  		list_cut_position(&sublist, freelist, &freepage->lru);
  		if (!list_empty(&sublist))
  			list_splice_tail(&sublist, freelist);
  	}
  }
5a811889d   Mel Gorman   mm, compaction: u...
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
  static void
  fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
  {
  	unsigned long start_pfn, end_pfn;
  	struct page *page = pfn_to_page(pfn);
  
  	/* Do not search around if there are enough pages already */
  	if (cc->nr_freepages >= cc->nr_migratepages)
  		return;
  
  	/* Minimise scanning during async compaction */
  	if (cc->direct_compaction && cc->mode == MIGRATE_ASYNC)
  		return;
  
  	/* Pageblock boundaries */
  	start_pfn = pageblock_start_pfn(pfn);
60fce36af   Mel Gorman   mm/compaction.c: ...
1189
  	end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone)) - 1;
5a811889d   Mel Gorman   mm, compaction: u...
1190
1191
1192
  
  	/* Scan before */
  	if (start_pfn != pfn) {
4fca9730c   Mel Gorman   mm, compaction: s...
1193
  		isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false);
5a811889d   Mel Gorman   mm, compaction: u...
1194
1195
1196
1197
1198
1199
  		if (cc->nr_freepages >= cc->nr_migratepages)
  			return;
  	}
  
  	/* Scan after */
  	start_pfn = pfn + nr_isolated;
60fce36af   Mel Gorman   mm/compaction.c: ...
1200
  	if (start_pfn < end_pfn)
4fca9730c   Mel Gorman   mm, compaction: s...
1201
  		isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
5a811889d   Mel Gorman   mm, compaction: u...
1202
1203
1204
1205
1206
  
  	/* Skip this pageblock in the future as it's full or nearly full */
  	if (cc->nr_freepages < cc->nr_migratepages)
  		set_pageblock_skip(page);
  }
dbe2d4e4f   Mel Gorman   mm, compaction: r...
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
  /* Search orders in round-robin fashion */
  static int next_search_order(struct compact_control *cc, int order)
  {
  	order--;
  	if (order < 0)
  		order = cc->order - 1;
  
  	/* Search wrapped around? */
  	if (order == cc->search_order) {
  		cc->search_order--;
  		if (cc->search_order < 0)
  			cc->search_order = cc->order - 1;
  		return -1;
  	}
  
  	return order;
  }
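
  /*
   * Editorial sketch, not part of mm/compaction.c: the order in which the
   * round-robin above visits the free lists. With cc->order == 5 and
   * cc->search_order == 2, fast_isolate_freepages() scans orders 2, 1, 0,
   * 4, 3 and then gives up, so every order below cc->order is tried once,
   * starting from the last order that previously produced a page.
   */
  static void sketch_order_walk(int cc_order, int search_order)
  {
  	int order;

  	/* fast_isolate_freepages() clamps search_order the same way */
  	if (search_order > cc_order - 1)
  		search_order = cc_order - 1;

  	for (order = search_order; order >= 0; ) {
  		/* ... scan free_area[order] here; stop early if a page is found ... */

  		order--;
  		if (order < 0)
  			order = cc_order - 1;
  		/* Wrapped back to the starting order: the walk is complete */
  		if (order == search_order)
  			break;
  	}
  }
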
5a811889d   Mel Gorman   mm, compaction: u...
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
  static unsigned long
  fast_isolate_freepages(struct compact_control *cc)
  {
  	unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
  	unsigned int nr_scanned = 0;
  	unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0;
  	unsigned long nr_isolated = 0;
  	unsigned long distance;
  	struct page *page = NULL;
  	bool scan_start = false;
  	int order;
  
  	/* Full compaction passes in a negative order */
  	if (cc->order <= 0)
  		return cc->free_pfn;
  
  	/*
  	 * If starting the scan, use a deeper search and use the highest
  	 * PFN found if a suitable one is not found.
  	 */
e332f741a   Mel Gorman   mm, compaction: b...
1244
  	if (cc->free_pfn >= cc->zone->compact_init_free_pfn) {
5a811889d   Mel Gorman   mm, compaction: u...
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
  		limit = pageblock_nr_pages >> 1;
  		scan_start = true;
  	}
  
  	/*
  	 * Preferred point is in the top quarter of the scan space but take
  	 * a pfn from the top half if the search is problematic.
  	 */
  	distance = (cc->free_pfn - cc->migrate_pfn);
  	low_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 2));
  	min_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 1));
  
  	if (WARN_ON_ONCE(min_pfn > low_pfn))
  		low_pfn = min_pfn;
dbe2d4e4f   Mel Gorman   mm, compaction: r...
1259
1260
1261
1262
1263
1264
1265
1266
1267
  	/*
  	 * Search starts from the last successful isolation order or the next
  	 * order to search after a previous failure
  	 */
  	cc->search_order = min_t(unsigned int, cc->order - 1, cc->search_order);
  
  	for (order = cc->search_order;
  	     !page && order >= 0;
  	     order = next_search_order(cc, order)) {
5a811889d   Mel Gorman   mm, compaction: u...
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
  		struct free_area *area = &cc->zone->free_area[order];
  		struct list_head *freelist;
  		struct page *freepage;
  		unsigned long flags;
  		unsigned int order_scanned = 0;
  
  		if (!area->nr_free)
  			continue;
  
  		spin_lock_irqsave(&cc->zone->lock, flags);
  		freelist = &area->free_list[MIGRATE_MOVABLE];
  		list_for_each_entry_reverse(freepage, freelist, lru) {
  			unsigned long pfn;
  
  			order_scanned++;
  			nr_scanned++;
  			pfn = page_to_pfn(freepage);
  
  			if (pfn >= highest)
  				highest = pageblock_start_pfn(pfn);
  
  			if (pfn >= low_pfn) {
  				cc->fast_search_fail = 0;
dbe2d4e4f   Mel Gorman   mm, compaction: r...
1291
  				cc->search_order = order;
5a811889d   Mel Gorman   mm, compaction: u...
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
  				page = freepage;
  				break;
  			}
  
  			if (pfn >= min_pfn && pfn > high_pfn) {
  				high_pfn = pfn;
  
  				/* Shorten the scan if a candidate is found */
  				limit >>= 1;
  			}
  
  			if (order_scanned >= limit)
  				break;
  		}
  
  		/* Use a minimum pfn if a preferred one was not found */
  		if (!page && high_pfn) {
  			page = pfn_to_page(high_pfn);
  
  			/* Update freepage for the list reorder below */
  			freepage = page;
  		}
  
  		/* Reorder so that a future search skips recent pages */
  		move_freelist_head(freelist, freepage);
  
  		/* Isolate the page if available */
  		if (page) {
  			if (__isolate_free_page(page, order)) {
  				set_page_private(page, order);
  				nr_isolated = 1 << order;
  				cc->nr_freepages += nr_isolated;
  				list_add_tail(&page->lru, &cc->freepages);
  				count_compact_events(COMPACTISOLATED, nr_isolated);
  			} else {
  				/* If isolation fails, abort the search */
5b56d996d   Qian Cai   mm/compaction.c: ...
1328
  				order = cc->search_order + 1;
5a811889d   Mel Gorman   mm, compaction: u...
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
  				page = NULL;
  			}
  		}
  
  		spin_unlock_irqrestore(&cc->zone->lock, flags);
  
  		/*
  		 * Smaller scan on next order so the total scan is related
  		 * to freelist_scan_limit.
  		 */
  		if (order_scanned >= limit)
  			limit = min(1U, limit >> 1);
  	}
  
  	if (!page) {
  		cc->fast_search_fail++;
  		if (scan_start) {
  			/*
  			 * Use the highest PFN found above min. If one was
f38677551   Ethon Paul   mm/compaction: fi...
1348
  			 * not found, be pessimistic for direct compaction
5a811889d   Mel Gorman   mm, compaction: u...
1349
1350
1351
1352
1353
1354
  			 * and use the min mark.
  			 */
  			if (highest) {
  				page = pfn_to_page(highest);
  				cc->free_pfn = highest;
  			} else {
e577c8b64   Suzuki K Poulose   mm, compaction: m...
1355
  				if (cc->direct_compaction && pfn_valid(min_pfn)) {
73a6e474c   Baoquan He   mm: memmap_init: ...
1356
1357
1358
  					page = pageblock_pfn_to_page(min_pfn,
  						pageblock_end_pfn(min_pfn),
  						cc->zone);
5a811889d   Mel Gorman   mm, compaction: u...
1359
1360
1361
1362
1363
  					cc->free_pfn = min_pfn;
  				}
  			}
  		}
  	}
d097a6f63   Mel Gorman   mm, compaction: r...
1364
1365
  	if (highest && highest >= cc->zone->compact_cached_free_pfn) {
  		highest -= pageblock_nr_pages;
5a811889d   Mel Gorman   mm, compaction: u...
1366
  		cc->zone->compact_cached_free_pfn = highest;
d097a6f63   Mel Gorman   mm, compaction: r...
1367
  	}
5a811889d   Mel Gorman   mm, compaction: u...
1368
1369
1370
1371
1372
1373
1374
1375
1376
  
  	cc->total_free_scanned += nr_scanned;
  	if (!page)
  		return cc->free_pfn;
  
  	low_pfn = page_to_pfn(page);
  	fast_isolate_around(cc, low_pfn, nr_isolated);
  	return low_pfn;
  }
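
  /*
   * Editorial sketch, not part of mm/compaction.c: the window that
   * fast_isolate_freepages() searches for a free-page candidate. With
   * migrate_pfn == 0 and free_pfn == 262144, preferred candidates lie at
   * pfn >= 196608 (top quarter of the unscanned space) and acceptable
   * fallbacks at pfn >= 131072 (top half); the real code additionally
   * rounds both bounds down to a pageblock boundary.
   */
  static void sketch_fast_free_window(unsigned long migrate_pfn,
  				    unsigned long free_pfn,
  				    unsigned long *low_pfn,
  				    unsigned long *min_pfn)
  {
  	unsigned long distance = free_pfn - migrate_pfn;

  	*low_pfn = free_pfn - (distance >> 2);	/* preferred: top quarter */
  	*min_pfn = free_pfn - (distance >> 1);	/* fallback: top half */
  }
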
f2849aa09   Vlastimil Babka   mm, compaction: m...
1377
  /*
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1378
1379
   * Based on information in the current compact_control, find blocks
   * suitable for isolating free pages from and then isolate them.
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1380
   */
edc2ca612   Vlastimil Babka   mm, compaction: m...
1381
  static void isolate_freepages(struct compact_control *cc)
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1382
  {
edc2ca612   Vlastimil Babka   mm, compaction: m...
1383
  	struct zone *zone = cc->zone;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1384
  	struct page *page;
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1385
  	unsigned long block_start_pfn;	/* start of current pageblock */
e14c720ef   Vlastimil Babka   mm, compaction: r...
1386
  	unsigned long isolate_start_pfn; /* exact pfn we start at */
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1387
1388
  	unsigned long block_end_pfn;	/* end of current pageblock */
  	unsigned long low_pfn;	     /* lowest pfn scanner is able to scan */
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1389
  	struct list_head *freelist = &cc->freepages;
4fca9730c   Mel Gorman   mm, compaction: s...
1390
  	unsigned int stride;
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1391

5a811889d   Mel Gorman   mm, compaction: u...
1392
1393
1394
1395
  	/* Try a small search of the free lists for a candidate */
  	isolate_start_pfn = fast_isolate_freepages(cc);
  	if (cc->nr_freepages)
  		goto splitmap;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1396
1397
  	/*
  	 * Initialise the free scanner. The starting point is where we last
49e068f0b   Vlastimil Babka   mm/compaction: ma...
1398
  	 * successfully isolated from, zone-cached value, or the end of the
e14c720ef   Vlastimil Babka   mm, compaction: r...
1399
1400
  	 * zone when isolating for the first time. For looping we also need
  	 * this pfn aligned down to the pageblock boundary, because we do
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1401
1402
  	 * block_start_pfn -= pageblock_nr_pages in the for loop.
  	 * For the ending point, take care when isolating in the last pageblock of a
a1c1dbeb2   Randy Dunlap   mm/compaction.c: ...
1403
  	 * zone which ends in the middle of a pageblock.
49e068f0b   Vlastimil Babka   mm/compaction: ma...
1404
1405
  	 * The low boundary is the end of the pageblock the migration scanner
  	 * is using.
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1406
  	 */
e14c720ef   Vlastimil Babka   mm, compaction: r...
1407
  	isolate_start_pfn = cc->free_pfn;
5a811889d   Mel Gorman   mm, compaction: u...
1408
  	block_start_pfn = pageblock_start_pfn(isolate_start_pfn);
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1409
1410
  	block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
  						zone_end_pfn(zone));
06b6640a3   Vlastimil Babka   mm, compaction: w...
1411
  	low_pfn = pageblock_end_pfn(cc->migrate_pfn);
4fca9730c   Mel Gorman   mm, compaction: s...
1412
  	stride = cc->mode == MIGRATE_ASYNC ? COMPACT_CLUSTER_MAX : 1;
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1413

ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1414
  	/*
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1415
1416
1417
1418
  	 * Isolate free pages until enough are available to migrate the
  	 * pages on cc->migratepages. We stop searching if the migrate
  	 * and free page scanners meet or enough free pages are isolated.
  	 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1419
  	for (; block_start_pfn >= low_pfn;
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1420
  				block_end_pfn = block_start_pfn,
e14c720ef   Vlastimil Babka   mm, compaction: r...
1421
1422
  				block_start_pfn -= pageblock_nr_pages,
  				isolate_start_pfn = block_start_pfn) {
4fca9730c   Mel Gorman   mm, compaction: s...
1423
  		unsigned long nr_isolated;
f6ea3adb7   David Rientjes   mm/compaction.c: ...
1424
1425
  		/*
  		 * This can iterate a massively long zone without finding any
cb810ad29   Mel Gorman   mm, compaction: r...
1426
  		 * suitable migration targets, so periodically check resched.
f6ea3adb7   David Rientjes   mm/compaction.c: ...
1427
  		 */
cb810ad29   Mel Gorman   mm, compaction: r...
1428
  		if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
cf66f0700   Mel Gorman   mm, compaction: d...
1429
  			cond_resched();
f6ea3adb7   David Rientjes   mm/compaction.c: ...
1430

7d49d8868   Vlastimil Babka   mm, compaction: r...
1431
1432
1433
  		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
  									zone);
  		if (!page)
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1434
1435
1436
  			continue;
  
  		/* Check the block is suitable for migration */
9f7e33879   Vlastimil Babka   mm, compaction: m...
1437
  		if (!suitable_migration_target(cc, page))
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1438
  			continue;
68e3e9262   Linus Torvalds   Revert "mm: compa...
1439

bb13ffeb9   Mel Gorman   mm: compaction: c...
1440
1441
1442
  		/* If isolation recently failed, do not retry */
  		if (!isolation_suitable(cc, page))
  			continue;
e14c720ef   Vlastimil Babka   mm, compaction: r...
1443
  		/* Found a block suitable for isolating free pages from. */
4fca9730c   Mel Gorman   mm, compaction: s...
1444
1445
  		nr_isolated = isolate_freepages_block(cc, &isolate_start_pfn,
  					block_end_pfn, freelist, stride, false);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1446

d097a6f63   Mel Gorman   mm, compaction: r...
1447
1448
1449
  		/* Update the skip hint if the full pageblock was scanned */
  		if (isolate_start_pfn == block_end_pfn)
  			update_pageblock_skip(cc, page, block_start_pfn);
cb2dcaf02   Mel Gorman   mm, compaction: f...
1450
1451
  		/* Are enough freepages isolated? */
  		if (cc->nr_freepages >= cc->nr_migratepages) {
a46cbf3bc   David Rientjes   mm, compaction: p...
1452
1453
1454
1455
1456
  			if (isolate_start_pfn >= block_end_pfn) {
  				/*
  				 * Restart at previous pageblock if more
  				 * freepages can be isolated next time.
  				 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1457
1458
  				isolate_start_pfn =
  					block_start_pfn - pageblock_nr_pages;
a46cbf3bc   David Rientjes   mm, compaction: p...
1459
  			}
be9765722   Vlastimil Babka   mm, compaction: p...
1460
  			break;
a46cbf3bc   David Rientjes   mm, compaction: p...
1461
  		} else if (isolate_start_pfn < block_end_pfn) {
f5f61a320   Vlastimil Babka   mm, compaction: s...
1462
  			/*
a46cbf3bc   David Rientjes   mm, compaction: p...
1463
1464
  			 * If isolation failed early, do not continue
  			 * needlessly.
f5f61a320   Vlastimil Babka   mm, compaction: s...
1465
  			 */
a46cbf3bc   David Rientjes   mm, compaction: p...
1466
  			break;
f5f61a320   Vlastimil Babka   mm, compaction: s...
1467
  		}
4fca9730c   Mel Gorman   mm, compaction: s...
1468
1469
1470
1471
1472
1473
1474
  
  		/* Adjust stride depending on isolation */
  		if (nr_isolated) {
  			stride = 1;
  			continue;
  		}
  		stride = min_t(unsigned int, COMPACT_CLUSTER_MAX, stride << 1);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1475
  	}
7ed695e06   Vlastimil Babka   mm: compaction: d...
1476
  	/*
f5f61a320   Vlastimil Babka   mm, compaction: s...
1477
1478
1479
1480
  	 * Record where the free scanner will restart next time. Either we
  	 * broke from the loop and set isolate_start_pfn based on the last
  	 * call to isolate_freepages_block(), or we met the migration scanner
  	 * and the loop terminated due to isolate_start_pfn < low_pfn
7ed695e06   Vlastimil Babka   mm: compaction: d...
1481
  	 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1482
  	cc->free_pfn = isolate_start_pfn;
5a811889d   Mel Gorman   mm, compaction: u...
1483
1484
1485
1486
  
  splitmap:
  	/* __isolate_free_page() does not map the pages */
  	split_map_pages(freelist);
748446bb6   Mel Gorman   mm: compaction: m...
1487
1488
1489
1490
1491
1492
1493
  }
  
  /*
   * This is a migrate-callback that "allocates" freepages by taking pages
   * from the isolated freelists in the block we are migrating to.
   */
  static struct page *compaction_alloc(struct page *migratepage,
666feb21a   Michal Hocko   mm, migrate: remo...
1494
  					unsigned long data)
748446bb6   Mel Gorman   mm: compaction: m...
1495
1496
1497
  {
  	struct compact_control *cc = (struct compact_control *)data;
  	struct page *freepage;
748446bb6   Mel Gorman   mm: compaction: m...
1498
  	if (list_empty(&cc->freepages)) {
cb2dcaf02   Mel Gorman   mm, compaction: f...
1499
  		isolate_freepages(cc);
748446bb6   Mel Gorman   mm: compaction: m...
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
  
  		if (list_empty(&cc->freepages))
  			return NULL;
  	}
  
  	freepage = list_entry(cc->freepages.next, struct page, lru);
  	list_del(&freepage->lru);
  	cc->nr_freepages--;
  
  	return freepage;
  }
  
  /*
d53aea3d4   David Rientjes   mm, compaction: r...
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
   * This is a migrate-callback that "frees" freepages back to the isolated
   * freelist.  All pages on the freelist are from the same zone, so there is no
   * special handling needed for NUMA.
   */
  static void compaction_free(struct page *page, unsigned long data)
  {
  	struct compact_control *cc = (struct compact_control *)data;
  
  	list_add(&page->lru, &cc->freepages);
  	cc->nr_freepages++;
  }
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1524
1525
1526
1527
1528
1529
1530
1531
  /* possible outcome of isolate_migratepages */
  typedef enum {
  	ISOLATE_ABORT,		/* Abort compaction now */
  	ISOLATE_NONE,		/* No pages isolated, continue scanning */
  	ISOLATE_SUCCESS,	/* Pages isolated, migrate */
  } isolate_migrate_t;
  
  /*
5bbe3547a   Eric B Munson   mm: allow compact...
1532
1533
1534
   * Allow userspace to control policy on scanning the unevictable LRU for
   * compactable pages.
   */
6923aa0d8   Sebastian Andrzej Siewior   mm/compaction: Di...
1535
1536
1537
  #ifdef CONFIG_PREEMPT_RT
  int sysctl_compact_unevictable_allowed __read_mostly = 0;
  #else
5bbe3547a   Eric B Munson   mm: allow compact...
1538
  int sysctl_compact_unevictable_allowed __read_mostly = 1;
6923aa0d8   Sebastian Andrzej Siewior   mm/compaction: Di...
1539
  #endif
5bbe3547a   Eric B Munson   mm: allow compact...
1540

70b44595e   Mel Gorman   mm, compaction: u...
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
  static inline void
  update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
  {
  	if (cc->fast_start_pfn == ULONG_MAX)
  		return;
  
  	if (!cc->fast_start_pfn)
  		cc->fast_start_pfn = pfn;
  
  	cc->fast_start_pfn = min(cc->fast_start_pfn, pfn);
  }
  
  static inline unsigned long
  reinit_migrate_pfn(struct compact_control *cc)
  {
  	if (!cc->fast_start_pfn || cc->fast_start_pfn == ULONG_MAX)
  		return cc->migrate_pfn;
  
  	cc->migrate_pfn = cc->fast_start_pfn;
  	cc->fast_start_pfn = ULONG_MAX;
  
  	return cc->migrate_pfn;
  }
  
  /*
   * Briefly search the free lists for a migration source that already has
   * some free pages to reduce the number of pages that need migration
   * before a pageblock is free.
   */
  static unsigned long fast_find_migrateblock(struct compact_control *cc)
  {
  	unsigned int limit = freelist_scan_limit(cc);
  	unsigned int nr_scanned = 0;
  	unsigned long distance;
  	unsigned long pfn = cc->migrate_pfn;
  	unsigned long high_pfn;
  	int order;
  
  	/* Skip hints are relied on to avoid repeats on the fast search */
  	if (cc->ignore_skip_hint)
  		return pfn;
  
  	/*
  	 * If the migrate_pfn is not at the start of a zone or the start
  	 * of a pageblock then assume this is a continuation of a previous
  	 * scan restarted due to COMPACT_CLUSTER_MAX.
  	 */
  	if (pfn != cc->zone->zone_start_pfn && pfn != pageblock_start_pfn(pfn))
  		return pfn;
  
  	/*
  	 * For smaller orders, just linearly scan as the number of pages
  	 * to migrate should be relatively small and does not necessarily
  	 * justify freeing up a large block for a small allocation.
  	 */
  	if (cc->order <= PAGE_ALLOC_COSTLY_ORDER)
  		return pfn;
  
  	/*
  	 * Only allow kcompactd and direct requests for movable pages to
  	 * quickly clear out a MOVABLE pageblock for allocation. This
  	 * reduces the risk that a large movable pageblock is freed for
  	 * an unmovable/reclaimable small allocation.
  	 */
  	if (cc->direct_compaction && cc->migratetype != MIGRATE_MOVABLE)
  		return pfn;
  
  	/*
  	 * When starting the migration scanner, pick any pageblock within the
  	 * first half of the search space. Otherwise try and pick a pageblock
  	 * within the first eighth to reduce the chances that a migration
  	 * target later becomes a source.
  	 */
  	distance = (cc->free_pfn - cc->migrate_pfn) >> 1;
  	if (cc->migrate_pfn != cc->zone->zone_start_pfn)
  		distance >>= 2;
  	high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance);
  
  	for (order = cc->order - 1;
  	     order >= PAGE_ALLOC_COSTLY_ORDER && pfn == cc->migrate_pfn && nr_scanned < limit;
  	     order--) {
  		struct free_area *area = &cc->zone->free_area[order];
  		struct list_head *freelist;
  		unsigned long flags;
  		struct page *freepage;
  
  		if (!area->nr_free)
  			continue;
  
  		spin_lock_irqsave(&cc->zone->lock, flags);
  		freelist = &area->free_list[MIGRATE_MOVABLE];
  		list_for_each_entry(freepage, freelist, lru) {
  			unsigned long free_pfn;
  
  			nr_scanned++;
  			free_pfn = page_to_pfn(freepage);
  			if (free_pfn < high_pfn) {
70b44595e   Mel Gorman   mm, compaction: u...
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
  				/*
  				 * Avoid if skipped recently. Ideally it would
  				 * move to the tail but even safe iteration of
  				 * the list assumes an entry is deleted, not
  				 * reordered.
  				 */
  				if (get_pageblock_skip(freepage)) {
  					if (list_is_last(freelist, &freepage->lru))
  						break;
  
  					continue;
  				}
  
  				/* Reorder so that a future search skips recent pages */
  				move_freelist_tail(freelist, freepage);
e380bebe4   Mel Gorman   mm, compaction: k...
1653
  				update_fast_start_pfn(cc, free_pfn);
70b44595e   Mel Gorman   mm, compaction: u...
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
  				pfn = pageblock_start_pfn(free_pfn);
  				cc->fast_search_fail = 0;
  				set_pageblock_skip(freepage);
  				break;
  			}
  
  			if (nr_scanned >= limit) {
  				cc->fast_search_fail++;
  				move_freelist_tail(freelist, freepage);
  				break;
  			}
  		}
  		spin_unlock_irqrestore(&cc->zone->lock, flags);
  	}
  
  	cc->total_migrate_scanned += nr_scanned;
  
  	/*
  	 * If fast scanning failed then use a cached entry for a page block
  	 * that had free pages as the basis for starting a linear scan.
  	 */
  	if (pfn == cc->migrate_pfn)
  		pfn = reinit_migrate_pfn(cc);
  
  	return pfn;
  }
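
  /*
   * Editorial sketch, not part of mm/compaction.c: the candidate window used
   * by fast_find_migrateblock() above. When the migrate scanner is still at
   * the zone start, any free page in the first half of the unscanned space
   * is an acceptable hint; on later passes only the first eighth is, which
   * keeps fresh migration sources away from likely migration targets. The
   * real code also rounds the bound down to a pageblock boundary.
   */
  static unsigned long sketch_migrate_high_pfn(unsigned long migrate_pfn,
  					     unsigned long free_pfn,
  					     int at_zone_start)
  {
  	unsigned long distance = (free_pfn - migrate_pfn) >> 1;

  	if (!at_zone_start)
  		distance >>= 2;		/* first eighth instead of first half */

  	return migrate_pfn + distance;
  }
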
5bbe3547a   Eric B Munson   mm: allow compact...
1680
  /*
edc2ca612   Vlastimil Babka   mm, compaction: m...
1681
1682
1683
   * Isolate all pages that can be migrated from the first suitable block,
   * starting at the block pointed to by the migrate scanner pfn within
   * compact_control.
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1684
   */
32aaf0553   Pengfei Li   mm/compaction.c: ...
1685
  static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1686
  {
e1409c325   Joonsoo Kim   mm/compaction: pa...
1687
1688
1689
  	unsigned long block_start_pfn;
  	unsigned long block_end_pfn;
  	unsigned long low_pfn;
edc2ca612   Vlastimil Babka   mm, compaction: m...
1690
1691
  	struct page *page;
  	const isolate_mode_t isolate_mode =
5bbe3547a   Eric B Munson   mm: allow compact...
1692
  		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
1d2047fef   Hugh Dickins   mm, compaction: d...
1693
  		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
70b44595e   Mel Gorman   mm, compaction: u...
1694
  	bool fast_find_block;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1695

edc2ca612   Vlastimil Babka   mm, compaction: m...
1696
1697
  	/*
  	 * Start at where we last stopped, or the beginning of the zone as
70b44595e   Mel Gorman   mm, compaction: u...
1698
1699
  	 * initialized by compact_zone(). The first failure will use
  	 * the lowest PFN as the starting point for linear scanning.
edc2ca612   Vlastimil Babka   mm, compaction: m...
1700
  	 */
70b44595e   Mel Gorman   mm, compaction: u...
1701
  	low_pfn = fast_find_migrateblock(cc);
06b6640a3   Vlastimil Babka   mm, compaction: w...
1702
  	block_start_pfn = pageblock_start_pfn(low_pfn);
32aaf0553   Pengfei Li   mm/compaction.c: ...
1703
1704
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1705

70b44595e   Mel Gorman   mm, compaction: u...
1706
1707
1708
1709
1710
1711
  	/*
  	 * fast_find_migrateblock() marks a pageblock as skipped, so to avoid
  	 * the isolation_suitable() check below, check whether the fast
  	 * search was successful.
  	 */
  	fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1712
  	/* Only scan within a pageblock boundary */
06b6640a3   Vlastimil Babka   mm, compaction: w...
1713
  	block_end_pfn = pageblock_end_pfn(low_pfn);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1714

edc2ca612   Vlastimil Babka   mm, compaction: m...
1715
1716
1717
1718
  	/*
  	 * Iterate over whole pageblocks until we find the first suitable.
  	 * Do not cross the free scanner.
  	 */
e1409c325   Joonsoo Kim   mm/compaction: pa...
1719
  	for (; block_end_pfn <= cc->free_pfn;
70b44595e   Mel Gorman   mm, compaction: u...
1720
  			fast_find_block = false,
e1409c325   Joonsoo Kim   mm/compaction: pa...
1721
1722
1723
  			low_pfn = block_end_pfn,
  			block_start_pfn = block_end_pfn,
  			block_end_pfn += pageblock_nr_pages) {
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1724

edc2ca612   Vlastimil Babka   mm, compaction: m...
1725
1726
1727
  		/*
  		 * This can potentially iterate a massively long zone with
  		 * many pageblocks unsuitable, so periodically check if we
cb810ad29   Mel Gorman   mm, compaction: r...
1728
  		 * need to schedule.
edc2ca612   Vlastimil Babka   mm, compaction: m...
1729
  		 */
cb810ad29   Mel Gorman   mm, compaction: r...
1730
  		if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
cf66f0700   Mel Gorman   mm, compaction: d...
1731
  			cond_resched();
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1732

32aaf0553   Pengfei Li   mm/compaction.c: ...
1733
1734
  		page = pageblock_pfn_to_page(block_start_pfn,
  						block_end_pfn, cc->zone);
7d49d8868   Vlastimil Babka   mm, compaction: r...
1735
  		if (!page)
edc2ca612   Vlastimil Babka   mm, compaction: m...
1736
  			continue;
e380bebe4   Mel Gorman   mm, compaction: k...
1737
1738
1739
1740
1741
1742
1743
1744
1745
  		/*
  		 * If isolation recently failed, do not retry. Only check the
  		 * pageblock once. COMPACT_CLUSTER_MAX causes a pageblock
  		 * to be visited multiple times. Assume skip was checked
  		 * before making it "skip" so other compaction instances do
  		 * not scan the same block.
  		 */
  		if (IS_ALIGNED(low_pfn, pageblock_nr_pages) &&
  		    !fast_find_block && !isolation_suitable(cc, page))
edc2ca612   Vlastimil Babka   mm, compaction: m...
1746
1747
1748
  			continue;
  
  		/*
9bebefd59   Mel Gorman   mm, compaction: c...
1749
1750
1751
1752
1753
1754
  		 * For async compaction, also only scan in MOVABLE blocks
  		 * without huge pages. Async compaction is optimistic to see
  		 * if the minimum amount of work satisfies the allocation.
  		 * The cached PFN is updated as it's possible that all
  		 * remaining blocks between source and target are unsuitable
  		 * and the compaction scanners fail to meet.
edc2ca612   Vlastimil Babka   mm, compaction: m...
1755
  		 */
9bebefd59   Mel Gorman   mm, compaction: c...
1756
1757
  		if (!suitable_migration_source(cc, page)) {
  			update_cached_migrate(cc, block_end_pfn);
edc2ca612   Vlastimil Babka   mm, compaction: m...
1758
  			continue;
9bebefd59   Mel Gorman   mm, compaction: c...
1759
  		}
edc2ca612   Vlastimil Babka   mm, compaction: m...
1760
1761
  
  		/* Perform the isolation */
e1409c325   Joonsoo Kim   mm/compaction: pa...
1762
1763
  		low_pfn = isolate_migratepages_block(cc, low_pfn,
  						block_end_pfn, isolate_mode);
edc2ca612   Vlastimil Babka   mm, compaction: m...
1764

cb2dcaf02   Mel Gorman   mm, compaction: f...
1765
  		if (!low_pfn)
edc2ca612   Vlastimil Babka   mm, compaction: m...
1766
1767
1768
1769
1770
1771
1772
1773
1774
  			return ISOLATE_ABORT;
  
  		/*
  		 * Either we isolated something and will proceed with migration, or
  		 * we failed and compact_zone() should decide whether to
  		 * continue or not.
  		 */
  		break;
  	}
f2849aa09   Vlastimil Babka   mm, compaction: m...
1775
1776
  	/* Record where migration scanner will be restarted. */
  	cc->migrate_pfn = low_pfn;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1777

edc2ca612   Vlastimil Babka   mm, compaction: m...
1778
  	return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1779
  }
21c527a3c   Yaowei Bai   mm/compaction.c: ...
1780
1781
1782
1783
1784
1785
1786
1787
  /*
   * order == -1 is expected when compacting via
   * /proc/sys/vm/compact_memory
   */
  static inline bool is_via_compact_memory(int order)
  {
  	return order == -1;
  }
facdaa917   Nitin Gupta   mm: proactive com...
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
  static bool kswapd_is_running(pg_data_t *pgdat)
  {
  	return pgdat->kswapd && (pgdat->kswapd->state == TASK_RUNNING);
  }
  
  /*
   * A zone's fragmentation score is the external fragmentation with respect
   * to COMPACTION_HPAGE_ORDER, scaled by the zone's size. It returns a value
   * in the range [0, 100].
   *
   * The scaling factor ensures that proactive compaction focuses on larger
   * zones like ZONE_NORMAL, rather than smaller, specialized zones like
   * ZONE_DMA32. For smaller zones, the score value remains close to zero,
   * and thus never exceeds the high threshold for proactive compaction.
   */
d34c0a759   Nitin Gupta   mm: use unsigned ...
1803
  static unsigned int fragmentation_score_zone(struct zone *zone)
facdaa917   Nitin Gupta   mm: proactive com...
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
  {
  	unsigned long score;
  
  	score = zone->present_pages *
  			extfrag_for_order(zone, COMPACTION_HPAGE_ORDER);
  	return div64_ul(score, zone->zone_pgdat->node_present_pages + 1);
  }
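
  /*
   * Editorial sketch, not part of mm/compaction.c: the zone weighting applied
   * above, with illustrative numbers. On a node with a 1GB ZONE_DMA32
   * (262144 4K pages) and a 15GB ZONE_NORMAL (3932160 pages), a fully
   * fragmented DMA32 adds only about 262144 * 100 / 4194305 = 6 points to
   * the node score, while ZONE_NORMAL at 50% external fragmentation adds
   * about 3932160 * 50 / 4194305 = 46, so a small zone cannot trigger
   * proactive compaction on its own.
   */
  static unsigned int sketch_zone_score(unsigned long zone_pages,
  				      unsigned long node_pages,
  				      unsigned int extfrag_percent)
  {
  	/* 64-bit intermediate, mirroring the div64_ul() in the real code */
  	unsigned long long score = (unsigned long long)zone_pages * extfrag_percent;

  	return (unsigned int)(score / (node_pages + 1));
  }
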
  
  /*
   * The per-node proactive (background) compaction process is started by its
   * corresponding kcompactd thread when the node's fragmentation score
   * exceeds the high threshold. The compaction process remains active till
   * the node's score falls below the low threshold, or one of the back-off
   * conditions is met.
   */
d34c0a759   Nitin Gupta   mm: use unsigned ...
1819
  static unsigned int fragmentation_score_node(pg_data_t *pgdat)
facdaa917   Nitin Gupta   mm: proactive com...
1820
  {
d34c0a759   Nitin Gupta   mm: use unsigned ...
1821
  	unsigned int score = 0;
facdaa917   Nitin Gupta   mm: proactive com...
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
  	int zoneid;
  
  	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
  		struct zone *zone;
  
  		zone = &pgdat->node_zones[zoneid];
  		score += fragmentation_score_zone(zone);
  	}
  
  	return score;
  }
d34c0a759   Nitin Gupta   mm: use unsigned ...
1833
  static unsigned int fragmentation_score_wmark(pg_data_t *pgdat, bool low)
facdaa917   Nitin Gupta   mm: proactive com...
1834
  {
d34c0a759   Nitin Gupta   mm: use unsigned ...
1835
  	unsigned int wmark_low;
facdaa917   Nitin Gupta   mm: proactive com...
1836
1837
1838
1839
1840
1841
  
  	/*
  	 * Cap the low watermark to avoid excessive compaction
  	 * activity in case a user sets the proactiveness tunable
  	 * close to 100 (maximum).
  	 */
d34c0a759   Nitin Gupta   mm: use unsigned ...
1842
1843
  	wmark_low = max(100U - sysctl_compaction_proactiveness, 5U);
  	return low ? wmark_low : min(wmark_low + 10, 100U);
facdaa917   Nitin Gupta   mm: proactive com...
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
  }
  
  static bool should_proactive_compact_node(pg_data_t *pgdat)
  {
  	int wmark_high;
  
  	if (!sysctl_compaction_proactiveness || kswapd_is_running(pgdat))
  		return false;
  
  	wmark_high = fragmentation_score_wmark(pgdat, false);
  	return fragmentation_score_node(pgdat) > wmark_high;
  }
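
  /*
   * Editorial sketch, not part of mm/compaction.c: the thresholds derived
   * from the vm.compaction_proactiveness sysctl (range 0..100, default 20).
   * With the default, wmark_low is 80 and wmark_high is 90, so kcompactd
   * starts proactive work once the node's fragmentation score exceeds 90
   * and keeps compacting until the score drops back below 80.
   */
  static void sketch_proactive_wmarks(unsigned int proactiveness,
  				    unsigned int *wmark_low,
  				    unsigned int *wmark_high)
  {
  	unsigned int low = 100 - proactiveness;

  	if (low < 5)
  		low = 5;	/* cap so the target never collapses to zero */

  	*wmark_low = low;
  	*wmark_high = low + 10 > 100 ? 100 : low + 10;
  }
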
40cacbcb3   Mel Gorman   mm, compaction: r...
1856
  static enum compact_result __compact_finished(struct compact_control *cc)
748446bb6   Mel Gorman   mm: compaction: m...
1857
  {
8fb74b9fb   Mel Gorman   mm: compaction: p...
1858
  	unsigned int order;
d39773a06   Vlastimil Babka   mm, compaction: a...
1859
  	const int migratetype = cc->migratetype;
cb2dcaf02   Mel Gorman   mm, compaction: f...
1860
  	int ret;
748446bb6   Mel Gorman   mm: compaction: m...
1861

753341a4b   Mel Gorman   revert "mm: have ...
1862
  	/* Compaction run completes if the migrate and free scanner meet */
f2849aa09   Vlastimil Babka   mm, compaction: m...
1863
  	if (compact_scanners_met(cc)) {
55b7c4c99   Vlastimil Babka   mm: compaction: r...
1864
  		/* Let the next compaction start anew. */
40cacbcb3   Mel Gorman   mm, compaction: r...
1865
  		reset_cached_positions(cc->zone);
55b7c4c99   Vlastimil Babka   mm: compaction: r...
1866

62997027c   Mel Gorman   mm: compaction: c...
1867
1868
  		/*
  		 * Mark that the PG_migrate_skip information should be cleared
accf62422   Vlastimil Babka   mm, kswapd: repla...
1869
  		 * by kswapd when it goes to sleep. kcompactd does not set the
62997027c   Mel Gorman   mm: compaction: c...
1870
1871
1872
  		 * flag itself as the decision to clear the flag should be directly
  		 * based on an allocation request.
  		 */
accf62422   Vlastimil Babka   mm, kswapd: repla...
1873
  		if (cc->direct_compaction)
40cacbcb3   Mel Gorman   mm, compaction: r...
1874
  			cc->zone->compact_blockskip_flush = true;
62997027c   Mel Gorman   mm: compaction: c...
1875

c8f7de0bf   Michal Hocko   mm, compaction: d...
1876
1877
1878
1879
  		if (cc->whole_zone)
  			return COMPACT_COMPLETE;
  		else
  			return COMPACT_PARTIAL_SKIPPED;
bb13ffeb9   Mel Gorman   mm: compaction: c...
1880
  	}
748446bb6   Mel Gorman   mm: compaction: m...
1881

facdaa917   Nitin Gupta   mm: proactive com...
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
  	if (cc->proactive_compaction) {
  		int score, wmark_low;
  		pg_data_t *pgdat;
  
  		pgdat = cc->zone->zone_pgdat;
  		if (kswapd_is_running(pgdat))
  			return COMPACT_PARTIAL_SKIPPED;
  
  		score = fragmentation_score_zone(cc->zone);
  		wmark_low = fragmentation_score_wmark(pgdat, true);
  
  		if (score > wmark_low)
  			ret = COMPACT_CONTINUE;
  		else
  			ret = COMPACT_SUCCESS;
  
  		goto out;
  	}
21c527a3c   Yaowei Bai   mm/compaction.c: ...
1900
  	if (is_via_compact_memory(cc->order))
56de7263f   Mel Gorman   mm: compaction: d...
1901
  		return COMPACT_CONTINUE;
efe771c76   Mel Gorman   mm, compaction: a...
1902
1903
1904
1905
1906
1907
1908
1909
  	/*
  	 * Always finish scanning a pageblock to reduce the possibility of
  	 * fallbacks in the future. This is particularly important when
  	 * the migration source is unmovable/reclaimable but it's not worth
  	 * special casing.
  	 */
  	if (!IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
  		return COMPACT_CONTINUE;
baf6a9a1d   Vlastimil Babka   mm, compaction: f...
1910

56de7263f   Mel Gorman   mm: compaction: d...
1911
  	/* Direct compactor: Is a suitable page free? */
cb2dcaf02   Mel Gorman   mm, compaction: f...
1912
  	ret = COMPACT_NO_SUITABLE_PAGE;
8fb74b9fb   Mel Gorman   mm: compaction: p...
1913
  	for (order = cc->order; order < MAX_ORDER; order++) {
40cacbcb3   Mel Gorman   mm, compaction: r...
1914
  		struct free_area *area = &cc->zone->free_area[order];
2149cdaef   Joonsoo Kim   mm/compaction: en...
1915
  		bool can_steal;
8fb74b9fb   Mel Gorman   mm: compaction: p...
1916
1917
  
  		/* Job done if page is free of the right migratetype */
b03641af6   Dan Williams   mm: move buddy li...
1918
  		if (!free_area_empty(area, migratetype))
cf378319d   Vlastimil Babka   mm, compaction: r...
1919
  			return COMPACT_SUCCESS;
8fb74b9fb   Mel Gorman   mm: compaction: p...
1920

2149cdaef   Joonsoo Kim   mm/compaction: en...
1921
1922
1923
  #ifdef CONFIG_CMA
  		/* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
  		if (migratetype == MIGRATE_MOVABLE &&
b03641af6   Dan Williams   mm: move buddy li...
1924
  			!free_area_empty(area, MIGRATE_CMA))
cf378319d   Vlastimil Babka   mm, compaction: r...
1925
  			return COMPACT_SUCCESS;
2149cdaef   Joonsoo Kim   mm/compaction: en...
1926
1927
1928
1929
1930
1931
  #endif
  		/*
  		 * Job done if allocation would steal freepages from
  		 * other migratetype buddy lists.
  		 */
  		if (find_suitable_fallback(area, order, migratetype,
baf6a9a1d   Vlastimil Babka   mm, compaction: f...
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
  						true, &can_steal) != -1) {
  
  			/* movable pages are OK in any pageblock */
  			if (migratetype == MIGRATE_MOVABLE)
  				return COMPACT_SUCCESS;
  
  			/*
  			 * We are stealing for a non-movable allocation. Make
  			 * sure we finish compacting the current pageblock
  			 * first so it is as free as possible and we won't
  			 * have to steal another one soon. This only applies
  			 * to sync compaction, as async compaction operates
  			 * on pageblocks of the same migratetype.
  			 */
  			if (cc->mode == MIGRATE_ASYNC ||
  					IS_ALIGNED(cc->migrate_pfn,
  							pageblock_nr_pages)) {
  				return COMPACT_SUCCESS;
  			}
cb2dcaf02   Mel Gorman   mm, compaction: f...
1951
1952
  			ret = COMPACT_CONTINUE;
  			break;
baf6a9a1d   Vlastimil Babka   mm, compaction: f...
1953
  		}
56de7263f   Mel Gorman   mm: compaction: d...
1954
  	}
facdaa917   Nitin Gupta   mm: proactive com...
1955
  out:
cb2dcaf02   Mel Gorman   mm, compaction: f...
1956
1957
1958
1959
  	if (cc->contended || fatal_signal_pending(current))
  		ret = COMPACT_CONTENDED;
  
  	return ret;
837d026d5   Joonsoo Kim   mm/compaction: mo...
1960
  }
40cacbcb3   Mel Gorman   mm, compaction: r...
1961
  static enum compact_result compact_finished(struct compact_control *cc)
837d026d5   Joonsoo Kim   mm/compaction: mo...
1962
1963
  {
  	int ret;
40cacbcb3   Mel Gorman   mm, compaction: r...
1964
1965
  	ret = __compact_finished(cc);
  	trace_mm_compaction_finished(cc->zone, cc->order, ret);
837d026d5   Joonsoo Kim   mm/compaction: mo...
1966
1967
1968
1969
  	if (ret == COMPACT_NO_SUITABLE_PAGE)
  		ret = COMPACT_CONTINUE;
  
  	return ret;
748446bb6   Mel Gorman   mm: compaction: m...
1970
  }
3e7d34497   Mel Gorman   mm: vmscan: recla...
1971
1972
1973
1974
  /*
   * compaction_suitable: Is this suitable to run compaction on this zone now?
   * Returns
   *   COMPACT_SKIPPED  - If there are too few free pages for compaction
cf378319d   Vlastimil Babka   mm, compaction: r...
1975
   *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
3e7d34497   Mel Gorman   mm: vmscan: recla...
1976
1977
   *   COMPACT_CONTINUE - If compaction should run now
   */
ea7ab982b   Michal Hocko   mm, compaction: c...
1978
  static enum compact_result __compaction_suitable(struct zone *zone, int order,
c603844bd   Mel Gorman   mm, page_alloc: c...
1979
  					unsigned int alloc_flags,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
1980
  					int highest_zoneidx,
86a294a81   Michal Hocko   mm, oom, compacti...
1981
  					unsigned long wmark_target)
3e7d34497   Mel Gorman   mm: vmscan: recla...
1982
  {
3e7d34497   Mel Gorman   mm: vmscan: recla...
1983
  	unsigned long watermark;
21c527a3c   Yaowei Bai   mm/compaction.c: ...
1984
  	if (is_via_compact_memory(order))
3957c7768   Michal Hocko   mm: compaction: f...
1985
  		return COMPACT_CONTINUE;
a92144438   Mel Gorman   mm: move zone wat...
1986
  	watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
ebff39801   Vlastimil Babka   mm, compaction: p...
1987
1988
1989
1990
  	/*
  	 * If watermarks for high-order allocation are already met, there
  	 * should be no need for compaction at all.
  	 */
97a225e69   Joonsoo Kim   mm/page_alloc: in...
1991
  	if (zone_watermark_ok(zone, order, watermark, highest_zoneidx,
ebff39801   Vlastimil Babka   mm, compaction: p...
1992
  								alloc_flags))
cf378319d   Vlastimil Babka   mm, compaction: r...
1993
  		return COMPACT_SUCCESS;
ebff39801   Vlastimil Babka   mm, compaction: p...
1994

3957c7768   Michal Hocko   mm: compaction: f...
1995
  	/*
9861a62c3   Vlastimil Babka   mm, compaction: c...
1996
  	 * Watermarks for order-0 must be met for compaction to be able to
984fdba6a   Vlastimil Babka   mm, compaction: u...
1997
1998
1999
2000
  	 * isolate free pages for migration targets. This means that the
  	 * watermark and alloc_flags have to match, or be more pessimistic than
  	 * the check in __isolate_free_page(). We don't use the direct
  	 * compactor's alloc_flags, as they are not relevant for freepage
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2001
2002
2003
  	 * isolation. We however do use the direct compactor's highest_zoneidx
  	 * to skip over zones where lowmem reserves would prevent allocation
  	 * even if compaction succeeds.
8348faf91   Vlastimil Babka   mm, compaction: r...
2004
2005
  	 * For costly orders, we require low watermark instead of min for
  	 * compaction to proceed to increase its chances.
d883c6cf3   Joonsoo Kim   Revert "mm/cma: m...
2006
2007
  	 * ALLOC_CMA is used, as pages in CMA pageblocks are considered
  	 * suitable migration targets
3e7d34497   Mel Gorman   mm: vmscan: recla...
2008
  	 */
8348faf91   Vlastimil Babka   mm, compaction: r...
2009
2010
2011
  	watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
  				low_wmark_pages(zone) : min_wmark_pages(zone);
  	watermark += compact_gap(order);
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2012
  	if (!__zone_watermark_ok(zone, 0, watermark, highest_zoneidx,
d883c6cf3   Joonsoo Kim   Revert "mm/cma: m...
2013
  						ALLOC_CMA, wmark_target))
3e7d34497   Mel Gorman   mm: vmscan: recla...
2014
  		return COMPACT_SKIPPED;
cc5c9f098   Vlastimil Babka   mm, compaction: i...
2015
2016
2017
2018
2019
  	return COMPACT_CONTINUE;
  }
  
  enum compact_result compaction_suitable(struct zone *zone, int order,
  					unsigned int alloc_flags,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2020
  					int highest_zoneidx)
cc5c9f098   Vlastimil Babka   mm, compaction: i...
2021
2022
2023
  {
  	enum compact_result ret;
  	int fragindex;
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2024
  	ret = __compaction_suitable(zone, order, alloc_flags, highest_zoneidx,
cc5c9f098   Vlastimil Babka   mm, compaction: i...
2025
  				    zone_page_state(zone, NR_FREE_PAGES));
3e7d34497   Mel Gorman   mm: vmscan: recla...
2026
2027
2028
2029
  	/*
  	 * fragmentation index determines if allocation failures are due to
  	 * low memory or external fragmentation
  	 *
ebff39801   Vlastimil Babka   mm, compaction: p...
2030
2031
  	 * index of -1000 would imply allocations might succeed depending on
  	 * watermarks, but we already failed the high-order watermark check
3e7d34497   Mel Gorman   mm: vmscan: recla...
2032
2033
2034
  	 * index towards 0 implies failure is due to lack of memory
  	 * index towards 1000 implies failure is due to fragmentation
  	 *
203114202   Vlastimil Babka   mm, compaction: r...
2035
2036
2037
2038
2039
2040
  	 * Only compact if a failure would be due to fragmentation. Also
  	 * ignore fragindex for non-costly orders where the alternative to
  	 * a successful reclaim/compaction is OOM. Fragindex and the
  	 * vm.extfrag_threshold sysctl are meant as a heuristic to prevent
  	 * excessive compaction for costly orders, but it should not be at the
  	 * expense of system stability.
3e7d34497   Mel Gorman   mm: vmscan: recla...
2041
  	 */
203114202   Vlastimil Babka   mm, compaction: r...
2042
  	if (ret == COMPACT_CONTINUE && (order > PAGE_ALLOC_COSTLY_ORDER)) {
cc5c9f098   Vlastimil Babka   mm, compaction: i...
2043
2044
2045
2046
  		fragindex = fragmentation_index(zone, order);
  		if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
  			ret = COMPACT_NOT_SUITABLE_ZONE;
  	}
837d026d5   Joonsoo Kim   mm/compaction: mo...
2047

837d026d5   Joonsoo Kim   mm/compaction: mo...
2048
2049
2050
2051
2052
2053
  	trace_mm_compaction_suitable(zone, order, ret);
  	if (ret == COMPACT_NOT_SUITABLE_ZONE)
  		ret = COMPACT_SKIPPED;
  
  	return ret;
  }
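
  /*
   * Editorial sketch, not part of mm/compaction.c: the overall shape of the
   * decision made by compaction_suitable(). Here base_wmark stands for the
   * min (or, for costly orders, low) zone watermark, 2UL << order mirrors
   * compact_gap(), costly_order stands for PAGE_ALLOC_COSTLY_ORDER (3) and
   * extfrag_threshold for the vm.extfrag_threshold sysctl (default 500).
   */
  enum sketch_suitability { SKETCH_SKIPPED, SKETCH_SUCCESS, SKETCH_CONTINUE };

  static enum sketch_suitability
  sketch_compaction_suitable(int order, int hiorder_wmark_ok,
  			   unsigned long free_pages, unsigned long base_wmark,
  			   int costly_order, int fragindex,
  			   int extfrag_threshold)
  {
  	if (hiorder_wmark_ok)	/* the allocation would already succeed */
  		return SKETCH_SUCCESS;

  	/* Order-0 headroom so the free scanner can feed the migration */
  	if (free_pages < base_wmark + (2UL << order))
  		return SKETCH_SKIPPED;

  	/* Costly orders: only compact when failure looks like fragmentation */
  	if (order > costly_order &&
  	    fragindex >= 0 && fragindex <= extfrag_threshold)
  		return SKETCH_SKIPPED;

  	return SKETCH_CONTINUE;
  }
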
86a294a81   Michal Hocko   mm, oom, compacti...
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
  bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
  		int alloc_flags)
  {
  	struct zone *zone;
  	struct zoneref *z;
  
  	/*
  	 * Make sure at least one zone would pass __compaction_suitable if we continue
  	 * retrying the reclaim.
  	 */
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2064
2065
  	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
  				ac->highest_zoneidx, ac->nodemask) {
86a294a81   Michal Hocko   mm, oom, compacti...
2066
2067
2068
2069
2070
2071
2072
2073
2074
  		unsigned long available;
  		enum compact_result compact_result;
  
  		/*
  		 * Do not consider all the reclaimable memory because we do not
  		 * want to thrash just for a single high-order allocation which
  		 * is not even guaranteed to appear even if __compaction_suitable
  		 * is happy about the watermark check.
  		 */
5a1c84b40   Mel Gorman   mm: remove reclai...
2075
  		available = zone_reclaimable_pages(zone) / order;
86a294a81   Michal Hocko   mm, oom, compacti...
2076
2077
  		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
  		compact_result = __compaction_suitable(zone, order, alloc_flags,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2078
  				ac->highest_zoneidx, available);
cc5c9f098   Vlastimil Babka   mm, compaction: i...
2079
  		if (compact_result != COMPACT_SKIPPED)
86a294a81   Michal Hocko   mm, oom, compacti...
2080
2081
2082
2083
2084
  			return true;
  	}
  
  	return false;
  }
5e1f0f098   Mel Gorman   mm, compaction: c...
2085
2086
  static enum compact_result
  compact_zone(struct compact_control *cc, struct capture_control *capc)
748446bb6   Mel Gorman   mm: compaction: m...
2087
  {
ea7ab982b   Michal Hocko   mm, compaction: c...
2088
  	enum compact_result ret;
40cacbcb3   Mel Gorman   mm, compaction: r...
2089
2090
  	unsigned long start_pfn = cc->zone->zone_start_pfn;
  	unsigned long end_pfn = zone_end_pfn(cc->zone);
566e54e11   Mel Gorman   mm, compaction: r...
2091
  	unsigned long last_migrated_pfn;
e0b9daeb4   David Rientjes   mm, compaction: e...
2092
  	const bool sync = cc->mode != MIGRATE_ASYNC;
8854c55f5   Mel Gorman   mm, compaction: k...
2093
  	bool update_cached;
748446bb6   Mel Gorman   mm: compaction: m...
2094

a94b52524   Yafang Shao   mm/compaction.c: ...
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
  	/*
  	 * These counters track activities during zone compaction.  Initialize
  	 * them before compacting a new zone.
  	 */
  	cc->total_migrate_scanned = 0;
  	cc->total_free_scanned = 0;
  	cc->nr_migratepages = 0;
  	cc->nr_freepages = 0;
  	INIT_LIST_HEAD(&cc->freepages);
  	INIT_LIST_HEAD(&cc->migratepages);
01c0bfe06   Wei Yang   mm: rename gfpfla...
2105
  	cc->migratetype = gfp_migratetype(cc->gfp_mask);
40cacbcb3   Mel Gorman   mm, compaction: r...
2106
  	ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
97a225e69   Joonsoo Kim   mm/page_alloc: in...
2107
  							cc->highest_zoneidx);
c46649dea   Michal Hocko   mm, compaction: c...
2108
  	/* Compaction is likely to fail */
cf378319d   Vlastimil Babka   mm, compaction: r...
2109
  	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
3e7d34497   Mel Gorman   mm: vmscan: recla...
2110
  		return ret;
c46649dea   Michal Hocko   mm, compaction: c...
2111
2112
2113
  
  	/* huh, compaction_suitable is returning something unexpected */
  	VM_BUG_ON(ret != COMPACT_CONTINUE);
3e7d34497   Mel Gorman   mm: vmscan: recla...
2114

c89511ab2   Mel Gorman   mm: compaction: R...
2115
  	/*
d3132e4b8   Vlastimil Babka   mm: compaction: r...
2116
  	 * Clear pageblock skip if there were failures recently and compaction
accf62422   Vlastimil Babka   mm, kswapd: repla...
2117
  	 * is about to be retried after being deferred.
d3132e4b8   Vlastimil Babka   mm: compaction: r...
2118
  	 */
40cacbcb3   Mel Gorman   mm, compaction: r...
2119
2120
  	if (compaction_restarting(cc->zone, cc->order))
  		__reset_isolation_suitable(cc->zone);
d3132e4b8   Vlastimil Babka   mm: compaction: r...
2121
2122
  
  	/*
c89511ab2   Mel Gorman   mm: compaction: R...
2123
  	 * Setup to move all movable pages to the end of the zone. Use cached
06ed29989   Vlastimil Babka   mm, compaction: m...
2124
2125
2126
  	 * information on where the scanners should start (unless we explicitly
  	 * want to compact the whole zone), but check that it is initialised
  	 * by ensuring the values are within zone boundaries.
c89511ab2   Mel Gorman   mm: compaction: R...
2127
  	 */
70b44595e   Mel Gorman   mm, compaction: u...
2128
  	cc->fast_start_pfn = 0;
06ed29989   Vlastimil Babka   mm, compaction: m...
2129
  	if (cc->whole_zone) {
c89511ab2   Mel Gorman   mm: compaction: R...
2130
  		cc->migrate_pfn = start_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
2131
2132
  		cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
  	} else {
40cacbcb3   Mel Gorman   mm, compaction: r...
2133
2134
  		cc->migrate_pfn = cc->zone->compact_cached_migrate_pfn[sync];
  		cc->free_pfn = cc->zone->compact_cached_free_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
2135
2136
  		if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
  			cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
40cacbcb3   Mel Gorman   mm, compaction: r...
2137
  			cc->zone->compact_cached_free_pfn = cc->free_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
2138
2139
2140
  		}
  		if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
  			cc->migrate_pfn = start_pfn;
40cacbcb3   Mel Gorman   mm, compaction: r...
2141
2142
  			cc->zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
  			cc->zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
2143
  		}
c8f7de0bf   Michal Hocko   mm, compaction: d...
2144

e332f741a   Mel Gorman   mm, compaction: b...
2145
  		if (cc->migrate_pfn <= cc->zone->compact_init_migrate_pfn)
06ed29989   Vlastimil Babka   mm, compaction: m...
2146
2147
  			cc->whole_zone = true;
  	}
c8f7de0bf   Michal Hocko   mm, compaction: d...
2148

566e54e11   Mel Gorman   mm, compaction: r...
2149
  	last_migrated_pfn = 0;
748446bb6   Mel Gorman   mm: compaction: m...
2150


	/*
	 * Migrate has separate cached PFNs for ASYNC and SYNC* migration on
	 * the basis that some migrations will fail in ASYNC mode. However,
	 * if the cached PFNs match and pageblocks are skipped due to having
	 * no isolation candidates, then the sync state does not matter.
	 * Until a pageblock with isolation candidates is found, keep the
	 * cached PFNs in sync to avoid revisiting the same blocks.
	 */
	update_cached = !sync &&
		cc->zone->compact_cached_migrate_pfn[0] == cc->zone->compact_cached_migrate_pfn[1];

	trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
				cc->free_pfn, end_pfn, sync);

	migrate_prep_local();

	while ((ret = compact_finished(cc)) == COMPACT_CONTINUE) {
		int err;
		unsigned long start_pfn = cc->migrate_pfn;

		/*
		 * Avoid multiple rescans which can happen if a page cannot be
		 * isolated (dirty/writeback in async mode) or if the migrated
		 * pages are being allocated before the pageblock is cleared.
		 * The first rescan will capture the entire pageblock for
		 * migration. If it fails, it'll be marked skip and scanning
		 * will proceed as normal.
		 */
		cc->rescan = false;
		if (pageblock_start_pfn(last_migrated_pfn) ==
		    pageblock_start_pfn(start_pfn)) {
			cc->rescan = true;
		}

		switch (isolate_migratepages(cc)) {
		case ISOLATE_ABORT:
			ret = COMPACT_CONTENDED;
			putback_movable_pages(&cc->migratepages);
			cc->nr_migratepages = 0;
			goto out;
		case ISOLATE_NONE:
			if (update_cached) {
				cc->zone->compact_cached_migrate_pfn[1] =
					cc->zone->compact_cached_migrate_pfn[0];
			}

			/*
			 * We haven't isolated and migrated anything, but
			 * there might still be unflushed migrations from
			 * previous cc->order aligned block.
			 */
			goto check_drain;
		case ISOLATE_SUCCESS:
			update_cached = false;
			last_migrated_pfn = start_pfn;
			;
		}

		err = migrate_pages(&cc->migratepages, compaction_alloc,
				compaction_free, (unsigned long)cc, cc->mode,
				MR_COMPACTION);

		trace_mm_compaction_migratepages(cc->nr_migratepages, err,
							&cc->migratepages);

		/* All pages were either migrated or will be released */
		cc->nr_migratepages = 0;
		if (err) {
			putback_movable_pages(&cc->migratepages);
			/*
			 * migrate_pages() may return -ENOMEM when scanners meet
			 * and we want compact_finished() to detect it
			 */
			if (err == -ENOMEM && !compact_scanners_met(cc)) {
				ret = COMPACT_CONTENDED;
				goto out;
			}
			/*
			 * We failed to migrate at least one page in the current
			 * order-aligned block, so skip the rest of it.
			 */
			if (cc->direct_compaction &&
						(cc->mode == MIGRATE_ASYNC)) {
				cc->migrate_pfn = block_end_pfn(
						cc->migrate_pfn - 1, cc->order);
				/* Draining pcplists is useless in this case */
				last_migrated_pfn = 0;
			}
		}

check_drain:
		/*
		 * Has the migration scanner moved away from the previous
		 * cc->order aligned block where we migrated from? If yes,
		 * flush the pages that were freed, so that they can merge and
		 * compact_finished() can detect immediately if allocation
		 * would succeed.
		 */
		if (cc->order > 0 && last_migrated_pfn) {
			unsigned long current_block_start =
				block_start_pfn(cc->migrate_pfn, cc->order);

			if (last_migrated_pfn < current_block_start) {
				lru_add_drain_cpu_zone(cc->zone);
				/* No more flushing until we migrate again */
				last_migrated_pfn = 0;
			}
		}

		/* Stop if a page has been captured */
		if (capc && capc->page) {
			ret = COMPACT_SUCCESS;
			break;
		}
	}

out:
	/*
	 * Release free pages and update where the free scanner should restart,
	 * so we don't leave any returned pages behind in the next attempt.
	 */
	if (cc->nr_freepages > 0) {
		unsigned long free_pfn = release_freepages(&cc->freepages);

		cc->nr_freepages = 0;
		VM_BUG_ON(free_pfn == 0);
		/* The cached pfn is always the first in a pageblock */
		free_pfn = pageblock_start_pfn(free_pfn);
		/*
		 * Only go back, not forward. The cached pfn might have been
		 * already reset to zone end in compact_finished()
		 */
		if (free_pfn > cc->zone->compact_cached_free_pfn)
			cc->zone->compact_cached_free_pfn = free_pfn;
	}

	count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned);
	count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned);

	trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
				cc->free_pfn, end_pfn, sync, ret);

	return ret;
}
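
/*
 * compact_zone_order() packages a direct-compaction request for one zone:
 * it builds a compact_control from the allocation's order, flags and
 * priority, installs a capture_control on the current task so that a freed
 * page of the right order can be handed straight back to the allocator,
 * and then runs compact_zone().
 */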
static enum compact_result compact_zone_order(struct zone *zone, int order,
		gfp_t gfp_mask, enum compact_priority prio,
		unsigned int alloc_flags, int highest_zoneidx,
		struct page **capture)
{
	enum compact_result ret;
	struct compact_control cc = {
		.order = order,
		.search_order = order,
		.gfp_mask = gfp_mask,
		.zone = zone,
		.mode = (prio == COMPACT_PRIO_ASYNC) ?
					MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT,
		.alloc_flags = alloc_flags,
		.highest_zoneidx = highest_zoneidx,
		.direct_compaction = true,
		.whole_zone = (prio == MIN_COMPACT_PRIORITY),
		.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
		.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
	};
	struct capture_control capc = {
		.cc = &cc,
		.page = NULL,
	};

	/*
	 * Make sure the structs are really initialized before we expose the
	 * capture control, in case we are interrupted and the interrupt handler
	 * frees a page.
	 */
	barrier();
	WRITE_ONCE(current->capture_control, &capc);

	ret = compact_zone(&cc, &capc);

	VM_BUG_ON(!list_empty(&cc.freepages));
	VM_BUG_ON(!list_empty(&cc.migratepages));

	/*
	 * Make sure we hide capture control first before we read the captured
	 * page pointer, otherwise an interrupt could free and capture a page
	 * and we would leak it.
	 */
	WRITE_ONCE(current->capture_control, NULL);
	*capture = READ_ONCE(capc.page);

	return ret;
}
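
/*
 * Exposed as /proc/sys/vm/extfrag_threshold. compaction_suitable() compares
 * a zone's external fragmentation index for the requested order against this
 * value and skips compaction when the index is at or below it, i.e. when an
 * allocation failure looks like a low-memory problem rather than a
 * fragmentation problem.
 */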
int sysctl_extfrag_threshold = 500;

/**
 * try_to_compact_pages - Direct compact to satisfy a high-order allocation
 * @gfp_mask: The GFP mask of the current allocation
 * @order: The order of the current allocation
 * @alloc_flags: The allocation flags of the current allocation
 * @ac: The context of current allocation
 * @prio: Determines how hard direct compaction should try to succeed
 * @capture: Pointer to free page created by compaction will be stored here
 *
 * This is the main entry point for direct page compaction.
 */
enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
		unsigned int alloc_flags, const struct alloc_context *ac,
		enum compact_priority prio, struct page **capture)
{
	int may_perform_io = gfp_mask & __GFP_IO;
	struct zoneref *z;
	struct zone *zone;
	enum compact_result rc = COMPACT_SKIPPED;

	/*
	 * Check if the GFP flags allow compaction - GFP_NOIO is really
	 * tricky context because the migration might require IO
	 */
	if (!may_perform_io)
		return COMPACT_SKIPPED;

	trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);

	/* Compact each zone in the list */
	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
					ac->highest_zoneidx, ac->nodemask) {
		enum compact_result status;

		if (prio > MIN_COMPACT_PRIORITY
					&& compaction_deferred(zone, order)) {
			rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
			continue;
		}

		status = compact_zone_order(zone, order, gfp_mask, prio,
				alloc_flags, ac->highest_zoneidx, capture);
		rc = max(status, rc);

		/* The allocation should succeed, stop compacting */
		if (status == COMPACT_SUCCESS) {
			/*
			 * We think the allocation will succeed in this zone,
			 * but it is not certain, hence the false. The caller
			 * will repeat this with true if allocation indeed
			 * succeeds in this zone.
			 */
			compaction_defer_reset(zone, order, false);

			break;
		}

		if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
					status == COMPACT_PARTIAL_SKIPPED))
			/*
			 * We think that allocation won't succeed in this zone
			 * so we defer compaction there. If it ends up
			 * succeeding after all, it will be reset.
			 */
			defer_compaction(zone, order);

		/*
		 * We might have stopped compacting due to need_resched() in
		 * async compaction, or due to a fatal signal detected. In that
		 * case do not try further zones
		 */
		if ((prio == COMPACT_PRIO_ASYNC && need_resched())
					|| fatal_signal_pending(current))
			break;
	}

	return rc;
}

/*
 * Compact all zones within a node until each zone's fragmentation score
 * falls within the proactive compaction thresholds (as determined by the
 * proactiveness tunable).
 *
 * It is possible that the function returns before reaching the score targets
 * due to various back-off conditions, such as contention on per-node or
 * per-zone locks.
 */
static void proactive_compact_node(pg_data_t *pgdat)
{
	int zoneid;
	struct zone *zone;
	struct compact_control cc = {
		.order = -1,
		.mode = MIGRATE_SYNC_LIGHT,
		.ignore_skip_hint = true,
		.whole_zone = true,
		.gfp_mask = GFP_KERNEL,
		.proactive_compaction = true,
	};

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
		zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		cc.zone = zone;

		compact_zone(&cc, NULL);

		VM_BUG_ON(!list_empty(&cc.freepages));
		VM_BUG_ON(!list_empty(&cc.migratepages));
	}
}

/* Compact all zones within a node */
static void compact_node(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	int zoneid;
	struct zone *zone;
	struct compact_control cc = {
		.order = -1,
		.mode = MIGRATE_SYNC,
		.ignore_skip_hint = true,
		.whole_zone = true,
		.gfp_mask = GFP_KERNEL,
	};

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {

		zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		cc.zone = zone;

		compact_zone(&cc, NULL);

		VM_BUG_ON(!list_empty(&cc.freepages));
		VM_BUG_ON(!list_empty(&cc.migratepages));
	}
}
  
/* Compact all nodes in the system */
static void compact_nodes(void)
{
	int nid;

	/* Flush pending updates to the LRU lists */
	lru_add_drain_all();

	for_each_online_node(nid)
		compact_node(nid);
}

/* The written value is actually unused, all memory is compacted */
int sysctl_compact_memory;
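
/*
 * Usage example: any write, e.g. "echo 1 > /proc/sys/vm/compact_memory",
 * invokes sysctl_compaction_handler() below and compacts every online node;
 * the value written is ignored.
 */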

/*
 * Tunable for proactive compaction. It determines how
 * aggressively the kernel should compact memory in the
 * background. It takes values in the range [0, 100].
 */
unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
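
/*
 * Exposed as /proc/sys/vm/compaction_proactiveness, e.g.
 * "echo 30 > /proc/sys/vm/compaction_proactiveness". Higher values make
 * kcompactd target a lower fragmentation score, i.e. compact more
 * aggressively in the background.
 */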

/*
 * This is the entry point for compacting all nodes via
 * /proc/sys/vm/compact_memory
 */
int sysctl_compaction_handler(struct ctl_table *table, int write,
			void *buffer, size_t *length, loff_t *ppos)
{
	if (write)
		compact_nodes();

	return 0;
}

#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
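/*
 * Per-node trigger: the "compact" device attribute created below appears as
 * /sys/devices/system/node/nodeN/compact; writing to it, e.g.
 * "echo 1 > /sys/devices/system/node/node0/compact", compacts that node.
 */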
static ssize_t sysfs_compact_node(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	int nid = dev->id;

	if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
		/* Flush pending updates to the LRU lists */
		lru_add_drain_all();

		compact_node(nid);
	}

	return count;
}
static DEVICE_ATTR(compact, 0200, NULL, sysfs_compact_node);

int compaction_register_node(struct node *node)
{
	return device_create_file(&node->dev, &dev_attr_compact);
}

void compaction_unregister_node(struct node *node)
{
	return device_remove_file(&node->dev, &dev_attr_compact);
}
#endif /* CONFIG_SYSFS && CONFIG_NUMA */
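
/*
 * True when wakeup_kcompactd() has queued work (a pending order to satisfy)
 * or the thread is being asked to stop.
 */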
static inline bool kcompactd_work_requested(pg_data_t *pgdat)
{
	return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
}
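
/*
 * Check whether at least one populated zone, up to the requested highest
 * zone index, is currently suitable for compaction at kcompactd_max_order.
 */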
  
static bool kcompactd_node_suitable(pg_data_t *pgdat)
{
	int zoneid;
	struct zone *zone;
	enum zone_type highest_zoneidx = pgdat->kcompactd_highest_zoneidx;

	for (zoneid = 0; zoneid <= highest_zoneidx; zoneid++) {
		zone = &pgdat->node_zones[zoneid];

		if (!populated_zone(zone))
			continue;

		if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
					highest_zoneidx) == COMPACT_CONTINUE)
			return true;
	}

	return false;
}
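
/*
 * One kcompactd work pass: compact each populated zone up to
 * kcompactd_highest_zoneidx until a page of kcompactd_max_order is
 * allocatable, skipping zones where compaction is deferred or unsuitable,
 * then reset the pending request.
 */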
  
static void kcompactd_do_work(pg_data_t *pgdat)
{
	/*
	 * With no special task, compact all zones so that a page of requested
	 * order is allocatable.
	 */
	int zoneid;
	struct zone *zone;
	struct compact_control cc = {
		.order = pgdat->kcompactd_max_order,
		.search_order = pgdat->kcompactd_max_order,
		.highest_zoneidx = pgdat->kcompactd_highest_zoneidx,
		.mode = MIGRATE_SYNC_LIGHT,
		.ignore_skip_hint = false,
		.gfp_mask = GFP_KERNEL,
	};
	trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
							cc.highest_zoneidx);
	count_compact_event(KCOMPACTD_WAKE);

	for (zoneid = 0; zoneid <= cc.highest_zoneidx; zoneid++) {
		int status;

		zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		if (compaction_deferred(zone, cc.order))
			continue;

		if (compaction_suitable(zone, cc.order, 0, zoneid) !=
							COMPACT_CONTINUE)
			continue;

		if (kthread_should_stop())
			return;

		cc.zone = zone;
		status = compact_zone(&cc, NULL);

		if (status == COMPACT_SUCCESS) {
			compaction_defer_reset(zone, cc.order, false);
		} else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
			/*
			 * Buddy pages may become stranded on pcps that could
			 * otherwise coalesce on the zone's free area for
			 * order >= cc.order.  This is ratelimited by the
			 * upcoming deferral.
			 */
			drain_all_pages(zone);

			/*
			 * We use sync migration mode here, so we defer like
			 * sync direct compaction does.
			 */
			defer_compaction(zone, cc.order);
		}

		count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
				     cc.total_migrate_scanned);
		count_compact_events(KCOMPACTD_FREE_SCANNED,
				     cc.total_free_scanned);

		VM_BUG_ON(!list_empty(&cc.freepages));
		VM_BUG_ON(!list_empty(&cc.migratepages));
	}

	/*
	 * Regardless of success, we are done until woken up next. But remember
	 * the requested order/highest_zoneidx in case it was higher/tighter
	 * than our current ones
	 */
	if (pgdat->kcompactd_max_order <= cc.order)
		pgdat->kcompactd_max_order = 0;
	if (pgdat->kcompactd_highest_zoneidx >= cc.highest_zoneidx)
		pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;
}
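
/*
 * Record a background compaction request for @order / @highest_zoneidx and
 * wake kcompactd, but only if somebody is actually waiting on it and at
 * least one zone of the node looks compactable.
 */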
void wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx)
{
	if (!order)
		return;

	if (pgdat->kcompactd_max_order < order)
		pgdat->kcompactd_max_order = order;

	if (pgdat->kcompactd_highest_zoneidx > highest_zoneidx)
		pgdat->kcompactd_highest_zoneidx = highest_zoneidx;

	/*
	 * Pairs with implicit barrier in wait_event_freezable()
	 * such that wakeups are not missed.
	 */
	if (!wq_has_sleeper(&pgdat->kcompactd_wait))
		return;

	if (!kcompactd_node_suitable(pgdat))
		return;

	trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
							highest_zoneidx);
	wake_up_interruptible(&pgdat->kcompactd_wait);
}
  
/*
 * The background compaction daemon, started as a kernel thread
 * from the init process.
 */
static int kcompactd(void *p)
{
	pg_data_t *pgdat = (pg_data_t*)p;
	struct task_struct *tsk = current;
	unsigned int proactive_defer = 0;

	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);

	if (!cpumask_empty(cpumask))
		set_cpus_allowed_ptr(tsk, cpumask);

	set_freezable();

	pgdat->kcompactd_max_order = 0;
	pgdat->kcompactd_highest_zoneidx = pgdat->nr_zones - 1;

	while (!kthread_should_stop()) {
		unsigned long pflags;

		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
		if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
			kcompactd_work_requested(pgdat),
			msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC))) {

			psi_memstall_enter(&pflags);
			kcompactd_do_work(pgdat);
			psi_memstall_leave(&pflags);
			continue;
		}
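
		/*
		 * No direct wakeup arrived within the 500ms interval above, so
		 * consider a proactive pass. If a pass makes no progress (the
		 * node's fragmentation score does not drop), further passes
		 * are deferred for 1 << COMPACT_MAX_DEFER_SHIFT timeouts;
		 * assuming the usual COMPACT_MAX_DEFER_SHIFT of 6, that is 64
		 * timeouts, roughly 32 seconds.
		 */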

		/* kcompactd wait timeout */
		if (should_proactive_compact_node(pgdat)) {
			unsigned int prev_score, score;

			if (proactive_defer) {
				proactive_defer--;
				continue;
			}
			prev_score = fragmentation_score_node(pgdat);
			proactive_compact_node(pgdat);
			score = fragmentation_score_node(pgdat);
			/*
			 * Defer proactive compaction if the fragmentation
			 * score did not go down i.e. no progress made.
			 */
			proactive_defer = score < prev_score ?
					0 : 1 << COMPACT_MAX_DEFER_SHIFT;
		}
	}

	return 0;
}

/*
 * This kcompactd start function will be called by init and node-hot-add.
 * On node-hot-add, kcompactd will be moved to proper cpus if cpus are hot-added.
 */
int kcompactd_run(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	int ret = 0;

	if (pgdat->kcompactd)
		return 0;

	pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
	if (IS_ERR(pgdat->kcompactd)) {
		pr_err("Failed to start kcompactd on node %d\n", nid);
		ret = PTR_ERR(pgdat->kcompactd);
		pgdat->kcompactd = NULL;
	}
	return ret;
}

/*
 * Called by memory hotplug when all memory in a node is offlined. Caller must
 * hold mem_hotplug_begin/end().
 */
void kcompactd_stop(int nid)
{
	struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;

	if (kcompactd) {
		kthread_stop(kcompactd);
		NODE_DATA(nid)->kcompactd = NULL;
	}
}

/*
 * It's optimal to keep kcompactd on the same CPUs as their memory, but
 * not required for correctness. So if the last cpu in a node goes
 * away, we get changed to run anywhere: as the first one comes back,
 * restore their cpu bindings.
 */
static int kcompactd_cpu_online(unsigned int cpu)
{
	int nid;

	for_each_node_state(nid, N_MEMORY) {
		pg_data_t *pgdat = NODE_DATA(nid);
		const struct cpumask *mask;

		mask = cpumask_of_node(pgdat->node_id);

		if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
			/* One of our CPUs online: restore mask */
			set_cpus_allowed_ptr(pgdat->kcompactd, mask);
	}
	return 0;
}
  
static int __init kcompactd_init(void)
{
	int nid;
	int ret;

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					"mm/compaction:online",
					kcompactd_cpu_online, NULL);
	if (ret < 0) {
		pr_err("kcompactd: failed to register hotplug callbacks.\n");
		return ret;
	}

	for_each_node_state(nid, N_MEMORY)
		kcompactd_run(nid);
	return 0;
}
subsys_initcall(kcompactd_init)

#endif /* CONFIG_COMPACTION */