mm/compaction.c
  // SPDX-License-Identifier: GPL-2.0
  /*
   * linux/mm/compaction.c
   *
   * Memory compaction for the reduction of external fragmentation. Note that
   * this heavily depends upon page migration to do all the real heavy
   * lifting
   *
   * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
   */
  #include <linux/cpu.h>
  #include <linux/swap.h>
  #include <linux/migrate.h>
  #include <linux/compaction.h>
  #include <linux/mm_inline.h>
  #include <linux/sched/signal.h>
  #include <linux/backing-dev.h>
  #include <linux/sysctl.h>
  #include <linux/sysfs.h>
  #include <linux/page-isolation.h>
  #include <linux/kasan.h>
  #include <linux/kthread.h>
  #include <linux/freezer.h>
  #include <linux/page_owner.h>
  #include <linux/psi.h>
  #include "internal.h"
  #ifdef CONFIG_COMPACTION
  static inline void count_compact_event(enum vm_event_item item)
  {
  	count_vm_event(item);
  }
  
  static inline void count_compact_events(enum vm_event_item item, long delta)
  {
  	count_vm_events(item, delta);
  }
  #else
  #define count_compact_event(item) do { } while (0)
  #define count_compact_events(item, delta) do { } while (0)
  #endif
  #if defined CONFIG_COMPACTION || defined CONFIG_CMA
  #define CREATE_TRACE_POINTS
  #include <trace/events/compaction.h>
  #define block_start_pfn(pfn, order)	round_down(pfn, 1UL << (order))
  #define block_end_pfn(pfn, order)	ALIGN((pfn) + 1, 1UL << (order))
  #define pageblock_start_pfn(pfn)	block_start_pfn(pfn, pageblock_order)
  #define pageblock_end_pfn(pfn)		block_end_pfn(pfn, pageblock_order)
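/*
 * Editor's note -- worked example, not part of the original file: assuming a
 * typical x86-64 configuration where pageblock_order == 9 (so
 * pageblock_nr_pages == 512):
 *
 *	pageblock_start_pfn(1000) == round_down(1000, 512) == 512
 *	pageblock_end_pfn(1000)   == ALIGN(1001, 512)      == 1024
 *
 * The "+ 1" in block_end_pfn() ensures that a pfn sitting exactly on a block
 * boundary (e.g. 512) still maps to the end of its own block (1024) rather
 * than to itself.
 */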
  static unsigned long release_freepages(struct list_head *freelist)
  {
  	struct page *page, *next;
  	unsigned long high_pfn = 0;
  
  	list_for_each_entry_safe(page, next, freelist, lru) {
  		unsigned long pfn = page_to_pfn(page);
  		list_del(&page->lru);
  		__free_page(page);
  		if (pfn > high_pfn)
  			high_pfn = pfn;
  	}
  	return high_pfn;
  }
  static void split_map_pages(struct list_head *list)
  {
  	unsigned int i, order, nr_pages;
  	struct page *page, *next;
  	LIST_HEAD(tmp_list);
  
  	list_for_each_entry_safe(page, next, list, lru) {
  		list_del(&page->lru);
  
  		order = page_private(page);
  		nr_pages = 1 << order;

  		post_alloc_hook(page, order, __GFP_MOVABLE);
  		if (order)
  			split_page(page, order);

  		for (i = 0; i < nr_pages; i++) {
  			list_add(&page->lru, &tmp_list);
  			page++;
  		}
  	}
  
  	list_splice(&tmp_list, list);
  }
  #ifdef CONFIG_COMPACTION

  int PageMovable(struct page *page)
  {
  	struct address_space *mapping;
  
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	if (!__PageMovable(page))
  		return 0;
  
  	mapping = page_mapping(page);
  	if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
  		return 1;
  
  	return 0;
  }
  EXPORT_SYMBOL(PageMovable);
  
  void __SetPageMovable(struct page *page, struct address_space *mapping)
  {
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
  	page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
  }
  EXPORT_SYMBOL(__SetPageMovable);
  
  void __ClearPageMovable(struct page *page)
  {
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_PAGE(!PageMovable(page), page);
  	/*
  	 * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE
  	 * flag so that VM can catch up released page by driver after isolation.
  	 * With it, VM migration doesn't try to put it back.
  	 */
  	page->mapping = (void *)((unsigned long)page->mapping &
  				PAGE_MAPPING_MOVABLE);
  }
  EXPORT_SYMBOL(__ClearPageMovable);
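/*
 * Editor's note -- illustrative sketch, not part of the original file: a
 * driver that wants its non-LRU pages handled by compaction supplies
 * address_space_operations with ->isolate_page, ->migratepage and
 * ->putback_page, and tags each page while holding the page lock, roughly:
 *
 *	lock_page(page);
 *	__SetPageMovable(page, mapping);	// mapping->a_ops->isolate_page != NULL
 *	unlock_page(page);
 *
 * and calls __ClearPageMovable() (again under the page lock) before the page
 * is freed. zsmalloc and z3fold are in-tree users of this interface; see the
 * page migration documentation under Documentation/vm/ for the full contract.
 */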
  /* Do not skip compaction more than 64 times */
  #define COMPACT_MAX_DEFER_SHIFT 6
  
  /*
   * Compaction is deferred when compaction fails to result in a page
   * allocation success. 1 << compact_defer_limit compactions are skipped up
   * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT
   */
  void defer_compaction(struct zone *zone, int order)
  {
  	zone->compact_considered = 0;
  	zone->compact_defer_shift++;
  
  	if (order < zone->compact_order_failed)
  		zone->compact_order_failed = order;
  
  	if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT)
  		zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;
  
  	trace_mm_compaction_defer_compaction(zone, order);
  }
  
  /* Returns true if compaction should be skipped this time */
  bool compaction_deferred(struct zone *zone, int order)
  {
  	unsigned long defer_limit = 1UL << zone->compact_defer_shift;
  
  	if (order < zone->compact_order_failed)
  		return false;
  
  	/* Avoid possible overflow */
  	if (++zone->compact_considered > defer_limit)
  		zone->compact_considered = defer_limit;
  
  	if (zone->compact_considered >= defer_limit)
  		return false;
  
  	trace_mm_compaction_deferred(zone, order);
  
  	return true;
  }
  
  /*
   * Update defer tracking counters after successful compaction of given order,
   * which means an allocation either succeeded (alloc_success == true) or is
   * expected to succeed.
   */
  void compaction_defer_reset(struct zone *zone, int order,
  		bool alloc_success)
  {
  	if (alloc_success) {
  		zone->compact_considered = 0;
  		zone->compact_defer_shift = 0;
  	}
  	if (order >= zone->compact_order_failed)
  		zone->compact_order_failed = order + 1;
  
  	trace_mm_compaction_defer_reset(zone, order);
  }
  
  /* Returns true if restarting compaction after many failures */
  bool compaction_restarting(struct zone *zone, int order)
  {
  	if (order < zone->compact_order_failed)
  		return false;
  
  	return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
  		zone->compact_considered >= 1UL << zone->compact_defer_shift;
  }
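/*
 * Editor's note -- illustrative sketch, not part of the original file: the
 * deferral API above is consumed by the direct compaction path roughly as
 * follows (simplified from try_to_compact_pages()):
 *
 *	if (prio > MIN_COMPACT_PRIORITY && compaction_deferred(zone, order))
 *		continue;			// skip this zone for now
 *
 *	status = compact_zone_order(zone, order, ...);
 *	if (status == COMPACT_SUCCESS)
 *		compaction_defer_reset(zone, order, false);
 *	else if (status == COMPACT_COMPLETE)
 *		defer_compaction(zone, order);	// back off further
 *
 * so each full-but-unsuccessful compaction doubles the number of subsequent
 * attempts that are skipped, up to 1 << COMPACT_MAX_DEFER_SHIFT (64).
 */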
  /* Returns true if the pageblock should be scanned for pages to isolate. */
  static inline bool isolation_suitable(struct compact_control *cc,
  					struct page *page)
  {
  	if (cc->ignore_skip_hint)
  		return true;
  
  	return !get_pageblock_skip(page);
  }
  static void reset_cached_positions(struct zone *zone)
  {
  	zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
  	zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
  	zone->compact_cached_free_pfn =
  				pageblock_start_pfn(zone_end_pfn(zone) - 1);
  }
  /*
 * Compound pages of >= pageblock_order should consistently be skipped until
   * released. It is always pointless to compact pages of such order (if they are
   * migratable), and the pageblocks they occupy cannot contain any free pages.
   */
  static bool pageblock_skip_persistent(struct page *page)
  {
  	if (!PageCompound(page))
  		return false;
  
  	page = compound_head(page);
  
  	if (compound_order(page) >= pageblock_order)
  		return true;
  
  	return false;
  }
  static bool
  __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
  							bool check_target)
  {
  	struct page *page = pfn_to_online_page(pfn);
  	struct page *block_page;
  	struct page *end_page;
  	unsigned long block_pfn;
  
  	if (!page)
  		return false;
  	if (zone != page_zone(page))
  		return false;
  	if (pageblock_skip_persistent(page))
  		return false;
  
  	/*
  	 * If skip is already cleared do no further checking once the
  	 * restart points have been set.
  	 */
  	if (check_source && check_target && !get_pageblock_skip(page))
  		return true;
  
  	/*
  	 * If clearing skip for the target scanner, do not select a
  	 * non-movable pageblock as the starting point.
  	 */
  	if (!check_source && check_target &&
  	    get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
  		return false;
  	/* Ensure the start of the pageblock or zone is online and valid */
  	block_pfn = pageblock_start_pfn(pfn);
  	block_pfn = max(block_pfn, zone->zone_start_pfn);
  	block_page = pfn_to_online_page(block_pfn);
  	if (block_page) {
  		page = block_page;
  		pfn = block_pfn;
  	}
  
  	/* Ensure the end of the pageblock or zone is online and valid */
  	block_pfn = pageblock_end_pfn(pfn) - 1;
  	block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
  	end_page = pfn_to_online_page(block_pfn);
  	if (!end_page)
  		return false;
  	/*
  	 * Only clear the hint if a sample indicates there is either a
  	 * free page or an LRU page in the block. One or other condition
  	 * is necessary for the block to be a migration source/target.
  	 */
  	do {
  		if (pfn_valid_within(pfn)) {
  			if (check_source && PageLRU(page)) {
  				clear_pageblock_skip(page);
  				return true;
  			}
  
  			if (check_target && PageBuddy(page)) {
  				clear_pageblock_skip(page);
  				return true;
  			}
  		}
  
  		page += (1 << PAGE_ALLOC_COSTLY_ORDER);
  		pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
  	} while (page <= end_page);
  
  	return false;
  }
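/*
 * Editor's note -- not part of the original file: the sampling loop above
 * steps in strides of 1 << PAGE_ALLOC_COSTLY_ORDER (8 pages). With a typical
 * pageblock_order of 9 that is at most 512 / 8 == 64 probes per pageblock,
 * which keeps clearing the skip hints cheap while still giving a good chance
 * of noticing a block that contains any free or LRU pages at all.
 */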
  /*
   * This function is called to clear all cached information on pageblocks that
   * should be skipped for page isolation when the migrate and free page scanner
   * meet.
   */
  static void __reset_isolation_suitable(struct zone *zone)
  {
  	unsigned long migrate_pfn = zone->zone_start_pfn;
  	unsigned long free_pfn = zone_end_pfn(zone) - 1;
  	unsigned long reset_migrate = free_pfn;
  	unsigned long reset_free = migrate_pfn;
  	bool source_set = false;
  	bool free_set = false;
  
  	if (!zone->compact_blockskip_flush)
  		return;

  	zone->compact_blockskip_flush = false;

  	/*
  	 * Walk the zone and update pageblock skip information. Source looks
  	 * for PageLRU while target looks for PageBuddy. When the scanner
  	 * is found, both PageBuddy and PageLRU are checked as the pageblock
  	 * is suitable as both source and target.
  	 */
  	for (; migrate_pfn < free_pfn; migrate_pfn += pageblock_nr_pages,
  					free_pfn -= pageblock_nr_pages) {
  		cond_resched();
  		/* Update the migrate PFN */
  		if (__reset_isolation_pfn(zone, migrate_pfn, true, source_set) &&
  		    migrate_pfn < reset_migrate) {
  			source_set = true;
  			reset_migrate = migrate_pfn;
  			zone->compact_init_migrate_pfn = reset_migrate;
  			zone->compact_cached_migrate_pfn[0] = reset_migrate;
  			zone->compact_cached_migrate_pfn[1] = reset_migrate;
  		}

  		/* Update the free PFN */
  		if (__reset_isolation_pfn(zone, free_pfn, free_set, true) &&
  		    free_pfn > reset_free) {
  			free_set = true;
  			reset_free = free_pfn;
  			zone->compact_init_free_pfn = reset_free;
  			zone->compact_cached_free_pfn = reset_free;
  		}
  	}

  	/* Leave no distance if no suitable block was reset */
  	if (reset_migrate >= reset_free) {
  		zone->compact_cached_migrate_pfn[0] = migrate_pfn;
  		zone->compact_cached_migrate_pfn[1] = migrate_pfn;
  		zone->compact_cached_free_pfn = free_pfn;
  	}
  }
  void reset_isolation_suitable(pg_data_t *pgdat)
  {
  	int zoneid;
  
  	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
  		struct zone *zone = &pgdat->node_zones[zoneid];
  		if (!populated_zone(zone))
  			continue;
  
  		/* Only flush if a full compaction finished recently */
  		if (zone->compact_blockskip_flush)
  			__reset_isolation_suitable(zone);
  	}
  }
  /*
   * Sets the pageblock skip bit if it was clear. Note that this is a hint as
   * locks are not required for read/writers. Returns true if it was already set.
   */
  static bool test_and_set_skip(struct compact_control *cc, struct page *page,
  							unsigned long pfn)
  {
  	bool skip;
  
  	/* Do no update if skip hint is being ignored */
  	if (cc->ignore_skip_hint)
  		return false;
  
  	if (!IS_ALIGNED(pfn, pageblock_nr_pages))
  		return false;
  
  	skip = get_pageblock_skip(page);
  	if (!skip && !cc->no_set_skip_hint)
  		set_pageblock_skip(page);
  
  	return skip;
  }
  
  static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
  {
  	struct zone *zone = cc->zone;
  
  	pfn = pageblock_end_pfn(pfn);
  
  	/* Set for isolation rather than compaction */
  	if (cc->no_set_skip_hint)
  		return;
  
  	if (pfn > zone->compact_cached_migrate_pfn[0])
  		zone->compact_cached_migrate_pfn[0] = pfn;
  	if (cc->mode != MIGRATE_ASYNC &&
  	    pfn > zone->compact_cached_migrate_pfn[1])
  		zone->compact_cached_migrate_pfn[1] = pfn;
  }
  
  /*
   * If no pages were isolated then mark this pageblock to be skipped in the
   * future. The information is later cleared by __reset_isolation_suitable().
   */
  static void update_pageblock_skip(struct compact_control *cc,
  			struct page *page, unsigned long pfn)
  {
  	struct zone *zone = cc->zone;

  	if (cc->no_set_skip_hint)
  		return;
  	if (!page)
  		return;
  	set_pageblock_skip(page);

  	/* Update where async and sync compaction should restart */
  	if (pfn < zone->compact_cached_free_pfn)
  		zone->compact_cached_free_pfn = pfn;
  }
  #else
  static inline bool isolation_suitable(struct compact_control *cc,
  					struct page *page)
  {
  	return true;
  }
  static inline bool pageblock_skip_persistent(struct page *page)
  {
  	return false;
  }
  
  static inline void update_pageblock_skip(struct compact_control *cc,
  			struct page *page, unsigned long pfn)
  {
  }
  
  static void update_cached_migrate(struct compact_control *cc, unsigned long pfn)
  {
  }
  
  static bool test_and_set_skip(struct compact_control *cc, struct page *page,
  							unsigned long pfn)
  {
  	return false;
  }
  #endif /* CONFIG_COMPACTION */
  /*
   * Compaction requires the taking of some coarse locks that are potentially
   * very heavily contended. For async compaction, trylock and record if the
   * lock is contended. The lock will still be acquired but compaction will
   * abort when the current block is finished regardless of success rate.
   * Sync compaction acquires the lock.
   *
   * Always returns true which makes it easier to track lock state in callers.
   */
  static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
  						struct compact_control *cc)
  {
  	/* Track if the lock is contended in async mode */
  	if (cc->mode == MIGRATE_ASYNC && !cc->contended) {
  		if (spin_trylock_irqsave(lock, *flags))
  			return true;
  
  		cc->contended = true;
  	}

  	spin_lock_irqsave(lock, *flags);
  	return true;
  }
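/*
 * Editor's note -- usage, not part of the original file: callers track lock
 * ownership in a local 'locked' flag and take the lock lazily, as the free
 * scanner does further down in isolate_freepages_block():
 *
 *	if (!locked)
 *		locked = compact_lock_irqsave(&cc->zone->lock, &flags, cc);
 *
 * Because the function always returns true, 'locked' simply records that the
 * lock is now held; contention is reported separately through cc->contended.
 */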
  /*
   * Compaction requires the taking of some coarse locks that are potentially
   * very heavily contended. The lock should be periodically unlocked to avoid
   * having disabled IRQs for a long time, even when there is nobody waiting on
   * the lock. It might also be that allowing the IRQs will result in
   * need_resched() becoming true. If scheduling is needed, async compaction
   * aborts. Sync compaction schedules.
   * Either compaction type will also abort if a fatal signal is pending.
   * In either case if the lock was locked, it is dropped and not regained.
   *
   * Returns true if compaction should abort due to fatal signal pending, or
   *		async compaction due to need_resched()
   * Returns false when compaction can continue (sync compaction might have
   *		scheduled)
   */
  static bool compact_unlock_should_abort(spinlock_t *lock,
  		unsigned long flags, bool *locked, struct compact_control *cc)
  {
  	if (*locked) {
  		spin_unlock_irqrestore(lock, flags);
  		*locked = false;
  	}

  	if (fatal_signal_pending(current)) {
  		cc->contended = true;
  		return true;
  	}

  	cond_resched();
  
  	return false;
  }
  /*
   * Isolate free pages onto a private freelist. If @strict is true, will abort
   * returning 0 on any invalid PFNs or non-free pages inside of the pageblock
   * (even though it may still end up isolating some pages).
   */
  static unsigned long isolate_freepages_block(struct compact_control *cc,
  				unsigned long *start_pfn,
  				unsigned long end_pfn,
  				struct list_head *freelist,
  				unsigned int stride,
  				bool strict)
  {
  	int nr_scanned = 0, total_isolated = 0;
  	struct page *cursor;
  	unsigned long flags = 0;
  	bool locked = false;
  	unsigned long blockpfn = *start_pfn;
  	unsigned int order;

  	/* Strict mode is for isolation, speed is secondary */
  	if (strict)
  		stride = 1;
  	cursor = pfn_to_page(blockpfn);
  	/* Isolate free pages. */
  	for (; blockpfn < end_pfn; blockpfn += stride, cursor += stride) {
  		int isolated;
  		struct page *page = cursor;
  		/*
  		 * Periodically drop the lock (if held) regardless of its
  		 * contention, to give chance to IRQs. Abort if fatal signal
  		 * pending or async compaction detects need_resched()
  		 */
  		if (!(blockpfn % SWAP_CLUSTER_MAX)
  		    && compact_unlock_should_abort(&cc->zone->lock, flags,
  								&locked, cc))
  			break;
  		nr_scanned++;
  		if (!pfn_valid_within(blockpfn))
  			goto isolate_fail;
  		/*
  		 * For compound pages such as THP and hugetlbfs, we can save
  		 * potentially a lot of iterations if we skip them at once.
  		 * The check is racy, but we can consider only valid values
  		 * and the only danger is skipping too much.
  		 */
  		if (PageCompound(page)) {
  			const unsigned int order = compound_order(page);
  			if (likely(order < MAX_ORDER)) {
  				blockpfn += (1UL << order) - 1;
  				cursor += (1UL << order) - 1;
  			}
  			goto isolate_fail;
  		}
  		if (!PageBuddy(page))
  			goto isolate_fail;
  
  		/*
  		 * If we already hold the lock, we can skip some rechecking.
  		 * Note that if we hold the lock now, checked_pageblock was
  		 * already set in some previous iteration (or strict is true),
  		 * so it is correct to skip the suitable migration target
  		 * recheck as well.
  		 */
  		if (!locked) {
  			locked = compact_lock_irqsave(&cc->zone->lock,
  								&flags, cc);

  			/* Recheck this is a buddy page under lock */
  			if (!PageBuddy(page))
  				goto isolate_fail;
  		}

  		/* Found a free page, will break it into order-0 pages */
  		order = page_order(page);
  		isolated = __isolate_free_page(page, order);
  		if (!isolated)
  			break;
  		set_page_private(page, order);

  		total_isolated += isolated;
  		cc->nr_freepages += isolated;
  		list_add_tail(&page->lru, freelist);
  		if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
  			blockpfn += isolated;
  			break;
  		}
  		/* Advance to the end of split page */
  		blockpfn += isolated - 1;
  		cursor += isolated - 1;
  		continue;
  
  isolate_fail:
  		if (strict)
  			break;
  		else
  			continue;
  	}
  	if (locked)
  		spin_unlock_irqrestore(&cc->zone->lock, flags);
  	/*
  	 * There is a tiny chance that we have read bogus compound_order(),
  	 * so be careful to not go outside of the pageblock.
  	 */
  	if (unlikely(blockpfn > end_pfn))
  		blockpfn = end_pfn;
  	trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn,
  					nr_scanned, total_isolated);
  	/* Record how far we have got within the block */
  	*start_pfn = blockpfn;
  	/*
  	 * If strict isolation is requested by CMA then check that all the
  	 * pages requested were isolated. If there were any failures, 0 is
  	 * returned and CMA will fail.
  	 */
  	if (strict && blockpfn < end_pfn)
  		total_isolated = 0;
  	cc->total_free_scanned += nr_scanned;
  	if (total_isolated)
  		count_compact_events(COMPACTISOLATED, total_isolated);
  	return total_isolated;
  }
  /**
   * isolate_freepages_range() - isolate free pages.
   * @cc:        Compaction control structure.
   * @start_pfn: The first PFN to start isolating.
   * @end_pfn:   The one-past-last PFN.
   *
   * Non-free pages, invalid PFNs, or zone boundaries within the
 * [start_pfn, end_pfn) range are considered errors, and cause the function to
   * undo its actions and return zero.
   *
 * Otherwise, the function returns the one-past-the-last PFN of the isolated page
 * (which may be greater than end_pfn if end fell in the middle of
   * a free page).
   */
  unsigned long
  isolate_freepages_range(struct compact_control *cc,
  			unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
  	LIST_HEAD(freelist);
  	pfn = start_pfn;
  	block_start_pfn = pageblock_start_pfn(pfn);
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
  	block_end_pfn = pageblock_end_pfn(pfn);
  
  	for (; pfn < end_pfn; pfn += isolated,
  				block_start_pfn = block_end_pfn,
  				block_end_pfn += pageblock_nr_pages) {
  		/* Protect pfn from changing by isolate_freepages_block */
  		unsigned long isolate_start_pfn = pfn;

  		block_end_pfn = min(block_end_pfn, end_pfn);
  		/*
  		 * pfn could pass the block_end_pfn if isolated freepage
  		 * is more than pageblock order. In this case, we adjust
  		 * scanning range to right one.
  		 */
  		if (pfn >= block_end_pfn) {
  			block_start_pfn = pageblock_start_pfn(pfn);
  			block_end_pfn = pageblock_end_pfn(pfn);
  			block_end_pfn = min(block_end_pfn, end_pfn);
  		}
  		if (!pageblock_pfn_to_page(block_start_pfn,
  					block_end_pfn, cc->zone))
  			break;
  		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
  					block_end_pfn, &freelist, 0, true);
  
  		/*
  		 * In strict mode, isolate_freepages_block() returns 0 if
  		 * there are any holes in the block (ie. invalid PFNs or
  		 * non-free pages).
  		 */
  		if (!isolated)
  			break;
  
  		/*
  		 * If we managed to isolate pages, it is always (1 << n) *
  		 * pageblock_nr_pages for some non-negative n.  (Max order
  		 * page may span two pageblocks).
  		 */
  	}
  	/* __isolate_free_page() does not map the pages */
  	split_map_pages(&freelist);
  
  	if (pfn < end_pfn) {
  		/* Loop terminated early, cleanup. */
  		release_freepages(&freelist);
  		return 0;
  	}
  
  	/* We don't use freelists for anything. */
  	return pfn;
  }
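/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * isolate_freepages_range() is the strict-mode user of the free scanner. In
 * this kernel it is called from alloc_contig_range() in mm/page_alloc.c once
 * the target range has been migrated away, roughly:
 *
 *	outer_end = isolate_freepages_range(&cc, outer_start, end);
 *	if (!outer_end) {
 *		ret = -EBUSY;		// a hole or busy page remained
 *		goto done;
 *	}
 *
 * A zero return therefore makes the whole CMA/contiguous allocation attempt
 * fail rather than returning a partially isolated range.
 */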
  /* Similar to reclaim, but different enough that they don't share logic */
  static bool too_many_isolated(pg_data_t *pgdat)
  {
  	unsigned long active, inactive, isolated;

  	inactive = node_page_state(pgdat, NR_INACTIVE_FILE) +
  			node_page_state(pgdat, NR_INACTIVE_ANON);
  	active = node_page_state(pgdat, NR_ACTIVE_FILE) +
  			node_page_state(pgdat, NR_ACTIVE_ANON);
  	isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
  			node_page_state(pgdat, NR_ISOLATED_ANON);

  	return isolated > (inactive + active) / 2;
  }
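/*
 * Editor's note -- worked example, not part of the original file: with, say,
 * 300 inactive and 700 active pages on the node's LRU lists and 600 pages
 * currently isolated, the check is 600 > (300 + 700) / 2 == 500, so the
 * caller throttles: async compaction gives up, while sync compaction waits
 * in congestion_wait() until fewer pages are isolated.
 */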
  /**
   * isolate_migratepages_block() - isolate all migrate-able pages within
   *				  a single pageblock
   * @cc:		Compaction control structure.
   * @low_pfn:	The first PFN to isolate
   * @end_pfn:	The one-past-the-last PFN to isolate, within same pageblock
   * @isolate_mode: Isolation mode to be used.
   *
   * Isolate all pages that can be migrated from the range specified by
 * [low_pfn, end_pfn). The range is expected to be within the same pageblock.
 * Returns zero if there is a fatal signal pending, otherwise the PFN of the
 * first page that was not scanned (which may be less than, equal to, or more
   * than end_pfn).
   *
   * The pages are isolated on cc->migratepages list (not required to be empty),
   * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field
   * is neither read nor updated.
   */
  static unsigned long
  isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
  			unsigned long end_pfn, isolate_mode_t isolate_mode)
  {
  	pg_data_t *pgdat = cc->zone->zone_pgdat;
  	unsigned long nr_scanned = 0, nr_isolated = 0;
  	struct lruvec *lruvec;
  	unsigned long flags = 0;
  	bool locked = false;
  	struct page *page = NULL, *valid_page = NULL;
  	unsigned long start_pfn = low_pfn;
  	bool skip_on_failure = false;
  	unsigned long next_skip_pfn = 0;
  	bool skip_updated = false;

  	/*
  	 * Ensure that there are not too many pages isolated from the LRU
  	 * list by either parallel reclaimers or compaction. If there are,
  	 * delay for some time until fewer pages are isolated
  	 */
  	while (unlikely(too_many_isolated(pgdat))) {
  		/* async migration should just abort */
  		if (cc->mode == MIGRATE_ASYNC)
  			return 0;

  		congestion_wait(BLK_RW_ASYNC, HZ/10);
  
  		if (fatal_signal_pending(current))
  			return 0;
  	}
  	cond_resched();

  	if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
  		skip_on_failure = true;
  		next_skip_pfn = block_end_pfn(low_pfn, cc->order);
  	}
  	/* Time to isolate some pages for migration */
  	for (; low_pfn < end_pfn; low_pfn++) {

  		if (skip_on_failure && low_pfn >= next_skip_pfn) {
  			/*
  			 * We have isolated all migration candidates in the
  			 * previous order-aligned block, and did not skip it due
  			 * to failure. We should migrate the pages now and
  			 * hopefully succeed compaction.
  			 */
  			if (nr_isolated)
  				break;
  
  			/*
  			 * We failed to isolate in the previous order-aligned
  			 * block. Set the new boundary to the end of the
  			 * current block. Note we can't simply increase
  			 * next_skip_pfn by 1 << order, as low_pfn might have
  			 * been incremented by a higher number due to skipping
  			 * a compound or a high-order buddy page in the
  			 * previous loop iteration.
  			 */
  			next_skip_pfn = block_end_pfn(low_pfn, cc->order);
  		}
  		/*
  		 * Periodically drop the lock (if held) regardless of its
  		 * contention, to give chance to IRQs. Abort completely if
  		 * a fatal signal is pending.
  		 */
  		if (!(low_pfn % SWAP_CLUSTER_MAX)
  		    && compact_unlock_should_abort(&pgdat->lru_lock,
  					    flags, &locked, cc)) {
  			low_pfn = 0;
  			goto fatal_pending;
  		}

  		if (!pfn_valid_within(low_pfn))
  			goto isolate_fail;
  		nr_scanned++;

  		page = pfn_to_page(low_pfn);

  		/*
  		 * Check if the pageblock has already been marked skipped.
  		 * Only the aligned PFN is checked as the caller isolates
  		 * COMPACT_CLUSTER_MAX at a time so the second call must
  		 * not falsely conclude that the block should be skipped.
  		 */
  		if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) {
  			if (!cc->ignore_skip_hint && get_pageblock_skip(page)) {
  				low_pfn = end_pfn;
  				goto isolate_abort;
  			}
  			valid_page = page;
  		}

  		/*
  		 * Skip if free. We read page order here without zone lock
  		 * which is generally unsafe, but the race window is small and
  		 * the worst thing that can happen is that we skip some
  		 * potential isolation targets.
  		 */
  		if (PageBuddy(page)) {
  			unsigned long freepage_order = page_order_unsafe(page);
  
  			/*
  			 * Without lock, we cannot be sure that what we got is
  			 * a valid page order. Consider only values in the
  			 * valid order range to prevent low_pfn overflow.
  			 */
  			if (freepage_order > 0 && freepage_order < MAX_ORDER)
  				low_pfn += (1UL << freepage_order) - 1;
  			continue;
  		}

  		/*
  		 * Regardless of being on LRU, compound pages such as THP and
  		 * hugetlbfs are not to be compacted. We can potentially save
  		 * a lot of iterations if we skip them at once. The check is
  		 * racy, but we can consider only valid values and the only
  		 * danger is skipping too much.
  		 */
  		if (PageCompound(page)) {
  			const unsigned int order = compound_order(page);

  			if (likely(order < MAX_ORDER))
  				low_pfn += (1UL << order) - 1;
  			goto isolate_fail;
  		}
  		/*
  		 * Check may be lockless but that's ok as we recheck later.
  		 * It's possible to migrate LRU and non-lru movable pages.
  		 * Skip any other type of page
  		 */
  		if (!PageLRU(page)) {
  			/*
  			 * __PageMovable can return false positive so we need
  			 * to verify it under page_lock.
  			 */
  			if (unlikely(__PageMovable(page)) &&
  					!PageIsolated(page)) {
  				if (locked) {
  					spin_unlock_irqrestore(&pgdat->lru_lock,
  									flags);
  					locked = false;
  				}
  				if (!isolate_movable_page(page, isolate_mode))
  					goto isolate_success;
  			}
  			goto isolate_fail;
  		}

  		/*
  		 * Migration will fail if an anonymous page is pinned in memory,
  		 * so avoid taking lru_lock and isolating it unnecessarily in an
  		 * admittedly racy check.
  		 */
  		if (!page_mapping(page) &&
  		    page_count(page) > page_mapcount(page))
  			goto isolate_fail;

  		/*
  		 * Only allow to migrate anonymous pages in GFP_NOFS context
  		 * because those do not depend on fs locks.
  		 */
  		if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
  			goto isolate_fail;
  		/* If we already hold the lock, we can skip some rechecking */
  		if (!locked) {
  			locked = compact_lock_irqsave(&pgdat->lru_lock,
  								&flags, cc);

  			/* Try get exclusive access under lock */
  			if (!skip_updated) {
  				skip_updated = true;
  				if (test_and_set_skip(cc, page, low_pfn))
  					goto isolate_abort;
  			}

  			/* Recheck PageLRU and PageCompound under lock */
  			if (!PageLRU(page))
  				goto isolate_fail;
  
  			/*
			 * Page became compound since the non-locked check,
  			 * and it's on LRU. It can only be a THP so the order
  			 * is safe to read and it's 0 for tail pages.
  			 */
  			if (unlikely(PageCompound(page))) {
  				low_pfn += compound_nr(page) - 1;
  				goto isolate_fail;
  			}
  		}
  		lruvec = mem_cgroup_page_lruvec(page, pgdat);

  		/* Try isolate the page */
  		if (__isolate_lru_page(page, isolate_mode) != 0)
  			goto isolate_fail;

  		VM_BUG_ON_PAGE(PageCompound(page), page);

  		/* Successfully isolated */
  		del_page_from_lru_list(page, lruvec, page_lru(page));
  		inc_node_page_state(page,
  				NR_ISOLATED_ANON + page_is_file_cache(page));
  
  isolate_success:
  		list_add(&page->lru, &cc->migratepages);
  		cc->nr_migratepages++;
  		nr_isolated++;

  		/*
  		 * Avoid isolating too much unless this block is being
  		 * rescanned (e.g. dirty/writeback pages, parallel allocation)
  		 * or a lock is contended. For contention, isolate quickly to
  		 * potentially remove one source of contention.
  		 */
  		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX &&
  		    !cc->rescan && !cc->contended) {
  			++low_pfn;
  			break;
  		}
  
  		continue;
  isolate_fail:
  		if (!skip_on_failure)
  			continue;
  
  		/*
  		 * We have isolated some pages, but then failed. Release them
  		 * instead of migrating, as we cannot form the cc->order buddy
  		 * page anyway.
  		 */
  		if (nr_isolated) {
  			if (locked) {
  				spin_unlock_irqrestore(&pgdat->lru_lock, flags);
  				locked = false;
  			}
  			putback_movable_pages(&cc->migratepages);
  			cc->nr_migratepages = 0;
  			nr_isolated = 0;
  		}
  
  		if (low_pfn < next_skip_pfn) {
  			low_pfn = next_skip_pfn - 1;
  			/*
  			 * The check near the loop beginning would have updated
  			 * next_skip_pfn too, but this is a bit simpler.
  			 */
  			next_skip_pfn += 1UL << cc->order;
  		}
  	}
  	/*
  	 * The PageBuddy() check could have potentially brought us outside
  	 * the range to be scanned.
  	 */
  	if (unlikely(low_pfn > end_pfn))
  		low_pfn = end_pfn;
  isolate_abort:
  	if (locked)
  		spin_unlock_irqrestore(&pgdat->lru_lock, flags);

  	/*
	 * Update the cached scanner pfn once the pageblock has been scanned.
	 * Pages will either be migrated, in which case there is no point
	 * scanning in the near future, or migration failed, in which case the
  	 * failure reason may persist. The block is marked for skipping if
  	 * there were no pages isolated in the block or if the block is
  	 * rescanned twice in a row.
  	 */
  	if (low_pfn == end_pfn && (!nr_isolated || cc->rescan)) {
  		if (valid_page && !skip_updated)
  			set_pageblock_skip(valid_page);
  		update_cached_migrate(cc, low_pfn);
  	}

  	trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
  						nr_scanned, nr_isolated);

  fatal_pending:
  	cc->total_migrate_scanned += nr_scanned;
  	if (nr_isolated)
  		count_compact_events(COMPACTISOLATED, nr_isolated);

  	return low_pfn;
  }
  /**
   * isolate_migratepages_range() - isolate migrate-able pages in a PFN range
   * @cc:        Compaction control structure.
   * @start_pfn: The first PFN to start isolating.
   * @end_pfn:   The one-past-last PFN.
   *
   * Returns zero if isolation fails fatally due to e.g. pending signal.
 * Otherwise, the function returns the one-past-the-last PFN of the isolated page
 * (which may be greater than end_pfn if end fell in the middle of a THP page).
   */
  unsigned long
  isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
  							unsigned long end_pfn)
  {
  	unsigned long pfn, block_start_pfn, block_end_pfn;
  
  	/* Scan block by block. First and last block may be incomplete */
  	pfn = start_pfn;
  	block_start_pfn = pageblock_start_pfn(pfn);
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
  	block_end_pfn = pageblock_end_pfn(pfn);
  
  	for (; pfn < end_pfn; pfn = block_end_pfn,
  				block_start_pfn = block_end_pfn,
  				block_end_pfn += pageblock_nr_pages) {
  
  		block_end_pfn = min(block_end_pfn, end_pfn);
  		if (!pageblock_pfn_to_page(block_start_pfn,
  					block_end_pfn, cc->zone))
  			continue;
  
  		pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
  							ISOLATE_UNEVICTABLE);
  		if (!pfn)
  			break;
  
  		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
  			break;
  	}
  
  	return pfn;
  }
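
  /*
   * Note that the loop above stops each time cc->nr_migratepages reaches
   * COMPACT_CLUSTER_MAX, so a caller typically migrates the isolated batch
   * and then calls back in with an updated start_pfn until the whole range
   * has been processed.
   */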
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1058
1059
  #endif /* CONFIG_COMPACTION || CONFIG_CMA */
  #ifdef CONFIG_COMPACTION
018e9a49a   Andrew Morton   mm/compaction.c: ...
1060

b682debd9   Vlastimil Babka   mm, compaction: c...
1061
1062
1063
  static bool suitable_migration_source(struct compact_control *cc,
  							struct page *page)
  {
282722b0d   Vlastimil Babka   mm, compaction: r...
1064
  	int block_mt;
9bebefd59   Mel Gorman   mm, compaction: c...
1065
1066
  	if (pageblock_skip_persistent(page))
  		return false;
282722b0d   Vlastimil Babka   mm, compaction: r...
1067
  	if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction)
b682debd9   Vlastimil Babka   mm, compaction: c...
1068
  		return true;
282722b0d   Vlastimil Babka   mm, compaction: r...
1069
1070
1071
1072
1073
1074
  	block_mt = get_pageblock_migratetype(page);
  
  	if (cc->migratetype == MIGRATE_MOVABLE)
  		return is_migrate_movable(block_mt);
  	else
  		return block_mt == cc->migratetype;
b682debd9   Vlastimil Babka   mm, compaction: c...
1075
  }
018e9a49a   Andrew Morton   mm/compaction.c: ...
1076
  /* Returns true if the page is within a block suitable for migration to */
9f7e33879   Vlastimil Babka   mm, compaction: m...
1077
1078
  static bool suitable_migration_target(struct compact_control *cc,
  							struct page *page)
018e9a49a   Andrew Morton   mm/compaction.c: ...
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
  {
  	/* If the page is a large free page, then disallow migration */
  	if (PageBuddy(page)) {
  		/*
  		 * We are checking page_order without zone->lock taken. But
  		 * the only small danger is that we skip a potentially suitable
  		 * pageblock, so it's not worth checking that the order is in a
  		 * valid range.
  		 */
  		if (page_order_unsafe(page) >= pageblock_order)
  			return false;
  	}
1ef36db2a   Yisheng Xie   mm/compaction: ig...
1090
1091
  	if (cc->ignore_block_suitable)
  		return true;
018e9a49a   Andrew Morton   mm/compaction.c: ...
1092
  	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
b682debd9   Vlastimil Babka   mm, compaction: c...
1093
  	if (is_migrate_movable(get_pageblock_migratetype(page)))
018e9a49a   Andrew Morton   mm/compaction.c: ...
1094
1095
1096
1097
1098
  		return true;
  
  	/* Otherwise skip the block */
  	return false;
  }
70b44595e   Mel Gorman   mm, compaction: u...
1099
1100
1101
  static inline unsigned int
  freelist_scan_limit(struct compact_control *cc)
  {
dd7ef7bd1   Qian Cai   mm/compaction.c: ...
1102
1103
1104
  	unsigned short shift = BITS_PER_LONG - 1;
  
  	return (COMPACT_CLUSTER_MAX >> min(shift, cc->fast_search_fail)) + 1;
70b44595e   Mel Gorman   mm, compaction: u...
1105
  }
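
  /*
   * For example, with COMPACT_CLUSTER_MAX at its usual value of 32
   * (SWAP_CLUSTER_MAX), a fast_search_fail count of 0 gives a scan limit of
   * 33 entries, a count of 2 gives 9, and by a count of 5 the limit has
   * collapsed to 2, so repeated fast-search failures quickly shrink the
   * amount of free-list scanning that freelist_scan_limit() allows.
   */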
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1106
  /*
f2849aa09   Vlastimil Babka   mm, compaction: m...
1107
1108
1109
1110
1111
1112
1113
1114
   * Test whether the free scanner has reached the same or lower pageblock than
   * the migration scanner, and compaction should thus terminate.
   */
  static inline bool compact_scanners_met(struct compact_control *cc)
  {
  	return (cc->free_pfn >> pageblock_order)
  		<= (cc->migrate_pfn >> pageblock_order);
  }
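
  /*
   * Both PFNs are compared at pageblock granularity, so the scanners are
   * treated as having met once they are working within the same pageblock.
   * With a typical pageblock_order of 9, for example, free and migrate PFNs
   * of 0x12345 and 0x12200 both shift down to 0x91 and compaction would
   * terminate here.
   */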
5a811889d   Mel Gorman   mm, compaction: u...
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
  /*
   * Used when scanning for a suitable migration target which scans freelists
   * in reverse. Reorders the list such that the unscanned pages are scanned
   * first on the next iteration of the free scanner.
   */
  static void
  move_freelist_head(struct list_head *freelist, struct page *freepage)
  {
  	LIST_HEAD(sublist);
  
  	if (!list_is_last(freelist, &freepage->lru)) {
  		list_cut_before(&sublist, freelist, &freepage->lru);
  		if (!list_empty(&sublist))
  			list_splice_tail(&sublist, freelist);
  	}
  }
  
  /*
   * Similar to move_freelist_head except used by the migration scanner
   * when scanning forward. It's possible for these list operations to
   * move against each other if they search the free list exactly in
   * lockstep.
   */
70b44595e   Mel Gorman   mm, compaction: u...
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
  static void
  move_freelist_tail(struct list_head *freelist, struct page *freepage)
  {
  	LIST_HEAD(sublist);
  
  	if (!list_is_first(freelist, &freepage->lru)) {
  		list_cut_position(&sublist, freelist, &freepage->lru);
  		if (!list_empty(&sublist))
  			list_splice_tail(&sublist, freelist);
  	}
  }
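
  /*
   * As an illustration of move_freelist_head() and move_freelist_tail():
   * for a freelist a-b-c-d where the scanner stopped at c,
   * move_freelist_head() rotates the list to c-d-a-b so that a reverse walk
   * resumes with the unscanned b and a, while move_freelist_tail() rotates
   * it to d-a-b-c so that a forward walk resumes with the unscanned d.
   */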
5a811889d   Mel Gorman   mm, compaction: u...
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
  static void
  fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
  {
  	unsigned long start_pfn, end_pfn;
  	struct page *page = pfn_to_page(pfn);
  
  	/* Do not search around if there are enough pages already */
  	if (cc->nr_freepages >= cc->nr_migratepages)
  		return;
  
  	/* Minimise scanning during async compaction */
  	if (cc->direct_compaction && cc->mode == MIGRATE_ASYNC)
  		return;
  
  	/* Pageblock boundaries */
  	start_pfn = pageblock_start_pfn(pfn);
60fce36af   Mel Gorman   mm/compaction.c: ...
1165
  	end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone)) - 1;
5a811889d   Mel Gorman   mm, compaction: u...
1166
1167
1168
  
  	/* Scan before */
  	if (start_pfn != pfn) {
4fca9730c   Mel Gorman   mm, compaction: s...
1169
  		isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false);
5a811889d   Mel Gorman   mm, compaction: u...
1170
1171
1172
1173
1174
1175
  		if (cc->nr_freepages >= cc->nr_migratepages)
  			return;
  	}
  
  	/* Scan after */
  	start_pfn = pfn + nr_isolated;
60fce36af   Mel Gorman   mm/compaction.c: ...
1176
  	if (start_pfn < end_pfn)
4fca9730c   Mel Gorman   mm, compaction: s...
1177
  		isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
5a811889d   Mel Gorman   mm, compaction: u...
1178
1179
1180
1181
1182
  
  	/* Skip this pageblock in the future as it's full or nearly full */
  	if (cc->nr_freepages < cc->nr_migratepages)
  		set_pageblock_skip(page);
  }
dbe2d4e4f   Mel Gorman   mm, compaction: r...
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
  /* Search orders in round-robin fashion */
  static int next_search_order(struct compact_control *cc, int order)
  {
  	order--;
  	if (order < 0)
  		order = cc->order - 1;
  
  	/* Search wrapped around? */
  	if (order == cc->search_order) {
  		cc->search_order--;
  		if (cc->search_order < 0)
  			cc->search_order = cc->order - 1;
  		return -1;
  	}
  
  	return order;
  }
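
  /*
   * For example, with cc->order == 4 and cc->search_order == 2, the caller's
   * loop visits orders 2, 1, 0 and then 3, stopping once the walk wraps back
   * around to 2; search_order is then decremented so the next invocation
   * starts from a different order.
   */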
5a811889d   Mel Gorman   mm, compaction: u...
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
  static unsigned long
  fast_isolate_freepages(struct compact_control *cc)
  {
  	unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
  	unsigned int nr_scanned = 0;
  	unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0;
  	unsigned long nr_isolated = 0;
  	unsigned long distance;
  	struct page *page = NULL;
  	bool scan_start = false;
  	int order;
  
  	/* Full compaction passes in a negative order */
  	if (cc->order <= 0)
  		return cc->free_pfn;
  
  	/*
  	 * If starting the scan, use a deeper search and use the highest
  	 * PFN found if a suitable one is not found.
  	 */
e332f741a   Mel Gorman   mm, compaction: b...
1220
  	if (cc->free_pfn >= cc->zone->compact_init_free_pfn) {
5a811889d   Mel Gorman   mm, compaction: u...
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
  		limit = pageblock_nr_pages >> 1;
  		scan_start = true;
  	}
  
  	/*
  	 * Preferred point is in the top quarter of the scan space but take
  	 * a pfn from the top half if the search is problematic.
  	 */
  	distance = (cc->free_pfn - cc->migrate_pfn);
  	low_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 2));
  	min_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 1));
  
  	if (WARN_ON_ONCE(min_pfn > low_pfn))
  		low_pfn = min_pfn;
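
  	/*
  	 * As a concrete example, if migrate_pfn is 0x10000 and free_pfn is
  	 * 0x30000, distance is 0x20000, so low_pfn is the pageblock holding
  	 * 0x28000 (a quarter of the way down from free_pfn) and min_pfn the
  	 * pageblock holding 0x20000 (half way down).
  	 */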
dbe2d4e4f   Mel Gorman   mm, compaction: r...
1235
1236
1237
1238
1239
1240
1241
1242
1243
  	/*
  	 * Search starts from the last successful isolation order or the next
  	 * order to search after a previous failure
  	 */
  	cc->search_order = min_t(unsigned int, cc->order - 1, cc->search_order);
  
  	for (order = cc->search_order;
  	     !page && order >= 0;
  	     order = next_search_order(cc, order)) {
5a811889d   Mel Gorman   mm, compaction: u...
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
  		struct free_area *area = &cc->zone->free_area[order];
  		struct list_head *freelist;
  		struct page *freepage;
  		unsigned long flags;
  		unsigned int order_scanned = 0;
  
  		if (!area->nr_free)
  			continue;
  
  		spin_lock_irqsave(&cc->zone->lock, flags);
  		freelist = &area->free_list[MIGRATE_MOVABLE];
  		list_for_each_entry_reverse(freepage, freelist, lru) {
  			unsigned long pfn;
  
  			order_scanned++;
  			nr_scanned++;
  			pfn = page_to_pfn(freepage);
  
  			if (pfn >= highest)
  				highest = pageblock_start_pfn(pfn);
  
  			if (pfn >= low_pfn) {
  				cc->fast_search_fail = 0;
dbe2d4e4f   Mel Gorman   mm, compaction: r...
1267
  				cc->search_order = order;
5a811889d   Mel Gorman   mm, compaction: u...
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
  				page = freepage;
  				break;
  			}
  
  			if (pfn >= min_pfn && pfn > high_pfn) {
  				high_pfn = pfn;
  
  				/* Shorten the scan if a candidate is found */
  				limit >>= 1;
  			}
  
  			if (order_scanned >= limit)
  				break;
  		}
  
  		/* Use a minimum pfn if a preferred one was not found */
  		if (!page && high_pfn) {
  			page = pfn_to_page(high_pfn);
  
  			/* Update freepage for the list reorder below */
  			freepage = page;
  		}
  
  		/* Reorder so that a future search skips recent pages */
  		move_freelist_head(freelist, freepage);
  
  		/* Isolate the page if available */
  		if (page) {
  			if (__isolate_free_page(page, order)) {
  				set_page_private(page, order);
  				nr_isolated = 1 << order;
  				cc->nr_freepages += nr_isolated;
  				list_add_tail(&page->lru, &cc->freepages);
  				count_compact_events(COMPACTISOLATED, nr_isolated);
  			} else {
  				/* If isolation fails, abort the search */
5b56d996d   Qian Cai   mm/compaction.c: ...
1304
  				order = cc->search_order + 1;
5a811889d   Mel Gorman   mm, compaction: u...
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
  				page = NULL;
  			}
  		}
  
  		spin_unlock_irqrestore(&cc->zone->lock, flags);
  
  		/*
  		 * Smaller scan on next order so the total scan is related
  		 * to freelist_scan_limit.
  		 */
  		if (order_scanned >= limit)
  			limit = min(1U, limit >> 1);
  	}
  
  	if (!page) {
  		cc->fast_search_fail++;
  		if (scan_start) {
  			/*
  			 * Use the highest PFN found above min. If one was
  			 * not found, be pessimistic for direct compaction
  			 * and use the min mark.
  			 */
  			if (highest) {
  				page = pfn_to_page(highest);
  				cc->free_pfn = highest;
  			} else {
e577c8b64   Suzuki K Poulose   mm, compaction: m...
1331
  				if (cc->direct_compaction && pfn_valid(min_pfn)) {
5a811889d   Mel Gorman   mm, compaction: u...
1332
1333
1334
1335
1336
1337
  					page = pfn_to_page(min_pfn);
  					cc->free_pfn = min_pfn;
  				}
  			}
  		}
  	}
d097a6f63   Mel Gorman   mm, compaction: r...
1338
1339
  	if (highest && highest >= cc->zone->compact_cached_free_pfn) {
  		highest -= pageblock_nr_pages;
5a811889d   Mel Gorman   mm, compaction: u...
1340
  		cc->zone->compact_cached_free_pfn = highest;
d097a6f63   Mel Gorman   mm, compaction: r...
1341
  	}
5a811889d   Mel Gorman   mm, compaction: u...
1342
1343
1344
1345
1346
1347
1348
1349
1350
  
  	cc->total_free_scanned += nr_scanned;
  	if (!page)
  		return cc->free_pfn;
  
  	low_pfn = page_to_pfn(page);
  	fast_isolate_around(cc, low_pfn, nr_isolated);
  	return low_pfn;
  }
f2849aa09   Vlastimil Babka   mm, compaction: m...
1351
  /*
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1352
1353
   * Based on information in the current compact_control, find blocks
   * suitable for isolating free pages from and then isolate them.
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1354
   */
edc2ca612   Vlastimil Babka   mm, compaction: m...
1355
  static void isolate_freepages(struct compact_control *cc)
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1356
  {
edc2ca612   Vlastimil Babka   mm, compaction: m...
1357
  	struct zone *zone = cc->zone;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1358
  	struct page *page;
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1359
  	unsigned long block_start_pfn;	/* start of current pageblock */
e14c720ef   Vlastimil Babka   mm, compaction: r...
1360
  	unsigned long isolate_start_pfn; /* exact pfn we start at */
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1361
1362
  	unsigned long block_end_pfn;	/* end of current pageblock */
  	unsigned long low_pfn;	     /* lowest pfn scanner is able to scan */
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1363
  	struct list_head *freelist = &cc->freepages;
4fca9730c   Mel Gorman   mm, compaction: s...
1364
  	unsigned int stride;
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1365

5a811889d   Mel Gorman   mm, compaction: u...
1366
1367
1368
1369
  	/* Try a small search of the free lists for a candidate */
  	isolate_start_pfn = fast_isolate_freepages(cc);
  	if (cc->nr_freepages)
  		goto splitmap;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1370
1371
  	/*
  	 * Initialise the free scanner. The starting point is where we last
49e068f0b   Vlastimil Babka   mm/compaction: ma...
1372
  	 * successfully isolated from, zone-cached value, or the end of the
e14c720ef   Vlastimil Babka   mm, compaction: r...
1373
1374
  	 * zone when isolating for the first time. For looping we also need
  	 * this pfn aligned down to the pageblock boundary, because we do
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1375
1376
1377
  	 * block_start_pfn -= pageblock_nr_pages in the for loop.
  	 * For the ending point, take care when isolating in the last pageblock
  	 * of a zone which ends in the middle of a pageblock.
49e068f0b   Vlastimil Babka   mm/compaction: ma...
1378
1379
  	 * The low boundary is the end of the pageblock the migration scanner
  	 * is using.
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1380
  	 */
e14c720ef   Vlastimil Babka   mm, compaction: r...
1381
  	isolate_start_pfn = cc->free_pfn;
5a811889d   Mel Gorman   mm, compaction: u...
1382
  	block_start_pfn = pageblock_start_pfn(isolate_start_pfn);
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1383
1384
  	block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
  						zone_end_pfn(zone));
06b6640a3   Vlastimil Babka   mm, compaction: w...
1385
  	low_pfn = pageblock_end_pfn(cc->migrate_pfn);
4fca9730c   Mel Gorman   mm, compaction: s...
1386
  	stride = cc->mode == MIGRATE_ASYNC ? COMPACT_CLUSTER_MAX : 1;
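
  	/*
  	 * The stride only controls how sparsely isolate_freepages_block()
  	 * samples a pageblock: async compaction starts out checking roughly
  	 * every COMPACT_CLUSTER_MAX-th page, and the loop below drops back to
  	 * a stride of 1 as soon as a block yields any isolations.
  	 */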
2fe86e000   Michal Nazarewicz   mm: compaction: i...
1387

ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1388
  	/*
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1389
1390
1391
1392
  	 * Isolate free pages until enough are available to migrate the
  	 * pages on cc->migratepages. We stop searching if the migrate
  	 * and free page scanners meet or enough free pages are isolated.
  	 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1393
  	for (; block_start_pfn >= low_pfn;
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1394
  				block_end_pfn = block_start_pfn,
e14c720ef   Vlastimil Babka   mm, compaction: r...
1395
1396
  				block_start_pfn -= pageblock_nr_pages,
  				isolate_start_pfn = block_start_pfn) {
4fca9730c   Mel Gorman   mm, compaction: s...
1397
  		unsigned long nr_isolated;
f6ea3adb7   David Rientjes   mm/compaction.c: ...
1398
1399
  		/*
  		 * This can iterate a massively long zone without finding any
cb810ad29   Mel Gorman   mm, compaction: r...
1400
  		 * suitable migration targets, so periodically check if we need
  		 * to reschedule.
f6ea3adb7   David Rientjes   mm/compaction.c: ...
1401
  		 */
cb810ad29   Mel Gorman   mm, compaction: r...
1402
  		if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
cf66f0700   Mel Gorman   mm, compaction: d...
1403
  			cond_resched();
f6ea3adb7   David Rientjes   mm/compaction.c: ...
1404

7d49d8868   Vlastimil Babka   mm, compaction: r...
1405
1406
1407
  		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
  									zone);
  		if (!page)
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1408
1409
1410
  			continue;
  
  		/* Check the block is suitable for migration */
9f7e33879   Vlastimil Babka   mm, compaction: m...
1411
  		if (!suitable_migration_target(cc, page))
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1412
  			continue;
68e3e9262   Linus Torvalds   Revert "mm: compa...
1413

bb13ffeb9   Mel Gorman   mm: compaction: c...
1414
1415
1416
  		/* If isolation recently failed, do not retry */
  		if (!isolation_suitable(cc, page))
  			continue;
e14c720ef   Vlastimil Babka   mm, compaction: r...
1417
  		/* Found a block suitable for isolating free pages from. */
4fca9730c   Mel Gorman   mm, compaction: s...
1418
1419
  		nr_isolated = isolate_freepages_block(cc, &isolate_start_pfn,
  					block_end_pfn, freelist, stride, false);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1420

d097a6f63   Mel Gorman   mm, compaction: r...
1421
1422
1423
  		/* Update the skip hint if the full pageblock was scanned */
  		if (isolate_start_pfn == block_end_pfn)
  			update_pageblock_skip(cc, page, block_start_pfn);
cb2dcaf02   Mel Gorman   mm, compaction: f...
1424
1425
  		/* Are enough freepages isolated? */
  		if (cc->nr_freepages >= cc->nr_migratepages) {
a46cbf3bc   David Rientjes   mm, compaction: p...
1426
1427
1428
1429
1430
  			if (isolate_start_pfn >= block_end_pfn) {
  				/*
  				 * Restart at previous pageblock if more
  				 * freepages can be isolated next time.
  				 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1431
1432
  				isolate_start_pfn =
  					block_start_pfn - pageblock_nr_pages;
a46cbf3bc   David Rientjes   mm, compaction: p...
1433
  			}
be9765722   Vlastimil Babka   mm, compaction: p...
1434
  			break;
a46cbf3bc   David Rientjes   mm, compaction: p...
1435
  		} else if (isolate_start_pfn < block_end_pfn) {
f5f61a320   Vlastimil Babka   mm, compaction: s...
1436
  			/*
a46cbf3bc   David Rientjes   mm, compaction: p...
1437
1438
  			 * If isolation failed early, do not continue
  			 * needlessly.
f5f61a320   Vlastimil Babka   mm, compaction: s...
1439
  			 */
a46cbf3bc   David Rientjes   mm, compaction: p...
1440
  			break;
f5f61a320   Vlastimil Babka   mm, compaction: s...
1441
  		}
4fca9730c   Mel Gorman   mm, compaction: s...
1442
1443
1444
1445
1446
1447
1448
  
  		/* Adjust stride depending on isolation */
  		if (nr_isolated) {
  			stride = 1;
  			continue;
  		}
  		stride = min_t(unsigned int, COMPACT_CLUSTER_MAX, stride << 1);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1449
  	}
7ed695e06   Vlastimil Babka   mm: compaction: d...
1450
  	/*
f5f61a320   Vlastimil Babka   mm, compaction: s...
1451
1452
1453
1454
  	 * Record where the free scanner will restart next time. Either we
  	 * broke from the loop and set isolate_start_pfn based on the last
  	 * call to isolate_freepages_block(), or we met the migration scanner
  	 * and the loop terminated due to isolate_start_pfn < low_pfn
7ed695e06   Vlastimil Babka   mm: compaction: d...
1455
  	 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1456
  	cc->free_pfn = isolate_start_pfn;
5a811889d   Mel Gorman   mm, compaction: u...
1457
1458
1459
1460
  
  splitmap:
  	/* __isolate_free_page() does not map the pages */
  	split_map_pages(freelist);
748446bb6   Mel Gorman   mm: compaction: m...
1461
1462
1463
1464
1465
1466
1467
  }
  
  /*
   * This is a migrate-callback that "allocates" freepages by taking pages
   * from the isolated freelists in the block we are migrating to.
   */
  static struct page *compaction_alloc(struct page *migratepage,
666feb21a   Michal Hocko   mm, migrate: remo...
1468
  					unsigned long data)
748446bb6   Mel Gorman   mm: compaction: m...
1469
1470
1471
  {
  	struct compact_control *cc = (struct compact_control *)data;
  	struct page *freepage;
748446bb6   Mel Gorman   mm: compaction: m...
1472
  	if (list_empty(&cc->freepages)) {
cb2dcaf02   Mel Gorman   mm, compaction: f...
1473
  		isolate_freepages(cc);
748446bb6   Mel Gorman   mm: compaction: m...
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
  
  		if (list_empty(&cc->freepages))
  			return NULL;
  	}
  
  	freepage = list_entry(cc->freepages.next, struct page, lru);
  	list_del(&freepage->lru);
  	cc->nr_freepages--;
  
  	return freepage;
  }
  
  /*
d53aea3d4   David Rientjes   mm, compaction: r...
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
   * This is a migrate-callback that "frees" freepages back to the isolated
   * freelist.  All pages on the freelist are from the same zone, so there is no
   * special handling needed for NUMA.
   */
  static void compaction_free(struct page *page, unsigned long data)
  {
  	struct compact_control *cc = (struct compact_control *)data;
  
  	list_add(&page->lru, &cc->freepages);
  	cc->nr_freepages++;
  }
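
  /*
   * compaction_alloc() and compaction_free() are handed to migrate_pages()
   * as the allocation/free callback pair in compact_zone(), so every
   * migration target comes from, and every unused page returns to,
   * cc->freepages.
   */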
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1498
1499
1500
1501
1502
1503
1504
1505
  /* possible outcome of isolate_migratepages */
  typedef enum {
  	ISOLATE_ABORT,		/* Abort compaction now */
  	ISOLATE_NONE,		/* No pages isolated, continue scanning */
  	ISOLATE_SUCCESS,	/* Pages isolated, migrate */
  } isolate_migrate_t;
  
  /*
5bbe3547a   Eric B Munson   mm: allow compact...
1506
1507
1508
1509
   * Allow userspace to control policy on scanning the unevictable LRU for
   * compactable pages.
   */
  int sysctl_compact_unevictable_allowed __read_mostly = 1;
70b44595e   Mel Gorman   mm, compaction: u...
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
  static inline void
  update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
  {
  	if (cc->fast_start_pfn == ULONG_MAX)
  		return;
  
  	if (!cc->fast_start_pfn)
  		cc->fast_start_pfn = pfn;
  
  	cc->fast_start_pfn = min(cc->fast_start_pfn, pfn);
  }
  
  static inline unsigned long
  reinit_migrate_pfn(struct compact_control *cc)
  {
  	if (!cc->fast_start_pfn || cc->fast_start_pfn == ULONG_MAX)
  		return cc->migrate_pfn;
  
  	cc->migrate_pfn = cc->fast_start_pfn;
  	cc->fast_start_pfn = ULONG_MAX;
  
  	return cc->migrate_pfn;
  }
  
  /*
   * Briefly search the free lists for a migration source that already has
   * some free pages to reduce the number of pages that need migration
   * before a pageblock is free.
   */
  static unsigned long fast_find_migrateblock(struct compact_control *cc)
  {
  	unsigned int limit = freelist_scan_limit(cc);
  	unsigned int nr_scanned = 0;
  	unsigned long distance;
  	unsigned long pfn = cc->migrate_pfn;
  	unsigned long high_pfn;
  	int order;
  
  	/* Skip hints are relied on to avoid repeats on the fast search */
  	if (cc->ignore_skip_hint)
  		return pfn;
  
  	/*
  	 * If the migrate_pfn is not at the start of a zone or the start
  	 * of a pageblock then assume this is a continuation of a previous
  	 * scan restarted due to COMPACT_CLUSTER_MAX.
  	 */
  	if (pfn != cc->zone->zone_start_pfn && pfn != pageblock_start_pfn(pfn))
  		return pfn;
  
  	/*
  	 * For smaller orders, just linearly scan as the number of pages
  	 * to migrate should be relatively small and does not necessarily
  	 * justify freeing up a large block for a small allocation.
  	 */
  	if (cc->order <= PAGE_ALLOC_COSTLY_ORDER)
  		return pfn;
  
  	/*
  	 * Only allow kcompactd and direct requests for movable pages to
  	 * quickly clear out a MOVABLE pageblock for allocation. This
  	 * reduces the risk that a large movable pageblock is freed for
  	 * an unmovable/reclaimable small allocation.
  	 */
  	if (cc->direct_compaction && cc->migratetype != MIGRATE_MOVABLE)
  		return pfn;
  
  	/*
  	 * When starting the migration scanner, pick any pageblock within the
  	 * first half of the search space. Otherwise try and pick a pageblock
  	 * within the first eighth to reduce the chances that a migration
  	 * target later becomes a source.
  	 */
  	distance = (cc->free_pfn - cc->migrate_pfn) >> 1;
  	if (cc->migrate_pfn != cc->zone->zone_start_pfn)
  		distance >>= 2;
  	high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance);
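
  	/*
  	 * For instance, with 0x40000 PFNs between the scanners, the search
  	 * window ends 0x20000 PFNs in on the first pass but only 0x8000 PFNs
  	 * in on later passes, keeping the fast search close to the migration
  	 * scanner's current position.
  	 */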
  
  	for (order = cc->order - 1;
  	     order >= PAGE_ALLOC_COSTLY_ORDER && pfn == cc->migrate_pfn && nr_scanned < limit;
  	     order--) {
  		struct free_area *area = &cc->zone->free_area[order];
  		struct list_head *freelist;
  		unsigned long flags;
  		struct page *freepage;
  
  		if (!area->nr_free)
  			continue;
  
  		spin_lock_irqsave(&cc->zone->lock, flags);
  		freelist = &area->free_list[MIGRATE_MOVABLE];
  		list_for_each_entry(freepage, freelist, lru) {
  			unsigned long free_pfn;
  
  			nr_scanned++;
  			free_pfn = page_to_pfn(freepage);
  			if (free_pfn < high_pfn) {
70b44595e   Mel Gorman   mm, compaction: u...
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
  				/*
  				 * Avoid if skipped recently. Ideally it would
  				 * move to the tail but even safe iteration of
  				 * the list assumes an entry is deleted, not
  				 * reordered.
  				 */
  				if (get_pageblock_skip(freepage)) {
  					if (list_is_last(freelist, &freepage->lru))
  						break;
  
  					continue;
  				}
  
  				/* Reorder so that a future search skips recent pages */
  				move_freelist_tail(freelist, freepage);
e380bebe4   Mel Gorman   mm, compaction: k...
1622
  				update_fast_start_pfn(cc, free_pfn);
70b44595e   Mel Gorman   mm, compaction: u...
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
  				pfn = pageblock_start_pfn(free_pfn);
  				cc->fast_search_fail = 0;
  				set_pageblock_skip(freepage);
  				break;
  			}
  
  			if (nr_scanned >= limit) {
  				cc->fast_search_fail++;
  				move_freelist_tail(freelist, freepage);
  				break;
  			}
  		}
  		spin_unlock_irqrestore(&cc->zone->lock, flags);
  	}
  
  	cc->total_migrate_scanned += nr_scanned;
  
  	/*
  	 * If fast scanning failed then use a cached entry for a page block
  	 * that had free pages as the basis for starting a linear scan.
  	 */
  	if (pfn == cc->migrate_pfn)
  		pfn = reinit_migrate_pfn(cc);
  
  	return pfn;
  }
5bbe3547a   Eric B Munson   mm: allow compact...
1649
  /*
edc2ca612   Vlastimil Babka   mm, compaction: m...
1650
1651
1652
   * Isolate all pages that can be migrated from the first suitable block,
   * starting at the block pointed to by the migrate scanner pfn within
   * compact_control.
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1653
   */
32aaf0553   Pengfei Li   mm/compaction.c: ...
1654
  static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1655
  {
e1409c325   Joonsoo Kim   mm/compaction: pa...
1656
1657
1658
  	unsigned long block_start_pfn;
  	unsigned long block_end_pfn;
  	unsigned long low_pfn;
edc2ca612   Vlastimil Babka   mm, compaction: m...
1659
1660
  	struct page *page;
  	const isolate_mode_t isolate_mode =
5bbe3547a   Eric B Munson   mm: allow compact...
1661
  		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
1d2047fef   Hugh Dickins   mm, compaction: d...
1662
  		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
70b44595e   Mel Gorman   mm, compaction: u...
1663
  	bool fast_find_block;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1664

edc2ca612   Vlastimil Babka   mm, compaction: m...
1665
1666
  	/*
  	 * Start at where we last stopped, or beginning of the zone as
70b44595e   Mel Gorman   mm, compaction: u...
1667
1668
  	 * initialized by compact_zone(). The first failure will use
  	 * the lowest PFN as the starting point for linear scanning.
edc2ca612   Vlastimil Babka   mm, compaction: m...
1669
  	 */
70b44595e   Mel Gorman   mm, compaction: u...
1670
  	low_pfn = fast_find_migrateblock(cc);
06b6640a3   Vlastimil Babka   mm, compaction: w...
1671
  	block_start_pfn = pageblock_start_pfn(low_pfn);
32aaf0553   Pengfei Li   mm/compaction.c: ...
1672
1673
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1674

70b44595e   Mel Gorman   mm, compaction: u...
1675
1676
1677
1678
1679
1680
  	/*
  	 * fast_find_migrateblock() marks a pageblock as skipped, so to avoid
  	 * the isolation_suitable() check below, record whether the fast
  	 * search was successful.
  	 */
  	fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1681
  	/* Only scan within a pageblock boundary */
06b6640a3   Vlastimil Babka   mm, compaction: w...
1682
  	block_end_pfn = pageblock_end_pfn(low_pfn);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1683

edc2ca612   Vlastimil Babka   mm, compaction: m...
1684
1685
1686
1687
  	/*
  	 * Iterate over whole pageblocks until we find the first suitable.
  	 * Do not cross the free scanner.
  	 */
e1409c325   Joonsoo Kim   mm/compaction: pa...
1688
  	for (; block_end_pfn <= cc->free_pfn;
70b44595e   Mel Gorman   mm, compaction: u...
1689
  			fast_find_block = false,
e1409c325   Joonsoo Kim   mm/compaction: pa...
1690
1691
1692
  			low_pfn = block_end_pfn,
  			block_start_pfn = block_end_pfn,
  			block_end_pfn += pageblock_nr_pages) {
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1693

edc2ca612   Vlastimil Babka   mm, compaction: m...
1694
1695
1696
  		/*
  		 * This can potentially iterate a massively long zone with
  		 * many pageblocks unsuitable, so periodically check if we
cb810ad29   Mel Gorman   mm, compaction: r...
1697
  		 * need to schedule.
edc2ca612   Vlastimil Babka   mm, compaction: m...
1698
  		 */
cb810ad29   Mel Gorman   mm, compaction: r...
1699
  		if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)))
cf66f0700   Mel Gorman   mm, compaction: d...
1700
  			cond_resched();
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1701

32aaf0553   Pengfei Li   mm/compaction.c: ...
1702
1703
  		page = pageblock_pfn_to_page(block_start_pfn,
  						block_end_pfn, cc->zone);
7d49d8868   Vlastimil Babka   mm, compaction: r...
1704
  		if (!page)
edc2ca612   Vlastimil Babka   mm, compaction: m...
1705
  			continue;
e380bebe4   Mel Gorman   mm, compaction: k...
1706
1707
1708
1709
1710
1711
1712
1713
1714
  		/*
  		 * If isolation recently failed, do not retry. Only check the
  		 * pageblock once, as COMPACT_CLUSTER_MAX causes a pageblock
  		 * to be visited multiple times. Assume the skip hint was
  		 * checked before the block was marked "skip", so other
  		 * compaction instances do not scan the same block.
  		 */
  		if (IS_ALIGNED(low_pfn, pageblock_nr_pages) &&
  		    !fast_find_block && !isolation_suitable(cc, page))
edc2ca612   Vlastimil Babka   mm, compaction: m...
1715
1716
1717
  			continue;
  
  		/*
9bebefd59   Mel Gorman   mm, compaction: c...
1718
1719
1720
1721
1722
1723
  		 * For async compaction, also only scan in MOVABLE blocks
  		 * without huge pages. Async compaction is optimistic to see
  		 * if the minimum amount of work satisfies the allocation.
  		 * The cached PFN is updated as it's possible that all
  		 * remaining blocks between source and target are unsuitable
  		 * and the compaction scanners fail to meet.
edc2ca612   Vlastimil Babka   mm, compaction: m...
1724
  		 */
9bebefd59   Mel Gorman   mm, compaction: c...
1725
1726
  		if (!suitable_migration_source(cc, page)) {
  			update_cached_migrate(cc, block_end_pfn);
edc2ca612   Vlastimil Babka   mm, compaction: m...
1727
  			continue;
9bebefd59   Mel Gorman   mm, compaction: c...
1728
  		}
edc2ca612   Vlastimil Babka   mm, compaction: m...
1729
1730
  
  		/* Perform the isolation */
e1409c325   Joonsoo Kim   mm/compaction: pa...
1731
1732
  		low_pfn = isolate_migratepages_block(cc, low_pfn,
  						block_end_pfn, isolate_mode);
edc2ca612   Vlastimil Babka   mm, compaction: m...
1733

cb2dcaf02   Mel Gorman   mm, compaction: f...
1734
  		if (!low_pfn)
edc2ca612   Vlastimil Babka   mm, compaction: m...
1735
1736
1737
1738
1739
1740
1741
1742
1743
  			return ISOLATE_ABORT;
  
  		/*
  		 * Either we isolated something and proceed with migration, or
  		 * we failed and compact_zone() should decide whether to
  		 * continue or not.
  		 */
  		break;
  	}
f2849aa09   Vlastimil Babka   mm, compaction: m...
1744
1745
  	/* Record where migration scanner will be restarted. */
  	cc->migrate_pfn = low_pfn;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1746

edc2ca612   Vlastimil Babka   mm, compaction: m...
1747
  	return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1748
  }
21c527a3c   Yaowei Bai   mm/compaction.c: ...
1749
1750
1751
1752
1753
1754
1755
1756
  /*
   * order == -1 is expected when compacting via
   * /proc/sys/vm/compact_memory
   */
  static inline bool is_via_compact_memory(int order)
  {
  	return order == -1;
  }
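
  /*
   * Writing to /proc/sys/vm/compact_memory (e.g. "echo 1 >
   * /proc/sys/vm/compact_memory") requests full compaction of every zone,
   * which reaches this code with order == -1.
   */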
40cacbcb3   Mel Gorman   mm, compaction: r...
1757
  static enum compact_result __compact_finished(struct compact_control *cc)
748446bb6   Mel Gorman   mm: compaction: m...
1758
  {
8fb74b9fb   Mel Gorman   mm: compaction: p...
1759
  	unsigned int order;
d39773a06   Vlastimil Babka   mm, compaction: a...
1760
  	const int migratetype = cc->migratetype;
cb2dcaf02   Mel Gorman   mm, compaction: f...
1761
  	int ret;
748446bb6   Mel Gorman   mm: compaction: m...
1762

753341a4b   Mel Gorman   revert "mm: have ...
1763
  	/* Compaction run completes if the migrate and free scanner meet */
f2849aa09   Vlastimil Babka   mm, compaction: m...
1764
  	if (compact_scanners_met(cc)) {
55b7c4c99   Vlastimil Babka   mm: compaction: r...
1765
  		/* Let the next compaction start anew. */
40cacbcb3   Mel Gorman   mm, compaction: r...
1766
  		reset_cached_positions(cc->zone);
55b7c4c99   Vlastimil Babka   mm: compaction: r...
1767

62997027c   Mel Gorman   mm: compaction: c...
1768
1769
  		/*
  		 * Mark that the PG_migrate_skip information should be cleared
accf62422   Vlastimil Babka   mm, kswapd: repla...
1770
  		 * by kswapd when it goes to sleep. kcompactd does not set the
62997027c   Mel Gorman   mm: compaction: c...
1771
1772
1773
  		 * flag itself, as the decision to clear it should be based
  		 * directly on an allocation request.
  		 */
accf62422   Vlastimil Babka   mm, kswapd: repla...
1774
  		if (cc->direct_compaction)
40cacbcb3   Mel Gorman   mm, compaction: r...
1775
  			cc->zone->compact_blockskip_flush = true;
62997027c   Mel Gorman   mm: compaction: c...
1776

c8f7de0bf   Michal Hocko   mm, compaction: d...
1777
1778
1779
1780
  		if (cc->whole_zone)
  			return COMPACT_COMPLETE;
  		else
  			return COMPACT_PARTIAL_SKIPPED;
bb13ffeb9   Mel Gorman   mm: compaction: c...
1781
  	}
748446bb6   Mel Gorman   mm: compaction: m...
1782

21c527a3c   Yaowei Bai   mm/compaction.c: ...
1783
  	if (is_via_compact_memory(cc->order))
56de7263f   Mel Gorman   mm: compaction: d...
1784
  		return COMPACT_CONTINUE;
efe771c76   Mel Gorman   mm, compaction: a...
1785
1786
1787
1788
1789
1790
1791
1792
  	/*
  	 * Always finish scanning a pageblock to reduce the possibility of
  	 * fallbacks in the future. This is particularly important when
  	 * migration source is unmovable/reclaimable but it's not worth
  	 * special casing.
  	 */
  	if (!IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
  		return COMPACT_CONTINUE;
baf6a9a1d   Vlastimil Babka   mm, compaction: f...
1793

56de7263f   Mel Gorman   mm: compaction: d...
1794
  	/* Direct compactor: Is a suitable page free? */
cb2dcaf02   Mel Gorman   mm, compaction: f...
1795
  	ret = COMPACT_NO_SUITABLE_PAGE;
8fb74b9fb   Mel Gorman   mm: compaction: p...
1796
  	for (order = cc->order; order < MAX_ORDER; order++) {
40cacbcb3   Mel Gorman   mm, compaction: r...
1797
  		struct free_area *area = &cc->zone->free_area[order];
2149cdaef   Joonsoo Kim   mm/compaction: en...
1798
  		bool can_steal;
8fb74b9fb   Mel Gorman   mm: compaction: p...
1799
1800
  
  		/* Job done if page is free of the right migratetype */
b03641af6   Dan Williams   mm: move buddy li...
1801
  		if (!free_area_empty(area, migratetype))
cf378319d   Vlastimil Babka   mm, compaction: r...
1802
  			return COMPACT_SUCCESS;
8fb74b9fb   Mel Gorman   mm: compaction: p...
1803

2149cdaef   Joonsoo Kim   mm/compaction: en...
1804
1805
1806
  #ifdef CONFIG_CMA
  		/* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
  		if (migratetype == MIGRATE_MOVABLE &&
b03641af6   Dan Williams   mm: move buddy li...
1807
  			!free_area_empty(area, MIGRATE_CMA))
cf378319d   Vlastimil Babka   mm, compaction: r...
1808
  			return COMPACT_SUCCESS;
2149cdaef   Joonsoo Kim   mm/compaction: en...
1809
1810
1811
1812
1813
1814
  #endif
  		/*
  		 * Job done if allocation would steal freepages from
  		 * other migratetype buddy lists.
  		 */
  		if (find_suitable_fallback(area, order, migratetype,
baf6a9a1d   Vlastimil Babka   mm, compaction: f...
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
  						true, &can_steal) != -1) {
  
  			/* movable pages are OK in any pageblock */
  			if (migratetype == MIGRATE_MOVABLE)
  				return COMPACT_SUCCESS;
  
  			/*
  			 * We are stealing for a non-movable allocation. Make
  			 * sure we finish compacting the current pageblock
  			 * first so it is as free as possible and we won't
  			 * have to steal another one soon. This only applies
  			 * to sync compaction, as async compaction operates
  			 * on pageblocks of the same migratetype.
  			 */
  			if (cc->mode == MIGRATE_ASYNC ||
  					IS_ALIGNED(cc->migrate_pfn,
  							pageblock_nr_pages)) {
  				return COMPACT_SUCCESS;
  			}
cb2dcaf02   Mel Gorman   mm, compaction: f...
1834
1835
  			ret = COMPACT_CONTINUE;
  			break;
baf6a9a1d   Vlastimil Babka   mm, compaction: f...
1836
  		}
56de7263f   Mel Gorman   mm: compaction: d...
1837
  	}
cb2dcaf02   Mel Gorman   mm, compaction: f...
1838
1839
1840
1841
  	if (cc->contended || fatal_signal_pending(current))
  		ret = COMPACT_CONTENDED;
  
  	return ret;
837d026d5   Joonsoo Kim   mm/compaction: mo...
1842
  }
40cacbcb3   Mel Gorman   mm, compaction: r...
1843
  static enum compact_result compact_finished(struct compact_control *cc)
837d026d5   Joonsoo Kim   mm/compaction: mo...
1844
1845
  {
  	int ret;
40cacbcb3   Mel Gorman   mm, compaction: r...
1846
1847
  	ret = __compact_finished(cc);
  	trace_mm_compaction_finished(cc->zone, cc->order, ret);
837d026d5   Joonsoo Kim   mm/compaction: mo...
1848
1849
1850
1851
  	if (ret == COMPACT_NO_SUITABLE_PAGE)
  		ret = COMPACT_CONTINUE;
  
  	return ret;
748446bb6   Mel Gorman   mm: compaction: m...
1852
  }
3e7d34497   Mel Gorman   mm: vmscan: recla...
1853
1854
1855
1856
  /*
   * compaction_suitable: Is this suitable to run compaction on this zone now?
   * Returns
   *   COMPACT_SKIPPED  - If there are too few free pages for compaction
cf378319d   Vlastimil Babka   mm, compaction: r...
1857
   *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
3e7d34497   Mel Gorman   mm: vmscan: recla...
1858
1859
   *   COMPACT_CONTINUE - If compaction should run now
   */
ea7ab982b   Michal Hocko   mm, compaction: c...
1860
  static enum compact_result __compaction_suitable(struct zone *zone, int order,
c603844bd   Mel Gorman   mm, page_alloc: c...
1861
  					unsigned int alloc_flags,
86a294a81   Michal Hocko   mm, oom, compacti...
1862
1863
  					int classzone_idx,
  					unsigned long wmark_target)
3e7d34497   Mel Gorman   mm: vmscan: recla...
1864
  {
3e7d34497   Mel Gorman   mm: vmscan: recla...
1865
  	unsigned long watermark;
21c527a3c   Yaowei Bai   mm/compaction.c: ...
1866
  	if (is_via_compact_memory(order))
3957c7768   Michal Hocko   mm: compaction: f...
1867
  		return COMPACT_CONTINUE;
a92144438   Mel Gorman   mm: move zone wat...
1868
  	watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
ebff39801   Vlastimil Babka   mm, compaction: p...
1869
1870
1871
1872
1873
1874
  	/*
  	 * If watermarks for high-order allocation are already met, there
  	 * should be no need for compaction at all.
  	 */
  	if (zone_watermark_ok(zone, order, watermark, classzone_idx,
  								alloc_flags))
cf378319d   Vlastimil Babka   mm, compaction: r...
1875
  		return COMPACT_SUCCESS;
ebff39801   Vlastimil Babka   mm, compaction: p...
1876

3957c7768   Michal Hocko   mm: compaction: f...
1877
  	/*
9861a62c3   Vlastimil Babka   mm, compaction: c...
1878
  	 * Watermarks for order-0 must be met for compaction to be able to
984fdba6a   Vlastimil Babka   mm, compaction: u...
1879
1880
1881
1882
1883
1884
1885
  	 * isolate free pages for migration targets. This means that the
  	 * watermark and alloc_flags have to match, or be more pessimistic than
  	 * the check in __isolate_free_page(). We don't use the direct
  	 * compactor's alloc_flags, as they are not relevant for freepage
  	 * isolation. We however do use the direct compactor's classzone_idx to
  	 * skip over zones where lowmem reserves would prevent allocation even
  	 * if compaction succeeds.
8348faf91   Vlastimil Babka   mm, compaction: r...
1886
1887
  	 * For costly orders, we require low watermark instead of min for
  	 * compaction to proceed to increase its chances.
d883c6cf3   Joonsoo Kim   Revert "mm/cma: m...
1888
1889
  	 * ALLOC_CMA is used, as pages in CMA pageblocks are considered
  	 * suitable migration targets
3e7d34497   Mel Gorman   mm: vmscan: recla...
1890
  	 */
8348faf91   Vlastimil Babka   mm, compaction: r...
1891
1892
1893
  	watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
  				low_wmark_pages(zone) : min_wmark_pages(zone);
  	watermark += compact_gap(order);
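
  	/*
  	 * compact_gap() roughly doubles the allocation size as slack: for an
  	 * order-9 request, for example, around 1024 extra free pages are
  	 * required above the chosen watermark before compaction is attempted.
  	 */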
86a294a81   Michal Hocko   mm, oom, compacti...
1894
  	if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
d883c6cf3   Joonsoo Kim   Revert "mm/cma: m...
1895
  						ALLOC_CMA, wmark_target))
3e7d34497   Mel Gorman   mm: vmscan: recla...
1896
  		return COMPACT_SKIPPED;
cc5c9f098   Vlastimil Babka   mm, compaction: i...
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
  	return COMPACT_CONTINUE;
  }
  
  enum compact_result compaction_suitable(struct zone *zone, int order,
  					unsigned int alloc_flags,
  					int classzone_idx)
  {
  	enum compact_result ret;
  	int fragindex;
  
  	ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx,
  				    zone_page_state(zone, NR_FREE_PAGES));
3e7d34497   Mel Gorman   mm: vmscan: recla...
1909
1910
1911
1912
  	/*
  	 * fragmentation index determines if allocation failures are due to
  	 * low memory or external fragmentation
  	 *
ebff39801   Vlastimil Babka   mm, compaction: p...
1913
1914
  	 * index of -1000 would imply allocations might succeed depending on
  	 * watermarks, but we already failed the high-order watermark check
3e7d34497   Mel Gorman   mm: vmscan: recla...
1915
1916
1917
  	 * index towards 0 implies failure is due to lack of memory
  	 * index towards 1000 implies failure is due to fragmentation
  	 *
203114202   Vlastimil Babka   mm, compaction: r...
1918
1919
1920
1921
1922
1923
  	 * Only compact if a failure would be due to fragmentation. Also
  	 * ignore fragindex for non-costly orders where the alternative to
  	 * a successful reclaim/compaction is OOM. Fragindex and the
  	 * vm.extfrag_threshold sysctl is meant as a heuristic to prevent
  	 * excessive compaction for costly orders, but it should not be at the
  	 * expense of system stability.
3e7d34497   Mel Gorman   mm: vmscan: recla...
1924
  	 */
203114202   Vlastimil Babka   mm, compaction: r...
1925
  	if (ret == COMPACT_CONTINUE && (order > PAGE_ALLOC_COSTLY_ORDER)) {
cc5c9f098   Vlastimil Babka   mm, compaction: i...
1926
1927
1928
1929
  		fragindex = fragmentation_index(zone, order);
  		if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
  			ret = COMPACT_NOT_SUITABLE_ZONE;
  	}
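
  	/*
  	 * fragmentation_index() reports -1000 when the request could already
  	 * be satisfied and 0-1000 otherwise, and the default
  	 * vm.extfrag_threshold of 500 therefore skips compaction when the
  	 * failure looks at least as much like a memory shortage as like
  	 * fragmentation.
  	 */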
837d026d5   Joonsoo Kim   mm/compaction: mo...
1930

837d026d5   Joonsoo Kim   mm/compaction: mo...
1931
1932
1933
1934
1935
1936
  	trace_mm_compaction_suitable(zone, order, ret);
  	if (ret == COMPACT_NOT_SUITABLE_ZONE)
  		ret = COMPACT_SKIPPED;
  
  	return ret;
  }
86a294a81   Michal Hocko   mm, oom, compacti...
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
  bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
  		int alloc_flags)
  {
  	struct zone *zone;
  	struct zoneref *z;
  
  	/*
  	 * Make sure at least one zone would pass __compaction_suitable if we continue
  	 * retrying the reclaim.
  	 */
  	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
  					ac->nodemask) {
  		unsigned long available;
  		enum compact_result compact_result;
  
  		/*
  		 * Do not consider all the reclaimable memory because we do not
  		 * want to thrash just for a single high-order allocation which
  		 * is not even guaranteed to appear even if __compaction_suitable
  		 * is happy about the watermark check.
  		 */
5a1c84b40   Mel Gorman   mm: remove reclai...
1958
  		available = zone_reclaimable_pages(zone) / order;
86a294a81   Michal Hocko   mm, oom, compacti...
1959
1960
1961
  		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
  		compact_result = __compaction_suitable(zone, order, alloc_flags,
  				ac_classzone_idx(ac), available);
cc5c9f098   Vlastimil Babka   mm, compaction: i...
1962
  		if (compact_result != COMPACT_SKIPPED)
86a294a81   Michal Hocko   mm, oom, compacti...
1963
1964
1965
1966
1967
  			return true;
  	}
  
  	return false;
  }
5e1f0f098   Mel Gorman   mm, compaction: c...
1968
1969
  static enum compact_result
  compact_zone(struct compact_control *cc, struct capture_control *capc)
748446bb6   Mel Gorman   mm: compaction: m...
1970
  {
ea7ab982b   Michal Hocko   mm, compaction: c...
1971
  	enum compact_result ret;
40cacbcb3   Mel Gorman   mm, compaction: r...
1972
1973
  	unsigned long start_pfn = cc->zone->zone_start_pfn;
  	unsigned long end_pfn = zone_end_pfn(cc->zone);
566e54e11   Mel Gorman   mm, compaction: r...
1974
  	unsigned long last_migrated_pfn;
e0b9daeb4   David Rientjes   mm, compaction: e...
1975
  	const bool sync = cc->mode != MIGRATE_ASYNC;
8854c55f5   Mel Gorman   mm, compaction: k...
1976
  	bool update_cached;
748446bb6   Mel Gorman   mm: compaction: m...
1977

a94b52524   Yafang Shao   mm/compaction.c: ...
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
  	/*
  	 * These counters track activities during zone compaction.  Initialize
  	 * them before compacting a new zone.
  	 */
  	cc->total_migrate_scanned = 0;
  	cc->total_free_scanned = 0;
  	cc->nr_migratepages = 0;
  	cc->nr_freepages = 0;
  	INIT_LIST_HEAD(&cc->freepages);
  	INIT_LIST_HEAD(&cc->migratepages);
d39773a06   Vlastimil Babka   mm, compaction: a...
1988
  	cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
40cacbcb3   Mel Gorman   mm, compaction: r...
1989
  	ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
ebff39801   Vlastimil Babka   mm, compaction: p...
1990
  							cc->classzone_idx);
c46649dea   Michal Hocko   mm, compaction: c...
1991
  	/* Compaction is likely to fail */
cf378319d   Vlastimil Babka   mm, compaction: r...
1992
  	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
3e7d34497   Mel Gorman   mm: vmscan: recla...
1993
  		return ret;
c46649dea   Michal Hocko   mm, compaction: c...
1994
1995
1996
  
  	/* huh, compaction_suitable is returning something unexpected */
  	VM_BUG_ON(ret != COMPACT_CONTINUE);
3e7d34497   Mel Gorman   mm: vmscan: recla...
1997

c89511ab2   Mel Gorman   mm: compaction: R...
1998
  	/*
d3132e4b8   Vlastimil Babka   mm: compaction: r...
1999
  	 * Clear pageblock skip if there were failures recently and compaction
accf62422   Vlastimil Babka   mm, kswapd: repla...
2000
  	 * is about to be retried after being deferred.
d3132e4b8   Vlastimil Babka   mm: compaction: r...
2001
  	 */
40cacbcb3   Mel Gorman   mm, compaction: r...
2002
2003
  	if (compaction_restarting(cc->zone, cc->order))
  		__reset_isolation_suitable(cc->zone);
d3132e4b8   Vlastimil Babka   mm: compaction: r...
2004
2005
  
  	/*
c89511ab2   Mel Gorman   mm: compaction: R...
2006
  	 * Set up to move all movable pages to the end of the zone. Use cached
06ed29989   Vlastimil Babka   mm, compaction: m...
2007
2008
2009
  	 * information on where the scanners should start (unless we explicitly
  	 * want to compact the whole zone), but check that it is initialised
  	 * by ensuring the values are within zone boundaries.
c89511ab2   Mel Gorman   mm: compaction: R...
2010
  	 */
70b44595e   Mel Gorman   mm, compaction: u...
2011
  	cc->fast_start_pfn = 0;
06ed29989   Vlastimil Babka   mm, compaction: m...
2012
  	if (cc->whole_zone) {
c89511ab2   Mel Gorman   mm: compaction: R...
2013
  		cc->migrate_pfn = start_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
2014
2015
  		cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
  	} else {
40cacbcb3   Mel Gorman   mm, compaction: r...
2016
2017
  		cc->migrate_pfn = cc->zone->compact_cached_migrate_pfn[sync];
  		cc->free_pfn = cc->zone->compact_cached_free_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
2018
2019
  		if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
  			cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
40cacbcb3   Mel Gorman   mm, compaction: r...
2020
  			cc->zone->compact_cached_free_pfn = cc->free_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
2021
2022
2023
  		}
  		if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
  			cc->migrate_pfn = start_pfn;
40cacbcb3   Mel Gorman   mm, compaction: r...
2024
2025
  			cc->zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
  			cc->zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
2026
  		}
c8f7de0bf   Michal Hocko   mm, compaction: d...
2027

e332f741a   Mel Gorman   mm, compaction: b...
2028
  		if (cc->migrate_pfn <= cc->zone->compact_init_migrate_pfn)
06ed29989   Vlastimil Babka   mm, compaction: m...
2029
2030
  			cc->whole_zone = true;
  	}
c8f7de0bf   Michal Hocko   mm, compaction: d...
2031

566e54e11   Mel Gorman   mm, compaction: r...
2032
  	last_migrated_pfn = 0;
748446bb6   Mel Gorman   mm: compaction: m...
2033

8854c55f5   Mel Gorman   mm, compaction: k...
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
  	/*
  	 * Migrate has separate cached PFNs for ASYNC and SYNC* migration on
  	 * the basis that some migrations will fail in ASYNC mode. However,
  	 * if the cached PFNs match and pageblocks are skipped due to having
  	 * no isolation candidates, then the sync state does not matter.
  	 * Until a pageblock with isolation candidates is found, keep the
  	 * cached PFNs in sync to avoid revisiting the same blocks.
  	 */
  	update_cached = !sync &&
  		cc->zone->compact_cached_migrate_pfn[0] == cc->zone->compact_cached_migrate_pfn[1];
16c4a097a   Joonsoo Kim   mm/compaction: en...
2044
2045
  	trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
  				cc->free_pfn, end_pfn, sync);
0eb927c0a   Mel Gorman   mm: compaction: t...
2046

748446bb6   Mel Gorman   mm: compaction: m...
2047
  	migrate_prep_local();
40cacbcb3   Mel Gorman   mm, compaction: r...
2048
  	while ((ret = compact_finished(cc)) == COMPACT_CONTINUE) {
9d502c1c8   Minchan Kim   mm/compaction: ch...
2049
  		int err;
566e54e11   Mel Gorman   mm, compaction: r...
2050
  		unsigned long start_pfn = cc->migrate_pfn;
748446bb6   Mel Gorman   mm: compaction: m...
2051

804d3121b   Mel Gorman   mm, compaction: a...
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
  		/*
  		 * Avoid multiple rescans which can happen if a page cannot be
  		 * isolated (dirty/writeback in async mode) or if the migrated
  		 * pages are being allocated before the pageblock is cleared.
  		 * The first rescan will capture the entire pageblock for
  		 * migration. If it fails, it'll be marked skip and scanning
  		 * will proceed as normal.
  		 */
  		cc->rescan = false;
  		if (pageblock_start_pfn(last_migrated_pfn) ==
  		    pageblock_start_pfn(start_pfn)) {
  			cc->rescan = true;
  		}
32aaf0553   Pengfei Li   mm/compaction.c: ...
2065
  		switch (isolate_migratepages(cc)) {
f9e35b3b4   Mel Gorman   mm: compaction: a...
2066
  		case ISOLATE_ABORT:
2d1e10412   Vlastimil Babka   mm, compaction: d...
2067
  			ret = COMPACT_CONTENDED;
5733c7d11   Rafael Aquini   mm: introduce put...
2068
  			putback_movable_pages(&cc->migratepages);
e64c5237c   Shaohua Li   mm: compaction: a...
2069
  			cc->nr_migratepages = 0;
566e54e11   Mel Gorman   mm, compaction: r...
2070
  			last_migrated_pfn = 0;
f9e35b3b4   Mel Gorman   mm: compaction: a...
2071
2072
  			goto out;
  		case ISOLATE_NONE:
8854c55f5   Mel Gorman   mm, compaction: k...
2073
2074
2075
2076
  			if (update_cached) {
  				cc->zone->compact_cached_migrate_pfn[1] =
  					cc->zone->compact_cached_migrate_pfn[0];
  			}
fdaf7f5c4   Vlastimil Babka   mm, compaction: m...
2077
2078
2079
2080
2081
2082
  			/*
  			 * We haven't isolated and migrated anything, but
  			 * there might still be unflushed migrations from
  			 * previous cc->order aligned block.
  			 */
  			goto check_drain;
f9e35b3b4   Mel Gorman   mm: compaction: a...
2083
  		case ISOLATE_SUCCESS:
8854c55f5   Mel Gorman   mm, compaction: k...
2084
  			update_cached = false;
566e54e11   Mel Gorman   mm, compaction: r...
2085
  			last_migrated_pfn = start_pfn;
f9e35b3b4   Mel Gorman   mm: compaction: a...
2086
2087
  			;
  		}
		err = migrate_pages(&cc->migratepages, compaction_alloc,
				compaction_free, (unsigned long)cc, cc->mode,
				MR_COMPACTION);

		trace_mm_compaction_migratepages(cc->nr_migratepages, err,
							&cc->migratepages);

		/* All pages were either migrated or will be released */
		cc->nr_migratepages = 0;
		if (err) {
			putback_movable_pages(&cc->migratepages);

			/*
			 * migrate_pages() may return -ENOMEM when scanners meet
			 * and we want compact_finished() to detect it
			 */
			if (err == -ENOMEM && !compact_scanners_met(cc)) {
				ret = COMPACT_CONTENDED;
				goto out;
			}
			/*
			 * We failed to migrate at least one page in the current
			 * order-aligned block, so skip the rest of it.
			 */
			if (cc->direct_compaction &&
						(cc->mode == MIGRATE_ASYNC)) {
				cc->migrate_pfn = block_end_pfn(
						cc->migrate_pfn - 1, cc->order);
				/* Draining pcplists is useless in this case */
				last_migrated_pfn = 0;
			}
		}

check_drain:
		/*
		 * Has the migration scanner moved away from the previous
		 * cc->order aligned block where we migrated from? If yes,
		 * flush the pages that were freed, so that they can merge and
		 * compact_finished() can detect immediately if allocation
		 * would succeed.
		 */
		if (cc->order > 0 && last_migrated_pfn) {
			int cpu;
			unsigned long current_block_start =
				block_start_pfn(cc->migrate_pfn, cc->order);

			if (last_migrated_pfn < current_block_start) {
				cpu = get_cpu();
				lru_add_drain_cpu(cpu);
				drain_local_pages(cc->zone);
				put_cpu();
				/* No more flushing until we migrate again */
				last_migrated_pfn = 0;
			}
		}

		/* Stop if a page has been captured */
		if (capc && capc->page) {
			ret = COMPACT_SUCCESS;
			break;
		}
	}

out:
	/*
	 * Release free pages and update where the free scanner should restart,
	 * so we don't leave any returned pages behind in the next attempt.
	 */
	if (cc->nr_freepages > 0) {
		unsigned long free_pfn = release_freepages(&cc->freepages);

		cc->nr_freepages = 0;
		VM_BUG_ON(free_pfn == 0);
		/* The cached pfn is always the first in a pageblock */
		free_pfn = pageblock_start_pfn(free_pfn);
		/*
		 * Only go back, not forward. The cached pfn might have been
		 * already reset to zone end in compact_finished()
		 */
		if (free_pfn > cc->zone->compact_cached_free_pfn)
			cc->zone->compact_cached_free_pfn = free_pfn;
	}

	count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned);
	count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned);

	trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
				cc->free_pfn, end_pfn, sync, ret);

	return ret;
}
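
/*
 * Note on page capture: compact_zone_order() below installs a capture_control
 * on the current task so that, while compaction runs, a page freed in the
 * allocator that satisfies the request can be handed straight to the
 * compacting task instead of going back to the free lists. compact_zone()
 * stops as soon as capc->page has been set.
 */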
static enum compact_result compact_zone_order(struct zone *zone, int order,
		gfp_t gfp_mask, enum compact_priority prio,
		unsigned int alloc_flags, int classzone_idx,
		struct page **capture)
{
	enum compact_result ret;
	struct compact_control cc = {
		.order = order,
		.search_order = order,
		.gfp_mask = gfp_mask,
		.zone = zone,
		.mode = (prio == COMPACT_PRIO_ASYNC) ?
					MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT,
		.alloc_flags = alloc_flags,
		.classzone_idx = classzone_idx,
		.direct_compaction = true,
		.whole_zone = (prio == MIN_COMPACT_PRIORITY),
		.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
		.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
	};
	struct capture_control capc = {
		.cc = &cc,
		.page = NULL,
	};

	if (capture)
		current->capture_control = &capc;

	ret = compact_zone(&cc, &capc);

	VM_BUG_ON(!list_empty(&cc.freepages));
	VM_BUG_ON(!list_empty(&cc.migratepages));

	*capture = capc.page;
	current->capture_control = NULL;

	return ret;
}

int sysctl_extfrag_threshold = 500;
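
/*
 * Note: direct compaction is driven from the allocator slow path
 * (__alloc_pages_direct_compact() in mm/page_alloc.c), which retries
 * try_to_compact_pages() with increasing compact_priority when a
 * lower-priority attempt did not produce a usable page.
 */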

/**
 * try_to_compact_pages - Direct compact to satisfy a high-order allocation
 * @gfp_mask: The GFP mask of the current allocation
 * @order: The order of the current allocation
 * @alloc_flags: The allocation flags of the current allocation
 * @ac: The context of current allocation
 * @prio: Determines how hard direct compaction should try to succeed
 * @capture: Pointer to free page created by compaction will be stored here
 *
 * This is the main entry point for direct page compaction.
 */
enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
		unsigned int alloc_flags, const struct alloc_context *ac,
		enum compact_priority prio, struct page **capture)
{
	int may_perform_io = gfp_mask & __GFP_IO;
	struct zoneref *z;
	struct zone *zone;
	enum compact_result rc = COMPACT_SKIPPED;

	/*
	 * Check if the GFP flags allow compaction - GFP_NOIO is really
	 * tricky context because the migration might require IO
	 */
	if (!may_perform_io)
		return COMPACT_SKIPPED;

	trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);

	/* Compact each zone in the list */
	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
								ac->nodemask) {
		enum compact_result status;

		if (prio > MIN_COMPACT_PRIORITY
					&& compaction_deferred(zone, order)) {
			rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
			continue;
		}

		status = compact_zone_order(zone, order, gfp_mask, prio,
				alloc_flags, ac_classzone_idx(ac), capture);
		rc = max(status, rc);

		/* The allocation should succeed, stop compacting */
		if (status == COMPACT_SUCCESS) {
			/*
			 * We think the allocation will succeed in this zone,
			 * but it is not certain, hence the false. The caller
			 * will repeat this with true if allocation indeed
			 * succeeds in this zone.
			 */
			compaction_defer_reset(zone, order, false);

			break;
		}

		if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
					status == COMPACT_PARTIAL_SKIPPED))
			/*
			 * We think that allocation won't succeed in this zone
			 * so we defer compaction there. If it ends up
			 * succeeding after all, it will be reset.
			 */
			defer_compaction(zone, order);

		/*
		 * We might have stopped compacting due to need_resched() in
		 * async compaction, or due to a fatal signal detected. In that
		 * case do not try further zones
		 */
		if ((prio == COMPACT_PRIO_ASYNC && need_resched())
					|| fatal_signal_pending(current))
			break;
	}

	return rc;
}

/* Compact all zones within a node */
static void compact_node(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	int zoneid;
	struct zone *zone;
	struct compact_control cc = {
		.order = -1,
		.mode = MIGRATE_SYNC,
		.ignore_skip_hint = true,
		.whole_zone = true,
		.gfp_mask = GFP_KERNEL,
	};

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {

		zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		cc.zone = zone;

		compact_zone(&cc, NULL);

		VM_BUG_ON(!list_empty(&cc.freepages));
		VM_BUG_ON(!list_empty(&cc.migratepages));
	}
}

/* Compact all nodes in the system */
static void compact_nodes(void)
{
	int nid;

	/* Flush pending updates to the LRU lists */
	lru_add_drain_all();

	for_each_online_node(nid)
		compact_node(nid);
}
  
/* The written value is actually unused, all memory is compacted */
int sysctl_compact_memory;

/*
 * This is the entry point for compacting all nodes via
 * /proc/sys/vm/compact_memory
 */
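/*
 * Example (userspace): writing any value triggers full compaction of all
 * online nodes, e.g.
 *
 *	echo 1 > /proc/sys/vm/compact_memory
 */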
int sysctl_compaction_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	if (write)
		compact_nodes();

	return 0;
}

#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
static ssize_t sysfs_compact_node(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	int nid = dev->id;

	if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
		/* Flush pending updates to the LRU lists */
		lru_add_drain_all();

		compact_node(nid);
	}

	return count;
}
static DEVICE_ATTR(compact, 0200, NULL, sysfs_compact_node);
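
/*
 * Example (userspace): compaction of a single node can be triggered through
 * the per-node sysfs attribute registered below, e.g.
 *
 *	echo 1 > /sys/devices/system/node/node0/compact
 */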

int compaction_register_node(struct node *node)
{
	return device_create_file(&node->dev, &dev_attr_compact);
}

void compaction_unregister_node(struct node *node)
{
	return device_remove_file(&node->dev, &dev_attr_compact);
}
#endif /* CONFIG_SYSFS && CONFIG_NUMA */

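/*
 * kthread_should_stop() is part of the wakeup condition so that
 * kcompactd_stop() can terminate the thread even when no compaction work is
 * pending.
 */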
static inline bool kcompactd_work_requested(pg_data_t *pgdat)
{
	return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
}
  
static bool kcompactd_node_suitable(pg_data_t *pgdat)
{
	int zoneid;
	struct zone *zone;
	enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx;

	for (zoneid = 0; zoneid <= classzone_idx; zoneid++) {
		zone = &pgdat->node_zones[zoneid];

		if (!populated_zone(zone))
			continue;

		if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
					classzone_idx) == COMPACT_CONTINUE)
			return true;
	}

	return false;
}
  
static void kcompactd_do_work(pg_data_t *pgdat)
{
	/*
	 * With no special task, compact all zones so that a page of requested
	 * order is allocatable.
	 */
	int zoneid;
	struct zone *zone;
	struct compact_control cc = {
		.order = pgdat->kcompactd_max_order,
		.search_order = pgdat->kcompactd_max_order,
		.classzone_idx = pgdat->kcompactd_classzone_idx,
		.mode = MIGRATE_SYNC_LIGHT,
		.ignore_skip_hint = false,
		.gfp_mask = GFP_KERNEL,
	};
	trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
							cc.classzone_idx);
	count_compact_event(KCOMPACTD_WAKE);

	for (zoneid = 0; zoneid <= cc.classzone_idx; zoneid++) {
		int status;

		zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		if (compaction_deferred(zone, cc.order))
			continue;

		if (compaction_suitable(zone, cc.order, 0, zoneid) !=
							COMPACT_CONTINUE)
			continue;

		if (kthread_should_stop())
			return;

		cc.zone = zone;
		status = compact_zone(&cc, NULL);

		if (status == COMPACT_SUCCESS) {
			compaction_defer_reset(zone, cc.order, false);
		} else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
			/*
			 * Buddy pages may become stranded on pcps that could
			 * otherwise coalesce on the zone's free area for
			 * order >= cc.order.  This is ratelimited by the
			 * upcoming deferral.
			 */
			drain_all_pages(zone);

			/*
			 * We use sync migration mode here, so we defer like
			 * sync direct compaction does.
			 */
			defer_compaction(zone, cc.order);
		}

		count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
				     cc.total_migrate_scanned);
		count_compact_events(KCOMPACTD_FREE_SCANNED,
				     cc.total_free_scanned);

		VM_BUG_ON(!list_empty(&cc.freepages));
		VM_BUG_ON(!list_empty(&cc.migratepages));
	}

	/*
	 * Regardless of success, we are done until woken up next. But remember
	 * the requested order/classzone_idx in case it was higher/tighter than
	 * our current ones
	 */
	if (pgdat->kcompactd_max_order <= cc.order)
		pgdat->kcompactd_max_order = 0;
	if (pgdat->kcompactd_classzone_idx >= cc.classzone_idx)
		pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
}
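
/*
 * Note: wakeup_kcompactd() below is called from kswapd (see
 * kswapd_try_to_sleep() in mm/vmscan.c) before kswapd goes to sleep, so that
 * background compaction can prepare the high-order page kswapd was woken for.
 */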
  
  void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
  {
  	if (!order)
  		return;
  
  	if (pgdat->kcompactd_max_order < order)
  		pgdat->kcompactd_max_order = order;
  
  	if (pgdat->kcompactd_classzone_idx > classzone_idx)
  		pgdat->kcompactd_classzone_idx = classzone_idx;

	/*
	 * Pairs with implicit barrier in wait_event_freezable()
	 * such that wakeups are not missed.
	 */
	if (!wq_has_sleeper(&pgdat->kcompactd_wait))
		return;

	if (!kcompactd_node_suitable(pgdat))
		return;

	trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
							classzone_idx);
	wake_up_interruptible(&pgdat->kcompactd_wait);
}
  
  /*
   * The background compaction daemon, started as a kernel thread
   * from the init process.
   */
  static int kcompactd(void *p)
  {
  	pg_data_t *pgdat = (pg_data_t*)p;
  	struct task_struct *tsk = current;
  
  	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
  
  	if (!cpumask_empty(cpumask))
  		set_cpus_allowed_ptr(tsk, cpumask);
  
  	set_freezable();
  
  	pgdat->kcompactd_max_order = 0;
  	pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
  
  	while (!kthread_should_stop()) {
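		/*
		 * Each work pass is accounted as a memory pressure stall (PSI)
		 * via the psi_memstall_enter()/psi_memstall_leave() pair below.
		 */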
		unsigned long pflags;

		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
		wait_event_freezable(pgdat->kcompactd_wait,
				kcompactd_work_requested(pgdat));

		psi_memstall_enter(&pflags);
		kcompactd_do_work(pgdat);
		psi_memstall_leave(&pflags);
	}

	return 0;
}
  
/*
 * This kcompactd start function will be called by init and node-hot-add.
 * On node-hot-add, kcompactd will be moved to proper cpus if cpus are hot-added.
 */
  int kcompactd_run(int nid)
  {
  	pg_data_t *pgdat = NODE_DATA(nid);
  	int ret = 0;
  
  	if (pgdat->kcompactd)
  		return 0;
  
  	pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
  	if (IS_ERR(pgdat->kcompactd)) {
		pr_err("Failed to start kcompactd on node %d\n", nid);
  		ret = PTR_ERR(pgdat->kcompactd);
  		pgdat->kcompactd = NULL;
  	}
  	return ret;
  }
  
  /*
   * Called by memory hotplug when all memory in a node is offlined. Caller must
   * hold mem_hotplug_begin/end().
   */
  void kcompactd_stop(int nid)
  {
  	struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;
  
  	if (kcompactd) {
  		kthread_stop(kcompactd);
  		NODE_DATA(nid)->kcompactd = NULL;
  	}
  }
  
  /*
   * It's optimal to keep kcompactd on the same CPUs as their memory, but
   * not required for correctness. So if the last cpu in a node goes
   * away, we get changed to run anywhere: as the first one comes back,
   * restore their cpu bindings.
   */
static int kcompactd_cpu_online(unsigned int cpu)
{
	int nid;

	for_each_node_state(nid, N_MEMORY) {
		pg_data_t *pgdat = NODE_DATA(nid);
		const struct cpumask *mask;

		mask = cpumask_of_node(pgdat->node_id);

		if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
			/* One of our CPUs online: restore mask */
			set_cpus_allowed_ptr(pgdat->kcompactd, mask);
	}
	return 0;
}
  
  static int __init kcompactd_init(void)
  {
  	int nid;
	int ret;

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					"mm/compaction:online",
					kcompactd_cpu_online, NULL);
	if (ret < 0) {
		pr_err("kcompactd: failed to register hotplug callbacks.\n");
		return ret;
	}

	for_each_node_state(nid, N_MEMORY)
		kcompactd_run(nid);
	return 0;
}
subsys_initcall(kcompactd_init)

#endif /* CONFIG_COMPACTION */