mm/compaction.c

  // SPDX-License-Identifier: GPL-2.0
  /*
   * linux/mm/compaction.c
   *
   * Memory compaction for the reduction of external fragmentation. Note that
   * this heavily depends upon page migration to do all the real heavy
   * lifting
   *
   * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
   */
  #include <linux/cpu.h>
  #include <linux/swap.h>
  #include <linux/migrate.h>
  #include <linux/compaction.h>
  #include <linux/mm_inline.h>
  #include <linux/sched/signal.h>
  #include <linux/backing-dev.h>
  #include <linux/sysctl.h>
  #include <linux/sysfs.h>
  #include <linux/page-isolation.h>
  #include <linux/kasan.h>
  #include <linux/kthread.h>
  #include <linux/freezer.h>
  #include <linux/page_owner.h>
  #include "internal.h"
  #ifdef CONFIG_COMPACTION
  static inline void count_compact_event(enum vm_event_item item)
  {
  	count_vm_event(item);
  }
  
  static inline void count_compact_events(enum vm_event_item item, long delta)
  {
  	count_vm_events(item, delta);
  }
  #else
  #define count_compact_event(item) do { } while (0)
  #define count_compact_events(item, delta) do { } while (0)
  #endif
  #if defined CONFIG_COMPACTION || defined CONFIG_CMA
  #define CREATE_TRACE_POINTS
  #include <trace/events/compaction.h>
  #define block_start_pfn(pfn, order)	round_down(pfn, 1UL << (order))
  #define block_end_pfn(pfn, order)	ALIGN((pfn) + 1, 1UL << (order))
  #define pageblock_start_pfn(pfn)	block_start_pfn(pfn, pageblock_order)
  #define pageblock_end_pfn(pfn)		block_end_pfn(pfn, pageblock_order)
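
/*
 * Worked example of the helpers above (illustrative; assumes a typical
 * pageblock_order of 9, i.e. 512-page pageblocks): for pfn 1000,
 * block_start_pfn(1000, 9) == round_down(1000, 512) == 512 and
 * block_end_pfn(1000, 9) == ALIGN(1001, 512) == 1024, so the two macros
 * bracket the pageblock containing that pfn.
 */
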
  static unsigned long release_freepages(struct list_head *freelist)
  {
  	struct page *page, *next;
  	unsigned long high_pfn = 0;
  
  	list_for_each_entry_safe(page, next, freelist, lru) {
  		unsigned long pfn = page_to_pfn(page);
  		list_del(&page->lru);
  		__free_page(page);
  		if (pfn > high_pfn)
  			high_pfn = pfn;
  	}
  	return high_pfn;
  }
  static void map_pages(struct list_head *list)
  {
  	unsigned int i, order, nr_pages;
  	struct page *page, *next;
  	LIST_HEAD(tmp_list);
  
  	list_for_each_entry_safe(page, next, list, lru) {
  		list_del(&page->lru);
  
  		order = page_private(page);
  		nr_pages = 1 << order;

  		post_alloc_hook(page, order, __GFP_MOVABLE);
  		if (order)
  			split_page(page, order);

  		for (i = 0; i < nr_pages; i++) {
  			list_add(&page->lru, &tmp_list);
  			page++;
  		}
  	}
  
  	list_splice(&tmp_list, list);
  }
  #ifdef CONFIG_COMPACTION

  int PageMovable(struct page *page)
  {
  	struct address_space *mapping;
  
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	if (!__PageMovable(page))
  		return 0;
  
  	mapping = page_mapping(page);
  	if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
  		return 1;
  
  	return 0;
  }
  EXPORT_SYMBOL(PageMovable);
  
  void __SetPageMovable(struct page *page, struct address_space *mapping)
  {
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
  	page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
  }
  EXPORT_SYMBOL(__SetPageMovable);
  
  void __ClearPageMovable(struct page *page)
  {
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_PAGE(!PageMovable(page), page);
  	/*
	 * Clear the registered address_space value while keeping the
	 * PAGE_MAPPING_MOVABLE flag, so that the VM can catch a page released
	 * by the driver after isolation. With it, VM migration doesn't try to
	 * put the page back.
  	 */
  	page->mapping = (void *)((unsigned long)page->mapping &
  				PAGE_MAPPING_MOVABLE);
  }
  EXPORT_SYMBOL(__ClearPageMovable);
  /* Do not skip compaction more than 64 times */
  #define COMPACT_MAX_DEFER_SHIFT 6
  
  /*
   * Compaction is deferred when compaction fails to result in a page
 * allocation success. 1 << compact_defer_shift compactions are skipped up
 * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT.
   */
  void defer_compaction(struct zone *zone, int order)
  {
  	zone->compact_considered = 0;
  	zone->compact_defer_shift++;
  
  	if (order < zone->compact_order_failed)
  		zone->compact_order_failed = order;
  
  	if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT)
  		zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;
  
  	trace_mm_compaction_defer_compaction(zone, order);
  }
  
  /* Returns true if compaction should be skipped this time */
  bool compaction_deferred(struct zone *zone, int order)
  {
  	unsigned long defer_limit = 1UL << zone->compact_defer_shift;
  
  	if (order < zone->compact_order_failed)
  		return false;
  
  	/* Avoid possible overflow */
  	if (++zone->compact_considered > defer_limit)
  		zone->compact_considered = defer_limit;
  
  	if (zone->compact_considered >= defer_limit)
  		return false;
  
  	trace_mm_compaction_deferred(zone, order);
  
  	return true;
  }
  
  /*
   * Update defer tracking counters after successful compaction of given order,
   * which means an allocation either succeeded (alloc_success == true) or is
   * expected to succeed.
   */
  void compaction_defer_reset(struct zone *zone, int order,
  		bool alloc_success)
  {
  	if (alloc_success) {
  		zone->compact_considered = 0;
  		zone->compact_defer_shift = 0;
  	}
  	if (order >= zone->compact_order_failed)
  		zone->compact_order_failed = order + 1;
  
  	trace_mm_compaction_defer_reset(zone, order);
  }
  
  /* Returns true if restarting compaction after many failures */
  bool compaction_restarting(struct zone *zone, int order)
  {
  	if (order < zone->compact_order_failed)
  		return false;
  
  	return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
  		zone->compact_considered >= 1UL << zone->compact_defer_shift;
  }
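
/*
 * Example of the backoff above: each defer_compaction() call bumps
 * compact_defer_shift, so the number of attempts skipped before the next
 * retry roughly doubles (2, 4, 8, ...) and is capped at
 * 1 << COMPACT_MAX_DEFER_SHIFT == 64 skipped compactions.
 */
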
  /* Returns true if the pageblock should be scanned for pages to isolate. */
  static inline bool isolation_suitable(struct compact_control *cc,
  					struct page *page)
  {
  	if (cc->ignore_skip_hint)
  		return true;
  
  	return !get_pageblock_skip(page);
  }
  static void reset_cached_positions(struct zone *zone)
  {
  	zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
  	zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
  	zone->compact_cached_free_pfn =
  				pageblock_start_pfn(zone_end_pfn(zone) - 1);
  }
  /*
   * This function is called to clear all cached information on pageblocks that
   * should be skipped for page isolation when the migrate and free page scanner
   * meet.
   */
  static void __reset_isolation_suitable(struct zone *zone)
  {
  	unsigned long start_pfn = zone->zone_start_pfn;
  	unsigned long end_pfn = zone_end_pfn(zone);
  	unsigned long pfn;
  	zone->compact_blockskip_flush = false;
  
  	/* Walk the zone and mark every pageblock as suitable for isolation */
  	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
  		struct page *page;
  
  		cond_resched();
  		page = pfn_to_online_page(pfn);
  		if (!page)
  			continue;
  		if (zone != page_zone(page))
  			continue;
  
  		clear_pageblock_skip(page);
  	}
  
  	reset_cached_positions(zone);
  }
  void reset_isolation_suitable(pg_data_t *pgdat)
  {
  	int zoneid;
  
  	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
  		struct zone *zone = &pgdat->node_zones[zoneid];
  		if (!populated_zone(zone))
  			continue;
  
  		/* Only flush if a full compaction finished recently */
  		if (zone->compact_blockskip_flush)
  			__reset_isolation_suitable(zone);
  	}
  }
  /*
   * If no pages were isolated then mark this pageblock to be skipped in the
   * future. The information is later cleared by __reset_isolation_suitable().
   */
  static void update_pageblock_skip(struct compact_control *cc,
  			struct page *page, unsigned long nr_isolated,
  			bool migrate_scanner)
  {
  	struct zone *zone = cc->zone;
  	unsigned long pfn;
  
  	if (cc->ignore_skip_hint)
  		return;
  	if (!page)
  		return;
  	if (nr_isolated)
  		return;
  	set_pageblock_skip(page);

  	pfn = page_to_pfn(page);
  
  	/* Update where async and sync compaction should restart */
  	if (migrate_scanner) {
  		if (pfn > zone->compact_cached_migrate_pfn[0])
  			zone->compact_cached_migrate_pfn[0] = pfn;
  		if (cc->mode != MIGRATE_ASYNC &&
  		    pfn > zone->compact_cached_migrate_pfn[1])
  			zone->compact_cached_migrate_pfn[1] = pfn;
  	} else {
  		if (pfn < zone->compact_cached_free_pfn)
  			zone->compact_cached_free_pfn = pfn;
  	}
  }
  #else
  static inline bool isolation_suitable(struct compact_control *cc,
  					struct page *page)
  {
  	return true;
  }
  static void update_pageblock_skip(struct compact_control *cc,
  			struct page *page, unsigned long nr_isolated,
  			bool migrate_scanner)
  {
  }
  #endif /* CONFIG_COMPACTION */
  /*
   * Compaction requires the taking of some coarse locks that are potentially
   * very heavily contended. For async compaction, back out if the lock cannot
   * be taken immediately. For sync compaction, spin on the lock if needed.
   *
   * Returns true if the lock is held
   * Returns false if the lock is not held and compaction should abort
   */
  static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
  						struct compact_control *cc)
  {
  	if (cc->mode == MIGRATE_ASYNC) {
  		if (!spin_trylock_irqsave(lock, *flags)) {
  			cc->contended = true;
  			return false;
  		}
  	} else {
  		spin_lock_irqsave(lock, *flags);
  	}

  	return true;
  }
  /*
   * Compaction requires the taking of some coarse locks that are potentially
   * very heavily contended. The lock should be periodically unlocked to avoid
   * having disabled IRQs for a long time, even when there is nobody waiting on
   * the lock. It might also be that allowing the IRQs will result in
   * need_resched() becoming true. If scheduling is needed, async compaction
   * aborts. Sync compaction schedules.
   * Either compaction type will also abort if a fatal signal is pending.
   * In either case if the lock was locked, it is dropped and not regained.
   *
   * Returns true if compaction should abort due to fatal signal pending, or
   *		async compaction due to need_resched()
   * Returns false when compaction can continue (sync compaction might have
   *		scheduled)
   */
  static bool compact_unlock_should_abort(spinlock_t *lock,
  		unsigned long flags, bool *locked, struct compact_control *cc)
  {
  	if (*locked) {
  		spin_unlock_irqrestore(lock, flags);
  		*locked = false;
  	}

  	if (fatal_signal_pending(current)) {
  		cc->contended = true;
  		return true;
  	}

  	if (need_resched()) {
  		if (cc->mode == MIGRATE_ASYNC) {
  			cc->contended = true;
  			return true;
  		}
  		cond_resched();
  	}
  	return false;
  }
  /*
   * Aside from avoiding lock contention, compaction also periodically checks
   * need_resched() and either schedules in sync compaction or aborts async
   * compaction. This is similar to what compact_unlock_should_abort() does, but
   * is used where no lock is concerned.
   *
   * Returns false when no scheduling was needed, or sync compaction scheduled.
   * Returns true when async compaction should abort.
   */
  static inline bool compact_should_abort(struct compact_control *cc)
  {
  	/* async compaction aborts if contended */
  	if (need_resched()) {
  		if (cc->mode == MIGRATE_ASYNC) {
  			cc->contended = true;
  			return true;
  		}
  
  		cond_resched();
  	}
  
  	return false;
  }
  /*
   * Isolate free pages onto a private freelist. If @strict is true, will abort
   * returning 0 on any invalid PFNs or non-free pages inside of the pageblock
   * (even though it may still end up isolating some pages).
   */
  static unsigned long isolate_freepages_block(struct compact_control *cc,
  				unsigned long *start_pfn,
  				unsigned long end_pfn,
  				struct list_head *freelist,
  				bool strict)
  {
  	int nr_scanned = 0, total_isolated = 0;
  	struct page *cursor, *valid_page = NULL;
  	unsigned long flags = 0;
  	bool locked = false;
  	unsigned long blockpfn = *start_pfn;
  	unsigned int order;

  	cursor = pfn_to_page(blockpfn);
  	/* Isolate free pages. */
  	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
  		int isolated;
  		struct page *page = cursor;
  		/*
  		 * Periodically drop the lock (if held) regardless of its
  		 * contention, to give chance to IRQs. Abort if fatal signal
  		 * pending or async compaction detects need_resched()
  		 */
  		if (!(blockpfn % SWAP_CLUSTER_MAX)
  		    && compact_unlock_should_abort(&cc->zone->lock, flags,
  								&locked, cc))
  			break;
  		nr_scanned++;
  		if (!pfn_valid_within(blockpfn))
  			goto isolate_fail;
  		if (!valid_page)
  			valid_page = page;
  
  		/*
  		 * For compound pages such as THP and hugetlbfs, we can save
  		 * potentially a lot of iterations if we skip them at once.
  		 * The check is racy, but we can consider only valid values
  		 * and the only danger is skipping too much.
  		 */
  		if (PageCompound(page)) {
  			unsigned int comp_order = compound_order(page);
  
  			if (likely(comp_order < MAX_ORDER)) {
  				blockpfn += (1UL << comp_order) - 1;
  				cursor += (1UL << comp_order) - 1;
  			}
  
  			goto isolate_fail;
  		}
  		if (!PageBuddy(page))
  			goto isolate_fail;
  
  		/*
  		 * If we already hold the lock, we can skip some rechecking.
  		 * Note that if we hold the lock now, checked_pageblock was
  		 * already set in some previous iteration (or strict is true),
  		 * so it is correct to skip the suitable migration target
  		 * recheck as well.
  		 */
  		if (!locked) {
  			/*
  			 * The zone lock must be held to isolate freepages.
  			 * Unfortunately this is a very coarse lock and can be
  			 * heavily contended if there are parallel allocations
  			 * or parallel compactions. For async compaction do not
  			 * spin on the lock and we acquire the lock as late as
  			 * possible.
  			 */
  			locked = compact_trylock_irqsave(&cc->zone->lock,
  								&flags, cc);
  			if (!locked)
  				break;

  			/* Recheck this is a buddy page under lock */
  			if (!PageBuddy(page))
  				goto isolate_fail;
  		}

  		/* Found a free page, will break it into order-0 pages */
  		order = page_order(page);
  		isolated = __isolate_free_page(page, order);
  		if (!isolated)
  			break;
  		set_page_private(page, order);

  		total_isolated += isolated;
  		cc->nr_freepages += isolated;
  		list_add_tail(&page->lru, freelist);
  		if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
  			blockpfn += isolated;
  			break;
  		}
  		/* Advance to the end of split page */
  		blockpfn += isolated - 1;
  		cursor += isolated - 1;
  		continue;
  
  isolate_fail:
  		if (strict)
  			break;
  		else
  			continue;
  	}
  	if (locked)
  		spin_unlock_irqrestore(&cc->zone->lock, flags);
  	/*
  	 * There is a tiny chance that we have read bogus compound_order(),
  	 * so be careful to not go outside of the pageblock.
  	 */
  	if (unlikely(blockpfn > end_pfn))
  		blockpfn = end_pfn;
  	trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn,
  					nr_scanned, total_isolated);
  	/* Record how far we have got within the block */
  	*start_pfn = blockpfn;
  	/*
  	 * If strict isolation is requested by CMA then check that all the
  	 * pages requested were isolated. If there were any failures, 0 is
  	 * returned and CMA will fail.
  	 */
  	if (strict && blockpfn < end_pfn)
  		total_isolated = 0;
  	/* Update the pageblock-skip if the whole pageblock was scanned */
  	if (blockpfn == end_pfn)
  		update_pageblock_skip(cc, valid_page, total_isolated, false);

  	cc->total_free_scanned += nr_scanned;
  	if (total_isolated)
  		count_compact_events(COMPACTISOLATED, total_isolated);
  	return total_isolated;
  }
  /**
   * isolate_freepages_range() - isolate free pages.
   * @start_pfn: The first PFN to start isolating.
   * @end_pfn:   The one-past-last PFN.
   *
   * Non-free pages, invalid PFNs, or zone boundaries within the
 * [start_pfn, end_pfn) range are considered errors, and cause the function
 * to undo its actions and return zero.
 *
 * Otherwise, the function returns one-past-the-last PFN of the isolated
 * page (which may be greater than end_pfn if the end fell in the middle
 * of a free page).
   */
  unsigned long
  isolate_freepages_range(struct compact_control *cc,
  			unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
  	LIST_HEAD(freelist);
  	pfn = start_pfn;
  	block_start_pfn = pageblock_start_pfn(pfn);
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
  	block_end_pfn = pageblock_end_pfn(pfn);
  
  	for (; pfn < end_pfn; pfn += isolated,
  				block_start_pfn = block_end_pfn,
  				block_end_pfn += pageblock_nr_pages) {
  		/* Protect pfn from changing by isolate_freepages_block */
  		unsigned long isolate_start_pfn = pfn;

  		block_end_pfn = min(block_end_pfn, end_pfn);
  		/*
  		 * pfn could pass the block_end_pfn if isolated freepage
  		 * is more than pageblock order. In this case, we adjust
  		 * scanning range to right one.
  		 */
  		if (pfn >= block_end_pfn) {
  			block_start_pfn = pageblock_start_pfn(pfn);
  			block_end_pfn = pageblock_end_pfn(pfn);
  			block_end_pfn = min(block_end_pfn, end_pfn);
  		}
  		if (!pageblock_pfn_to_page(block_start_pfn,
  					block_end_pfn, cc->zone))
  			break;
  		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
  						block_end_pfn, &freelist, true);
  
  		/*
  		 * In strict mode, isolate_freepages_block() returns 0 if
  		 * there are any holes in the block (ie. invalid PFNs or
  		 * non-free pages).
  		 */
  		if (!isolated)
  			break;
  
  		/*
  		 * If we managed to isolate pages, it is always (1 << n) *
  		 * pageblock_nr_pages for some non-negative n.  (Max order
  		 * page may span two pageblocks).
  		 */
  	}
  	/* __isolate_free_page() does not map the pages */
  	map_pages(&freelist);
  
  	if (pfn < end_pfn) {
  		/* Loop terminated early, cleanup. */
  		release_freepages(&freelist);
  		return 0;
  	}
  
  	/* We don't use freelists for anything. */
  	return pfn;
  }
  /* Similar to reclaim, but different enough that they don't share logic */
  static bool too_many_isolated(struct zone *zone)
  {
  	unsigned long active, inactive, isolated;

  	inactive = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) +
  			node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
  	active = node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE) +
  			node_page_state(zone->zone_pgdat, NR_ACTIVE_ANON);
  	isolated = node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE) +
  			node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON);

  	return isolated > (inactive + active) / 2;
  }
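
/*
 * Example of the heuristic above: with 800 inactive and 200 active LRU
 * pages on the node, more than (800 + 200) / 2 == 500 isolated pages is
 * treated as "too many", and the caller waits (or aborts, for async
 * compaction) until fewer pages are isolated.
 */
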
  /**
   * isolate_migratepages_block() - isolate all migrate-able pages within
   *				  a single pageblock
   * @cc:		Compaction control structure.
   * @low_pfn:	The first PFN to isolate
   * @end_pfn:	The one-past-the-last PFN to isolate, within same pageblock
   * @isolate_mode: Isolation mode to be used.
   *
   * Isolate all pages that can be migrated from the range specified by
   * [low_pfn, end_pfn). The range is expected to be within same pageblock.
   * Returns zero if there is a fatal signal pending, otherwise PFN of the
 * first page that was not scanned (which may be less than, equal to, or
 * greater than end_pfn).
   *
   * The pages are isolated on cc->migratepages list (not required to be empty),
   * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field
   * is neither read nor updated.
   */
  static unsigned long
  isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
  			unsigned long end_pfn, isolate_mode_t isolate_mode)
  {
  	struct zone *zone = cc->zone;
  	unsigned long nr_scanned = 0, nr_isolated = 0;
  	struct lruvec *lruvec;
  	unsigned long flags = 0;
  	bool locked = false;
  	struct page *page = NULL, *valid_page = NULL;
  	unsigned long start_pfn = low_pfn;
  	bool skip_on_failure = false;
  	unsigned long next_skip_pfn = 0;

  	/*
  	 * Ensure that there are not too many pages isolated from the LRU
  	 * list by either parallel reclaimers or compaction. If there are,
  	 * delay for some time until fewer pages are isolated
  	 */
  	while (unlikely(too_many_isolated(zone))) {
  		/* async migration should just abort */
  		if (cc->mode == MIGRATE_ASYNC)
  			return 0;

  		congestion_wait(BLK_RW_ASYNC, HZ/10);
  
  		if (fatal_signal_pending(current))
  			return 0;
  	}
  	if (compact_should_abort(cc))
  		return 0;

  	if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
  		skip_on_failure = true;
  		next_skip_pfn = block_end_pfn(low_pfn, cc->order);
  	}
  	/* Time to isolate some pages for migration */
  	for (; low_pfn < end_pfn; low_pfn++) {

  		if (skip_on_failure && low_pfn >= next_skip_pfn) {
  			/*
  			 * We have isolated all migration candidates in the
  			 * previous order-aligned block, and did not skip it due
  			 * to failure. We should migrate the pages now and
  			 * hopefully succeed compaction.
  			 */
  			if (nr_isolated)
  				break;
  
  			/*
  			 * We failed to isolate in the previous order-aligned
  			 * block. Set the new boundary to the end of the
  			 * current block. Note we can't simply increase
  			 * next_skip_pfn by 1 << order, as low_pfn might have
  			 * been incremented by a higher number due to skipping
  			 * a compound or a high-order buddy page in the
  			 * previous loop iteration.
  			 */
  			next_skip_pfn = block_end_pfn(low_pfn, cc->order);
  		}
  		/*
  		 * Periodically drop the lock (if held) regardless of its
  		 * contention, to give chance to IRQs. Abort async compaction
  		 * if contended.
  		 */
  		if (!(low_pfn % SWAP_CLUSTER_MAX)
  		    && compact_unlock_should_abort(zone_lru_lock(zone), flags,
  								&locked, cc))
  			break;

  		if (!pfn_valid_within(low_pfn))
  			goto isolate_fail;
  		nr_scanned++;

  		page = pfn_to_page(low_pfn);

  		if (!valid_page)
  			valid_page = page;
  		/*
  		 * Skip if free. We read page order here without zone lock
  		 * which is generally unsafe, but the race window is small and
  		 * the worst thing that can happen is that we skip some
  		 * potential isolation targets.
  		 */
  		if (PageBuddy(page)) {
  			unsigned long freepage_order = page_order_unsafe(page);
  
  			/*
  			 * Without lock, we cannot be sure that what we got is
  			 * a valid page order. Consider only values in the
  			 * valid order range to prevent low_pfn overflow.
  			 */
  			if (freepage_order > 0 && freepage_order < MAX_ORDER)
  				low_pfn += (1UL << freepage_order) - 1;
  			continue;
  		}

  		/*
  		 * Regardless of being on LRU, compound pages such as THP and
  		 * hugetlbfs are not to be compacted. We can potentially save
  		 * a lot of iterations if we skip them at once. The check is
  		 * racy, but we can consider only valid values and the only
  		 * danger is skipping too much.
  		 */
  		if (PageCompound(page)) {
  			unsigned int comp_order = compound_order(page);
  
  			if (likely(comp_order < MAX_ORDER))
  				low_pfn += (1UL << comp_order) - 1;

  			goto isolate_fail;
  		}
  		/*
  		 * Check may be lockless but that's ok as we recheck later.
  		 * It's possible to migrate LRU and non-lru movable pages.
  		 * Skip any other type of page
  		 */
  		if (!PageLRU(page)) {
  			/*
  			 * __PageMovable can return false positive so we need
  			 * to verify it under page_lock.
  			 */
  			if (unlikely(__PageMovable(page)) &&
  					!PageIsolated(page)) {
  				if (locked) {
  					spin_unlock_irqrestore(zone_lru_lock(zone),
  									flags);
  					locked = false;
  				}
  				if (!isolate_movable_page(page, isolate_mode))
  					goto isolate_success;
  			}
  			goto isolate_fail;
  		}

  		/*
  		 * Migration will fail if an anonymous page is pinned in memory,
  		 * so avoid taking lru_lock and isolating it unnecessarily in an
  		 * admittedly racy check.
  		 */
  		if (!page_mapping(page) &&
  		    page_count(page) > page_mapcount(page))
  			goto isolate_fail;

  		/*
  		 * Only allow to migrate anonymous pages in GFP_NOFS context
  		 * because those do not depend on fs locks.
  		 */
  		if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
  			goto isolate_fail;
  		/* If we already hold the lock, we can skip some rechecking */
  		if (!locked) {
  			locked = compact_trylock_irqsave(zone_lru_lock(zone),
  								&flags, cc);
  			if (!locked)
  				break;

  			/* Recheck PageLRU and PageCompound under lock */
  			if (!PageLRU(page))
  				goto isolate_fail;
  
  			/*
			 * The page became compound since the non-locked check,
  			 * and it's on LRU. It can only be a THP so the order
  			 * is safe to read and it's 0 for tail pages.
  			 */
  			if (unlikely(PageCompound(page))) {
  				low_pfn += (1UL << compound_order(page)) - 1;
  				goto isolate_fail;
  			}
  		}
  		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);

  		/* Try isolate the page */
  		if (__isolate_lru_page(page, isolate_mode) != 0)
  			goto isolate_fail;

  		VM_BUG_ON_PAGE(PageCompound(page), page);

  		/* Successfully isolated */
  		del_page_from_lru_list(page, lruvec, page_lru(page));
  		inc_node_page_state(page,
  				NR_ISOLATED_ANON + page_is_file_cache(page));
  
  isolate_success:
  		list_add(&page->lru, &cc->migratepages);
  		cc->nr_migratepages++;
  		nr_isolated++;

  		/*
  		 * Record where we could have freed pages by migration and not
  		 * yet flushed them to buddy allocator.
		 * - this is the lowest page that was isolated and is likely to
		 * then be freed by migration.
  		 */
  		if (!cc->last_migrated_pfn)
  			cc->last_migrated_pfn = low_pfn;
  		/* Avoid isolating too much */
  		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
  			++low_pfn;
  			break;
  		}
  
  		continue;
  isolate_fail:
  		if (!skip_on_failure)
  			continue;
  
  		/*
  		 * We have isolated some pages, but then failed. Release them
  		 * instead of migrating, as we cannot form the cc->order buddy
  		 * page anyway.
  		 */
  		if (nr_isolated) {
  			if (locked) {
  				spin_unlock_irqrestore(zone_lru_lock(zone), flags);
  				locked = false;
  			}
  			putback_movable_pages(&cc->migratepages);
  			cc->nr_migratepages = 0;
  			cc->last_migrated_pfn = 0;
  			nr_isolated = 0;
  		}
  
  		if (low_pfn < next_skip_pfn) {
  			low_pfn = next_skip_pfn - 1;
  			/*
  			 * The check near the loop beginning would have updated
  			 * next_skip_pfn too, but this is a bit simpler.
  			 */
  			next_skip_pfn += 1UL << cc->order;
  		}
  	}
  	/*
  	 * The PageBuddy() check could have potentially brought us outside
  	 * the range to be scanned.
  	 */
  	if (unlikely(low_pfn > end_pfn))
  		low_pfn = end_pfn;
  	if (locked)
  		spin_unlock_irqrestore(zone_lru_lock(zone), flags);

  	/*
  	 * Update the pageblock-skip information and cached scanner pfn,
  	 * if the whole pageblock was scanned without isolating any page.
  	 */
  	if (low_pfn == end_pfn)
  		update_pageblock_skip(cc, valid_page, nr_isolated, true);

  	trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
  						nr_scanned, nr_isolated);

  	cc->total_migrate_scanned += nr_scanned;
  	if (nr_isolated)
  		count_compact_events(COMPACTISOLATED, nr_isolated);

  	return low_pfn;
  }
  /**
   * isolate_migratepages_range() - isolate migrate-able pages in a PFN range
   * @cc:        Compaction control structure.
   * @start_pfn: The first PFN to start isolating.
   * @end_pfn:   The one-past-last PFN.
   *
   * Returns zero if isolation fails fatally due to e.g. pending signal.
   * Otherwise, function returns one-past-the-last PFN of isolated page
   * (which may be greater than end_pfn if end fell in a middle of a THP page).
   */
  unsigned long
  isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
  							unsigned long end_pfn)
  {
  	unsigned long pfn, block_start_pfn, block_end_pfn;
  
  	/* Scan block by block. First and last block may be incomplete */
  	pfn = start_pfn;
  	block_start_pfn = pageblock_start_pfn(pfn);
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
  	block_end_pfn = pageblock_end_pfn(pfn);
  
  	for (; pfn < end_pfn; pfn = block_end_pfn,
  				block_start_pfn = block_end_pfn,
  				block_end_pfn += pageblock_nr_pages) {
  
  		block_end_pfn = min(block_end_pfn, end_pfn);
  		if (!pageblock_pfn_to_page(block_start_pfn,
  					block_end_pfn, cc->zone))
  			continue;
  
  		pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
  							ISOLATE_UNEVICTABLE);
  		if (!pfn)
  			break;
  
  		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
  			break;
  	}
  
  	return pfn;
  }
  #endif /* CONFIG_COMPACTION || CONFIG_CMA */
  #ifdef CONFIG_COMPACTION

  static bool suitable_migration_source(struct compact_control *cc,
  							struct page *page)
  {
  	int block_mt;
  
  	if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction)
  		return true;
  	block_mt = get_pageblock_migratetype(page);
  
  	if (cc->migratetype == MIGRATE_MOVABLE)
  		return is_migrate_movable(block_mt);
  	else
  		return block_mt == cc->migratetype;
  }
  /* Returns true if the page is within a block suitable for migration to */
  static bool suitable_migration_target(struct compact_control *cc,
  							struct page *page)
  {
  	/* If the page is a large free page, then disallow migration */
  	if (PageBuddy(page)) {
  		/*
  		 * We are checking page_order without zone->lock taken. But
  		 * the only small danger is that we skip a potentially suitable
  		 * pageblock, so it's not worth to check order for valid range.
  		 */
  		if (page_order_unsafe(page) >= pageblock_order)
  			return false;
  	}
  	if (cc->ignore_block_suitable)
  		return true;
  	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
  	if (is_migrate_movable(get_pageblock_migratetype(page)))
  		return true;
  
  	/* Otherwise skip the block */
  	return false;
  }
  /*
   * Test whether the free scanner has reached the same or lower pageblock than
   * the migration scanner, and compaction should thus terminate.
   */
  static inline bool compact_scanners_met(struct compact_control *cc)
  {
  	return (cc->free_pfn >> pageblock_order)
  		<= (cc->migrate_pfn >> pageblock_order);
  }
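
/*
 * Example: with pageblock_order == 9 (512-page blocks), free_pfn == 131000
 * falls in pageblock 255 and migrate_pfn == 131500 in pageblock 256, so
 * 255 <= 256 and the scanners are considered to have met.
 */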
  
  /*
   * Based on information in the current compact_control, find blocks
   * suitable for isolating free pages from and then isolate them.
   */
  static void isolate_freepages(struct compact_control *cc)
  {
  	struct zone *zone = cc->zone;
  	struct page *page;
  	unsigned long block_start_pfn;	/* start of current pageblock */
  	unsigned long isolate_start_pfn; /* exact pfn we start at */
  	unsigned long block_end_pfn;	/* end of current pageblock */
  	unsigned long low_pfn;	     /* lowest pfn scanner is able to scan */
  	struct list_head *freelist = &cc->freepages;

  	/*
  	 * Initialise the free scanner. The starting point is where we last
  	 * successfully isolated from, zone-cached value, or the end of the
  	 * zone when isolating for the first time. For looping we also need
  	 * this pfn aligned down to the pageblock boundary, because we do
  	 * block_start_pfn -= pageblock_nr_pages in the for loop.
	 * For the ending point, take care when isolating in the last pageblock
	 * of a zone which ends in the middle of a pageblock.
  	 * The low boundary is the end of the pageblock the migration scanner
  	 * is using.
  	 */
  	isolate_start_pfn = cc->free_pfn;
  	block_start_pfn = pageblock_start_pfn(cc->free_pfn);
  	block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
  						zone_end_pfn(zone));
  	low_pfn = pageblock_end_pfn(cc->migrate_pfn);

  	/*
  	 * Isolate free pages until enough are available to migrate the
  	 * pages on cc->migratepages. We stop searching if the migrate
  	 * and free page scanners meet or enough free pages are isolated.
  	 */
  	for (; block_start_pfn >= low_pfn;
  				block_end_pfn = block_start_pfn,
  				block_start_pfn -= pageblock_nr_pages,
  				isolate_start_pfn = block_start_pfn) {
  		/*
  		 * This can iterate a massively long zone without finding any
  		 * suitable migration targets, so periodically check if we need
  		 * to schedule, or even abort async compaction.
  		 */
  		if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
  						&& compact_should_abort(cc))
  			break;

  		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
  									zone);
  		if (!page)
  			continue;
  
  		/* Check the block is suitable for migration */
  		if (!suitable_migration_target(cc, page))
  			continue;

  		/* If isolation recently failed, do not retry */
  		if (!isolation_suitable(cc, page))
  			continue;
  		/* Found a block suitable for isolating free pages from. */
  		isolate_freepages_block(cc, &isolate_start_pfn, block_end_pfn,
  					freelist, false);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1035
1036
  
  		/*
  		 * If we isolated enough freepages, or aborted due to lock
  		 * contention, terminate.
  		 */
  		if ((cc->nr_freepages >= cc->nr_migratepages)
  							|| cc->contended) {
  			if (isolate_start_pfn >= block_end_pfn) {
  				/*
  				 * Restart at previous pageblock if more
  				 * freepages can be isolated next time.
  				 */
  				isolate_start_pfn =
  					block_start_pfn - pageblock_nr_pages;
  			}
  			break;
  		} else if (isolate_start_pfn < block_end_pfn) {
  			/*
  			 * If isolation failed early, do not continue
  			 * needlessly.
  			 */
  			break;
  		}
  	}

  	/* __isolate_free_page() does not map the pages */
  	map_pages(freelist);

  	/*
  	 * Record where the free scanner will restart next time. Either we
  	 * broke from the loop and set isolate_start_pfn based on the last
  	 * call to isolate_freepages_block(), or we met the migration scanner
  	 * and the loop terminated due to isolate_start_pfn < low_pfn
  	 */
  	cc->free_pfn = isolate_start_pfn;
  }
  
  /*
   * This is a migrate-callback that "allocates" freepages by taking pages
   * from the isolated freelists in the block we are migrating to.
   */
  static struct page *compaction_alloc(struct page *migratepage,
  					unsigned long data,
  					int **result)
  {
  	struct compact_control *cc = (struct compact_control *)data;
  	struct page *freepage;
  	/*
  	 * Isolate free pages if necessary, and if we are not aborting due to
  	 * contention.
  	 */
  	if (list_empty(&cc->freepages)) {
  		if (!cc->contended)
  			isolate_freepages(cc);
  
  		if (list_empty(&cc->freepages))
  			return NULL;
  	}
  
  	freepage = list_entry(cc->freepages.next, struct page, lru);
  	list_del(&freepage->lru);
  	cc->nr_freepages--;
  
  	return freepage;
  }
  
  /*
   * This is a migrate-callback that "frees" freepages back to the isolated
   * freelist.  All pages on the freelist are from the same zone, so there is no
   * special handling needed for NUMA.
   */
  static void compaction_free(struct page *page, unsigned long data)
  {
  	struct compact_control *cc = (struct compact_control *)data;
  
  	list_add(&page->lru, &cc->freepages);
  	cc->nr_freepages++;
  }
  /* possible outcome of isolate_migratepages */
  typedef enum {
  	ISOLATE_ABORT,		/* Abort compaction now */
  	ISOLATE_NONE,		/* No pages isolated, continue scanning */
  	ISOLATE_SUCCESS,	/* Pages isolated, migrate */
  } isolate_migrate_t;
  
  /*
   * Allow userspace to control policy on scanning the unevictable LRU for
   * compactable pages.
   */
  int sysctl_compact_unevictable_allowed __read_mostly = 1;
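
  /*
   * Illustrative usage (assuming the usual sysctl exposure of this knob as
   * vm.compact_unevictable_allowed): e.g.
   *	sysctl vm.compact_unevictable_allowed=0
   * stops the migration scanner from isolating unevictable (e.g. mlocked)
   * pages, since ISOLATE_UNEVICTABLE is then left out of isolate_mode.
   */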
  
  /*
   * Isolate all pages that can be migrated from the first suitable block,
   * starting at the block pointed to by the migrate scanner pfn within
   * compact_control.
   */
  static isolate_migrate_t isolate_migratepages(struct zone *zone,
  					struct compact_control *cc)
  {
  	unsigned long block_start_pfn;
  	unsigned long block_end_pfn;
  	unsigned long low_pfn;
  	struct page *page;
  	const isolate_mode_t isolate_mode =
  		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
  		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);

  	/*
  	 * Start at where we last stopped, or beginning of the zone as
  	 * initialized by compact_zone()
  	 */
  	low_pfn = cc->migrate_pfn;
  	block_start_pfn = pageblock_start_pfn(low_pfn);
  	if (block_start_pfn < zone->zone_start_pfn)
  		block_start_pfn = zone->zone_start_pfn;

  	/* Only scan within a pageblock boundary */
  	block_end_pfn = pageblock_end_pfn(low_pfn);

  	/*
  	 * Iterate over whole pageblocks until we find the first suitable.
  	 * Do not cross the free scanner.
  	 */
  	for (; block_end_pfn <= cc->free_pfn;
  			low_pfn = block_end_pfn,
  			block_start_pfn = block_end_pfn,
  			block_end_pfn += pageblock_nr_pages) {

  		/*
  		 * This can potentially iterate a massively long zone with
  		 * many pageblocks unsuitable, so periodically check if we
  		 * need to schedule, or even abort async compaction.
  		 */
  		if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
  						&& compact_should_abort(cc))
  			break;

  		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
  									zone);
  		if (!page)
  			continue;

  		/* If isolation recently failed, do not retry */
  		if (!isolation_suitable(cc, page))
  			continue;
  
  		/*
  		 * For async compaction, also only scan in MOVABLE blocks.
  		 * Async compaction is optimistic to see if the minimum amount
  		 * of work satisfies the allocation.
  		 */
  		if (!suitable_migration_source(cc, page))
  			continue;

  		/* Perform the isolation */
  		low_pfn = isolate_migratepages_block(cc, low_pfn,
  						block_end_pfn, isolate_mode);

  		if (!low_pfn || cc->contended)
  			return ISOLATE_ABORT;
  
  		/*
  		 * Either we isolated something and proceed with migration. Or
  		 * we failed and compact_zone should decide if we should
  		 * continue or not.
  		 */
  		break;
  	}
  	/* Record where migration scanner will be restarted. */
  	cc->migrate_pfn = low_pfn;

  	return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
  }
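
  /*
   * Note: the migrate scanner above walks a zone from low pfns upwards while
   * isolate_freepages() walks from high pfns downwards; compact_scanners_met()
   * (checked in __compact_finished() below) detects when the two meet, at
   * which point the whole zone has been considered.
   */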
  /*
   * order == -1 is expected when compacting via
   * /proc/sys/vm/compact_memory
   */
  static inline bool is_via_compact_memory(int order)
  {
  	return order == -1;
  }
  static enum compact_result __compact_finished(struct zone *zone,
  						struct compact_control *cc)
  {
  	unsigned int order;
  	const int migratetype = cc->migratetype;

  	if (cc->contended || fatal_signal_pending(current))
  		return COMPACT_CONTENDED;

  	/* Compaction run completes if the migrate and free scanner meet */
  	if (compact_scanners_met(cc)) {
  		/* Let the next compaction start anew. */
  		reset_cached_positions(zone);

  		/*
  		 * Mark that the PG_migrate_skip information should be cleared
  		 * by kswapd when it goes to sleep. kcompactd does not set the
  		 * flag itself as the decision to be clear should be directly
  		 * based on an allocation request.
  		 */
  		if (cc->direct_compaction)
  			zone->compact_blockskip_flush = true;

  		if (cc->whole_zone)
  			return COMPACT_COMPLETE;
  		else
  			return COMPACT_PARTIAL_SKIPPED;
  	}

  	if (is_via_compact_memory(cc->order))
  		return COMPACT_CONTINUE;

  	if (cc->finishing_block) {
  		/*
  		 * We have finished the pageblock, but better check again that
  		 * we really succeeded.
  		 */
  		if (IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
  			cc->finishing_block = false;
  		else
  			return COMPACT_CONTINUE;
  	}
  	/* Direct compactor: Is a suitable page free? */
  	for (order = cc->order; order < MAX_ORDER; order++) {
  		struct free_area *area = &zone->free_area[order];
  		bool can_steal;
  
  		/* Job done if page is free of the right migratetype */
  		if (!list_empty(&area->free_list[migratetype]))
  			return COMPACT_SUCCESS;

  #ifdef CONFIG_CMA
  		/* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
  		if (migratetype == MIGRATE_MOVABLE &&
  			!list_empty(&area->free_list[MIGRATE_CMA]))
  			return COMPACT_SUCCESS;
  #endif
  		/*
  		 * Job done if allocation would steal freepages from
  		 * other migratetype buddy lists.
  		 */
  		if (find_suitable_fallback(area, order, migratetype,
  						true, &can_steal) != -1) {
  
  			/* movable pages are OK in any pageblock */
  			if (migratetype == MIGRATE_MOVABLE)
  				return COMPACT_SUCCESS;
  
  			/*
  			 * We are stealing for a non-movable allocation. Make
  			 * sure we finish compacting the current pageblock
  			 * first so it is as free as possible and we won't
  			 * have to steal another one soon. This only applies
  			 * to sync compaction, as async compaction operates
  			 * on pageblocks of the same migratetype.
  			 */
  			if (cc->mode == MIGRATE_ASYNC ||
  					IS_ALIGNED(cc->migrate_pfn,
  							pageblock_nr_pages)) {
  				return COMPACT_SUCCESS;
  			}
  
  			cc->finishing_block = true;
  			return COMPACT_CONTINUE;
  		}
  	}

  	return COMPACT_NO_SUITABLE_PAGE;
  }
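
  /*
   * Thin wrapper around __compact_finished(): it emits the tracepoint and
   * translates the internal COMPACT_NO_SUITABLE_PAGE result back into
   * COMPACT_CONTINUE for callers.
   */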
  static enum compact_result compact_finished(struct zone *zone,
  			struct compact_control *cc)
  {
  	int ret;

  	ret = __compact_finished(zone, cc);
  	trace_mm_compaction_finished(zone, cc->order, ret);
  	if (ret == COMPACT_NO_SUITABLE_PAGE)
  		ret = COMPACT_CONTINUE;
  
  	return ret;
  }

  /*
   * compaction_suitable: Is this suitable to run compaction on this zone now?
   * Returns
   *   COMPACT_SKIPPED  - If there are too few free pages for compaction
   *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
   *   COMPACT_CONTINUE - If compaction should run now
   */
  static enum compact_result __compaction_suitable(struct zone *zone, int order,
  					unsigned int alloc_flags,
  					int classzone_idx,
  					unsigned long wmark_target)
  {
  	unsigned long watermark;

  	if (is_via_compact_memory(order))
  		return COMPACT_CONTINUE;

  	watermark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
  	/*
  	 * If watermarks for high-order allocation are already met, there
  	 * should be no need for compaction at all.
  	 */
  	if (zone_watermark_ok(zone, order, watermark, classzone_idx,
  								alloc_flags))
  		return COMPACT_SUCCESS;

  	/*
  	 * Watermarks for order-0 must be met for compaction to be able to
  	 * isolate free pages for migration targets. This means that the
  	 * watermark and alloc_flags have to match, or be more pessimistic than
  	 * the check in __isolate_free_page(). We don't use the direct
  	 * compactor's alloc_flags, as they are not relevant for freepage
  	 * isolation. We however do use the direct compactor's classzone_idx to
  	 * skip over zones where lowmem reserves would prevent allocation even
  	 * if compaction succeeds.
  	 * For costly orders, we require low watermark instead of min for
  	 * compaction to proceed to increase its chances.
  	 * ALLOC_CMA is used, as pages in CMA pageblocks are considered
  	 * suitable migration targets
  	 */
  	watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
  				low_wmark_pages(zone) : min_wmark_pages(zone);
  	watermark += compact_gap(order);
  	if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
  						ALLOC_CMA, wmark_target))
  		return COMPACT_SKIPPED;
  	return COMPACT_CONTINUE;
  }
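
  /*
   * Worked example for the check above (illustrative): for an order-3 request
   * below PAGE_ALLOC_COSTLY_ORDER, the zone must have more than
   * min_wmark_pages(zone) + compact_gap(3) free base pages before compaction
   * is attempted; compact_gap() adds headroom (on the order of twice the
   * request size) so the free scanner can isolate migration targets without
   * pushing the zone below its watermark.
   */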
  
  enum compact_result compaction_suitable(struct zone *zone, int order,
  					unsigned int alloc_flags,
  					int classzone_idx)
  {
  	enum compact_result ret;
  	int fragindex;
  
  	ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx,
  				    zone_page_state(zone, NR_FREE_PAGES));
  	/*
  	 * fragmentation index determines if allocation failures are due to
  	 * low memory or external fragmentation
  	 *
  	 * index of -1000 would imply allocations might succeed depending on
  	 * watermarks, but we already failed the high-order watermark check
  	 * index towards 0 implies failure is due to lack of memory
  	 * index towards 1000 implies failure is due to fragmentation
  	 *
  	 * Only compact if a failure would be due to fragmentation. Also
  	 * ignore fragindex for non-costly orders where the alternative to
  	 * a successful reclaim/compaction is OOM. Fragindex and the
  	 * vm.extfrag_threshold sysctl is meant as a heuristic to prevent
  	 * excessive compaction for costly orders, but it should not be at the
  	 * expense of system stability.
  	 */
  	if (ret == COMPACT_CONTINUE && (order > PAGE_ALLOC_COSTLY_ORDER)) {
  		fragindex = fragmentation_index(zone, order);
  		if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
  			ret = COMPACT_NOT_SUITABLE_ZONE;
  	}

  	trace_mm_compaction_suitable(zone, order, ret);
  	if (ret == COMPACT_NOT_SUITABLE_ZONE)
  		ret = COMPACT_SKIPPED;
  
  	return ret;
  }
  bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
  		int alloc_flags)
  {
  	struct zone *zone;
  	struct zoneref *z;
  
  	/*
  	 * Make sure at least one zone would pass __compaction_suitable if we continue
  	 * retrying the reclaim.
  	 */
  	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
  					ac->nodemask) {
  		unsigned long available;
  		enum compact_result compact_result;
  
  		/*
  		 * Do not consider all the reclaimable memory because we do not
  		 * want to thrash just for a single high-order allocation which
  		 * is not even guaranteed to appear even if __compaction_suitable
  		 * is happy about the watermark check.
  		 */
  		available = zone_reclaimable_pages(zone) / order;
  		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
  		compact_result = __compaction_suitable(zone, order, alloc_flags,
  				ac_classzone_idx(ac), available);
  		if (compact_result != COMPACT_SKIPPED)
  			return true;
  	}
  
  	return false;
  }
  static enum compact_result compact_zone(struct zone *zone, struct compact_control *cc)
  {
  	enum compact_result ret;
  	unsigned long start_pfn = zone->zone_start_pfn;
  	unsigned long end_pfn = zone_end_pfn(zone);
  	const bool sync = cc->mode != MIGRATE_ASYNC;

  	cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
  	ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
  							cc->classzone_idx);
  	/* Compaction is likely to fail */
  	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
  		return ret;
  
  	/* huh, compaction_suitable is returning something unexpected */
  	VM_BUG_ON(ret != COMPACT_CONTINUE);

  	/*
  	 * Clear pageblock skip if there were failures recently and compaction
  	 * is about to be retried after being deferred.
  	 */
  	if (compaction_restarting(zone, cc->order))
  		__reset_isolation_suitable(zone);
  
  	/*
c89511ab2   Mel Gorman   mm: compaction: R...
1450
  	 * Setup to move all movable pages to the end of the zone. Used cached
06ed29989   Vlastimil Babka   mm, compaction: m...
1451
1452
1453
  	 * information on where the scanners should start (unless we explicitly
  	 * want to compact the whole zone), but check that it is initialised
  	 * by ensuring the values are within zone boundaries.
c89511ab2   Mel Gorman   mm: compaction: R...
1454
  	 */
06ed29989   Vlastimil Babka   mm, compaction: m...
1455
  	if (cc->whole_zone) {
c89511ab2   Mel Gorman   mm: compaction: R...
1456
  		cc->migrate_pfn = start_pfn;
06ed29989   Vlastimil Babka   mm, compaction: m...
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
  		cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
  	} else {
  		cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
  		cc->free_pfn = zone->compact_cached_free_pfn;
  		if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
  			cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
  			zone->compact_cached_free_pfn = cc->free_pfn;
  		}
  		if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
  			cc->migrate_pfn = start_pfn;
  			zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
  			zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
  		}

  		if (cc->migrate_pfn == start_pfn)
  			cc->whole_zone = true;
  	}

  	cc->last_migrated_pfn = 0;

  	trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
  				cc->free_pfn, end_pfn, sync);

  	migrate_prep_local();

  	while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
  		int err;

  		switch (isolate_migratepages(zone, cc)) {
  		case ISOLATE_ABORT:
  			ret = COMPACT_CONTENDED;
  			putback_movable_pages(&cc->migratepages);
  			cc->nr_migratepages = 0;
  			goto out;
  		case ISOLATE_NONE:
  			/*
  			 * We haven't isolated and migrated anything, but
  			 * there might still be unflushed migrations from
  			 * previous cc->order aligned block.
  			 */
  			goto check_drain;
  		case ISOLATE_SUCCESS:
  			;
  		}

  		err = migrate_pages(&cc->migratepages, compaction_alloc,
  				compaction_free, (unsigned long)cc, cc->mode,
  				MR_COMPACTION);

  		trace_mm_compaction_migratepages(cc->nr_migratepages, err,
  							&cc->migratepages);

  		/* All pages were either migrated or will be released */
  		cc->nr_migratepages = 0;
  		if (err) {
  			putback_movable_pages(&cc->migratepages);
  			/*
  			 * migrate_pages() may return -ENOMEM when scanners meet
  			 * and we want compact_finished() to detect it
  			 */
  			if (err == -ENOMEM && !compact_scanners_met(cc)) {
  				ret = COMPACT_CONTENDED;
  				goto out;
  			}
  			/*
  			 * We failed to migrate at least one page in the current
  			 * order-aligned block, so skip the rest of it.
  			 */
  			if (cc->direct_compaction &&
  						(cc->mode == MIGRATE_ASYNC)) {
  				cc->migrate_pfn = block_end_pfn(
  						cc->migrate_pfn - 1, cc->order);
  				/* Draining pcplists is useless in this case */
  				cc->last_migrated_pfn = 0;
  
  			}
  		}

  check_drain:
  		/*
  		 * Has the migration scanner moved away from the previous
  		 * cc->order aligned block where we migrated from? If yes,
  		 * flush the pages that were freed, so that they can merge and
  		 * compact_finished() can detect immediately if allocation
  		 * would succeed.
  		 */
  		if (cc->order > 0 && cc->last_migrated_pfn) {
  			int cpu;
  			unsigned long current_block_start =
  				block_start_pfn(cc->migrate_pfn, cc->order);

  			if (cc->last_migrated_pfn < current_block_start) {
  				cpu = get_cpu();
  				lru_add_drain_cpu(cpu);
  				drain_local_pages(zone);
  				put_cpu();
  				/* No more flushing until we migrate again */
  				cc->last_migrated_pfn = 0;
  			}
  		}
  	}

  out:
  	/*
  	 * Release free pages and update where the free scanner should restart,
  	 * so we don't leave any returned pages behind in the next attempt.
  	 */
  	if (cc->nr_freepages > 0) {
  		unsigned long free_pfn = release_freepages(&cc->freepages);
  
  		cc->nr_freepages = 0;
  		VM_BUG_ON(free_pfn == 0);
  		/* The cached pfn is always the first in a pageblock */
  		free_pfn = pageblock_start_pfn(free_pfn);
  		/*
  		 * Only go back, not forward. The cached pfn might have been
  		 * already reset to zone end in compact_finished()
  		 */
  		if (free_pfn > zone->compact_cached_free_pfn)
  			zone->compact_cached_free_pfn = free_pfn;
  	}

  	count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned);
  	count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned);

  	trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
  				cc->free_pfn, end_pfn, sync, ret);

  	return ret;
  }

  static enum compact_result compact_zone_order(struct zone *zone, int order,
  		gfp_t gfp_mask, enum compact_priority prio,
  		unsigned int alloc_flags, int classzone_idx)
  {
  	enum compact_result ret;
  	struct compact_control cc = {
  		.nr_freepages = 0,
  		.nr_migratepages = 0,
  		.total_migrate_scanned = 0,
  		.total_free_scanned = 0,
  		.order = order,
  		.gfp_mask = gfp_mask,
  		.zone = zone,
  		.mode = (prio == COMPACT_PRIO_ASYNC) ?
  					MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT,
  		.alloc_flags = alloc_flags,
  		.classzone_idx = classzone_idx,
  		.direct_compaction = true,
  		.whole_zone = (prio == MIN_COMPACT_PRIORITY),
  		.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
  		.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
  	};
  	INIT_LIST_HEAD(&cc.freepages);
  	INIT_LIST_HEAD(&cc.migratepages);

  	ret = compact_zone(zone, &cc);

  	VM_BUG_ON(!list_empty(&cc.freepages));
  	VM_BUG_ON(!list_empty(&cc.migratepages));

  	return ret;
  }

  int sysctl_extfrag_threshold = 500;

  /**
   * try_to_compact_pages - Direct compact to satisfy a high-order allocation
   * @gfp_mask: The GFP mask of the current allocation
   * @order: The order of the current allocation
   * @alloc_flags: The allocation flags of the current allocation
   * @ac: The context of current allocation
   * @prio: Determines how hard direct compaction should try to succeed
   *
   * This is the main entry point for direct page compaction.
   */
  enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
  		unsigned int alloc_flags, const struct alloc_context *ac,
  		enum compact_priority prio)
  {
  	int may_perform_io = gfp_mask & __GFP_IO;
  	struct zoneref *z;
  	struct zone *zone;
  	enum compact_result rc = COMPACT_SKIPPED;

  	/*
  	 * Check if the GFP flags allow compaction - GFP_NOIO is really
  	 * tricky context because the migration might require IO
  	 */
  	if (!may_perform_io)
  		return COMPACT_SKIPPED;

  	trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);

  	/* Compact each zone in the list */
  	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
  								ac->nodemask) {
  		enum compact_result status;

  		if (prio > MIN_COMPACT_PRIORITY
  					&& compaction_deferred(zone, order)) {
  			rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
  			continue;
  		}

  		status = compact_zone_order(zone, order, gfp_mask, prio,
  					alloc_flags, ac_classzone_idx(ac));
  		rc = max(status, rc);

  		/* The allocation should succeed, stop compacting */
  		if (status == COMPACT_SUCCESS) {
  			/*
  			 * We think the allocation will succeed in this zone,
  			 * but it is not certain, hence the false. The caller
  			 * will repeat this with true if allocation indeed
  			 * succeeds in this zone.
  			 */
  			compaction_defer_reset(zone, order, false);

  			break;
  		}

  		if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
  					status == COMPACT_PARTIAL_SKIPPED))
  			/*
  			 * We think that allocation won't succeed in this zone
  			 * so we defer compaction there. If it ends up
  			 * succeeding after all, it will be reset.
  			 */
  			defer_compaction(zone, order);

  		/*
  		 * We might have stopped compacting due to need_resched() in
  		 * async compaction, or due to a fatal signal detected. In that
  		 * case do not try further zones
  		 */
  		if ((prio == COMPACT_PRIO_ASYNC && need_resched())
  					|| fatal_signal_pending(current))
  			break;
  	}
  
  	return rc;
  }
  /* Compact all zones within a node */
  static void compact_node(int nid)
  {
  	pg_data_t *pgdat = NODE_DATA(nid);
  	int zoneid;
  	struct zone *zone;
  	struct compact_control cc = {
  		.order = -1,
  		.total_migrate_scanned = 0,
  		.total_free_scanned = 0,
  		.mode = MIGRATE_SYNC,
  		.ignore_skip_hint = true,
  		.whole_zone = true,
  		.gfp_mask = GFP_KERNEL,
  	};

  	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
  
  		zone = &pgdat->node_zones[zoneid];
  		if (!populated_zone(zone))
  			continue;
  		cc.nr_freepages = 0;
  		cc.nr_migratepages = 0;
  		cc.zone = zone;
  		INIT_LIST_HEAD(&cc.freepages);
  		INIT_LIST_HEAD(&cc.migratepages);

  		compact_zone(zone, &cc);

  		VM_BUG_ON(!list_empty(&cc.freepages));
  		VM_BUG_ON(!list_empty(&cc.migratepages));
  	}
  }
  
  /* Compact all nodes in the system */
  static void compact_nodes(void)
  {
  	int nid;

  	/* Flush pending updates to the LRU lists */
  	lru_add_drain_all();

  	for_each_online_node(nid)
  		compact_node(nid);
  }
  
  /* The written value is actually unused, all memory is compacted */
  int sysctl_compact_memory;
  /*
   * This is the entry point for compacting all nodes via
   * /proc/sys/vm/compact_memory
   */
  int sysctl_compaction_handler(struct ctl_table *table, int write,
  			void __user *buffer, size_t *length, loff_t *ppos)
  {
  	if (write)
  		compact_nodes();
  
  	return 0;
  }
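
  /*
   * Example (illustrative): writing any value, e.g.
   *	echo 1 > /proc/sys/vm/compact_memory
   * lands in sysctl_compaction_handler() above and compacts every online
   * node; the value written is ignored.
   */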

  int sysctl_extfrag_handler(struct ctl_table *table, int write,
  			void __user *buffer, size_t *length, loff_t *ppos)
  {
  	proc_dointvec_minmax(table, write, buffer, length, ppos);
  
  	return 0;
  }
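
  /*
   * Illustrative tuning note: sysctl_extfrag_threshold is exposed as
   * /proc/sys/vm/extfrag_threshold. Lowering it (e.g.
   *	sysctl vm.extfrag_threshold=400
   * ) makes compaction_suitable() proceed more often for costly orders,
   * since zones are only skipped when the fragmentation index falls between
   * 0 and this threshold.
   */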
  #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
  static ssize_t sysfs_compact_node(struct device *dev,
  			struct device_attribute *attr,
  			const char *buf, size_t count)
  {
  	int nid = dev->id;
  
  	if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
  		/* Flush pending updates to the LRU lists */
  		lru_add_drain_all();
  
  		compact_node(nid);
  	}

  	return count;
  }

  static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
  
  int compaction_register_node(struct node *node)
  {
  	return device_create_file(&node->dev, &dev_attr_compact);
  }
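
  /*
   * Illustrative usage: the attribute registered above appears as
   * /sys/devices/system/node/nodeN/compact; writing to it, e.g.
   *	echo 1 > /sys/devices/system/node/node0/compact
   * invokes sysfs_compact_node() and compacts just that node.
   */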
  
  void compaction_unregister_node(struct node *node)
  {
  	return device_remove_file(&node->dev, &dev_attr_compact);
  }
  #endif /* CONFIG_SYSFS && CONFIG_NUMA */

  static inline bool kcompactd_work_requested(pg_data_t *pgdat)
  {
  	return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
  }
  
  static bool kcompactd_node_suitable(pg_data_t *pgdat)
  {
  	int zoneid;
  	struct zone *zone;
  	enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx;
  	for (zoneid = 0; zoneid <= classzone_idx; zoneid++) {
  		zone = &pgdat->node_zones[zoneid];
  
  		if (!populated_zone(zone))
  			continue;
  
  		if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
  					classzone_idx) == COMPACT_CONTINUE)
  			return true;
  	}
  
  	return false;
  }
  
  static void kcompactd_do_work(pg_data_t *pgdat)
  {
  	/*
  	 * With no special task, compact all zones so that a page of requested
  	 * order is allocatable.
  	 */
  	int zoneid;
  	struct zone *zone;
  	struct compact_control cc = {
  		.order = pgdat->kcompactd_max_order,
  		.total_migrate_scanned = 0,
  		.total_free_scanned = 0,
  		.classzone_idx = pgdat->kcompactd_classzone_idx,
  		.mode = MIGRATE_SYNC_LIGHT,
  		.ignore_skip_hint = true,
  		.gfp_mask = GFP_KERNEL,
  
  	};
  	trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
  							cc.classzone_idx);
  	count_compact_event(KCOMPACTD_WAKE);

  	for (zoneid = 0; zoneid <= cc.classzone_idx; zoneid++) {
  		int status;
  
  		zone = &pgdat->node_zones[zoneid];
  		if (!populated_zone(zone))
  			continue;
  
  		if (compaction_deferred(zone, cc.order))
  			continue;
  
  		if (compaction_suitable(zone, cc.order, 0, zoneid) !=
  							COMPACT_CONTINUE)
  			continue;
  
  		cc.nr_freepages = 0;
  		cc.nr_migratepages = 0;
  		cc.total_migrate_scanned = 0;
  		cc.total_free_scanned = 0;
  		cc.zone = zone;
  		INIT_LIST_HEAD(&cc.freepages);
  		INIT_LIST_HEAD(&cc.migratepages);
  		if (kthread_should_stop())
  			return;
  		status = compact_zone(zone, &cc);

  		if (status == COMPACT_SUCCESS) {
  			compaction_defer_reset(zone, cc.order, false);
  		} else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
  			/*
  			 * We use sync migration mode here, so we defer like
  			 * sync direct compaction does.
  			 */
  			defer_compaction(zone, cc.order);
  		}
  		count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
  				     cc.total_migrate_scanned);
  		count_compact_events(KCOMPACTD_FREE_SCANNED,
  				     cc.total_free_scanned);
  		VM_BUG_ON(!list_empty(&cc.freepages));
  		VM_BUG_ON(!list_empty(&cc.migratepages));
  	}
  
  	/*
  	 * Regardless of success, we are done until woken up next. But remember
  	 * the requested order/classzone_idx in case it was higher/tighter than
  	 * our current ones
  	 */
  	if (pgdat->kcompactd_max_order <= cc.order)
  		pgdat->kcompactd_max_order = 0;
  	if (pgdat->kcompactd_classzone_idx >= cc.classzone_idx)
  		pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
  }
  
  void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
  {
  	if (!order)
  		return;
  
  	if (pgdat->kcompactd_max_order < order)
  		pgdat->kcompactd_max_order = order;
  
  	if (pgdat->kcompactd_classzone_idx > classzone_idx)
  		pgdat->kcompactd_classzone_idx = classzone_idx;
  	/*
  	 * Pairs with implicit barrier in wait_event_freezable()
  	 * such that wakeups are not missed.
  	 */
  	if (!wq_has_sleeper(&pgdat->kcompactd_wait))
  		return;
  
  	if (!kcompactd_node_suitable(pgdat))
  		return;
  
  	trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
  							classzone_idx);
  	wake_up_interruptible(&pgdat->kcompactd_wait);
  }
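
  /*
   * Note: wakeup_kcompactd() above is the producer side of the kcompactd_wait
   * handshake; the kcompactd() thread below is the consumer, sleeping in
   * wait_event_freezable() until kcompactd_work_requested() becomes true.
   */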
  
  /*
   * The background compaction daemon, started as a kernel thread
   * from the init process.
   */
  static int kcompactd(void *p)
  {
  	pg_data_t *pgdat = (pg_data_t*)p;
  	struct task_struct *tsk = current;
  
  	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
  
  	if (!cpumask_empty(cpumask))
  		set_cpus_allowed_ptr(tsk, cpumask);
  
  	set_freezable();
  
  	pgdat->kcompactd_max_order = 0;
  	pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
  
  	while (!kthread_should_stop()) {
  		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
  		wait_event_freezable(pgdat->kcompactd_wait,
  				kcompactd_work_requested(pgdat));
  
  		kcompactd_do_work(pgdat);
  	}
  
  	return 0;
  }
  
  /*
   * This kcompactd start function will be called by init and node-hot-add.
   * On node-hot-add, kcompactd will be moved to proper cpus if cpus are hot-added.
   */
  int kcompactd_run(int nid)
  {
  	pg_data_t *pgdat = NODE_DATA(nid);
  	int ret = 0;
  
  	if (pgdat->kcompactd)
  		return 0;
  
  	pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
  	if (IS_ERR(pgdat->kcompactd)) {
  		pr_err("Failed to start kcompactd on node %d
  ", nid);
  		ret = PTR_ERR(pgdat->kcompactd);
  		pgdat->kcompactd = NULL;
  	}
  	return ret;
  }
  
  /*
   * Called by memory hotplug when all memory in a node is offlined. Caller must
   * hold mem_hotplug_begin/end().
   */
  void kcompactd_stop(int nid)
  {
  	struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;
  
  	if (kcompactd) {
  		kthread_stop(kcompactd);
  		NODE_DATA(nid)->kcompactd = NULL;
  	}
  }
  
  /*
   * It's optimal to keep kcompactd on the same CPUs as their memory, but
   * not required for correctness. So if the last cpu in a node goes
   * away, we get changed to run anywhere: as the first one comes back,
   * restore their cpu bindings.
   */
  static int kcompactd_cpu_online(unsigned int cpu)
  {
  	int nid;

  	for_each_node_state(nid, N_MEMORY) {
  		pg_data_t *pgdat = NODE_DATA(nid);
  		const struct cpumask *mask;

  		mask = cpumask_of_node(pgdat->node_id);

  		if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
  			/* One of our CPUs online: restore mask */
  			set_cpus_allowed_ptr(pgdat->kcompactd, mask);
  	}

  	return 0;
  }
  
  static int __init kcompactd_init(void)
  {
  	int nid;
  	int ret;
  
  	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
  					"mm/compaction:online",
  					kcompactd_cpu_online, NULL);
  	if (ret < 0) {
  		pr_err("kcompactd: failed to register hotplug callbacks.
  ");
  		return ret;
  	}
  
  	for_each_node_state(nid, N_MEMORY)
  		kcompactd_run(nid);
  	return 0;
  }
  subsys_initcall(kcompactd_init)
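
  /*
   * Note: kcompactd_init() runs at subsys_initcall time, registers
   * kcompactd_cpu_online() as a CPU hotplug callback and starts one kcompactd
   * thread per node with memory; kcompactd_run()/kcompactd_stop() are reused
   * by the memory hotplug paths when nodes are added or removed.
   */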
  #endif /* CONFIG_COMPACTION */