  // SPDX-License-Identifier: GPL-2.0
  /*
   * linux/mm/compaction.c
   *
   * Memory compaction for the reduction of external fragmentation. Note that
   * this heavily depends upon page migration to do all the real heavy
   * lifting
   *
   * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
   */
  #include <linux/cpu.h>
  #include <linux/swap.h>
  #include <linux/migrate.h>
  #include <linux/compaction.h>
  #include <linux/mm_inline.h>
  #include <linux/sched/signal.h>
  #include <linux/backing-dev.h>
  #include <linux/sysctl.h>
  #include <linux/sysfs.h>
  #include <linux/page-isolation.h>
  #include <linux/kasan.h>
  #include <linux/kthread.h>
  #include <linux/freezer.h>
  #include <linux/page_owner.h>
  #include "internal.h"
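/*
 * Compaction VM event counters only exist when CONFIG_COMPACTION is enabled,
 * so provide no-op fallbacks for builds that only enable CONFIG_CMA.
 */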
  #ifdef CONFIG_COMPACTION
  static inline void count_compact_event(enum vm_event_item item)
  {
  	count_vm_event(item);
  }
  
  static inline void count_compact_events(enum vm_event_item item, long delta)
  {
  	count_vm_events(item, delta);
  }
  #else
  #define count_compact_event(item) do { } while (0)
  #define count_compact_events(item, delta) do { } while (0)
  #endif
  #if defined CONFIG_COMPACTION || defined CONFIG_CMA
  #define CREATE_TRACE_POINTS
  #include <trace/events/compaction.h>
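/*
 * Map a pfn to the first/one-past-last pfn of the order-aligned block that
 * contains it; the pageblock_* variants do the same for whole pageblocks.
 */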
  #define block_start_pfn(pfn, order)	round_down(pfn, 1UL << (order))
  #define block_end_pfn(pfn, order)	ALIGN((pfn) + 1, 1UL << (order))
  #define pageblock_start_pfn(pfn)	block_start_pfn(pfn, pageblock_order)
  #define pageblock_end_pfn(pfn)		block_end_pfn(pfn, pageblock_order)
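/* Free every page on @freelist and return the highest PFN that was freed. */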
  static unsigned long release_freepages(struct list_head *freelist)
  {
  	struct page *page, *next;
  	unsigned long high_pfn = 0;
  
  	list_for_each_entry_safe(page, next, freelist, lru) {
  		unsigned long pfn = page_to_pfn(page);
  		list_del(&page->lru);
  		__free_page(page);
  		if (pfn > high_pfn)
  			high_pfn = pfn;
  	}
  	return high_pfn;
  }
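/*
 * Prepare pages isolated by the free scanner for use: run post_alloc_hook()
 * and split each high-order page (the order was stashed in page_private() by
 * isolate_freepages_block()) into order-0 pages, keeping them all on @list.
 */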
  static void map_pages(struct list_head *list)
  {
  	unsigned int i, order, nr_pages;
  	struct page *page, *next;
  	LIST_HEAD(tmp_list);
  
  	list_for_each_entry_safe(page, next, list, lru) {
  		list_del(&page->lru);
  
  		order = page_private(page);
  		nr_pages = 1 << order;

  		post_alloc_hook(page, order, __GFP_MOVABLE);
  		if (order)
  			split_page(page, order);

  		for (i = 0; i < nr_pages; i++) {
  			list_add(&page->lru, &tmp_list);
  			page++;
  		}
  	}
  
  	list_splice(&tmp_list, list);
  }
  #ifdef CONFIG_COMPACTION

  int PageMovable(struct page *page)
  {
  	struct address_space *mapping;
  
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	if (!__PageMovable(page))
  		return 0;
  
  	mapping = page_mapping(page);
  	if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
  		return 1;
  
  	return 0;
  }
  EXPORT_SYMBOL(PageMovable);
  
  void __SetPageMovable(struct page *page, struct address_space *mapping)
  {
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
  	page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
  }
  EXPORT_SYMBOL(__SetPageMovable);
  
  void __ClearPageMovable(struct page *page)
  {
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  	VM_BUG_ON_PAGE(!PageMovable(page), page);
  	/*
	 * Clear the registered address_space value while keeping the
	 * PAGE_MAPPING_MOVABLE flag so that the VM can catch a page released
	 * by the driver after isolation. With it, VM migration doesn't try to
	 * put it back.
  	 */
  	page->mapping = (void *)((unsigned long)page->mapping &
  				PAGE_MAPPING_MOVABLE);
  }
  EXPORT_SYMBOL(__ClearPageMovable);
  /* Do not skip compaction more than 64 times */
  #define COMPACT_MAX_DEFER_SHIFT 6
  
  /*
   * Compaction is deferred when compaction fails to result in a page
 * allocation success. 1 << compact_defer_shift compactions are skipped up
 * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT.
   */
  void defer_compaction(struct zone *zone, int order)
  {
  	zone->compact_considered = 0;
  	zone->compact_defer_shift++;
  
  	if (order < zone->compact_order_failed)
  		zone->compact_order_failed = order;
  
  	if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT)
  		zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;
  
  	trace_mm_compaction_defer_compaction(zone, order);
  }
  
  /* Returns true if compaction should be skipped this time */
  bool compaction_deferred(struct zone *zone, int order)
  {
  	unsigned long defer_limit = 1UL << zone->compact_defer_shift;
  
  	if (order < zone->compact_order_failed)
  		return false;
  
  	/* Avoid possible overflow */
  	if (++zone->compact_considered > defer_limit)
  		zone->compact_considered = defer_limit;
  
  	if (zone->compact_considered >= defer_limit)
  		return false;
  
  	trace_mm_compaction_deferred(zone, order);
  
  	return true;
  }
  
  /*
   * Update defer tracking counters after successful compaction of given order,
   * which means an allocation either succeeded (alloc_success == true) or is
   * expected to succeed.
   */
  void compaction_defer_reset(struct zone *zone, int order,
  		bool alloc_success)
  {
  	if (alloc_success) {
  		zone->compact_considered = 0;
  		zone->compact_defer_shift = 0;
  	}
  	if (order >= zone->compact_order_failed)
  		zone->compact_order_failed = order + 1;
  
  	trace_mm_compaction_defer_reset(zone, order);
  }
  
  /* Returns true if restarting compaction after many failures */
  bool compaction_restarting(struct zone *zone, int order)
  {
  	if (order < zone->compact_order_failed)
  		return false;
  
  	return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
  		zone->compact_considered >= 1UL << zone->compact_defer_shift;
  }
  /* Returns true if the pageblock should be scanned for pages to isolate. */
  static inline bool isolation_suitable(struct compact_control *cc,
  					struct page *page)
  {
  	if (cc->ignore_skip_hint)
  		return true;
  
  	return !get_pageblock_skip(page);
  }
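/*
 * Rewind the cached migration scanner to the start of the zone and the
 * cached free scanner to the last pageblock of the zone.
 */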
  static void reset_cached_positions(struct zone *zone)
  {
  	zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
  	zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
  	zone->compact_cached_free_pfn =
  				pageblock_start_pfn(zone_end_pfn(zone) - 1);
  }
  /*
 * Compound pages of >= pageblock_order should consistently be skipped until
   * released. It is always pointless to compact pages of such order (if they are
   * migratable), and the pageblocks they occupy cannot contain any free pages.
   */
  static bool pageblock_skip_persistent(struct page *page)
  {
  	if (!PageCompound(page))
  		return false;
  
  	page = compound_head(page);
  
  	if (compound_order(page) >= pageblock_order)
  		return true;
  
  	return false;
  }
  
  /*
   * This function is called to clear all cached information on pageblocks that
   * should be skipped for page isolation when the migrate and free page scanner
   * meet.
   */
  static void __reset_isolation_suitable(struct zone *zone)
  {
  	unsigned long start_pfn = zone->zone_start_pfn;
  	unsigned long end_pfn = zone_end_pfn(zone);
  	unsigned long pfn;
  	zone->compact_blockskip_flush = false;
  
  	/* Walk the zone and mark every pageblock as suitable for isolation */
  	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
  		struct page *page;
  
  		cond_resched();
  		page = pfn_to_online_page(pfn);
  		if (!page)
  			continue;
  		if (zone != page_zone(page))
  			continue;
  		if (pageblock_skip_persistent(page))
  			continue;
  
  		clear_pageblock_skip(page);
  	}
  
  	reset_cached_positions(zone);
  }
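/*
 * Clear the pageblock skip hints in every populated zone of @pgdat where a
 * recently finished full compaction has set compact_blockskip_flush.
 */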
  void reset_isolation_suitable(pg_data_t *pgdat)
  {
  	int zoneid;
  
  	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
  		struct zone *zone = &pgdat->node_zones[zoneid];
  		if (!populated_zone(zone))
  			continue;
  
  		/* Only flush if a full compaction finished recently */
  		if (zone->compact_blockskip_flush)
  			__reset_isolation_suitable(zone);
  	}
  }
  /*
   * If no pages were isolated then mark this pageblock to be skipped in the
   * future. The information is later cleared by __reset_isolation_suitable().
   */
  static void update_pageblock_skip(struct compact_control *cc,
  			struct page *page, unsigned long nr_isolated,
  			bool migrate_scanner)
  {
  	struct zone *zone = cc->zone;
  	unsigned long pfn;

  	if (cc->no_set_skip_hint)
  		return;
  	if (!page)
  		return;
  	if (nr_isolated)
  		return;
  	set_pageblock_skip(page);

  	pfn = page_to_pfn(page);
  
  	/* Update where async and sync compaction should restart */
  	if (migrate_scanner) {
  		if (pfn > zone->compact_cached_migrate_pfn[0])
  			zone->compact_cached_migrate_pfn[0] = pfn;
  		if (cc->mode != MIGRATE_ASYNC &&
  		    pfn > zone->compact_cached_migrate_pfn[1])
  			zone->compact_cached_migrate_pfn[1] = pfn;
  	} else {
  		if (pfn < zone->compact_cached_free_pfn)
  			zone->compact_cached_free_pfn = pfn;
  	}
  }
  #else
  static inline bool isolation_suitable(struct compact_control *cc,
  					struct page *page)
  {
  	return true;
  }
  static inline bool pageblock_skip_persistent(struct page *page)
  {
  	return false;
  }
  
  static inline void update_pageblock_skip(struct compact_control *cc,
  			struct page *page, unsigned long nr_isolated,
  			bool migrate_scanner)
  {
  }
  #endif /* CONFIG_COMPACTION */
  /*
   * Compaction requires the taking of some coarse locks that are potentially
   * very heavily contended. For async compaction, back out if the lock cannot
   * be taken immediately. For sync compaction, spin on the lock if needed.
   *
   * Returns true if the lock is held
   * Returns false if the lock is not held and compaction should abort
   */
  static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
  						struct compact_control *cc)
  {
  	if (cc->mode == MIGRATE_ASYNC) {
  		if (!spin_trylock_irqsave(lock, *flags)) {
  			cc->contended = true;
  			return false;
  		}
  	} else {
  		spin_lock_irqsave(lock, *flags);
  	}

  	return true;
  }
  /*
   * Compaction requires the taking of some coarse locks that are potentially
   * very heavily contended. The lock should be periodically unlocked to avoid
   * having disabled IRQs for a long time, even when there is nobody waiting on
   * the lock. It might also be that allowing the IRQs will result in
   * need_resched() becoming true. If scheduling is needed, async compaction
   * aborts. Sync compaction schedules.
   * Either compaction type will also abort if a fatal signal is pending.
   * In either case if the lock was locked, it is dropped and not regained.
   *
   * Returns true if compaction should abort due to fatal signal pending, or
   *		async compaction due to need_resched()
   * Returns false when compaction can continue (sync compaction might have
   *		scheduled)
   */
  static bool compact_unlock_should_abort(spinlock_t *lock,
  		unsigned long flags, bool *locked, struct compact_control *cc)
  {
  	if (*locked) {
  		spin_unlock_irqrestore(lock, flags);
  		*locked = false;
  	}

  	if (fatal_signal_pending(current)) {
  		cc->contended = true;
  		return true;
  	}

  	if (need_resched()) {
  		if (cc->mode == MIGRATE_ASYNC) {
  			cc->contended = true;
  			return true;
  		}
  		cond_resched();
  	}
  	return false;
  }
  /*
   * Aside from avoiding lock contention, compaction also periodically checks
   * need_resched() and either schedules in sync compaction or aborts async
   * compaction. This is similar to what compact_unlock_should_abort() does, but
   * is used where no lock is concerned.
   *
   * Returns false when no scheduling was needed, or sync compaction scheduled.
   * Returns true when async compaction should abort.
   */
  static inline bool compact_should_abort(struct compact_control *cc)
  {
  	/* async compaction aborts if contended */
  	if (need_resched()) {
  		if (cc->mode == MIGRATE_ASYNC) {
  			cc->contended = true;
  			return true;
  		}
  
  		cond_resched();
  	}
  
  	return false;
  }
  /*
 * Isolate free pages onto a private freelist. If @strict is true, will abort,
 * returning 0 on any invalid PFNs or non-free pages inside the pageblock
   * (even though it may still end up isolating some pages).
   */
  static unsigned long isolate_freepages_block(struct compact_control *cc,
  				unsigned long *start_pfn,
  				unsigned long end_pfn,
  				struct list_head *freelist,
  				bool strict)
  {
  	int nr_scanned = 0, total_isolated = 0;
  	struct page *cursor, *valid_page = NULL;
  	unsigned long flags = 0;
  	bool locked = false;
  	unsigned long blockpfn = *start_pfn;
  	unsigned int order;

  	cursor = pfn_to_page(blockpfn);
  	/* Isolate free pages. */
  	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
  		int isolated;
  		struct page *page = cursor;
  		/*
  		 * Periodically drop the lock (if held) regardless of its
		 * contention, to give a chance to IRQs. Abort if a fatal
		 * signal is pending or async compaction detects need_resched().
  		 */
  		if (!(blockpfn % SWAP_CLUSTER_MAX)
  		    && compact_unlock_should_abort(&cc->zone->lock, flags,
  								&locked, cc))
  			break;
  		nr_scanned++;
  		if (!pfn_valid_within(blockpfn))
  			goto isolate_fail;
  		if (!valid_page)
  			valid_page = page;
  
  		/*
  		 * For compound pages such as THP and hugetlbfs, we can save
  		 * potentially a lot of iterations if we skip them at once.
  		 * The check is racy, but we can consider only valid values
  		 * and the only danger is skipping too much.
  		 */
  		if (PageCompound(page)) {
  			const unsigned int order = compound_order(page);
  			if (likely(order < MAX_ORDER)) {
  				blockpfn += (1UL << order) - 1;
  				cursor += (1UL << order) - 1;
  			}
  			goto isolate_fail;
  		}
  		if (!PageBuddy(page))
  			goto isolate_fail;
  
  		/*
  		 * If we already hold the lock, we can skip some rechecking.
  		 * Note that if we hold the lock now, checked_pageblock was
  		 * already set in some previous iteration (or strict is true),
  		 * so it is correct to skip the suitable migration target
  		 * recheck as well.
  		 */
  		if (!locked) {
  			/*
  			 * The zone lock must be held to isolate freepages.
  			 * Unfortunately this is a very coarse lock and can be
  			 * heavily contended if there are parallel allocations
  			 * or parallel compactions. For async compaction do not
  			 * spin on the lock and we acquire the lock as late as
  			 * possible.
  			 */
  			locked = compact_trylock_irqsave(&cc->zone->lock,
  								&flags, cc);
  			if (!locked)
  				break;

  			/* Recheck this is a buddy page under lock */
  			if (!PageBuddy(page))
  				goto isolate_fail;
  		}

  		/* Found a free page, will break it into order-0 pages */
  		order = page_order(page);
  		isolated = __isolate_free_page(page, order);
  		if (!isolated)
  			break;
  		set_page_private(page, order);

  		total_isolated += isolated;
  		cc->nr_freepages += isolated;
  		list_add_tail(&page->lru, freelist);
  		if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
  			blockpfn += isolated;
  			break;
  		}
  		/* Advance to the end of split page */
  		blockpfn += isolated - 1;
  		cursor += isolated - 1;
  		continue;
  
  isolate_fail:
  		if (strict)
  			break;
  		else
  			continue;
  	}
  	if (locked)
  		spin_unlock_irqrestore(&cc->zone->lock, flags);
  	/*
  	 * There is a tiny chance that we have read bogus compound_order(),
  	 * so be careful to not go outside of the pageblock.
  	 */
  	if (unlikely(blockpfn > end_pfn))
  		blockpfn = end_pfn;
  	trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn,
  					nr_scanned, total_isolated);
  	/* Record how far we have got within the block */
  	*start_pfn = blockpfn;
  	/*
  	 * If strict isolation is requested by CMA then check that all the
  	 * pages requested were isolated. If there were any failures, 0 is
  	 * returned and CMA will fail.
  	 */
  	if (strict && blockpfn < end_pfn)
  		total_isolated = 0;
  	/* Update the pageblock-skip if the whole pageblock was scanned */
  	if (blockpfn == end_pfn)
  		update_pageblock_skip(cc, valid_page, total_isolated, false);

  	cc->total_free_scanned += nr_scanned;
  	if (total_isolated)
  		count_compact_events(COMPACTISOLATED, total_isolated);
  	return total_isolated;
  }
  /**
   * isolate_freepages_range() - isolate free pages.
   * @cc:        Compaction control structure.
   * @start_pfn: The first PFN to start isolating.
   * @end_pfn:   The one-past-last PFN.
   *
   * Non-free pages, invalid PFNs, or zone boundaries within the
 * [start_pfn, end_pfn) range are considered errors and cause the function to
   * undo its actions and return zero.
   *
 * Otherwise, the function returns the one-past-the-last PFN of the isolated
 * pages (which may be greater than end_pfn if the end fell in the middle of
 * a free page).
   */
  unsigned long
  isolate_freepages_range(struct compact_control *cc,
  			unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
  	LIST_HEAD(freelist);
  	pfn = start_pfn;
  	block_start_pfn = pageblock_start_pfn(pfn);
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
  	block_end_pfn = pageblock_end_pfn(pfn);
  
  	for (; pfn < end_pfn; pfn += isolated,
  				block_start_pfn = block_end_pfn,
  				block_end_pfn += pageblock_nr_pages) {
		/* Protect pfn from being changed by isolate_freepages_block */
  		unsigned long isolate_start_pfn = pfn;

  		block_end_pfn = min(block_end_pfn, end_pfn);
  		/*
		 * pfn could pass block_end_pfn if the isolated free page
		 * is larger than pageblock order. In this case, adjust the
		 * scanning range to the correct pageblock.
  		 */
  		if (pfn >= block_end_pfn) {
  			block_start_pfn = pageblock_start_pfn(pfn);
  			block_end_pfn = pageblock_end_pfn(pfn);
  			block_end_pfn = min(block_end_pfn, end_pfn);
  		}
  		if (!pageblock_pfn_to_page(block_start_pfn,
  					block_end_pfn, cc->zone))
  			break;
  		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
  						block_end_pfn, &freelist, true);
  
  		/*
  		 * In strict mode, isolate_freepages_block() returns 0 if
  		 * there are any holes in the block (ie. invalid PFNs or
  		 * non-free pages).
  		 */
  		if (!isolated)
  			break;
  
  		/*
  		 * If we managed to isolate pages, it is always (1 << n) *
  		 * pageblock_nr_pages for some non-negative n.  (Max order
  		 * page may span two pageblocks).
  		 */
  	}
  	/* __isolate_free_page() does not map the pages */
  	map_pages(&freelist);
  
  	if (pfn < end_pfn) {
  		/* Loop terminated early, cleanup. */
  		release_freepages(&freelist);
  		return 0;
  	}
  
  	/* We don't use freelists for anything. */
  	return pfn;
  }
  /* Similar to reclaim, but different enough that they don't share logic */
  static bool too_many_isolated(struct zone *zone)
  {
  	unsigned long active, inactive, isolated;

  	inactive = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) +
  			node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
  	active = node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE) +
  			node_page_state(zone->zone_pgdat, NR_ACTIVE_ANON);
  	isolated = node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE) +
  			node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON);

  	return isolated > (inactive + active) / 2;
  }
  /**
   * isolate_migratepages_block() - isolate all migrate-able pages within
   *				  a single pageblock
   * @cc:		Compaction control structure.
   * @low_pfn:	The first PFN to isolate
   * @end_pfn:	The one-past-the-last PFN to isolate, within same pageblock
   * @isolate_mode: Isolation mode to be used.
   *
   * Isolate all pages that can be migrated from the range specified by
   * [low_pfn, end_pfn). The range is expected to be within same pageblock.
   * Returns zero if there is a fatal signal pending, otherwise PFN of the
 * first page that was not scanned (which may be less than, equal to, or
 * greater than end_pfn).
   *
   * The pages are isolated on cc->migratepages list (not required to be empty),
   * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field
   * is neither read nor updated.
   */
  static unsigned long
  isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
  			unsigned long end_pfn, isolate_mode_t isolate_mode)
  {
  	struct zone *zone = cc->zone;
  	unsigned long nr_scanned = 0, nr_isolated = 0;
  	struct lruvec *lruvec;
  	unsigned long flags = 0;
  	bool locked = false;
  	struct page *page = NULL, *valid_page = NULL;
  	unsigned long start_pfn = low_pfn;
  	bool skip_on_failure = false;
  	unsigned long next_skip_pfn = 0;

  	/*
  	 * Ensure that there are not too many pages isolated from the LRU
  	 * list by either parallel reclaimers or compaction. If there are,
  	 * delay for some time until fewer pages are isolated
  	 */
  	while (unlikely(too_many_isolated(zone))) {
  		/* async migration should just abort */
  		if (cc->mode == MIGRATE_ASYNC)
  			return 0;

  		congestion_wait(BLK_RW_ASYNC, HZ/10);
  
  		if (fatal_signal_pending(current))
  			return 0;
  	}
  	if (compact_should_abort(cc))
  		return 0;

  	if (cc->direct_compaction && (cc->mode == MIGRATE_ASYNC)) {
  		skip_on_failure = true;
  		next_skip_pfn = block_end_pfn(low_pfn, cc->order);
  	}
  	/* Time to isolate some pages for migration */
  	for (; low_pfn < end_pfn; low_pfn++) {

  		if (skip_on_failure && low_pfn >= next_skip_pfn) {
  			/*
  			 * We have isolated all migration candidates in the
  			 * previous order-aligned block, and did not skip it due
  			 * to failure. We should migrate the pages now and
  			 * hopefully succeed compaction.
  			 */
  			if (nr_isolated)
  				break;
  
  			/*
  			 * We failed to isolate in the previous order-aligned
  			 * block. Set the new boundary to the end of the
  			 * current block. Note we can't simply increase
  			 * next_skip_pfn by 1 << order, as low_pfn might have
  			 * been incremented by a higher number due to skipping
  			 * a compound or a high-order buddy page in the
  			 * previous loop iteration.
  			 */
  			next_skip_pfn = block_end_pfn(low_pfn, cc->order);
  		}
  		/*
  		 * Periodically drop the lock (if held) regardless of its
		 * contention, to give a chance to IRQs. Abort async compaction
  		 * if contended.
  		 */
  		if (!(low_pfn % SWAP_CLUSTER_MAX)
  		    && compact_unlock_should_abort(zone_lru_lock(zone), flags,
  								&locked, cc))
  			break;

  		if (!pfn_valid_within(low_pfn))
  			goto isolate_fail;
  		nr_scanned++;

  		page = pfn_to_page(low_pfn);

  		if (!valid_page)
  			valid_page = page;
  		/*
  		 * Skip if free. We read page order here without zone lock
  		 * which is generally unsafe, but the race window is small and
  		 * the worst thing that can happen is that we skip some
  		 * potential isolation targets.
  		 */
  		if (PageBuddy(page)) {
  			unsigned long freepage_order = page_order_unsafe(page);
  
  			/*
  			 * Without lock, we cannot be sure that what we got is
  			 * a valid page order. Consider only values in the
  			 * valid order range to prevent low_pfn overflow.
  			 */
  			if (freepage_order > 0 && freepage_order < MAX_ORDER)
  				low_pfn += (1UL << freepage_order) - 1;
  			continue;
  		}

  		/*
  		 * Regardless of being on LRU, compound pages such as THP and
  		 * hugetlbfs are not to be compacted. We can potentially save
  		 * a lot of iterations if we skip them at once. The check is
  		 * racy, but we can consider only valid values and the only
  		 * danger is skipping too much.
  		 */
  		if (PageCompound(page)) {
  			const unsigned int order = compound_order(page);

  			if (likely(order < MAX_ORDER))
  				low_pfn += (1UL << order) - 1;
  			goto isolate_fail;
  		}
  		/*
  		 * Check may be lockless but that's ok as we recheck later.
  		 * It's possible to migrate LRU and non-lru movable pages.
  		 * Skip any other type of page
  		 */
  		if (!PageLRU(page)) {
  			/*
  			 * __PageMovable can return false positive so we need
  			 * to verify it under page_lock.
  			 */
  			if (unlikely(__PageMovable(page)) &&
  					!PageIsolated(page)) {
  				if (locked) {
  					spin_unlock_irqrestore(zone_lru_lock(zone),
  									flags);
  					locked = false;
  				}
  				if (!isolate_movable_page(page, isolate_mode))
  					goto isolate_success;
  			}
  			goto isolate_fail;
  		}

  		/*
  		 * Migration will fail if an anonymous page is pinned in memory,
  		 * so avoid taking lru_lock and isolating it unnecessarily in an
  		 * admittedly racy check.
  		 */
  		if (!page_mapping(page) &&
  		    page_count(page) > page_mapcount(page))
  			goto isolate_fail;

  		/*
		 * Only allow anonymous pages to be migrated in GFP_NOFS context
  		 * because those do not depend on fs locks.
  		 */
  		if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
  			goto isolate_fail;
  		/* If we already hold the lock, we can skip some rechecking */
  		if (!locked) {
  			locked = compact_trylock_irqsave(zone_lru_lock(zone),
  								&flags, cc);
  			if (!locked)
  				break;

  			/* Recheck PageLRU and PageCompound under lock */
  			if (!PageLRU(page))
  				goto isolate_fail;
  
  			/*
			 * Page became compound since the non-locked check,
  			 * and it's on LRU. It can only be a THP so the order
  			 * is safe to read and it's 0 for tail pages.
  			 */
  			if (unlikely(PageCompound(page))) {
  				low_pfn += (1UL << compound_order(page)) - 1;
  				goto isolate_fail;
  			}
  		}
  		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);

		/* Try to isolate the page */
  		if (__isolate_lru_page(page, isolate_mode) != 0)
  			goto isolate_fail;

  		VM_BUG_ON_PAGE(PageCompound(page), page);

  		/* Successfully isolated */
  		del_page_from_lru_list(page, lruvec, page_lru(page));
  		inc_node_page_state(page,
  				NR_ISOLATED_ANON + page_is_file_cache(page));
  
  isolate_success:
  		list_add(&page->lru, &cc->migratepages);
  		cc->nr_migratepages++;
  		nr_isolated++;

  		/*
  		 * Record where we could have freed pages by migration and not
		 * yet flushed them to the buddy allocator.
		 * - this is the lowest page that was isolated and will likely
		 * then be freed by migration.
  		 */
  		if (!cc->last_migrated_pfn)
  			cc->last_migrated_pfn = low_pfn;
  		/* Avoid isolating too much */
  		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
  			++low_pfn;
  			break;
  		}
  
  		continue;
  isolate_fail:
  		if (!skip_on_failure)
  			continue;
  
  		/*
  		 * We have isolated some pages, but then failed. Release them
  		 * instead of migrating, as we cannot form the cc->order buddy
  		 * page anyway.
  		 */
  		if (nr_isolated) {
  			if (locked) {
  				spin_unlock_irqrestore(zone_lru_lock(zone), flags);
  				locked = false;
  			}
  			putback_movable_pages(&cc->migratepages);
  			cc->nr_migratepages = 0;
  			cc->last_migrated_pfn = 0;
  			nr_isolated = 0;
  		}
  
  		if (low_pfn < next_skip_pfn) {
  			low_pfn = next_skip_pfn - 1;
  			/*
  			 * The check near the loop beginning would have updated
  			 * next_skip_pfn too, but this is a bit simpler.
  			 */
  			next_skip_pfn += 1UL << cc->order;
  		}
  	}
  	/*
  	 * The PageBuddy() check could have potentially brought us outside
  	 * the range to be scanned.
  	 */
  	if (unlikely(low_pfn > end_pfn))
  		low_pfn = end_pfn;
  	if (locked)
  		spin_unlock_irqrestore(zone_lru_lock(zone), flags);

  	/*
  	 * Update the pageblock-skip information and cached scanner pfn,
  	 * if the whole pageblock was scanned without isolating any page.
  	 */
  	if (low_pfn == end_pfn)
  		update_pageblock_skip(cc, valid_page, nr_isolated, true);

  	trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn,
  						nr_scanned, nr_isolated);

  	cc->total_migrate_scanned += nr_scanned;
  	if (nr_isolated)
  		count_compact_events(COMPACTISOLATED, nr_isolated);

  	return low_pfn;
  }
  /**
   * isolate_migratepages_range() - isolate migrate-able pages in a PFN range
   * @cc:        Compaction control structure.
   * @start_pfn: The first PFN to start isolating.
   * @end_pfn:   The one-past-last PFN.
   *
 * Returns zero if isolation fails fatally due to e.g. a pending signal.
 * Otherwise, the function returns the one-past-the-last PFN of the isolated
 * pages (which may be greater than end_pfn if the end fell in the middle of
 * a THP page).
   */
  unsigned long
  isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
  							unsigned long end_pfn)
  {
  	unsigned long pfn, block_start_pfn, block_end_pfn;
  
  	/* Scan block by block. First and last block may be incomplete */
  	pfn = start_pfn;
  	block_start_pfn = pageblock_start_pfn(pfn);
  	if (block_start_pfn < cc->zone->zone_start_pfn)
  		block_start_pfn = cc->zone->zone_start_pfn;
  	block_end_pfn = pageblock_end_pfn(pfn);
  
  	for (; pfn < end_pfn; pfn = block_end_pfn,
  				block_start_pfn = block_end_pfn,
  				block_end_pfn += pageblock_nr_pages) {
  
  		block_end_pfn = min(block_end_pfn, end_pfn);
  		if (!pageblock_pfn_to_page(block_start_pfn,
  					block_end_pfn, cc->zone))
  			continue;
  
  		pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
  							ISOLATE_UNEVICTABLE);
  		if (!pfn)
  			break;
  
  		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
  			break;
  	}
  
  	return pfn;
  }
  #endif /* CONFIG_COMPACTION || CONFIG_CMA */
  #ifdef CONFIG_COMPACTION

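/*
 * Returns true if the migration scanner should look for candidate pages in
 * this pageblock. Async direct compaction only scans blocks matching the
 * requested migratetype (or any movable block for MIGRATE_MOVABLE requests).
 */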
  static bool suitable_migration_source(struct compact_control *cc,
  							struct page *page)
  {
  	int block_mt;
  
  	if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction)
  		return true;
  	block_mt = get_pageblock_migratetype(page);
  
  	if (cc->migratetype == MIGRATE_MOVABLE)
  		return is_migrate_movable(block_mt);
  	else
  		return block_mt == cc->migratetype;
  }
  /* Returns true if the page is within a block suitable for migration to */
  static bool suitable_migration_target(struct compact_control *cc,
  							struct page *page)
  {
  	/* If the page is a large free page, then disallow migration */
  	if (PageBuddy(page)) {
  		/*
  		 * We are checking page_order without zone->lock taken. But
  		 * the only small danger is that we skip a potentially suitable
		 * pageblock, so it's not worth checking the order for a valid range.
  		 */
  		if (page_order_unsafe(page) >= pageblock_order)
  			return false;
  	}
  	if (cc->ignore_block_suitable)
  		return true;
  	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
  	if (is_migrate_movable(get_pageblock_migratetype(page)))
  		return true;
  
  	/* Otherwise skip the block */
  	return false;
  }
  /*
   * Test whether the free scanner has reached the same or lower pageblock than
   * the migration scanner, and compaction should thus terminate.
   */
  static inline bool compact_scanners_met(struct compact_control *cc)
  {
  	return (cc->free_pfn >> pageblock_order)
  		<= (cc->migrate_pfn >> pageblock_order);
  }
  
  /*
   * Based on information in the current compact_control, find blocks
   * suitable for isolating free pages from and then isolate them.
   */
  static void isolate_freepages(struct compact_control *cc)
  {
  	struct zone *zone = cc->zone;
  	struct page *page;
  	unsigned long block_start_pfn;	/* start of current pageblock */
  	unsigned long isolate_start_pfn; /* exact pfn we start at */
  	unsigned long block_end_pfn;	/* end of current pageblock */
  	unsigned long low_pfn;	     /* lowest pfn scanner is able to scan */
  	struct list_head *freelist = &cc->freepages;

  	/*
  	 * Initialise the free scanner. The starting point is where we last
  	 * successfully isolated from, zone-cached value, or the end of the
  	 * zone when isolating for the first time. For looping we also need
  	 * this pfn aligned down to the pageblock boundary, because we do
  	 * block_start_pfn -= pageblock_nr_pages in the for loop.
	 * For the ending point, take care when isolating in the last pageblock
	 * of a zone which ends in the middle of a pageblock.
  	 * The low boundary is the end of the pageblock the migration scanner
  	 * is using.
  	 */
  	isolate_start_pfn = cc->free_pfn;
  	block_start_pfn = pageblock_start_pfn(cc->free_pfn);
  	block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
  						zone_end_pfn(zone));
  	low_pfn = pageblock_end_pfn(cc->migrate_pfn);

  	/*
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1026
1027
1028
1029
  	 * Isolate free pages until enough are available to migrate the
  	 * pages on cc->migratepages. We stop searching if the migrate
  	 * and free page scanners meet or enough free pages are isolated.
  	 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1030
  	for (; block_start_pfn >= low_pfn;
c96b9e508   Vlastimil Babka   mm/compaction: cl...
1031
  				block_end_pfn = block_start_pfn,
e14c720ef   Vlastimil Babka   mm, compaction: r...
1032
1033
  				block_start_pfn -= pageblock_nr_pages,
  				isolate_start_pfn = block_start_pfn) {
f6ea3adb7   David Rientjes   mm/compaction.c: ...
1034
1035
1036
  		/*
  		 * This can iterate a massively long zone without finding any
  		 * suitable migration targets, so periodically check if we need
be9765722   Vlastimil Babka   mm, compaction: p...
1037
  		 * to schedule, or even abort async compaction.
f6ea3adb7   David Rientjes   mm/compaction.c: ...
1038
  		 */
be9765722   Vlastimil Babka   mm, compaction: p...
1039
1040
1041
  		if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
  						&& compact_should_abort(cc))
  			break;
f6ea3adb7   David Rientjes   mm/compaction.c: ...
1042

7d49d8868   Vlastimil Babka   mm, compaction: r...
1043
1044
1045
  		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
  									zone);
  		if (!page)
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1046
1047
1048
  			continue;
  
  		/* Check the block is suitable for migration */
9f7e33879   Vlastimil Babka   mm, compaction: m...
1049
  		if (!suitable_migration_target(cc, page))
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1050
  			continue;
68e3e9262   Linus Torvalds   Revert "mm: compa...
1051

bb13ffeb9   Mel Gorman   mm: compaction: c...
1052
1053
1054
  		/* If isolation recently failed, do not retry */
  		if (!isolation_suitable(cc, page))
  			continue;
e14c720ef   Vlastimil Babka   mm, compaction: r...
1055
  		/* Found a block suitable for isolating free pages from. */
a46cbf3bc   David Rientjes   mm, compaction: p...
1056
1057
  		isolate_freepages_block(cc, &isolate_start_pfn, block_end_pfn,
  					freelist, false);
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1058
1059
  
  		/*
a46cbf3bc   David Rientjes   mm, compaction: p...
1060
1061
  		 * If we isolated enough freepages, or aborted due to lock
  		 * contention, terminate.
e14c720ef   Vlastimil Babka   mm, compaction: r...
1062
  		 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1063
1064
  		if ((cc->nr_freepages >= cc->nr_migratepages)
  							|| cc->contended) {
a46cbf3bc   David Rientjes   mm, compaction: p...
1065
1066
1067
1068
1069
  			if (isolate_start_pfn >= block_end_pfn) {
  				/*
  				 * Restart at previous pageblock if more
  				 * freepages can be isolated next time.
  				 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1070
1071
  				isolate_start_pfn =
  					block_start_pfn - pageblock_nr_pages;
a46cbf3bc   David Rientjes   mm, compaction: p...
1072
  			}
be9765722   Vlastimil Babka   mm, compaction: p...
1073
  			break;
a46cbf3bc   David Rientjes   mm, compaction: p...
1074
  		} else if (isolate_start_pfn < block_end_pfn) {
f5f61a320   Vlastimil Babka   mm, compaction: s...
1075
  			/*
a46cbf3bc   David Rientjes   mm, compaction: p...
1076
1077
  			 * If isolation failed early, do not continue
  			 * needlessly.
f5f61a320   Vlastimil Babka   mm, compaction: s...
1078
  			 */
a46cbf3bc   David Rientjes   mm, compaction: p...
1079
  			break;
f5f61a320   Vlastimil Babka   mm, compaction: s...
1080
  		}
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1081
  	}
66c64223a   Joonsoo Kim   mm/compaction: sp...
1082
  	/* __isolate_free_page() does not map the pages */
ff9543fd3   Michal Nazarewicz   mm: compaction: e...
1083
  	map_pages(freelist);
7ed695e06   Vlastimil Babka   mm: compaction: d...
1084
  	/*
f5f61a320   Vlastimil Babka   mm, compaction: s...
1085
1086
1087
1088
  	 * Record where the free scanner will restart next time. Either we
  	 * broke from the loop and set isolate_start_pfn based on the last
  	 * call to isolate_freepages_block(), or we met the migration scanner
  	 * and the loop terminated due to isolate_start_pfn < low_pfn
7ed695e06   Vlastimil Babka   mm: compaction: d...
1089
  	 */
f5f61a320   Vlastimil Babka   mm, compaction: s...
1090
  	cc->free_pfn = isolate_start_pfn;
748446bb6   Mel Gorman   mm: compaction: m...
1091
1092
1093
1094
1095
1096
1097
  }
  
  /*
   * This is a migrate-callback that "allocates" freepages by taking pages
   * from the isolated freelists in the block we are migrating to.
   */
  static struct page *compaction_alloc(struct page *migratepage,
					unsigned long data)
{
	struct compact_control *cc = (struct compact_control *)data;
	struct page *freepage;

	/*
	 * Isolate free pages if necessary, and if we are not aborting due to
	 * contention.
	 */
	if (list_empty(&cc->freepages)) {
		if (!cc->contended)
			isolate_freepages(cc);
  
  		if (list_empty(&cc->freepages))
  			return NULL;
  	}
  
  	freepage = list_entry(cc->freepages.next, struct page, lru);
  	list_del(&freepage->lru);
  	cc->nr_freepages--;
  
  	return freepage;
  }
  
  /*
   * This is a migrate-callback that "frees" freepages back to the isolated
   * freelist.  All pages on the freelist are from the same zone, so there is no
   * special handling needed for NUMA.
   */
  static void compaction_free(struct page *page, unsigned long data)
  {
  	struct compact_control *cc = (struct compact_control *)data;
  
  	list_add(&page->lru, &cc->freepages);
  	cc->nr_freepages++;
  }
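
/*
 * compaction_alloc() and compaction_free() are the page allocation and
 * release callbacks that compact_zone() below passes to migrate_pages(),
 * roughly:
 *
 *	err = migrate_pages(&cc->migratepages, compaction_alloc,
 *			compaction_free, (unsigned long)cc, cc->mode,
 *			MR_COMPACTION);
 *
 * Each migrated page therefore receives a target page taken from
 * cc->freepages, and targets that end up unused are handed back to
 * that list.
 */
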
  /* possible outcome of isolate_migratepages */
  typedef enum {
  	ISOLATE_ABORT,		/* Abort compaction now */
  	ISOLATE_NONE,		/* No pages isolated, continue scanning */
  	ISOLATE_SUCCESS,	/* Pages isolated, migrate */
  } isolate_migrate_t;
  
  /*
   * Allow userspace to control policy on scanning the unevictable LRU for
   * compactable pages.
   */
  int sysctl_compact_unevictable_allowed __read_mostly = 1;
  
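/*
 * This policy is exposed as /proc/sys/vm/compact_unevictable_allowed.
 * For example, on systems where the extra minor faults caused by
 * compacting mlocked memory are unacceptable, it can be disabled with:
 *
 *	echo 0 > /proc/sys/vm/compact_unevictable_allowed
 */
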
  /*
 * Isolate all pages that can be migrated from the first suitable block,
 * starting at the block pointed to by the migrate scanner pfn within
 * compact_control.
 */
static isolate_migrate_t isolate_migratepages(struct zone *zone,
					struct compact_control *cc)
{
	unsigned long block_start_pfn;
	unsigned long block_end_pfn;
	unsigned long low_pfn;
	struct page *page;
	const isolate_mode_t isolate_mode =
		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);

	/*
	 * Start at where we last stopped, or beginning of the zone as
	 * initialized by compact_zone()
	 */
	low_pfn = cc->migrate_pfn;
	block_start_pfn = pageblock_start_pfn(low_pfn);
	if (block_start_pfn < zone->zone_start_pfn)
		block_start_pfn = zone->zone_start_pfn;

	/* Only scan within a pageblock boundary */
	block_end_pfn = pageblock_end_pfn(low_pfn);

	/*
	 * Iterate over whole pageblocks until we find the first suitable.
	 * Do not cross the free scanner.
	 */
	for (; block_end_pfn <= cc->free_pfn;
			low_pfn = block_end_pfn,
			block_start_pfn = block_end_pfn,
			block_end_pfn += pageblock_nr_pages) {

		/*
		 * This can potentially iterate a massively long zone with
		 * many pageblocks unsuitable, so periodically check if we
		 * need to schedule, or even abort async compaction.
		 */
		if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
						&& compact_should_abort(cc))
			break;

		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
									zone);
		if (!page)
			continue;

		/* If isolation recently failed, do not retry */
		if (!isolation_suitable(cc, page))
			continue;

		/*
		 * For async compaction, also only scan in MOVABLE blocks.
		 * Async compaction is optimistic to see if the minimum amount
		 * of work satisfies the allocation.
		 */
		if (!suitable_migration_source(cc, page))
			continue;

		/* Perform the isolation */
		low_pfn = isolate_migratepages_block(cc, low_pfn,
						block_end_pfn, isolate_mode);

		if (!low_pfn || cc->contended)
			return ISOLATE_ABORT;

		/*
		 * Either we isolated something and proceed with migration. Or
		 * we failed and compact_zone should decide if we should
		 * continue or not.
		 */
		break;
	}

	/* Record where migration scanner will be restarted. */
	cc->migrate_pfn = low_pfn;

	return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
}
  /*
   * order == -1 is expected when compacting via
   * /proc/sys/vm/compact_memory
   */
  static inline bool is_via_compact_memory(int order)
  {
  	return order == -1;
  }
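
/*
 * For instance, compact_node() below builds its compact_control with
 * .order = -1, so a run triggered through /proc/sys/vm/compact_memory
 * makes is_via_compact_memory() true and keeps compacting until the
 * scanners meet rather than stopping at a watermark or free-page check.
 */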

static enum compact_result __compact_finished(struct zone *zone,
						struct compact_control *cc)
{
	unsigned int order;
	const int migratetype = cc->migratetype;

	if (cc->contended || fatal_signal_pending(current))
		return COMPACT_CONTENDED;

	/* Compaction run completes if the migrate and free scanner meet */
	if (compact_scanners_met(cc)) {
		/* Let the next compaction start anew. */
		reset_cached_positions(zone);

		/*
		 * Mark that the PG_migrate_skip information should be cleared
		 * by kswapd when it goes to sleep. kcompactd does not set the
		 * flag itself as the decision to be clear should be directly
		 * based on an allocation request.
		 */
		if (cc->direct_compaction)
			zone->compact_blockskip_flush = true;

		if (cc->whole_zone)
			return COMPACT_COMPLETE;
		else
			return COMPACT_PARTIAL_SKIPPED;
	}

	if (is_via_compact_memory(cc->order))
		return COMPACT_CONTINUE;

baf6a9a1d   Vlastimil Babka   mm, compaction: f...
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
  	if (cc->finishing_block) {
  		/*
  		 * We have finished the pageblock, but better check again that
  		 * we really succeeded.
  		 */
  		if (IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages))
  			cc->finishing_block = false;
  		else
  			return COMPACT_CONTINUE;
  	}

	/* Direct compactor: Is a suitable page free? */
	for (order = cc->order; order < MAX_ORDER; order++) {
		struct free_area *area = &zone->free_area[order];
		bool can_steal;

		/* Job done if page is free of the right migratetype */
		if (!list_empty(&area->free_list[migratetype]))
			return COMPACT_SUCCESS;

#ifdef CONFIG_CMA
		/* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
		if (migratetype == MIGRATE_MOVABLE &&
			!list_empty(&area->free_list[MIGRATE_CMA]))
			return COMPACT_SUCCESS;
#endif
		/*
		 * Job done if allocation would steal freepages from
		 * other migratetype buddy lists.
		 */
		if (find_suitable_fallback(area, order, migratetype,
  						true, &can_steal) != -1) {
  
  			/* movable pages are OK in any pageblock */
  			if (migratetype == MIGRATE_MOVABLE)
  				return COMPACT_SUCCESS;
  
  			/*
  			 * We are stealing for a non-movable allocation. Make
  			 * sure we finish compacting the current pageblock
  			 * first so it is as free as possible and we won't
  			 * have to steal another one soon. This only applies
  			 * to sync compaction, as async compaction operates
  			 * on pageblocks of the same migratetype.
  			 */
  			if (cc->mode == MIGRATE_ASYNC ||
  					IS_ALIGNED(cc->migrate_pfn,
  							pageblock_nr_pages)) {
  				return COMPACT_SUCCESS;
  			}
  
  			cc->finishing_block = true;
  			return COMPACT_CONTINUE;
  		}
	}

	return COMPACT_NO_SUITABLE_PAGE;
}

static enum compact_result compact_finished(struct zone *zone,
			struct compact_control *cc)
{
	int ret;

	ret = __compact_finished(zone, cc);
	trace_mm_compaction_finished(zone, cc->order, ret);
	if (ret == COMPACT_NO_SUITABLE_PAGE)
		ret = COMPACT_CONTINUE;

	return ret;
}

/*
 * compaction_suitable: Is this suitable to run compaction on this zone now?
 * Returns
 *   COMPACT_SKIPPED  - If there are too few free pages for compaction
 *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
 *   COMPACT_CONTINUE - If compaction should run now
 */
static enum compact_result __compaction_suitable(struct zone *zone, int order,
					unsigned int alloc_flags,
					int classzone_idx,
					unsigned long wmark_target)
{
	unsigned long watermark;

	if (is_via_compact_memory(order))
		return COMPACT_CONTINUE;

	watermark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
	/*
	 * If watermarks for high-order allocation are already met, there
	 * should be no need for compaction at all.
	 */
	if (zone_watermark_ok(zone, order, watermark, classzone_idx,
								alloc_flags))
		return COMPACT_SUCCESS;

	/*
	 * Watermarks for order-0 must be met for compaction to be able to
	 * isolate free pages for migration targets. This means that the
	 * watermark and alloc_flags have to match, or be more pessimistic than
	 * the check in __isolate_free_page(). We don't use the direct
	 * compactor's alloc_flags, as they are not relevant for freepage
	 * isolation. We however do use the direct compactor's classzone_idx to
	 * skip over zones where lowmem reserves would prevent allocation even
	 * if compaction succeeds.
	 * For costly orders, we require low watermark instead of min for
	 * compaction to proceed to increase its chances.
	 * ALLOC_CMA is used, as pages in CMA pageblocks are considered
	 * suitable migration targets
	 */
	watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
				low_wmark_pages(zone) : min_wmark_pages(zone);
	watermark += compact_gap(order);
	if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
						ALLOC_CMA, wmark_target))
		return COMPACT_SKIPPED;

  	return COMPACT_CONTINUE;
  }
  
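/*
 * Worked example: compact_gap() (defined in mm/internal.h) evaluates to
 * 2UL << order, so for an order-9 allocation the order-0 watermark test
 * above demands roughly 1024 free pages (4MB with 4KB pages) above the
 * low/min watermark, enough headroom for both the pages being migrated
 * and their free targets to be isolated at the same time.
 */
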
  enum compact_result compaction_suitable(struct zone *zone, int order,
  					unsigned int alloc_flags,
  					int classzone_idx)
  {
  	enum compact_result ret;
  	int fragindex;
  
  	ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx,
  				    zone_page_state(zone, NR_FREE_PAGES));
	/*
	 * fragmentation index determines if allocation failures are due to
	 * low memory or external fragmentation
	 *
	 * index of -1000 would imply allocations might succeed depending on
	 * watermarks, but we already failed the high-order watermark check
	 * index towards 0 implies failure is due to lack of memory
	 * index towards 1000 implies failure is due to fragmentation
	 *
	 * Only compact if a failure would be due to fragmentation. Also
	 * ignore fragindex for non-costly orders where the alternative to
	 * a successful reclaim/compaction is OOM. Fragindex and the
	 * vm.extfrag_threshold sysctl are meant as a heuristic to prevent
	 * excessive compaction for costly orders, but it should not be at the
	 * expense of system stability.
	 */
	if (ret == COMPACT_CONTINUE && (order > PAGE_ALLOC_COSTLY_ORDER)) {
		fragindex = fragmentation_index(zone, order);
		if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
			ret = COMPACT_NOT_SUITABLE_ZONE;
	}

	trace_mm_compaction_suitable(zone, order, ret);
	if (ret == COMPACT_NOT_SUITABLE_ZONE)
		ret = COMPACT_SKIPPED;

	return ret;
}
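
/*
 * Tuning example: fragmentation_index() reports a value scaled to the
 * 0..1000 range (or -1000 when the allocation might simply succeed), and
 * it is compared against sysctl_extfrag_threshold, default 500, exposed
 * as /proc/sys/vm/extfrag_threshold. Raising it, e.g.
 *
 *	echo 700 > /proc/sys/vm/extfrag_threshold
 *
 * makes more costly-order failures look like "low memory rather than
 * fragmentation", so compaction is skipped more often for them.
 */
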
  bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
  		int alloc_flags)
  {
  	struct zone *zone;
  	struct zoneref *z;
  
  	/*
  	 * Make sure at least one zone would pass __compaction_suitable if we continue
  	 * retrying the reclaim.
  	 */
  	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
  					ac->nodemask) {
  		unsigned long available;
  		enum compact_result compact_result;
  
  		/*
		 * Do not consider all the reclaimable memory because we do not
		 * want to thrash just for a single high-order allocation which
		 * is not even guaranteed to succeed even if __compaction_suitable
		 * is happy about the watermark check.
  		 */
		available = zone_reclaimable_pages(zone) / order;
		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
		compact_result = __compaction_suitable(zone, order, alloc_flags,
				ac_classzone_idx(ac), available);
		if (compact_result != COMPACT_SKIPPED)
  			return true;
  	}
  
  	return false;
  }

static enum compact_result compact_zone(struct zone *zone, struct compact_control *cc)
{
	enum compact_result ret;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	const bool sync = cc->mode != MIGRATE_ASYNC;

	cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
	ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
							cc->classzone_idx);
	/* Compaction is likely to fail */
	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
		return ret;

	/* huh, compaction_suitable is returning something unexpected */
	VM_BUG_ON(ret != COMPACT_CONTINUE);

	/*
	 * Clear pageblock skip if there were failures recently and compaction
	 * is about to be retried after being deferred.
	 */
	if (compaction_restarting(zone, cc->order))
		__reset_isolation_suitable(zone);

	/*
	 * Setup to move all movable pages to the end of the zone. Use cached
	 * information on where the scanners should start (unless we explicitly
	 * want to compact the whole zone), but check that it is initialised
	 * by ensuring the values are within zone boundaries.
	 */
	if (cc->whole_zone) {
		cc->migrate_pfn = start_pfn;
  		cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
  	} else {
  		cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
  		cc->free_pfn = zone->compact_cached_free_pfn;
  		if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
  			cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
  			zone->compact_cached_free_pfn = cc->free_pfn;
  		}
  		if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
  			cc->migrate_pfn = start_pfn;
  			zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
  			zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
  		}

		if (cc->migrate_pfn == start_pfn)
			cc->whole_zone = true;
	}

	cc->last_migrated_pfn = 0;

	trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
				cc->free_pfn, end_pfn, sync);

	migrate_prep_local();

	while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
		int err;

		switch (isolate_migratepages(zone, cc)) {
		case ISOLATE_ABORT:
			ret = COMPACT_CONTENDED;
			putback_movable_pages(&cc->migratepages);
			cc->nr_migratepages = 0;
			goto out;
		case ISOLATE_NONE:
			/*
			 * We haven't isolated and migrated anything, but
			 * there might still be unflushed migrations from
			 * previous cc->order aligned block.
			 */
			goto check_drain;
		case ISOLATE_SUCCESS:
			;
		}

		err = migrate_pages(&cc->migratepages, compaction_alloc,
				compaction_free, (unsigned long)cc, cc->mode,
				MR_COMPACTION);

		trace_mm_compaction_migratepages(cc->nr_migratepages, err,
							&cc->migratepages);

		/* All pages were either migrated or will be released */
		cc->nr_migratepages = 0;
		if (err) {
			putback_movable_pages(&cc->migratepages);

			/*
			 * migrate_pages() may return -ENOMEM when scanners meet
			 * and we want compact_finished() to detect it
			 */
			if (err == -ENOMEM && !compact_scanners_met(cc)) {
				ret = COMPACT_CONTENDED;
				goto out;
			}
  			/*
  			 * We failed to migrate at least one page in the current
  			 * order-aligned block, so skip the rest of it.
  			 */
  			if (cc->direct_compaction &&
  						(cc->mode == MIGRATE_ASYNC)) {
  				cc->migrate_pfn = block_end_pfn(
  						cc->migrate_pfn - 1, cc->order);
  				/* Draining pcplists is useless in this case */
  				cc->last_migrated_pfn = 0;
  
  			}
		}

  check_drain:
  		/*
  		 * Has the migration scanner moved away from the previous
  		 * cc->order aligned block where we migrated from? If yes,
  		 * flush the pages that were freed, so that they can merge and
  		 * compact_finished() can detect immediately if allocation
  		 * would succeed.
  		 */
		if (cc->order > 0 && cc->last_migrated_pfn) {
			int cpu;
			unsigned long current_block_start =
				block_start_pfn(cc->migrate_pfn, cc->order);

			if (cc->last_migrated_pfn < current_block_start) {
				cpu = get_cpu();
				lru_add_drain_cpu(cpu);
				drain_local_pages(zone);
				put_cpu();
				/* No more flushing until we migrate again */
				cc->last_migrated_pfn = 0;
			}
		}
	}

out:
  	/*
  	 * Release free pages and update where the free scanner should restart,
  	 * so we don't leave any returned pages behind in the next attempt.
  	 */
  	if (cc->nr_freepages > 0) {
  		unsigned long free_pfn = release_freepages(&cc->freepages);
  
  		cc->nr_freepages = 0;
  		VM_BUG_ON(free_pfn == 0);
  		/* The cached pfn is always the first in a pageblock */
		free_pfn = pageblock_start_pfn(free_pfn);
		/*
		 * Only go back, not forward. The cached pfn might have been
		 * already reset to zone end in compact_finished()
		 */
		if (free_pfn > zone->compact_cached_free_pfn)
			zone->compact_cached_free_pfn = free_pfn;
	}

	count_compact_events(COMPACTMIGRATE_SCANNED, cc->total_migrate_scanned);
	count_compact_events(COMPACTFREE_SCANNED, cc->total_free_scanned);

	trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
				cc->free_pfn, end_pfn, sync, ret);

	return ret;
}
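
/*
 * Informal sketch of the loop above, for orientation only:
 *
 *	while (compact_finished(zone, cc) == COMPACT_CONTINUE) {
 *		isolate_migratepages(zone, cc);      // fill cc->migratepages
 *		migrate_pages(&cc->migratepages, compaction_alloc,
 *			      compaction_free, ...); // move to free targets
 *		// occasionally drain pcp lists so freed pages can merge
 *	}
 *
 * The scanners' resume positions are cached in struct zone between runs.
 */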

static enum compact_result compact_zone_order(struct zone *zone, int order,
		gfp_t gfp_mask, enum compact_priority prio,
		unsigned int alloc_flags, int classzone_idx)
{
	enum compact_result ret;
	struct compact_control cc = {
		.nr_freepages = 0,
		.nr_migratepages = 0,
		.total_migrate_scanned = 0,
		.total_free_scanned = 0,
		.order = order,
		.gfp_mask = gfp_mask,
		.zone = zone,
		.mode = (prio == COMPACT_PRIO_ASYNC) ?
					MIGRATE_ASYNC :	MIGRATE_SYNC_LIGHT,
		.alloc_flags = alloc_flags,
		.classzone_idx = classzone_idx,
		.direct_compaction = true,
		.whole_zone = (prio == MIN_COMPACT_PRIORITY),
		.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
		.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
	};
	INIT_LIST_HEAD(&cc.freepages);
	INIT_LIST_HEAD(&cc.migratepages);

	ret = compact_zone(zone, &cc);

	VM_BUG_ON(!list_empty(&cc.freepages));
	VM_BUG_ON(!list_empty(&cc.migratepages));

	return ret;
}
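
/*
 * Note how compact_priority maps onto the fields above: COMPACT_PRIO_ASYNC
 * runs MIGRATE_ASYNC, the other priorities run MIGRATE_SYNC_LIGHT, and
 * only MIN_COMPACT_PRIORITY (the final, most aggressive attempt) compacts
 * the whole zone while ignoring the pageblock skip hints and the block
 * suitability check.
 */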

int sysctl_extfrag_threshold = 500;

/**
 * try_to_compact_pages - Direct compact to satisfy a high-order allocation
 * @gfp_mask: The GFP mask of the current allocation
 * @order: The order of the current allocation
 * @alloc_flags: The allocation flags of the current allocation
 * @ac: The context of current allocation
 * @prio: Determines how hard direct compaction should try to succeed
 *
 * This is the main entry point for direct page compaction.
 */
enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
		unsigned int alloc_flags, const struct alloc_context *ac,
		enum compact_priority prio)
{
	int may_perform_io = gfp_mask & __GFP_IO;
	struct zoneref *z;
	struct zone *zone;
	enum compact_result rc = COMPACT_SKIPPED;

	/*
	 * Check if the GFP flags allow compaction - GFP_NOIO is really
	 * tricky context because the migration might require IO
	 */
	if (!may_perform_io)
		return COMPACT_SKIPPED;

	trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);

	/* Compact each zone in the list */
	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
								ac->nodemask) {
		enum compact_result status;

		if (prio > MIN_COMPACT_PRIORITY
					&& compaction_deferred(zone, order)) {
			rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
			continue;
		}

		status = compact_zone_order(zone, order, gfp_mask, prio,
					alloc_flags, ac_classzone_idx(ac));
		rc = max(status, rc);

		/* The allocation should succeed, stop compacting */
		if (status == COMPACT_SUCCESS) {
			/*
			 * We think the allocation will succeed in this zone,
			 * but it is not certain, hence the false. The caller
			 * will repeat this with true if allocation indeed
			 * succeeds in this zone.
			 */
			compaction_defer_reset(zone, order, false);

			break;
		}

		if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
					status == COMPACT_PARTIAL_SKIPPED))
			/*
			 * We think that allocation won't succeed in this zone
			 * so we defer compaction there. If it ends up
			 * succeeding after all, it will be reset.
			 */
			defer_compaction(zone, order);

		/*
		 * We might have stopped compacting due to need_resched() in
		 * async compaction, or due to a fatal signal detected. In that
		 * case do not try further zones
		 */
		if ((prio == COMPACT_PRIO_ASYNC && need_resched())
					|| fatal_signal_pending(current))
			break;
	}

	return rc;
}

/* Compact all zones within a node */
static void compact_node(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	int zoneid;
	struct zone *zone;
	struct compact_control cc = {
		.order = -1,
		.total_migrate_scanned = 0,
		.total_free_scanned = 0,
		.mode = MIGRATE_SYNC,
		.ignore_skip_hint = true,
		.whole_zone = true,
		.gfp_mask = GFP_KERNEL,
	};

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {

		zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		cc.nr_freepages = 0;
		cc.nr_migratepages = 0;
		cc.zone = zone;
		INIT_LIST_HEAD(&cc.freepages);
		INIT_LIST_HEAD(&cc.migratepages);

		compact_zone(zone, &cc);

		VM_BUG_ON(!list_empty(&cc.freepages));
		VM_BUG_ON(!list_empty(&cc.migratepages));
	}
  }
  
  /* Compact all nodes in the system */
static void compact_nodes(void)
{
	int nid;

	/* Flush pending updates to the LRU lists */
	lru_add_drain_all();

	for_each_online_node(nid)
		compact_node(nid);
}

/* The written value is actually unused, all memory is compacted */
int sysctl_compact_memory;

/*
 * This is the entry point for compacting all nodes via
 * /proc/sys/vm/compact_memory
 */
int sysctl_compaction_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	if (write)
		compact_nodes();
  
  	return 0;
  }
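
/*
 * Usage example: writing any value triggers a full, synchronous
 * compaction of every online node, e.g.
 *
 *	echo 1 > /proc/sys/vm/compact_memory
 *
 * which ends up in compact_nodes() above.
 */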

int sysctl_extfrag_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	proc_dointvec_minmax(table, write, buffer, length, ppos);

	return 0;
}

#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
static ssize_t sysfs_compact_node(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
  	int nid = dev->id;
  
  	if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
  		/* Flush pending updates to the LRU lists */
  		lru_add_drain_all();
  
  		compact_node(nid);
  	}
  
  	return count;
  }
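
/*
 * Usage example: with CONFIG_SYSFS and CONFIG_NUMA, a single node can be
 * compacted from userspace through this attribute, e.g.
 *
 *	echo 1 > /sys/devices/system/node/node0/compact
 *
 * The written value is ignored, as with /proc/sys/vm/compact_memory.
 */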

static DEVICE_ATTR(compact, 0200, NULL, sysfs_compact_node);

int compaction_register_node(struct node *node)
{
	return device_create_file(&node->dev, &dev_attr_compact);
}

void compaction_unregister_node(struct node *node)
{
	return device_remove_file(&node->dev, &dev_attr_compact);
}
#endif /* CONFIG_SYSFS && CONFIG_NUMA */

static inline bool kcompactd_work_requested(pg_data_t *pgdat)
{
	return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
}

static bool kcompactd_node_suitable(pg_data_t *pgdat)
{
	int zoneid;
	struct zone *zone;
	enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx;

	for (zoneid = 0; zoneid <= classzone_idx; zoneid++) {
  		zone = &pgdat->node_zones[zoneid];
  
  		if (!populated_zone(zone))
  			continue;
  
  		if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
  					classzone_idx) == COMPACT_CONTINUE)
  			return true;
  	}
  
  	return false;
  }
  
  static void kcompactd_do_work(pg_data_t *pgdat)
  {
  	/*
  	 * With no special task, compact all zones so that a page of requested
  	 * order is allocatable.
  	 */
  	int zoneid;
  	struct zone *zone;
  	struct compact_control cc = {
  		.order = pgdat->kcompactd_max_order,
		.total_migrate_scanned = 0,
		.total_free_scanned = 0,
		.classzone_idx = pgdat->kcompactd_classzone_idx,
		.mode = MIGRATE_SYNC_LIGHT,
		.ignore_skip_hint = false,
		.gfp_mask = GFP_KERNEL,
	};
	trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
							cc.classzone_idx);
	count_compact_event(KCOMPACTD_WAKE);

	for (zoneid = 0; zoneid <= cc.classzone_idx; zoneid++) {
  		int status;
  
  		zone = &pgdat->node_zones[zoneid];
  		if (!populated_zone(zone))
  			continue;
  
  		if (compaction_deferred(zone, cc.order))
  			continue;
  
  		if (compaction_suitable(zone, cc.order, 0, zoneid) !=
  							COMPACT_CONTINUE)
  			continue;
  
  		cc.nr_freepages = 0;
  		cc.nr_migratepages = 0;
		cc.total_migrate_scanned = 0;
		cc.total_free_scanned = 0;
		cc.zone = zone;
		INIT_LIST_HEAD(&cc.freepages);
		INIT_LIST_HEAD(&cc.migratepages);

		if (kthread_should_stop())
			return;
		status = compact_zone(zone, &cc);

		if (status == COMPACT_SUCCESS) {
			compaction_defer_reset(zone, cc.order, false);
		} else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
			/*
  			 * Buddy pages may become stranded on pcps that could
  			 * otherwise coalesce on the zone's free area for
  			 * order >= cc.order.  This is ratelimited by the
  			 * upcoming deferral.
  			 */
  			drain_all_pages(zone);
  
  			/*
  			 * We use sync migration mode here, so we defer like
  			 * sync direct compaction does.
  			 */
  			defer_compaction(zone, cc.order);
  		}
  		count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
  				     cc.total_migrate_scanned);
  		count_compact_events(KCOMPACTD_FREE_SCANNED,
  				     cc.total_free_scanned);
  		VM_BUG_ON(!list_empty(&cc.freepages));
  		VM_BUG_ON(!list_empty(&cc.migratepages));
  	}
  
  	/*
  	 * Regardless of success, we are done until woken up next. But remember
  	 * the requested order/classzone_idx in case it was higher/tighter than
  	 * our current ones
  	 */
  	if (pgdat->kcompactd_max_order <= cc.order)
  		pgdat->kcompactd_max_order = 0;
  	if (pgdat->kcompactd_classzone_idx >= cc.classzone_idx)
  		pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
  }
  
  void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
  {
  	if (!order)
  		return;
  
  	if (pgdat->kcompactd_max_order < order)
  		pgdat->kcompactd_max_order = order;
  
  	if (pgdat->kcompactd_classzone_idx > classzone_idx)
  		pgdat->kcompactd_classzone_idx = classzone_idx;
  	/*
  	 * Pairs with implicit barrier in wait_event_freezable()
  	 * such that wakeups are not missed.
  	 */
  	if (!wq_has_sleeper(&pgdat->kcompactd_wait))
  		return;
  
  	if (!kcompactd_node_suitable(pgdat))
  		return;
  
  	trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
  							classzone_idx);
  	wake_up_interruptible(&pgdat->kcompactd_wait);
  }
  
  /*
   * The background compaction daemon, started as a kernel thread
   * from the init process.
   */
  static int kcompactd(void *p)
  {
  	pg_data_t *pgdat = (pg_data_t*)p;
  	struct task_struct *tsk = current;
  
  	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
  
  	if (!cpumask_empty(cpumask))
  		set_cpus_allowed_ptr(tsk, cpumask);
  
  	set_freezable();
  
  	pgdat->kcompactd_max_order = 0;
  	pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
  
  	while (!kthread_should_stop()) {
  		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
  		wait_event_freezable(pgdat->kcompactd_wait,
  				kcompactd_work_requested(pgdat));
  
  		kcompactd_do_work(pgdat);
  	}
  
  	return 0;
  }
  
  /*
   * This kcompactd start function will be called by init and node-hot-add.
   * On node-hot-add, kcompactd will moved to proper cpus if cpus are hot-added.
   */
  int kcompactd_run(int nid)
  {
  	pg_data_t *pgdat = NODE_DATA(nid);
  	int ret = 0;
  
  	if (pgdat->kcompactd)
  		return 0;
  
  	pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
  	if (IS_ERR(pgdat->kcompactd)) {
		pr_err("Failed to start kcompactd on node %d\n", nid);
  		ret = PTR_ERR(pgdat->kcompactd);
  		pgdat->kcompactd = NULL;
  	}
  	return ret;
  }
  
  /*
   * Called by memory hotplug when all memory in a node is offlined. Caller must
   * hold mem_hotplug_begin/end().
   */
  void kcompactd_stop(int nid)
  {
  	struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;
  
  	if (kcompactd) {
  		kthread_stop(kcompactd);
  		NODE_DATA(nid)->kcompactd = NULL;
  	}
  }
  
  /*
   * It's optimal to keep kcompactd on the same CPUs as their memory, but
   * not required for correctness. So if the last cpu in a node goes
   * away, we get changed to run anywhere: as the first one comes back,
   * restore their cpu bindings.
   */
static int kcompactd_cpu_online(unsigned int cpu)
{
	int nid;

	for_each_node_state(nid, N_MEMORY) {
		pg_data_t *pgdat = NODE_DATA(nid);
		const struct cpumask *mask;

		mask = cpumask_of_node(pgdat->node_id);

		if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
			/* One of our CPUs online: restore mask */
			set_cpus_allowed_ptr(pgdat->kcompactd, mask);
	}
	return 0;
  }
  
  static int __init kcompactd_init(void)
  {
  	int nid;
	int ret;

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					"mm/compaction:online",
					kcompactd_cpu_online, NULL);
	if (ret < 0) {
		pr_err("kcompactd: failed to register hotplug callbacks.\n");
		return ret;
	}
  
  	for_each_node_state(nid, N_MEMORY)
  		kcompactd_run(nid);
  	return 0;
  }
  subsys_initcall(kcompactd_init)
  #endif /* CONFIG_COMPACTION */