  /*
   * linux/mm/compaction.c
   *
   * Memory compaction for the reduction of external fragmentation. Note that
   * this heavily depends upon page migration to do all the real heavy
   * lifting
   *
   * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
   */
  #include <linux/swap.h>
  #include <linux/migrate.h>
  #include <linux/compaction.h>
  #include <linux/mm_inline.h>
  #include <linux/backing-dev.h>
  #include <linux/sysctl.h>
  #include <linux/sysfs.h>
  #include <linux/balloon_compaction.h>
  #include <linux/page-isolation.h>
  #include "internal.h"
  #ifdef CONFIG_COMPACTION
  static inline void count_compact_event(enum vm_event_item item)
  {
  	count_vm_event(item);
  }
  
  static inline void count_compact_events(enum vm_event_item item, long delta)
  {
  	count_vm_events(item, delta);
  }
  #else
  #define count_compact_event(item) do { } while (0)
  #define count_compact_events(item, delta) do { } while (0)
  #endif
  #if defined CONFIG_COMPACTION || defined CONFIG_CMA
  #define CREATE_TRACE_POINTS
  #include <trace/events/compaction.h>
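
  /*
   * Free every page on @freelist and return the highest PFN encountered
   * among the released pages.
   */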
  static unsigned long release_freepages(struct list_head *freelist)
  {
  	struct page *page, *next;
  	unsigned long high_pfn = 0;
  
  	list_for_each_entry_safe(page, next, freelist, lru) {
  		unsigned long pfn = page_to_pfn(page);
  		list_del(&page->lru);
  		__free_page(page);
  		if (pfn > high_pfn)
  			high_pfn = pfn;
  	}
  	return high_pfn;
  }
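
  /*
   * split_free_page() does not run the allocation-time hooks for the pages
   * it hands out, so do it here: arch_alloc_page() plus kernel_map_pages()
   * (the latter only matters with CONFIG_DEBUG_PAGEALLOC).
   */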
  static void map_pages(struct list_head *list)
  {
  	struct page *page;
  
  	list_for_each_entry(page, list, lru) {
  		arch_alloc_page(page, 0);
  		kernel_map_pages(page, 1, 1);
  	}
  }
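
  /*
   * Pageblocks of these migratetypes (MOVABLE, CMA) are the ones compaction
   * prefers: async compaction scans only such blocks for pages to migrate,
   * and the free scanner only isolates free pages from them.
   */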
  static inline bool migrate_async_suitable(int migratetype)
  {
  	return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
  }
  /*
   * Check that the whole (or subset of) a pageblock given by the interval of
   * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
   * with the migration or free compaction scanner. The scanners then need to
   * use only the pfn_valid_within() check for arches that allow holes within
   * pageblocks.
   *
   * Return struct page pointer of start_pfn, or NULL if checks were not passed.
   *
   * It's possible on some configurations to have a setup like node0 node1 node0
   * i.e. it's possible that all pages within a zone's range of pages do not
   * belong to a single zone. We assume that a border between node0 and node1
   * can occur within a single pageblock, but not a node0 node1 node0
   * interleaving within a single pageblock. It is therefore sufficient to check
   * the first and last page of a pageblock and avoid checking each individual
   * page in a pageblock.
   */
  static struct page *pageblock_pfn_to_page(unsigned long start_pfn,
  				unsigned long end_pfn, struct zone *zone)
  {
  	struct page *start_page;
  	struct page *end_page;
  
  	/* end_pfn is one past the range we are checking */
  	end_pfn--;
  
  	if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
  		return NULL;
  
  	start_page = pfn_to_page(start_pfn);
  
  	if (page_zone(start_page) != zone)
  		return NULL;
  
  	end_page = pfn_to_page(end_pfn);
  
  	/* This gives a shorter code than deriving page_zone(end_page) */
  	if (page_zone_id(start_page) != page_zone_id(end_page))
  		return NULL;
  
  	return start_page;
  }
  #ifdef CONFIG_COMPACTION
  /* Returns true if the pageblock should be scanned for pages to isolate. */
  static inline bool isolation_suitable(struct compact_control *cc,
  					struct page *page)
  {
  	if (cc->ignore_skip_hint)
  		return true;
  
  	return !get_pageblock_skip(page);
  }
  
  /*
   * This function is called to clear all cached information on pageblocks that
   * should be skipped for page isolation when the migrate and free page scanner
   * meet.
   */
  static void __reset_isolation_suitable(struct zone *zone)
  {
  	unsigned long start_pfn = zone->zone_start_pfn;
  	unsigned long end_pfn = zone_end_pfn(zone);
  	unsigned long pfn;
  	zone->compact_cached_migrate_pfn[0] = start_pfn;
  	zone->compact_cached_migrate_pfn[1] = start_pfn;
  	zone->compact_cached_free_pfn = end_pfn;
  	zone->compact_blockskip_flush = false;
  
  	/* Walk the zone and mark every pageblock as suitable for isolation */
  	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
  		struct page *page;
  
  		cond_resched();
  
  		if (!pfn_valid(pfn))
  			continue;
  
  		page = pfn_to_page(pfn);
  		if (zone != page_zone(page))
  			continue;
  
  		clear_pageblock_skip(page);
  	}
  }
  void reset_isolation_suitable(pg_data_t *pgdat)
  {
  	int zoneid;
  
  	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
  		struct zone *zone = &pgdat->node_zones[zoneid];
  		if (!populated_zone(zone))
  			continue;
  
  		/* Only flush if a full compaction finished recently */
  		if (zone->compact_blockskip_flush)
  			__reset_isolation_suitable(zone);
  	}
  }
  /*
   * If no pages were isolated then mark this pageblock to be skipped in the
   * future. The information is later cleared by __reset_isolation_suitable().
   */
  static void update_pageblock_skip(struct compact_control *cc,
  			struct page *page, unsigned long nr_isolated,
  			bool migrate_scanner)
  {
  	struct zone *zone = cc->zone;
  	unsigned long pfn;
  
  	if (cc->ignore_skip_hint)
  		return;
  	if (!page)
  		return;
  	if (nr_isolated)
  		return;
  	set_pageblock_skip(page);

  	pfn = page_to_pfn(page);
  
  	/* Update where async and sync compaction should restart */
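  	/* compact_cached_migrate_pfn[0] is where async compaction restarts, [1] sync */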
  	if (migrate_scanner) {
  		if (pfn > zone->compact_cached_migrate_pfn[0])
  			zone->compact_cached_migrate_pfn[0] = pfn;
  		if (cc->mode != MIGRATE_ASYNC &&
  		    pfn > zone->compact_cached_migrate_pfn[1])
  			zone->compact_cached_migrate_pfn[1] = pfn;
  	} else {
  		if (pfn < zone->compact_cached_free_pfn)
  			zone->compact_cached_free_pfn = pfn;
  	}
  }
  #else
  static inline bool isolation_suitable(struct compact_control *cc,
  					struct page *page)
  {
  	return true;
  }
  static void update_pageblock_skip(struct compact_control *cc,
  			struct page *page, unsigned long nr_isolated,
  			bool migrate_scanner)
  {
  }
  #endif /* CONFIG_COMPACTION */
  /*
   * Compaction requires the taking of some coarse locks that are potentially
   * very heavily contended. For async compaction, back out if the lock cannot
   * be taken immediately. For sync compaction, spin on the lock if needed.
   *
   * Returns true if the lock is held
   * Returns false if the lock is not held and compaction should abort
   */
  static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
  						struct compact_control *cc)
  {
  	if (cc->mode == MIGRATE_ASYNC) {
  		if (!spin_trylock_irqsave(lock, *flags)) {
  			cc->contended = COMPACT_CONTENDED_LOCK;
  			return false;
  		}
  	} else {
  		spin_lock_irqsave(lock, *flags);
  	}

  	return true;
  }
  /*
   * Compaction requires the taking of some coarse locks that are potentially
   * very heavily contended. The lock should be periodically unlocked to avoid
   * having disabled IRQs for a long time, even when there is nobody waiting on
   * the lock. It might also be that allowing the IRQs will result in
   * need_resched() becoming true. If scheduling is needed, async compaction
   * aborts. Sync compaction schedules.
   * Either compaction type will also abort if a fatal signal is pending.
   * In either case if the lock was locked, it is dropped and not regained.
   *
   * Returns true if compaction should abort due to fatal signal pending, or
   *		async compaction due to need_resched()
   * Returns false when compaction can continue (sync compaction might have
   *		scheduled)
   */
  static bool compact_unlock_should_abort(spinlock_t *lock,
  		unsigned long flags, bool *locked, struct compact_control *cc)
  {
  	if (*locked) {
  		spin_unlock_irqrestore(lock, flags);
  		*locked = false;
  	}

  	if (fatal_signal_pending(current)) {
  		cc->contended = COMPACT_CONTENDED_SCHED;
  		return true;
  	}

  	if (need_resched()) {
  		if (cc->mode == MIGRATE_ASYNC) {
  			cc->contended = COMPACT_CONTENDED_SCHED;
  			return true;
  		}
  		cond_resched();
  	}
  	return false;
  }
  /*
   * Aside from avoiding lock contention, compaction also periodically checks
   * need_resched() and either schedules in sync compaction or aborts async
   * compaction. This is similar to what compact_unlock_should_abort() does, but
   * is used where no lock is concerned.
   *
   * Returns false when no scheduling was needed, or sync compaction scheduled.
   * Returns true when async compaction should abort.
   */
  static inline bool compact_should_abort(struct compact_control *cc)
  {
  	/* async compaction aborts if contended */
  	if (need_resched()) {
  		if (cc->mode == MIGRATE_ASYNC) {
  			cc->contended = COMPACT_CONTENDED_SCHED;
  			return true;
  		}
  
  		cond_resched();
  	}
  
  	return false;
  }
  /* Returns true if the page is within a block suitable for migration to */
  static bool suitable_migration_target(struct page *page)
  {
  	/* If the page is a large free page, then disallow migration */
  	if (PageBuddy(page)) {
  		/*
  		 * We are checking page_order without zone->lock taken. But
  		 * the only small danger is that we skip a potentially suitable
  		 * pageblock, so it's not worth checking the order against a valid range.
  		 */
  		if (page_order_unsafe(page) >= pageblock_order)
  			return false;
  	}
  
  	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
  	if (migrate_async_suitable(get_pageblock_migratetype(page)))
  		return true;
  
  	/* Otherwise skip the block */
  	return false;
  }
  /*
   * Isolate free pages onto a private freelist. If @strict is true, will abort
   * returning 0 on any invalid PFNs or non-free pages inside the pageblock
   * (even though it may still end up isolating some pages).
   */
  static unsigned long isolate_freepages_block(struct compact_control *cc,
  				unsigned long *start_pfn,
  				unsigned long end_pfn,
  				struct list_head *freelist,
  				bool strict)
  {
  	int nr_scanned = 0, total_isolated = 0;
  	struct page *cursor, *valid_page = NULL;
  	unsigned long flags = 0;
  	bool locked = false;
  	unsigned long blockpfn = *start_pfn;

  	cursor = pfn_to_page(blockpfn);
  	/* Isolate free pages. */
  	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
  		int isolated, i;
  		struct page *page = cursor;
  		/*
  		 * Periodically drop the lock (if held) regardless of its
  		 * contention, to give chance to IRQs. Abort if fatal signal
  		 * pending or async compaction detects need_resched()
  		 */
  		if (!(blockpfn % SWAP_CLUSTER_MAX)
  		    && compact_unlock_should_abort(&cc->zone->lock, flags,
  								&locked, cc))
  			break;
  		nr_scanned++;
  		if (!pfn_valid_within(blockpfn))
  			goto isolate_fail;
  		if (!valid_page)
  			valid_page = page;
  		if (!PageBuddy(page))
  			goto isolate_fail;
  
  		/*
  		 * If we already hold the lock, we can skip some rechecking.
  		 * Note that if we hold the lock now, checked_pageblock was
  		 * already set in some previous iteration (or strict is true),
  		 * so it is correct to skip the suitable migration target
  		 * recheck as well.
  		 */
  		if (!locked) {
  			/*
  			 * The zone lock must be held to isolate freepages.
  			 * Unfortunately this is a very coarse lock and can be
  			 * heavily contended if there are parallel allocations
  			 * or parallel compactions. For async compaction do not
  			 * spin on the lock and we acquire the lock as late as
  			 * possible.
  			 */
  			locked = compact_trylock_irqsave(&cc->zone->lock,
  								&flags, cc);
  			if (!locked)
  				break;

  			/* Recheck this is a buddy page under lock */
  			if (!PageBuddy(page))
  				goto isolate_fail;
  		}
  
  		/* Found a free page, break it into order-0 pages */
  		isolated = split_free_page(page);
  		total_isolated += isolated;
  		for (i = 0; i < isolated; i++) {
  			list_add(&page->lru, freelist);
  			page++;
  		}
  
  		/* If a page was split, advance to the end of it */
  		if (isolated) {
  			blockpfn += isolated - 1;
  			cursor += isolated - 1;
  			continue;
  		}
  
  isolate_fail:
  		if (strict)
  			break;
  		else
  			continue;
  	}
  	/* Record how far we have got within the block */
  	*start_pfn = blockpfn;
  	trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
  
  	/*
  	 * If strict isolation is requested by CMA then check that all the
  	 * pages requested were isolated. If there were any failures, 0 is
  	 * returned and CMA will fail.
  	 */
  	if (strict && blockpfn < end_pfn)
  		total_isolated = 0;
  
  	if (locked)
  		spin_unlock_irqrestore(&cc->zone->lock, flags);
  	/* Update the pageblock-skip if the whole pageblock was scanned */
  	if (blockpfn == end_pfn)
  		update_pageblock_skip(cc, valid_page, total_isolated, false);

  	count_compact_events(COMPACTFREE_SCANNED, nr_scanned);
  	if (total_isolated)
  		count_compact_events(COMPACTISOLATED, total_isolated);
  	return total_isolated;
  }
  /**
   * isolate_freepages_range() - isolate free pages.
   * @start_pfn: The first PFN to start isolating.
   * @end_pfn:   The one-past-last PFN.
   *
   * Non-free pages, invalid PFNs, or zone boundaries within the
   * [start_pfn, end_pfn) range are considered errors, cause function to
   * undo its actions and return zero.
   *
   * Otherwise, function returns one-past-the-last PFN of isolated page
   * (which may be greater than end_pfn if end fell in the middle of
   * a free page).
   */
  unsigned long
  isolate_freepages_range(struct compact_control *cc,
  			unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long isolated, pfn, block_end_pfn;
  	LIST_HEAD(freelist);
  	pfn = start_pfn;
  	block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
  
  	for (; pfn < end_pfn; pfn += isolated,
  				block_end_pfn += pageblock_nr_pages) {
  		/* Protect pfn from changing by isolate_freepages_block */
  		unsigned long isolate_start_pfn = pfn;

  		block_end_pfn = min(block_end_pfn, end_pfn);
  		/*
  		 * pfn could pass the block_end_pfn if isolated freepage
  		 * is more than pageblock order. In this case, we adjust
  		 * scanning range to right one.
  		 */
  		if (pfn >= block_end_pfn) {
  			block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
  			block_end_pfn = min(block_end_pfn, end_pfn);
  		}
  		if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
  			break;
  		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
  						block_end_pfn, &freelist, true);
  
  		/*
  		 * In strict mode, isolate_freepages_block() returns 0 if
  		 * there are any holes in the block (ie. invalid PFNs or
  		 * non-free pages).
  		 */
  		if (!isolated)
  			break;
  
  		/*
  		 * If we managed to isolate pages, it is always (1 << n) *
  		 * pageblock_nr_pages for some non-negative n.  (Max order
  		 * page may span two pageblocks).
  		 */
  	}
  
  	/* split_free_page does not map the pages */
  	map_pages(&freelist);
  
  	if (pfn < end_pfn) {
  		/* Loop terminated early, cleanup. */
  		release_freepages(&freelist);
  		return 0;
  	}
  
  	/* We don't use freelists for anything. */
  	return pfn;
  }
  /* Update the number of anon and file isolated pages in the zone */
  static void acct_isolated(struct zone *zone, struct compact_control *cc)
  {
  	struct page *page;
  	unsigned int count[2] = { 0, };
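  	/* count[0]: anon pages, count[1]: file-backed pages */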

  	if (list_empty(&cc->migratepages))
  		return;
  	list_for_each_entry(page, &cc->migratepages, lru)
  		count[!!page_is_file_cache(page)]++;

  	mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
  	mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
  }
  
  /* Similar to reclaim, but different enough that they don't share logic */
  static bool too_many_isolated(struct zone *zone)
  {
  	unsigned long active, inactive, isolated;
  
  	inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
  					zone_page_state(zone, NR_INACTIVE_ANON);
  	active = zone_page_state(zone, NR_ACTIVE_FILE) +
  					zone_page_state(zone, NR_ACTIVE_ANON);
  	isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
  					zone_page_state(zone, NR_ISOLATED_ANON);
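
  	/*
  	 * Too many when the isolated pages outnumber half of the LRU pages
  	 * still on the active/inactive lists.
  	 */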
  	return isolated > (inactive + active) / 2;
  }
  /**
   * isolate_migratepages_block() - isolate all migrate-able pages within
   *				  a single pageblock
   * @cc:		Compaction control structure.
   * @low_pfn:	The first PFN to isolate
   * @end_pfn:	The one-past-the-last PFN to isolate, within same pageblock
   * @isolate_mode: Isolation mode to be used.
   *
   * Isolate all pages that can be migrated from the range specified by
   * [low_pfn, end_pfn). The range is expected to be within same pageblock.
   * Returns zero if there is a fatal signal pending, otherwise PFN of the
   * first page that was not scanned (which may be less than, equal to, or
   * greater than end_pfn).
   *
   * The pages are isolated on cc->migratepages list (not required to be empty),
   * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field
   * is neither read nor updated.
   */
  static unsigned long
  isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
  			unsigned long end_pfn, isolate_mode_t isolate_mode)
  {
  	struct zone *zone = cc->zone;
  	unsigned long nr_scanned = 0, nr_isolated = 0;
  	struct list_head *migratelist = &cc->migratepages;
  	struct lruvec *lruvec;
  	unsigned long flags = 0;
  	bool locked = false;
  	struct page *page = NULL, *valid_page = NULL;

  	/*
  	 * Ensure that there are not too many pages isolated from the LRU
  	 * list by either parallel reclaimers or compaction. If there are,
  	 * delay for some time until fewer pages are isolated
  	 */
  	while (unlikely(too_many_isolated(zone))) {
  		/* async migration should just abort */
  		if (cc->mode == MIGRATE_ASYNC)
  			return 0;

  		congestion_wait(BLK_RW_ASYNC, HZ/10);
  
  		if (fatal_signal_pending(current))
  			return 0;
  	}
  	if (compact_should_abort(cc))
  		return 0;

  	/* Time to isolate some pages for migration */
  	for (; low_pfn < end_pfn; low_pfn++) {
  		/*
  		 * Periodically drop the lock (if held) regardless of its
  		 * contention, to give chance to IRQs. Abort async compaction
  		 * if contended.
  		 */
  		if (!(low_pfn % SWAP_CLUSTER_MAX)
  		    && compact_unlock_should_abort(&zone->lru_lock, flags,
  								&locked, cc))
  			break;

  		if (!pfn_valid_within(low_pfn))
  			continue;
  		nr_scanned++;

  		page = pfn_to_page(low_pfn);

  		if (!valid_page)
  			valid_page = page;
  		/*
  		 * Skip if free. We read page order here without zone lock
  		 * which is generally unsafe, but the race window is small and
  		 * the worst thing that can happen is that we skip some
  		 * potential isolation targets.
  		 */
  		if (PageBuddy(page)) {
  			unsigned long freepage_order = page_order_unsafe(page);
  
  			/*
  			 * Without lock, we cannot be sure that what we got is
  			 * a valid page order. Consider only values in the
  			 * valid order range to prevent low_pfn overflow.
  			 */
  			if (freepage_order > 0 && freepage_order < MAX_ORDER)
  				low_pfn += (1UL << freepage_order) - 1;
  			continue;
  		}

  		/*
  		 * Check may be lockless but that's ok as we recheck later.
  		 * It's possible to migrate LRU pages and balloon pages
  		 * Skip any other type of page
  		 */
  		if (!PageLRU(page)) {
  			if (unlikely(balloon_page_movable(page))) {
  				if (balloon_page_isolate(page)) {
  					/* Successfully isolated */
  					goto isolate_success;
  				}
  			}
  			continue;
  		}
  
  		/*
  		 * PageLRU is set. lru_lock normally excludes isolation
  		 * splitting and collapsing (collapsing has already happened
  		 * if PageLRU is set) but the lock is not necessarily taken
  		 * here and it is wasteful to take it just to check transhuge.
  		 * Check TransHuge without lock and skip the whole pageblock if
  		 * it's either a transhuge or hugetlbfs page, as calling
  		 * compound_order() without preventing THP from splitting the
  		 * page underneath us may return surprising results.
  		 */
  		if (PageTransHuge(page)) {
  			if (!locked)
  				low_pfn = ALIGN(low_pfn + 1,
  						pageblock_nr_pages) - 1;
  			else
  				low_pfn += (1 << compound_order(page)) - 1;
  			continue;
  		}
  		/*
  		 * Migration will fail if an anonymous page is pinned in memory,
  		 * so avoid taking lru_lock and isolating it unnecessarily in an
  		 * admittedly racy check.
  		 */
  		if (!page_mapping(page) &&
  		    page_count(page) > page_mapcount(page))
  			continue;
  		/* If we already hold the lock, we can skip some rechecking */
  		if (!locked) {
  			locked = compact_trylock_irqsave(&zone->lru_lock,
  								&flags, cc);
  			if (!locked)
  				break;

  			/* Recheck PageLRU and PageTransHuge under lock */
  			if (!PageLRU(page))
  				continue;
  			if (PageTransHuge(page)) {
  				low_pfn += (1 << compound_order(page)) - 1;
  				continue;
  			}
  		}
  		lruvec = mem_cgroup_page_lruvec(page, zone);
  		/* Try isolate the page */
  		if (__isolate_lru_page(page, isolate_mode) != 0)
  			continue;
  		VM_BUG_ON_PAGE(PageTransCompound(page), page);

  		/* Successfully isolated */
  		del_page_from_lru_list(page, lruvec, page_lru(page));
  
  isolate_success:
  		list_add(&page->lru, migratelist);
  		cc->nr_migratepages++;
  		nr_isolated++;
  
  		/* Avoid isolating too much */
  		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
  			++low_pfn;
  			break;
  		}
  	}
  	/*
  	 * The PageBuddy() check could have potentially brought us outside
  	 * the range to be scanned.
  	 */
  	if (unlikely(low_pfn > end_pfn))
  		low_pfn = end_pfn;
  	if (locked)
  		spin_unlock_irqrestore(&zone->lru_lock, flags);

  	/*
  	 * Update the pageblock-skip information and cached scanner pfn,
  	 * if the whole pageblock was scanned without isolating any page.
  	 */
  	if (low_pfn == end_pfn)
  		update_pageblock_skip(cc, valid_page, nr_isolated, true);

  	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
  	count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned);
  	if (nr_isolated)
  		count_compact_events(COMPACTISOLATED, nr_isolated);

  	return low_pfn;
  }
  /**
   * isolate_migratepages_range() - isolate migrate-able pages in a PFN range
   * @cc:        Compaction control structure.
   * @start_pfn: The first PFN to start isolating.
   * @end_pfn:   The one-past-last PFN.
   *
   * Returns zero if isolation fails fatally due to e.g. pending signal.
   * Otherwise, function returns one-past-the-last PFN of isolated page
   * (which may be greater than end_pfn if end fell in the middle of a THP page).
   */
  unsigned long
  isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
  							unsigned long end_pfn)
  {
  	unsigned long pfn, block_end_pfn;
  
  	/* Scan block by block. First and last block may be incomplete */
  	pfn = start_pfn;
  	block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
  
  	for (; pfn < end_pfn; pfn = block_end_pfn,
  				block_end_pfn += pageblock_nr_pages) {
  
  		block_end_pfn = min(block_end_pfn, end_pfn);
  		if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
  			continue;
  
  		pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
  							ISOLATE_UNEVICTABLE);
  
  		/*
  		 * In case of fatal failure, release everything that might
  		 * have been isolated in the previous iteration, and signal
  		 * the failure back to caller.
  		 */
  		if (!pfn) {
  			putback_movable_pages(&cc->migratepages);
  			cc->nr_migratepages = 0;
  			break;
  		}
  
  		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
  			break;
  	}
  	acct_isolated(cc->zone, cc);
  
  	return pfn;
  }
  #endif /* CONFIG_COMPACTION || CONFIG_CMA */
  #ifdef CONFIG_COMPACTION
  /*
   * Based on information in the current compact_control, find blocks
   * suitable for isolating free pages from and then isolate them.
   */
  static void isolate_freepages(struct compact_control *cc)
  {
  	struct zone *zone = cc->zone;
  	struct page *page;
  	unsigned long block_start_pfn;	/* start of current pageblock */
  	unsigned long isolate_start_pfn; /* exact pfn we start at */
  	unsigned long block_end_pfn;	/* end of current pageblock */
  	unsigned long low_pfn;	     /* lowest pfn scanner is able to scan */
  	int nr_freepages = cc->nr_freepages;
  	struct list_head *freelist = &cc->freepages;

  	/*
  	 * Initialise the free scanner. The starting point is where we last
  	 * successfully isolated from, zone-cached value, or the end of the
  	 * zone when isolating for the first time. For looping we also need
  	 * this pfn aligned down to the pageblock boundary, because we do
  	 * block_start_pfn -= pageblock_nr_pages in the for loop.
  	 * For the ending point, take care when isolating in the last pageblock
  	 * of a zone which ends in the middle of a pageblock.
  	 * The low boundary is the end of the pageblock the migration scanner
  	 * is using.
  	 */
  	isolate_start_pfn = cc->free_pfn;
  	block_start_pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
  	block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
  						zone_end_pfn(zone));
  	low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages);

  	/*
  	 * Isolate free pages until enough are available to migrate the
  	 * pages on cc->migratepages. We stop searching if the migrate
  	 * and free page scanners meet or enough free pages are isolated.
  	 */
  	for (; block_start_pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
  				block_end_pfn = block_start_pfn,
  				block_start_pfn -= pageblock_nr_pages,
  				isolate_start_pfn = block_start_pfn) {
  		unsigned long isolated;

  		/*
  		 * This can iterate a massively long zone without finding any
  		 * suitable migration targets, so periodically check if we need
  		 * to schedule, or even abort async compaction.
  		 */
  		if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
  						&& compact_should_abort(cc))
  			break;

  		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
  									zone);
  		if (!page)
  			continue;
  
  		/* Check the block is suitable for migration */
  		if (!suitable_migration_target(page))
  			continue;

  		/* If isolation recently failed, do not retry */
  		if (!isolation_suitable(cc, page))
  			continue;
  		/* Found a block suitable for isolating free pages from. */
  		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
  					block_end_pfn, freelist, false);
  		nr_freepages += isolated;
  
  		/*
  		 * Remember where the free scanner should restart next time,
  		 * which is where isolate_freepages_block() left off.
  		 * But if it scanned the whole pageblock, isolate_start_pfn
  		 * now points at block_end_pfn, which is the start of the next
  		 * pageblock.
  		 * In that case we will however want to restart at the start
  		 * of the previous pageblock.
  		 */
  		cc->free_pfn = (isolate_start_pfn < block_end_pfn) ?
  				isolate_start_pfn :
  				block_start_pfn - pageblock_nr_pages;
  
  		/*
  		 * isolate_freepages_block() might have aborted due to async
  		 * compaction being contended
  		 */
  		if (cc->contended)
  			break;
  	}
  
  	/* split_free_page does not map the pages */
  	map_pages(freelist);
  	/*
  	 * If we crossed the migrate scanner, we want to keep it that way
  	 * so that compact_finished() may detect this
  	 */
  	if (block_start_pfn < low_pfn)
  		cc->free_pfn = cc->migrate_pfn;

  	cc->nr_freepages = nr_freepages;
  }
  
  /*
   * This is a migrate-callback that "allocates" freepages by taking pages
   * from the isolated freelists in the block we are migrating to.
   */
  static struct page *compaction_alloc(struct page *migratepage,
  					unsigned long data,
  					int **result)
  {
  	struct compact_control *cc = (struct compact_control *)data;
  	struct page *freepage;
  	/*
  	 * Isolate free pages if necessary, and if we are not aborting due to
  	 * contention.
  	 */
  	if (list_empty(&cc->freepages)) {
  		if (!cc->contended)
  			isolate_freepages(cc);
  
  		if (list_empty(&cc->freepages))
  			return NULL;
  	}
  
  	freepage = list_entry(cc->freepages.next, struct page, lru);
  	list_del(&freepage->lru);
  	cc->nr_freepages--;
  
  	return freepage;
  }
  
  /*
   * This is a migrate-callback that "frees" freepages back to the isolated
   * freelist.  All pages on the freelist are from the same zone, so there is no
   * special handling needed for NUMA.
   */
  static void compaction_free(struct page *page, unsigned long data)
  {
  	struct compact_control *cc = (struct compact_control *)data;
  
  	list_add(&page->lru, &cc->freepages);
  	cc->nr_freepages++;
  }
  /* possible outcome of isolate_migratepages */
  typedef enum {
  	ISOLATE_ABORT,		/* Abort compaction now */
  	ISOLATE_NONE,		/* No pages isolated, continue scanning */
  	ISOLATE_SUCCESS,	/* Pages isolated, migrate */
  } isolate_migrate_t;
  
  /*
   * Isolate all pages that can be migrated from the first suitable block,
   * starting at the block pointed to by the migrate scanner pfn within
   * compact_control.
   */
  static isolate_migrate_t isolate_migratepages(struct zone *zone,
  					struct compact_control *cc)
  {
  	unsigned long low_pfn, end_pfn;
  	struct page *page;
  	const isolate_mode_t isolate_mode =
  		(cc->mode == MIGRATE_ASYNC ? ISOLATE_ASYNC_MIGRATE : 0);
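  	/*
  	 * In async mode, ISOLATE_ASYNC_MIGRATE makes __isolate_lru_page()
  	 * skip pages that would likely block migration (e.g. writeback).
  	 */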

  	/*
  	 * Start at where we last stopped, or beginning of the zone as
  	 * initialized by compact_zone()
  	 */
  	low_pfn = cc->migrate_pfn;
  
  	/* Only scan within a pageblock boundary */
  	end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);

  	/*
  	 * Iterate over whole pageblocks until we find the first suitable.
  	 * Do not cross the free scanner.
  	 */
  	for (; end_pfn <= cc->free_pfn;
  			low_pfn = end_pfn, end_pfn += pageblock_nr_pages) {

  		/*
  		 * This can potentially iterate a massively long zone with
  		 * many pageblocks unsuitable, so periodically check if we
  		 * need to schedule, or even abort async compaction.
  		 */
  		if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
  						&& compact_should_abort(cc))
  			break;

  		page = pageblock_pfn_to_page(low_pfn, end_pfn, zone);
  		if (!page)
  			continue;
  		/* If isolation recently failed, do not retry */
  		if (!isolation_suitable(cc, page))
  			continue;
  
  		/*
  		 * For async compaction, also only scan in MOVABLE blocks.
  		 * Async compaction is optimistic to see if the minimum amount
  		 * of work satisfies the allocation.
  		 */
  		if (cc->mode == MIGRATE_ASYNC &&
  		    !migrate_async_suitable(get_pageblock_migratetype(page)))
  			continue;
  
  		/* Perform the isolation */
  		low_pfn = isolate_migratepages_block(cc, low_pfn, end_pfn,
  								isolate_mode);
  
  		if (!low_pfn || cc->contended)
  			return ISOLATE_ABORT;
  
  		/*
  		 * Either we isolated something and proceed with migration. Or
  		 * we failed and compact_zone should decide if we should
  		 * continue or not.
  		 */
  		break;
  	}
  
  	acct_isolated(zone, cc);
  	/*
  	 * Record where migration scanner will be restarted. If we end up in
  	 * the same pageblock as the free scanner, make the scanners fully
  	 * meet so that compact_finished() terminates compaction.
  	 */
  	cc->migrate_pfn = (end_pfn <= cc->free_pfn) ? low_pfn : cc->free_pfn;

  	return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
  }
  static int compact_finished(struct zone *zone, struct compact_control *cc,
  			    const int migratetype)
  {
  	unsigned int order;
  	unsigned long watermark;

  	if (cc->contended || fatal_signal_pending(current))
  		return COMPACT_PARTIAL;
  	/* Compaction run completes if the migrate and free scanner meet */
  	if (cc->free_pfn <= cc->migrate_pfn) {
  		/* Let the next compaction start anew. */
  		zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
  		zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
  		zone->compact_cached_free_pfn = zone_end_pfn(zone);
  		/*
  		 * Mark that the PG_migrate_skip information should be cleared
  		 * by kswapd when it goes to sleep. kswapd does not set the
  		 * flag itself, as the decision to clear it should be based
  		 * directly on an allocation request.
  		 */
  		if (!current_is_kswapd())
  			zone->compact_blockskip_flush = true;
  		return COMPACT_COMPLETE;
  	}

  	/*
  	 * order == -1 is expected when compacting via
  	 * /proc/sys/vm/compact_memory
  	 */
  	if (cc->order == -1)
  		return COMPACT_CONTINUE;
  	/* Compaction run is not finished if the watermark is not met */
  	watermark = low_wmark_pages(zone);

  	if (!zone_watermark_ok(zone, cc->order, watermark, cc->classzone_idx,
  							cc->alloc_flags))
  		return COMPACT_CONTINUE;
  	/* Direct compactor: Is a suitable page free? */
  	for (order = cc->order; order < MAX_ORDER; order++) {
  		struct free_area *area = &zone->free_area[order];
  
  		/* Job done if page is free of the right migratetype */
  		if (!list_empty(&area->free_list[migratetype]))
  			return COMPACT_PARTIAL;
  
  		/* Job done if allocation would set block type */
  		if (cc->order >= pageblock_order && area->nr_free)
  			return COMPACT_PARTIAL;
  	}
  	return COMPACT_CONTINUE;
  }
  /*
   * compaction_suitable: Is this suitable to run compaction on this zone now?
   * Returns
   *   COMPACT_SKIPPED  - If there are too few free pages for compaction
   *   COMPACT_PARTIAL  - If the allocation would succeed without compaction
   *   COMPACT_CONTINUE - If compaction should run now
   */
  unsigned long compaction_suitable(struct zone *zone, int order,
  					int alloc_flags, int classzone_idx)
  {
  	int fragindex;
  	unsigned long watermark;
  
  	/*
  	 * order == -1 is expected when compacting via
  	 * /proc/sys/vm/compact_memory
  	 */
  	if (order == -1)
  		return COMPACT_CONTINUE;
  	watermark = low_wmark_pages(zone);
  	/*
  	 * If watermarks for high-order allocation are already met, there
  	 * should be no need for compaction at all.
  	 */
  	if (zone_watermark_ok(zone, order, watermark, classzone_idx,
  								alloc_flags))
  		return COMPACT_PARTIAL;
3957c7768   Michal Hocko   mm: compaction: f...
1059
  	/*
3e7d34497   Mel Gorman   mm: vmscan: recla...
1060
1061
1062
1063
  	 * Watermarks for order-0 must be met for compaction. Note the 2UL.
  	 * This is because during migration, copies of pages need to be
  	 * allocated and for a short time, the footprint is higher
  	 */
ebff39801   Vlastimil Babka   mm, compaction: p...
1064
1065
  	watermark += (2UL << order);
  	if (!zone_watermark_ok(zone, 0, watermark, classzone_idx, alloc_flags))
3e7d34497   Mel Gorman   mm: vmscan: recla...
1066
1067
1068
1069
1070
1071
  		return COMPACT_SKIPPED;
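	/*
	 * Worked example, assuming 4K pages: for an order-9 request (2MB,
	 * a typical THP), the check above demands 2UL << 9 = 1024 free
	 * pages (~4MB) on top of the low watermark, so that migration
	 * target pages can be allocated while the range is being compacted.
	 */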
  
	/*
	 * fragmentation index determines if allocation failures are due to
	 * low memory or external fragmentation
	 *
	 * index of -1000 would imply allocations might succeed depending on
	 * watermarks, but we already failed the high-order watermark check
	 *
	 * index towards 0 implies failure is due to lack of memory
	 * index towards 1000 implies failure is due to fragmentation
	 *
	 * Only compact if a failure would be due to fragmentation.
	 */
	fragindex = fragmentation_index(zone, order);
	if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
		return COMPACT_SKIPPED;
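	/*
	 * For illustration: with the default sysctl_extfrag_threshold of
	 * 500 (set below), an index of, say, 250 means the failure looks
	 * more like a genuine shortage of memory, so compaction is skipped
	 * in favour of reclaim, while an index of 800 points at external
	 * fragmentation, which compaction can actually fix, so it runs.
	 * An index of -1000 (success depends only on watermarks) also
	 * falls through to COMPACT_CONTINUE here.
	 */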

	return COMPACT_CONTINUE;
}

static int compact_zone(struct zone *zone, struct compact_control *cc)
{
	int ret;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	const int migratetype = gfpflags_to_migratetype(cc->gfp_mask);
	const bool sync = cc->mode != MIGRATE_ASYNC;
	unsigned long last_migrated_pfn = 0;

	ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
							cc->classzone_idx);
	switch (ret) {
	case COMPACT_PARTIAL:
	case COMPACT_SKIPPED:
		/* Compaction is likely to fail */
		return ret;
	case COMPACT_CONTINUE:
		/* Fall through to compaction */
		;
	}

	/*
	 * Clear pageblock skip if there were failures recently and compaction
	 * is about to be retried after being deferred. kswapd does not do
	 * this reset as it'll reset the cached information when going to sleep.
	 */
	if (compaction_restarting(zone, cc->order) && !current_is_kswapd())
		__reset_isolation_suitable(zone);

	/*
	 * Set up to move all movable pages to the end of the zone. Use cached
	 * information on where the scanners should start, but check that it
	 * is initialised by ensuring the values are within zone boundaries.
	 */
	cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
	cc->free_pfn = zone->compact_cached_free_pfn;
	if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) {
		cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1);
		zone->compact_cached_free_pfn = cc->free_pfn;
	}
	if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) {
		cc->migrate_pfn = start_pfn;
		zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
		zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
	}
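	/*
	 * Example of the free_pfn rounding above, assuming pageblock_order
	 * of 9 (512 pages, i.e. 2MB with 4K pages): an end_pfn of 1,048,000
	 * is masked down to 1,047,552, the first pfn of its pageblock, so
	 * the free scanner always restarts at a pageblock boundary.
	 */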

	trace_mm_compaction_begin(start_pfn, cc->migrate_pfn, cc->free_pfn, end_pfn);

	migrate_prep_local();

	while ((ret = compact_finished(zone, cc, migratetype)) ==
						COMPACT_CONTINUE) {
		int err;
		unsigned long isolate_start_pfn = cc->migrate_pfn;

		switch (isolate_migratepages(zone, cc)) {
		case ISOLATE_ABORT:
			ret = COMPACT_PARTIAL;
			putback_movable_pages(&cc->migratepages);
			cc->nr_migratepages = 0;
			goto out;
		case ISOLATE_NONE:
			/*
			 * We haven't isolated and migrated anything, but
			 * there might still be unflushed migrations from
			 * the previous cc->order aligned block.
			 */
			goto check_drain;
		case ISOLATE_SUCCESS:
			;
		}

		err = migrate_pages(&cc->migratepages, compaction_alloc,
				compaction_free, (unsigned long)cc, cc->mode,
				MR_COMPACTION);
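		/*
		 * compaction_alloc() hands out target pages from the
		 * cc->freepages list filled by the free scanner, and
		 * compaction_free() returns any pages that migrate_pages()
		 * did not consume, keeping both lists consistent.
		 */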

		trace_mm_compaction_migratepages(cc->nr_migratepages, err,
							&cc->migratepages);

		/* All pages were either migrated or will be released */
		cc->nr_migratepages = 0;
		if (err) {
			putback_movable_pages(&cc->migratepages);
			/*
			 * migrate_pages() may return -ENOMEM when scanners meet
			 * and we want compact_finished() to detect it
			 */
			if (err == -ENOMEM && cc->free_pfn > cc->migrate_pfn) {
				ret = COMPACT_PARTIAL;
				goto out;
			}
		}
  
		/*
		 * Record where we could have freed pages by migration and not
		 * yet flushed them to the buddy allocator. We use the pfn that
		 * isolate_migratepages() started from in this loop iteration:
		 * it is the lowest page that could have been isolated and
		 * then freed by migration.
		 */
  		if (!last_migrated_pfn)
  			last_migrated_pfn = isolate_start_pfn;
  
  check_drain:
  		/*
  		 * Has the migration scanner moved away from the previous
  		 * cc->order aligned block where we migrated from? If yes,
  		 * flush the pages that were freed, so that they can merge and
  		 * compact_finished() can detect immediately if allocation
  		 * would succeed.
  		 */
  		if (cc->order > 0 && last_migrated_pfn) {
  			int cpu;
  			unsigned long current_block_start =
  				cc->migrate_pfn & ~((1UL << cc->order) - 1);
  
  			if (last_migrated_pfn < current_block_start) {
  				cpu = get_cpu();
  				lru_add_drain_cpu(cpu);
  				drain_local_pages(zone);
  				put_cpu();
  				/* No more flushing until we migrate again */
  				last_migrated_pfn = 0;
  			}
  		}
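		/*
		 * Example, assuming cc->order == 9: the mask above clears
		 * the low 9 bits, so a migrate_pfn of 10,300 lies in the
		 * order-9 block starting at pfn 10,240. Only once the
		 * migration scanner has moved past that whole block are the
		 * per-cpu lists drained, letting the freed pages merge into
		 * an order-9 page that compact_finished() can spot.
		 */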
	}

out:
  	/*
  	 * Release free pages and update where the free scanner should restart,
  	 * so we don't leave any returned pages behind in the next attempt.
  	 */
  	if (cc->nr_freepages > 0) {
  		unsigned long free_pfn = release_freepages(&cc->freepages);
  
  		cc->nr_freepages = 0;
  		VM_BUG_ON(free_pfn == 0);
  		/* The cached pfn is always the first in a pageblock */
  		free_pfn &= ~(pageblock_nr_pages-1);
		/*
		 * Only go back, not forward. The cached pfn might already
		 * have been reset to the zone end in compact_finished().
		 */
  		if (free_pfn > zone->compact_cached_free_pfn)
  			zone->compact_cached_free_pfn = free_pfn;
  	}

	trace_mm_compaction_end(ret);

	return ret;
}

static unsigned long compact_zone_order(struct zone *zone, int order,
		gfp_t gfp_mask, enum migrate_mode mode, int *contended,
		int alloc_flags, int classzone_idx)
{
	unsigned long ret;
	struct compact_control cc = {
		.nr_freepages = 0,
		.nr_migratepages = 0,
		.order = order,
		.gfp_mask = gfp_mask,
		.zone = zone,
		.mode = mode,
		.alloc_flags = alloc_flags,
		.classzone_idx = classzone_idx,
	};
	INIT_LIST_HEAD(&cc.freepages);
	INIT_LIST_HEAD(&cc.migratepages);

	ret = compact_zone(zone, &cc);

	VM_BUG_ON(!list_empty(&cc.freepages));
	VM_BUG_ON(!list_empty(&cc.migratepages));

	*contended = cc.contended;
	return ret;
}
  int sysctl_extfrag_threshold = 500;
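
/*
 * Exposed as /proc/sys/vm/extfrag_threshold via sysctl_extfrag_handler()
 * below and compared against fragmentation_index() in compaction_suitable()
 * above: raising it makes compaction pickier about when it runs, lowering
 * it lets compaction run even for failures that look more like low memory.
 */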

/**
 * try_to_compact_pages - Direct compact to satisfy a high-order allocation
 * @zonelist: The zonelist used for the current allocation
 * @order: The order of the current allocation
 * @gfp_mask: The GFP mask of the current allocation
 * @nodemask: The allowed nodes to allocate from
 * @mode: The migration mode for async, sync light, or sync migration
 * @contended: Return value that determines if compaction was aborted due to
 *	       need_resched() or lock contention
 * @alloc_flags: The alloc_flags of the current allocation, used for the
 *	       watermark checks
 * @classzone_idx: The zone index used for the watermark checks
 *
 * This is the main entry point for direct page compaction.
 */
unsigned long try_to_compact_pages(struct zonelist *zonelist,
			int order, gfp_t gfp_mask, nodemask_t *nodemask,
			enum migrate_mode mode, int *contended,
			int alloc_flags, int classzone_idx)
{
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	int may_enter_fs = gfp_mask & __GFP_FS;
	int may_perform_io = gfp_mask & __GFP_IO;
	struct zoneref *z;
	struct zone *zone;
	int rc = COMPACT_DEFERRED;
	int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */

	*contended = COMPACT_CONTENDED_NONE;

	/* Check if the GFP flags allow compaction */
	if (!order || !may_enter_fs || !may_perform_io)
		return COMPACT_SKIPPED;
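	/*
	 * For example, an order-0 request gains nothing from compaction,
	 * and a GFP_NOFS allocation lacks __GFP_FS while GFP_NOIO lacks
	 * __GFP_IO, so all of those return COMPACT_SKIPPED here without
	 * attempting any compaction.
	 */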

	/* Compact each zone in the list */
	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
								nodemask) {
		int status;
		int zone_contended;

		if (compaction_deferred(zone, order))
			continue;

		status = compact_zone_order(zone, order, gfp_mask, mode,
				&zone_contended, alloc_flags, classzone_idx);
		rc = max(status, rc);
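		/*
		 * The return codes are defined in increasing order
		 * (COMPACT_DEFERRED < COMPACT_SKIPPED < COMPACT_CONTINUE <
		 * COMPACT_PARTIAL < COMPACT_COMPLETE), so max() accumulates
		 * the furthest any zone got; the rc > COMPACT_SKIPPED test
		 * at the end relies on that ordering.
		 */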
		/*
		 * It takes at least one zone that wasn't lock contended
		 * to clear all_zones_contended.
		 */
		all_zones_contended &= zone_contended;

		/* If a normal allocation would succeed, stop compacting */
		if (zone_watermark_ok(zone, order, low_wmark_pages(zone),
					classzone_idx, alloc_flags)) {
			/*
			 * We think the allocation will succeed in this zone,
			 * but it is not certain, hence the false. The caller
			 * will repeat this with true if allocation indeed
			 * succeeds in this zone.
			 */
			compaction_defer_reset(zone, order, false);

			/*
			 * It is possible that async compaction aborted due to
			 * need_resched() and the watermarks were ok thanks to
			 * somebody else freeing memory. The allocation can
			 * however still fail so we better signal the
			 * need_resched() contention anyway (this will not
			 * prevent the allocation attempt).
			 */
			if (zone_contended == COMPACT_CONTENDED_SCHED)
				*contended = COMPACT_CONTENDED_SCHED;

			goto break_loop;
		}

		if (mode != MIGRATE_ASYNC && status == COMPACT_COMPLETE) {
			/*
			 * We think that allocation won't succeed in this zone
			 * so we defer compaction there. If it ends up
			 * succeeding after all, it will be reset.
			 */
			defer_compaction(zone, order);
		}

		/*
		 * We might have stopped compacting due to need_resched() in
		 * async compaction, or because a fatal signal was detected.
		 * In that case do not try further zones and signal
		 * need_resched() contention.
		 */
		if ((zone_contended == COMPACT_CONTENDED_SCHED)
					|| fatal_signal_pending(current)) {
			*contended = COMPACT_CONTENDED_SCHED;
			goto break_loop;
		}

		continue;
break_loop:
		/*
		 * We might not have tried all the zones, so be conservative
		 * and assume they are not all lock contended.
		 */
		all_zones_contended = 0;
		break;
	}

	/*
	 * If at least one zone wasn't deferred or skipped, we report if all
	 * zones that were tried were lock contended.
	 */
	if (rc > COMPACT_SKIPPED && all_zones_contended)
		*contended = COMPACT_CONTENDED_LOCK;

	return rc;
}

/* Compact all zones within a node */
static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
{
	int zoneid;
	struct zone *zone;

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {

		zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		cc->nr_freepages = 0;
		cc->nr_migratepages = 0;
		cc->zone = zone;
		INIT_LIST_HEAD(&cc->freepages);
		INIT_LIST_HEAD(&cc->migratepages);

		if (cc->order == -1 || !compaction_deferred(zone, cc->order))
			compact_zone(zone, cc);

		if (cc->order > 0) {
			if (zone_watermark_ok(zone, cc->order,
						low_wmark_pages(zone), 0, 0))
				compaction_defer_reset(zone, cc->order, false);
		}

		VM_BUG_ON(!list_empty(&cc->freepages));
		VM_BUG_ON(!list_empty(&cc->migratepages));
	}
}

void compact_pgdat(pg_data_t *pgdat, int order)
{
	struct compact_control cc = {
		.order = order,
		.mode = MIGRATE_ASYNC,
	};

	if (!order)
		return;

	__compact_pgdat(pgdat, &cc);
}

static void compact_node(int nid)
{
	struct compact_control cc = {
		.order = -1,
		.mode = MIGRATE_SYNC,
		.ignore_skip_hint = true,
	};

	__compact_pgdat(NODE_DATA(nid), &cc);
}
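
/*
 * Unlike compact_pgdat() above, which kswapd drives with MIGRATE_ASYNC and
 * which honours per-zone deferral, compact_node() is the heavy hammer used
 * by the proc and sysfs triggers below: order == -1 compacts every zone
 * fully, synchronously, and ignoring the pageblock skip hints.
 */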

/* Compact all nodes in the system */
static void compact_nodes(void)
{
	int nid;

	/* Flush pending updates to the LRU lists */
	lru_add_drain_all();

	for_each_online_node(nid)
		compact_node(nid);
}

/* The written value is actually unused, all memory is compacted */
int sysctl_compact_memory;

/* This is the entry point for compacting all nodes via /proc/sys/vm */
int sysctl_compaction_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	if (write)
		compact_nodes();

	return 0;
}
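
/*
 * Typical usage (the value written is ignored; any write triggers a full
 * compaction of every online node):
 *
 *	echo 1 > /proc/sys/vm/compact_memory
 */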

int sysctl_extfrag_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	proc_dointvec_minmax(table, write, buffer, length, ppos);

	return 0;
}

#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
static ssize_t sysfs_compact_node(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	int nid = dev->id;

	if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
		/* Flush pending updates to the LRU lists */
		lru_add_drain_all();

		compact_node(nid);
	}

	return count;
}
static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
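
/*
 * The attribute appears as /sys/devices/system/node/nodeN/compact;
 * writing anything to it compacts just that node, e.g.:
 *
 *	echo 1 > /sys/devices/system/node/node0/compact
 */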

int compaction_register_node(struct node *node)
{
	return device_create_file(&node->dev, &dev_attr_compact);
}

void compaction_unregister_node(struct node *node)
{
	return device_remove_file(&node->dev, &dev_attr_compact);
}
#endif /* CONFIG_SYSFS && CONFIG_NUMA */

#endif /* CONFIG_COMPACTION */