Blame view

mm/swap.c 20.5 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
  /*
   *  linux/mm/swap.c
   *
   *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   */
  
  /*
183ff22bb   Simon Arlott   spelling fixes: mm/
8
   * This file contains the default values for the operation of the
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
   * Linux VM subsystem. Fine-tuning documentation can be found in
   * Documentation/sysctl/vm.txt.
   * Started 18.12.91
   * Swap aging added 23.2.95, Stephen Tweedie.
   * Buffermem limits added 12.3.98, Rik van Riel.
   */
  
  #include <linux/mm.h>
  #include <linux/sched.h>
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
  #include <linux/mman.h>
  #include <linux/pagemap.h>
  #include <linux/pagevec.h>
  #include <linux/init.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
24
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
  #include <linux/mm_inline.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
26
27
28
29
  #include <linux/percpu_counter.h>
  #include <linux/percpu.h>
  #include <linux/cpu.h>
  #include <linux/notifier.h>
e0bf68dde   Peter Zijlstra   mm: bdi init hooks
30
  #include <linux/backing-dev.h>
66e1707bc   Balbir Singh   Memory controller...
31
  #include <linux/memcontrol.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
32
  #include <linux/gfp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
33

64d6519dd   Lee Schermerhorn   swap: cull unevic...
34
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
35
36
  /* How many pages do we try to swap or page in/out together? */
  int page_cluster;
f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
37
  /* Per-CPU batches of pages waiting to be added to an LRU list; one pagevec per list. */
  static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
f84f9504b   Vegard Nossum   mm: remove initia...
38
  /* Per-CPU batch filled by rotate_reclaimable_page(). */
  static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
315601809   Minchan Kim   mm: deactivate in...
39
  /* Per-CPU batch filled by deactivate_page(). */
  static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
40

b221385bc   Adrian Bunk   [PATCH] mm/: make...
41
42
43
44
  /*
   * This path almost never happens for VM activity - pages are normally
   * freed via pagevecs.  But it gets used by networking.
   */
920c7a5d0   Harvey Harrison   mm: remove fastca...
45
  static void __page_cache_release(struct page *page)
b221385bc   Adrian Bunk   [PATCH] mm/: make...
46
47
48
49
50
51
52
53
  {
  	if (PageLRU(page)) {
  		unsigned long flags;
  		struct zone *zone = page_zone(page);
  
  		spin_lock_irqsave(&zone->lru_lock, flags);
  		VM_BUG_ON(!PageLRU(page));
  		__ClearPageLRU(page);
1c1c53d43   Hugh Dickins   mm: remove del_pa...
54
  		del_page_from_lru_list(zone, page, page_off_lru(page));
b221385bc   Adrian Bunk   [PATCH] mm/: make...
55
56
  		spin_unlock_irqrestore(&zone->lru_lock, flags);
  	}
918070634   Andrea Arcangeli   thp: alter compou...
57
58
59
60
61
  }
  
  /*
   * Unlink @page from the LRU (if it is on one) and hand it to
   * free_hot_cold_page().  Callers in this file invoke it once
   * put_page_testzero() has returned true for the page.
   */
  static void __put_single_page(struct page *page)
  {
  	__page_cache_release(page);
  	free_hot_cold_page(page, 0);
  }
918070634   Andrea Arcangeli   thp: alter compou...
64
  static void __put_compound_page(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
65
  {
918070634   Andrea Arcangeli   thp: alter compou...
66
  	compound_page_dtor *dtor;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
67

918070634   Andrea Arcangeli   thp: alter compou...
68
69
70
71
72
73
74
75
76
  	__page_cache_release(page);
  	dtor = get_compound_page_dtor(page);
  	(*dtor)(page);
  }
  
  /*
   * Drop one reference on @page, which the caller has found to be compound.
   *
   * A page that is not a tail takes the ordinary path: drop the reference
   * and free the page (through the compound destructor if it is a head)
   * when the count reaches zero.
   *
   * For a tail page the put is performed on the head page, and the tail's
   * _mapcount is decremented.  __split_huge_page_refcount() may be running
   * concurrently, so the head is pinned with get_page_unless_zero() and
   * the compound lock is taken before the tail state is trusted.
   */
  static void put_compound_page(struct page *page)
  {
  	struct page *head;
  	unsigned long flags;
  
  	if (likely(!PageTail(page))) {
  		if (put_page_testzero(page)) {
  			if (PageHead(page))
  				__put_compound_page(page);
  			else
  				__put_single_page(page);
  		}
  		return;
  	}
  
  	/* __split_huge_page_refcount can run under us */
  	head = compound_trans_head(page);
  	if (unlikely(page == head || !get_page_unless_zero(head))) {
  		/* head is a dangling pointer */
  		VM_BUG_ON(PageTail(page));
  		goto out_put_single;
  	}
  
  	/*
  	 * head was not a dangling pointer, but it may stop being a head
  	 * page before we hold the lock.  That is fine as long as it
  	 * cannot be freed from under us.
  	 */
  	flags = compound_lock_irqsave(head);
  	if (unlikely(!PageTail(page))) {
  		/* __split_huge_page_refcount ran before us */
  		compound_unlock_irqrestore(head, flags);
  		VM_BUG_ON(PageHead(head));
  		if (put_page_testzero(head))
  			__put_single_page(head);
  		goto out_put_single;
  	}
  	VM_BUG_ON(head != page->first_page);
  
  	/*
  	 * The pin taken by get_page_unless_zero() can go now:
  	 * __split_huge_page_refcount() is blocked on the compound lock.
  	 * Dropping it must not be the final put.
  	 */
  	if (put_page_testzero(head))
  		VM_BUG_ON(1);
  
  	/* __split_huge_page_refcount will wait now */
  	VM_BUG_ON(page_mapcount(page) <= 0);
  	atomic_dec(&page->_mapcount);
  	VM_BUG_ON(atomic_read(&head->_count) <= 0);
  	VM_BUG_ON(atomic_read(&page->_count) != 0);
  	compound_unlock_irqrestore(head, flags);
  
  	/* This is the put the caller asked for, applied to the head. */
  	if (put_page_testzero(head)) {
  		if (PageHead(head))
  			__put_compound_page(head);
  		else
  			__put_single_page(head);
  	}
  	return;
  
  out_put_single:
  	if (put_page_testzero(page))
  		__put_single_page(page);
  }
  
  /*
   * Drop a reference on @page.  Compound pages are handed to
   * put_compound_page(); any other page is freed here once
   * put_page_testzero() reports that the count reached zero.
   */
  void put_page(struct page *page)
  {
  	if (unlikely(PageCompound(page))) {
  		put_compound_page(page);
  		return;
  	}
  
  	if (put_page_testzero(page))
  		__put_single_page(page);
  }
  EXPORT_SYMBOL(put_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
142

70b50f94f   Andrea Arcangeli   mm: thp: tail pag...
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
  /*
   * Slow path of get_page() for tail pages.  Exported, but get_page() is
   * the only intended caller.
   *
   * This covers get_page() on a tail page returned by one of the
   * get_user_pages/follow_page variants.  Those variants do not need the
   * compound lock themselves: they run __get_page_tail_foll() under the
   * proper PT lock, which already serializes against split_huge_page().
   *
   * Returns true if the reference was taken, false otherwise.
   */
  bool __get_page_tail(struct page *page)
  {
  	struct page *head = compound_trans_head(page);
  	unsigned long flags;
  	bool got = false;
  
  	if (unlikely(page == head || !get_page_unless_zero(head)))
  		return false;
  
  	/*
  	 * head was not a dangling pointer, but it may stop being a head
  	 * page before we hold the lock.  That is fine as long as it
  	 * cannot be freed from under us.
  	 */
  	flags = compound_lock_irqsave(head);
  	/* __split_huge_page_refcount cannot run from here on */
  	if (likely(PageTail(page))) {
  		__get_page_tail_foll(page, false);
  		got = true;
  	}
  	compound_unlock_irqrestore(head, flags);
  
  	/* Still a tail: the pin on head is kept.  Otherwise give it back. */
  	if (unlikely(!got))
  		put_page(head);
  	return got;
  }
  EXPORT_SYMBOL(__get_page_tail);
1d7ea7324   Alexander Zarochentsev   [PATCH] fuse: fix...
181
  /**
7682486b3   Randy Dunlap   mm: fix various k...
182
183
   * put_pages_list() - release a list of pages
   * @pages: list of pages threaded on page->lru
1d7ea7324   Alexander Zarochentsev   [PATCH] fuse: fix...
184
185
186
   *
   * Release a list of pages which are strung together on page.lru.  Currently
   * used by read_cache_pages() and related error recovery code.
1d7ea7324   Alexander Zarochentsev   [PATCH] fuse: fix...
187
188
189
190
191
192
193
194
195
196
197
198
   */
  void put_pages_list(struct list_head *pages)
  {
  	struct page *last;
  
  	/* Consume the list from its tail until nothing is left. */
  	for (;;) {
  		if (list_empty(pages))
  			break;
  
  		last = list_entry(pages->prev, struct page, lru);
  		list_del(&last->lru);
  		page_cache_release(last);
  	}
  }
  EXPORT_SYMBOL(put_pages_list);
3dd7ae8ec   Shaohua Li   mm: simplify code...
199
200
201
  static void pagevec_lru_move_fn(struct pagevec *pvec,
  				void (*move_fn)(struct page *page, void *arg),
  				void *arg)
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
202
203
  {
  	int i;
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
204
  	struct zone *zone = NULL;
3dd7ae8ec   Shaohua Li   mm: simplify code...
205
  	unsigned long flags = 0;
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
206
207
208
209
210
211
212
  
  	for (i = 0; i < pagevec_count(pvec); i++) {
  		struct page *page = pvec->pages[i];
  		struct zone *pagezone = page_zone(page);
  
  		if (pagezone != zone) {
  			if (zone)
3dd7ae8ec   Shaohua Li   mm: simplify code...
213
  				spin_unlock_irqrestore(&zone->lru_lock, flags);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
214
  			zone = pagezone;
3dd7ae8ec   Shaohua Li   mm: simplify code...
215
  			spin_lock_irqsave(&zone->lru_lock, flags);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
216
  		}
3dd7ae8ec   Shaohua Li   mm: simplify code...
217
218
  
  		(*move_fn)(page, arg);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
219
220
  	}
  	if (zone)
3dd7ae8ec   Shaohua Li   mm: simplify code...
221
  		spin_unlock_irqrestore(&zone->lru_lock, flags);
83896fb5e   Linus Torvalds   Revert "mm: simpl...
222
223
  	release_pages(pvec->pages, pvec->nr, pvec->cold);
  	pagevec_reinit(pvec);
d8505dee1   Shaohua Li   mm: simplify code...
224
  }
3dd7ae8ec   Shaohua Li   mm: simplify code...
225
226
227
  static void pagevec_move_tail_fn(struct page *page, void *arg)
  {
  	int *pgmoved = arg;
3dd7ae8ec   Shaohua Li   mm: simplify code...
228
229
230
  
  	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  		enum lru_list lru = page_lru_base_type(page);
925b7673c   Johannes Weiner   mm: make per-memc...
231
232
233
234
235
  		struct lruvec *lruvec;
  
  		lruvec = mem_cgroup_lru_move_lists(page_zone(page),
  						   page, lru, lru);
  		list_move_tail(&page->lru, &lruvec->lists[lru]);
3dd7ae8ec   Shaohua Li   mm: simplify code...
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
  		(*pgmoved)++;
  	}
  }
  
  /*
   * Rotate every eligible page in @pvec to the tail of its inactive list
   * and add the number of pages moved to the PGROTATED event counter.
   *
   * Must be called with IRQs disabled; otherwise nasty races are possible.
   */
  static void pagevec_move_tail(struct pagevec *pvec)
  {
  	int nr_rotated = 0;
  
  	pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &nr_rotated);
  	__count_vm_events(PGROTATED, nr_rotated);
  }
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
251
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
252
253
   * Writeback is about to end against a page which has been marked for immediate
   * reclaim.  If it still appears to be reclaimable, move it to the tail of the
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
254
   * inactive list.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
255
   */
3dd7ae8ec   Shaohua Li   mm: simplify code...
256
  void rotate_reclaimable_page(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
257
  {
ac6aadb24   Miklos Szeredi   mm: rotate_reclai...
258
  	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
894bc3104   Lee Schermerhorn   Unevictable LRU I...
259
  	    !PageUnevictable(page) && PageLRU(page)) {
ac6aadb24   Miklos Szeredi   mm: rotate_reclai...
260
261
262
263
264
265
266
267
268
269
  		struct pagevec *pvec;
  		unsigned long flags;
  
  		page_cache_get(page);
  		local_irq_save(flags);
  		pvec = &__get_cpu_var(lru_rotate_pvecs);
  		if (!pagevec_add(pvec, page))
  			pagevec_move_tail(pvec);
  		local_irq_restore(flags);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
270
  }
3e2f41f1f   KOSAKI Motohiro   memcg: add zone_r...
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
  /*
   * Account one scanned page in recent_scanned[@file] and, when @rotated
   * is non-zero, one rotated page in recent_rotated[@file].  The zone's
   * statistics are always updated.  The statistics returned by
   * mem_cgroup_get_reclaim_stat_from_page() are updated too when that
   * lookup returns a non-NULL pointer.
   */
  static void update_page_reclaim_stat(struct zone *zone, struct page *page,
  				     int file, int rotated)
  {
  	struct zone_reclaim_stat *memcg_stat =
  			mem_cgroup_get_reclaim_stat_from_page(page);
  	struct zone_reclaim_stat *zone_stat = &zone->reclaim_stat;
  
  	zone_stat->recent_scanned[file]++;
  	if (rotated)
  		zone_stat->recent_rotated[file]++;
  
  	if (memcg_stat) {
  		memcg_stat->recent_scanned[file]++;
  		if (rotated)
  			memcg_stat->recent_rotated[file]++;
  	}
  }
eb709b0d0   Shaohua Li   mm: batch activat...
290
  static void __activate_page(struct page *page, void *arg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
291
292
  {
  	struct zone *zone = page_zone(page);
744ed1442   Shaohua Li   mm: batch activat...
293

744ed1442   Shaohua Li   mm: batch activat...
294
  	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
7a608572a   Linus Torvalds   Revert "mm: batch...
295
296
297
  		int file = page_is_file_cache(page);
  		int lru = page_lru_base_type(page);
  		del_page_from_lru_list(zone, page, lru);
744ed1442   Shaohua Li   mm: batch activat...
298

7a608572a   Linus Torvalds   Revert "mm: batch...
299
300
301
302
  		SetPageActive(page);
  		lru += LRU_ACTIVE;
  		add_page_to_lru_list(zone, page, lru);
  		__count_vm_event(PGACTIVATE);
4f98a2fee   Rik van Riel   vmscan: split LRU...
303

7a608572a   Linus Torvalds   Revert "mm: batch...
304
  		update_page_reclaim_stat(zone, page, file, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
305
  	}
eb709b0d0   Shaohua Li   mm: batch activat...
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
  }
  
  #ifdef CONFIG_SMP
  /* Per-CPU batch of pages waiting to be activated (SMP only). */
  static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
  
  /* Activate any pages still queued in @cpu's activate_page_pvecs. */
  static void activate_page_drain(int cpu)
  {
  	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
  
  	if (!pagevec_count(pvec))
  		return;
  
  	pagevec_lru_move_fn(pvec, __activate_page, NULL);
  }
  
  /*
   * SMP variant: queue @page on this CPU's activate_page_pvecs and run the
   * batch through __activate_page() once pagevec_add() returns 0.
   */
  void activate_page(struct page *page)
  {
  	struct pagevec *pvec;
  
  	/* Unlocked pre-check; __activate_page() repeats it under lru_lock. */
  	if (!PageLRU(page) || PageActive(page) || PageUnevictable(page))
  		return;
  
  	pvec = &get_cpu_var(activate_page_pvecs);
  	page_cache_get(page);
  	if (!pagevec_add(pvec, page))
  		pagevec_lru_move_fn(pvec, __activate_page, NULL);
  	put_cpu_var(activate_page_pvecs);
  }
  
  #else
  /*
   * !CONFIG_SMP stub: the uniprocessor activate_page() below activates
   * pages directly under zone->lru_lock, so there is nothing to drain.
   */
  static inline void activate_page_drain(int cpu)
  {
  }
  
  /*
   * !CONFIG_SMP variant: activate @page immediately under its zone's
   * lru_lock instead of batching it in a pagevec.
   */
  void activate_page(struct page *page)
  {
  	struct zone *page_zn = page_zone(page);
  
  	spin_lock_irq(&page_zn->lru_lock);
  	__activate_page(page, NULL);
  	spin_unlock_irq(&page_zn->lru_lock);
  }
eb709b0d0   Shaohua Li   mm: batch activat...
344
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
345
346
347
348
349
350
351
352
  
  /*
   * Record an access to @page.  Each call advances the page one step:
   *
   * inactive,unreferenced	->	inactive,referenced
   * inactive,referenced		->	active,unreferenced
   * active,unreferenced		->	active,referenced
   *
   * Promotion to active only applies to evictable pages that are on an LRU.
   */
  void mark_page_accessed(struct page *page)
  {
  	if (!PageActive(page) && !PageUnevictable(page) &&
  	    PageReferenced(page) && PageLRU(page)) {
  		/* Second access while inactive: promote and start over. */
  		activate_page(page);
  		ClearPageReferenced(page);
  		return;
  	}
  
  	if (!PageReferenced(page))
  		SetPageReferenced(page);
  }
  EXPORT_SYMBOL(mark_page_accessed);
f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
364
  void __lru_cache_add(struct page *page, enum lru_list lru)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
365
  {
f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
366
  	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
367
368
369
  
  	page_cache_get(page);
  	if (!pagevec_add(pvec, page))
5095ae837   Hugh Dickins   mm: fewer undersc...
370
  		__pagevec_lru_add(pvec, lru);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
371
372
  	put_cpu_var(lru_add_pvecs);
  }
47846b065   Miklos Szeredi   mm: export lru_ca...
373
  EXPORT_SYMBOL(__lru_cache_add);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
374

f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
375
376
377
378
379
380
  /**
   * lru_cache_add_lru - queue a page for addition to an LRU list
   * @page: the page to be added to the LRU.
   * @lru: the LRU list to which the page is added.
   *
   * A stale PG_active or PG_unevictable flag is cleared first; the page
   * must not already be on an LRU.  The page is then queued through
   * __lru_cache_add().
   */
  void lru_cache_add_lru(struct page *page, enum lru_list lru)
  {
  	if (PageActive(page)) {
  		/* Active and unevictable are mutually exclusive. */
  		VM_BUG_ON(PageUnevictable(page));
  		ClearPageActive(page);
  	} else if (PageUnevictable(page)) {
  		VM_BUG_ON(PageActive(page));
  		ClearPageUnevictable(page);
  	}
  
  	/* From here on the page must carry no LRU state at all. */
  	VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
  	__lru_cache_add(page, lru);
  }
894bc3104   Lee Schermerhorn   Unevictable LRU I...
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
  /**
   * add_page_to_unevictable_list - add a page to the unevictable list
   * @page:  the page to be added to the unevictable list
   *
   * Puts the page straight onto its zone's unevictable list, bypassing the
   * pagevec cache.  Other tasks may be making the page evictable (e.g. via
   * munlock, munmap or exit) while it is off the LRU, so the page should be
   * added while it is locked or otherwise "invisible" to them; that is hard
   * to arrange when the pagevec cache is used.
   */
  void add_page_to_unevictable_list(struct page *page)
  {
  	struct zone *page_zn = page_zone(page);
  
  	spin_lock_irq(&page_zn->lru_lock);
  	SetPageUnevictable(page);
  	SetPageLRU(page);
  	add_page_to_lru_list(page_zn, page, LRU_UNEVICTABLE);
  	spin_unlock_irq(&page_zn->lru_lock);
  }
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
413
  /*
315601809   Minchan Kim   mm: deactivate in...
414
415
416
417
418
   * If the page can not be invalidated, it is moved to the
   * inactive list to speed up its reclaim.  It is moved to the
   * head of the list, rather than the tail, to give the flusher
   * threads some time to write it out, as this is much more
   * effective than the single-page writeout from reclaim.
278df9f45   Minchan Kim   mm: reclaim inval...
419
420
421
422
423
424
425
426
427
428
429
430
431
432
   *
   * If the page isn't page_mapped and dirty/writeback, the page
   * could reclaim asap using PG_reclaim.
   *
   * 1. active, mapped page -> none
   * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
   * 3. inactive, mapped page -> none
   * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
   * 5. inactive, clean -> inactive, tail
   * 6. Others -> none
   *
   * In 4, why it moves inactive's head, the VM expects the page would
   * be write it out by flusher threads as this is much more effective
   * than the single-page writeout from reclaim.
315601809   Minchan Kim   mm: deactivate in...
433
   */
3dd7ae8ec   Shaohua Li   mm: simplify code...
434
  static void lru_deactivate_fn(struct page *page, void *arg)
315601809   Minchan Kim   mm: deactivate in...
435
436
  {
  	int lru, file;
278df9f45   Minchan Kim   mm: reclaim inval...
437
  	bool active;
3dd7ae8ec   Shaohua Li   mm: simplify code...
438
  	struct zone *zone = page_zone(page);
315601809   Minchan Kim   mm: deactivate in...
439

278df9f45   Minchan Kim   mm: reclaim inval...
440
  	if (!PageLRU(page))
315601809   Minchan Kim   mm: deactivate in...
441
  		return;
bad49d9c8   Minchan Kim   mm: check PageUne...
442
443
  	if (PageUnevictable(page))
  		return;
315601809   Minchan Kim   mm: deactivate in...
444
445
446
  	/* Some processes are using the page */
  	if (page_mapped(page))
  		return;
278df9f45   Minchan Kim   mm: reclaim inval...
447
  	active = PageActive(page);
315601809   Minchan Kim   mm: deactivate in...
448
449
  	file = page_is_file_cache(page);
  	lru = page_lru_base_type(page);
278df9f45   Minchan Kim   mm: reclaim inval...
450
  	del_page_from_lru_list(zone, page, lru + active);
315601809   Minchan Kim   mm: deactivate in...
451
452
453
  	ClearPageActive(page);
  	ClearPageReferenced(page);
  	add_page_to_lru_list(zone, page, lru);
315601809   Minchan Kim   mm: deactivate in...
454

278df9f45   Minchan Kim   mm: reclaim inval...
455
456
457
458
459
460
461
462
  	if (PageWriteback(page) || PageDirty(page)) {
  		/*
  		 * PG_reclaim could be raced with end_page_writeback
  		 * It can make readahead confusing.  But race window
  		 * is _really_ small and  it's non-critical problem.
  		 */
  		SetPageReclaim(page);
  	} else {
925b7673c   Johannes Weiner   mm: make per-memc...
463
  		struct lruvec *lruvec;
278df9f45   Minchan Kim   mm: reclaim inval...
464
465
466
467
  		/*
  		 * The page's writeback ends up during pagevec
  		 * We moves tha page into tail of inactive.
  		 */
925b7673c   Johannes Weiner   mm: make per-memc...
468
469
  		lruvec = mem_cgroup_lru_move_lists(zone, page, lru, lru);
  		list_move_tail(&page->lru, &lruvec->lists[lru]);
278df9f45   Minchan Kim   mm: reclaim inval...
470
471
472
473
474
  		__count_vm_event(PGROTATED);
  	}
  
  	if (active)
  		__count_vm_event(PGDEACTIVATE);
315601809   Minchan Kim   mm: deactivate in...
475
476
  	update_page_reclaim_stat(zone, page, file, 0);
  }
315601809   Minchan Kim   mm: deactivate in...
477
  /*
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
478
479
480
481
482
   * Drain pages out of the cpu's pagevecs.
   * Either "cpu" is the current CPU, and preemption has already been
   * disabled; or "cpu" is being hot-unplugged, and is already dead.
   */
  static void drain_cpu_pagevecs(int cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
483
  {
f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
484
  	struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
485
  	struct pagevec *pvec;
f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
486
  	int lru;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
487

f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
488
489
490
  	for_each_lru(lru) {
  		pvec = &pvecs[lru - LRU_BASE];
  		if (pagevec_count(pvec))
5095ae837   Hugh Dickins   mm: fewer undersc...
491
  			__pagevec_lru_add(pvec, lru);
f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
492
  	}
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
493
494
495
496
497
498
499
500
501
502
  
  	pvec = &per_cpu(lru_rotate_pvecs, cpu);
  	if (pagevec_count(pvec)) {
  		unsigned long flags;
  
  		/* No harm done if a racing interrupt already did this */
  		local_irq_save(flags);
  		pagevec_move_tail(pvec);
  		local_irq_restore(flags);
  	}
315601809   Minchan Kim   mm: deactivate in...
503
504
505
  
  	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
  	if (pagevec_count(pvec))
3dd7ae8ec   Shaohua Li   mm: simplify code...
506
  		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
eb709b0d0   Shaohua Li   mm: batch activat...
507
508
  
  	activate_page_drain(cpu);
315601809   Minchan Kim   mm: deactivate in...
509
510
511
512
513
514
515
516
517
518
519
520
  }
  
  /**
   * deactivate_page - forcefully deactivate a page
   * @page: page to deactivate
   *
   * This function hints the VM that @page is a good reclaim candidate,
   * for example if its invalidation fails due to the page being dirty
   * or under writeback.
   */
  void deactivate_page(struct page *page)
  {
821ed6bbe   Minchan Kim   mm: filter unevic...
521
522
523
524
525
526
  	/*
  	 * In a workload with many unevictable page such as mprotect, unevictable
  	 * page deactivation for accelerating reclaim is pointless.
  	 */
  	if (PageUnevictable(page))
  		return;
315601809   Minchan Kim   mm: deactivate in...
527
528
529
530
  	if (likely(get_page_unless_zero(page))) {
  		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
  
  		if (!pagevec_add(pvec, page))
3dd7ae8ec   Shaohua Li   mm: simplify code...
531
  			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
315601809   Minchan Kim   mm: deactivate in...
532
533
  		put_cpu_var(lru_deactivate_pvecs);
  	}
80bfed904   Andrew Morton   [PATCH] consolida...
534
535
536
537
  }
  
  /* Drain the current CPU's pagevecs, between get_cpu() and put_cpu(). */
  void lru_add_drain(void)
  {
  	int this_cpu = get_cpu();
  
  	drain_cpu_pagevecs(this_cpu);
  	put_cpu();
  }
c4028958b   David Howells   WorkStruct: make ...
541
  /* Work callback for lru_add_drain_all(); the work_struct argument is unused. */
  static void lru_add_drain_per_cpu(struct work_struct *dummy)
  {
  	lru_add_drain();
  }
  
  /*
   * Returns 0 for success
   */
  int lru_add_drain_all(void)
  {
c4028958b   David Howells   WorkStruct: make ...
551
  	return schedule_on_each_cpu(lru_add_drain_per_cpu);
053837fce   Nick Piggin   [PATCH] mm: migra...
552
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
553
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
554
555
556
557
558
559
560
   * Batched page_cache_release().  Decrement the reference count on all the
   * passed pages.  If it fell to zero then remove the page from the LRU and
   * free it.
   *
   * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
   * for the remainder of the operation.
   *
ab33dc09a   Fernando Luis Vazquez Cao   swap: update func...
561
562
563
564
   * The locking in this function is against shrink_inactive_list(): we recheck
   * the page count inside the lock to see whether shrink_inactive_list()
   * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
   * will free it.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
565
566
567
568
   */
  void release_pages(struct page **pages, int nr, int cold)
  {
  	int i;
cc59850ef   Konstantin Khlebnikov   mm: add free_hot_...
569
  	LIST_HEAD(pages_to_free);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
570
  	struct zone *zone = NULL;
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
571
  	unsigned long uninitialized_var(flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
572

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
573
574
  	for (i = 0; i < nr; i++) {
  		struct page *page = pages[i];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
575

8519fb30e   Nick Piggin   [PATCH] mm: compo...
576
577
  		if (unlikely(PageCompound(page))) {
  			if (zone) {
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
578
  				spin_unlock_irqrestore(&zone->lru_lock, flags);
8519fb30e   Nick Piggin   [PATCH] mm: compo...
579
580
581
582
583
  				zone = NULL;
  			}
  			put_compound_page(page);
  			continue;
  		}
b5810039a   Nick Piggin   [PATCH] core remo...
584
  		if (!put_page_testzero(page))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
585
  			continue;
46453a6e1   Nick Piggin   [PATCH] mm: never...
586
587
  		if (PageLRU(page)) {
  			struct zone *pagezone = page_zone(page);
894bc3104   Lee Schermerhorn   Unevictable LRU I...
588

46453a6e1   Nick Piggin   [PATCH] mm: never...
589
590
  			if (pagezone != zone) {
  				if (zone)
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
591
592
  					spin_unlock_irqrestore(&zone->lru_lock,
  									flags);
46453a6e1   Nick Piggin   [PATCH] mm: never...
593
  				zone = pagezone;
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
594
  				spin_lock_irqsave(&zone->lru_lock, flags);
46453a6e1   Nick Piggin   [PATCH] mm: never...
595
  			}
725d704ec   Nick Piggin   [PATCH] mm: VM_BU...
596
  			VM_BUG_ON(!PageLRU(page));
674539115   Nick Piggin   [PATCH] mm: less ...
597
  			__ClearPageLRU(page);
1c1c53d43   Hugh Dickins   mm: remove del_pa...
598
  			del_page_from_lru_list(zone, page, page_off_lru(page));
46453a6e1   Nick Piggin   [PATCH] mm: never...
599
  		}
cc59850ef   Konstantin Khlebnikov   mm: add free_hot_...
600
  		list_add(&page->lru, &pages_to_free);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
601
602
  	}
  	if (zone)
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
603
  		spin_unlock_irqrestore(&zone->lru_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
604

cc59850ef   Konstantin Khlebnikov   mm: add free_hot_...
605
  	free_hot_cold_page_list(&pages_to_free, cold);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
606
  }
0be8557bc   Miklos Szeredi   fuse: use release...
607
  EXPORT_SYMBOL(release_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
  
  /*
   * Release every page in @pvec and reinitialise it.
   *
   * The pages about to be released may still sit in the deferred
   * lru-addition queues, which would keep them from really being freed
   * right now.  That is correct but inefficient: those pages may be
   * cache-warm and should go back to the page allocator ASAP.  So the
   * queues are drained first.
   *
   * pagevec_lru_move_fn() calls release_pages() directly rather than
   * coming through here, which avoids mutual recursion with the drain.
   */
  void __pagevec_release(struct pagevec *pvec)
  {
  	lru_add_drain();
  	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
  	pagevec_reinit(pvec);
  }
  EXPORT_SYMBOL(__pagevec_release);
12d271078   Hugh Dickins   memcg: fix split_...
626
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
71e3aac07   Andrea Arcangeli   thp: transparent ...
627
628
629
630
631
632
633
  /*
   * lru_add_page_tail - put a tail page on an LRU list alongside its head page.
   * @zone:      zone whose lru_lock is asserted to be held on entry
   * @page:      head page (asserted with PageHead())
   * @page_tail: page to add; asserted to be neither compound nor already
   *             on an LRU
   *
   * used by __split_huge_page_refcount()
   *
   * page_tail always gets PageLRU set.  If it is evictable it inherits the
   * head page's Active state and targets the matching anon LRU (file is fixed
   * at 0), and the reclaim statistics are updated; otherwise it is flagged
   * Unevictable and targets LRU_UNEVICTABLE.
   */
  void lru_add_page_tail(struct zone* zone,
  		       struct page *page, struct page *page_tail)
  {
  	int active;
  	enum lru_list lru;
  	const int file = 0;	/* only anon LRUs are selected below */
71e3aac07   Andrea Arcangeli   thp: transparent ...
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
  
  	VM_BUG_ON(!PageHead(page));
  	VM_BUG_ON(PageCompound(page_tail));
  	VM_BUG_ON(PageLRU(page_tail));
  	/*
  	 * NOTE(review): spin_is_locked() may be hard-wired to 0 on
  	 * uniprocessor builds without spinlock debugging, in which case this
  	 * assertion would fire spuriously - confirm for this tree.
  	 */
  	VM_BUG_ON(!spin_is_locked(&zone->lru_lock));
  
  	SetPageLRU(page_tail);
  
  	if (page_evictable(page_tail, NULL)) {
  		/* Mirror the head page's active/inactive state onto the tail. */
  		if (PageActive(page)) {
  			SetPageActive(page_tail);
  			active = 1;
  			lru = LRU_ACTIVE_ANON;
  		} else {
  			active = 0;
  			lru = LRU_INACTIVE_ANON;
  		}
  		update_page_reclaim_stat(zone, page_tail, file, active);
71e3aac07   Andrea Arcangeli   thp: transparent ...
652
653
  	} else {
  		SetPageUnevictable(page_tail);
12d271078   Hugh Dickins   memcg: fix split_...
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
  		lru = LRU_UNEVICTABLE;
  	}
  
  	/* Head already on an LRU: link the tail in relative to the head's entry. */
  	if (likely(PageLRU(page)))
  		list_add_tail(&page_tail->lru, &page->lru);
  	else {
  		struct list_head *list_head;
  		/*
  		 * Head page has not yet been counted, as an hpage,
  		 * so we must account for each subpage individually.
  		 *
  		 * Use the standard add function to put page_tail on the list,
  		 * but then correct its position so they all end up in order.
  		 */
  		add_page_to_lru_list(zone, page_tail, lru);
  		/*
  		 * presumably add_page_to_lru_list() left page_tail at the front,
  		 * so ->prev is the list head itself and the move below puts
  		 * page_tail at the end of that list - TODO confirm.
  		 */
  		list_head = page_tail->lru.prev;
  		list_move_tail(&page_tail->lru, list_head);
71e3aac07   Andrea Arcangeli   thp: transparent ...
671
672
  	}
  }
12d271078   Hugh Dickins   memcg: fix split_...
673
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
71e3aac07   Andrea Arcangeli   thp: transparent ...
674

5095ae837   Hugh Dickins   mm: fewer undersc...
675
  /*
   * Per-page callback that puts one page on an LRU list.
   *
   * @page: page to add; asserted to be not Active, not Unevictable and not
   *        already on an LRU
   * @arg:  the target enum lru_list value, carried in a void * (it is cast
   *        straight back below)
   *
   * Handed to pagevec_lru_move_fn() by __pagevec_lru_add().
   */
  static void __pagevec_lru_add_fn(struct page *page, void *arg)
3dd7ae8ec   Shaohua Li   mm: simplify code...
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
  {
  	enum lru_list lru = (enum lru_list)arg;
  	struct zone *zone = page_zone(page);
  	/* file/active are derived from the list id and fed to the reclaim stats */
  	int file = is_file_lru(lru);
  	int active = is_active_lru(lru);
  
  	VM_BUG_ON(PageActive(page));
  	VM_BUG_ON(PageUnevictable(page));
  	VM_BUG_ON(PageLRU(page));
  
  	SetPageLRU(page);
  	/* Only pages headed for an active list get the Active flag. */
  	if (active)
  		SetPageActive(page);
  	update_page_reclaim_stat(zone, page, file, active);
  	add_page_to_lru_list(zone, page, lru);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
692
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
693
694
695
   * Add the passed pages to the LRU, then drop the caller's refcount
   * on them.  Reinitialises the caller's pagevec.
   *
   * @lru must not be the unevictable list (asserted below).  The per-page
   * work is done by __pagevec_lru_add_fn(), driven by pagevec_lru_move_fn();
   * @lru is handed to that callback packed into its void * argument.
   * NOTE(review): the refcount drop and pagevec reinit are presumably done
   * inside pagevec_lru_move_fn() - confirm, they are not visible here.
   */
5095ae837   Hugh Dickins   mm: fewer undersc...
696
  void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
697
  {
894bc3104   Lee Schermerhorn   Unevictable LRU I...
698
  	VM_BUG_ON(is_unevictable_lru(lru));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
699

5095ae837   Hugh Dickins   mm: fewer undersc...
700
  	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, (void *)lru);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
701
  }
5095ae837   Hugh Dickins   mm: fewer undersc...
702
  EXPORT_SYMBOL(__pagevec_lru_add);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
703

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
  /**
   * pagevec_lookup - gang pagecache lookup
   * @pvec:	Pagevec that receives the pages which were found
   * @mapping:	The address_space to search
   * @start:	Page index at which the search begins
   * @nr_pages:	Upper bound on the number of pages to return
   *
   * Looks up at most @nr_pages pages in @mapping, beginning at index @start,
   * and stores them in @pvec.  A reference is taken against every page that
   * is placed in @pvec.
   *
   * The pages returned have ascending indexes, but the indexes need not be
   * consecutive: not-present pages leave holes.
   *
   * Returns the number of pages which were found.
   */
  unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
  		pgoff_t start, unsigned nr_pages)
  {
  	unsigned nr_found;

  	nr_found = find_get_pages(mapping, start, nr_pages, pvec->pages);
  	pvec->nr = nr_found;

  	return pagevec_count(pvec);
  }
78539fdfa   Christoph Hellwig   [XFS] Export page...
726
  EXPORT_SYMBOL(pagevec_lookup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
727
728
729
730
731
732
733
  /*
   * Tagged counterpart of pagevec_lookup(): fills @pvec with up to @nr_pages
   * pages obtained from find_get_pages_tag() for @tag, records how many were
   * stored, and returns pagevec_count(@pvec).
   *
   * @index is passed by pointer straight through to find_get_pages_tag()
   * (presumably so the search position can be advanced for the caller -
   * confirm against find_get_pages_tag()).
   */
  unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
  		pgoff_t *index, int tag, unsigned nr_pages)
  {
  	unsigned nr_found;

  	nr_found = find_get_pages_tag(mapping, index, tag, nr_pages,
  				      pvec->pages);
  	pvec->nr = nr_found;

  	return pagevec_count(pvec);
  }
7f2857018   Steve French   Export __pagevec_...
734
  EXPORT_SYMBOL(pagevec_lookup_tag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
735

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
736
737
738
739
740
  /*
   * Perform any setup for the swap system.
   *
   * When CONFIG_SWAP is set, runs bdi_init() on swapper_space's
   * backing_dev_info (any result of that call is not examined here).  Then
   * picks page_cluster from the amount of RAM: 2 when megs is below 16,
   * 3 otherwise.
   */
  void __init swap_setup(void)
  {
4481374ce   Jan Beulich   mm: replace vario...
741
  	/* totalram_pages scaled to megabytes (assumes PAGE_SHIFT is log2 of the page size in bytes) */
  	unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
742

e0bf68dde   Peter Zijlstra   mm: bdi init hooks
743
744
745
  #ifdef CONFIG_SWAP
  	bdi_init(swapper_space.backing_dev_info);
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
746
747
748
749
750
751
752
753
754
  	/* Use a smaller cluster for small-memory machines */
  	if (megs < 16)
  		page_cluster = 2;
  	else
  		page_cluster = 3;
  	/*
  	 * Right now other parts of the system mean that we
  	 * _really_ don't want to cluster much more
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
755
  }