Blame view

mm/swap.c 29.2 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
  /*
   *  linux/mm/swap.c
   *
   *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   */
  
  /*
183ff22bb   Simon Arlott   spelling fixes: mm/
8
   * This file contains the default values for the operation of the
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
   * Linux VM subsystem. Fine-tuning documentation can be found in
   * Documentation/sysctl/vm.txt.
   * Started 18.12.91
   * Swap aging added 23.2.95, Stephen Tweedie.
   * Buffermem limits added 12.3.98, Rik van Riel.
   */
  
  #include <linux/mm.h>
  #include <linux/sched.h>
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
  #include <linux/mman.h>
  #include <linux/pagemap.h>
  #include <linux/pagevec.h>
  #include <linux/init.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
24
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
  #include <linux/mm_inline.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
26
27
28
29
  #include <linux/percpu_counter.h>
  #include <linux/percpu.h>
  #include <linux/cpu.h>
  #include <linux/notifier.h>
e0bf68dde   Peter Zijlstra   mm: bdi init hooks
30
  #include <linux/backing-dev.h>
66e1707bc   Balbir Singh   Memory controller...
31
  #include <linux/memcontrol.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
32
  #include <linux/gfp.h>
a27bb332c   Kent Overstreet   aio: don't includ...
33
  #include <linux/uio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
34

64d6519dd   Lee Schermerhorn   swap: cull unevic...
35
  #include "internal.h"
c6286c983   Mel Gorman   mm: add tracepoin...
36
37
  #define CREATE_TRACE_POINTS
  #include <trace/events/pagemap.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
38
39
  /* How many pages do we try to swap or page in/out together? */
  int page_cluster;
13f7f7898   Mel Gorman   mm: pagevec: defe...
40
  static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
f84f9504b   Vegard Nossum   mm: remove initia...
41
  static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
315601809   Minchan Kim   mm: deactivate in...
42
  static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
43

b221385bc   Adrian Bunk   [PATCH] mm/: make...
44
45
46
47
  /*
   * This path almost never happens for VM activity - pages are normally
   * freed via pagevecs.  But it gets used by networking.
   */
920c7a5d0   Harvey Harrison   mm: remove fastca...
48
  static void __page_cache_release(struct page *page)
b221385bc   Adrian Bunk   [PATCH] mm/: make...
49
50
  {
  	if (PageLRU(page)) {
b221385bc   Adrian Bunk   [PATCH] mm/: make...
51
  		struct zone *zone = page_zone(page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
52
53
  		struct lruvec *lruvec;
  		unsigned long flags;
b221385bc   Adrian Bunk   [PATCH] mm/: make...
54
55
  
  		spin_lock_irqsave(&zone->lru_lock, flags);
fa9add641   Hugh Dickins   mm/memcg: apply a...
56
  		lruvec = mem_cgroup_page_lruvec(page, zone);
309381fea   Sasha Levin   mm: dump page whe...
57
  		VM_BUG_ON_PAGE(!PageLRU(page), page);
b221385bc   Adrian Bunk   [PATCH] mm/: make...
58
  		__ClearPageLRU(page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
59
  		del_page_from_lru_list(page, lruvec, page_off_lru(page));
b221385bc   Adrian Bunk   [PATCH] mm/: make...
60
61
  		spin_unlock_irqrestore(&zone->lru_lock, flags);
  	}
918070634   Andrea Arcangeli   thp: alter compou...
62
63
64
65
66
  }
  
  /*
   * Free an ordinary (non-compound) page.  The page is first taken off its
   * LRU list, if it is on one, and is then handed to free_hot_cold_page()
   * with 0 as the second argument.  Every caller in this file invokes it
   * only after put_page_testzero() has succeeded.
   */
  static void __put_single_page(struct page *page)
  {
  	__page_cache_release(page);
fc91668ea   Li Hong   mm: remove free_h...
67
  	free_hot_cold_page(page, 0);
b221385bc   Adrian Bunk   [PATCH] mm/: make...
68
  }
918070634   Andrea Arcangeli   thp: alter compou...
69
  static void __put_compound_page(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
70
  {
918070634   Andrea Arcangeli   thp: alter compou...
71
  	compound_page_dtor *dtor;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
72

918070634   Andrea Arcangeli   thp: alter compou...
73
74
75
76
77
78
79
  	__page_cache_release(page);
  	dtor = get_compound_page_dtor(page);
  	(*dtor)(page);
  }
  
  /*
   * Drop one reference on @page, which put_page() found to be part of a
   * compound page (head or tail).
   *
   * - @page is not a tail: drop the reference on @page itself and, if it
   *   was the last one, free it as a compound or a single page.
   * - @page is a tail of a compound page without tail refcounting (slab
   *   or hugetlbfs, per the comments below): only the head's reference
   *   count is dropped.
   * - @page is a tail of a refcounted compound page (THP): the head is
   *   pinned, the compound lock is taken to hold off
   *   __split_huge_page_refcount(), the tail's _mapcount reference is
   *   dropped and finally the head reference is released.
   * - Whenever a concurrent split is detected, @page is handled as a
   *   single page (label out_put_single).
   */
  static void put_compound_page(struct page *page)
  {
26296ad2d   Andrew Morton   mm/swap.c: reorga...
80
  	struct page *page_head;
70b50f94f   Andrea Arcangeli   mm: thp: tail pag...
81

26296ad2d   Andrew Morton   mm/swap.c: reorga...
82
83
  	if (likely(!PageTail(page))) {
  		if (put_page_testzero(page)) {
ebf360f9b   Andrea Arcangeli   mm: hugetlbfs: mo...
84
  			/*
26296ad2d   Andrew Morton   mm/swap.c: reorga...
85
86
  			 * By the time all refcounts have been released
  			 * split_huge_page cannot run anymore from under us.
ebf360f9b   Andrea Arcangeli   mm: hugetlbfs: mo...
87
  			 */
26296ad2d   Andrew Morton   mm/swap.c: reorga...
88
89
90
91
  			if (PageHead(page))
  				__put_compound_page(page);
  			else
  				__put_single_page(page);
ebf360f9b   Andrea Arcangeli   mm: hugetlbfs: mo...
92
  		}
26296ad2d   Andrew Morton   mm/swap.c: reorga...
93
94
  		return;
  	}
ebf360f9b   Andrea Arcangeli   mm: hugetlbfs: mo...
95

26296ad2d   Andrew Morton   mm/swap.c: reorga...
96
  	/* __split_huge_page_refcount can run under us */
668f9abbd   David Rientjes   mm: close PageTai...
97
  	page_head = compound_head(page);
5bf5f03c2   Pravin B Shelar   mm: fix slab->pag...
98

26296ad2d   Andrew Morton   mm/swap.c: reorga...
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
  	/*
  	 * THP can not break up slab pages so avoid taking
  	 * compound_lock() and skip the tail page refcounting (in
  	 * _mapcount) too. Slab performs non-atomic bit ops on
  	 * page->flags for better performance. In particular
  	 * slab_unlock() in slub used to be a hot path. It is still
  	 * hot on arches that do not support
  	 * this_cpu_cmpxchg_double().
  	 *
  	 * If "page" is part of a slab or hugetlbfs page it cannot be
  	 * split and the head page cannot change from under us. And
  	 * if "page" is part of a THP page under splitting, if the
  	 * head page pointed by the THP tail isn't a THP head anymore,
  	 * we'll find PageTail clear after smp_rmb() and we'll treat
  	 * it as a single page.
  	 */
  	if (!__compound_tail_refcounted(page_head)) {
  		/*
  		 * If "page" is a THP tail, we must read the tail page
  		 * flags after the head page flags. The
  		 * split_huge_page side enforces write memory barriers
  		 * between clearing PageTail and before the head page
  		 * can be freed and reallocated.
  		 */
  		smp_rmb();
  		if (likely(PageTail(page))) {
5bf5f03c2   Pravin B Shelar   mm: fix slab->pag...
125
  			/*
26296ad2d   Andrew Morton   mm/swap.c: reorga...
126
127
  			 * __split_huge_page_refcount cannot race
  			 * here.
918070634   Andrea Arcangeli   thp: alter compou...
128
  			 */
309381fea   Sasha Levin   mm: dump page whe...
129
130
  			VM_BUG_ON_PAGE(!PageHead(page_head), page_head);
  			VM_BUG_ON_PAGE(page_mapcount(page) != 0, page);
26296ad2d   Andrew Morton   mm/swap.c: reorga...
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
  			if (put_page_testzero(page_head)) {
  				/*
  				 * If this is the tail of a slab
  				 * compound page, the tail pin must
  				 * not be the last reference held on
  				 * the page, because the PG_slab
  				 * cannot be cleared before all tail
  				 * pins (which skips the _mapcount
  				 * tail refcounting) have been
  				 * released. For hugetlbfs the tail
  				 * pin may be the last reference on
  				 * the page instead, because
  				 * PageHeadHuge will not go away until
  				 * the compound page enters the buddy
  				 * allocator.
  				 */
309381fea   Sasha Levin   mm: dump page whe...
147
  				VM_BUG_ON_PAGE(PageSlab(page_head), page_head);
26296ad2d   Andrew Morton   mm/swap.c: reorga...
148
  				__put_compound_page(page_head);
918070634   Andrea Arcangeli   thp: alter compou...
149
  			}
26296ad2d   Andrew Morton   mm/swap.c: reorga...
150
151
  			return;
  		} else
918070634   Andrea Arcangeli   thp: alter compou...
152
  			/*
26296ad2d   Andrew Morton   mm/swap.c: reorga...
153
154
155
156
157
  			 * __split_huge_page_refcount ran before us,
  			 * "page" was a THP tail. The split page_head
  			 * has been freed and reallocated as slab or
  			 * hugetlbfs page of smaller order (only
  			 * possible if reallocated as slab on x86).
918070634   Andrea Arcangeli   thp: alter compou...
158
  			 */
26296ad2d   Andrew Morton   mm/swap.c: reorga...
159
160
  			goto out_put_single;
  	}
5bf5f03c2   Pravin B Shelar   mm: fix slab->pag...
161

26296ad2d   Andrew Morton   mm/swap.c: reorga...
162
163
164
165
166
167
168
169
170
171
172
173
174
  	if (likely(page != page_head && get_page_unless_zero(page_head))) {
  		unsigned long flags;
  
  		/*
  		 * page_head wasn't a dangling pointer but it may not
  		 * be a head page anymore by the time we obtain the
  		 * lock. That is ok as long as it can't be freed from
  		 * under us.
  		 */
  		flags = compound_lock_irqsave(page_head);
  		if (unlikely(!PageTail(page))) {
  			/* __split_huge_page_refcount ran before us */
  			compound_unlock_irqrestore(page_head, flags);
a95a82e96   Andrea Arcangeli   thp: put_page: re...
175
  			if (put_page_testzero(page_head)) {
26296ad2d   Andrew Morton   mm/swap.c: reorga...
176
177
178
179
180
181
182
183
184
185
186
187
  				/*
  				 * The head page may have been freed
  				 * and reallocated as a compound page
  				 * of smaller order and then freed
  				 * again.  All we know is that it
  				 * cannot have become: a THP page, a
  				 * compound page of higher order, a
  				 * tail page.  That is because we
  				 * still hold the refcount of the
  				 * split THP tail and page_head was
  				 * the THP head before the split.
  				 */
a95a82e96   Andrea Arcangeli   thp: put_page: re...
188
189
190
191
192
  				if (PageHead(page_head))
  					__put_compound_page(page_head);
  				else
  					__put_single_page(page_head);
  			}
26296ad2d   Andrew Morton   mm/swap.c: reorga...
193
194
195
196
197
  			/*
  			 * Shared exit: "page" is no longer a tail, so drop
  			 * the reference on it as an ordinary single page.
  			 * Also reached by goto from the branches above/below.
  			 */
  out_put_single:
  			if (put_page_testzero(page))
  				__put_single_page(page);
  			return;
  		}
309381fea   Sasha Levin   mm: dump page whe...
198
  		VM_BUG_ON_PAGE(page_head != page->first_page, page);
26296ad2d   Andrew Morton   mm/swap.c: reorga...
199
200
201
202
203
204
205
  		/*
  		 * We can release the refcount taken by
  		 * get_page_unless_zero() now that
  		 * __split_huge_page_refcount() is blocked on the
  		 * compound_lock.
  		 */
  		if (put_page_testzero(page_head))
309381fea   Sasha Levin   mm: dump page whe...
206
  			VM_BUG_ON_PAGE(1, page_head);
26296ad2d   Andrew Morton   mm/swap.c: reorga...
207
  		/* __split_huge_page_refcount will wait now */
309381fea   Sasha Levin   mm: dump page whe...
208
  		VM_BUG_ON_PAGE(page_mapcount(page) <= 0, page);
26296ad2d   Andrew Morton   mm/swap.c: reorga...
209
  		atomic_dec(&page->_mapcount);
309381fea   Sasha Levin   mm: dump page whe...
210
211
  		VM_BUG_ON_PAGE(atomic_read(&page_head->_count) <= 0, page_head);
  		VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
26296ad2d   Andrew Morton   mm/swap.c: reorga...
212
213
214
215
216
217
218
  		compound_unlock_irqrestore(page_head, flags);
  
  		/* Drop the head reference that backed this tail pin. */
  		if (put_page_testzero(page_head)) {
  			if (PageHead(page_head))
  				__put_compound_page(page_head);
  			else
  				__put_single_page(page_head);
918070634   Andrea Arcangeli   thp: alter compou...
219
  		}
26296ad2d   Andrew Morton   mm/swap.c: reorga...
220
221
  	} else {
  		/* page_head is a dangling pointer */
309381fea   Sasha Levin   mm: dump page whe...
222
  		VM_BUG_ON_PAGE(PageTail(page), page);
26296ad2d   Andrew Morton   mm/swap.c: reorga...
223
  		goto out_put_single;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
224
  	}
8519fb30e   Nick Piggin   [PATCH] mm: compo...
225
226
227
228
229
230
231
  }
  
  /*
   * Drop a reference on @page.  Compound pages (head or tail) are handed
   * to put_compound_page(); any other page is freed as a single page once
   * its reference count reaches zero.
   */
  void put_page(struct page *page)
  {
  	if (unlikely(PageCompound(page))) {
  		put_compound_page(page);
  		return;
  	}

  	if (put_page_testzero(page))
  		__put_single_page(page);
  }
  EXPORT_SYMBOL(put_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
235

70b50f94f   Andrea Arcangeli   mm: thp: tail pag...
236
237
238
239
240
241
242
243
244
245
246
247
248
249
  /*
   * This function is exported but must not be called by anything other
   * than get_page(). It implements the slow path of get_page().
   */
  bool __get_page_tail(struct page *page)
  {
  	/*
  	 * This takes care of get_page() if run on a tail page
  	 * returned by one of the get_user_pages/follow_page variants.
  	 * get_user_pages/follow_page itself doesn't need the compound
  	 * lock because it runs __get_page_tail_foll() under the
  	 * proper PT lock that already serializes against
  	 * split_huge_page().
  	 */
27c73ae75   Andrea Arcangeli   mm: hugetlbfs: fi...
250
  	unsigned long flags;
ebf360f9b   Andrea Arcangeli   mm: hugetlbfs: mo...
251
  	bool got;
668f9abbd   David Rientjes   mm: close PageTai...
252
  	struct page *page_head = compound_head(page);
70b50f94f   Andrea Arcangeli   mm: thp: tail pag...
253

ebf360f9b   Andrea Arcangeli   mm: hugetlbfs: mo...
254
  	/* Ref to put_compound_page() comment. */
3bfcd13ec   Andrea Arcangeli   mm: hugetlbfs: us...
255
  	if (!__compound_tail_refcounted(page_head)) {
ebf360f9b   Andrea Arcangeli   mm: hugetlbfs: mo...
256
257
258
259
260
261
262
  		smp_rmb();
  		if (likely(PageTail(page))) {
  			/*
  			 * This is a hugetlbfs page or a slab
  			 * page. __split_huge_page_refcount
  			 * cannot race here.
  			 */
309381fea   Sasha Levin   mm: dump page whe...
263
  			VM_BUG_ON_PAGE(!PageHead(page_head), page_head);
ebf360f9b   Andrea Arcangeli   mm: hugetlbfs: mo...
264
265
266
267
268
269
270
271
272
273
274
275
276
  			__get_page_tail_foll(page, true);
  			return true;
  		} else {
  			/*
  			 * __split_huge_page_refcount run
  			 * before us, "page" was a THP
  			 * tail. The split page_head has been
  			 * freed and reallocated as slab or
  			 * hugetlbfs page of smaller order
  			 * (only possible if reallocated as
  			 * slab on x86).
  			 */
  			return false;
27c73ae75   Andrea Arcangeli   mm: hugetlbfs: fi...
277
  		}
ebf360f9b   Andrea Arcangeli   mm: hugetlbfs: mo...
278
  	}
27c73ae75   Andrea Arcangeli   mm: hugetlbfs: fi...
279

ebf360f9b   Andrea Arcangeli   mm: hugetlbfs: mo...
280
281
  	got = false;
  	if (likely(page != page_head && get_page_unless_zero(page_head))) {
27c73ae75   Andrea Arcangeli   mm: hugetlbfs: fi...
282
283
284
285
286
287
288
289
290
291
292
  		/*
  		 * page_head wasn't a dangling pointer but it
  		 * may not be a head page anymore by the time
  		 * we obtain the lock. That is ok as long as it
  		 * can't be freed from under us.
  		 */
  		flags = compound_lock_irqsave(page_head);
  		/* here __split_huge_page_refcount won't run anymore */
  		if (likely(PageTail(page))) {
  			__get_page_tail_foll(page, false);
  			got = true;
5bf5f03c2   Pravin B Shelar   mm: fix slab->pag...
293
  		}
27c73ae75   Andrea Arcangeli   mm: hugetlbfs: fi...
294
295
296
  		compound_unlock_irqrestore(page_head, flags);
  		if (unlikely(!got))
  			put_page(page_head);
70b50f94f   Andrea Arcangeli   mm: thp: tail pag...
297
298
299
300
  	}
  	return got;
  }
  EXPORT_SYMBOL(__get_page_tail);
1d7ea7324   Alexander Zarochentsev   [PATCH] fuse: fix...
301
  /**
7682486b3   Randy Dunlap   mm: fix various k...
302
303
   * put_pages_list() - release a list of pages
   * @pages: list of pages threaded on page->lru
1d7ea7324   Alexander Zarochentsev   [PATCH] fuse: fix...
304
305
306
   *
   * Release a list of pages which are strung together on page.lru.  Currently
   * used by read_cache_pages() and related error recovery code.
1d7ea7324   Alexander Zarochentsev   [PATCH] fuse: fix...
307
308
309
310
311
312
313
314
315
316
317
318
   */
  void put_pages_list(struct list_head *pages)
  {
  	struct page *last;

  	/* Peel pages off the tail of the list until it is empty. */
  	for (;;) {
  		if (list_empty(pages))
  			break;

  		last = list_entry(pages->prev, struct page, lru);
  		list_del(&last->lru);
  		page_cache_release(last);
  	}
  }
  EXPORT_SYMBOL(put_pages_list);
18022c5d8   Mel Gorman   mm: add get_kerne...
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
  /*
   * get_kernel_pages() - pin kernel pages in memory
   * @kiov:	An array of struct kvec structures
   * @nr_segs:	number of segments to pin
   * @write:	pinning for read/write, currently ignored
   * @pages:	array that receives pointers to the pages pinned.
   *		Should be at least nr_segs long.
   *
   * Returns number of pages pinned. This may be fewer than the number
   * requested: pinning stops at the first segment whose iov_len is not
   * PAGE_SIZE. If nr_segs is 0 or negative, returns 0. This implementation
   * never returns a negative errno; if no pages were pinned it returns 0.
   * Each page returned must be released with a put_page() call when it is
   * finished with.
   */
  int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
  		struct page **pages)
  {
  	int pinned = 0;

  	while (pinned < nr_segs) {
  		const struct kvec *vec = &kiov[pinned];

  		/* Only segments of exactly one page are accepted. */
  		if (WARN_ON(vec->iov_len != PAGE_SIZE))
  			break;

  		pages[pinned] = kmap_to_page(vec->iov_base);
  		page_cache_get(pages[pinned]);
  		pinned++;
  	}

  	/* Number of entries of @pages that now hold a pinned page. */
  	return pinned;
  }
  EXPORT_SYMBOL_GPL(get_kernel_pages);
  
  /*
   * get_kernel_page() - pin a kernel page in memory
   * @start:	starting kernel address
   * @write:	pinning for read/write, currently ignored
   * @pages:	array that receives pointer to the page pinned.
   *		Must be at least nr_segs long.
   *
   * Returns 1 if page is pinned. If the page was not pinned, returns
   * -errno. The page returned must be released with a put_page() call
   * when it is finished with.
   */
  int get_kernel_page(unsigned long start, int write, struct page **pages)
  {
  	const struct kvec kiov = {
  		.iov_base = (void *)start,
  		.iov_len = PAGE_SIZE
  	};
  
  	return get_kernel_pages(&kiov, 1, write, pages);
  }
  EXPORT_SYMBOL_GPL(get_kernel_page);
3dd7ae8ec   Shaohua Li   mm: simplify code...
369
  static void pagevec_lru_move_fn(struct pagevec *pvec,
fa9add641   Hugh Dickins   mm/memcg: apply a...
370
371
  	void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg),
  	void *arg)
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
372
373
  {
  	int i;
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
374
  	struct zone *zone = NULL;
fa9add641   Hugh Dickins   mm/memcg: apply a...
375
  	struct lruvec *lruvec;
3dd7ae8ec   Shaohua Li   mm: simplify code...
376
  	unsigned long flags = 0;
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
377
378
379
380
381
382
383
  
  	for (i = 0; i < pagevec_count(pvec); i++) {
  		struct page *page = pvec->pages[i];
  		struct zone *pagezone = page_zone(page);
  
  		if (pagezone != zone) {
  			if (zone)
3dd7ae8ec   Shaohua Li   mm: simplify code...
384
  				spin_unlock_irqrestore(&zone->lru_lock, flags);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
385
  			zone = pagezone;
3dd7ae8ec   Shaohua Li   mm: simplify code...
386
  			spin_lock_irqsave(&zone->lru_lock, flags);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
387
  		}
3dd7ae8ec   Shaohua Li   mm: simplify code...
388

fa9add641   Hugh Dickins   mm/memcg: apply a...
389
390
  		lruvec = mem_cgroup_page_lruvec(page, zone);
  		(*move_fn)(page, lruvec, arg);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
391
392
  	}
  	if (zone)
3dd7ae8ec   Shaohua Li   mm: simplify code...
393
  		spin_unlock_irqrestore(&zone->lru_lock, flags);
83896fb5e   Linus Torvalds   Revert "mm: simpl...
394
395
  	release_pages(pvec->pages, pvec->nr, pvec->cold);
  	pagevec_reinit(pvec);
d8505dee1   Shaohua Li   mm: simplify code...
396
  }
fa9add641   Hugh Dickins   mm/memcg: apply a...
397
398
  static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
  				 void *arg)
3dd7ae8ec   Shaohua Li   mm: simplify code...
399
400
  {
  	int *pgmoved = arg;
3dd7ae8ec   Shaohua Li   mm: simplify code...
401
402
403
  
  	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  		enum lru_list lru = page_lru_base_type(page);
925b7673c   Johannes Weiner   mm: make per-memc...
404
  		list_move_tail(&page->lru, &lruvec->lists[lru]);
3dd7ae8ec   Shaohua Li   mm: simplify code...
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
  		(*pgmoved)++;
  	}
  }
  
  /*
   * Rotate the eligible pages of @pvec to the tail of their LRU lists and
   * account the number of pages moved as PGROTATED events.
   *
   * pagevec_move_tail() must be called with IRQ disabled.
   * Otherwise this may cause nasty races.
   */
  static void pagevec_move_tail(struct pagevec *pvec)
  {
  	int nr_rotated = 0;

  	pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &nr_rotated);
  	__count_vm_events(PGROTATED, nr_rotated);
  }
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
420
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
421
422
   * Writeback is about to end against a page which has been marked for immediate
   * reclaim.  If it still appears to be reclaimable, move it to the tail of the
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
423
   * inactive list.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
424
   */
3dd7ae8ec   Shaohua Li   mm: simplify code...
425
  void rotate_reclaimable_page(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
426
  {
ac6aadb24   Miklos Szeredi   mm: rotate_reclai...
427
  	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
894bc3104   Lee Schermerhorn   Unevictable LRU I...
428
  	    !PageUnevictable(page) && PageLRU(page)) {
ac6aadb24   Miklos Szeredi   mm: rotate_reclai...
429
430
431
432
433
434
435
436
437
438
  		struct pagevec *pvec;
  		unsigned long flags;
  
  		page_cache_get(page);
  		local_irq_save(flags);
  		pvec = &__get_cpu_var(lru_rotate_pvecs);
  		if (!pagevec_add(pvec, page))
  			pagevec_move_tail(pvec);
  		local_irq_restore(flags);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
439
  }
fa9add641   Hugh Dickins   mm/memcg: apply a...
440
  static void update_page_reclaim_stat(struct lruvec *lruvec,
3e2f41f1f   KOSAKI Motohiro   memcg: add zone_r...
441
442
  				     int file, int rotated)
  {
fa9add641   Hugh Dickins   mm/memcg: apply a...
443
  	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
3e2f41f1f   KOSAKI Motohiro   memcg: add zone_r...
444
445
446
447
  
  	reclaim_stat->recent_scanned[file]++;
  	if (rotated)
  		reclaim_stat->recent_rotated[file]++;
3e2f41f1f   KOSAKI Motohiro   memcg: add zone_r...
448
  }
fa9add641   Hugh Dickins   mm/memcg: apply a...
449
450
  static void __activate_page(struct page *page, struct lruvec *lruvec,
  			    void *arg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
451
  {
744ed1442   Shaohua Li   mm: batch activat...
452
  	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
7a608572a   Linus Torvalds   Revert "mm: batch...
453
454
  		int file = page_is_file_cache(page);
  		int lru = page_lru_base_type(page);
744ed1442   Shaohua Li   mm: batch activat...
455

fa9add641   Hugh Dickins   mm/memcg: apply a...
456
  		del_page_from_lru_list(page, lruvec, lru);
7a608572a   Linus Torvalds   Revert "mm: batch...
457
458
  		SetPageActive(page);
  		lru += LRU_ACTIVE;
fa9add641   Hugh Dickins   mm/memcg: apply a...
459
  		add_page_to_lru_list(page, lruvec, lru);
c6286c983   Mel Gorman   mm: add tracepoin...
460
  		trace_mm_lru_activate(page, page_to_pfn(page));
4f98a2fee   Rik van Riel   vmscan: split LRU...
461

fa9add641   Hugh Dickins   mm/memcg: apply a...
462
463
  		__count_vm_event(PGACTIVATE);
  		update_page_reclaim_stat(lruvec, file, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
464
  	}
eb709b0d0   Shaohua Li   mm: batch activat...
465
466
467
468
469
470
471
472
473
474
475
476
  }
  
  #ifdef CONFIG_SMP
  static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
  
  /* Activate every page queued in @cpu's activate_page_pvecs, if any. */
  static void activate_page_drain(int cpu)
  {
  	struct pagevec *pending = &per_cpu(activate_page_pvecs, cpu);

  	if (pagevec_count(pending) == 0)
  		return;

  	pagevec_lru_move_fn(pending, __activate_page, NULL);
  }
5fbc46163   Chris Metcalf   mm: make lru_add_...
477
478
479
480
  /* Report whether @cpu still has pages queued in activate_page_pvecs. */
  static bool need_activate_page_drain(int cpu)
  {
  	struct pagevec *pending = &per_cpu(activate_page_pvecs, cpu);

  	return pagevec_count(pending) > 0;
  }
eb709b0d0   Shaohua Li   mm: batch activat...
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
  /*
   * SMP variant: queue @page on this CPU's activate_page_pvecs and drain
   * the pagevec through __activate_page() once it is full.  Pages that are
   * not on an LRU, already active or unevictable are ignored.
   */
  void activate_page(struct page *page)
  {
  	struct pagevec *pvec;

  	if (!PageLRU(page) || PageActive(page) || PageUnevictable(page))
  		return;

  	pvec = &get_cpu_var(activate_page_pvecs);
  	/* The pagevec holds its own reference on the page. */
  	page_cache_get(page);
  	if (pagevec_add(pvec, page) == 0)
  		pagevec_lru_move_fn(pvec, __activate_page, NULL);
  	put_cpu_var(activate_page_pvecs);
  }
  
  #else
  /*
   * !CONFIG_SMP stub: this build has no activate_page_pvecs, so there is
   * nothing to drain.
   */
  static inline void activate_page_drain(int cpu)
  {
  }
5fbc46163   Chris Metcalf   mm: make lru_add_...
497
498
499
500
  /* !CONFIG_SMP stub: activation is never batched, so no drain is needed. */
  static bool need_activate_page_drain(int cpu)
  {
  	return false;
  }
eb709b0d0   Shaohua Li   mm: batch activat...
501
502
503
504
505
  void activate_page(struct page *page)
  {
  	struct zone *zone = page_zone(page);
  
  	spin_lock_irq(&zone->lru_lock);
fa9add641   Hugh Dickins   mm/memcg: apply a...
506
  	__activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
507
508
  	spin_unlock_irq(&zone->lru_lock);
  }
eb709b0d0   Shaohua Li   mm: batch activat...
509
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
510

059285a25   Mel Gorman   mm: activate !Pag...
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
  /*
   * Mark @page active if it is currently sitting in this CPU's
   * lru_add_pvec, so that it is treated as active when the pagevec is
   * drained.
   */
  static void __lru_cache_activate_page(struct page *page)
  {
  	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  	int slot = pagevec_count(pvec);

  	/*
  	 * Walk the pagevec from its newest entry backwards, optimistically
  	 * assuming the page being activated was added only a moment ago.
  	 * Only the local pagevec is searched: a !PageLRU page could also be
  	 * in the middle of being released, reclaimed or migrated, or be on
  	 * a remote pagevec that is currently being drained. Besides,
  	 * setting PageActive on a page in a remote pagevec could race with
  	 * the page having just been added to the inactive list, causing
  	 * accounting errors and BUG_ON checks to trigger.
  	 */
  	while (--slot >= 0) {
  		if (pvec->pages[slot] != page)
  			continue;

  		SetPageActive(page);
  		break;
  	}

  	put_cpu_var(lru_add_pvec);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
537
538
539
540
541
542
543
  /*
   * Mark a page as having seen activity.
   *
   * inactive,unreferenced	->	inactive,referenced
   * inactive,referenced		->	active,unreferenced
   * active,unreferenced		->	active,referenced
   */
920c7a5d0   Harvey Harrison   mm: remove fastca...
544
  void mark_page_accessed(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
545
  {
894bc3104   Lee Schermerhorn   Unevictable LRU I...
546
  	if (!PageActive(page) && !PageUnevictable(page) &&
059285a25   Mel Gorman   mm: activate !Pag...
547
548
549
550
551
552
553
554
555
556
557
558
  			PageReferenced(page)) {
  
  		/*
  		 * If the page is on the LRU, queue it for activation via
  		 * activate_page_pvecs. Otherwise, assume the page is on a
  		 * pagevec, mark it active and it'll be moved to the active
  		 * LRU on the next drain.
  		 */
  		if (PageLRU(page))
  			activate_page(page);
  		else
  			__lru_cache_activate_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
559
  		ClearPageReferenced(page);
a528910e1   Johannes Weiner   mm: thrash detect...
560
561
  		if (page_is_file_cache(page))
  			workingset_activation(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
562
563
564
565
  	} else if (!PageReferenced(page)) {
  		SetPageReferenced(page);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
566
  EXPORT_SYMBOL(mark_page_accessed);
d741c9cde   Robin Dong   mm: fix nonunifor...
567
  /*
13f7f7898   Mel Gorman   mm: pagevec: defe...
568
569
570
571
   * Queue the page for addition to the LRU via pagevec. The decision on whether
   * to add the page to the [in]active [file|anon] list is deferred until the
   * pagevec is drained. This gives the caller of __lru_cache_add() a chance
   * to have the page added to the active list using mark_page_accessed().
d741c9cde   Robin Dong   mm: fix nonunifor...
572
   */
c53954a09   Mel Gorman   mm: remove lru pa...
573
  void __lru_cache_add(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
574
  {
13f7f7898   Mel Gorman   mm: pagevec: defe...
575
  	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
576
  	page_cache_get(page);
d741c9cde   Robin Dong   mm: fix nonunifor...
577
  	if (!pagevec_space(pvec))
a0b8cab3b   Mel Gorman   mm: remove lru pa...
578
  		__pagevec_lru_add(pvec);
d741c9cde   Robin Dong   mm: fix nonunifor...
579
  	pagevec_add(pvec, page);
13f7f7898   Mel Gorman   mm: pagevec: defe...
580
  	put_cpu_var(lru_add_pvec);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
581
  }
47846b065   Miklos Szeredi   mm: export lru_ca...
582
  EXPORT_SYMBOL(__lru_cache_add);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
583

f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
584
  /**
c53954a09   Mel Gorman   mm: remove lru pa...
585
   * lru_cache_add - add a page to a page list
f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
586
   * @page: the page to be added to the LRU.
f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
587
   */
c53954a09   Mel Gorman   mm: remove lru pa...
588
  void lru_cache_add(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
589
  {
309381fea   Sasha Levin   mm: dump page whe...
590
591
  	/* A page must not be flagged both active and unevictable. */
  	VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
  	/* The page must not already be on an LRU list. */
  	VM_BUG_ON_PAGE(PageLRU(page), page);
c53954a09   Mel Gorman   mm: remove lru pa...
592
  	/* Queue the page on this CPU's lru_add_pvec. */
  	__lru_cache_add(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
593
  }
894bc3104   Lee Schermerhorn   Unevictable LRU I...
594
595
596
597
598
599
600
601
602
603
604
605
606
  /**
   * add_page_to_unevictable_list - add a page to the unevictable list
   * @page:  the page to be added to the unevictable list
   *
   * Add page directly to its zone's unevictable list.  To avoid races with
   * tasks that might be making the page evictable, through eg. munlock,
   * munmap or exit, while it's not on the lru, we want to add the page
   * while it's locked or otherwise "invisible" to other tasks.  This is
   * difficult to do when using the pagevec cache, so bypass that.
   */
  void add_page_to_unevictable_list(struct page *page)
  {
  	struct zone *zone = page_zone(page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
607
  	struct lruvec *lruvec;
894bc3104   Lee Schermerhorn   Unevictable LRU I...
608
609
  
  	/* All flag updates and the list insertion happen under lru_lock. */
  	spin_lock_irq(&zone->lru_lock);
fa9add641   Hugh Dickins   mm/memcg: apply a...
610
  	lruvec = mem_cgroup_page_lruvec(page, zone);
ef2a2cbdd   Naoya Horiguchi   mm/swap.c: clear ...
611
  	/* Clear PG_active before the page goes on the unevictable list. */
  	ClearPageActive(page);
894bc3104   Lee Schermerhorn   Unevictable LRU I...
612
613
  	SetPageUnevictable(page);
  	SetPageLRU(page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
614
  	add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
894bc3104   Lee Schermerhorn   Unevictable LRU I...
615
616
  	spin_unlock_irq(&zone->lru_lock);
  }
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
617
  /*
315601809   Minchan Kim   mm: deactivate in...
618
619
620
621
622
   * If the page can not be invalidated, it is moved to the
   * inactive list to speed up its reclaim.  It is moved to the
   * head of the list, rather than the tail, to give the flusher
   * threads some time to write it out, as this is much more
   * effective than the single-page writeout from reclaim.
278df9f45   Minchan Kim   mm: reclaim inval...
623
624
625
626
627
628
629
630
631
632
633
634
635
636
   *
   * If the page is not mapped and is dirty or under writeback, it can
   * be reclaimed as soon as possible with the help of PG_reclaim.
   *
   * 1. active, mapped page -> none
   * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
   * 3. inactive, mapped page -> none
   * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
   * 5. inactive, clean -> inactive, tail
   * 6. Others -> none
   *
   * In case 4 the page is moved to the head of the inactive list because
   * the VM expects it to be written out by the flusher threads, which is
   * much more effective than the single-page writeout from reclaim.
315601809   Minchan Kim   mm: deactivate in...
637
   */
fa9add641   Hugh Dickins   mm/memcg: apply a...
638
639
  static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
  			      void *arg)
315601809   Minchan Kim   mm: deactivate in...
640
641
  {
  	int lru, file;
278df9f45   Minchan Kim   mm: reclaim inval...
642
  	bool active;
315601809   Minchan Kim   mm: deactivate in...
643

278df9f45   Minchan Kim   mm: reclaim inval...
644
  	/* Nothing to do for a page that is not on an LRU list. */
  	if (!PageLRU(page))
315601809   Minchan Kim   mm: deactivate in...
645
  		return;
bad49d9c8   Minchan Kim   mm: check PageUne...
646
647
  	/* Unevictable pages are left where they are. */
  	if (PageUnevictable(page))
  		return;
315601809   Minchan Kim   mm: deactivate in...
648
649
650
  	/* Some processes are using the page */
  	if (page_mapped(page))
  		return;
278df9f45   Minchan Kim   mm: reclaim inval...
651
  	/*
  	 * Sample the page state up front: it selects the list to unlink
  	 * from and feeds the statistics at the end of the function.
  	 */
  	active = PageActive(page);
315601809   Minchan Kim   mm: deactivate in...
652
653
  	file = page_is_file_cache(page);
  	lru = page_lru_base_type(page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
654
655
  
  	/*
  	 * 'lru' is the base list type; adding 'active' (0 or 1) presumably
  	 * selects the matching active list -- confirm against enum lru_list.
  	 */
  	del_page_from_lru_list(page, lruvec, lru + active);
315601809   Minchan Kim   mm: deactivate in...
656
657
  	ClearPageActive(page);
  	ClearPageReferenced(page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
658
  	/* Re-insert on the base list with PG_active and PG_referenced cleared. */
  	add_page_to_lru_list(page, lruvec, lru);
315601809   Minchan Kim   mm: deactivate in...
659

278df9f45   Minchan Kim   mm: reclaim inval...
660
661
662
663
664
665
666
667
668
669
670
671
  	if (PageWriteback(page) || PageDirty(page)) {
  		/*
  		 * Setting PG_reclaim can race with end_page_writeback(),
  		 * which may confuse readahead.  The race window is _really_
  		 * small and the problem is non-critical.
  		 */
  		SetPageReclaim(page);
  	} else {
  		/*
  		 * The page's writeback completed while it sat in the
  		 * pagevec, so move the page to the tail of the inactive list.
  		 */
925b7673c   Johannes Weiner   mm: make per-memc...
672
  		list_move_tail(&page->lru, &lruvec->lists[lru]);
278df9f45   Minchan Kim   mm: reclaim inval...
673
674
675
676
677
  		__count_vm_event(PGROTATED);
  	}
  
  	/* Only a page that was active counts as a deactivation. */
  	if (active)
  		__count_vm_event(PGDEACTIVATE);
fa9add641   Hugh Dickins   mm/memcg: apply a...
678
  	update_page_reclaim_stat(lruvec, file, 0);
315601809   Minchan Kim   mm: deactivate in...
679
  }
315601809   Minchan Kim   mm: deactivate in...
680
  /*
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
681
682
683
684
   * Drain pages out of the cpu's pagevecs.
   * Either "cpu" is the current CPU, and preemption has already been
   * disabled; or "cpu" is being hot-unplugged, and is already dead.
   */
f0cb3c76a   Konstantin Khlebnikov   mm: drain percpu ...
685
  void lru_add_drain_cpu(int cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
686
  {
13f7f7898   Mel Gorman   mm: pagevec: defe...
687
  	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
688

13f7f7898   Mel Gorman   mm: pagevec: defe...
689
  	/* 1) The lru_add_pvec batch of @cpu: hand it to __pagevec_lru_add(). */
  	if (pagevec_count(pvec))
a0b8cab3b   Mel Gorman   mm: remove lru pa...
690
  		__pagevec_lru_add(pvec);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
691
692
693
694
695
696
697
698
699
700
  
  	/*
  	 * 2) The lru_rotate_pvecs batch: pagevec_move_tail() is called with
  	 * local interrupts disabled.
  	 */
  	pvec = &per_cpu(lru_rotate_pvecs, cpu);
  	if (pagevec_count(pvec)) {
  		unsigned long flags;
  
  		/* No harm done if a racing interrupt already did this */
  		local_irq_save(flags);
  		pagevec_move_tail(pvec);
  		local_irq_restore(flags);
  	}
315601809   Minchan Kim   mm: deactivate in...
701
702
703
  
  	/*
  	 * 3) The lru_deactivate_pvecs batch filled by deactivate_page():
  	 * each page is run through lru_deactivate_fn().
  	 */
  	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
  	if (pagevec_count(pvec))
3dd7ae8ec   Shaohua Li   mm: simplify code...
704
  		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
eb709b0d0   Shaohua Li   mm: batch activat...
705
706
  
  	/* 4) Finally, activate_page_drain() is invoked for @cpu. */
  	activate_page_drain(cpu);
315601809   Minchan Kim   mm: deactivate in...
707
708
709
710
711
712
713
714
715
716
717
718
  }
  
  /**
   * deactivate_page - forcefully deactivate a page
   * @page: page to deactivate
   *
   * This function hints the VM that @page is a good reclaim candidate,
   * for example if its invalidation fails due to the page being dirty
   * or under writeback.
   */
  void deactivate_page(struct page *page)
  {
821ed6bbe   Minchan Kim   mm: filter unevic...
719
720
721
722
723
724
  	/*
  	 * In a workload with many unevictable pages such as mprotect,
  	 * deactivating unevictable pages to accelerate reclaim is pointless.
  	 * NOTE(review): "mprotect" probably should read "mlock" -- confirm.
  	 */
  	if (PageUnevictable(page))
  		return;
315601809   Minchan Kim   mm: deactivate in...
725
726
727
728
  	/*
  	 * Queue the page only if a reference could be taken on it; the
  	 * actual list manipulation is deferred to lru_deactivate_fn().
  	 */
  	if (likely(get_page_unless_zero(page))) {
  		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
  
  		/*
  		 * A zero return from pagevec_add() is taken to mean the
  		 * pagevec has no room left, so it is processed right away.
  		 */
  		if (!pagevec_add(pvec, page))
3dd7ae8ec   Shaohua Li   mm: simplify code...
729
  			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
315601809   Minchan Kim   mm: deactivate in...
730
731
  		put_cpu_var(lru_deactivate_pvecs);
  	}
80bfed904   Andrew Morton   [PATCH] consolida...
732
733
734
735
  }
  
  void lru_add_drain(void)
  {
f0cb3c76a   Konstantin Khlebnikov   mm: drain percpu ...
736
  	/*
  	 * Drain the pagevecs of the CPU we are running on.  The
  	 * get_cpu()/put_cpu() pair brackets the call, matching
  	 * lru_add_drain_cpu()'s requirement that preemption is disabled
  	 * when it is handed the current CPU.
  	 */
  	lru_add_drain_cpu(get_cpu());
80bfed904   Andrew Morton   [PATCH] consolida...
737
  	put_cpu();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
738
  }
c4028958b   David Howells   WorkStruct: make ...
739
  static void lru_add_drain_per_cpu(struct work_struct *dummy)
053837fce   Nick Piggin   [PATCH] mm: migra...
740
741
742
  {
  	/*
  	 * Work callback installed by lru_add_drain_all(); @dummy is unused,
  	 * the function simply drains the CPU it happens to run on.
  	 */
  	lru_add_drain();
  }
5fbc46163   Chris Metcalf   mm: make lru_add_...
743
744
745
  /* One work item per CPU, (re)initialised and queued by lru_add_drain_all(). */
  static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
  
  void lru_add_drain_all(void)
053837fce   Nick Piggin   [PATCH] mm: migra...
746
  {
5fbc46163   Chris Metcalf   mm: make lru_add_...
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
  	/*
  	 * 'lock' serialises callers; it also covers the function-static
  	 * 'has_work' mask, which is shared between all invocations.
  	 */
  	static DEFINE_MUTEX(lock);
  	static struct cpumask has_work;
  	int cpu;
  
  	mutex_lock(&lock);
  	get_online_cpus();
  	cpumask_clear(&has_work);
  
  	/*
  	 * Queue drain work only on CPUs that have something pending in one
  	 * of their pagevecs (or that need_activate_page_drain() reports),
  	 * and remember those CPUs in 'has_work'.
  	 */
  	for_each_online_cpu(cpu) {
  		struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  
  		if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
  		    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
  		    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
  		    need_activate_page_drain(cpu)) {
  			INIT_WORK(work, lru_add_drain_per_cpu);
  			schedule_work_on(cpu, work);
  			cpumask_set_cpu(cpu, &has_work);
  		}
  	}
  
  	/* Wait for exactly the work items that were queued above. */
  	for_each_cpu(cpu, &has_work)
  		flush_work(&per_cpu(lru_add_drain_work, cpu));
  
  	put_online_cpus();
  	mutex_unlock(&lock);
053837fce   Nick Piggin   [PATCH] mm: migra...
773
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
774
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
775
776
777
778
779
780
781
   * Batched page_cache_release().  Decrement the reference count on all the
   * passed pages.  If it fell to zero then remove the page from the LRU and
   * free it.
   *
   * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
   * for the remainder of the operation.
   *
ab33dc09a   Fernando Luis Vazquez Cao   swap: update func...
782
783
784
785
   * The locking in this function is against shrink_inactive_list(): we recheck
   * the page count inside the lock to see whether shrink_inactive_list()
   * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
   * will free it.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
786
787
788
789
   */
  void release_pages(struct page **pages, int nr, int cold)
  {
  	int i;
cc59850ef   Konstantin Khlebnikov   mm: add free_hot_...
790
  	LIST_HEAD(pages_to_free);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
791
  	/* Zone whose lru_lock is currently held; NULL while no lock is held. */
  	struct zone *zone = NULL;
fa9add641   Hugh Dickins   mm/memcg: apply a...
792
  	struct lruvec *lruvec;
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
793
  	unsigned long uninitialized_var(flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
794

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
795
796
  	for (i = 0; i < nr; i++) {
  		struct page *page = pages[i];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
797

8519fb30e   Nick Piggin   [PATCH] mm: compo...
798
799
  		/*
  		 * Compound pages go through put_compound_page(); any
  		 * lru_lock still held is dropped before that call.
  		 */
  		if (unlikely(PageCompound(page))) {
  			if (zone) {
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
800
  				spin_unlock_irqrestore(&zone->lru_lock, flags);
8519fb30e   Nick Piggin   [PATCH] mm: compo...
801
802
803
804
805
  				zone = NULL;
  			}
  			put_compound_page(page);
  			continue;
  		}
b5810039a   Nick Piggin   [PATCH] core remo...
806
  		/* Drop our reference; carry on only if the count fell to zero. */
  		if (!put_page_testzero(page))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
807
  			continue;
46453a6e1   Nick Piggin   [PATCH] mm: never...
808
809
  		if (PageLRU(page)) {
  			struct zone *pagezone = page_zone(page);
894bc3104   Lee Schermerhorn   Unevictable LRU I...
810

46453a6e1   Nick Piggin   [PATCH] mm: never...
811
812
  			/*
  			 * Switch locks only when this page lives in a
  			 * different zone than the one currently locked.
  			 */
  			if (pagezone != zone) {
  				if (zone)
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
813
814
  					spin_unlock_irqrestore(&zone->lru_lock,
  									flags);
46453a6e1   Nick Piggin   [PATCH] mm: never...
815
  				zone = pagezone;
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
816
  				spin_lock_irqsave(&zone->lru_lock, flags);
46453a6e1   Nick Piggin   [PATCH] mm: never...
817
  			}
fa9add641   Hugh Dickins   mm/memcg: apply a...
818
819
  
  			/* With the lock held, take the page off its LRU list. */
  			lruvec = mem_cgroup_page_lruvec(page, zone);
309381fea   Sasha Levin   mm: dump page whe...
820
  			VM_BUG_ON_PAGE(!PageLRU(page), page);
674539115   Nick Piggin   [PATCH] mm: less ...
821
  			__ClearPageLRU(page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
822
  			del_page_from_lru_list(page, lruvec, page_off_lru(page));
46453a6e1   Nick Piggin   [PATCH] mm: never...
823
  		}
c53954a09   Mel Gorman   mm: remove lru pa...
824
825
  		/* Clear Active bit in case of parallel mark_page_accessed */
  		ClearPageActive(page);
cc59850ef   Konstantin Khlebnikov   mm: add free_hot_...
826
  		/* Collect the page; the whole batch is freed after the loop. */
  		list_add(&page->lru, &pages_to_free);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
827
828
  	}
  	if (zone)
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
829
  		spin_unlock_irqrestore(&zone->lru_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
830

cc59850ef   Konstantin Khlebnikov   mm: add free_hot_...
831
  	/* Free the collected pages in one go, after the lru_lock is released. */
  	free_hot_cold_page_list(&pages_to_free, cold);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
832
  }
0be8557bc   Miklos Szeredi   fuse: use release...
833
  EXPORT_SYMBOL(release_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
  
  /*
   * The pages which we're about to release may be in the deferred lru-addition
   * queues.  That would prevent them from really being freed right now.  That's
   * OK from a correctness point of view but is inefficient - those pages may be
   * cache-warm and we want to give them back to the page allocator ASAP.
   *
   * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
   * and __pagevec_lru_add_active() call release_pages() directly to avoid
   * mutual recursion.
   */
  void __pagevec_release(struct pagevec *pvec)
  {
  	int nr_held;

  	/* Drain this CPU's deferred LRU queues before dropping the references. */
  	lru_add_drain();

  	nr_held = pagevec_count(pvec);
  	release_pages(pvec->pages, nr_held, pvec->cold);

  	/* Reset the caller's pagevec now that its pages have been released. */
  	pagevec_reinit(pvec);
  }
7f2857018   Steve French   Export __pagevec_...
851
  EXPORT_SYMBOL(__pagevec_release);
12d271078   Hugh Dickins   memcg: fix split_...
852
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
71e3aac07   Andrea Arcangeli   thp: transparent ...
853
  /* used by __split_huge_page_refcount() */
fa9add641   Hugh Dickins   mm/memcg: apply a...
854
  void lru_add_page_tail(struct page *page, struct page *page_tail,
5bc7b8aca   Shaohua Li   mm: thp: add spli...
855
  		       struct lruvec *lruvec, struct list_head *list)
71e3aac07   Andrea Arcangeli   thp: transparent ...
856
  {
71e3aac07   Andrea Arcangeli   thp: transparent ...
857
  	/*
  	 * Passed as the 'file' argument of update_page_reclaim_stat() below;
  	 * presumably 0 because the huge pages split here are anonymous --
  	 * confirm.
  	 */
  	const int file = 0;
71e3aac07   Andrea Arcangeli   thp: transparent ...
858

309381fea   Sasha Levin   mm: dump page whe...
859
860
861
  	/*
  	 * Sanity checks: @page must be a head page, @page_tail must be
  	 * neither compound nor already on an LRU, and (on SMP builds) the
  	 * zone's lru_lock must be held by the caller.
  	 */
  	VM_BUG_ON_PAGE(!PageHead(page), page);
  	VM_BUG_ON_PAGE(PageCompound(page_tail), page);
  	VM_BUG_ON_PAGE(PageLRU(page_tail), page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
862
863
  	VM_BUG_ON(NR_CPUS != 1 &&
  		  !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));
71e3aac07   Andrea Arcangeli   thp: transparent ...
864

5bc7b8aca   Shaohua Li   mm: thp: add spli...
865
866
  	/* Without a private @list the tail page is marked as being on the LRU. */
  	if (!list)
  		SetPageLRU(page_tail);
71e3aac07   Andrea Arcangeli   thp: transparent ...
867

12d271078   Hugh Dickins   memcg: fix split_...
868
869
  	/* Common case: the head is on an LRU list, so link the tail next to it. */
  	if (likely(PageLRU(page)))
  		list_add_tail(&page_tail->lru, &page->lru);
5bc7b8aca   Shaohua Li   mm: thp: add spli...
870
871
872
873
874
  	else if (list) {
  		/* page reclaim is reclaiming a huge page */
  		get_page(page_tail);
  		list_add_tail(&page_tail->lru, list);
  	} else {
12d271078   Hugh Dickins   memcg: fix split_...
875
876
877
878
879
880
881
882
  		struct list_head *list_head;
  		/*
  		 * Head page has not yet been counted, as an hpage,
  		 * so we must account for each subpage individually.
  		 *
  		 * Use the standard add function to put page_tail on the list,
  		 * but then correct its position so they all end up in order.
  		 */
e180cf806   Kirill A. Shutemov   thp, mm: avoid Pa...
883
  		add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail));
12d271078   Hugh Dickins   memcg: fix split_...
884
885
  		/* Re-link page_tail immediately before its current predecessor. */
  		list_head = page_tail->lru.prev;
  		list_move_tail(&page_tail->lru, list_head);
71e3aac07   Andrea Arcangeli   thp: transparent ...
886
  	}
7512102cf   Hugh Dickins   memcg: fix GPF wh...
887
888
  
  	/* Reclaim statistics are skipped when the head page is unevictable. */
  	if (!PageUnevictable(page))
e180cf806   Kirill A. Shutemov   thp, mm: avoid Pa...
889
  		update_page_reclaim_stat(lruvec, file, PageActive(page_tail));
71e3aac07   Andrea Arcangeli   thp: transparent ...
890
  }
12d271078   Hugh Dickins   memcg: fix split_...
891
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
71e3aac07   Andrea Arcangeli   thp: transparent ...
892

fa9add641   Hugh Dickins   mm/memcg: apply a...
893
894
  static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
  				 void *arg)
3dd7ae8ec   Shaohua Li   mm: simplify code...
895
  {
13f7f7898   Mel Gorman   mm: pagevec: defe...
896
897
898
  	/*
  	 * Per-page callback used by __pagevec_lru_add().  The page's
  	 * file/active state and target list are sampled here, before
  	 * PG_lru is set further down.
  	 */
  	int file = page_is_file_cache(page);
  	int active = PageActive(page);
  	enum lru_list lru = page_lru(page);
3dd7ae8ec   Shaohua Li   mm: simplify code...
899

309381fea   Sasha Levin   mm: dump page whe...
900
  	/* The page must not already be on an LRU list. */
  	VM_BUG_ON_PAGE(PageLRU(page), page);
3dd7ae8ec   Shaohua Li   mm: simplify code...
901
902
  
  	SetPageLRU(page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
903
904
  	add_page_to_lru_list(page, lruvec, lru);
  	update_page_reclaim_stat(lruvec, file, active);
c6286c983   Mel Gorman   mm: add tracepoin...
905
  	/* Report the insertion through the mm_lru_insertion tracepoint. */
  	trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page));
3dd7ae8ec   Shaohua Li   mm: simplify code...
906
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
907
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
908
909
910
   * Add the passed pages to the LRU, then drop the caller's refcount
   * on them.  Reinitialises the caller's pagevec.
   */
a0b8cab3b   Mel Gorman   mm: remove lru pa...
911
  void __pagevec_lru_add(struct pagevec *pvec)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
912
  {
a0b8cab3b   Mel Gorman   mm: remove lru pa...
913
  	/*
  	 * pagevec_lru_move_fn() runs __pagevec_lru_add_fn() on the pages of
  	 * @pvec; no extra argument is needed, hence the NULL.
  	 */
  	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
914
  }
5095ae837   Hugh Dickins   mm: fewer undersc...
915
  EXPORT_SYMBOL(__pagevec_lru_add);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
916

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
917
  /**
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
   * pagevec_lookup_entries - gang pagecache lookup
   * @pvec:	Where the resulting entries are placed
   * @mapping:	The address_space to search
   * @start:	The starting entry index
   * @nr_entries:	The maximum number of entries
   * @indices:	The cache indices corresponding to the entries in @pvec
   *
   * pagevec_lookup_entries() will search for and return a group of up
   * to @nr_entries pages and shadow entries in the mapping.  All
   * entries are placed in @pvec.  pagevec_lookup_entries() takes a
   * reference against actual pages in @pvec.
   *
   * The search returns a group of mapping-contiguous entries with
   * ascending indexes.  There may be holes in the indices due to
   * not-present entries.
   *
   * pagevec_lookup_entries() returns the number of entries which were
   * found.
   */
  unsigned pagevec_lookup_entries(struct pagevec *pvec,
  				struct address_space *mapping,
  				pgoff_t start, unsigned nr_entries,
  				pgoff_t *indices)
  {
  	/*
  	 * The limit is named @nr_entries to match the kernel-doc of this
  	 * function: the lookup returns pages and shadow entries alike, so
  	 * the bound is on entries rather than on pages.
  	 *
  	 * find_get_entries() is handed pvec->pages and @indices as its
  	 * output arrays; its return value becomes the pagevec's count.
  	 */
  	pvec->nr = find_get_entries(mapping, start, nr_entries,
  				    pvec->pages, indices);
  	return pagevec_count(pvec);
  }
  
  /**
   * pagevec_remove_exceptionals - pagevec exceptionals pruning
   * @pvec:	The pagevec to prune
   *
   * pagevec_lookup_entries() fills both pages and exceptional radix
   * tree entries into the pagevec.  This function prunes all
   * exceptionals from @pvec without leaving holes, so that it can be
   * passed on to page-only pagevec operations.
   */
  void pagevec_remove_exceptionals(struct pagevec *pvec)
  {
  	int src;
  	int kept = 0;

  	/*
  	 * Compact in place: walk every slot and copy the non-exceptional
  	 * entries down to the front, preserving their order.
  	 */
  	for (src = 0; src < pagevec_count(pvec); src++) {
  		struct page *entry = pvec->pages[src];

  		if (radix_tree_exceptional_entry(entry))
  			continue;

  		pvec->pages[kept] = entry;
  		kept++;
  	}

  	/* Only the first 'kept' slots remain valid. */
  	pvec->nr = kept;
  }
  
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
   * pagevec_lookup - gang pagecache lookup
   * @pvec:	Where the resulting pages are placed
   * @mapping:	The address_space to search
   * @start:	The starting page index
   * @nr_pages:	The maximum number of pages
   *
   * pagevec_lookup() will search for and return a group of up to @nr_pages pages
   * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
   * reference against the pages in @pvec.
   *
   * The search returns a group of mapping-contiguous pages with ascending
   * indexes.  There may be holes in the indices due to not-present pages.
   *
   * pagevec_lookup() returns the number of pages which were found.
   */
  unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
  		pgoff_t start, unsigned nr_pages)
  {
  	struct page **slots = pvec->pages;

  	/* Whatever find_get_pages() reports becomes the pagevec's count. */
  	pvec->nr = find_get_pages(mapping, start, nr_pages, slots);

  	return pagevec_count(pvec);
  }
78539fdfa   Christoph Hellwig   [XFS] Export page...
990
  EXPORT_SYMBOL(pagevec_lookup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
991
992
993
994
995
996
997
  unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
  		pgoff_t *index, int tag, unsigned nr_pages)
  {
  	struct page **slots = pvec->pages;

  	/*
  	 * Tagged variant of the gang lookup: @index and @tag are passed
  	 * straight through to find_get_pages_tag(), and its return value
  	 * becomes the pagevec's count.
  	 */
  	pvec->nr = find_get_pages_tag(mapping, index, tag, nr_pages, slots);

  	return pagevec_count(pvec);
  }
7f2857018   Steve French   Export __pagevec_...
998
  EXPORT_SYMBOL(pagevec_lookup_tag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
999

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1000
1001
1002
1003
1004
  /*
   * Perform any setup for the swap system
   */
  void __init swap_setup(void)
  {
4481374ce   Jan Beulich   mm: replace vario...
1005
  	/* Total RAM in megabytes: the page count scaled by (20 - PAGE_SHIFT) bits. */
  	unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
e0bf68dde   Peter Zijlstra   mm: bdi init hooks
1006
  #ifdef CONFIG_SWAP
33806f06d   Shaohua Li   swap: make each s...
1007
  	int i;
8077c0d98   Mikulas Patocka   bdi: test bdi_ini...
1008
1009
  	/* Only swapper_spaces[0]'s backing_dev_info is initialised; failure is fatal. */
  	if (bdi_init(swapper_spaces[0].backing_dev_info))
  		panic("Failed to init swap bdi");
33806f06d   Shaohua Li   swap: make each s...
1010
1011
1012
1013
  	/* Every swapper address space gets its lock and list head set up. */
  	for (i = 0; i < MAX_SWAPFILES; i++) {
  		spin_lock_init(&swapper_spaces[i].tree_lock);
  		INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
  	}
e0bf68dde   Peter Zijlstra   mm: bdi init hooks
1014
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1015
1016
1017
1018
1019
1020
1021
1022
1023
  	/* Use a smaller cluster for small-memory machines */
  	if (megs < 16)
  		page_cluster = 2;
  	else
  		page_cluster = 3;
  	/*
  	 * Right now other parts of the system mean that we
  	 * _really_ don't want to cluster much more
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1024
  }