  /*
   *  linux/mm/swap.c
   *
   *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   */
  
  /*
   * This file contains the default values for the operation of the
   * Linux VM subsystem. Fine-tuning documentation can be found in
   * Documentation/sysctl/vm.txt.
   * Started 18.12.91
   * Swap aging added 23.2.95, Stephen Tweedie.
   * Buffermem limits added 12.3.98, Rik van Riel.
   */
  
  #include <linux/mm.h>
  #include <linux/sched.h>
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
  #include <linux/mman.h>
  #include <linux/pagemap.h>
  #include <linux/pagevec.h>
  #include <linux/init.h>
  #include <linux/export.h>
  #include <linux/mm_inline.h>
  #include <linux/percpu_counter.h>
  #include <linux/memremap.h>
  #include <linux/percpu.h>
  #include <linux/cpu.h>
  #include <linux/notifier.h>
  #include <linux/backing-dev.h>
  #include <linux/memcontrol.h>
  #include <linux/gfp.h>
  #include <linux/uio.h>
  #include <linux/hugetlb.h>
  #include <linux/page_idle.h>
  
  #include "internal.h"
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/pagemap.h>
  /* How many pages do we try to swap or page in/out together? */
  int page_cluster;
  static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
  static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
  static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
  static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
  #ifdef CONFIG_SMP
  static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
  #endif
  
  /*
   * This path almost never happens for VM activity - pages are normally
   * freed via pagevecs.  But it gets used by networking.
   */
  static void __page_cache_release(struct page *page)
  {
  	if (PageLRU(page)) {
  		struct zone *zone = page_zone(page);
  		struct lruvec *lruvec;
  		unsigned long flags;
  
  		spin_lock_irqsave(zone_lru_lock(zone), flags);
  		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
  		VM_BUG_ON_PAGE(!PageLRU(page), page);
  		__ClearPageLRU(page);
  		del_page_from_lru_list(page, lruvec, page_off_lru(page));
  		spin_unlock_irqrestore(zone_lru_lock(zone), flags);
  	}
  	mem_cgroup_uncharge(page);
  }
  
  static void __put_single_page(struct page *page)
  {
  	__page_cache_release(page);
  	free_hot_cold_page(page, false);
  }
  
  static void __put_compound_page(struct page *page)
  {
  	compound_page_dtor *dtor;
  
  	/*
  	 * __page_cache_release() is supposed to be called for thp, not for
  	 * hugetlb. This is because a hugetlb page never has PageLRU set
  	 * (it is never put on any LRU list) and no memcg routines should
  	 * be called for hugetlb (it has a separate hugetlb_cgroup).
  	 */
  	if (!PageHuge(page))
  		__page_cache_release(page);
  	dtor = get_compound_page_dtor(page);
  	(*dtor)(page);
  }
  
  void __put_page(struct page *page)
  {
  	if (unlikely(PageCompound(page)))
  		__put_compound_page(page);
  	else
  		__put_single_page(page);
  }
  EXPORT_SYMBOL(__put_page);
  
  /**
   * put_pages_list() - release a list of pages
   * @pages: list of pages threaded on page->lru
   *
   * Release a list of pages which are strung together on page.lru.  Currently
   * used by read_cache_pages() and related error recovery code.
   */
  void put_pages_list(struct list_head *pages)
  {
  	while (!list_empty(pages)) {
  		struct page *victim;
  
  		victim = list_entry(pages->prev, struct page, lru);
  		list_del(&victim->lru);
  		put_page(victim);
  	}
  }
  EXPORT_SYMBOL(put_pages_list);
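  
  /*
   * Illustrative sketch (not part of this file): releasing a batch of pages
   * threaded on ->lru with put_pages_list().  "page0" and "page1" are
   * hypothetical pages the caller already holds one reference on each:
   *
   *	LIST_HEAD(pages);
   *
   *	list_add(&page0->lru, &pages);
   *	list_add(&page1->lru, &pages);
   *	put_pages_list(&pages);		(drops one reference per page)
   */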
  /*
   * get_kernel_pages() - pin kernel pages in memory
   * @kiov:	An array of struct kvec structures
   * @nr_segs:	number of segments to pin
   * @write:	pinning for read/write, currently ignored
   * @pages:	array that receives pointers to the pages pinned.
   *		Should be at least nr_segs long.
   *
   * Returns number of pages pinned. This may be fewer than the number
   * requested. If nr_segs is 0 or negative, returns 0. If no pages
   * were pinned, returns -errno. Each page returned must be released
   * with a put_page() call when it is finished with.
   */
  int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
  		struct page **pages)
  {
  	int seg;
  
  	for (seg = 0; seg < nr_segs; seg++) {
  		if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE))
  			return seg;
  		pages[seg] = kmap_to_page(kiov[seg].iov_base);
  		get_page(pages[seg]);
  	}
  
  	return seg;
  }
  EXPORT_SYMBOL_GPL(get_kernel_pages);
  
  /*
   * get_kernel_page() - pin a kernel page in memory
   * @start:	starting kernel address
   * @write:	pinning for read/write, currently ignored
   * @pages:	array that receives pointer to the page pinned.
   *		Must have room for at least one page.
   *
   * Returns 1 if page is pinned. If the page was not pinned, returns
   * -errno. The page returned must be released with a put_page() call
   * when it is finished with.
   */
  int get_kernel_page(unsigned long start, int write, struct page **pages)
  {
  	const struct kvec kiov = {
  		.iov_base = (void *)start,
  		.iov_len = PAGE_SIZE
  	};
  
  	return get_kernel_pages(&kiov, 1, write, pages);
  }
  EXPORT_SYMBOL_GPL(get_kernel_page);
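  
  /*
   * Illustrative sketch (not part of this file): pinning a single kernel
   * page and releasing it again.  "buf" is a hypothetical page-aligned
   * kernel buffer:
   *
   *	struct page *page;
   *
   *	if (get_kernel_page((unsigned long)buf, 0, &page) == 1) {
   *		(use the page, e.g. to build a bio or an iov_iter)
   *		put_page(page);
   *	}
   */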
  static void pagevec_lru_move_fn(struct pagevec *pvec,
  	void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg),
  	void *arg)
  {
  	int i;
  	struct pglist_data *pgdat = NULL;
  	struct lruvec *lruvec;
  	unsigned long flags = 0;
  
  	for (i = 0; i < pagevec_count(pvec); i++) {
  		struct page *page = pvec->pages[i];
  		struct pglist_data *pagepgdat = page_pgdat(page);
  
  		if (pagepgdat != pgdat) {
  			if (pgdat)
  				spin_unlock_irqrestore(&pgdat->lru_lock, flags);
  			pgdat = pagepgdat;
  			spin_lock_irqsave(&pgdat->lru_lock, flags);
  		}
  
  		lruvec = mem_cgroup_page_lruvec(page, pgdat);
  		(*move_fn)(page, lruvec, arg);
  	}
  	if (pgdat)
  		spin_unlock_irqrestore(&pgdat->lru_lock, flags);
  
  	release_pages(pvec->pages, pvec->nr, pvec->cold);
  	pagevec_reinit(pvec);
  }
  
  static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
  				 void *arg)
  {
  	int *pgmoved = arg;
  
  	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  		enum lru_list lru = page_lru_base_type(page);
  		list_move_tail(&page->lru, &lruvec->lists[lru]);
  		(*pgmoved)++;
  	}
  }
  
  /*
   * pagevec_move_tail() must be called with IRQ disabled.
   * Otherwise this may cause nasty races.
   */
  static void pagevec_move_tail(struct pagevec *pvec)
  {
  	int pgmoved = 0;
  
  	pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
  	__count_vm_events(PGROTATED, pgmoved);
  }
  
  /*
   * Writeback is about to end against a page which has been marked for immediate
   * reclaim.  If it still appears to be reclaimable, move it to the tail of the
   * inactive list.
   */
  void rotate_reclaimable_page(struct page *page)
  {
  	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
  	    !PageUnevictable(page) && PageLRU(page)) {
  		struct pagevec *pvec;
  		unsigned long flags;
  
  		get_page(page);
  		local_irq_save(flags);
  		pvec = this_cpu_ptr(&lru_rotate_pvecs);
  		if (!pagevec_add(pvec, page) || PageCompound(page))
  			pagevec_move_tail(pvec);
  		local_irq_restore(flags);
  	}
  }
  
  static void update_page_reclaim_stat(struct lruvec *lruvec,
  				     int file, int rotated)
  {
  	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
  
  	reclaim_stat->recent_scanned[file]++;
  	if (rotated)
  		reclaim_stat->recent_rotated[file]++;
  }
  
  static void __activate_page(struct page *page, struct lruvec *lruvec,
  			    void *arg)
  {
  	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  		int file = page_is_file_cache(page);
  		int lru = page_lru_base_type(page);
  
  		del_page_from_lru_list(page, lruvec, lru);
  		SetPageActive(page);
  		lru += LRU_ACTIVE;
  		add_page_to_lru_list(page, lruvec, lru);
  		trace_mm_lru_activate(page);
  
  		__count_vm_event(PGACTIVATE);
  		update_page_reclaim_stat(lruvec, file, 1);
  	}
  }
  
  #ifdef CONFIG_SMP
  static void activate_page_drain(int cpu)
  {
  	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
  
  	if (pagevec_count(pvec))
  		pagevec_lru_move_fn(pvec, __activate_page, NULL);
  }
  
  static bool need_activate_page_drain(int cpu)
  {
  	return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0;
  }
  
  void activate_page(struct page *page)
  {
  	page = compound_head(page);
  	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
  
  		get_page(page);
  		if (!pagevec_add(pvec, page) || PageCompound(page))
  			pagevec_lru_move_fn(pvec, __activate_page, NULL);
  		put_cpu_var(activate_page_pvecs);
  	}
  }
  
  #else
  static inline void activate_page_drain(int cpu)
  {
  }
  
  static bool need_activate_page_drain(int cpu)
  {
  	return false;
  }
  
  void activate_page(struct page *page)
  {
  	struct zone *zone = page_zone(page);
  	page = compound_head(page);
  	spin_lock_irq(zone_lru_lock(zone));
  	__activate_page(page, mem_cgroup_page_lruvec(page, zone->zone_pgdat), NULL);
  	spin_unlock_irq(zone_lru_lock(zone));
  }
  #endif

  static void __lru_cache_activate_page(struct page *page)
  {
  	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  	int i;
  
  	/*
  	 * Search backwards on the optimistic assumption that the page being
  	 * activated has just been added to this pagevec. Note that only
  	 * the local pagevec is examined as a !PageLRU page could be in the
  	 * process of being released, reclaimed, migrated or on a remote
  	 * pagevec that is currently being drained. Furthermore, marking
  	 * a remote pagevec's page PageActive potentially hits a race where
  	 * a page is marked PageActive just after it is added to the inactive
  	 * list causing accounting errors and BUG_ON checks to trigger.
  	 */
  	for (i = pagevec_count(pvec) - 1; i >= 0; i--) {
  		struct page *pagevec_page = pvec->pages[i];
  
  		if (pagevec_page == page) {
  			SetPageActive(page);
  			break;
  		}
  	}
  
  	put_cpu_var(lru_add_pvec);
  }
  /*
   * Mark a page as having seen activity.
   *
   * inactive,unreferenced	->	inactive,referenced
   * inactive,referenced		->	active,unreferenced
   * active,unreferenced		->	active,referenced
   *
   * When a newly allocated page is not yet visible, so safe for non-atomic ops,
   * __SetPageReferenced(page) may be substituted for mark_page_accessed(page).
   */
  void mark_page_accessed(struct page *page)
  {
  	page = compound_head(page);
  	if (!PageActive(page) && !PageUnevictable(page) &&
  			PageReferenced(page)) {
  
  		/*
  		 * If the page is on the LRU, queue it for activation via
  		 * activate_page_pvecs. Otherwise, assume the page is on a
  		 * pagevec, mark it active and it'll be moved to the active
  		 * LRU on the next drain.
  		 */
  		if (PageLRU(page))
  			activate_page(page);
  		else
  			__lru_cache_activate_page(page);
  		ClearPageReferenced(page);
  		if (page_is_file_cache(page))
  			workingset_activation(page);
  	} else if (!PageReferenced(page)) {
  		SetPageReferenced(page);
  	}
  	if (page_is_idle(page))
  		clear_page_idle(page);
  }
  EXPORT_SYMBOL(mark_page_accessed);
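  
  /*
   * Illustrative sketch (not part of this file): how repeated calls walk a
   * page up the state ladder documented above, assuming "page" starts out
   * inactive and unreferenced:
   *
   *	mark_page_accessed(page);  inactive,unreferenced -> inactive,referenced
   *	mark_page_accessed(page);  inactive,referenced   -> active,unreferenced
   */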
  
  static void __lru_cache_add(struct page *page)
  {
  	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  
  	get_page(page);
  	if (!pagevec_add(pvec, page) || PageCompound(page))
  		__pagevec_lru_add(pvec);
  	put_cpu_var(lru_add_pvec);
  }
  
  /**
   * lru_cache_add_anon - add a page to the page lists
   * @page: the page to add
   */
  void lru_cache_add_anon(struct page *page)
  {
  	if (PageActive(page))
  		ClearPageActive(page);
  	__lru_cache_add(page);
  }
  
  void lru_cache_add_file(struct page *page)
  {
  	if (PageActive(page))
  		ClearPageActive(page);
  	__lru_cache_add(page);
  }
  EXPORT_SYMBOL(lru_cache_add_file);
  
  /**
   * lru_cache_add - add a page to a page list
   * @page: the page to be added to the LRU.
   *
   * Queue the page for addition to the LRU via pagevec. The decision on whether
   * to add the page to the [in]active [file|anon] list is deferred until the
   * pagevec is drained. This gives the caller of lru_cache_add() a chance to
   * have the page added to the active list using mark_page_accessed().
   */
  void lru_cache_add(struct page *page)
  {
  	VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
  	VM_BUG_ON_PAGE(PageLRU(page), page);
  	__lru_cache_add(page);
  }
  /**
   * add_page_to_unevictable_list - add a page to the unevictable list
   * @page:  the page to be added to the unevictable list
   *
   * Add page directly to its zone's unevictable list.  To avoid races with
   * tasks that might be making the page evictable, through e.g. munlock,
   * munmap or exit, while it's not on the lru, we want to add the page
   * while it's locked or otherwise "invisible" to other tasks.  This is
   * difficult to do when using the pagevec cache, so bypass that.
   */
  void add_page_to_unevictable_list(struct page *page)
  {
  	struct pglist_data *pgdat = page_pgdat(page);
  	struct lruvec *lruvec;
  
  	spin_lock_irq(&pgdat->lru_lock);
  	lruvec = mem_cgroup_page_lruvec(page, pgdat);
  	ClearPageActive(page);
  	SetPageUnevictable(page);
  	SetPageLRU(page);
  	add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
  	spin_unlock_irq(&pgdat->lru_lock);
  }
  /**
   * lru_cache_add_active_or_unevictable
   * @page:  the page to be added to LRU
   * @vma:   vma in which page is mapped for determining reclaimability
   *
   * Place @page on the active or unevictable LRU list, depending on its
   * evictability.  Note that if the page is not evictable, it goes
   * directly back onto its zone's unevictable list; it does NOT use a
   * per-cpu pagevec.
   */
  void lru_cache_add_active_or_unevictable(struct page *page,
  					 struct vm_area_struct *vma)
  {
  	VM_BUG_ON_PAGE(PageLRU(page), page);
  
  	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
  		SetPageActive(page);
  		lru_cache_add(page);
  		return;
  	}
  
  	if (!TestSetPageMlocked(page)) {
  		/*
  		 * We use the irq-unsafe __mod_zone_page_state because this
  		 * counter is not modified from interrupt context, and the pte
  		 * lock is held (a spinlock), which implies preemption is disabled.
  		 */
  		__mod_zone_page_state(page_zone(page), NR_MLOCK,
  				    hpage_nr_pages(page));
  		count_vm_event(UNEVICTABLE_PGMLOCKED);
  	}
  	add_page_to_unevictable_list(page);
  }
  /*
   * If the page cannot be invalidated, it is moved to the
   * inactive list to speed up its reclaim.  It is moved to the
   * head of the list, rather than the tail, to give the flusher
   * threads some time to write it out, as this is much more
   * effective than the single-page writeout from reclaim.
   *
   * If the page is not mapped and is dirty or under writeback, it can be
   * reclaimed as soon as possible using PG_reclaim.
   *
   * 1. active, mapped page -> none
   * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
   * 3. inactive, mapped page -> none
   * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
   * 5. inactive, clean -> inactive, tail
   * 6. Others -> none
   *
   * In case 4, the page is moved to the head of the inactive list because
   * the VM expects it to be written out by the flusher threads, as this is
   * much more effective than the single-page writeout from reclaim.
   */
  static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
  			      void *arg)
  {
  	int lru, file;
  	bool active;
  
  	if (!PageLRU(page))
  		return;
  
  	if (PageUnevictable(page))
  		return;
  
  	/* Some processes are using the page */
  	if (page_mapped(page))
  		return;
  
  	active = PageActive(page);
  	file = page_is_file_cache(page);
  	lru = page_lru_base_type(page);
  
  	del_page_from_lru_list(page, lruvec, lru + active);
  	ClearPageActive(page);
  	ClearPageReferenced(page);
  	add_page_to_lru_list(page, lruvec, lru);
  
  	if (PageWriteback(page) || PageDirty(page)) {
  		/*
  		 * PG_reclaim could race with end_page_writeback().
  		 * That can confuse readahead, but the race window is
  		 * _really_ small and it's a non-critical problem.
  		 */
  		SetPageReclaim(page);
  	} else {
  		/*
  		 * The page's writeback ended while it was in the pagevec;
  		 * move the page to the tail of the inactive list.
  		 */
  		list_move_tail(&page->lru, &lruvec->lists[lru]);
  		__count_vm_event(PGROTATED);
  	}
  
  	if (active)
  		__count_vm_event(PGDEACTIVATE);
  	update_page_reclaim_stat(lruvec, file, 0);
  }
  
  static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
  			    void *arg)
  {
  	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
  		int file = page_is_file_cache(page);
  		int lru = page_lru_base_type(page);
  
  		del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);
  		ClearPageActive(page);
  		ClearPageReferenced(page);
  		add_page_to_lru_list(page, lruvec, lru);
  
  		__count_vm_event(PGDEACTIVATE);
  		update_page_reclaim_stat(lruvec, file, 0);
  	}
  }
  /*
   * Drain pages out of the cpu's pagevecs.
   * Either "cpu" is the current CPU, and preemption has already been
   * disabled; or "cpu" is being hot-unplugged, and is already dead.
   */
  void lru_add_drain_cpu(int cpu)
  {
  	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
  
  	if (pagevec_count(pvec))
  		__pagevec_lru_add(pvec);
  
  	pvec = &per_cpu(lru_rotate_pvecs, cpu);
  	if (pagevec_count(pvec)) {
  		unsigned long flags;
  
  		/* No harm done if a racing interrupt already did this */
  		local_irq_save(flags);
  		pagevec_move_tail(pvec);
  		local_irq_restore(flags);
  	}
  
  	pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
  	if (pagevec_count(pvec))
  		pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
  
  	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
  	if (pagevec_count(pvec))
  		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
  
  	activate_page_drain(cpu);
  }
  
  /**
   * deactivate_file_page - forcefully deactivate a file page
   * @page: page to deactivate
   *
   * This function hints the VM that @page is a good reclaim candidate,
   * for example if its invalidation fails due to the page being dirty
   * or under writeback.
   */
  void deactivate_file_page(struct page *page)
  {
  	/*
  	 * In a workload with many unevictable pages, such as one using
  	 * mprotect, deactivating unevictable pages to accelerate reclaim
  	 * is pointless.
  	 */
  	if (PageUnevictable(page))
  		return;
  	if (likely(get_page_unless_zero(page))) {
  		struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
  
  		if (!pagevec_add(pvec, page) || PageCompound(page))
  			pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
  		put_cpu_var(lru_deactivate_file_pvecs);
  	}
  }
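  
  /*
   * Illustrative sketch (not part of this file): how an invalidation path
   * can use the hint above.  "page" is a locked page cache page that
   * invalidate_inode_page() may fail to drop, e.g. because it is dirty or
   * under writeback:
   *
   *	ret = invalidate_inode_page(page);
   *	unlock_page(page);
   *	if (!ret)
   *		deactivate_file_page(page);
   */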
  /**
   * deactivate_page - deactivate a page
   * @page: page to deactivate
   *
   * deactivate_page() moves @page to the inactive list if @page was on the active
   * list and was not an unevictable page.  This is done to accelerate the reclaim
   * of @page.
   */
  void deactivate_page(struct page *page)
  {
  	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
  		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
  		get_page(page);
  		if (!pagevec_add(pvec, page) || PageCompound(page))
  			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
  		put_cpu_var(lru_deactivate_pvecs);
  	}
  }
  
  void lru_add_drain(void)
  {
  	lru_add_drain_cpu(get_cpu());
  	put_cpu();
  }
  
  static void lru_add_drain_per_cpu(struct work_struct *dummy)
  {
  	lru_add_drain();
  }
  
  static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
  
  /*
   * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
   * workqueue, aiding in getting memory freed.
   */
  static struct workqueue_struct *lru_add_drain_wq;
  
  static int __init lru_init(void)
  {
  	lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0);
  
  	if (WARN(!lru_add_drain_wq,
  		"Failed to create workqueue lru_add_drain_wq"))
  		return -ENOMEM;
  
  	return 0;
  }
  early_initcall(lru_init);
  
  void lru_add_drain_all(void)
  {
  	static DEFINE_MUTEX(lock);
  	static struct cpumask has_work;
  	int cpu;
  
  	mutex_lock(&lock);
  	get_online_cpus();
  	cpumask_clear(&has_work);
  
  	for_each_online_cpu(cpu) {
  		struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  
  		if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
  		    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
  		    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
  		    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
  		    need_activate_page_drain(cpu)) {
  			INIT_WORK(work, lru_add_drain_per_cpu);
  			queue_work_on(cpu, lru_add_drain_wq, work);
  			cpumask_set_cpu(cpu, &has_work);
  		}
  	}
  
  	for_each_cpu(cpu, &has_work)
  		flush_work(&per_cpu(lru_add_drain_work, cpu));
  
  	put_online_cpus();
  	mutex_unlock(&lock);
  }
  
  /**
   * release_pages - batched put_page()
   * @pages: array of pages to release
   * @nr: number of pages
   * @cold: whether the pages are cache cold
   *
   * Decrement the reference count on all the pages in @pages.  If it
   * fell to zero, remove the page from the LRU and free it.
   */
  void release_pages(struct page **pages, int nr, bool cold)
  {
  	int i;
  	LIST_HEAD(pages_to_free);
  	struct pglist_data *locked_pgdat = NULL;
  	struct lruvec *lruvec;
  	unsigned long uninitialized_var(flags);
  	unsigned int uninitialized_var(lock_batch);
  
  	for (i = 0; i < nr; i++) {
  		struct page *page = pages[i];
  
  		/*
  		 * Make sure the IRQ-safe lock-holding time does not get
  		 * excessive with a continuous string of pages from the
  		 * same pgdat. The lock is held only if pgdat != NULL.
  		 */
  		if (locked_pgdat && ++lock_batch == SWAP_CLUSTER_MAX) {
  			spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
  			locked_pgdat = NULL;
  		}
  
  		if (is_huge_zero_page(page))
  			continue;
  
  		page = compound_head(page);
  		if (!put_page_testzero(page))
  			continue;
  
  		if (PageCompound(page)) {
  			if (locked_pgdat) {
  				spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
  				locked_pgdat = NULL;
  			}
  			__put_compound_page(page);
  			continue;
  		}
  
  		if (PageLRU(page)) {
  			struct pglist_data *pgdat = page_pgdat(page);

  			if (pgdat != locked_pgdat) {
  				if (locked_pgdat)
  					spin_unlock_irqrestore(&locked_pgdat->lru_lock,
  									flags);
  				lock_batch = 0;
  				locked_pgdat = pgdat;
  				spin_lock_irqsave(&locked_pgdat->lru_lock, flags);
  			}
  
  			lruvec = mem_cgroup_page_lruvec(page, locked_pgdat);
  			VM_BUG_ON_PAGE(!PageLRU(page), page);
  			__ClearPageLRU(page);
  			del_page_from_lru_list(page, lruvec, page_off_lru(page));
  		}
  
  		/* Clear Active bit in case of parallel mark_page_accessed */
  		__ClearPageActive(page);
  
  		list_add(&page->lru, &pages_to_free);
  	}
  	if (locked_pgdat)
  		spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
  
  	mem_cgroup_uncharge_list(&pages_to_free);
  	free_hot_cold_page_list(&pages_to_free, cold);
  }
  EXPORT_SYMBOL(release_pages);
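  
  /*
   * Illustrative sketch (not part of this file): dropping the references on
   * a whole batch in one call instead of looping over put_page().  "pages"
   * and "nr" are assumed to come from a prior gang lookup that took one
   * reference per page:
   *
   *	release_pages(pages, nr, false);
   */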
  
  /*
   * The pages which we're about to release may be in the deferred lru-addition
   * queues.  That would prevent them from really being freed right now.  That's
   * OK from a correctness point of view but is inefficient - those pages may be
   * cache-warm and we want to give them back to the page allocator ASAP.
   *
   * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
   * and __pagevec_lru_add_active() call release_pages() directly to avoid
   * mutual recursion.
   */
  void __pagevec_release(struct pagevec *pvec)
  {
  	lru_add_drain();
  	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
  	pagevec_reinit(pvec);
  }
  EXPORT_SYMBOL(__pagevec_release);
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  /* used by __split_huge_page_refcount() */
  void lru_add_page_tail(struct page *page, struct page *page_tail,
  		       struct lruvec *lruvec, struct list_head *list)
  {
  	const int file = 0;

  
  	VM_BUG_ON_PAGE(!PageHead(page), page);
  	VM_BUG_ON_PAGE(PageCompound(page_tail), page);
  	VM_BUG_ON_PAGE(PageLRU(page_tail), page);
  	VM_BUG_ON(NR_CPUS != 1 &&
  		  !spin_is_locked(&lruvec_pgdat(lruvec)->lru_lock));
  
  	if (!list)
  		SetPageLRU(page_tail);
  
  	if (likely(PageLRU(page)))
  		list_add_tail(&page_tail->lru, &page->lru);
  	else if (list) {
  		/* page reclaim is reclaiming a huge page */
  		get_page(page_tail);
  		list_add_tail(&page_tail->lru, list);
  	} else {
  		struct list_head *list_head;
  		/*
  		 * Head page has not yet been counted, as an hpage,
  		 * so we must account for each subpage individually.
  		 *
  		 * Use the standard add function to put page_tail on the list,
  		 * but then correct its position so they all end up in order.
  		 */
  		add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail));
  		list_head = page_tail->lru.prev;
  		list_move_tail(&page_tail->lru, list_head);
  	}
  
  	if (!PageUnevictable(page))
  		update_page_reclaim_stat(lruvec, file, PageActive(page_tail));
  }
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  
  static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
  				 void *arg)
  {
  	int file = page_is_file_cache(page);
  	int active = PageActive(page);
  	enum lru_list lru = page_lru(page);
  
  	VM_BUG_ON_PAGE(PageLRU(page), page);
  
  	SetPageLRU(page);
  	add_page_to_lru_list(page, lruvec, lru);
  	update_page_reclaim_stat(lruvec, file, active);
  	trace_mm_lru_insertion(page, lru);
  }
  
  /*
   * Add the passed pages to the LRU, then drop the caller's refcount
   * on them.  Reinitialises the caller's pagevec.
   */
  void __pagevec_lru_add(struct pagevec *pvec)
  {
  	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL);
  }
  EXPORT_SYMBOL(__pagevec_lru_add);
  
  /**
   * pagevec_lookup_entries - gang pagecache lookup
   * @pvec:	Where the resulting entries are placed
   * @mapping:	The address_space to search
   * @start:	The starting entry index
   * @nr_entries:	The maximum number of entries
   * @indices:	The cache indices corresponding to the entries in @pvec
   *
   * pagevec_lookup_entries() will search for and return a group of up
   * to @nr_entries pages and shadow entries in the mapping.  All
   * entries are placed in @pvec.  pagevec_lookup_entries() takes a
   * reference against actual pages in @pvec.
   *
   * The search returns a group of mapping-contiguous entries with
   * ascending indexes.  There may be holes in the indices due to
   * not-present entries.
   *
   * pagevec_lookup_entries() returns the number of entries which were
   * found.
   */
  unsigned pagevec_lookup_entries(struct pagevec *pvec,
  				struct address_space *mapping,
  				pgoff_t start, unsigned nr_entries,
  				pgoff_t *indices)
  {
  	pvec->nr = find_get_entries(mapping, start, nr_entries,
  				    pvec->pages, indices);
  	return pagevec_count(pvec);
  }
  
  /**
   * pagevec_remove_exceptionals - pagevec exceptionals pruning
   * @pvec:	The pagevec to prune
   *
   * pagevec_lookup_entries() fills both pages and exceptional radix
   * tree entries into the pagevec.  This function prunes all
   * exceptionals from @pvec without leaving holes, so that it can be
   * passed on to page-only pagevec operations.
   */
  void pagevec_remove_exceptionals(struct pagevec *pvec)
  {
  	int i, j;
  
  	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
  		struct page *page = pvec->pages[i];
  		if (!radix_tree_exceptional_entry(page))
  			pvec->pages[j++] = page;
  	}
  	pvec->nr = j;
  }
  
  /**
   * pagevec_lookup - gang pagecache lookup
   * @pvec:	Where the resulting pages are placed
   * @mapping:	The address_space to search
   * @start:	The starting page index
   * @nr_pages:	The maximum number of pages
   *
   * pagevec_lookup() will search for and return a group of up to @nr_pages pages
   * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
   * reference against the pages in @pvec.
   *
   * The search returns a group of mapping-contiguous pages with ascending
   * indexes.  There may be holes in the indices due to not-present pages.
   *
   * pagevec_lookup() returns the number of pages which were found.
   */
  unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
  		pgoff_t start, unsigned nr_pages)
  {
  	pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
  	return pagevec_count(pvec);
  }
  EXPORT_SYMBOL(pagevec_lookup);
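  
  /*
   * Illustrative sketch (not part of this file): walking every page of a
   * hypothetical "mapping" with pagevec_lookup().  The pagevec holds the
   * page references until pagevec_release() drops them:
   *
   *	struct pagevec pvec;
   *	pgoff_t index = 0;
   *
   *	pagevec_init(&pvec, 0);
   *	while (pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE)) {
   *		int i;
   *
   *		for (i = 0; i < pagevec_count(&pvec); i++) {
   *			struct page *page = pvec.pages[i];
   *
   *			index = page->index + 1;
   *			(process the page here)
   *		}
   *		pagevec_release(&pvec);
   *		cond_resched();
   *	}
   */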
  unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
  		pgoff_t *index, int tag, unsigned nr_pages)
  {
  	pvec->nr = find_get_pages_tag(mapping, index, tag,
  					nr_pages, pvec->pages);
  	return pagevec_count(pvec);
  }
  EXPORT_SYMBOL(pagevec_lookup_tag);
  
  /*
   * Perform any setup for the swap system
   */
  void __init swap_setup(void)
  {
  	unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
  #ifdef CONFIG_SWAP
  	int i;
  
  	for (i = 0; i < MAX_SWAPFILES; i++)
  		spin_lock_init(&swapper_spaces[i].tree_lock);
  #endif
  	/* Use a smaller cluster for small-memory machines */
  	if (megs < 16)
  		page_cluster = 2;
  	else
  		page_cluster = 3;
  	/*
  	 * Right now other parts of the system mean that we
  	 * _really_ don't want to cluster much more.
  	 */
  }