Blame view

mm/swap.c 29.9 KB
457c89965   Thomas Gleixner   treewide: Add SPD...
1
  // SPDX-License-Identifier: GPL-2.0-only
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
8
  /*
   *  linux/mm/swap.c
   *
   *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   */
  
  /*
183ff22bb   Simon Arlott   spelling fixes: mm/
9
   * This file contains the default values for the operation of the
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
   * Linux VM subsystem. Fine-tuning documentation can be found in
570432470   Mauro Carvalho Chehab   docs: admin-guide...
11
   * Documentation/admin-guide/sysctl/vm.rst.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
15
16
17
18
19
20
21
22
23
24
   * Started 18.12.91
   * Swap aging added 23.2.95, Stephen Tweedie.
   * Buffermem limits added 12.3.98, Rik van Riel.
   */
  
  #include <linux/mm.h>
  #include <linux/sched.h>
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
  #include <linux/mman.h>
  #include <linux/pagemap.h>
  #include <linux/pagevec.h>
  #include <linux/init.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
25
  #include <linux/export.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
26
  #include <linux/mm_inline.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
  #include <linux/percpu_counter.h>
3565fce3a   Dan Williams   mm, x86: get_user...
28
  #include <linux/memremap.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
30
31
  #include <linux/percpu.h>
  #include <linux/cpu.h>
  #include <linux/notifier.h>
e0bf68dde   Peter Zijlstra   mm: bdi init hooks
32
  #include <linux/backing-dev.h>
66e1707bc   Balbir Singh   Memory controller...
33
  #include <linux/memcontrol.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
34
  #include <linux/gfp.h>
a27bb332c   Kent Overstreet   aio: don't includ...
35
  #include <linux/uio.h>
822fc6136   Naoya Horiguchi   mm: don't call __...
36
  #include <linux/hugetlb.h>
33c3fc71c   Vladimir Davydov   mm: introduce idl...
37
  #include <linux/page_idle.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
38

64d6519dd   Lee Schermerhorn   swap: cull unevic...
39
  #include "internal.h"
c6286c983   Mel Gorman   mm: add tracepoin...
40
41
  #define CREATE_TRACE_POINTS
  #include <trace/events/pagemap.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
42
43
  /* How many pages do we try to swap or page in/out together? */
  int page_cluster;
13f7f7898   Mel Gorman   mm: pagevec: defe...
44
  /*
   * Per-CPU pagevecs used to batch the LRU operations in this file.  Every
   * one of them is flushed by lru_add_drain_cpu(); activate_page_pvecs only
   * exists on SMP builds and is flushed through activate_page_drain().
   */
  static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
f84f9504b   Vegard Nossum   mm: remove initia...
45
  static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
cc5993bd7   Minchan Kim   mm: rename deacti...
46
  static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
9c276cc65   Minchan Kim   mm: introduce MAD...
47
  static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
48
  static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
a4a921aa5   Ming Li   mm/swap.c: put ac...
49
50
51
  #ifdef CONFIG_SMP
  static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
  #endif
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
52

b221385bc   Adrian Bunk   [PATCH] mm/: make...
53
54
55
56
  /*
   * This path almost never happens for VM activity - pages are normally
   * freed via pagevecs.  But it gets used by networking.
   *
   * If @page is still on an LRU list, unlink it and clear PageLRU while
   * holding pgdat->lru_lock with IRQ state saved.  PageWaiters is cleared
   * in every case.
   */
920c7a5d0   Harvey Harrison   mm: remove fastca...
57
  static void __page_cache_release(struct page *page)
b221385bc   Adrian Bunk   [PATCH] mm/: make...
58
59
  {
  	if (PageLRU(page)) {
f4b7e272b   Andrey Ryabinin   mm: remove zone_l...
60
  		pg_data_t *pgdat = page_pgdat(page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
61
62
  		struct lruvec *lruvec;
  		unsigned long flags;
b221385bc   Adrian Bunk   [PATCH] mm/: make...
63

f4b7e272b   Andrey Ryabinin   mm: remove zone_l...
64
65
  		spin_lock_irqsave(&pgdat->lru_lock, flags);
  		lruvec = mem_cgroup_page_lruvec(page, pgdat);
309381fea   Sasha Levin   mm: dump page whe...
66
  		/* Re-check under the lock: the page must still be on the LRU. */
  		VM_BUG_ON_PAGE(!PageLRU(page), page);
b221385bc   Adrian Bunk   [PATCH] mm/: make...
67
  		__ClearPageLRU(page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
68
  		del_page_from_lru_list(page, lruvec, page_off_lru(page));
f4b7e272b   Andrey Ryabinin   mm: remove zone_l...
69
  		spin_unlock_irqrestore(&pgdat->lru_lock, flags);
b221385bc   Adrian Bunk   [PATCH] mm/: make...
70
  	}
629060270   Nicholas Piggin   mm: add PageWaite...
71
  	__ClearPageWaiters(page);
918070634   Andrea Arcangeli   thp: alter compou...
72
73
74
75
76
  }
  
  /*
   * Release a non-compound page: take it off the LRU via
   * __page_cache_release(), call mem_cgroup_uncharge() on it and finally
   * hand it to free_unref_page().
   */
  static void __put_single_page(struct page *page)
  {
  	__page_cache_release(page);
7ae88534c   Yang Shi   mm: move mem_cgro...
77
  	mem_cgroup_uncharge(page);
2d4894b5d   Mel Gorman   mm: remove cold p...
78
  	free_unref_page(page);
b221385bc   Adrian Bunk   [PATCH] mm/: make...
79
  }
918070634   Andrea Arcangeli   thp: alter compou...
80
  /*
   * Release a compound page: run __page_cache_release() on it unless it is
   * a hugetlb page, then invoke the destructor returned by
   * get_compound_page_dtor().
   */
  static void __put_compound_page(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
81
  {
918070634   Andrea Arcangeli   thp: alter compou...
82
  	compound_page_dtor *dtor;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
83

822fc6136   Naoya Horiguchi   mm: don't call __...
84
85
86
87
88
89
90
91
  	/*
  	 * __page_cache_release() is supposed to be called for thp, not for
  	 * hugetlb. This is because hugetlb page does never have PageLRU set
  	 * (it's never listed to any LRU lists) and no memcg routines should
  	 * be called for hugetlb (it has a separate hugetlb_cgroup.)
  	 */
  	if (!PageHuge(page))
  		__page_cache_release(page);
918070634   Andrea Arcangeli   thp: alter compou...
92
93
94
  	dtor = get_compound_page_dtor(page);
  	(*dtor)(page);
  }
ddc58f27f   Kirill A. Shutemov   mm: drop tail pag...
95
  /*
   * Release @page.  A ZONE_DEVICE page only drops its pgmap reference and
   * is not handed back to the page allocator; any other page is released
   * through __put_compound_page() or __put_single_page() depending on
   * PageCompound().
   */
  void __put_page(struct page *page)
8519fb30e   Nick Piggin   [PATCH] mm: compo...
96
  {
713897038   Dan Williams   mm, zone_device: ...
97
98
99
100
101
102
103
104
105
  	if (is_zone_device_page(page)) {
  		put_dev_pagemap(page->pgmap);
  
  		/*
  		 * The page belongs to the device that created pgmap. Do
  		 * not return it to page allocator.
  		 */
  		return;
  	}
8519fb30e   Nick Piggin   [PATCH] mm: compo...
106
  	if (unlikely(PageCompound(page)))
ddc58f27f   Kirill A. Shutemov   mm: drop tail pag...
107
108
  		__put_compound_page(page);
  	else
918070634   Andrea Arcangeli   thp: alter compou...
109
  		__put_single_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
110
  }
ddc58f27f   Kirill A. Shutemov   mm: drop tail pag...
111
  EXPORT_SYMBOL(__put_page);
70b50f94f   Andrea Arcangeli   mm: thp: tail pag...
112

1d7ea7324   Alexander Zarochentsev   [PATCH] fuse: fix...
113
  /**
7682486b3   Randy Dunlap   mm: fix various k...
114
115
   * put_pages_list() - release a list of pages
   * @pages: list of pages threaded on page->lru
1d7ea7324   Alexander Zarochentsev   [PATCH] fuse: fix...
116
117
118
   *
   * Release a list of pages which are strung together on page.lru.  Currently
   * used by read_cache_pages() and related error recovery code.
   *
   * Each page is unlinked from the list and then passed to put_page(); the
   * loop runs until @pages is empty.
1d7ea7324   Alexander Zarochentsev   [PATCH] fuse: fix...
119
120
121
122
123
   */
  void put_pages_list(struct list_head *pages)
  {
  	while (!list_empty(pages)) {
  		struct page *victim;
f86196ea8   Nikolay Borisov   fs: don't open co...
124
  		victim = lru_to_page(pages);
1d7ea7324   Alexander Zarochentsev   [PATCH] fuse: fix...
125
  		list_del(&victim->lru);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
126
  		put_page(victim);
1d7ea7324   Alexander Zarochentsev   [PATCH] fuse: fix...
127
128
129
  	}
  }
  EXPORT_SYMBOL(put_pages_list);
18022c5d8   Mel Gorman   mm: add get_kerne...
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
  /*
   * get_kernel_pages() - pin kernel pages in memory
   * @kiov:	An array of struct kvec structures
   * @nr_segs:	number of segments to pin
   * @write:	pinning for read/write, currently ignored
   * @pages:	array that receives pointers to the pages pinned.
   *		Should be at least nr_segs long.
   *
   * Returns number of pages pinned. This may be fewer than the number
   * requested: pinning stops, with a warning, at the first segment whose
   * length is not PAGE_SIZE. If nr_segs is 0 or negative, returns 0.
   * This function never returns a negative value. Each page returned
   * must be released with a put_page() call when it is finished with.
   */
  int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
  		struct page **pages)
  {
  	int seg;
  
  	for (seg = 0; seg < nr_segs; seg++) {
  		/* Only whole-page segments are accepted. */
  		if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE))
  			return seg;
5a178119b   Mel Gorman   mm: add support f...
151
  		pages[seg] = kmap_to_page(kiov[seg].iov_base);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
152
  		get_page(pages[seg]);
18022c5d8   Mel Gorman   mm: add get_kerne...
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
  	}
  
  	return seg;
  }
  EXPORT_SYMBOL_GPL(get_kernel_pages);
  
  /*
   * get_kernel_page() - pin a kernel page in memory
   * @start:	starting kernel address
   * @write:	pinning for read/write, currently ignored
   * @pages:	array that receives pointer to the page pinned.
   *		Must be at least nr_segs long.
   *
   * Returns 1 if page is pinned. If the page was not pinned, returns
   * -errno. The page returned must be released with a put_page() call
   * when it is finished with.
   */
  int get_kernel_page(unsigned long start, int write, struct page **pages)
  {
  	const struct kvec kiov = {
  		.iov_base = (void *)start,
  		.iov_len = PAGE_SIZE
  	};
  
  	return get_kernel_pages(&kiov, 1, write, pages);
  }
  EXPORT_SYMBOL_GPL(get_kernel_page);
3dd7ae8ec   Shaohua Li   mm: simplify code...
180
  /*
   * Call @move_fn on every page in @pvec while holding the lru_lock of the
   * node that page belongs to, then pass the pages to release_pages() and
   * reinitialise @pvec.
   *
   * The lock is only dropped and retaken when a page sits on a different
   * node than the previous one.  @arg is passed through to @move_fn as is.
   */
  static void pagevec_lru_move_fn(struct pagevec *pvec,
fa9add641   Hugh Dickins   mm/memcg: apply a...
181
182
  	void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg),
  	void *arg)
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
183
184
  {
  	int i;
68eb0731c   Mel Gorman   mm, pagevec: rele...
185
  	struct pglist_data *pgdat = NULL;
fa9add641   Hugh Dickins   mm/memcg: apply a...
186
  	struct lruvec *lruvec;
3dd7ae8ec   Shaohua Li   mm: simplify code...
187
  	unsigned long flags = 0;
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
188
189
190
  
  	for (i = 0; i < pagevec_count(pvec); i++) {
  		struct page *page = pvec->pages[i];
68eb0731c   Mel Gorman   mm, pagevec: rele...
191
  		struct pglist_data *pagepgdat = page_pgdat(page);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
192

68eb0731c   Mel Gorman   mm, pagevec: rele...
193
194
195
196
197
  		/* Switch to the lru_lock of the node this page lives on. */
  		if (pagepgdat != pgdat) {
  			if (pgdat)
  				spin_unlock_irqrestore(&pgdat->lru_lock, flags);
  			pgdat = pagepgdat;
  			spin_lock_irqsave(&pgdat->lru_lock, flags);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
198
  		}
3dd7ae8ec   Shaohua Li   mm: simplify code...
199

68eb0731c   Mel Gorman   mm, pagevec: rele...
200
  		lruvec = mem_cgroup_page_lruvec(page, pgdat);
fa9add641   Hugh Dickins   mm/memcg: apply a...
201
  		(*move_fn)(page, lruvec, arg);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
202
  	}
68eb0731c   Mel Gorman   mm, pagevec: rele...
203
204
  	if (pgdat)
  		spin_unlock_irqrestore(&pgdat->lru_lock, flags);
c6f92f9fb   Mel Gorman   mm: remove cold p...
205
  	release_pages(pvec->pages, pvec->nr);
83896fb5e   Linus Torvalds   Revert "mm: simpl...
206
  	pagevec_reinit(pvec);
d8505dee1   Shaohua Li   mm: simplify code...
207
  }
fa9add641   Hugh Dickins   mm/memcg: apply a...
208
209
  /*
   * move_fn used by pagevec_move_tail(): if @page is on the LRU and not
   * unevictable, remove it from its current list, clear PageActive and add
   * it to the tail of the list page_lru() then selects.  @arg points to an
   * int that is incremented for every page actually moved.
   */
  static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
  				 void *arg)
3dd7ae8ec   Shaohua Li   mm: simplify code...
210
211
  {
  	int *pgmoved = arg;
3dd7ae8ec   Shaohua Li   mm: simplify code...
212

c55e8d035   Johannes Weiner   mm: vmscan: move ...
213
214
215
216
  	if (PageLRU(page) && !PageUnevictable(page)) {
  		del_page_from_lru_list(page, lruvec, page_lru(page));
  		ClearPageActive(page);
  		add_page_to_lru_list_tail(page, lruvec, page_lru(page));
3dd7ae8ec   Shaohua Li   mm: simplify code...
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
  		(*pgmoved)++;
  	}
  }
  
  /*
   * Run every page queued in @pvec through pagevec_move_tail_fn() and add
   * the number of pages that were actually moved to the PGROTATED counter.
   *
   * pagevec_move_tail() must be called with IRQ disabled.
   * Otherwise this may cause nasty races.
   */
  static void pagevec_move_tail(struct pagevec *pvec)
  {
  	int nr_rotated = 0;

  	pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &nr_rotated);
  	__count_vm_events(PGROTATED, nr_rotated);
  }
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
232
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
233
234
   * Writeback is about to end against a page which has been marked for immediate
   * reclaim.  If it still appears to be reclaimable, move it to the tail of the
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
235
   * inactive list.
   *
   * A page that is unlocked, clean, evictable and on the LRU gets an extra
   * reference and is queued on lru_rotate_pvecs of this CPU with IRQs
   * disabled.  The pagevec is flushed through pagevec_move_tail() when
   * pagevec_add() returns 0 or when the page is compound.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
236
   */
3dd7ae8ec   Shaohua Li   mm: simplify code...
237
  void rotate_reclaimable_page(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
238
  {
c55e8d035   Johannes Weiner   mm: vmscan: move ...
239
  	if (!PageLocked(page) && !PageDirty(page) &&
894bc3104   Lee Schermerhorn   Unevictable LRU I...
240
  	    !PageUnevictable(page) && PageLRU(page)) {
ac6aadb24   Miklos Szeredi   mm: rotate_reclai...
241
242
  		struct pagevec *pvec;
  		unsigned long flags;
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
243
  		get_page(page);
ac6aadb24   Miklos Szeredi   mm: rotate_reclai...
244
  		local_irq_save(flags);
7c8e0181e   Christoph Lameter   mm: replace __get...
245
  		pvec = this_cpu_ptr(&lru_rotate_pvecs);
8f182270d   Lukasz Odzioba   mm/swap.c: flush ...
246
  		if (!pagevec_add(pvec, page) || PageCompound(page))
ac6aadb24   Miklos Szeredi   mm: rotate_reclai...
247
248
249
  			pagevec_move_tail(pvec);
  		local_irq_restore(flags);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
250
  }
fa9add641   Hugh Dickins   mm/memcg: apply a...
251
  /*
   * Account one scanned page in the reclaim statistics of @lruvec and, when
   * @rotated is non-zero, one rotated page as well.  @file is used as the
   * index into the recent_scanned[] and recent_rotated[] arrays.
   */
  static void update_page_reclaim_stat(struct lruvec *lruvec,
3e2f41f1f   KOSAKI Motohiro   memcg: add zone_r...
252
253
  				     int file, int rotated)
  {
fa9add641   Hugh Dickins   mm/memcg: apply a...
254
  	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
3e2f41f1f   KOSAKI Motohiro   memcg: add zone_r...
255
256
257
258
  
  	reclaim_stat->recent_scanned[file]++;
  	if (rotated)
  		reclaim_stat->recent_rotated[file]++;
3e2f41f1f   KOSAKI Motohiro   memcg: add zone_r...
259
  }
fa9add641   Hugh Dickins   mm/memcg: apply a...
260
261
  /*
   * move_fn used with pagevec_lru_move_fn(): move a page that is on the LRU,
   * not yet active and not unevictable from its base list to the matching
   * active list.  Afterwards the activation tracepoint is emitted and
   * PGACTIVATE plus the reclaim statistics of @lruvec are updated.  Pages
   * that fail the initial flag check are left untouched.  @arg is unused.
   */
  static void __activate_page(struct page *page, struct lruvec *lruvec,
  			    void *arg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
262
  {
744ed1442   Shaohua Li   mm: batch activat...
263
  	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
7a608572a   Linus Torvalds   Revert "mm: batch...
264
265
  		int file = page_is_file_cache(page);
  		int lru = page_lru_base_type(page);
744ed1442   Shaohua Li   mm: batch activat...
266

fa9add641   Hugh Dickins   mm/memcg: apply a...
267
  		del_page_from_lru_list(page, lruvec, lru);
7a608572a   Linus Torvalds   Revert "mm: batch...
268
269
  		SetPageActive(page);
  		lru += LRU_ACTIVE;
fa9add641   Hugh Dickins   mm/memcg: apply a...
270
  		add_page_to_lru_list(page, lruvec, lru);
24b7e5819   Mel Gorman   mm: pagemap: avoi...
271
  		trace_mm_lru_activate(page);
4f98a2fee   Rik van Riel   vmscan: split LRU...
272

fa9add641   Hugh Dickins   mm/memcg: apply a...
273
274
  		__count_vm_event(PGACTIVATE);
  		update_page_reclaim_stat(lruvec, file, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
275
  	}
eb709b0d0   Shaohua Li   mm: batch activat...
276
277
278
  }
  
  #ifdef CONFIG_SMP
eb709b0d0   Shaohua Li   mm: batch activat...
279
280
281
282
283
284
285
  /*
   * Run the pages queued on the activate_page_pvecs of @cpu through
   * __activate_page().  Does nothing when that pagevec is empty.
   */
  static void activate_page_drain(int cpu)
  {
  	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);

  	if (!pagevec_count(pvec))
  		return;

  	pagevec_lru_move_fn(pvec, __activate_page, NULL);
  }
5fbc46163   Chris Metcalf   mm: make lru_add_...
286
287
288
289
  /* Report whether the activate_page_pvecs of @cpu holds any pages. */
  static bool need_activate_page_drain(int cpu)
  {
  	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);

  	return pagevec_count(pvec) != 0;
  }
eb709b0d0   Shaohua Li   mm: batch activat...
290
291
  /*
   * SMP variant: take a reference on the head page of @page and queue it on
   * the activate_page_pvecs of this CPU.  The queued batch is run through
   * __activate_page() as soon as pagevec_add() returns 0 or the page is
   * compound.  Pages that are not on the LRU, already active or
   * unevictable are left alone.
   */
  void activate_page(struct page *page)
  {
800d8c63b   Kirill A. Shutemov   shmem: add huge p...
292
  	page = compound_head(page);
eb709b0d0   Shaohua Li   mm: batch activat...
293
294
  	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
295
  		get_page(page);
8f182270d   Lukasz Odzioba   mm/swap.c: flush ...
296
  		if (!pagevec_add(pvec, page) || PageCompound(page))
eb709b0d0   Shaohua Li   mm: batch activat...
297
298
299
300
301
302
303
304
305
306
307
308
  			pagevec_lru_move_fn(pvec, __activate_page, NULL);
  		put_cpu_var(activate_page_pvecs);
  	}
  }
  
  #else
  /*
   * !CONFIG_SMP stub: there is no activate_page_pvecs in this configuration,
   * so there is nothing to drain.
   */
  static inline void activate_page_drain(int cpu)
  {
  }
  
  /*
   * !CONFIG_SMP variant: activate the head page of @page right away by
   * calling __activate_page() under the lru_lock of its node, taken with
   * spin_lock_irq().
   */
  void activate_page(struct page *page)
  {
f4b7e272b   Andrey Ryabinin   mm: remove zone_l...
309
  	pg_data_t *pgdat = page_pgdat(page);
eb709b0d0   Shaohua Li   mm: batch activat...
310

800d8c63b   Kirill A. Shutemov   shmem: add huge p...
311
  	page = compound_head(page);
f4b7e272b   Andrey Ryabinin   mm: remove zone_l...
312
313
314
  	spin_lock_irq(&pgdat->lru_lock);
  	__activate_page(page, mem_cgroup_page_lruvec(page, pgdat), NULL);
  	spin_unlock_irq(&pgdat->lru_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
315
  }
eb709b0d0   Shaohua Li   mm: batch activat...
316
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
317

059285a25   Mel Gorman   mm: activate !Pag...
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
  /*
   * Set PageActive on @page if it is currently queued on the lru_add_pvec
   * of this CPU; otherwise leave it untouched.
   */
  static void __lru_cache_activate_page(struct page *page)
  {
  	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  	int idx = pagevec_count(pvec);

  	/*
  	 * Walk the pagevec from its newest entry backwards, optimistically
  	 * assuming the page was queued only a moment ago. Only the local
  	 * pagevec is looked at: a !PageLRU page may be in the middle of
  	 * being released, reclaimed or migrated, or may sit on a remote
  	 * pagevec that is being drained right now. Setting PageActive on a
  	 * page of a remote pagevec could also race with that page having
  	 * just been put on the inactive list, which leads to accounting
  	 * errors and triggers BUG_ON checks.
  	 */
  	while (--idx >= 0) {
  		if (pvec->pages[idx] != page)
  			continue;

  		SetPageActive(page);
  		break;
  	}

  	put_cpu_var(lru_add_pvec);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
344
345
346
347
348
349
  /*
   * Mark a page as having seen activity.
   *
   * inactive,unreferenced	->	inactive,referenced
   * inactive,referenced		->	active,unreferenced
   * active,unreferenced		->	active,referenced
eb39d618f   Hugh Dickins   mm: replace init_...
350
351
352
   *
   * When a newly allocated page is not yet visible, so safe for non-atomic ops,
   * __SetPageReferenced(page) may be substituted for mark_page_accessed(page).
   *
   * The flags are examined and updated on the compound head of @page.
   * Unevictable pages are never activated here, they only get PageReferenced
   * set.  The idle flag of the page is cleared on every call.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
353
   */
920c7a5d0   Harvey Harrison   mm: remove fastca...
354
  void mark_page_accessed(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
355
  {
e90309c9f   Kirill A. Shutemov   thp: allow mlocke...
356
  	page = compound_head(page);
894bc3104   Lee Schermerhorn   Unevictable LRU I...
357
  	if (!PageActive(page) && !PageUnevictable(page) &&
059285a25   Mel Gorman   mm: activate !Pag...
358
359
360
361
362
363
364
365
366
367
368
369
  			PageReferenced(page)) {
  
  		/*
  		 * If the page is on the LRU, queue it for activation via
  		 * activate_page_pvecs. Otherwise, assume the page is on a
  		 * pagevec, mark it active and it'll be moved to the active
  		 * LRU on the next drain.
  		 */
  		if (PageLRU(page))
  			activate_page(page);
  		else
  			__lru_cache_activate_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
370
  		ClearPageReferenced(page);
a528910e1   Johannes Weiner   mm: thrash detect...
371
372
  		if (page_is_file_cache(page))
  			workingset_activation(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
373
374
375
  	} else if (!PageReferenced(page)) {
  		SetPageReferenced(page);
  	}
33c3fc71c   Vladimir Davydov   mm: introduce idl...
376
377
  	if (page_is_idle(page))
  		clear_page_idle(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
378
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
379
  EXPORT_SYMBOL(mark_page_accessed);
2329d3751   Jianyu Zhan   mm/swap.c: clean ...
380
  /*
   * Take a reference on @page and queue it on the lru_add_pvec of this CPU.
   * The pagevec is flushed with __pagevec_lru_add() when pagevec_add()
   * returns 0 or when @page is compound.
   */
  static void __lru_cache_add(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
381
  {
13f7f7898   Mel Gorman   mm: pagevec: defe...
382
  	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
383
  	get_page(page);
8f182270d   Lukasz Odzioba   mm/swap.c: flush ...
384
  	if (!pagevec_add(pvec, page) || PageCompound(page))
a0b8cab3b   Mel Gorman   mm: remove lru pa...
385
  		__pagevec_lru_add(pvec);
13f7f7898   Mel Gorman   mm: pagevec: defe...
386
  	put_cpu_var(lru_add_pvec);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
387
  }
2329d3751   Jianyu Zhan   mm/swap.c: clean ...
388
389
  
  /**
e02a9f048   Randy Dunlap   mm/swap.c: make f...
390
   * lru_cache_add_anon - add a page to the page lists
2329d3751   Jianyu Zhan   mm/swap.c: clean ...
391
392
393
394
   * @page: the page to add
   *
   * Any PageActive flag on @page is cleared before the page is queued with
   * __lru_cache_add().
   */
  void lru_cache_add_anon(struct page *page)
  {
6fb81a17d   Mel Gorman   mm: do not use un...
395
396
  	if (PageActive(page))
  		ClearPageActive(page);
2329d3751   Jianyu Zhan   mm/swap.c: clean ...
397
398
399
400
401
  	__lru_cache_add(page);
  }
  
  /*
   * lru_cache_add_file - add a page to the page lists
   * @page: the page to add
   *
   * Any PageActive flag on @page is cleared before the page is queued with
   * __lru_cache_add().
   */
  void lru_cache_add_file(struct page *page)
  {
6fb81a17d   Mel Gorman   mm: do not use un...
402
403
  	if (PageActive(page))
  		ClearPageActive(page);
2329d3751   Jianyu Zhan   mm/swap.c: clean ...
404
405
406
  	__lru_cache_add(page);
  }
  EXPORT_SYMBOL(lru_cache_add_file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
407

f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
408
  /**
c53954a09   Mel Gorman   mm: remove lru pa...
409
   * lru_cache_add - add a page to a page list
f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
410
   * @page: the page to be added to the LRU.
2329d3751   Jianyu Zhan   mm/swap.c: clean ...
411
412
413
414
415
   *
   * Queue the page for addition to the LRU via pagevec. The decision on whether
   * to add the page to the [in]active [file|anon] list is deferred until the
   * pagevec is drained. This gives the caller of lru_cache_add() a chance to
   * have the page added to the active list using mark_page_accessed().
   *
   * The page must not already be on the LRU, and must not be both active and
   * unevictable; both conditions are checked with VM_BUG_ON_PAGE().
f04e9ebbe   KOSAKI Motohiro   swap: use an arra...
416
   */
c53954a09   Mel Gorman   mm: remove lru pa...
417
  void lru_cache_add(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
418
  {
309381fea   Sasha Levin   mm: dump page whe...
419
420
  	VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
  	VM_BUG_ON_PAGE(PageLRU(page), page);
c53954a09   Mel Gorman   mm: remove lru pa...
421
  	__lru_cache_add(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
422
  }
894bc3104   Lee Schermerhorn   Unevictable LRU I...
423
  /**
00501b531   Johannes Weiner   mm: memcontrol: r...
424
425
426
427
428
429
430
431
432
433
434
435
436
   * lru_cache_add_active_or_unevictable
   * @page:  the page to be added to LRU
   * @vma:   vma in which page is mapped for determining reclaimability
   *
   * Place @page on the active or unevictable LRU list, depending on its
   * evictability.  The page is marked active unless @vma has VM_LOCKED set
   * and no VM_SPECIAL bit set; in that case it is marked mlocked instead,
   * and NR_MLOCK is accounted if the flag was not set before.  Either way
   * the page is then queued with lru_cache_add().
   */
  void lru_cache_add_active_or_unevictable(struct page *page,
  					 struct vm_area_struct *vma)
  {
  	VM_BUG_ON_PAGE(PageLRU(page), page);
9c4e6b1a7   Shakeel Butt   mm, mlock, vmscan...
437
  	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
00501b531   Johannes Weiner   mm: memcontrol: r...
438
  		SetPageActive(page);
9c4e6b1a7   Shakeel Butt   mm, mlock, vmscan...
439
  	else if (!TestSetPageMlocked(page)) {
00501b531   Johannes Weiner   mm: memcontrol: r...
440
441
442
443
444
445
446
447
448
  		/*
  		 * We use the irq-unsafe __mod_zone_page_state because this
  		 * counter is not modified from interrupt context, and the pte
  		 * lock is held (spinlock), which implies preemption disabled.
  		 */
  		__mod_zone_page_state(page_zone(page), NR_MLOCK,
  				    hpage_nr_pages(page));
  		count_vm_event(UNEVICTABLE_PGMLOCKED);
  	}
9c4e6b1a7   Shakeel Butt   mm, mlock, vmscan...
449
  	lru_cache_add(page);
00501b531   Johannes Weiner   mm: memcontrol: r...
450
  }
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
451
  /*
315601809   Minchan Kim   mm: deactivate in...
452
453
454
455
456
   * If the page can not be invalidated, it is moved to the
   * inactive list to speed up its reclaim.  It is moved to the
   * head of the list, rather than the tail, to give the flusher
   * threads some time to write it out, as this is much more
   * effective than the single-page writeout from reclaim.
278df9f45   Minchan Kim   mm: reclaim inval...
457
458
459
460
461
462
463
464
465
466
467
468
469
470
   *
   * If the page is not mapped and is dirty or under writeback, it
   * can be reclaimed asap using PG_reclaim.
   *
   * 1. active, mapped page -> none
   * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
   * 3. inactive, mapped page -> none
   * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
   * 5. inactive, clean -> inactive, tail
   * 6. Others -> none
   *
   * In case 4 the page is moved to the head of the inactive list because
   * the VM expects it to be written out by the flusher threads, as this
   * is much more effective than the single-page writeout from reclaim.
315601809   Minchan Kim   mm: deactivate in...
471
   */
cc5993bd7   Minchan Kim   mm: rename deacti...
472
  static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
fa9add641   Hugh Dickins   mm/memcg: apply a...
473
  			      void *arg)
315601809   Minchan Kim   mm: deactivate in...
474
475
  {
  	int lru, file;
278df9f45   Minchan Kim   mm: reclaim inval...
476
  	bool active;
315601809   Minchan Kim   mm: deactivate in...
477

278df9f45   Minchan Kim   mm: reclaim inval...
478
  	if (!PageLRU(page))
315601809   Minchan Kim   mm: deactivate in...
479
  		return;
bad49d9c8   Minchan Kim   mm: check PageUne...
480
481
  	if (PageUnevictable(page))
  		return;
315601809   Minchan Kim   mm: deactivate in...
482
483
484
  	/* Some processes are using the page */
  	if (page_mapped(page))
  		return;
278df9f45   Minchan Kim   mm: reclaim inval...
485
  	active = PageActive(page);
315601809   Minchan Kim   mm: deactivate in...
486
487
  	file = page_is_file_cache(page);
  	lru = page_lru_base_type(page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
488
489
  
  	/* lru + active selects the list the page currently sits on. */
  	del_page_from_lru_list(page, lruvec, lru + active);
315601809   Minchan Kim   mm: deactivate in...
490
491
  	ClearPageActive(page);
  	ClearPageReferenced(page);
315601809   Minchan Kim   mm: deactivate in...
492

278df9f45   Minchan Kim   mm: reclaim inval...
493
494
495
496
497
498
  	if (PageWriteback(page) || PageDirty(page)) {
  		/*
  		 * Setting PG_reclaim can race with end_page_writeback(),
  		 * which can confuse readahead.  But the race window is
  		 * _really_ small and the problem is non-critical.
  		 */
e7a1aaf28   Yu Zhao   mm: replace list_...
499
  		add_page_to_lru_list(page, lruvec, lru);
278df9f45   Minchan Kim   mm: reclaim inval...
500
501
502
503
504
505
  		SetPageReclaim(page);
  	} else {
  		/*
  		 * Writeback of the page ended while it sat in the pagevec,
  		 * so move the page to the tail of the inactive list.
  		 */
e7a1aaf28   Yu Zhao   mm: replace list_...
506
  		add_page_to_lru_list_tail(page, lruvec, lru);
278df9f45   Minchan Kim   mm: reclaim inval...
507
508
509
510
511
  		__count_vm_event(PGROTATED);
  	}
  
  	if (active)
  		__count_vm_event(PGDEACTIVATE);
fa9add641   Hugh Dickins   mm/memcg: apply a...
512
  	update_page_reclaim_stat(lruvec, file, 0);
315601809   Minchan Kim   mm: deactivate in...
513
  }
9c276cc65   Minchan Kim   mm: introduce MAD...
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
  /*
   * move_fn used with pagevec_lru_move_fn(): take a page that is on the LRU,
   * active and not unevictable off its active list and add it to the
   * matching inactive list with PageActive and PageReferenced cleared.
   * PGDEACTIVATE and the reclaim statistics of @lruvec are updated as well.
   * Any other page is left untouched.  @arg is unused.
   */
  static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
  			    void *arg)
  {
  	int is_file, base_lru;

  	if (!PageLRU(page) || !PageActive(page) || PageUnevictable(page))
  		return;

  	is_file = page_is_file_cache(page);
  	base_lru = page_lru_base_type(page);

  	del_page_from_lru_list(page, lruvec, base_lru + LRU_ACTIVE);
  	ClearPageActive(page);
  	ClearPageReferenced(page);
  	add_page_to_lru_list(page, lruvec, base_lru);

  	__count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));
  	update_page_reclaim_stat(lruvec, is_file, 0);
  }
10853a039   Minchan Kim   mm: move lazily f...
530

f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
531
  /*
   * move_fn used with pagevec_lru_move_fn(): turn an anonymous, swap-backed
   * LRU page that is neither in the swap cache nor unevictable into a
   * lazyfree page.  The page is taken off its anon list, loses its Active,
   * Referenced and SwapBacked flags and is added to LRU_INACTIVE_FILE.
   * PGLAZYFREE is counted globally and for the memcg of the page.
   * @arg is unused.
   */
  static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
10853a039   Minchan Kim   mm: move lazily f...
532
533
  			    void *arg)
  {
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
534
  	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
24c92eb7d   Shaohua Li   mm: avoid marking...
535
  	    !PageSwapCache(page) && !PageUnevictable(page)) {
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
536
  		bool active = PageActive(page);
10853a039   Minchan Kim   mm: move lazily f...
537

f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
538
539
  		del_page_from_lru_list(page, lruvec,
  				       LRU_INACTIVE_ANON + active);
10853a039   Minchan Kim   mm: move lazily f...
540
541
  		ClearPageActive(page);
  		ClearPageReferenced(page);
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
542
543
544
545
546
547
548
  		/*
  		 * lazyfree pages are clean anonymous pages. They have
  		 * SwapBacked flag cleared to distinguish them from normal
  		 * anonymous pages
  		 */
  		ClearPageSwapBacked(page);
  		add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE);
10853a039   Minchan Kim   mm: move lazily f...
549

f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
550
  		__count_vm_events(PGLAZYFREE, hpage_nr_pages(page));
2262185c5   Roman Gushchin   mm: per-cgroup me...
551
  		count_memcg_page_event(page, PGLAZYFREE);
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
552
  		update_page_reclaim_stat(lruvec, 1, 0);
10853a039   Minchan Kim   mm: move lazily f...
553
554
  	}
  }
315601809   Minchan Kim   mm: deactivate in...
555
  /*
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
556
557
558
559
   * Drain pages out of the cpu's pagevecs.
   * Either "cpu" is the current CPU, and preemption has already been
   * disabled; or "cpu" is being hot-unplugged, and is already dead.
   *
   * The pagevecs are flushed in this order, each only if it is not empty:
   * lru_add_pvec, lru_rotate_pvecs (with local IRQs disabled),
   * lru_deactivate_file_pvecs, lru_deactivate_pvecs, lru_lazyfree_pvecs and
   * finally the activation pagevec through activate_page_drain().
   */
f0cb3c76a   Konstantin Khlebnikov   mm: drain percpu ...
560
  void lru_add_drain_cpu(int cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
561
  {
13f7f7898   Mel Gorman   mm: pagevec: defe...
562
  	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
563

13f7f7898   Mel Gorman   mm: pagevec: defe...
564
  	if (pagevec_count(pvec))
a0b8cab3b   Mel Gorman   mm: remove lru pa...
565
  		__pagevec_lru_add(pvec);
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
566
567
568
569
570
571
572
573
574
575
  
  	pvec = &per_cpu(lru_rotate_pvecs, cpu);
  	if (pagevec_count(pvec)) {
  		unsigned long flags;
  
  		/* No harm done if a racing interrupt already did this */
  		local_irq_save(flags);
  		pagevec_move_tail(pvec);
  		local_irq_restore(flags);
  	}
315601809   Minchan Kim   mm: deactivate in...
576

cc5993bd7   Minchan Kim   mm: rename deacti...
577
  	pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
315601809   Minchan Kim   mm: deactivate in...
578
  	if (pagevec_count(pvec))
cc5993bd7   Minchan Kim   mm: rename deacti...
579
  		pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
eb709b0d0   Shaohua Li   mm: batch activat...
580

9c276cc65   Minchan Kim   mm: introduce MAD...
581
582
583
  	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
  	if (pagevec_count(pvec))
  		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
584
  	pvec = &per_cpu(lru_lazyfree_pvecs, cpu);
10853a039   Minchan Kim   mm: move lazily f...
585
  	if (pagevec_count(pvec))
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
586
  		pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
10853a039   Minchan Kim   mm: move lazily f...
587

eb709b0d0   Shaohua Li   mm: batch activat...
588
  	activate_page_drain(cpu);
315601809   Minchan Kim   mm: deactivate in...
589
590
591
  }
  
  /**
cc5993bd7   Minchan Kim   mm: rename deacti...
592
   * deactivate_file_page - forcefully deactivate a file page
315601809   Minchan Kim   mm: deactivate in...
593
594
595
596
597
598
   * @page: page to deactivate
   *
   * This function hints the VM that @page is a good reclaim candidate,
   * for example if its invalidation fails due to the page being dirty
   * or under writeback.
   */
cc5993bd7   Minchan Kim   mm: rename deacti...
599
  void deactivate_file_page(struct page *page)
315601809   Minchan Kim   mm: deactivate in...
600
  {
821ed6bbe   Minchan Kim   mm: filter unevic...
601
  	/*
cc5993bd7   Minchan Kim   mm: rename deacti...
602
603
  	 * In a workload with many unevictable page such as mprotect,
  	 * unevictable page deactivation for accelerating reclaim is pointless.
821ed6bbe   Minchan Kim   mm: filter unevic...
604
605
606
  	 */
  	if (PageUnevictable(page))
  		return;
315601809   Minchan Kim   mm: deactivate in...
607
  	if (likely(get_page_unless_zero(page))) {
cc5993bd7   Minchan Kim   mm: rename deacti...
608
  		struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
315601809   Minchan Kim   mm: deactivate in...
609

8f182270d   Lukasz Odzioba   mm/swap.c: flush ...
610
  		if (!pagevec_add(pvec, page) || PageCompound(page))
cc5993bd7   Minchan Kim   mm: rename deacti...
611
612
  			pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
  		put_cpu_var(lru_deactivate_file_pvecs);
315601809   Minchan Kim   mm: deactivate in...
613
  	}
80bfed904   Andrew Morton   [PATCH] consolida...
614
  }
9c276cc65   Minchan Kim   mm: introduce MAD...
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
  /*
   * deactivate_page - deactivate a page
   * @page: page to deactivate
   *
   * Queue @page for a move to the inactive list, provided it is currently on
   * the LRU, active, and not unevictable.  Pages failing any of those checks
   * are left alone.  The intent is to accelerate the reclaim of @page.
   */
  void deactivate_page(struct page *page)
  {
  	struct pagevec *pvec;

  	if (!PageLRU(page) || !PageActive(page) || PageUnevictable(page))
  		return;

  	pvec = &get_cpu_var(lru_deactivate_pvecs);

  	/* Reference held on behalf of the pagevec entry. */
  	get_page(page);
  	if (!pagevec_add(pvec, page) || PageCompound(page))
  		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
  	put_cpu_var(lru_deactivate_pvecs);
  }
10853a039   Minchan Kim   mm: move lazily f...
634
  /**
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
635
   * mark_page_lazyfree - make an anon page lazyfree
10853a039   Minchan Kim   mm: move lazily f...
636
637
   * @page: page to deactivate
   *
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
638
639
   * mark_page_lazyfree() moves @page to the inactive file list.
   * This is done to accelerate the reclaim of @page.
10853a039   Minchan Kim   mm: move lazily f...
640
   */
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
641
  void mark_page_lazyfree(struct page *page)
10853a039   Minchan Kim   mm: move lazily f...
642
  {
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
643
  	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
24c92eb7d   Shaohua Li   mm: avoid marking...
644
  	    !PageSwapCache(page) && !PageUnevictable(page)) {
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
645
  		struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);
10853a039   Minchan Kim   mm: move lazily f...
646

09cbfeaf1   Kirill A. Shutemov   mm, fs: get rid o...
647
  		get_page(page);
8f182270d   Lukasz Odzioba   mm/swap.c: flush ...
648
  		if (!pagevec_add(pvec, page) || PageCompound(page))
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
649
650
  			pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
  		put_cpu_var(lru_lazyfree_pvecs);
10853a039   Minchan Kim   mm: move lazily f...
651
652
  	}
  }
80bfed904   Andrew Morton   [PATCH] consolida...
653
654
  /*
   * Drain the per-cpu LRU pagevecs of the CPU this code is running on.
   * The work is bracketed by get_cpu()/put_cpu() so that the CPU id handed
   * to lru_add_drain_cpu() stays the current one for the whole drain.
   */
  void lru_add_drain(void)
  {
  	int this_cpu = get_cpu();

  	lru_add_drain_cpu(this_cpu);
  	put_cpu();
  }
6ea183d60   Michal Hocko   mm: handle lru_ad...
658
659
660
  #ifdef CONFIG_SMP
  
  /* One work item per CPU, queued by lru_add_drain_all(). */
  static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);

  /*
   * Work handler: drain the pagevecs of whichever CPU the work item runs on.
   * The work_struct argument itself is not used.
   */
  static void lru_add_drain_per_cpu(struct work_struct *unused_work)
  {
  	lru_add_drain();
  }
9852a7212   Michal Hocko   mm: drop hotplug ...
665
666
667
668
669
670
671
672
  /*
   * Doesn't need any cpu hotplug locking because we do rely on per-cpu
   * kworkers being shut down before our page_alloc_cpu_dead callback is
   * executed on the offlined cpu.
   * Calling this function with cpu hotplug locks held can actually lead
   * to obscure indirect dependencies via WQ context.
   */
  void lru_add_drain_all(void)
053837fce   Nick Piggin   [PATCH] mm: migra...
673
  {
5fbc46163   Chris Metcalf   mm: make lru_add_...
674
675
676
  	static DEFINE_MUTEX(lock);
  	static struct cpumask has_work;
  	int cpu;
ce612879d   Michal Hocko   mm: move pcp and ...
677
678
679
680
681
682
  	/*
  	 * Make sure nobody triggers this path before mm_percpu_wq is fully
  	 * initialized.
  	 */
  	if (WARN_ON(!mm_percpu_wq))
  		return;
5fbc46163   Chris Metcalf   mm: make lru_add_...
683
  	mutex_lock(&lock);
5fbc46163   Chris Metcalf   mm: make lru_add_...
684
685
686
687
688
689
690
  	cpumask_clear(&has_work);
  
  	for_each_online_cpu(cpu) {
  		struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  
  		if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
  		    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
cc5993bd7   Minchan Kim   mm: rename deacti...
691
  		    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
9c276cc65   Minchan Kim   mm: introduce MAD...
692
  		    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
f7ad2a6cb   Shaohua Li   mm: move MADV_FRE...
693
  		    pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
5fbc46163   Chris Metcalf   mm: make lru_add_...
694
695
  		    need_activate_page_drain(cpu)) {
  			INIT_WORK(work, lru_add_drain_per_cpu);
ce612879d   Michal Hocko   mm: move pcp and ...
696
  			queue_work_on(cpu, mm_percpu_wq, work);
5fbc46163   Chris Metcalf   mm: make lru_add_...
697
698
699
700
701
702
  			cpumask_set_cpu(cpu, &has_work);
  		}
  	}
  
  	for_each_cpu(cpu, &has_work)
  		flush_work(&per_cpu(lru_add_drain_work, cpu));
5fbc46163   Chris Metcalf   mm: make lru_add_...
703
  	mutex_unlock(&lock);
053837fce   Nick Piggin   [PATCH] mm: migra...
704
  }
6ea183d60   Michal Hocko   mm: handle lru_ad...
705
706
707
708
709
710
  #else
  /*
   * Variant built when CONFIG_SMP is not set: draining "all" CPUs is just a
   * plain lru_add_drain() call, with no work items involved.
   */
  void lru_add_drain_all(void)
  {
  	lru_add_drain();
  }
  #endif
053837fce   Nick Piggin   [PATCH] mm: migra...
711

aabfb5729   Michal Hocko   mm: memcontrol: d...
712
  /**
   * release_pages - batched put_page()
   * @pages: array of pages to release
   * @nr: number of pages
   *
   * Decrement the reference count on all the pages in @pages.  If it
   * fell to zero, remove the page from the LRU and free it.
   *
   * Pages whose count reached zero are collected on a local list and handed
   * to mem_cgroup_uncharge_list() and free_unref_page_list() in one go after
   * the loop.  The pgdat lru_lock is taken only when a PageLRU page is
   * encountered and is kept across consecutive iterations on the same pgdat.
   */
c6f92f9fb   Mel Gorman   mm: remove cold p...
720
  void release_pages(struct page **pages, int nr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
721
722
  {
  	int i;
cc59850ef   Konstantin Khlebnikov   mm: add free_hot_...
723
  	LIST_HEAD(pages_to_free);
599d0c954   Mel Gorman   mm, vmscan: move ...
724
  	/* pgdat whose lru_lock is currently held, or NULL if none is held */
  	struct pglist_data *locked_pgdat = NULL;
fa9add641   Hugh Dickins   mm/memcg: apply a...
725
  	struct lruvec *lruvec;
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
726
  	unsigned long uninitialized_var(flags);
aabfb5729   Michal Hocko   mm: memcontrol: d...
727
  	/*
  	 * Only read while locked_pgdat is non-NULL, and reset to 0 right
  	 * before the lock is taken, hence no initializer.
  	 */
  	unsigned int uninitialized_var(lock_batch);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
728

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
729
730
  	for (i = 0; i < nr; i++) {
  		struct page *page = pages[i];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
731

aabfb5729   Michal Hocko   mm: memcontrol: d...
732
733
734
  		/*
  		 * Make sure the IRQ-safe lock-holding time does not get
  		 * excessive with a continuous string of pages from the
  		 * same pgdat. The lock is held only if locked_pgdat != NULL.
  		 */
599d0c954   Mel Gorman   mm, vmscan: move ...
737
738
739
  		if (locked_pgdat && ++lock_batch == SWAP_CLUSTER_MAX) {
  			spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
  			locked_pgdat = NULL;
aabfb5729   Michal Hocko   mm: memcontrol: d...
740
  		}
6fcb52a56   Aaron Lu   thp: reduce usage...
741
  		/* The huge zero page is skipped without touching its refcount */
  		if (is_huge_zero_page(page))
aa88b68c3   Kirill A. Shutemov   thp: keep huge ze...
742
  			continue;
aa88b68c3   Kirill A. Shutemov   thp: keep huge ze...
743

c5d6c45e9   Ira Weiny   mm/swap: fix rele...
744
  		if (is_zone_device_page(page)) {
df6ad6983   Jérôme Glisse   mm/device-public-...
745
746
747
748
749
  			/* Drop the lru_lock before handling a ZONE_DEVICE page */
  			if (locked_pgdat) {
  				spin_unlock_irqrestore(&locked_pgdat->lru_lock,
  						       flags);
  				locked_pgdat = NULL;
  			}
c5d6c45e9   Ira Weiny   mm/swap: fix rele...
750
751
752
753
754
755
756
757
  			/*
  			 * ZONE_DEVICE pages that return 'false' from
  			 * put_devmap_managed_page() do not require special
  			 * processing, and instead, expect a call to
  			 * put_page_testzero().
  			 */
  			if (put_devmap_managed_page(page))
  				continue;
df6ad6983   Jérôme Glisse   mm/device-public-...
758
  		}
ddc58f27f   Kirill A. Shutemov   mm: drop tail pag...
759
  		/* From here on operate on the head page */
  		page = compound_head(page);
b5810039a   Nick Piggin   [PATCH] core remo...
760
  		/* Other references remain: nothing more to do for this page */
  		if (!put_page_testzero(page))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
761
  			continue;
ddc58f27f   Kirill A. Shutemov   mm: drop tail pag...
762
  		/*
  		 * Compound pages are not put on pages_to_free; they go through
  		 * __put_compound_page() with the lru_lock released.
  		 */
  		if (PageCompound(page)) {
599d0c954   Mel Gorman   mm, vmscan: move ...
763
764
765
  			if (locked_pgdat) {
  				spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
  				locked_pgdat = NULL;
ddc58f27f   Kirill A. Shutemov   mm: drop tail pag...
766
767
768
769
  			}
  			__put_compound_page(page);
  			continue;
  		}
46453a6e1   Nick Piggin   [PATCH] mm: never...
770
  		if (PageLRU(page)) {
599d0c954   Mel Gorman   mm, vmscan: move ...
771
  			struct pglist_data *pgdat = page_pgdat(page);
894bc3104   Lee Schermerhorn   Unevictable LRU I...
772

599d0c954   Mel Gorman   mm, vmscan: move ...
773
774
775
  			/* Switch locks when this page lives on a different pgdat */
  			if (pgdat != locked_pgdat) {
  				if (locked_pgdat)
  					spin_unlock_irqrestore(&locked_pgdat->lru_lock,
902aaed0d   Hisashi Hifumi   mm: use pagevec t...
776
  									flags);
aabfb5729   Michal Hocko   mm: memcontrol: d...
777
  				lock_batch = 0;
599d0c954   Mel Gorman   mm, vmscan: move ...
778
779
  				locked_pgdat = pgdat;
  				spin_lock_irqsave(&locked_pgdat->lru_lock, flags);
46453a6e1   Nick Piggin   [PATCH] mm: never...
780
  			}
fa9add641   Hugh Dickins   mm/memcg: apply a...
781

599d0c954   Mel Gorman   mm, vmscan: move ...
782
  			lruvec = mem_cgroup_page_lruvec(page, locked_pgdat);
309381fea   Sasha Levin   mm: dump page whe...
783
  			VM_BUG_ON_PAGE(!PageLRU(page), page);
674539115   Nick Piggin   [PATCH] mm: less ...
784
  			__ClearPageLRU(page);
fa9add641   Hugh Dickins   mm/memcg: apply a...
785
  			del_page_from_lru_list(page, lruvec, page_off_lru(page));
46453a6e1   Nick Piggin   [PATCH] mm: never...
786
  		}
c53954a09   Mel Gorman   mm: remove lru pa...
787
  		/* Clear Active bit in case of parallel mark_page_accessed */
e3741b506   Mel Gorman   mm: do not use at...
788
  		__ClearPageActive(page);
629060270   Nicholas Piggin   mm: add PageWaite...
789
  		__ClearPageWaiters(page);
c53954a09   Mel Gorman   mm: remove lru pa...
790

cc59850ef   Konstantin Khlebnikov   mm: add free_hot_...
791
  		/* Defer the actual free to the batched calls after the loop */
  		list_add(&page->lru, &pages_to_free);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
792
  	}
599d0c954   Mel Gorman   mm, vmscan: move ...
793
794
  	/* Release the lru_lock if the last iteration left it held */
  	if (locked_pgdat)
  		spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
795

747db954c   Johannes Weiner   mm: memcontrol: u...
796
  	mem_cgroup_uncharge_list(&pages_to_free);
2d4894b5d   Mel Gorman   mm: remove cold p...
797
  	free_unref_page_list(&pages_to_free);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
798
  }
0be8557bc   Miklos Szeredi   fuse: use release...
799
  EXPORT_SYMBOL(release_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
800
801
802
803
804
805
806
807
808
809
810
811
812
  
  /*
   * The pages which we're about to release may be in the deferred lru-addition
   * queues.  That would prevent them from really being freed right now.  That's
   * OK from a correctness point of view but is inefficient - those pages may be
   * cache-warm and we want to give them back to the page allocator ASAP.
   *
   * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
   * and __pagevec_lru_add_active() call release_pages() directly to avoid
   * mutual recursion.
   */
  void __pagevec_release(struct pagevec *pvec)
  {
7f0b5fb95   Mel Gorman   mm, pagevec: rena...
813
  	if (!pvec->percpu_pvec_drained) {
d9ed0d08b   Mel Gorman   mm: only drain pe...
814
  		lru_add_drain();
7f0b5fb95   Mel Gorman   mm, pagevec: rena...
815
  		pvec->percpu_pvec_drained = true;
d9ed0d08b   Mel Gorman   mm: only drain pe...
816
  	}
c6f92f9fb   Mel Gorman   mm: remove cold p...
817
  	release_pages(pvec->pages, pagevec_count(pvec));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
818
819
  	pagevec_reinit(pvec);
  }
7f2857018   Steve French   Export __pagevec_...
820
  EXPORT_SYMBOL(__pagevec_release);
12d271078   Hugh Dickins   memcg: fix split_...
821
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
71e3aac07   Andrea Arcangeli   thp: transparent ...
822
  /* used by __split_huge_page_refcount() */
fa9add641   Hugh Dickins   mm/memcg: apply a...
823
  void lru_add_page_tail(struct page *page, struct page *page_tail,
5bc7b8aca   Shaohua Li   mm: thp: add spli...
824
  		       struct lruvec *lruvec, struct list_head *list)
71e3aac07   Andrea Arcangeli   thp: transparent ...
825
  {
71e3aac07   Andrea Arcangeli   thp: transparent ...
826
  	const int file = 0;
71e3aac07   Andrea Arcangeli   thp: transparent ...
827

309381fea   Sasha Levin   mm: dump page whe...
828
829
830
  	VM_BUG_ON_PAGE(!PageHead(page), page);
  	VM_BUG_ON_PAGE(PageCompound(page_tail), page);
  	VM_BUG_ON_PAGE(PageLRU(page_tail), page);
35f3aa39f   Lance Roy   mm: Replace spin_...
831
  	lockdep_assert_held(&lruvec_pgdat(lruvec)->lru_lock);
71e3aac07   Andrea Arcangeli   thp: transparent ...
832

5bc7b8aca   Shaohua Li   mm: thp: add spli...
833
834
  	if (!list)
  		SetPageLRU(page_tail);
71e3aac07   Andrea Arcangeli   thp: transparent ...
835

12d271078   Hugh Dickins   memcg: fix split_...
836
837
  	if (likely(PageLRU(page)))
  		list_add_tail(&page_tail->lru, &page->lru);
5bc7b8aca   Shaohua Li   mm: thp: add spli...
838
839
840
841
842
  	else if (list) {
  		/* page reclaim is reclaiming a huge page */
  		get_page(page_tail);
  		list_add_tail(&page_tail->lru, list);
  	} else {
12d271078   Hugh Dickins   memcg: fix split_...
843
844
845
846
  		/*
  		 * Head page has not yet been counted, as an hpage,
  		 * so we must account for each subpage individually.
  		 *
e7a1aaf28   Yu Zhao   mm: replace list_...
847
848
  		 * Put page_tail on the list at the correct position
  		 * so they all end up in order.
12d271078   Hugh Dickins   memcg: fix split_...
849
  		 */
e7a1aaf28   Yu Zhao   mm: replace list_...
850
851
  		add_page_to_lru_list_tail(page_tail, lruvec,
  					  page_lru(page_tail));
71e3aac07   Andrea Arcangeli   thp: transparent ...
852
  	}
7512102cf   Hugh Dickins   memcg: fix GPF wh...
853
854
  
  	if (!PageUnevictable(page))
e180cf806   Kirill A. Shutemov   thp, mm: avoid Pa...
855
  		update_page_reclaim_stat(lruvec, file, PageActive(page_tail));
71e3aac07   Andrea Arcangeli   thp: transparent ...
856
  }
12d271078   Hugh Dickins   memcg: fix split_...
857
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
71e3aac07   Andrea Arcangeli   thp: transparent ...
858

fa9add641   Hugh Dickins   mm/memcg: apply a...
859
860
  /*
   * __pagevec_lru_add_fn - put one page on an LRU list of @lruvec.
   * @page:   page to add; must not already be PageLRU
   * @lruvec: lruvec whose list receives the page
   * @arg:    unused
   *
   * This is the callback that __pagevec_lru_add() hands to
   * pagevec_lru_move_fn().  It sets PG_lru, then decides between the list
   * given by page_lru() and LRU_UNEVICTABLE based on page_evictable().
   */
  static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
  				 void *arg)
3dd7ae8ec   Shaohua Li   mm: simplify code...
861
  {
9c4e6b1a7   Shakeel Butt   mm, mlock, vmscan...
862
863
  	enum lru_list lru;
  	/*
  	 * Clear PG_unevictable and remember its old value; the flag is set
  	 * again below if the page turns out not to be evictable.
  	 */
  	int was_unevictable = TestClearPageUnevictable(page);
3dd7ae8ec   Shaohua Li   mm: simplify code...
864

309381fea   Sasha Levin   mm: dump page whe...
865
  	VM_BUG_ON_PAGE(PageLRU(page), page);
3dd7ae8ec   Shaohua Li   mm: simplify code...
866
867
  
  	SetPageLRU(page);
9c4e6b1a7   Shakeel Butt   mm, mlock, vmscan...
868
869
  	/*
  	 * Page becomes evictable in two ways:
  	 * 1) Within LRU lock [munlock_vma_page() and __munlock_pagevec()].
  	 * 2) Before acquiring LRU lock to put the page to correct LRU and then
  	 *   a) do PageLRU check with lock [check_move_unevictable_pages]
  	 *   b) do PageLRU check before lock [clear_page_mlock]
  	 *
  	 * (1) & (2a) are ok as LRU lock will serialize them. For (2b), we need
  	 * following strict ordering:
  	 *
  	 * #0: __pagevec_lru_add_fn		#1: clear_page_mlock
  	 *
  	 * SetPageLRU()				TestClearPageMlocked()
  	 * smp_mb() // explicit ordering	// above provides strict
  	 *					// ordering
  	 * PageMlocked()			PageLRU()
  	 *
  	 *
  	 * if '#1' does not observe setting of PG_lru by '#0' and fails
  	 * isolation, the explicit barrier will make sure that page_evictable
  	 * check will put the page in correct LRU. Without smp_mb(), SetPageLRU
  	 * can be reordered after PageMlocked check and can make '#1' to fail
  	 * the isolation of the page whose Mlocked bit is cleared (#0 is also
  	 * looking at the same page) and the evictable page will be stranded
  	 * in an unevictable LRU.
  	 */
  	smp_mb();
  
  	if (page_evictable(page)) {
  		lru = page_lru(page);
  		update_page_reclaim_stat(lruvec, page_is_file_cache(page),
  					 PageActive(page));
  		/* Was unevictable on entry and is evictable now */
  		if (was_unevictable)
  			count_vm_event(UNEVICTABLE_PGRESCUED);
  	} else {
  		lru = LRU_UNEVICTABLE;
  		ClearPageActive(page);
  		SetPageUnevictable(page);
  		/* Was not unevictable on entry and is being made so now */
  		if (!was_unevictable)
  			count_vm_event(UNEVICTABLE_PGCULLED);
  	}
fa9add641   Hugh Dickins   mm/memcg: apply a...
909
  	add_page_to_lru_list(page, lruvec, lru);
24b7e5819   Mel Gorman   mm: pagemap: avoi...
910
  	trace_mm_lru_insertion(page, lru);
3dd7ae8ec   Shaohua Li   mm: simplify code...
911
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
912
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
913
914
915
   * Add the passed pages to the LRU, then drop the caller's refcount
   * on them.  Reinitialises the caller's pagevec.
   */
a0b8cab3b   Mel Gorman   mm: remove lru pa...
916
  void __pagevec_lru_add(struct pagevec *pvec)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
917
  {
a0b8cab3b   Mel Gorman   mm: remove lru pa...
918
  	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
919
  }
5095ae837   Hugh Dickins   mm: fewer undersc...
920
  EXPORT_SYMBOL(__pagevec_lru_add);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
921

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
922
  /**
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
923
924
925
926
   * pagevec_lookup_entries - gang pagecache lookup
   * @pvec:	Where the resulting entries are placed
   * @mapping:	The address_space to search
   * @start:	The starting entry index
cb6f0f348   Mike Rapoport   mm/swap.c: make f...
927
   * @nr_entries:	The maximum number of pages
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
928
929
930
   * @indices:	The cache indices corresponding to the entries in @pvec
   *
   * pagevec_lookup_entries() will search for and return a group of up
f144c390f   Mike Rapoport   mm: docs: fix par...
931
   * to @nr_pages pages and shadow entries in the mapping.  All
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
932
933
934
935
936
937
938
939
940
941
942
943
   * entries are placed in @pvec.  pagevec_lookup_entries() takes a
   * reference against actual pages in @pvec.
   *
   * The search returns a group of mapping-contiguous entries with
   * ascending indexes.  There may be holes in the indices due to
   * not-present entries.
   *
   * pagevec_lookup_entries() returns the number of entries which were
   * found.
   */
  unsigned pagevec_lookup_entries(struct pagevec *pvec,
  				struct address_space *mapping,
e02a9f048   Randy Dunlap   mm/swap.c: make f...
944
  				pgoff_t start, unsigned nr_entries,
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
945
946
  				pgoff_t *indices)
  {
e02a9f048   Randy Dunlap   mm/swap.c: make f...
947
  	pvec->nr = find_get_entries(mapping, start, nr_entries,
0cd6144aa   Johannes Weiner   mm + fs: prepare ...
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
  				    pvec->pages, indices);
  	return pagevec_count(pvec);
  }
  
  /**
   * pagevec_remove_exceptionals - pagevec exceptionals pruning
   * @pvec:	The pagevec to prune
   *
   * pagevec_lookup_entries() fills both pages and exceptional radix
   * tree entries into the pagevec.  This function prunes all
   * exceptionals from @pvec without leaving holes, so that it can be
   * passed on to page-only pagevec operations.
   *
   * Surviving entries keep their relative order and are compacted to the
   * front of @pvec->pages; @pvec->nr is updated to the number kept.
   */
  void pagevec_remove_exceptionals(struct pagevec *pvec)
  {
  	int src;
  	int kept = 0;

  	for (src = 0; src < pagevec_count(pvec); src++) {
  		struct page *entry = pvec->pages[src];

  		/* Value entries are the exceptionals being pruned. */
  		if (xa_is_value(entry))
  			continue;

  		pvec->pages[kept] = entry;
  		kept++;
  	}
  	pvec->nr = kept;
  }
  
  /**
b947cee4b   Jan Kara   mm: implement fin...
974
   * pagevec_lookup_range - gang pagecache lookup
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
975
976
977
   * @pvec:	Where the resulting pages are placed
   * @mapping:	The address_space to search
   * @start:	The starting page index
b947cee4b   Jan Kara   mm: implement fin...
978
   * @end:	The final page index
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
979
   *
e02a9f048   Randy Dunlap   mm/swap.c: make f...
980
   * pagevec_lookup_range() will search for & return a group of up to PAGEVEC_SIZE
b947cee4b   Jan Kara   mm: implement fin...
981
982
   * pages in the mapping starting from index @start and upto index @end
   * (inclusive).  The pages are placed in @pvec.  pagevec_lookup() takes a
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
983
984
985
   * reference against the pages in @pvec.
   *
   * The search returns a group of mapping-contiguous pages with ascending
d72dc8a25   Jan Kara   mm: make pagevec_...
986
987
   * indexes.  There may be holes in the indices due to not-present pages. We
   * also update @start to index the next page for the traversal.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
988
   *
b947cee4b   Jan Kara   mm: implement fin...
989
   * pagevec_lookup_range() returns the number of pages which were found. If this
e02a9f048   Randy Dunlap   mm/swap.c: make f...
990
   * number is smaller than PAGEVEC_SIZE, the end of specified range has been
b947cee4b   Jan Kara   mm: implement fin...
991
   * reached.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
992
   */
b947cee4b   Jan Kara   mm: implement fin...
993
  unsigned pagevec_lookup_range(struct pagevec *pvec,
397162ffa   Jan Kara   mm: remove nr_pag...
994
  		struct address_space *mapping, pgoff_t *start, pgoff_t end)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
995
  {
397162ffa   Jan Kara   mm: remove nr_pag...
996
  	pvec->nr = find_get_pages_range(mapping, start, end, PAGEVEC_SIZE,
b947cee4b   Jan Kara   mm: implement fin...
997
  					pvec->pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
998
999
  	return pagevec_count(pvec);
  }
b947cee4b   Jan Kara   mm: implement fin...
1000
  EXPORT_SYMBOL(pagevec_lookup_range);
78539fdfa   Christoph Hellwig   [XFS] Export page...
1001

72b045aec   Jan Kara   mm: implement fin...
1002
1003
  unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
  		struct address_space *mapping, pgoff_t *index, pgoff_t end,
10bbd2358   Matthew Wilcox   pagevec: Use xa_m...
1004
  		xa_mark_t tag)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1005
  {
72b045aec   Jan Kara   mm: implement fin...
1006
  	pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
67fd707f4   Jan Kara   mm: remove nr_pag...
1007
  					PAGEVEC_SIZE, pvec->pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1008
1009
  	return pagevec_count(pvec);
  }
72b045aec   Jan Kara   mm: implement fin...
1010
  EXPORT_SYMBOL(pagevec_lookup_range_tag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1011

93d3b7140   Jan Kara   mm: add variant o...
1012
1013
  unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec,
  		struct address_space *mapping, pgoff_t *index, pgoff_t end,
10bbd2358   Matthew Wilcox   pagevec: Use xa_m...
1014
  		xa_mark_t tag, unsigned max_pages)
93d3b7140   Jan Kara   mm: add variant o...
1015
1016
1017
1018
1019
1020
  {
  	pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
  		min_t(unsigned int, max_pages, PAGEVEC_SIZE), pvec->pages);
  	return pagevec_count(pvec);
  }
  EXPORT_SYMBOL(pagevec_lookup_range_nr_tag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1021
1022
1023
1024
1025
  /*
   * Perform any setup for the swap system.
   *
   * Picks page_cluster from the amount of RAM: 2 when fewer than 16
   * megabytes' worth of pages are present, 3 otherwise.
   */
  void __init swap_setup(void)
  {
  	/* Total RAM expressed in megabytes (pages >> (20 - PAGE_SHIFT)). */
  	unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT);

  	/*
  	 * Use a smaller cluster for small-memory machines.
  	 *
  	 * Right now other parts of the system mean that we
  	 * _really_ don't want to cluster much more
  	 */
  	page_cluster = (megs < 16) ? 2 : 3;
  }