  // SPDX-License-Identifier: GPL-2.0
  /*
   *  linux/mm/swap_state.c
   *
   *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   *  Swap reorganised 29.12.95, Stephen Tweedie
   *
   *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
   */
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
#include <linux/shmem_fs.h>
#include "internal.h"

/*
 * swapper_space is a fiction, retained to simplify the path through
 * vmscan's shrink_page_list.
 */
static const struct address_space_operations swap_aops = {
	.writepage	= swap_writepage,
	.set_page_dirty	= swap_set_page_dirty,
#ifdef CONFIG_MIGRATION
	.migratepage	= migrate_page,
#endif
};

struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
static bool enable_vma_readahead __read_mostly = true;

  #define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
  #define SWAP_RA_HITS_MASK	((1UL << SWAP_RA_WIN_SHIFT) - 1)
  #define SWAP_RA_HITS_MAX	SWAP_RA_HITS_MASK
  #define SWAP_RA_WIN_MASK	(~PAGE_MASK & ~SWAP_RA_HITS_MASK)
  
  #define SWAP_RA_HITS(v)		((v) & SWAP_RA_HITS_MASK)
  #define SWAP_RA_WIN(v)		(((v) & SWAP_RA_WIN_MASK) >> SWAP_RA_WIN_SHIFT)
  #define SWAP_RA_ADDR(v)		((v) & PAGE_MASK)
  
  #define SWAP_RA_VAL(addr, win, hits)				\
  	(((addr) & PAGE_MASK) |					\
  	 (((win) << SWAP_RA_WIN_SHIFT) & SWAP_RA_WIN_MASK) |	\
  	 ((hits) & SWAP_RA_HITS_MASK))
  
  /* Initial readahead hits is 4 to start up with a small window */
  #define GET_SWAP_RA_VAL(vma)					\
  	(atomic_long_read(&(vma)->swap_readahead_info) ? : 4)
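
/*
 * Worked example of the encoding above (a sketch, assuming PAGE_SHIFT == 12,
 * i.e. 4KB pages, so SWAP_RA_WIN_SHIFT == 6): hits occupies bits 0-5, win
 * bits 6-11, and the page-aligned fault address the remaining high bits:
 *
 *   SWAP_RA_VAL(0x7f0000123000, 8, 4) == 0x7f0000123000 | (8 << 6) | 4
 *                                     == 0x7f0000123204
 *   SWAP_RA_ADDR(0x7f0000123204)     == 0x7f0000123000
 *   SWAP_RA_WIN(0x7f0000123204)      == 8
 *   SWAP_RA_HITS(0x7f0000123204)     == 4
 *
 * This is what lets a single atomic_long in the vma carry all three
 * readahead fields.
 */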

#define INC_CACHE_INFO(x)	data_race(swap_cache_info.x++)
#define ADD_CACHE_INFO(x, nr)	data_race(swap_cache_info.x += (nr))

static struct {
	unsigned long add_total;
	unsigned long del_total;
	unsigned long find_success;
	unsigned long find_total;
} swap_cache_info;

  unsigned long total_swapcache_pages(void)
  {
	unsigned int i, j, nr;
	unsigned long ret = 0;
	struct address_space *spaces;
	struct swap_info_struct *si;

	for (i = 0; i < MAX_SWAPFILES; i++) {
		swp_entry_t entry = swp_entry(i, 1);

		/* Avoid get_swap_device() warning for a bad swap entry */
		if (!swp_swap_info(entry))
			continue;
		/* Prevent swapoff from freeing swapper_spaces */
		si = get_swap_device(entry);
		if (!si)
			continue;
		nr = nr_swapper_spaces[i];
		spaces = swapper_spaces[i];
		for (j = 0; j < nr; j++)
			ret += spaces[j].nrpages;
		put_swap_device(si);
	}
	return ret;
}

static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);

void show_swap_cache_info(void)
{
	printk("%lu pages in swap cache\n", total_swapcache_pages());
	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
		swap_cache_info.add_total, swap_cache_info.del_total,
		swap_cache_info.find_success, swap_cache_info.find_total);
	printk("Free swap  = %ldkB\n",
		get_nr_swap_pages() << (PAGE_SHIFT - 10));
	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
}
  void *get_shadow_from_swap_cache(swp_entry_t entry)
  {
  	struct address_space *address_space = swap_address_space(entry);
  	pgoff_t idx = swp_offset(entry);
  	struct page *page;
  
  	page = find_get_entry(address_space, idx);
  	if (xa_is_value(page))
  		return page;
  	if (page)
  		put_page(page);
  	return NULL;
  }

/*
 * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 */
int add_to_swap_cache(struct page *page, swp_entry_t entry,
			gfp_t gfp, void **shadowp)
{
	struct address_space *address_space = swap_address_space(entry);
	pgoff_t idx = swp_offset(entry);
	XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
	unsigned long i, nr = thp_nr_pages(page);
	void *old;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapCache(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	page_ref_add(page, nr);
	SetPageSwapCache(page);

	do {
		unsigned long nr_shadows = 0;

		xas_lock_irq(&xas);
		xas_create_range(&xas);
		if (xas_error(&xas))
			goto unlock;
		for (i = 0; i < nr; i++) {
			VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
			old = xas_load(&xas);
			if (xa_is_value(old)) {
				nr_shadows++;
				if (shadowp)
					*shadowp = old;
			}
			set_page_private(page + i, entry.val + i);
			xas_store(&xas, page);
			xas_next(&xas);
		}
		address_space->nrexceptional -= nr_shadows;
		address_space->nrpages += nr;
		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
		ADD_CACHE_INFO(add_total, nr);
unlock:
		xas_unlock_irq(&xas);
  	} while (xas_nomem(&xas, gfp));
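
	/*
	 * Note on the loop above: if the store failed because an XArray node
	 * could not be allocated under the lock, xas_nomem() allocates one
	 * with @gfp outside the lock and returns true, and the whole locked
	 * sequence is retried with that preallocated node available.
	 */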

	if (!xas_error(&xas))
		return 0;

	ClearPageSwapCache(page);
	page_ref_sub(page, nr);
	return xas_error(&xas);
  }

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page,
			swp_entry_t entry, void *shadow)
{
	struct address_space *address_space = swap_address_space(entry);
	int i, nr = thp_nr_pages(page);
	pgoff_t idx = swp_offset(entry);
	XA_STATE(xas, &address_space->i_pages, idx);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
	VM_BUG_ON_PAGE(PageWriteback(page), page);

	for (i = 0; i < nr; i++) {
		void *entry = xas_store(&xas, shadow);
		VM_BUG_ON_PAGE(entry != page, entry);
		set_page_private(page + i, 0);
		xas_next(&xas);
	}
	ClearPageSwapCache(page);
	if (shadow)
		address_space->nrexceptional += nr;
	address_space->nrpages -= nr;
	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
	ADD_CACHE_INFO(del_total, nr);
  }
  
  /**
   * add_to_swap - allocate swap space for a page
   * @page: page we want to move to swap
   *
   * Allocate swap space for the page and add the page to the
 * swap cache.  Caller needs to hold the page lock.
   */
int add_to_swap(struct page *page)
{
	swp_entry_t entry;
	int err;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageUptodate(page), page);

	entry = get_swap_page(page);
	if (!entry.val)
		return 0;

  	/*
	 * XArray node allocations from PF_MEMALLOC contexts could
	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
	 * stops emergency reserves from being allocated.
	 *
	 * TODO: this could cause a theoretical memory reclaim
	 * deadlock in the swap out path.
	 */
	/*
	 * Add it to the swap cache.
	 */
	err = add_to_swap_cache(page, entry,
			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL);
	if (err)
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		goto fail;
	/*
	 * Normally the page will be dirtied in unmap because its pte should
	 * be dirty. A special case is an MADV_FREE page: its pte could have
	 * the dirty bit cleared while its SwapBacked bit is still set, because
	 * clearing the dirty bit and the SwapBacked bit is not protected by a
	 * lock. For such a page, unmap will not set the dirty bit, so page
	 * reclaim will not write the page out. This can cause data corruption
	 * when the page is swapped in later. Always setting the dirty bit for
	 * the page solves the problem.
	 */
  	set_page_dirty(page);

	return 1;

fail:
	put_swap_page(page, entry);
	return 0;
  }
  
  /*
   * This must be called only on pages that have
   * been verified to be in the swap cache and locked.
   * It will never put the page into the free list,
   * the caller has a reference on the page.
   */
  void delete_from_swap_cache(struct page *page)
  {
	swp_entry_t entry = { .val = page_private(page) };
	struct address_space *address_space = swap_address_space(entry);

	xa_lock_irq(&address_space->i_pages);
	__delete_from_swap_cache(page, entry, NULL);
	xa_unlock_irq(&address_space->i_pages);

	put_swap_page(page, entry);
	page_ref_sub(page, thp_nr_pages(page));
  }
  void clear_shadow_from_swap_cache(int type, unsigned long begin,
  				unsigned long end)
  {
  	unsigned long curr = begin;
  	void *old;
  
  	for (;;) {
  		unsigned long nr_shadows = 0;
  		swp_entry_t entry = swp_entry(type, curr);
  		struct address_space *address_space = swap_address_space(entry);
  		XA_STATE(xas, &address_space->i_pages, curr);
  
  		xa_lock_irq(&address_space->i_pages);
  		xas_for_each(&xas, old, end) {
  			if (!xa_is_value(old))
  				continue;
  			xas_store(&xas, NULL);
  			nr_shadows++;
  		}
  		address_space->nrexceptional -= nr_shadows;
  		xa_unlock_irq(&address_space->i_pages);
  
  		/* search the next swapcache until we meet end */
  		curr >>= SWAP_ADDRESS_SPACE_SHIFT;
  		curr++;
  		curr <<= SWAP_ADDRESS_SPACE_SHIFT;
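		/*
		 * The three steps above round curr up to the first offset of
		 * the next swap address space chunk, e.g. with
		 * SWAP_ADDRESS_SPACE_SHIFT == 14 (16k pages per space),
		 * curr == 20000 becomes ((20000 >> 14) + 1) << 14 == 32768.
		 */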
  		if (curr > end)
  			break;
  	}
  }

/*
 * If we are the only user, then try to free up the swap cache.
 *
 * It's ok to check for PageSwapCache without the page lock
 * here because we are going to recheck again inside
 * try_to_free_swap() _with_ the lock.
 * 					- Marcelo
 */
static inline void free_swap_cache(struct page *page)
{
	if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
		try_to_free_swap(page);
		unlock_page(page);
	}
}

/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page.
 */
void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);
	if (!is_huge_zero_page(page))
		put_page(page);
}

/*
 * Passed an array of pages, drop them all from swapcache and then release
 * them.  They are removed from the LRU and freed if this is their last use.
 */
void free_pages_and_swap_cache(struct page **pages, int nr)
{
	struct page **pagep = pages;
	int i;

	lru_add_drain();
	for (i = 0; i < nr; i++)
		free_swap_cache(pagep[i]);
	release_pages(pagep, nr);
}

static inline bool swap_use_vma_readahead(void)
{
	return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap);
}

  /*
   * Lookup a swap entry in the swap cache. A found page will be returned
   * unlocked and with its refcount incremented - we rely on the kernel
   * lock getting page table operations atomic even if we drop the page
   * lock before returning.
   */
struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
			       unsigned long addr)
{
	struct page *page;
	struct swap_info_struct *si;

	si = get_swap_device(entry);
	if (!si)
		return NULL;
	page = find_get_page(swap_address_space(entry), swp_offset(entry));
	put_swap_device(si);

	INC_CACHE_INFO(find_total);
	if (page) {
		bool vma_ra = swap_use_vma_readahead();
		bool readahead;

		INC_CACHE_INFO(find_success);
		/*
		 * At the moment, we don't support PG_readahead for anon THP
		 * so let's bail out rather than confusing the readahead stat.
		 */
		if (unlikely(PageTransCompound(page)))
			return page;

		readahead = TestClearPageReadahead(page);
		if (vma && vma_ra) {
			unsigned long ra_val;
			int win, hits;

			ra_val = GET_SWAP_RA_VAL(vma);
			win = SWAP_RA_WIN(ra_val);
			hits = SWAP_RA_HITS(ra_val);
			if (readahead)
				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
			atomic_long_set(&vma->swap_readahead_info,
					SWAP_RA_VAL(addr, win, hits));
		}

		if (readahead) {
			count_vm_event(SWAP_RA_HIT);
			if (!vma || !vma_ra)
				atomic_inc(&swapin_readahead_hits);
		}
	}

  	return page;
  }
  /**
   * find_get_incore_page - Find and get a page from the page or swap caches.
   * @mapping: The address_space to search.
   * @index: The page cache index.
   *
   * This differs from find_get_page() in that it will also look for the
   * page in the swap cache.
   *
   * Return: The found page or %NULL.
   */
  struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
  {
  	swp_entry_t swp;
  	struct swap_info_struct *si;
  	struct page *page = find_get_entry(mapping, index);

	if (!page)
		return page;
	if (!xa_is_value(page))
		return find_subpage(page, index);
  	if (!shmem_mapping(mapping))
  		return NULL;
  
  	swp = radix_to_swp_entry(page);
  	/* Prevent swapoff from happening to us */
  	si = get_swap_device(swp);
  	if (!si)
  		return NULL;
  	page = find_get_page(swap_address_space(swp), swp_offset(swp));
  	put_swap_device(si);
  	return page;
  }
  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
  			struct vm_area_struct *vma, unsigned long addr,
  			bool *new_page_allocated)
{
	struct swap_info_struct *si;
	struct page *page;
	void *shadow = NULL;

	*new_page_allocated = false;

	for (;;) {
		int err;
  		/*
  		 * First check the swap cache.  Since this is normally
  		 * called after lookup_swap_cache() failed, re-calling
  		 * that would confuse statistics.
  		 */
		si = get_swap_device(entry);
		if (!si)
			return NULL;
		page = find_get_page(swap_address_space(entry),
				     swp_offset(entry));
		put_swap_device(si);
		if (page)
			return page;

		/*
		 * Just skip readahead for an unused swap slot.
		 * During swap_off, when swap_slot_cache is disabled,
		 * we have to handle the race between putting a
		 * swap entry into the swap cache and marking the swap slot
		 * as SWAP_HAS_CACHE.  That's done in a later part of this
		 * code, or else swap_off will be aborted if we return NULL.
		 */
  		if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
			return NULL;

		/*
		 * Get a new page to read into from swap.  Allocate it now,
		 * before marking swap_map SWAP_HAS_CACHE, when -EEXIST will
		 * cause any racers to loop around until we add it to cache.
		 */
		page = alloc_page_vma(gfp_mask, vma, addr);
		if (!page)
			return NULL;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (!err)
			break;

		put_page(page);
		if (err != -EEXIST)
			return NULL;

		/*
		 * We might race against __delete_from_swap_cache(), and
		 * stumble across a swap_map entry whose SWAP_HAS_CACHE
		 * has not yet been cleared.  Or race against another
		 * __read_swap_cache_async(), which has set SWAP_HAS_CACHE
		 * in swap_map, but not yet added its page to swap cache.
		 */
  		cond_resched();
  	}
  
  	/*
  	 * The swap entry is ours to swap in. Prepare the new page.
  	 */
  
  	__SetPageLocked(page);
  	__SetPageSwapBacked(page);
  
  	/* May fail (-ENOMEM) if XArray node allocation failed. */
  	if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) {
  		put_swap_page(page, entry);
  		goto fail_unlock;
  	}
  	if (mem_cgroup_charge(page, NULL, gfp_mask)) {
  		delete_from_swap_cache(page);
  		goto fail_unlock;
  	}
  	if (shadow)
  		workingset_refault(page, shadow);

	/* Caller will initiate read into locked page */
	SetPageWorkingset(page);
  	lru_cache_add(page);
  	*new_page_allocated = true;
  	return page;
  fail_unlock:
  	unlock_page(page);
  	put_page(page);
  	return NULL;
  }
  /*
   * Locate a page of swap in physical memory, reserving swap cache space
   * and reading the disk if it is not already cached.
   * A failure return means that either the page allocation failed or that
   * the swap entry is no longer in use.
   */
  struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
  		struct vm_area_struct *vma, unsigned long addr, bool do_poll)
  {
  	bool page_was_allocated;
  	struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
  			vma, addr, &page_was_allocated);
  
  	if (page_was_allocated)
  		swap_readpage(retpage, do_poll);
  
  	return retpage;
  }
  static unsigned int __swapin_nr_pages(unsigned long prev_offset,
  				      unsigned long offset,
  				      int hits,
  				      int max_pages,
  				      int prev_win)
  {
  	unsigned int pages, last_ra;
  
  	/*
  	 * This heuristic has been found to work well on both sequential and
  	 * random loads, swapping to hard disk or to SSD: please don't ask
  	 * what the "+ 2" means, it just happens to work well, that's all.
  	 */
  	pages = hits + 2;
  	if (pages == 2) {
  		/*
  		 * We can have no readahead hits to judge by: but must not get
  		 * stuck here forever, so check for an adjacent offset instead
  		 * (and don't even bother to check whether swap type is same).
  		 */
  		if (offset != prev_offset + 1 && offset != prev_offset - 1)
  			pages = 1;
  	} else {
  		unsigned int roundup = 4;
  		while (roundup < pages)
  			roundup <<= 1;
  		pages = roundup;
  	}
  
  	if (pages > max_pages)
  		pages = max_pages;
  
  	/* Don't shrink readahead too fast */
  	last_ra = prev_win / 2;
  	if (pages < last_ra)
  		pages = last_ra;
  
  	return pages;
  }
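
/*
 * Worked example of the heuristic above (a sketch, numbers chosen purely
 * for illustration): with 3 recent hits, pages = 3 + 2 = 5, rounded up to
 * the next power of two >= 4, i.e. 8 (then capped at max_pages). If the
 * next fault misses (hits == 0) at a non-adjacent offset, pages drops to
 * 1, but the prev_win / 2 floor pulls it back up to 4, halving the window
 * instead of collapsing it.
 */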
  
  static unsigned long swapin_nr_pages(unsigned long offset)
  {
  	static unsigned long prev_offset;
  	unsigned int hits, pages, max_pages;
  	static atomic_t last_readahead_pages;
  
  	max_pages = 1 << READ_ONCE(page_cluster);
  	if (max_pages <= 1)
  		return 1;
  
  	hits = atomic_xchg(&swapin_readahead_hits, 0);
  	pages = __swapin_nr_pages(READ_ONCE(prev_offset), offset, hits,
  				  max_pages,
  				  atomic_read(&last_readahead_pages));
  	if (!hits)
  		WRITE_ONCE(prev_offset, offset);
  	atomic_set(&last_readahead_pages, pages);
  
  	return pages;
  }
  /**
   * swap_cluster_readahead - swap in pages in hope we need them soon
   * @entry: swap entry of this memory
   * @gfp_mask: memory allocation flags
   * @vmf: fault information
   *
   * Returns the struct page for entry and addr, after queueing swapin.
   *
   * Primitive swap readahead code. We simply read an aligned block of
   * (1 << page_cluster) entries in the swap area. This method is chosen
   * because it doesn't cost us any seek time.  We also make sure to queue
   * the 'original' request together with the readahead ones...
   *
   * This has been extended to use the NUMA policies from the mm triggering
   * the readahead.
   *
   * Caller must hold read mmap_lock if vmf->vma is not NULL.
   */
  struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
  				struct vm_fault *vmf)
  {
  	struct page *page;
  	unsigned long entry_offset = swp_offset(entry);
  	unsigned long offset = entry_offset;
  	unsigned long start_offset, end_offset;
  	unsigned long mask;
  	struct swap_info_struct *si = swp_swap_info(entry);
  	struct blk_plug plug;
  	bool do_poll = true, page_allocated;
  	struct vm_area_struct *vma = vmf->vma;
  	unsigned long addr = vmf->address;

  	mask = swapin_nr_pages(offset) - 1;
  	if (!mask)
  		goto skip;
  	/* Test swap type to make sure the dereference is safe */
  	if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
  		struct inode *inode = si->swap_file->f_mapping->host;
  		if (inode_read_congested(inode))
  			goto skip;
  	}
  	do_poll = false;
  	/* Read a page_cluster sized and aligned cluster around offset. */
  	start_offset = offset & ~mask;
  	end_offset = offset | mask;
  	if (!start_offset)	/* First page is swap header. */
  		start_offset++;
  	if (end_offset >= si->max)
  		end_offset = si->max - 1;

  	blk_start_plug(&plug);
  	for (offset = start_offset; offset <= end_offset ; offset++) {
  		/* Ok, do the async read-ahead now */
  		page = __read_swap_cache_async(
  			swp_entry(swp_type(entry), offset),
  			gfp_mask, vma, addr, &page_allocated);
  		if (!page)
  			continue;
  		if (page_allocated) {
  			swap_readpage(page, false);
  			if (offset != entry_offset) {
  				SetPageReadahead(page);
  				count_vm_event(SWAP_RA);
  			}
  		}
  		put_page(page);
  	}
  	blk_finish_plug(&plug);
  	lru_add_drain();	/* Push any new pages onto the LRU now */
  skip:
  	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
  }
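
/*
 * Example of the window arithmetic above (a sketch, assuming page_cluster
 * == 3 and a full readahead window): swapin_nr_pages() returns 8, so
 * mask == 7 and a fault on swap offset 0x123 reads offsets 0x120..0x127
 * in one plugged batch, including the originally requested 0x123.
 */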
  
  int init_swap_address_space(unsigned int type, unsigned long nr_pages)
  {
  	struct address_space *spaces, *space;
  	unsigned int i, nr;
  
  	nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
  	spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL);
  	if (!spaces)
  		return -ENOMEM;
  	for (i = 0; i < nr; i++) {
  		space = spaces + i;
  		xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
  		atomic_set(&space->i_mmap_writable, 0);
  		space->a_ops = &swap_aops;
  		/* swap cache doesn't use writeback related tags */
  		mapping_set_no_writeback_tags(space);
  	}
  	nr_swapper_spaces[type] = nr;
  	swapper_spaces[type] = spaces;
  
  	return 0;
  }
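
/*
 * Sizing example for the above (a sketch, assuming 4KB pages and
 * SWAP_ADDRESS_SPACE_PAGES == 16384): a 4GB swap device has
 * nr_pages == 1048576, so nr == DIV_ROUND_UP(1048576, 16384) == 64
 * address_spaces, each covering 64MB of swap, which spreads xarray
 * lock contention across 64 independent trees.
 */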
  
  void exit_swap_address_space(unsigned int type)
  {
  	kvfree(swapper_spaces[type]);
  	nr_swapper_spaces[type] = 0;
  	swapper_spaces[type] = NULL;
  }
  
  static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
  				     unsigned long faddr,
  				     unsigned long lpfn,
  				     unsigned long rpfn,
  				     unsigned long *start,
  				     unsigned long *end)
  {
  	*start = max3(lpfn, PFN_DOWN(vma->vm_start),
  		      PFN_DOWN(faddr & PMD_MASK));
  	*end = min3(rpfn, PFN_DOWN(vma->vm_end),
  		    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
  }
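
/*
 * Clamping example for the helper above (a sketch, assuming 4KB pages and
 * 2MB PMDs): for a fault one page into a vma that starts on a PMD boundary,
 * a requested window of [fpfn - 2, fpfn + 6) is clamped on the left to the
 * vma/PMD start, so only [fpfn - 1, fpfn + 6) is scanned. Readahead thus
 * never crosses the vma bounds or the PMD-sized region whose page table is
 * currently being examined.
 */
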
  static void swap_ra_info(struct vm_fault *vmf,
  			struct vma_swap_readahead *ra_info)
  {
  	struct vm_area_struct *vma = vmf->vma;
  	unsigned long ra_val;
  	swp_entry_t entry;
  	unsigned long faddr, pfn, fpfn;
  	unsigned long start, end;
  	pte_t *pte, *orig_pte;
  	unsigned int max_win, hits, prev_win, win, left;
  #ifndef CONFIG_64BIT
  	pte_t *tpte;
  #endif
  	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
  			     SWAP_RA_ORDER_CEILING);
  	if (max_win == 1) {
  		ra_info->win = 1;
  		return;
  	}
  	faddr = vmf->address;
  	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
  	entry = pte_to_swp_entry(*pte);
  	if ((unlikely(non_swap_entry(entry)))) {
  		pte_unmap(orig_pte);
  		return;
  	}

  	fpfn = PFN_DOWN(faddr);
  	ra_val = GET_SWAP_RA_VAL(vma);
  	pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
  	prev_win = SWAP_RA_WIN(ra_val);
  	hits = SWAP_RA_HITS(ra_val);
  	ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
  					       max_win, prev_win);
  	atomic_long_set(&vma->swap_readahead_info,
  			SWAP_RA_VAL(faddr, win, 0));
  	if (win == 1) {
  		pte_unmap(orig_pte);
  		return;
  	}
  
  	/* Copy the PTEs because the page table may be unmapped */
  	if (fpfn == pfn + 1)
  		swap_ra_clamp_pfn(vma, faddr, fpfn, fpfn + win, &start, &end);
  	else if (pfn == fpfn + 1)
  		swap_ra_clamp_pfn(vma, faddr, fpfn - win + 1, fpfn + 1,
  				  &start, &end);
  	else {
  		left = (win - 1) / 2;
  		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
  				  &start, &end);
  	}
  	ra_info->nr_pte = end - start;
  	ra_info->offset = fpfn - start;
  	pte -= ra_info->offset;
  #ifdef CONFIG_64BIT
  	ra_info->ptes = pte;
  #else
  	tpte = ra_info->ptes;
  	for (pfn = start; pfn != end; pfn++)
  		*tpte++ = *pte++;
  #endif
  	pte_unmap(orig_pte);
  }
  /**
   * swap_vma_readahead - swap in pages in hope we need them soon
   * @fentry: swap entry of this memory
   * @gfp_mask: memory allocation flags
   * @vmf: fault information
   *
   * Returns the struct page for entry and addr, after queueing swapin.
   *
 * Primitive swap readahead code. We simply read in a few pages whose
   * virtual addresses are around the fault address in the same vma.
   *
   * Caller must hold read mmap_lock if vmf->vma is not NULL.
   *
   */
  static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
  				       struct vm_fault *vmf)
  {
  	struct blk_plug plug;
  	struct vm_area_struct *vma = vmf->vma;
  	struct page *page;
  	pte_t *pte, pentry;
  	swp_entry_t entry;
  	unsigned int i;
  	bool page_allocated;
  	struct vma_swap_readahead ra_info = {0,};

  	swap_ra_info(vmf, &ra_info);
  	if (ra_info.win == 1)
  		goto skip;
  
  	blk_start_plug(&plug);
  	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
  	     i++, pte++) {
  		pentry = *pte;
  		if (pte_none(pentry))
  			continue;
  		if (pte_present(pentry))
  			continue;
  		entry = pte_to_swp_entry(pentry);
  		if (unlikely(non_swap_entry(entry)))
  			continue;
  		page = __read_swap_cache_async(entry, gfp_mask, vma,
  					       vmf->address, &page_allocated);
  		if (!page)
  			continue;
  		if (page_allocated) {
  			swap_readpage(page, false);
  			if (i != ra_info.offset) {
  				SetPageReadahead(page);
  				count_vm_event(SWAP_RA);
  			}
  		}
  		put_page(page);
  	}
  	blk_finish_plug(&plug);
  	lru_add_drain();
  skip:
  	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
  				     ra_info.win == 1);
  }

  /**
   * swapin_readahead - swap in pages in hope we need them soon
   * @entry: swap entry of this memory
   * @gfp_mask: memory allocation flags
   * @vmf: fault information
   *
   * Returns the struct page for entry and addr, after queueing swapin.
   *
 * It's the main entry point for swap readahead. Depending on the
 * configuration, it reads ahead either cluster-based (i.e. by physical
 * disk offset) or vma-based (i.e. by virtual addresses around the
 * faulting address).
   */
  struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
  				struct vm_fault *vmf)
  {
  	return swap_use_vma_readahead() ?
  			swap_vma_readahead(entry, gfp_mask, vmf) :
  			swap_cluster_readahead(entry, gfp_mask, vmf);
  }
  #ifdef CONFIG_SYSFS
  static ssize_t vma_ra_enabled_show(struct kobject *kobj,
  				     struct kobj_attribute *attr, char *buf)
  {
	return sprintf(buf, "%s\n", enable_vma_readahead ? "true" : "false");
  }
  static ssize_t vma_ra_enabled_store(struct kobject *kobj,
  				      struct kobj_attribute *attr,
  				      const char *buf, size_t count)
  {
  	if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
  		enable_vma_readahead = true;
  	else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
  		enable_vma_readahead = false;
  	else
  		return -EINVAL;
  
  	return count;
  }
  static struct kobj_attribute vma_ra_enabled_attr =
  	__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
  	       vma_ra_enabled_store);
  static struct attribute *swap_attrs[] = {
  	&vma_ra_enabled_attr.attr,
  	NULL,
  };
  
  static struct attribute_group swap_attr_group = {
  	.attrs = swap_attrs,
  };
  
  static int __init swap_init_sysfs(void)
  {
  	int err;
  	struct kobject *swap_kobj;
  
  	swap_kobj = kobject_create_and_add("swap", mm_kobj);
  	if (!swap_kobj) {
  		pr_err("failed to create swap kobject
  ");
  		return -ENOMEM;
  	}
  	err = sysfs_create_group(swap_kobj, &swap_attr_group);
  	if (err) {
  		pr_err("failed to register swap group
  ");
  		goto delete_obj;
  	}
  	return 0;
  
  delete_obj:
  	kobject_put(swap_kobj);
  	return err;
  }
  subsys_initcall(swap_init_sysfs);
  #endif
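
/*
 * Usage note (a sketch): the knob created above appears as
 * /sys/kernel/mm/swap/vma_ra_enabled and accepts "true"/"1" or "false"/"0",
 * e.g.:
 *
 *   echo false > /sys/kernel/mm/swap/vma_ra_enabled
 *
 * which makes swapin_readahead() fall back to cluster-based readahead.
 */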