mm/swap_state.c
  // SPDX-License-Identifier: GPL-2.0
  /*
   *  linux/mm/swap_state.c
   *
   *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   *  Swap reorganised 29.12.95, Stephen Tweedie
   *
   *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
   */
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
#include <linux/shmem_fs.h>
#include "internal.h"
  
  /*
   * swapper_space is a fiction, retained to simplify the path through
 * vmscan's shrink_page_list.
 */
  static const struct address_space_operations swap_aops = {
	.writepage	= swap_writepage,
	.set_page_dirty	= swap_set_page_dirty,
#ifdef CONFIG_MIGRATION
	.migratepage	= migrate_page,
#endif
};
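
/*
 * One address_space is not enough for a big swap device: swapper_spaces[]
 * points, for each swap type, at an array of address_spaces, each covering
 * SWAP_ADDRESS_SPACE_PAGES slots (see init_swap_address_space() below), and
 * swap_address_space() picks the right one for a given swp_entry_t.
 */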
  struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
  static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
  static bool enable_vma_readahead __read_mostly = true;

  #define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
  #define SWAP_RA_HITS_MASK	((1UL << SWAP_RA_WIN_SHIFT) - 1)
  #define SWAP_RA_HITS_MAX	SWAP_RA_HITS_MASK
  #define SWAP_RA_WIN_MASK	(~PAGE_MASK & ~SWAP_RA_HITS_MASK)
  
  #define SWAP_RA_HITS(v)		((v) & SWAP_RA_HITS_MASK)
  #define SWAP_RA_WIN(v)		(((v) & SWAP_RA_WIN_MASK) >> SWAP_RA_WIN_SHIFT)
  #define SWAP_RA_ADDR(v)		((v) & PAGE_MASK)
  
  #define SWAP_RA_VAL(addr, win, hits)				\
  	(((addr) & PAGE_MASK) |					\
  	 (((win) << SWAP_RA_WIN_SHIFT) & SWAP_RA_WIN_MASK) |	\
  	 ((hits) & SWAP_RA_HITS_MASK))
  
  /* Initial readahead hits is 4 to start up with a small window */
  #define GET_SWAP_RA_VAL(vma)					\
  	(atomic_long_read(&(vma)->swap_readahead_info) ? : 4)
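
/*
 * Example: with 4K pages (PAGE_SHIFT == 12, SWAP_RA_WIN_SHIFT == 6),
 * SWAP_RA_VAL(addr, win, hits) packs the page-aligned fault address into
 * bits 12 and up, the readahead window into bits 6..11 and the hit count
 * into bits 0..5, so vma->swap_readahead_info can be updated with one
 * atomic_long_set().  GET_SWAP_RA_VAL() of a never-used VMA (field still
 * zero) decodes as addr 0, win 0, hits 4, seeding a small initial window.
 */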

#define INC_CACHE_INFO(x)	data_race(swap_cache_info.x++)
#define ADD_CACHE_INFO(x, nr)	data_race(swap_cache_info.x += (nr))
  
  static struct {
  	unsigned long add_total;
  	unsigned long del_total;
  	unsigned long find_success;
  	unsigned long find_total;
  } swap_cache_info;
  static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);

void show_swap_cache_info(void)
{
	printk("%lu pages in swap cache\n", total_swapcache_pages());
	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
		swap_cache_info.add_total, swap_cache_info.del_total,
		swap_cache_info.find_success, swap_cache_info.find_total);
	printk("Free swap  = %ldkB\n",
		get_nr_swap_pages() << (PAGE_SHIFT - 10));
	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
}

void *get_shadow_from_swap_cache(swp_entry_t entry)
{
	struct address_space *address_space = swap_address_space(entry);
	pgoff_t idx = swp_offset(entry);
	struct page *page;

	page = xa_load(&address_space->i_pages, idx);
	if (xa_is_value(page))
		return page;
	return NULL;
}

/*
 * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 */
int add_to_swap_cache(struct page *page, swp_entry_t entry,
			gfp_t gfp, void **shadowp)
{
	struct address_space *address_space = swap_address_space(entry);
	pgoff_t idx = swp_offset(entry);
	XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
	unsigned long i, nr = thp_nr_pages(page);
	void *old;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapCache(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	page_ref_add(page, nr);
	SetPageSwapCache(page);

	do {
		xas_lock_irq(&xas);
		xas_create_range(&xas);
		if (xas_error(&xas))
			goto unlock;
		for (i = 0; i < nr; i++) {
			VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
			old = xas_load(&xas);
			if (xa_is_value(old)) {
				if (shadowp)
					*shadowp = old;
			}
			set_page_private(page + i, entry.val + i);
			xas_store(&xas, page);
			xas_next(&xas);
		}
		address_space->nrpages += nr;
		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
		__mod_lruvec_page_state(page, NR_SWAPCACHE, nr);
		ADD_CACHE_INFO(add_total, nr);
unlock:
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp));

	if (!xas_error(&xas))
		return 0;

	ClearPageSwapCache(page);
	page_ref_sub(page, nr);
	return xas_error(&xas);
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page,
			swp_entry_t entry, void *shadow)
{
	struct address_space *address_space = swap_address_space(entry);
	int i, nr = thp_nr_pages(page);
	pgoff_t idx = swp_offset(entry);
	XA_STATE(xas, &address_space->i_pages, idx);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
	VM_BUG_ON_PAGE(PageWriteback(page), page);

	for (i = 0; i < nr; i++) {
		void *entry = xas_store(&xas, shadow);
		VM_BUG_ON_PAGE(entry != page, entry);
		set_page_private(page + i, 0);
		xas_next(&xas);
	}
	ClearPageSwapCache(page);
	address_space->nrpages -= nr;
	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
	__mod_lruvec_page_state(page, NR_SWAPCACHE, -nr);
	ADD_CACHE_INFO(del_total, nr);
}
  
  /**
   * add_to_swap - allocate swap space for a page
   * @page: page we want to move to swap
   *
   * Allocate swap space for the page and add the page to the
   * swap cache.  Caller needs to hold the page lock. 
   */
int add_to_swap(struct page *page)
{
	swp_entry_t entry;
	int err;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageUptodate(page), page);

	entry = get_swap_page(page);
	if (!entry.val)
		return 0;

	/*
	 * XArray node allocations from PF_MEMALLOC contexts could
	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
	 * stops emergency reserves from being allocated.
	 *
	 * TODO: this could cause a theoretical memory reclaim
	 * deadlock in the swap out path.
	 */
	/*
	 * Add it to the swap cache.
	 */
	err = add_to_swap_cache(page, entry,
			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL);
	if (err)
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		goto fail;
	/*
	 * Normally the page will be dirtied in unmap because its pte should be
	 * dirty. A special case is MADV_FREE page. The page's pte could have
	 * the dirty bit cleared while its SwapBacked bit is still set, because
	 * clearing the dirty bit and the SwapBacked bit is not done under a
	 * lock. For such a page, unmap will not set the dirty bit, so page
	 * reclaim will not write the page out. This can cause data corruption
	 * when the page is swapped in later. Always setting the dirty bit for
	 * the page solves the problem.
	 */
	set_page_dirty(page);

	return 1;
fail:
	put_swap_page(page, entry);
	return 0;
}
  
  /*
   * This must be called only on pages that have
   * been verified to be in the swap cache and locked.
   * It will never put the page into the free list,
   * the caller has a reference on the page.
   */
  void delete_from_swap_cache(struct page *page)
  {
	swp_entry_t entry = { .val = page_private(page) };
	struct address_space *address_space = swap_address_space(entry);

	xa_lock_irq(&address_space->i_pages);
	__delete_from_swap_cache(page, entry, NULL);
	xa_unlock_irq(&address_space->i_pages);

	put_swap_page(page, entry);
	page_ref_sub(page, thp_nr_pages(page));
}
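
/*
 * When a page leaves the swap cache, __delete_from_swap_cache() may leave a
 * small xarray value entry ("shadow") behind for workingset detection.  The
 * helper below walks the swap address spaces covering [begin, end] and
 * erases those value entries, so stale shadows do not outlive the swap
 * slots they describe.
 */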
  void clear_shadow_from_swap_cache(int type, unsigned long begin,
  				unsigned long end)
  {
  	unsigned long curr = begin;
  	void *old;
  
  	for (;;) {
  		swp_entry_t entry = swp_entry(type, curr);
  		struct address_space *address_space = swap_address_space(entry);
  		XA_STATE(xas, &address_space->i_pages, curr);
  
  		xa_lock_irq(&address_space->i_pages);
  		xas_for_each(&xas, old, end) {
  			if (!xa_is_value(old))
  				continue;
  			xas_store(&xas, NULL);
  		}
  		xa_unlock_irq(&address_space->i_pages);
  
  		/* search the next swapcache until we meet end */
  		curr >>= SWAP_ADDRESS_SPACE_SHIFT;
  		curr++;
  		curr <<= SWAP_ADDRESS_SPACE_SHIFT;
  		if (curr > end)
  			break;
  	}
  }

/*
 * If we are the only user, then try to free up the swap cache.
 *
 * It's ok to check for PageSwapCache without the page lock
 * here because we are going to recheck again inside
 * try_to_free_swap() _with_ the lock.
 * 					- Marcelo
 */
  void free_swap_cache(struct page *page)
{
	if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
		try_to_free_swap(page);
		unlock_page(page);
	}
}

/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page.
 */
void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);
	if (!is_huge_zero_page(page))
		put_page(page);
}

/*
 * Passed an array of pages, drop them all from swapcache and then release
 * them.  They are removed from the LRU and freed if this is their last use.
 */
void free_pages_and_swap_cache(struct page **pages, int nr)
{
	struct page **pagep = pages;
	int i;

	lru_add_drain();
	for (i = 0; i < nr; i++)
		free_swap_cache(pagep[i]);
	release_pages(pagep, nr);
}

static inline bool swap_use_vma_readahead(void)
{
	return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap);
}

/*
 * Lookup a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
			       unsigned long addr)
{
	struct page *page;
	struct swap_info_struct *si;

	si = get_swap_device(entry);
	if (!si)
		return NULL;
	page = find_get_page(swap_address_space(entry), swp_offset(entry));
	put_swap_device(si);

	INC_CACHE_INFO(find_total);
	if (page) {
		bool vma_ra = swap_use_vma_readahead();
		bool readahead;

		INC_CACHE_INFO(find_success);
		/*
		 * At the moment, we don't support PG_readahead for anon THP
		 * so let's bail out rather than confusing the readahead stat.
		 */
		if (unlikely(PageTransCompound(page)))
			return page;

		readahead = TestClearPageReadahead(page);
		if (vma && vma_ra) {
			unsigned long ra_val;
			int win, hits;

			ra_val = GET_SWAP_RA_VAL(vma);
			win = SWAP_RA_WIN(ra_val);
			hits = SWAP_RA_HITS(ra_val);
			if (readahead)
				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
			atomic_long_set(&vma->swap_readahead_info,
					SWAP_RA_VAL(addr, win, hits));
		}

		if (readahead) {
			count_vm_event(SWAP_RA_HIT);
			if (!vma || !vma_ra)
				atomic_inc(&swapin_readahead_hits);
		}
	}

	return page;
}

/**
 * find_get_incore_page - Find and get a page from the page or swap caches.
 * @mapping: The address_space to search.
 * @index: The page cache index.
 *
 * This differs from find_get_page() in that it will also look for the
 * page in the swap cache.
 *
 * Return: The found page or %NULL.
 */
struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
{
	swp_entry_t swp;
	struct swap_info_struct *si;
	struct page *page = pagecache_get_page(mapping, index,
						FGP_ENTRY | FGP_HEAD, 0);

	if (!page)
		return page;
	if (!xa_is_value(page))
		return find_subpage(page, index);
	if (!shmem_mapping(mapping))
		return NULL;

	swp = radix_to_swp_entry(page);
	/* Prevent swapoff from happening to us */
	si = get_swap_device(swp);
	if (!si)
		return NULL;
	page = find_get_page(swap_address_space(swp), swp_offset(swp));
	put_swap_device(si);
	return page;
}

struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr,
			bool *new_page_allocated)
{
	struct swap_info_struct *si;
	struct page *page;
	void *shadow = NULL;

	*new_page_allocated = false;

	for (;;) {
		int err;
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		si = get_swap_device(entry);
		if (!si)
			return NULL;
		page = find_get_page(swap_address_space(entry),
				     swp_offset(entry));
		put_swap_device(si);
		if (page)
			return page;

		/*
		 * Just skip read ahead for unused swap slot.
		 * During swap_off when swap_slot_cache is disabled,
		 * we have to handle the race between putting
		 * swap entry in swap cache and marking swap slot
		 * as SWAP_HAS_CACHE.  That's done in later part of code or
		 * else swap_off will be aborted if we return NULL.
		 */
		if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
			return NULL;

		/*
		 * Get a new page to read into from swap.  Allocate it now,
		 * before marking swap_map SWAP_HAS_CACHE, when -EEXIST will
		 * cause any racers to loop around until we add it to cache.
		 */
		page = alloc_page_vma(gfp_mask, vma, addr);
		if (!page)
			return NULL;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (!err)
			break;

		put_page(page);
		if (err != -EEXIST)
			return NULL;

		/*
		 * We might race against __delete_from_swap_cache(), and
		 * stumble across a swap_map entry whose SWAP_HAS_CACHE
		 * has not yet been cleared.  Or race against another
		 * __read_swap_cache_async(), which has set SWAP_HAS_CACHE
		 * in swap_map, but not yet added its page to swap cache.
		 */
		schedule_timeout_uninterruptible(1);
	}

	/*
	 * The swap entry is ours to swap in. Prepare the new page.
	 */

	__SetPageLocked(page);
	__SetPageSwapBacked(page);

	if (mem_cgroup_swapin_charge_page(page, NULL, gfp_mask, entry))
		goto fail_unlock;

	/* May fail (-ENOMEM) if XArray node allocation failed. */
	if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
		goto fail_unlock;

	mem_cgroup_swapin_uncharge_swap(entry);

	if (shadow)
		workingset_refault(page, shadow);

	/* Caller will initiate read into locked page */
	lru_cache_add(page);
	*new_page_allocated = true;
	return page;

fail_unlock:
	put_swap_page(page, entry);
	unlock_page(page);
	put_page(page);
	return NULL;
}

/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
		struct vm_area_struct *vma, unsigned long addr, bool do_poll)
{
	bool page_was_allocated;
	struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
			vma, addr, &page_was_allocated);

	if (page_was_allocated)
		swap_readpage(retpage, do_poll);

	return retpage;
}

static unsigned int __swapin_nr_pages(unsigned long prev_offset,
				      unsigned long offset,
				      int hits,
				      int max_pages,
				      int prev_win)
{
	unsigned int pages, last_ra;
  
  	/*
  	 * This heuristic has been found to work well on both sequential and
  	 * random loads, swapping to hard disk or to SSD: please don't ask
  	 * what the "+ 2" means, it just happens to work well, that's all.
  	 */
  	pages = hits + 2;
  	if (pages == 2) {
  		/*
  		 * We can have no readahead hits to judge by: but must not get
  		 * stuck here forever, so check for an adjacent offset instead
  		 * (and don't even bother to check whether swap type is same).
  		 */
  		if (offset != prev_offset + 1 && offset != prev_offset - 1)
  			pages = 1;
  	} else {
  		unsigned int roundup = 4;
  		while (roundup < pages)
  			roundup <<= 1;
  		pages = roundup;
  	}
  
  	if (pages > max_pages)
  		pages = max_pages;
  
  	/* Don't shrink readahead too fast */
  	last_ra = prev_win / 2;
  	if (pages < last_ra)
  		pages = last_ra;
  
  	return pages;
  }
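
/*
 * Example: after 3 readahead hits, pages = 3 + 2 = 5, which the else
 * branch rounds up to the next power of two, 8; with page_cluster == 3
 * (max_pages == 8) that is also the final window.  With no hits and a
 * fault that is not adjacent to the previous offset the window collapses
 * to a single page, and the prev_win / 2 floor keeps a large window from
 * shrinking by more than half in one step.
 */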
  
  static unsigned long swapin_nr_pages(unsigned long offset)
  {
  	static unsigned long prev_offset;
  	unsigned int hits, pages, max_pages;
  	static atomic_t last_readahead_pages;
  
  	max_pages = 1 << READ_ONCE(page_cluster);
  	if (max_pages <= 1)
  		return 1;
  
  	hits = atomic_xchg(&swapin_readahead_hits, 0);
	pages = __swapin_nr_pages(READ_ONCE(prev_offset), offset, hits,
				  max_pages,
				  atomic_read(&last_readahead_pages));
	if (!hits)
		WRITE_ONCE(prev_offset, offset);
  	atomic_set(&last_readahead_pages, pages);
  
  	return pages;
  }

/**
 * swap_cluster_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time.  We also make sure to queue
 * the 'original' request together with the readahead ones...
 *
 * This has been extended to use the NUMA policies from the mm triggering
 * the readahead.
 *
 * Caller must hold read mmap_lock if vmf->vma is not NULL.
 */
struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
				struct vm_fault *vmf)
{
	struct page *page;
	unsigned long entry_offset = swp_offset(entry);
	unsigned long offset = entry_offset;
	unsigned long start_offset, end_offset;
	unsigned long mask;
	struct swap_info_struct *si = swp_swap_info(entry);
	struct blk_plug plug;
	bool do_poll = true, page_allocated;
	struct vm_area_struct *vma = vmf->vma;
	unsigned long addr = vmf->address;

	mask = swapin_nr_pages(offset) - 1;
	if (!mask)
		goto skip;

	do_poll = false;
	/* Read a page_cluster sized and aligned cluster around offset. */
	start_offset = offset & ~mask;
	end_offset = offset | mask;
	if (!start_offset)	/* First page is swap header. */
		start_offset++;
	if (end_offset >= si->max)
		end_offset = si->max - 1;

	blk_start_plug(&plug);
	for (offset = start_offset; offset <= end_offset ; offset++) {
		/* Ok, do the async read-ahead now */
		page = __read_swap_cache_async(
			swp_entry(swp_type(entry), offset),
			gfp_mask, vma, addr, &page_allocated);
		if (!page)
			continue;
		if (page_allocated) {
			swap_readpage(page, false);
			if (offset != entry_offset) {
				SetPageReadahead(page);
				count_vm_event(SWAP_RA);
			}
		}
		put_page(page);
	}
	blk_finish_plug(&plug);
	lru_add_drain();	/* Push any new pages onto the LRU now */
skip:
	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
}
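
/*
 * Example of the cluster window above: with page_cluster == 3,
 * swapin_nr_pages() returns at most 8, so mask == 7 and a fault on swap
 * offset 29 reads the aligned block of offsets 24..31 (clamped to the
 * device size, and never touching offset 0, which holds the swap header).
 */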

int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
	struct address_space *spaces, *space;
	unsigned int i, nr;

	nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
	spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL);
	if (!spaces)
		return -ENOMEM;
	for (i = 0; i < nr; i++) {
		space = spaces + i;
		xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
		atomic_set(&space->i_mmap_writable, 0);
		space->a_ops = &swap_aops;
		/* swap cache doesn't use writeback related tags */
		mapping_set_no_writeback_tags(space);
	}
	nr_swapper_spaces[type] = nr;
	swapper_spaces[type] = spaces;

	return 0;
}

void exit_swap_address_space(unsigned int type)
{
	int i;
	struct address_space *spaces = swapper_spaces[type];

	for (i = 0; i < nr_swapper_spaces[type]; i++)
		VM_WARN_ON_ONCE(!mapping_empty(&spaces[i]));
	kvfree(spaces);
	nr_swapper_spaces[type] = 0;
	swapper_spaces[type] = NULL;
}
  
  static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
  				     unsigned long faddr,
  				     unsigned long lpfn,
  				     unsigned long rpfn,
  				     unsigned long *start,
  				     unsigned long *end)
  {
  	*start = max3(lpfn, PFN_DOWN(vma->vm_start),
  		      PFN_DOWN(faddr & PMD_MASK));
  	*end = min3(rpfn, PFN_DOWN(vma->vm_end),
  		    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
  }

static void swap_ra_info(struct vm_fault *vmf,
			struct vma_swap_readahead *ra_info)
{
	struct vm_area_struct *vma = vmf->vma;
	unsigned long ra_val;
	unsigned long faddr, pfn, fpfn;
	unsigned long start, end;
	pte_t *pte, *orig_pte;
	unsigned int max_win, hits, prev_win, win, left;
#ifndef CONFIG_64BIT
	pte_t *tpte;
#endif

	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
			     SWAP_RA_ORDER_CEILING);
	if (max_win == 1) {
		ra_info->win = 1;
		return;
	}

	faddr = vmf->address;
	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);

	fpfn = PFN_DOWN(faddr);
	ra_val = GET_SWAP_RA_VAL(vma);
	pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
	prev_win = SWAP_RA_WIN(ra_val);
	hits = SWAP_RA_HITS(ra_val);
	ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
					       max_win, prev_win);
	atomic_long_set(&vma->swap_readahead_info,
			SWAP_RA_VAL(faddr, win, 0));

	if (win == 1) {
		pte_unmap(orig_pte);
		return;
	}

	/* Copy the PTEs because the page table may be unmapped */
	if (fpfn == pfn + 1)
		swap_ra_clamp_pfn(vma, faddr, fpfn, fpfn + win, &start, &end);
	else if (pfn == fpfn + 1)
		swap_ra_clamp_pfn(vma, faddr, fpfn - win + 1, fpfn + 1,
				  &start, &end);
	else {
		left = (win - 1) / 2;
		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
				  &start, &end);
	}
	ra_info->nr_pte = end - start;
	ra_info->offset = fpfn - start;
	pte -= ra_info->offset;
#ifdef CONFIG_64BIT
	ra_info->ptes = pte;
#else
	tpte = ra_info->ptes;
	for (pfn = start; pfn != end; pfn++)
		*tpte++ = *pte++;
#endif
	pte_unmap(orig_pte);
}
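
/*
 * Window placement above: if the current fault address is one page after
 * the previously recorded one (fpfn == pfn + 1) the window extends forward
 * from the fault; if it is one page before (pfn == fpfn + 1) it extends
 * backward; otherwise it is roughly centred on the fault.  In all cases
 * swap_ra_clamp_pfn() keeps the range inside the VMA and the PMD that
 * contains the fault address.
 */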

/**
 * swap_vma_readahead - swap in pages in hope we need them soon
 * @fentry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read in a few pages whose
 * virtual addresses are around the fault address in the same vma.
 *
 * Caller must hold read mmap_lock if vmf->vma is not NULL.
 *
 */
static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
				       struct vm_fault *vmf)
{
	struct blk_plug plug;
	struct vm_area_struct *vma = vmf->vma;
	struct page *page;
	pte_t *pte, pentry;
	swp_entry_t entry;
	unsigned int i;
	bool page_allocated;
	struct vma_swap_readahead ra_info = {
		.win = 1,
	};

	swap_ra_info(vmf, &ra_info);
	if (ra_info.win == 1)
		goto skip;

	blk_start_plug(&plug);
	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
	     i++, pte++) {
		pentry = *pte;
		if (pte_none(pentry))
			continue;
		if (pte_present(pentry))
			continue;
		entry = pte_to_swp_entry(pentry);
		if (unlikely(non_swap_entry(entry)))
			continue;
		page = __read_swap_cache_async(entry, gfp_mask, vma,
					       vmf->address, &page_allocated);
		if (!page)
			continue;
		if (page_allocated) {
			swap_readpage(page, false);
			if (i != ra_info.offset) {
				SetPageReadahead(page);
				count_vm_event(SWAP_RA);
			}
		}
		put_page(page);
	}
	blk_finish_plug(&plug);
	lru_add_drain();
skip:
	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
				     ra_info.win == 1);
}

  /**
   * swapin_readahead - swap in pages in hope we need them soon
   * @entry: swap entry of this memory
   * @gfp_mask: memory allocation flags
   * @vmf: fault information
   *
   * Returns the struct page for entry and addr, after queueing swapin.
   *
   * It's a main entry function for swap readahead. By the configuration,
   * it will read ahead blocks by cluster-based(ie, physical disk based)
   * or vma-based(ie, virtual address based on faulty address) readahead.
   */
  struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
  				struct vm_fault *vmf)
  {
  	return swap_use_vma_readahead() ?
  			swap_vma_readahead(entry, gfp_mask, vmf) :
  			swap_cluster_readahead(entry, gfp_mask, vmf);
  }
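
/*
 * Which path is taken depends on the vma_ra_enabled knob defined below
 * (a "swap" kobject attribute under mm_kobj, typically
 * /sys/kernel/mm/swap/vma_ra_enabled) and on nr_rotate_swap: VMA-based
 * readahead is skipped entirely while rotating swap devices are in use,
 * see swap_use_vma_readahead().
 */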

#ifdef CONFIG_SYSFS
static ssize_t vma_ra_enabled_show(struct kobject *kobj,
				     struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n",
			  enable_vma_readahead ? "true" : "false");
}
static ssize_t vma_ra_enabled_store(struct kobject *kobj,
				      struct kobj_attribute *attr,
				      const char *buf, size_t count)
{
	if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
		enable_vma_readahead = true;
	else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
		enable_vma_readahead = false;
	else
		return -EINVAL;

	return count;
}
static struct kobj_attribute vma_ra_enabled_attr =
	__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
	       vma_ra_enabled_store);

static struct attribute *swap_attrs[] = {
	&vma_ra_enabled_attr.attr,
	NULL,
};

static const struct attribute_group swap_attr_group = {
	.attrs = swap_attrs,
};

static int __init swap_init_sysfs(void)
{
	int err;
	struct kobject *swap_kobj;

	swap_kobj = kobject_create_and_add("swap", mm_kobj);
	if (!swap_kobj) {
		pr_err("failed to create swap kobject\n");
		return -ENOMEM;
	}
	err = sysfs_create_group(swap_kobj, &swap_attr_group);
	if (err) {
		pr_err("failed to register swap group\n");
		goto delete_obj;
	}
	return 0;

delete_obj:
	kobject_put(swap_kobj);
	return err;
}
subsys_initcall(swap_init_sysfs);
#endif