mm/swap_state.c

// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/mm/swap_state.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *
 *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
 */
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>

#include <asm/pgtable.h>

/*
 * swapper_space is a fiction, retained to simplify the path through
 * vmscan's shrink_page_list.
 */
static const struct address_space_operations swap_aops = {
	.writepage	= swap_writepage,
	.set_page_dirty	= swap_set_page_dirty,
#ifdef CONFIG_MIGRATION
	.migratepage	= migrate_page,
#endif
};

struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
static bool enable_vma_readahead __read_mostly = true;

#define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
#define SWAP_RA_HITS_MASK	((1UL << SWAP_RA_WIN_SHIFT) - 1)
#define SWAP_RA_HITS_MAX	SWAP_RA_HITS_MASK
#define SWAP_RA_WIN_MASK	(~PAGE_MASK & ~SWAP_RA_HITS_MASK)

#define SWAP_RA_HITS(v)		((v) & SWAP_RA_HITS_MASK)
#define SWAP_RA_WIN(v)		(((v) & SWAP_RA_WIN_MASK) >> SWAP_RA_WIN_SHIFT)
#define SWAP_RA_ADDR(v)		((v) & PAGE_MASK)

#define SWAP_RA_VAL(addr, win, hits)				\
	(((addr) & PAGE_MASK) |					\
	 (((win) << SWAP_RA_WIN_SHIFT) & SWAP_RA_WIN_MASK) |	\
	 ((hits) & SWAP_RA_HITS_MASK))

/* Initial readahead hits is 4 to start up with a small window */
#define GET_SWAP_RA_VAL(vma)					\
	(atomic_long_read(&(vma)->swap_readahead_info) ? : 4)
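
/*
 * For example, with 4KB pages (PAGE_SHIFT == 12) SWAP_RA_WIN_SHIFT is 6, so
 * the readahead state packed into vma->swap_readahead_info is laid out as:
 *
 *	bits  0..5	readahead hits (capped at SWAP_RA_HITS_MAX == 63)
 *	bits  6..11	readahead window size
 *	bits 12..	page-aligned fault address
 *
 * SWAP_RA_VAL(0x7f0000005000, 8, 3) therefore encodes to
 * 0x7f0000005000 | (8 << 6) | 3 == 0x7f0000005203.
 */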

#define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
#define ADD_CACHE_INFO(x, nr)	do { swap_cache_info.x += (nr); } while (0)

static struct {
	unsigned long add_total;
	unsigned long del_total;
	unsigned long find_success;
	unsigned long find_total;
} swap_cache_info;

unsigned long total_swapcache_pages(void)
{
	unsigned int i, j, nr;
	unsigned long ret = 0;
	struct address_space *spaces;
	struct swap_info_struct *si;

	for (i = 0; i < MAX_SWAPFILES; i++) {
		swp_entry_t entry = swp_entry(i, 1);

		/* Avoid get_swap_device() to warn for bad swap entry */
		if (!swp_swap_info(entry))
			continue;
		/* Prevent swapoff to free swapper_spaces */
		si = get_swap_device(entry);
		if (!si)
			continue;
		nr = nr_swapper_spaces[i];
		spaces = swapper_spaces[i];
		for (j = 0; j < nr; j++)
			ret += spaces[j].nrpages;
		put_swap_device(si);
	}
	return ret;
}

static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);

void show_swap_cache_info(void)
{
	printk("%lu pages in swap cache\n", total_swapcache_pages());
	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
		swap_cache_info.add_total, swap_cache_info.del_total,
		swap_cache_info.find_success, swap_cache_info.find_total);
	printk("Free swap  = %ldkB\n",
		get_nr_swap_pages() << (PAGE_SHIFT - 10));
	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
}

/*
 * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 */
int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
{
	struct address_space *address_space = swap_address_space(entry);
	pgoff_t idx = swp_offset(entry);
	XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
	unsigned long i, nr = compound_nr(page);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapCache(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	page_ref_add(page, nr);
	SetPageSwapCache(page);

	do {
		xas_lock_irq(&xas);
		xas_create_range(&xas);
		if (xas_error(&xas))
			goto unlock;
		for (i = 0; i < nr; i++) {
			VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
			set_page_private(page + i, entry.val + i);
			xas_store(&xas, page);
			xas_next(&xas);
		}
		address_space->nrpages += nr;
		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
		ADD_CACHE_INFO(add_total, nr);
unlock:
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp));

	if (!xas_error(&xas))
		return 0;

	ClearPageSwapCache(page);
	page_ref_sub(page, nr);
	return xas_error(&xas);
}
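
/*
 * Note the resulting layout for a compound page: the loop above fills nr
 * consecutive slots of the swap address space (idx .. idx + nr - 1), each
 * slot pointing at the head page, while page_private() of every subpage
 * records that subpage's own swap entry (entry.val + i).
 */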

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page, swp_entry_t entry)
{
	struct address_space *address_space = swap_address_space(entry);
	int i, nr = hpage_nr_pages(page);
	pgoff_t idx = swp_offset(entry);
	XA_STATE(xas, &address_space->i_pages, idx);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
	VM_BUG_ON_PAGE(PageWriteback(page), page);

	for (i = 0; i < nr; i++) {
		void *entry = xas_store(&xas, NULL);
		VM_BUG_ON_PAGE(entry != page, entry);
		set_page_private(page + i, 0);
		xas_next(&xas);
	}
	ClearPageSwapCache(page);
	address_space->nrpages -= nr;
	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
	ADD_CACHE_INFO(del_total, nr);
}

/**
 * add_to_swap - allocate swap space for a page
 * @page: page we want to move to swap
 *
 * Allocate swap space for the page and add the page to the
 * swap cache.  Caller needs to hold the page lock.
 */
int add_to_swap(struct page *page)
{
	swp_entry_t entry;
	int err;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageUptodate(page), page);

	entry = get_swap_page(page);
	if (!entry.val)
		return 0;

	/*
	 * XArray node allocations from PF_MEMALLOC contexts could
	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
	 * stops emergency reserves from being allocated.
	 *
	 * TODO: this could cause a theoretical memory reclaim
	 * deadlock in the swap out path.
	 */
	/*
	 * Add it to the swap cache.
	 */
	err = add_to_swap_cache(page, entry,
			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
	if (err)
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		goto fail;
	/*
	 * Normally the page will be dirtied in unmap because its pte should be
	 * dirty. A special case is an MADV_FREE page: its pte could have the
	 * dirty bit cleared while the page's SwapBacked bit is still set,
	 * because clearing the dirty bit and the SwapBacked bit is not done
	 * under any lock. For such a page, unmap will not set the dirty bit,
	 * so page reclaim will not write the page out. This can cause data
	 * corruption when the page is swapped in later. Always setting the
	 * dirty bit for the page solves the problem.
	 */
	set_page_dirty(page);

	return 1;

fail:
	put_swap_page(page, entry);
	return 0;
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry = { .val = page_private(page) };
	struct address_space *address_space = swap_address_space(entry);

	xa_lock_irq(&address_space->i_pages);
	__delete_from_swap_cache(page, entry);
	xa_unlock_irq(&address_space->i_pages);

	put_swap_page(page, entry);
	page_ref_sub(page, hpage_nr_pages(page));
}

/*
 * If we are the only user, then try to free up the swap cache.
 *
 * It's ok to check for PageSwapCache without the page lock
 * here because we are going to recheck again inside
 * try_to_free_swap() _with_ the lock.
 * 					- Marcelo
 */
static inline void free_swap_cache(struct page *page)
{
	if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
		try_to_free_swap(page);
		unlock_page(page);
	}
}

/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page.
 */
void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);
	if (!is_huge_zero_page(page))
		put_page(page);
}

/*
 * Passed an array of pages, drop them all from swapcache and then release
 * them.  They are removed from the LRU and freed if this is their last use.
 */
void free_pages_and_swap_cache(struct page **pages, int nr)
{
	struct page **pagep = pages;
	int i;

	lru_add_drain();
	for (i = 0; i < nr; i++)
		free_swap_cache(pagep[i]);
	release_pages(pagep, nr);
}

static inline bool swap_use_vma_readahead(void)
{
	return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap);
}

/*
 * Lookup a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
			       unsigned long addr)
{
	struct page *page;
	struct swap_info_struct *si;

	si = get_swap_device(entry);
	if (!si)
		return NULL;
	page = find_get_page(swap_address_space(entry), swp_offset(entry));
	put_swap_device(si);

	INC_CACHE_INFO(find_total);
	if (page) {
		bool vma_ra = swap_use_vma_readahead();
		bool readahead;

		INC_CACHE_INFO(find_success);
		/*
		 * At the moment, we don't support PG_readahead for anon THP
		 * so let's bail out rather than confusing the readahead stat.
		 */
		if (unlikely(PageTransCompound(page)))
			return page;

		readahead = TestClearPageReadahead(page);
		if (vma && vma_ra) {
			unsigned long ra_val;
			int win, hits;

			ra_val = GET_SWAP_RA_VAL(vma);
			win = SWAP_RA_WIN(ra_val);
			hits = SWAP_RA_HITS(ra_val);
			if (readahead)
				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
			atomic_long_set(&vma->swap_readahead_info,
					SWAP_RA_VAL(addr, win, hits));
		}

		if (readahead) {
			count_vm_event(SWAP_RA_HIT);
			if (!vma || !vma_ra)
				atomic_inc(&swapin_readahead_hits);
		}
	}

	return page;
}

struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr,
			bool *new_page_allocated)
{
	struct page *found_page = NULL, *new_page = NULL;
	struct swap_info_struct *si;
	int err;
	*new_page_allocated = false;

	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		si = get_swap_device(entry);
		if (!si)
			break;
		found_page = find_get_page(swap_address_space(entry),
					   swp_offset(entry));
		put_swap_device(si);
		if (found_page)
			break;

		/*
		 * Just skip read ahead for unused swap slot.
		 * During swap_off when swap_slot_cache is disabled,
		 * we have to handle the race between putting
		 * swap entry in swap cache and marking swap slot
		 * as SWAP_HAS_CACHE.  That's done in later part of code or
		 * else swap_off will be aborted if we return NULL.
		 */
		if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page_vma(gfp_mask, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) {
			/*
			 * We might race against get_swap_page() and stumble
			 * across a SWAP_HAS_CACHE swap_map entry whose page
			 * has not been brought into the swapcache yet.
			 */
			cond_resched();
			continue;
		} else if (err)		/* swp entry is obsolete ? */
			break;

		/* May fail (-ENOMEM) if XArray node allocation failed. */
		__SetPageLocked(new_page);
		__SetPageSwapBacked(new_page);
		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
		if (likely(!err)) {
			/* Initiate read into locked page */
			SetPageWorkingset(new_page);
			lru_cache_add_anon(new_page);
			*new_page_allocated = true;
			return new_page;
		}
		__ClearPageLocked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		put_swap_page(new_page, entry);
	} while (err != -ENOMEM);

	if (new_page)
		put_page(new_page);
	return found_page;
}
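
/*
 * To recap the loop above: a page already in the swap cache, an unused swap
 * slot, a failed page allocation or an obsolete swap entry all end the loop;
 * -EEXIST from swapcache_prepare() means another task owns SWAP_HAS_CACHE
 * but has not installed the page yet, so we cond_resched() and retry; and
 * -ENOMEM from add_to_swap_cache() gives up, dropping the preallocated page.
 */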

/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
		struct vm_area_struct *vma, unsigned long addr, bool do_poll)
{
	bool page_was_allocated;
	struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
			vma, addr, &page_was_allocated);

	if (page_was_allocated)
		swap_readpage(retpage, do_poll);

	return retpage;
}

static unsigned int __swapin_nr_pages(unsigned long prev_offset,
				      unsigned long offset,
				      int hits,
				      int max_pages,
				      int prev_win)
{
	unsigned int pages, last_ra;

	/*
	 * This heuristic has been found to work well on both sequential and
	 * random loads, swapping to hard disk or to SSD: please don't ask
	 * what the "+ 2" means, it just happens to work well, that's all.
	 */
	pages = hits + 2;
	if (pages == 2) {
		/*
		 * We can have no readahead hits to judge by: but must not get
		 * stuck here forever, so check for an adjacent offset instead
		 * (and don't even bother to check whether swap type is same).
		 */
		if (offset != prev_offset + 1 && offset != prev_offset - 1)
			pages = 1;
	} else {
		unsigned int roundup = 4;
		while (roundup < pages)
			roundup <<= 1;
		pages = roundup;
	}

	if (pages > max_pages)
		pages = max_pages;

	/* Don't shrink readahead too fast */
	last_ra = prev_win / 2;
	if (pages < last_ra)
		pages = last_ra;

	return pages;
}

static unsigned long swapin_nr_pages(unsigned long offset)
{
	static unsigned long prev_offset;
	unsigned int hits, pages, max_pages;
	static atomic_t last_readahead_pages;

	max_pages = 1 << READ_ONCE(page_cluster);
	if (max_pages <= 1)
		return 1;

	hits = atomic_xchg(&swapin_readahead_hits, 0);
	pages = __swapin_nr_pages(prev_offset, offset, hits, max_pages,
				  atomic_read(&last_readahead_pages));
	if (!hits)
		prev_offset = offset;
	atomic_set(&last_readahead_pages, pages);

	return pages;
}
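
/*
 * A short worked example of the heuristic in __swapin_nr_pages(): with no
 * recorded hits (pages == 2) the window stays at 2 only if this fault is
 * adjacent to the previous offset, otherwise it collapses to a single page.
 * With hits == 5, pages == 7 is rounded up to the next power of two (8) and
 * clamped to max_pages, and the window is never allowed to drop below half
 * of the previous window (prev_win / 2).
 */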

/**
 * swap_cluster_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time.  We also make sure to queue
 * the 'original' request together with the readahead ones...
 *
 * This has been extended to use the NUMA policies from the mm triggering
 * the readahead.
 *
 * Caller must hold read mmap_sem if vmf->vma is not NULL.
 */
struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
				struct vm_fault *vmf)
{
	struct page *page;
	unsigned long entry_offset = swp_offset(entry);
	unsigned long offset = entry_offset;
	unsigned long start_offset, end_offset;
	unsigned long mask;
	struct swap_info_struct *si = swp_swap_info(entry);
	struct blk_plug plug;
	bool do_poll = true, page_allocated;
	struct vm_area_struct *vma = vmf->vma;
	unsigned long addr = vmf->address;

	mask = swapin_nr_pages(offset) - 1;
	if (!mask)
		goto skip;

	/* Test swap type to make sure the dereference is safe */
	if (likely(si->flags & (SWP_BLKDEV | SWP_FS))) {
		struct inode *inode = si->swap_file->f_mapping->host;
		if (inode_read_congested(inode))
			goto skip;
	}

	do_poll = false;
	/* Read a page_cluster sized and aligned cluster around offset. */
	start_offset = offset & ~mask;
	end_offset = offset | mask;
	if (!start_offset)	/* First page is swap header. */
		start_offset++;
	if (end_offset >= si->max)
		end_offset = si->max - 1;

	blk_start_plug(&plug);
	for (offset = start_offset; offset <= end_offset ; offset++) {
		/* Ok, do the async read-ahead now */
		page = __read_swap_cache_async(
			swp_entry(swp_type(entry), offset),
			gfp_mask, vma, addr, &page_allocated);
		if (!page)
			continue;
		if (page_allocated) {
			swap_readpage(page, false);
			if (offset != entry_offset) {
				SetPageReadahead(page);
				count_vm_event(SWAP_RA);
			}
		}
		put_page(page);
	}
	blk_finish_plug(&plug);
	lru_add_drain();	/* Push any new pages onto the LRU now */
skip:
	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
}
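
/*
 * For example, with page_cluster == 3 (the usual default), swapin_nr_pages()
 * returns at most 8, giving mask == 7: a fault on swap offset 21 then reads
 * the aligned block of offsets 16..23, clamped to si->max and skipping
 * offset 0, which holds the swap header.
 */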

int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
	struct address_space *spaces, *space;
	unsigned int i, nr;

	nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
	spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL);
	if (!spaces)
		return -ENOMEM;
	for (i = 0; i < nr; i++) {
		space = spaces + i;
		xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
		atomic_set(&space->i_mmap_writable, 0);
		space->a_ops = &swap_aops;
		/* swap cache doesn't use writeback related tags */
		mapping_set_no_writeback_tags(space);
	}
	nr_swapper_spaces[type] = nr;
	swapper_spaces[type] = spaces;

	return 0;
}

void exit_swap_address_space(unsigned int type)
{
	kvfree(swapper_spaces[type]);
	nr_swapper_spaces[type] = 0;
	swapper_spaces[type] = NULL;
}

static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
				     unsigned long faddr,
				     unsigned long lpfn,
				     unsigned long rpfn,
				     unsigned long *start,
				     unsigned long *end)
{
	*start = max3(lpfn, PFN_DOWN(vma->vm_start),
		      PFN_DOWN(faddr & PMD_MASK));
	*end = min3(rpfn, PFN_DOWN(vma->vm_end),
		    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
}
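
/*
 * In other words, the requested window [lpfn, rpfn) is clamped so that it
 * stays inside the VMA and inside the single PMD-sized region (2MB with 4KB
 * pages on most architectures) covering the fault address, which keeps all
 * of the PTEs of interest within one page table page.
 */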

static void swap_ra_info(struct vm_fault *vmf,
			struct vma_swap_readahead *ra_info)
{
	struct vm_area_struct *vma = vmf->vma;
	unsigned long ra_val;
	swp_entry_t entry;
	unsigned long faddr, pfn, fpfn;
	unsigned long start, end;
	pte_t *pte, *orig_pte;
	unsigned int max_win, hits, prev_win, win, left;
#ifndef CONFIG_64BIT
	pte_t *tpte;
#endif

	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
			     SWAP_RA_ORDER_CEILING);
	if (max_win == 1) {
		ra_info->win = 1;
		return;
	}

	faddr = vmf->address;
	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
	entry = pte_to_swp_entry(*pte);
	if ((unlikely(non_swap_entry(entry)))) {
		pte_unmap(orig_pte);
		return;
	}

	fpfn = PFN_DOWN(faddr);
	ra_val = GET_SWAP_RA_VAL(vma);
	pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
	prev_win = SWAP_RA_WIN(ra_val);
	hits = SWAP_RA_HITS(ra_val);
	ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
					       max_win, prev_win);
	atomic_long_set(&vma->swap_readahead_info,
			SWAP_RA_VAL(faddr, win, 0));

	if (win == 1) {
		pte_unmap(orig_pte);
		return;
	}

	/* Copy the PTEs because the page table may be unmapped */
	if (fpfn == pfn + 1)
		swap_ra_clamp_pfn(vma, faddr, fpfn, fpfn + win, &start, &end);
	else if (pfn == fpfn + 1)
		swap_ra_clamp_pfn(vma, faddr, fpfn - win + 1, fpfn + 1,
				  &start, &end);
	else {
		left = (win - 1) / 2;
		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
				  &start, &end);
	}
	ra_info->nr_pte = end - start;
	ra_info->offset = fpfn - start;
	pte -= ra_info->offset;
#ifdef CONFIG_64BIT
	ra_info->ptes = pte;
#else
	tpte = ra_info->ptes;
	for (pfn = start; pfn != end; pfn++)
		*tpte++ = *pte++;
#endif
	pte_unmap(orig_pte);
}
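
/*
 * A concrete example of the window placement above, for win == 8: on a
 * forward sequential fault (fpfn == pfn + 1) the window is the faulting page
 * plus the next 7; on a backward fault (pfn == fpfn + 1) it is the faulting
 * page plus the previous 7; otherwise it is roughly centred (left == 3, so
 * 3 pages before and 4 after the fault), before swap_ra_clamp_pfn() trims it
 * to the VMA and PMD boundaries.
 */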

/**
 * swap_vma_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read in a few pages whose
 * virtual addresses are around the fault address in the same vma.
 *
 * Caller must hold read mmap_sem if vmf->vma is not NULL.
 *
 */
static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
				       struct vm_fault *vmf)
{
	struct blk_plug plug;
	struct vm_area_struct *vma = vmf->vma;
	struct page *page;
	pte_t *pte, pentry;
	swp_entry_t entry;
	unsigned int i;
	bool page_allocated;
	struct vma_swap_readahead ra_info = {0,};

	swap_ra_info(vmf, &ra_info);
	if (ra_info.win == 1)
		goto skip;

	blk_start_plug(&plug);
	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
	     i++, pte++) {
		pentry = *pte;
		if (pte_none(pentry))
			continue;
		if (pte_present(pentry))
			continue;
		entry = pte_to_swp_entry(pentry);
		if (unlikely(non_swap_entry(entry)))
			continue;
		page = __read_swap_cache_async(entry, gfp_mask, vma,
					       vmf->address, &page_allocated);
		if (!page)
			continue;
		if (page_allocated) {
			swap_readpage(page, false);
			if (i != ra_info.offset) {
				SetPageReadahead(page);
				count_vm_event(SWAP_RA);
			}
		}
		put_page(page);
	}
	blk_finish_plug(&plug);
	lru_add_drain();
skip:
	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
				     ra_info.win == 1);
}

/**
 * swapin_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * It's the main entry point for swap readahead. Depending on the
 * configuration, it reads ahead either by cluster-based (ie, physical
 * disk based) or vma-based (ie, virtual addresses around the faulting
 * address) readahead.
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
				struct vm_fault *vmf)
{
	return swap_use_vma_readahead() ?
			swap_vma_readahead(entry, gfp_mask, vmf) :
			swap_cluster_readahead(entry, gfp_mask, vmf);
}

#ifdef CONFIG_SYSFS
static ssize_t vma_ra_enabled_show(struct kobject *kobj,
				     struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%s\n", enable_vma_readahead ? "true" : "false");
}
static ssize_t vma_ra_enabled_store(struct kobject *kobj,
				      struct kobj_attribute *attr,
				      const char *buf, size_t count)
{
	if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
		enable_vma_readahead = true;
	else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
		enable_vma_readahead = false;
	else
		return -EINVAL;

	return count;
}
static struct kobj_attribute vma_ra_enabled_attr =
	__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
	       vma_ra_enabled_store);
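
/*
 * With the "swap" kobject registered under mm_kobj below, this attribute is
 * normally exposed as /sys/kernel/mm/swap/vma_ra_enabled; writing "true"/"1"
 * or "false"/"0" toggles VMA-based readahead at run time.
 */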

static struct attribute *swap_attrs[] = {
	&vma_ra_enabled_attr.attr,
	NULL,
};

static struct attribute_group swap_attr_group = {
	.attrs = swap_attrs,
};

static int __init swap_init_sysfs(void)
{
	int err;
	struct kobject *swap_kobj;

	swap_kobj = kobject_create_and_add("swap", mm_kobj);
	if (!swap_kobj) {
		pr_err("failed to create swap kobject\n");
		return -ENOMEM;
	}
	err = sysfs_create_group(swap_kobj, &swap_attr_group);
	if (err) {
		pr_err("failed to register swap group\n");
		goto delete_obj;
	}
	return 0;

delete_obj:
	kobject_put(swap_kobj);
	return err;
}
subsys_initcall(swap_init_sysfs);
#endif