Blame view

mm/swap_state.c 13.2 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
  /*
   *  linux/mm/swap_state.c
   *
   *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   *  Swap reorganised 29.12.95, Stephen Tweedie
   *
   *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
  #include <linux/mm.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
10
  #include <linux/gfp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
11
12
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
46017e954   Hugh Dickins   swapin_readahead:...
13
  #include <linux/swapops.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
15
  #include <linux/init.h>
  #include <linux/pagemap.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
  #include <linux/backing-dev.h>
3fb5c298b   Christian Ehrhardt   swap: allow swap ...
17
  #include <linux/blkdev.h>
c484d4104   Hugh Dickins   [PATCH] mm: free_...
18
  #include <linux/pagevec.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
19
  #include <linux/migrate.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
21
22
23
24
  
  #include <asm/pgtable.h>
  
/*
 * swapper_space is a fiction, retained to simplify the path through
 * vmscan's shrink_page_list.
 */
static const struct address_space_operations swap_aops = {
	.writepage	= swap_writepage,
	.set_page_dirty	= swap_set_page_dirty,
#ifdef CONFIG_MIGRATION
	.migratepage	= migrate_page,
#endif
};
33806f06d   Shaohua Li   swap: make each s...
34
35
36
/*
 * One address_space per swap type: swap-cache pages are looked up via
 * swap_address_space(entry) and keyed in the radix tree by swp_offset().
 */
struct address_space swapper_spaces[MAX_SWAPFILES] = {
	[0 ... MAX_SWAPFILES - 1] = {
		.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
		.i_mmap_writable = ATOMIC_INIT(0),
		.a_ops		= &swap_aops,
		/* swap cache doesn't use writeback related tags */
		.flags		= 1 << AS_NO_WRITEBACK_TAGS,
	}
};
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
43
44
45
46
47
48
49
50
  
#define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)

/*
 * Cumulative swap-cache statistics, printed by show_swap_cache_info().
 * add/del are bumped under the address_space tree_lock; the find
 * counters are bumped locklessly, so they are approximate.
 */
static struct {
	unsigned long add_total;
	unsigned long del_total;
	unsigned long find_success;
	unsigned long find_total;
} swap_cache_info;
33806f06d   Shaohua Li   swap: make each s...
52
53
54
55
56
57
58
59
60
  unsigned long total_swapcache_pages(void)
  {
  	int i;
  	unsigned long ret = 0;
  
  	for (i = 0; i < MAX_SWAPFILES; i++)
  		ret += swapper_spaces[i].nrpages;
  	return ret;
  }
579f82901   Shaohua Li   swap: add a simpl...
61
/*
 * Readahead pages found again in the swap cache since the last
 * swapin_nr_pages() call (which resets it via atomic_xchg); seeded
 * at 4 so the readahead window starts out non-trivial.
 */
static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
62
63
  void show_swap_cache_info(void)
  {
33806f06d   Shaohua Li   swap: make each s...
64
65
  	printk("%lu pages in swap cache
  ", total_swapcache_pages());
2c97b7fc0   Johannes Weiner   mm: print swapcac...
66
67
  	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu
  ",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
68
  		swap_cache_info.add_total, swap_cache_info.del_total,
bb63be0a0   Hugh Dickins   tmpfs: move swap_...
69
  		swap_cache_info.find_success, swap_cache_info.find_total);
ec8acf20a   Shaohua Li   swap: add per-par...
70
71
72
  	printk("Free swap  = %ldkB
  ",
  		get_nr_swap_pages() << (PAGE_SHIFT - 10));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
73
74
75
76
77
  	printk("Total swap = %lukB
  ", total_swap_pages << (PAGE_SHIFT - 10));
  }
  
/*
 * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 *
 * Caller must have already done radix_tree_(maybe_)preload(); on failure
 * all page state set here is rolled back and the error is returned.
 */
int __add_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int error;
	struct address_space *address_space;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapCache(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	/* Take the swap cache's reference and mark the page before insert. */
	get_page(page);
	SetPageSwapCache(page);
	set_page_private(page, entry.val);

	address_space = swap_address_space(entry);
	spin_lock_irq(&address_space->tree_lock);
	error = radix_tree_insert(&address_space->page_tree,
				  swp_offset(entry), page);
	if (likely(!error)) {
		/* Accounting is done under tree_lock, mirrored on delete. */
		address_space->nrpages++;
		__inc_node_page_state(page, NR_FILE_PAGES);
		INC_CACHE_INFO(add_total);
	}
	spin_unlock_irq(&address_space->tree_lock);

	if (unlikely(error)) {
		/*
		 * Only the context which have set SWAP_HAS_CACHE flag
		 * would call add_to_swap_cache().
		 * So add_to_swap_cache() doesn't returns -EEXIST.
		 */
		VM_BUG_ON(error == -EEXIST);
		/* Undo the speculative state set above. */
		set_page_private(page, 0UL);
		ClearPageSwapCache(page);
		put_page(page);
	}

	return error;
}
  
  
  int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
  {
  	int error;
5e4c0d974   Jan Kara   lib/radix-tree.c:...
123
  	error = radix_tree_maybe_preload(gfp_mask);
35c754d79   Balbir Singh   memory controller...
124
  	if (!error) {
31a563962   Daisuke Nishimura   mm: add_to_swap_c...
125
  		error = __add_to_swap_cache(page, entry);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
126
  		radix_tree_preload_end();
fa1de9008   Hugh Dickins   memcgroup: revert...
127
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
128
129
  	return error;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
130
131
132
133
134
135
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 *
 * Caller holds the address_space tree_lock; this undoes, in order,
 * everything __add_to_swap_cache() set up except the page reference
 * (which the caller drops after unlocking).
 */
void __delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry;
	struct address_space *address_space;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
	VM_BUG_ON_PAGE(PageWriteback(page), page);

	/* page_private() still holds the swap entry at this point. */
	entry.val = page_private(page);
	address_space = swap_address_space(entry);
	radix_tree_delete(&address_space->page_tree, swp_offset(entry));
	set_page_private(page, 0);
	ClearPageSwapCache(page);
	address_space->nrpages--;
	__dec_node_page_state(page, NR_FILE_PAGES);
	INC_CACHE_INFO(del_total);
}
  
/**
 * add_to_swap - allocate swap space for a page
 * @page: page we want to move to swap
 * @list: list the tail pages are put on if a THP has to be split
 *
 * Allocate swap space for the page and add the page to the
 * swap cache.  Caller needs to hold the page lock.
 *
 * Returns 1 on success, 0 on failure (no swap space, memcg swap charge
 * failure, THP split failure, or -ENOMEM from the radix tree); on every
 * failure path the just-allocated swap entry is released again.
 */
int add_to_swap(struct page *page, struct list_head *list)
{
	swp_entry_t entry;
	int err;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageUptodate(page), page);

	entry = get_swap_page();
	if (!entry.val)
		return 0;

	/* Charge the swap entry to the memcg before caching the page. */
	if (mem_cgroup_try_charge_swap(page, entry)) {
		swapcache_free(entry);
		return 0;
	}

	/* Huge pages can't be swapped as-is; split before caching. */
	if (unlikely(PageTransHuge(page)))
		if (unlikely(split_huge_page_to_list(page, list))) {
			swapcache_free(entry);
			return 0;
		}

	/*
	 * Radix-tree node allocations from PF_MEMALLOC contexts could
	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
	 * stops emergency reserves from being allocated.
	 *
	 * TODO: this could cause a theoretical memory reclaim
	 * deadlock in the swap out path.
	 */
	/*
	 * Add it to the swap cache.
	 */
	err = add_to_swap_cache(page, entry,
			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);

	if (!err) {
		return 1;
	} else {	/* -ENOMEM radix-tree allocation failure */
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry);
		return 0;
	}
}
  
/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry;
	struct address_space *address_space;

	/* Grab the entry before __delete_from_swap_cache() clears it. */
	entry.val = page_private(page);

	address_space = swap_address_space(entry);
	spin_lock_irq(&address_space->tree_lock);
	__delete_from_swap_cache(page);
	spin_unlock_irq(&address_space->tree_lock);

	/* Release the swap slot and the swap cache's page reference. */
	swapcache_free(entry);
	put_page(page);
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
224
225
226
227
  /* 
   * If we are the only user, then try to free up the swap cache. 
   * 
   * Its ok to check for PageSwapCache without the page lock
a2c43eed8   Hugh Dickins   mm: try_to_free_s...
228
229
   * here because we are going to recheck again inside
   * try_to_free_swap() _with_ the lock.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
230
231
232
233
   * 					- Marcelo
   */
  static inline void free_swap_cache(struct page *page)
  {
a2c43eed8   Hugh Dickins   mm: try_to_free_s...
234
235
  	if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
  		try_to_free_swap(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
236
237
238
239
240
241
  		unlock_page(page);
  	}
  }
  
/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page.
 */
void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);
	/* The huge zero page is shared and never reference-freed here. */
	if (!is_huge_zero_page(page))
		put_page(page);
}
  
  /*
   * Passed an array of pages, drop them all from swapcache and then release
   * them.  They are removed from the LRU and freed if this is their last use.
   */
  void free_pages_and_swap_cache(struct page **pages, int nr)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
257
  	struct page **pagep = pages;
aabfb5729   Michal Hocko   mm: memcontrol: d...
258
  	int i;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
259
260
  
  	lru_add_drain();
aabfb5729   Michal Hocko   mm: memcontrol: d...
261
262
263
  	for (i = 0; i < nr; i++)
  		free_swap_cache(pagep[i]);
  	release_pages(pagep, nr, false);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
264
265
266
267
268
269
270
271
272
273
274
  }
  
/*
 * Lookup a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page * lookup_swap_cache(swp_entry_t entry)
{
	struct page *page;

	page = find_get_page(swap_address_space(entry), swp_offset(entry));

	if (page) {
		INC_CACHE_INFO(find_success);
		/* A hit on a readahead page feeds the readahead heuristic. */
		if (TestClearPageReadahead(page))
			atomic_inc(&swapin_readahead_hits);
	}

	INC_CACHE_INFO(find_total);
	return page;
}
5b999aadb   Dmitry Safonov   mm: swap: zswap: ...
286
287
288
/*
 * Find or create the swap-cache page for @entry.  Returns the page with
 * its refcount raised, or NULL on failure.  *@new_page_allocated is set
 * true when a fresh page was inserted (caller must start the read);
 * false when an existing cached page was found.
 */
struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr,
			bool *new_page_allocated)
{
	struct page *found_page, *new_page = NULL;
	struct address_space *swapper_space = swap_address_space(entry);
	int err;
	*new_page_allocated = false;

	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(swapper_space, swp_offset(entry));
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page_vma(gfp_mask, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_maybe_preload(gfp_mask & GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) {
			radix_tree_preload_end();
			/*
			 * We might race against get_swap_page() and stumble
			 * across a SWAP_HAS_CACHE swap_map entry whose page
			 * has not been brought into the swapcache yet, while
			 * the other end is scheduled away waiting on discard
			 * I/O completion at scan_swap_map().
			 *
			 * In order to avoid turning this transitory state
			 * into a permanent loop around this -EEXIST case
			 * if !CONFIG_PREEMPT and the I/O completion happens
			 * to be waiting on the CPU waitqueue where we are now
			 * busy looping, we just conditionally invoke the
			 * scheduler here, if there are some more important
			 * tasks to run.
			 */
			cond_resched();
			continue;
		}
		if (err) {		/* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__SetPageLocked(new_page);
		__SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			/*
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_anon(new_page);
			*new_page_allocated = true;
			return new_page;
		}
		radix_tree_preload_end();
		__ClearPageLocked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry);
	} while (err != -ENOMEM);

	/* Drop the page we allocated but never inserted, if any. */
	if (new_page)
		put_page(new_page);
	return found_page;
}
46017e954   Hugh Dickins   swapin_readahead:...
376

5b999aadb   Dmitry Safonov   mm: swap: zswap: ...
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
  /*
   * Locate a page of swap in physical memory, reserving swap cache space
   * and reading the disk if it is not already cached.
   * A failure return means that either the page allocation failed or that
   * the swap entry is no longer in use.
   */
  struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
  			struct vm_area_struct *vma, unsigned long addr)
  {
  	bool page_was_allocated;
  	struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
  			vma, addr, &page_was_allocated);
  
  	if (page_was_allocated)
  		swap_readpage(retpage);
  
  	return retpage;
  }
579f82901   Shaohua Li   swap: add a simpl...
395
396
397
398
399
/*
 * Decide how many pages to read around @offset, based on recent
 * readahead hit rate (swapin_readahead_hits), capped by page_cluster.
 * Uses static state (prev_offset, last_readahead_pages) shared across
 * callers, so the result is a heuristic, not exact.
 */
static unsigned long swapin_nr_pages(unsigned long offset)
{
	static unsigned long prev_offset;
	unsigned int pages, max_pages, last_ra;
	static atomic_t last_readahead_pages;

	max_pages = 1 << READ_ONCE(page_cluster);
	if (max_pages <= 1)
		return 1;

	/*
	 * This heuristic has been found to work well on both sequential and
	 * random loads, swapping to hard disk or to SSD: please don't ask
	 * what the "+ 2" means, it just happens to work well, that's all.
	 */
	pages = atomic_xchg(&swapin_readahead_hits, 0) + 2;
	if (pages == 2) {
		/*
		 * We can have no readahead hits to judge by: but must not get
		 * stuck here forever, so check for an adjacent offset instead
		 * (and don't even bother to check whether swap type is same).
		 */
		if (offset != prev_offset + 1 && offset != prev_offset - 1)
			pages = 1;
		prev_offset = offset;
	} else {
		/* Round the hit count up to the next power of two (>= 4). */
		unsigned int roundup = 4;
		while (roundup < pages)
			roundup <<= 1;
		pages = roundup;
	}

	if (pages > max_pages)
		pages = max_pages;

	/* Don't shrink readahead too fast */
	last_ra = atomic_read(&last_readahead_pages) / 2;
	if (pages < last_ra)
		pages = last_ra;
	atomic_set(&last_readahead_pages, pages);

	return pages;
}
46017e954   Hugh Dickins   swapin_readahead:...
437
438
439
/**
 * swapin_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vma: user vma this address belongs to
 * @addr: target address for mempolicy
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time.  We also make sure to queue
 * the 'original' request together with the readahead ones...
 *
 * This has been extended to use the NUMA policies from the mm triggering
 * the readahead.
 *
 * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;
	unsigned long entry_offset = swp_offset(entry);
	unsigned long offset = entry_offset;
	unsigned long start_offset, end_offset;
	unsigned long mask;
	struct blk_plug plug;

	/* mask == 0 means a one-page window: skip readahead entirely. */
	mask = swapin_nr_pages(offset) - 1;
	if (!mask)
		goto skip;

	/* Read a page_cluster sized and aligned cluster around offset. */
	start_offset = offset & ~mask;
	end_offset = offset | mask;
	if (!start_offset)	/* First page is swap header. */
		start_offset++;

	/* Plug so the readahead reads can be submitted as one batch. */
	blk_start_plug(&plug);
	for (offset = start_offset; offset <= end_offset ; offset++) {
		/* Ok, do the async read-ahead now */
		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
						gfp_mask, vma, addr);
		if (!page)
			continue;
		/* Tag speculative pages so hits can tune the window size. */
		if (offset != entry_offset)
			SetPageReadahead(page);
		put_page(page);
	}
	blk_finish_plug(&plug);

	lru_add_drain();	/* Push any new pages onto the LRU now */
skip:
	/* The target page itself: read (or find) it and return it. */
	return read_swap_cache_async(entry, gfp_mask, vma, addr);
}