Blame view

mm/swap_state.c 10.7 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
  /*
   *  linux/mm/swap_state.c
   *
   *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   *  Swap reorganised 29.12.95, Stephen Tweedie
   *
   *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
  #include <linux/mm.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
10
  #include <linux/gfp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
11
12
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
46017e954   Hugh Dickins   swapin_readahead:...
13
  #include <linux/swapops.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
15
  #include <linux/init.h>
  #include <linux/pagemap.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
  #include <linux/backing-dev.h>
c484d4104   Hugh Dickins   [PATCH] mm: free_...
17
  #include <linux/pagevec.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
18
  #include <linux/migrate.h>
8c7c6e34a   KAMEZAWA Hiroyuki   memcg: mem+swap c...
19
  #include <linux/page_cgroup.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
21
22
23
24
  
  #include <asm/pgtable.h>
  
  /*
   * swapper_space is a fiction, retained to simplify the path through
7eaceacca   Jens Axboe   block: remove per...
25
   * vmscan's shrink_page_list.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
26
   */
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
27
  static const struct address_space_operations swap_aops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
28
  	.writepage	= swap_writepage,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
  	.set_page_dirty	= __set_page_dirty_nobuffers,
e965f9630   Christoph Lameter   [PATCH] Direct Mi...
30
  	.migratepage	= migrate_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31
32
33
  };
  
  static struct backing_dev_info swap_backing_dev_info = {
d993831fa   Jens Axboe   writeback: add na...
34
  	.name		= "swap",
4f98a2fee   Rik van Riel   vmscan: split LRU...
35
  	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
36
37
38
39
  };
  
  struct address_space swapper_space = {
  	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
19fd62312   Nick Piggin   mm: spinlock tree...
40
  	.tree_lock	= __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock),
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
41
42
43
44
  	.a_ops		= &swap_aops,
  	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
  	.backing_dev_info = &swap_backing_dev_info,
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
47
48
49
50
51
52
  
  /*
   * Bump one field of swap_cache_info.  The macro is a plain increment and
   * does no locking of its own.
   */
  #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
  
  /* Event counters printed by show_swap_cache_info(). */
  static struct {
  	unsigned long add_total;	/* successful __add_to_swap_cache() */
  	unsigned long del_total;	/* __delete_from_swap_cache() calls */
  	unsigned long find_success;	/* lookup_swap_cache() hits */
  	unsigned long find_total;	/* lookup_swap_cache() calls */
  } swap_cache_info;
  
  void show_swap_cache_info(void)
  {
2c97b7fc0   Johannes Weiner   mm: print swapcac...
57
58
59
60
  	printk("%lu pages in swap cache
  ", total_swapcache_pages);
  	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu
  ",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
61
  		swap_cache_info.add_total, swap_cache_info.del_total,
bb63be0a0   Hugh Dickins   tmpfs: move swap_...
62
  		swap_cache_info.find_success, swap_cache_info.find_total);
07279cdfd   Hugh Dickins   mm: show free swa...
63
64
  	printk("Free swap  = %ldkB
  ", nr_swap_pages << (PAGE_SHIFT - 10));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
65
66
67
68
69
  	printk("Total swap = %lukB
  ", total_swap_pages << (PAGE_SHIFT - 10));
  }
  
  /*
31a563962   Daisuke Nishimura   mm: add_to_swap_c...
70
   * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
71
72
   * but sets SwapCache flag and private instead of mapping and index.
   */
31a563962   Daisuke Nishimura   mm: add_to_swap_c...
73
  static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
74
75
  {
  	int error;
51726b122   Hugh Dickins   mm: replace some ...
76
77
78
  	VM_BUG_ON(!PageLocked(page));
  	VM_BUG_ON(PageSwapCache(page));
  	VM_BUG_ON(!PageSwapBacked(page));
31a563962   Daisuke Nishimura   mm: add_to_swap_c...
79
80
81
82
83
84
85
86
87
88
89
90
91
92
  	page_cache_get(page);
  	SetPageSwapCache(page);
  	set_page_private(page, entry.val);
  
  	spin_lock_irq(&swapper_space.tree_lock);
  	error = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
  	if (likely(!error)) {
  		total_swapcache_pages++;
  		__inc_zone_page_state(page, NR_FILE_PAGES);
  		INC_CACHE_INFO(add_total);
  	}
  	spin_unlock_irq(&swapper_space.tree_lock);
  
  	if (unlikely(error)) {
2ca4532a4   Daisuke Nishimura   mm: add_to_swap_c...
93
94
95
96
97
98
  		/*
  		 * Only the context which have set SWAP_HAS_CACHE flag
  		 * would call add_to_swap_cache().
  		 * So add_to_swap_cache() doesn't returns -EEXIST.
  		 */
  		VM_BUG_ON(error == -EEXIST);
31a563962   Daisuke Nishimura   mm: add_to_swap_c...
99
100
101
102
103
104
105
106
107
108
109
110
  		set_page_private(page, 0UL);
  		ClearPageSwapCache(page);
  		page_cache_release(page);
  	}
  
  	return error;
  }
  
  
  /*
   * Preload radix-tree nodes using @gfp_mask and then insert @page into the
   * swap cache under @entry via __add_to_swap_cache(), whose assertions on
   * the page state therefore apply here as well.
   *
   * Returns 0 on success, otherwise the error from radix_tree_preload() or
   * from __add_to_swap_cache().
   */
  int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
  {
  	int error;
  
  	error = radix_tree_preload(gfp_mask);
  	if (error)
  		return error;
  
  	error = __add_to_swap_cache(page, entry);
  	radix_tree_preload_end();
  
  	return error;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118
119
120
121
122
123
  /*
   * This must be called only on pages that have
   * been verified to be in the swap cache.
   */
  void __delete_from_swap_cache(struct page *page)
  {
51726b122   Hugh Dickins   mm: replace some ...
124
125
126
  	VM_BUG_ON(!PageLocked(page));
  	VM_BUG_ON(!PageSwapCache(page));
  	VM_BUG_ON(PageWriteback(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
127

4c21e2f24   Hugh Dickins   [PATCH] mm: split...
128
129
  	radix_tree_delete(&swapper_space.page_tree, page_private(page));
  	set_page_private(page, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
130
131
  	ClearPageSwapCache(page);
  	total_swapcache_pages--;
347ce434d   Christoph Lameter   [PATCH] zoned vm ...
132
  	__dec_zone_page_state(page, NR_FILE_PAGES);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
133
134
135
136
137
138
139
140
141
142
  	INC_CACHE_INFO(del_total);
  }
  
  /**
   * add_to_swap - allocate swap space for a page
   * @page: page we want to move to swap
   *
   * Allocate swap space for the page and add the page to the
   * swap cache.  Caller needs to hold the page lock. 
   */
ac47b003d   Hugh Dickins   mm: remove gfp_ma...
143
  int add_to_swap(struct page *page)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
144
145
  {
  	swp_entry_t entry;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
146
  	int err;
51726b122   Hugh Dickins   mm: replace some ...
147
148
  	VM_BUG_ON(!PageLocked(page));
  	VM_BUG_ON(!PageUptodate(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
149

2ca4532a4   Daisuke Nishimura   mm: add_to_swap_c...
150
151
152
  	entry = get_swap_page();
  	if (!entry.val)
  		return 0;
3f04f62f9   Andrea Arcangeli   thp: split_huge_p...
153
154
155
156
157
  	if (unlikely(PageTransHuge(page)))
  		if (unlikely(split_huge_page(page))) {
  			swapcache_free(entry, NULL);
  			return 0;
  		}
2ca4532a4   Daisuke Nishimura   mm: add_to_swap_c...
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
  	/*
  	 * Radix-tree node allocations from PF_MEMALLOC contexts could
  	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
  	 * stops emergency reserves from being allocated.
  	 *
  	 * TODO: this could cause a theoretical memory reclaim
  	 * deadlock in the swap out path.
  	 */
  	/*
  	 * Add it to the swap cache and mark it dirty
  	 */
  	err = add_to_swap_cache(page, entry,
  			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
  
  	if (!err) {	/* Success */
  		SetPageDirty(page);
  		return 1;
  	} else {	/* -ENOMEM radix-tree allocation failure */
bd53b714d   Nick Piggin   [PATCH] mm: use _...
176
  		/*
2ca4532a4   Daisuke Nishimura   mm: add_to_swap_c...
177
178
  		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
  		 * clear SWAP_HAS_CACHE flag.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
179
  		 */
2ca4532a4   Daisuke Nishimura   mm: add_to_swap_c...
180
181
  		swapcache_free(entry, NULL);
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
182
183
184
185
186
187
188
189
190
191
192
193
  	}
  }
  
  /*
   * This must be called only on pages that have
   * been verified to be in the swap cache and locked.
   * It will never put the page into the free list,
   * the caller has a reference on the page.
   */
  void delete_from_swap_cache(struct page *page)
  {
  	swp_entry_t entry;
4c21e2f24   Hugh Dickins   [PATCH] mm: split...
194
  	entry.val = page_private(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
195

19fd62312   Nick Piggin   mm: spinlock tree...
196
  	spin_lock_irq(&swapper_space.tree_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
197
  	__delete_from_swap_cache(page);
19fd62312   Nick Piggin   mm: spinlock tree...
198
  	spin_unlock_irq(&swapper_space.tree_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
199

cb4b86ba4   KAMEZAWA Hiroyuki   mm: add swap cach...
200
  	swapcache_free(entry, page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
201
202
  	page_cache_release(page);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
203
204
205
206
  /*
   * If we are the only user, then try to free up the swap cache.
   *
   * It's OK to check for PageSwapCache without the page lock
   * here because we are going to recheck again inside
   * try_to_free_swap() _with_ the lock.
   * 					- Marcelo
   *
   * Nothing is done if the page is still mapped or if the page lock cannot
   * be taken without waiting (trylock_page() fails).
   */
  static inline void free_swap_cache(struct page *page)
  {
  	if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
  		try_to_free_swap(page);
  		unlock_page(page);
  	}
  }
  
  /*
   * Drop the caller's reference on @page, first giving free_swap_cache() a
   * chance to release any swap cache associated with the page if this is
   * the last user of the page.
   */
  void free_page_and_swap_cache(struct page *page)
  {
  	/* Best effort: may be a no-op, see free_swap_cache(). */
  	free_swap_cache(page);
  
  	page_cache_release(page);
  }
  
  /*
   * Passed an array of pages, drop them all from swapcache and then release
   * them.  They are removed from the LRU and freed if this is their last use.
   */
  void free_pages_and_swap_cache(struct page **pages, int nr)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
235
236
237
238
  	struct page **pagep = pages;
  
  	lru_add_drain();
  	while (nr) {
c484d4104   Hugh Dickins   [PATCH] mm: free_...
239
  		int todo = min(nr, PAGEVEC_SIZE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
  		int i;
  
  		for (i = 0; i < todo; i++)
  			free_swap_cache(pagep[i]);
  		release_pages(pagep, todo, 0);
  		pagep += todo;
  		nr -= todo;
  	}
  }
  
  /*
   * Look @entry up in the swap cache via find_get_page() on swapper_space.
   * As the original comment puts it, a found page is returned unlocked and
   * with its refcount incremented - we rely on the kernel lock getting page
   * table operations atomic even if we drop the page lock before returning.
   *
   * Every call counts towards find_total; hits also count towards
   * find_success.  Returns the page, or NULL if nothing was found.
   */
  struct page * lookup_swap_cache(swp_entry_t entry)
  {
  	struct page *cached = find_get_page(&swapper_space, entry.val);
  
  	if (cached)
  		INC_CACHE_INFO(find_success);
  	INC_CACHE_INFO(find_total);
  
  	return cached;
  }
  
  /* 
   * Locate a page of swap in physical memory, reserving swap cache space
   * and reading the disk if it is not already cached.
   * A failure return means that either the page allocation failed or that
   * the swap entry is no longer in use.
   */
02098feaa   Hugh Dickins   swapin needs gfp_...
275
  struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
  			struct vm_area_struct *vma, unsigned long addr)
  {
  	struct page *found_page, *new_page = NULL;
  	int err;
  
  	do {
  		/*
  		 * First check the swap cache.  Since this is normally
  		 * called after lookup_swap_cache() failed, re-calling
  		 * that would confuse statistics.
  		 */
  		found_page = find_get_page(&swapper_space, entry.val);
  		if (found_page)
  			break;
  
  		/*
  		 * Get a new page to read into from swap.
  		 */
  		if (!new_page) {
02098feaa   Hugh Dickins   swapin needs gfp_...
295
  			new_page = alloc_page_vma(gfp_mask, vma, addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
296
297
  			if (!new_page)
  				break;		/* Out of memory */
4e5f01c2b   KAMEZAWA Hiroyuki   memcg: clear pc->...
298
299
300
301
302
303
304
305
306
307
  			/*
  			 * The memcg-specific accounting when moving
  			 * pages around the LRU lists relies on the
  			 * page's owner (memcg) to be valid.  Usually,
  			 * pages are assigned to a new owner before
  			 * being put on the LRU list, but since this
  			 * is not the case here, the stale owner from
  			 * a previous allocation cycle must be reset.
  			 */
  			mem_cgroup_reset_owner(new_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
308
309
310
  		}
  
  		/*
31a563962   Daisuke Nishimura   mm: add_to_swap_c...
311
312
313
314
315
316
317
  		 * call radix_tree_preload() while we can wait.
  		 */
  		err = radix_tree_preload(gfp_mask & GFP_KERNEL);
  		if (err)
  			break;
  
  		/*
f000944d0   Hugh Dickins   tmpfs: shuffle ad...
318
319
  		 * Swap entry may have been freed since our caller observed it.
  		 */
355cfa73d   KAMEZAWA Hiroyuki   mm: modify swap_m...
320
  		err = swapcache_prepare(entry);
31a563962   Daisuke Nishimura   mm: add_to_swap_c...
321
322
  		if (err == -EEXIST) {	/* seems racy */
  			radix_tree_preload_end();
355cfa73d   KAMEZAWA Hiroyuki   mm: modify swap_m...
323
  			continue;
31a563962   Daisuke Nishimura   mm: add_to_swap_c...
324
325
326
  		}
  		if (err) {		/* swp entry is obsolete ? */
  			radix_tree_preload_end();
f000944d0   Hugh Dickins   tmpfs: shuffle ad...
327
  			break;
31a563962   Daisuke Nishimura   mm: add_to_swap_c...
328
  		}
f000944d0   Hugh Dickins   tmpfs: shuffle ad...
329

2ca4532a4   Daisuke Nishimura   mm: add_to_swap_c...
330
  		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
f45840b5c   Nick Piggin   mm: pagecache ins...
331
  		__set_page_locked(new_page);
b2e185384   Rik van Riel   define page_file_...
332
  		SetPageSwapBacked(new_page);
31a563962   Daisuke Nishimura   mm: add_to_swap_c...
333
  		err = __add_to_swap_cache(new_page, entry);
529ae9aaa   Nick Piggin   mm: rename page t...
334
  		if (likely(!err)) {
31a563962   Daisuke Nishimura   mm: add_to_swap_c...
335
  			radix_tree_preload_end();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
336
337
338
  			/*
  			 * Initiate read into locked page and return.
  			 */
c5fdae469   Rik van Riel   vmscan: add newly...
339
  			lru_cache_add_anon(new_page);
aca8bf323   Minchan Kim   mm: remove file a...
340
  			swap_readpage(new_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
341
342
  			return new_page;
  		}
31a563962   Daisuke Nishimura   mm: add_to_swap_c...
343
  		radix_tree_preload_end();
b2e185384   Rik van Riel   define page_file_...
344
  		ClearPageSwapBacked(new_page);
f45840b5c   Nick Piggin   mm: pagecache ins...
345
  		__clear_page_locked(new_page);
2ca4532a4   Daisuke Nishimura   mm: add_to_swap_c...
346
347
348
349
  		/*
  		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
  		 * clear SWAP_HAS_CACHE flag.
  		 */
cb4b86ba4   KAMEZAWA Hiroyuki   mm: add swap cach...
350
  		swapcache_free(entry, NULL);
f000944d0   Hugh Dickins   tmpfs: shuffle ad...
351
  	} while (err != -ENOMEM);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
352
353
354
355
356
  
  	if (new_page)
  		page_cache_release(new_page);
  	return found_page;
  }
46017e954   Hugh Dickins   swapin_readahead:...
357
358
359
360
  
  /**
   * swapin_readahead - swap in pages in hope we need them soon
   * @entry: swap entry of this memory
7682486b3   Randy Dunlap   mm: fix various k...
361
   * @gfp_mask: memory allocation flags
46017e954   Hugh Dickins   swapin_readahead:...
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
   * @vma: user vma this address belongs to
   * @addr: target address for mempolicy
   *
   * Returns the struct page for entry and addr, after queueing swapin.
   *
   * Primitive swap readahead code. We simply read an aligned block of
   * (1 << page_cluster) entries in the swap area. This method is chosen
   * because it doesn't cost us any seek time.  We also make sure to queue
   * the 'original' request together with the readahead ones...
   *
   * This has been extended to use the NUMA policies from the mm triggering
   * the readahead.
   *
   * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
   */
02098feaa   Hugh Dickins   swapin needs gfp_...
377
  struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
46017e954   Hugh Dickins   swapin_readahead:...
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
  			struct vm_area_struct *vma, unsigned long addr)
  {
  	int nr_pages;
  	struct page *page;
  	unsigned long offset;
  	unsigned long end_offset;
  
  	/*
  	 * Get starting offset for readaround, and number of pages to read.
  	 * Adjust starting address by readbehind (for NUMA interleave case)?
  	 * No, it's very unlikely that swap layout would follow vma layout,
  	 * more likely that neighbouring swap pages came from the same node:
  	 * so use the same "addr" to choose the same node for each swap read.
  	 */
  	nr_pages = valid_swaphandles(entry, &offset);
  	for (end_offset = offset + nr_pages; offset < end_offset; offset++) {
  		/* Ok, do the async read-ahead now */
  		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
02098feaa   Hugh Dickins   swapin needs gfp_...
396
  						gfp_mask, vma, addr);
46017e954   Hugh Dickins   swapin_readahead:...
397
398
399
400
401
  		if (!page)
  			break;
  		page_cache_release(page);
  	}
  	lru_add_drain();	/* Push any new pages onto the LRU now */
02098feaa   Hugh Dickins   swapin needs gfp_...
402
  	return read_swap_cache_async(entry, gfp_mask, vma, addr);
46017e954   Hugh Dickins   swapin_readahead:...
403
  }