Blame view

mm/swap_state.c 9.92 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
  /*
   *  linux/mm/swap_state.c
   *
   *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   *  Swap reorganised 29.12.95, Stephen Tweedie
   *
   *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
   */
  #include <linux/module.h>
  #include <linux/mm.h>
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
46017e954   Hugh Dickins   swapin_readahead:...
13
  #include <linux/swapops.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
15
16
17
  #include <linux/init.h>
  #include <linux/pagemap.h>
  #include <linux/buffer_head.h>
  #include <linux/backing-dev.h>
c484d4104   Hugh Dickins   [PATCH] mm: free_...
18
  #include <linux/pagevec.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
19
  #include <linux/migrate.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
21
22
23
24
  
  #include <asm/pgtable.h>
  
  /*
   * swapper_space is a fiction, retained to simplify the path through
2706a1b89   Anderson Briglia   vmscan: fix comme...
25
   * vmscan's shrink_page_list, to make sync_page look nicer, and to allow
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
26
27
   * future use of radix_tree tags in the swap cache.
   */
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
28
  static const struct address_space_operations swap_aops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
30
31
  	.writepage	= swap_writepage,
  	.sync_page	= block_sync_page,
  	.set_page_dirty	= __set_page_dirty_nobuffers,
e965f9630   Christoph Lameter   [PATCH] Direct Mi...
32
  	.migratepage	= migrate_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
33
34
35
  };
  
  static struct backing_dev_info swap_backing_dev_info = {
e4ad08fe6   Miklos Szeredi   mm: bdi: add sepa...
36
  	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
37
38
39
40
41
  	.unplug_io_fn	= swap_unplug_io_fn,
  };
  
  struct address_space swapper_space = {
  	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
19fd62312   Nick Piggin   mm: spinlock tree...
42
  	.tree_lock	= __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock),
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
43
44
45
46
  	.a_ops		= &swap_aops,
  	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
  	.backing_dev_info = &swap_backing_dev_info,
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
47
48
49
50
51
52
53
54
  
  /* Swap cache event counters, printed by show_swap_cache_info(). */
  static struct {
  	unsigned long add_total;	/* successful add_to_swap_cache() calls */
  	unsigned long del_total;	/* __delete_from_swap_cache() calls */
  	unsigned long find_success;	/* lookup_swap_cache() hits */
  	unsigned long find_total;	/* all lookup_swap_cache() calls */
  } swap_cache_info;

  /* Bump the swap_cache_info counter named by @x (plain, non-atomic ++). */
  #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
  
  void show_swap_cache_info(void)
  {
2c97b7fc0   Johannes Weiner   mm: print swapcac...
59
60
61
62
  	printk("%lu pages in swap cache
  ", total_swapcache_pages);
  	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu
  ",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
63
  		swap_cache_info.add_total, swap_cache_info.del_total,
bb63be0a0   Hugh Dickins   tmpfs: move swap_...
64
  		swap_cache_info.find_success, swap_cache_info.find_total);
07279cdfd   Hugh Dickins   mm: show free swa...
65
66
  	printk("Free swap  = %ldkB
  ", nr_swap_pages << (PAGE_SHIFT - 10));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
67
68
69
70
71
  	printk("Total swap = %lukB
  ", total_swap_pages << (PAGE_SHIFT - 10));
  }
  
  /*
e286781d5   Nick Piggin   mm: speculative p...
72
   * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
73
74
   * but sets SwapCache flag and private instead of mapping and index.
   */
73b1262fa   Hugh Dickins   tmpfs: move swap ...
75
  int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
76
77
  {
  	int error;
b55ed8162   Nick Piggin   mm: clarify __add...
78
  	BUG_ON(!PageLocked(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
79
80
  	BUG_ON(PageSwapCache(page));
  	BUG_ON(PagePrivate(page));
35c754d79   Balbir Singh   memory controller...
81
82
  	error = radix_tree_preload(gfp_mask);
  	if (!error) {
e286781d5   Nick Piggin   mm: speculative p...
83
84
85
  		page_cache_get(page);
  		SetPageSwapCache(page);
  		set_page_private(page, entry.val);
19fd62312   Nick Piggin   mm: spinlock tree...
86
  		spin_lock_irq(&swapper_space.tree_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87
88
  		error = radix_tree_insert(&swapper_space.page_tree,
  						entry.val, page);
e286781d5   Nick Piggin   mm: speculative p...
89
  		if (likely(!error)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
90
  			total_swapcache_pages++;
347ce434d   Christoph Lameter   [PATCH] zoned vm ...
91
  			__inc_zone_page_state(page, NR_FILE_PAGES);
bb63be0a0   Hugh Dickins   tmpfs: move swap_...
92
  			INC_CACHE_INFO(add_total);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
93
  		}
19fd62312   Nick Piggin   mm: spinlock tree...
94
  		spin_unlock_irq(&swapper_space.tree_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
95
  		radix_tree_preload_end();
e286781d5   Nick Piggin   mm: speculative p...
96
97
98
99
100
101
  
  		if (unlikely(error)) {
  			set_page_private(page, 0UL);
  			ClearPageSwapCache(page);
  			page_cache_release(page);
  		}
fa1de9008   Hugh Dickins   memcgroup: revert...
102
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
103
104
  	return error;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
105
106
107
108
109
110
111
112
113
  /*
   * This must be called only on pages that have
   * been verified to be in the swap cache.
   */
  void __delete_from_swap_cache(struct page *page)
  {
  	BUG_ON(!PageLocked(page));
  	BUG_ON(!PageSwapCache(page));
  	BUG_ON(PageWriteback(page));
3279ffd97   Hugh Dickins   [PATCH] delete fr...
114
  	BUG_ON(PagePrivate(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
115

4c21e2f24   Hugh Dickins   [PATCH] mm: split...
116
117
  	radix_tree_delete(&swapper_space.page_tree, page_private(page));
  	set_page_private(page, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118
119
  	ClearPageSwapCache(page);
  	total_swapcache_pages--;
347ce434d   Christoph Lameter   [PATCH] zoned vm ...
120
  	__dec_zone_page_state(page, NR_FILE_PAGES);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
121
122
123
124
125
126
  	INC_CACHE_INFO(del_total);
  }
  
  /**
   * add_to_swap - allocate swap space for a page
   * @page: page we want to move to swap
7682486b3   Randy Dunlap   mm: fix various k...
127
   * @gfp_mask: memory allocation flags
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
128
129
130
131
   *
   * Allocate swap space for the page and add the page to the
   * swap cache.  Caller needs to hold the page lock. 
   */
1480a540c   Christoph Lameter   [PATCH] SwapMig: ...
132
  int add_to_swap(struct page * page, gfp_t gfp_mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
133
134
  {
  	swp_entry_t entry;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
135
  	int err;
e74ca2b49   Eric Sesterhenn   BUG_ON() Conversi...
136
  	BUG_ON(!PageLocked(page));
0ed361dec   Nick Piggin   mm: fix PageUptod...
137
  	BUG_ON(!PageUptodate(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
138
139
140
141
142
  
  	for (;;) {
  		entry = get_swap_page();
  		if (!entry.val)
  			return 0;
bd53b714d   Nick Piggin   [PATCH] mm: use _...
143
144
145
146
  		/*
  		 * Radix-tree node allocations from PF_MEMALLOC contexts could
  		 * completely exhaust the page allocator. __GFP_NOMEMALLOC
  		 * stops emergency reserves from being allocated.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
147
  		 *
bd53b714d   Nick Piggin   [PATCH] mm: use _...
148
149
  		 * TODO: this could cause a theoretical memory reclaim
  		 * deadlock in the swap out path.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
150
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
151
152
153
  		/*
  		 * Add it to the swap cache and mark it dirty
  		 */
f000944d0   Hugh Dickins   tmpfs: shuffle ad...
154
  		err = add_to_swap_cache(page, entry,
1480a540c   Christoph Lameter   [PATCH] SwapMig: ...
155
  				gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
156
157
158
  
  		switch (err) {
  		case 0:				/* Success */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
159
  			SetPageDirty(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
160
161
162
  			return 1;
  		case -EEXIST:
  			/* Raced with "speculative" read_swap_cache_async */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
  			swap_free(entry);
  			continue;
  		default:
  			/* -ENOMEM radix-tree allocation failure */
  			swap_free(entry);
  			return 0;
  		}
  	}
  }
  
  /*
   * This must be called only on pages that have
   * been verified to be in the swap cache and locked.
   * It will never put the page into the free list,
   * the caller has a reference on the page.
   */
  void delete_from_swap_cache(struct page *page)
  {
  	swp_entry_t entry;
4c21e2f24   Hugh Dickins   [PATCH] mm: split...
182
  	entry.val = page_private(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
183

19fd62312   Nick Piggin   mm: spinlock tree...
184
  	spin_lock_irq(&swapper_space.tree_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
185
  	__delete_from_swap_cache(page);
19fd62312   Nick Piggin   mm: spinlock tree...
186
  	spin_unlock_irq(&swapper_space.tree_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
187
188
189
190
  
  	swap_free(entry);
  	page_cache_release(page);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
191
192
193
194
195
196
197
198
199
200
  /*
   * If we are the only user, then try to free up the swap cache.
   *
   * It's OK to check PageSwapCache without the page lock here, because
   * the check is expected to be repeated with the lock held further down.
   * NOTE(review): the original note (Marcelo) names exclusive_swap_page()
   * as the place of that recheck, while this code calls
   * remove_exclusive_swap_page() - confirm which one does it.
   */
  static inline void free_swap_cache(struct page *page)
  {
  	if (!PageSwapCache(page))
  		return;

  	/* Only proceed if the page lock can be taken by trylock_page(). */
  	if (!trylock_page(page))
  		return;

  	remove_exclusive_swap_page(page);
  	unlock_page(page);
  }
  
  /* 
   * Perform a free_page(), also freeing any swap cache associated with
b8072f099   Hugh Dickins   [PATCH] mm: updat...
209
   * this page if it is the last user of the page.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
210
211
212
213
214
215
216
217
218
219
220
221
222
   */
  void free_page_and_swap_cache(struct page *page)
  {
  	/* First give free_swap_cache() a chance to drop the swap cache entry. */
  	free_swap_cache(page);
  	/* Then release our reference on the page itself. */
  	page_cache_release(page);
  }
  
  /*
   * Passed an array of pages, drop them all from swapcache and then release
   * them.  They are removed from the LRU and freed if this is their last use.
   */
  void free_pages_and_swap_cache(struct page **pages, int nr)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
223
224
225
226
  	struct page **pagep = pages;
  
  	lru_add_drain();
  	while (nr) {
c484d4104   Hugh Dickins   [PATCH] mm: free_...
227
  		int todo = min(nr, PAGEVEC_SIZE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
  		int i;
  
  		for (i = 0; i < todo; i++)
  			free_swap_cache(pagep[i]);
  		release_pages(pagep, todo, 0);
  		pagep += todo;
  		nr -= todo;
  	}
  }
  
  /*
   * Lookup a swap entry in the swap cache. A found page will be returned
   * unlocked and with its refcount incremented - we rely on the kernel
   * lock getting page table operations atomic even if we drop the page
   * lock before returning.
   *
   * Every call counts towards find_total; hits also count towards
   * find_success.
   */
  struct page * lookup_swap_cache(swp_entry_t entry)
  {
  	struct page *page = find_get_page(&swapper_space, entry.val);

  	INC_CACHE_INFO(find_total);
  	if (page)
  		INC_CACHE_INFO(find_success);

  	return page;
  }
  
  /* 
   * Locate a page of swap in physical memory, reserving swap cache space
   * and reading the disk if it is not already cached.
   * A failure return means that either the page allocation failed or that
   * the swap entry is no longer in use.
   */
02098feaa   Hugh Dickins   swapin needs gfp_...
263
  struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
  			struct vm_area_struct *vma, unsigned long addr)
  {
  	struct page *found_page, *new_page = NULL;
  	int err;
  
  	do {
  		/*
  		 * First check the swap cache.  Since this is normally
  		 * called after lookup_swap_cache() failed, re-calling
  		 * that would confuse statistics.
  		 */
  		found_page = find_get_page(&swapper_space, entry.val);
  		if (found_page)
  			break;
  
  		/*
  		 * Get a new page to read into from swap.
  		 */
  		if (!new_page) {
02098feaa   Hugh Dickins   swapin needs gfp_...
283
  			new_page = alloc_page_vma(gfp_mask, vma, addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
284
285
286
287
288
  			if (!new_page)
  				break;		/* Out of memory */
  		}
  
  		/*
f000944d0   Hugh Dickins   tmpfs: shuffle ad...
289
290
291
292
293
294
  		 * Swap entry may have been freed since our caller observed it.
  		 */
  		if (!swap_duplicate(entry))
  			break;
  
  		/*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
295
  		 * Associate the page with swap entry in the swap cache.
f000944d0   Hugh Dickins   tmpfs: shuffle ad...
296
297
298
299
  		 * May fail (-EEXIST) if there is already a page associated
  		 * with this entry in the swap cache: added by a racing
  		 * read_swap_cache_async, or add_to_swap or shmem_writepage
  		 * re-using the just freed swap entry for an existing page.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
300
301
  		 * May fail (-ENOMEM) if radix-tree node allocation failed.
  		 */
529ae9aaa   Nick Piggin   mm: rename page t...
302
  		set_page_locked(new_page);
f000944d0   Hugh Dickins   tmpfs: shuffle ad...
303
  		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
529ae9aaa   Nick Piggin   mm: rename page t...
304
  		if (likely(!err)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
305
306
307
308
309
310
311
  			/*
  			 * Initiate read into locked page and return.
  			 */
  			lru_cache_add_active(new_page);
  			swap_readpage(NULL, new_page);
  			return new_page;
  		}
529ae9aaa   Nick Piggin   mm: rename page t...
312
  		clear_page_locked(new_page);
f000944d0   Hugh Dickins   tmpfs: shuffle ad...
313
314
  		swap_free(entry);
  	} while (err != -ENOMEM);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
315
316
317
318
319
  
  	if (new_page)
  		page_cache_release(new_page);
  	return found_page;
  }
46017e954   Hugh Dickins   swapin_readahead:...
320
321
322
323
  
  /**
   * swapin_readahead - swap in pages in hope we need them soon
   * @entry: swap entry of this memory
7682486b3   Randy Dunlap   mm: fix various k...
324
   * @gfp_mask: memory allocation flags
46017e954   Hugh Dickins   swapin_readahead:...
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
   * @vma: user vma this address belongs to
   * @addr: target address for mempolicy
   *
   * Returns the struct page for entry and addr, after queueing swapin.
   *
   * Primitive swap readahead code. We simply read an aligned block of
   * (1 << page_cluster) entries in the swap area. This method is chosen
   * because it doesn't cost us any seek time.  We also make sure to queue
   * the 'original' request together with the readahead ones...
   *
   * This has been extended to use the NUMA policies from the mm triggering
   * the readahead.
   *
   * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
   */
02098feaa   Hugh Dickins   swapin needs gfp_...
340
  struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
46017e954   Hugh Dickins   swapin_readahead:...
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
  			struct vm_area_struct *vma, unsigned long addr)
  {
  	int nr_pages;
  	struct page *page;
  	unsigned long offset;
  	unsigned long end_offset;
  
  	/*
  	 * Get starting offset for readaround, and number of pages to read.
  	 * Adjust starting address by readbehind (for NUMA interleave case)?
  	 * No, it's very unlikely that swap layout would follow vma layout,
  	 * more likely that neighbouring swap pages came from the same node:
  	 * so use the same "addr" to choose the same node for each swap read.
  	 */
  	nr_pages = valid_swaphandles(entry, &offset);
  	for (end_offset = offset + nr_pages; offset < end_offset; offset++) {
  		/* Ok, do the async read-ahead now */
  		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
02098feaa   Hugh Dickins   swapin needs gfp_...
359
  						gfp_mask, vma, addr);
46017e954   Hugh Dickins   swapin_readahead:...
360
361
362
363
364
  		if (!page)
  			break;
  		page_cache_release(page);
  	}
  	lru_add_drain();	/* Push any new pages onto the LRU now */
02098feaa   Hugh Dickins   swapin needs gfp_...
365
  	return read_swap_cache_async(entry, gfp_mask, vma, addr);
46017e954   Hugh Dickins   swapin_readahead:...
366
  }