Blame view

mm/swap_state.c 9.4 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
  /*
   *  linux/mm/swap_state.c
   *
   *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   *  Swap reorganised 29.12.95, Stephen Tweedie
   *
   *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
   */
  #include <linux/module.h>
  #include <linux/mm.h>
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
  #include <linux/init.h>
  #include <linux/pagemap.h>
  #include <linux/buffer_head.h>
  #include <linux/backing-dev.h>
c484d4104   Hugh Dickins   [PATCH] mm: free_...
17
  #include <linux/pagevec.h>
b20a35035   Christoph Lameter   [PATCH] page migr...
18
  #include <linux/migrate.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
20
21
22
23
  
  #include <asm/pgtable.h>
  
  /*
   * swapper_space is a fiction, retained to simplify the path through
2706a1b89   Anderson Briglia   vmscan: fix comme...
24
   * vmscan's shrink_page_list, to make sync_page look nicer, and to allow
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
   * future use of radix_tree tags in the swap cache.
   */
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
27
  /*
   * Address-space operations for the swap cache; installed below as
   * swapper_space.a_ops.
   */
  static const struct address_space_operations swap_aops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
28
29
30
  	.writepage	= swap_writepage,
  	.sync_page	= block_sync_page,
  	.set_page_dirty	= __set_page_dirty_nobuffers,
e965f9630   Christoph Lameter   [PATCH] Direct Mi...
31
  	/* Swap cache pages use the generic migrate_page handler. */
  	.migratepage	= migrate_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
32
33
34
35
36
37
38
39
40
  };
  
  /*
   * Backing-device description for the swap cache; referenced below as
   * swapper_space.backing_dev_info.
   * NOTE(review): going by the flag names, dirty accounting and writeback
   * are disabled for this device - confirm against the BDI_CAP_* definitions.
   */
  static struct backing_dev_info swap_backing_dev_info = {
  	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
  	.unplug_io_fn	= swap_unplug_io_fn,
  };
  
  /*
   * The single address_space that backs the swap cache.  Pages are keyed
   * in page_tree by swp_entry_t.val (see __add_to_swap_cache), and
   * tree_lock is taken for writing around every insert/delete in this file.
   */
  struct address_space swapper_space = {
  	/* Radix-tree nodes are allocated atomically and without warnings. */
  	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
e4d919188   Ingo Molnar   [PATCH] lockdep: ...
41
  	.tree_lock	= __RW_LOCK_UNLOCKED(swapper_space.tree_lock),
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
42
43
44
45
  	.a_ops		= &swap_aops,
  	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
  	.backing_dev_info = &swap_backing_dev_info,
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
  
  /* Bump one swap_cache_info counter; the increment is a plain, non-atomic ++. */
  #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
  
  /* Swap cache statistics, reported by show_swap_cache_info(). */
  static struct {
  	unsigned long add_total;	/* pages successfully added to the swap cache */
  	unsigned long del_total;	/* pages removed by __delete_from_swap_cache() */
  	unsigned long find_success;	/* lookup_swap_cache() calls that found a page */
  	unsigned long find_total;	/* all lookup_swap_cache() calls */
  	unsigned long noent_race;	/* add_to_swap_cache(): swap_duplicate() failed */
  	unsigned long exist_race;	/* insertion failed with -EEXIST */
  } swap_cache_info;
  
  void show_swap_cache_info(void)
  {
  	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu
  ",
  		swap_cache_info.add_total, swap_cache_info.del_total,
  		swap_cache_info.find_success, swap_cache_info.find_total,
  		swap_cache_info.noent_race, swap_cache_info.exist_race);
  	printk("Free swap  = %lukB
  ", nr_swap_pages << (PAGE_SHIFT - 10));
  	printk("Total swap = %lukB
  ", total_swap_pages << (PAGE_SHIFT - 10));
  }
  
  /*
   * __add_to_swap_cache resembles add_to_page_cache on swapper_space,
   * but sets SwapCache flag and private instead of mapping and index.
   */
9de75d110   Victor Fusco   [PATCH] mm/swap_s...
75
  static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
dd0fc66fb   Al Viro   [PATCH] gfp flags...
76
  			       gfp_t gfp_mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
77
78
  {
  	int error;
b55ed8162   Nick Piggin   mm: clarify __add...
79
  	/*
  	 * Preconditions: the page is locked, is not already in the swap
  	 * cache and has no private data attached.
  	 */
  	BUG_ON(!PageLocked(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
80
81
82
83
84
85
86
87
88
  	BUG_ON(PageSwapCache(page));
  	BUG_ON(PagePrivate(page));
  	/*
  	 * Preload with the caller's gfp_mask before taking tree_lock;
  	 * presumably so that the insert under the lock need not allocate.
  	 */
  	error = radix_tree_preload(gfp_mask);
  	if (!error) {
  		write_lock_irq(&swapper_space.tree_lock);
  		error = radix_tree_insert(&swapper_space.page_tree,
  						entry.val, page);
  		if (!error) {
  			/* The swap cache holds its own reference on the page. */
  			page_cache_get(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
89
  			SetPageSwapCache(page);
4c21e2f24   Hugh Dickins   [PATCH] mm: split...
90
  			/* Remember the swap entry in the page's private field. */
  			set_page_private(page, entry.val);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
91
  			total_swapcache_pages++;
347ce434d   Christoph Lameter   [PATCH] zoned vm ...
92
  			__inc_zone_page_state(page, NR_FILE_PAGES);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
93
94
95
96
97
98
99
100
101
102
  		}
  		write_unlock_irq(&swapper_space.tree_lock);
  		radix_tree_preload_end();
  	}
  	/* 0 on success, else the error from the preload or the insert. */
  	return error;
  }
  
  /*
   * Take a reference on the swap entry, lock the page and insert it into
   * the swap cache.  Returns 0 on success with the page left locked,
   * -ENOENT if swap_duplicate() fails, or the error from
   * __add_to_swap_cache(); on error the page is unlocked again.
   */
  static int add_to_swap_cache(struct page *page, swp_entry_t entry)
  {
  	int error;
b55ed8162   Nick Piggin   mm: clarify __add...
103
  	/* The page must arrive unlocked; it is locked below. */
  	BUG_ON(PageLocked(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
104
105
106
107
  	if (!swap_duplicate(entry)) {
  		INC_CACHE_INFO(noent_race);
  		return -ENOENT;
  	}
b55ed8162   Nick Piggin   mm: clarify __add...
108
  	/* __add_to_swap_cache() insists on a locked page. */
  	SetPageLocked(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
109
110
111
112
113
  	error = __add_to_swap_cache(page, entry, GFP_KERNEL);
  	/*
  	 * Anon pages are already on the LRU, we don't run lru_cache_add here.
  	 */
  	if (error) {
b55ed8162   Nick Piggin   mm: clarify __add...
114
  		ClearPageLocked(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
  		/* Undo the swap_duplicate() reference taken above. */
  		swap_free(entry);
  		if (error == -EEXIST)
  			INC_CACHE_INFO(exist_race);
  		return error;
  	}
  	INC_CACHE_INFO(add_total);
  	return 0;
  }
  
  /*
   * This must be called only on pages that have
   * been verified to be in the swap cache.
   */
  void __delete_from_swap_cache(struct page *page)
  {
  	/*
  	 * Preconditions: the page is locked, is in the swap cache, is not
  	 * under writeback and has PagePrivate clear.  The in-file caller,
  	 * delete_from_swap_cache(), holds swapper_space.tree_lock for
  	 * writing around this call.
  	 */
  	BUG_ON(!PageLocked(page));
  	BUG_ON(!PageSwapCache(page));
  	BUG_ON(PageWriteback(page));
3279ffd97   Hugh Dickins   [PATCH] delete fr...
133
  	BUG_ON(PagePrivate(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
134

4c21e2f24   Hugh Dickins   [PATCH] mm: split...
135
136
  	/* page_private() holds the swap entry value used as the tree key. */
  	radix_tree_delete(&swapper_space.page_tree, page_private(page));
  	set_page_private(page, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
137
138
  	ClearPageSwapCache(page);
  	total_swapcache_pages--;
347ce434d   Christoph Lameter   [PATCH] zoned vm ...
139
  	__dec_zone_page_state(page, NR_FILE_PAGES);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
140
141
142
143
144
145
146
147
148
149
  	/* Neither the page reference nor the swap entry is released here. */
  	INC_CACHE_INFO(del_total);
  }
  
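  /**
   * add_to_swap - allocate swap space for a page
   * @page: page we want to move to swap
   * @gfp_mask: allocation flags used when inserting into the swap cache
   *
   * Allocate swap space for the page and add the page to the
   * swap cache.  Caller needs to hold the page lock.
   *
   * Returns 1 on success, or 0 if no swap entry could be obtained or
   * the swap cache insertion failed.
   */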
1480a540c   Christoph Lameter   [PATCH] SwapMig: ...
150
  int add_to_swap(struct page * page, gfp_t gfp_mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
151
152
  {
  	swp_entry_t entry;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
153
  	int err;
e74ca2b49   Eric Sesterhenn   BUG_ON() Conversi...
154
  	BUG_ON(!PageLocked(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
155
156
157
158
159
  
  	/* Retry with a fresh swap entry for as long as the insert hits -EEXIST. */
  	for (;;) {
  		entry = get_swap_page();
  		/* A zero entry value means no swap entry was handed out. */
  		if (!entry.val)
  			return 0;
bd53b714d   Nick Piggin   [PATCH] mm: use _...
160
161
162
163
  		/*
  		 * Radix-tree node allocations from PF_MEMALLOC contexts could
  		 * completely exhaust the page allocator. __GFP_NOMEMALLOC
  		 * stops emergency reserves from being allocated.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
164
  		 *
bd53b714d   Nick Piggin   [PATCH] mm: use _...
165
166
  		 * TODO: this could cause a theoretical memory reclaim
  		 * deadlock in the swap out path.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
167
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
168
169
170
  		/*
  		 * Add it to the swap cache and mark it dirty
  		 */
bd53b714d   Nick Piggin   [PATCH] mm: use _...
171
  		err = __add_to_swap_cache(page, entry,
1480a540c   Christoph Lameter   [PATCH] SwapMig: ...
172
  				gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
  
  		/* Every failure path gives the unused swap entry back. */
  		switch (err) {
  		case 0:				/* Success */
  			SetPageUptodate(page);
  			SetPageDirty(page);
  			INC_CACHE_INFO(add_total);
  			return 1;
  		case -EEXIST:
  			/* Raced with "speculative" read_swap_cache_async */
  			INC_CACHE_INFO(exist_race);
  			swap_free(entry);
  			continue;
  		default:
  			/* -ENOMEM radix-tree allocation failure */
  			swap_free(entry);
  			return 0;
  		}
  	}
  }
  
  /*
   * This must be called only on pages that have
   * been verified to be in the swap cache and locked.
   * It will never put the page into the free list,
   * the caller has a reference on the page.
   */
  void delete_from_swap_cache(struct page *page)
  {
  	swp_entry_t entry;
4c21e2f24   Hugh Dickins   [PATCH] mm: split...
202
  	/*
  	 * Save the swap entry now: __delete_from_swap_cache() resets the
  	 * page's private field to 0.
  	 */
  	entry.val = page_private(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
  
  	write_lock_irq(&swapper_space.tree_lock);
  	__delete_from_swap_cache(page);
  	write_unlock_irq(&swapper_space.tree_lock);
  
  	/*
  	 * With the tree lock dropped, release the swap entry and one page
  	 * reference (presumably the one taken by __add_to_swap_cache()).
  	 */
  	swap_free(entry);
  	page_cache_release(page);
  }
  
  /*
   * Swizzle a page out of the page cache and into the swap cache under
   * @entry.  Intended only for shmem_writepage.
   *
   * Returns 0 on success, otherwise the error from __add_to_swap_cache()
   * (in which case the page is left in the page cache).
   */
  int move_to_swap_cache(struct page *page, swp_entry_t entry)
  {
  	int ret;
  
  	ret = __add_to_swap_cache(page, entry, GFP_ATOMIC);
  	if (ret) {
  		if (ret == -EEXIST)
  			INC_CACHE_INFO(exist_race);
  		return ret;
  	}
  
  	/* Now in the swap cache: detach from the old mapping. */
  	remove_from_page_cache(page);
  	page_cache_release(page);	/* pagecache ref */
  	if (!swap_duplicate(entry))
  		BUG();
  	SetPageDirty(page);
  	INC_CACHE_INFO(add_total);
  	return 0;
  }
  
  /*
   * Swizzle a page out of the swap cache and into @mapping at @index.
   * Used by shmem_getpage (and shmem_unuse).
   *
   * Returns 0 on success, otherwise the error from add_to_page_cache()
   * (in which case the page stays in the swap cache).
   */
  int move_from_swap_cache(struct page *page, unsigned long index,
  		struct address_space *mapping)
  {
  	int ret = add_to_page_cache(page, mapping, index, GFP_ATOMIC);
  
  	if (ret)
  		return ret;
  
  	delete_from_swap_cache(page);
  	/* shift page from clean_pages to dirty_pages list */
  	ClearPageDirty(page);
  	set_page_dirty(page);
  	return 0;
  }
  
  /*
   * If we are the only user, then try to free up the swap cache.
   *
   * It's ok to check for PageSwapCache without the page lock
   * here because we are going to recheck again inside
   * exclusive_swap_page() _with_ the lock.
   * 					- Marcelo
   *
   * Nothing is done if the page is not in the swap cache or if the
   * page lock cannot be taken without waiting.
   */
  static inline void free_swap_cache(struct page *page)
  {
  	if (!PageSwapCache(page))
  		return;
  
  	/* Trylock only: a page somebody else has locked is left alone. */
  	if (TestSetPageLocked(page))
  		return;
  
  	remove_exclusive_swap_page(page);
  	unlock_page(page);
  }
  
  /* 
   * Perform a free_page(), also freeing any swap cache associated with
b8072f099   Hugh Dickins   [PATCH] mm: updat...
264
   * this page if it is the last user of the page.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
265
266
267
268
269
270
271
272
273
274
275
276
277
   */
  void free_page_and_swap_cache(struct page *page)
  {
  	/* Opportunistically drop the swap cache entry first ... */
  	free_swap_cache(page);
  
  	/* ... then give up the caller's reference on the page. */
  	page_cache_release(page);
  }
  
  /*
   * Passed an array of pages, drop them all from swapcache and then release
   * them.  They are removed from the LRU and freed if this is their last use.
   */
  void free_pages_and_swap_cache(struct page **pages, int nr)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
278
279
280
281
  	/* Cursor over the caller's array; advances one batch at a time. */
  	struct page **pagep = pages;
  
  	lru_add_drain();
  	while (nr) {
c484d4104   Hugh Dickins   [PATCH] mm: free_...
282
  		/* Handle at most PAGEVEC_SIZE pages per pass. */
  		int todo = min(nr, PAGEVEC_SIZE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
  		int i;
  
  		/* Drop swap cache entries first, then release the whole batch. */
  		for (i = 0; i < todo; i++)
  			free_swap_cache(pagep[i]);
  		release_pages(pagep, todo, 0);
  		pagep += todo;
  		nr -= todo;
  	}
  }
  
  /*
   * Look up a swap entry in the swap cache and update the lookup
   * statistics.  A found page will be returned unlocked and with its
   * refcount incremented - we rely on the kernel lock getting page table
   * operations atomic even if we drop the page lock before returning.
   *
   * Returns the cached page, or NULL if the entry is not in the cache.
   */
  struct page * lookup_swap_cache(swp_entry_t entry)
  {
  	struct page *hit = find_get_page(&swapper_space, entry.val);
  
  	/* Every call counts as a lookup; only hits count as a success. */
  	INC_CACHE_INFO(find_total);
  	if (hit != NULL)
  		INC_CACHE_INFO(find_success);
  
  	return hit;
  }
  
  /* 
   * Locate a page of swap in physical memory, reserving swap cache space
   * and reading the disk if it is not already cached.
   * A failure return means that either the page allocation failed or that
   * the swap entry is no longer in use.
   */
  struct page *read_swap_cache_async(swp_entry_t entry,
  			struct vm_area_struct *vma, unsigned long addr)
  {
  	/*
  	 * new_page is allocated at most once and reused across retries;
  	 * vma and addr are only passed through to alloc_page_vma().
  	 */
  	struct page *found_page, *new_page = NULL;
  	int err;
  
  	do {
  		/*
  		 * First check the swap cache.  Since this is normally
  		 * called after lookup_swap_cache() failed, re-calling
  		 * that would confuse statistics.
  		 */
  		found_page = find_get_page(&swapper_space, entry.val);
  		if (found_page)
  			break;
  
  		/*
  		 * Get a new page to read into from swap.
  		 */
  		if (!new_page) {
769848c03   Mel Gorman   Add __GFP_MOVABLE...
338
339
  			new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
  								vma, addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
  			if (!new_page)
  				break;		/* Out of memory */
  		}
  
  		/*
  		 * Associate the page with swap entry in the swap cache.
  		 * May fail (-ENOENT) if swap entry has been freed since
  		 * our caller observed it.  May fail (-EEXIST) if there
  		 * is already a page associated with this entry in the
  		 * swap cache: added by a racing read_swap_cache_async,
  		 * or by try_to_swap_out (or shmem_writepage) re-using
  		 * the just freed swap entry for an existing page.
  		 * May fail (-ENOMEM) if radix-tree node allocation failed.
  		 */
  		err = add_to_swap_cache(new_page, entry);
  		if (!err) {
  			/*
  			 * Initiate read into locked page and return.
  			 */
  			lru_cache_add_active(new_page);
  			swap_readpage(NULL, new_page);
  			return new_page;
  		}
  		/* Any other error (e.g. -EEXIST) loops back to the cache lookup. */
  	} while (err != -ENOENT && err != -ENOMEM);
  
  	/*
  	 * Drop the page we allocated but did not use.  found_page is the
  	 * cached page if the lookup hit, and NULL on every failure path.
  	 */
  	if (new_page)
  		page_cache_release(new_page);
  	return found_page;
  }