  /*
   *  linux/mm/swap.c
   *
   *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   */
  
  /*
 * This file contains the default values for the operation of the
   * Linux VM subsystem. Fine-tuning documentation can be found in
   * Documentation/sysctl/vm.txt.
   * Started 18.12.91
   * Swap aging added 23.2.95, Stephen Tweedie.
   * Buffermem limits added 12.3.98, Rik van Riel.
   */
  
  #include <linux/mm.h>
  #include <linux/sched.h>
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
  #include <linux/mman.h>
  #include <linux/pagemap.h>
  #include <linux/pagevec.h>
  #include <linux/init.h>
  #include <linux/module.h>
  #include <linux/mm_inline.h>
  #include <linux/buffer_head.h>	/* for try_to_release_page() */
  #include <linux/percpu_counter.h>
  #include <linux/percpu.h>
  #include <linux/cpu.h>
  #include <linux/notifier.h>
  
  /* How many pages do we try to swap or page in/out together? */
  int page_cluster;
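
/*
 * Swap readahead reads up to 1 << page_cluster pages around the target
 * swap slot (see valid_swaphandles()), so the value 3 chosen in
 * swap_setup() below means clusters of eight pages, and 2 means four.
 */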
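
/*
 * Release a reference on a compound (higher-order) page.  Each sub-page
 * of a compound page has page_private() pointing back at the head page,
 * and the compound destructor is stashed in page[1].lru.next by
 * prep_compound_page(); once the head's refcount drops to zero, that
 * destructor tears down and frees the whole compound page.
 */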
  static void put_compound_page(struct page *page)
  {
  	page = (struct page *)page_private(page);
  	if (put_page_testzero(page)) {
  		void (*dtor)(struct page *page);
  		dtor = (void (*)(struct page *))page[1].lru.next;
  		(*dtor)(page);
  	}
  }
  
  void put_page(struct page *page)
  {
  	if (unlikely(PageCompound(page)))
  		put_compound_page(page);
  	else if (put_page_testzero(page))
  		__page_cache_release(page);
  }
  EXPORT_SYMBOL(put_page);
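
/*
 * A minimal usage sketch (hypothetical caller, not code from this file):
 * a temporary reference taken with get_page() must be balanced by a
 * put_page() once the page is no longer needed:
 *
 *	get_page(page);		// pin the page
 *	...			// use the page safely
 *	put_page(page);		// frees it here if we held the last ref
 */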
/**
 * put_pages_list - release a list of pages
 * @pages: list of pages threaded on page->lru
 *
 * Release a list of pages which are strung together on page->lru.
 * Currently used by read_cache_pages() and related error recovery code.
 */
  void put_pages_list(struct list_head *pages)
  {
  	while (!list_empty(pages)) {
  		struct page *victim;
  
  		victim = list_entry(pages->prev, struct page, lru);
  		list_del(&victim->lru);
  		page_cache_release(victim);
  	}
  }
  EXPORT_SYMBOL(put_pages_list);
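
/*
 * A minimal usage sketch (hypothetical caller): gather pages on a local
 * list threaded through page->lru, then release them in one call:
 *
 *	LIST_HEAD(pages);
 *
 *	list_add(&page->lru, &pages);	// for each page we hold a ref on
 *	put_pages_list(&pages);		// drops one reference per page
 */
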
  /*
   * Writeback is about to end against a page which has been marked for immediate
   * reclaim.  If it still appears to be reclaimable, move it to the tail of the
   * inactive list.  The page still has PageWriteback set, which will pin it.
   *
   * We don't expect many pages to come through here, so don't bother batching
   * things up.
   *
   * To avoid placing the page at the tail of the LRU while PG_writeback is still
   * set, this function will clear PG_writeback before performing the page
   * motion.  Do that inside the lru lock because once PG_writeback is cleared
   * we may not touch the page.
   *
   * Returns zero if it cleared PG_writeback.
   */
  int rotate_reclaimable_page(struct page *page)
  {
  	struct zone *zone;
  	unsigned long flags;
  
  	if (PageLocked(page))
  		return 1;
  	if (PageDirty(page))
  		return 1;
  	if (PageActive(page))
  		return 1;
  	if (!PageLRU(page))
  		return 1;
  
  	zone = page_zone(page);
  	spin_lock_irqsave(&zone->lru_lock, flags);
  	if (PageLRU(page) && !PageActive(page)) {
  		list_move_tail(&page->lru, &zone->inactive_list);
  		__count_vm_event(PGROTATED);
  	}
  	if (!test_clear_page_writeback(page))
  		BUG();
  	spin_unlock_irqrestore(&zone->lru_lock, flags);
  	return 0;
  }
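
/*
 * The expected caller is end_page_writeback() in mm/filemap.c, roughly
 * (a sketch of that caller, not code from this file):
 *
 *	if (!TestClearPageReclaim(page) || rotate_reclaimable_page(page)) {
 *		if (!test_clear_page_writeback(page))
 *			BUG();
 *	}
 *
 * i.e. when the rotation succeeds (returns 0) it has already cleared
 * PG_writeback under the lru lock, and the caller must not clear it again.
 */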
  
  /*
   * FIXME: speed this up?
   */
  void fastcall activate_page(struct page *page)
  {
  	struct zone *zone = page_zone(page);
  
  	spin_lock_irq(&zone->lru_lock);
  	if (PageLRU(page) && !PageActive(page)) {
  		del_page_from_inactive_list(zone, page);
  		SetPageActive(page);
  		add_page_to_active_list(zone, page);
  		__count_vm_event(PGACTIVATE);
  	}
  	spin_unlock_irq(&zone->lru_lock);
  }
  
  /*
   * Mark a page as having seen activity.
   *
   * inactive,unreferenced	->	inactive,referenced
   * inactive,referenced		->	active,unreferenced
   * active,unreferenced		->	active,referenced
   */
  void fastcall mark_page_accessed(struct page *page)
  {
  	if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
  		activate_page(page);
  		ClearPageReferenced(page);
  	} else if (!PageReferenced(page)) {
  		SetPageReferenced(page);
  	}
  }
  
  EXPORT_SYMBOL(mark_page_accessed);
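
/*
 * Worked example of the ladder above (hypothetical caller): the first
 * mark_page_accessed() on an inactive, unreferenced page merely sets
 * PG_referenced; only a second call promotes the page to the active list
 * and clears PG_referenced again, so two distinct touches are needed
 * before a page is activated.
 */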
  
static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };

/**
 * lru_cache_add - add a page to the page lists
 * @page: the page to add
 */
  void fastcall lru_cache_add(struct page *page)
  {
  	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
  
  	page_cache_get(page);
  	if (!pagevec_add(pvec, page))
  		__pagevec_lru_add(pvec);
  	put_cpu_var(lru_add_pvecs);
  }
  
  void fastcall lru_cache_add_active(struct page *page)
  {
  	struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
  
  	page_cache_get(page);
  	if (!pagevec_add(pvec, page))
  		__pagevec_lru_add_active(pvec);
  	put_cpu_var(lru_add_active_pvecs);
  }
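
/*
 * pagevec_add() returns the number of slots still free in the pagevec,
 * so the two functions above take the zone lru_lock only once per
 * PAGEVEC_SIZE pages, when the per-CPU batch fills up and is flushed.
 */
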
  static void __lru_add_drain(int cpu)
  {
  	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
	/*
	 * Either the CPU is dead, or we are draining the local CPU with
	 * preemption disabled; either way, no locking is needed.
	 */
  	if (pagevec_count(pvec))
  		__pagevec_lru_add(pvec);
  	pvec = &per_cpu(lru_add_active_pvecs, cpu);
  	if (pagevec_count(pvec))
  		__pagevec_lru_add_active(pvec);
  }
  
  void lru_add_drain(void)
  {
  	__lru_add_drain(get_cpu());
  	put_cpu();
  }
  #ifdef CONFIG_NUMA
  static void lru_add_drain_per_cpu(void *dummy)
  {
  	lru_add_drain();
  }
  
  /*
   * Returns 0 for success
   */
  int lru_add_drain_all(void)
  {
  	return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
  }
  
  #else
  
  /*
   * Returns 0 for success
   */
  int lru_add_drain_all(void)
  {
  	lru_add_drain();
  	return 0;
  }
  #endif
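
/*
 * On NUMA builds every CPU has private pagevecs, so a full drain must run
 * lru_add_drain() on each CPU; schedule_on_each_cpu() queues a work item
 * per CPU and waits for them all to finish.  The non-NUMA variant only
 * drains the local CPU, which is assumed to be sufficient there.
 */
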
  /*
   * This path almost never happens for VM activity - pages are normally
   * freed via pagevecs.  But it gets used by networking.
   */
  void fastcall __page_cache_release(struct page *page)
  {
  	if (PageLRU(page)) {
  		unsigned long flags;
  		struct zone *zone = page_zone(page);
  		spin_lock_irqsave(&zone->lru_lock, flags);
  		BUG_ON(!PageLRU(page));
  		__ClearPageLRU(page);
  		del_page_from_lru(zone, page);
  		spin_unlock_irqrestore(&zone->lru_lock, flags);
  	}
  	free_hot_page(page);
  }
  EXPORT_SYMBOL(__page_cache_release);
  
  /*
   * Batched page_cache_release().  Decrement the reference count on all the
   * passed pages.  If it fell to zero then remove the page from the LRU and
   * free it.
   *
   * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
   * for the remainder of the operation.
   *
   * The locking in this function is against shrink_cache(): we recheck the
   * page count inside the lock to see whether shrink_cache grabbed the page
   * via the LRU.  If it did, give up: shrink_cache will free it.
   */
  void release_pages(struct page **pages, int nr, int cold)
  {
  	int i;
  	struct pagevec pages_to_free;
  	struct zone *zone = NULL;
  
  	pagevec_init(&pages_to_free, cold);
  	for (i = 0; i < nr; i++) {
  		struct page *page = pages[i];
  		if (unlikely(PageCompound(page))) {
  			if (zone) {
  				spin_unlock_irq(&zone->lru_lock);
  				zone = NULL;
  			}
  			put_compound_page(page);
  			continue;
  		}
  		if (!put_page_testzero(page))
  			continue;
  		if (PageLRU(page)) {
  			struct zone *pagezone = page_zone(page);
  			if (pagezone != zone) {
  				if (zone)
  					spin_unlock_irq(&zone->lru_lock);
  				zone = pagezone;
  				spin_lock_irq(&zone->lru_lock);
  			}
  			BUG_ON(!PageLRU(page));
  			__ClearPageLRU(page);
  			del_page_from_lru(zone, page);
  		}
  
  		if (!pagevec_add(&pages_to_free, page)) {
  			if (zone) {
  				spin_unlock_irq(&zone->lru_lock);
  				zone = NULL;
  			}
  			__pagevec_free(&pages_to_free);
  			pagevec_reinit(&pages_to_free);
		}
  	}
  	if (zone)
  		spin_unlock_irq(&zone->lru_lock);
  
  	pagevec_free(&pages_to_free);
  }
  
  /*
   * The pages which we're about to release may be in the deferred lru-addition
   * queues.  That would prevent them from really being freed right now.  That's
   * OK from a correctness point of view but is inefficient - those pages may be
   * cache-warm and we want to give them back to the page allocator ASAP.
   *
   * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
   * and __pagevec_lru_add_active() call release_pages() directly to avoid
   * mutual recursion.
   */
  void __pagevec_release(struct pagevec *pvec)
  {
  	lru_add_drain();
  	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
  	pagevec_reinit(pvec);
  }
  EXPORT_SYMBOL(__pagevec_release);
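
/*
 * A minimal sketch of the usual pagevec pattern (hypothetical caller):
 *
 *	struct pagevec pvec;
 *
 *	pagevec_init(&pvec, 0);			// 0 = pages are cache-hot
 *	for each page to be released {
 *		if (!pagevec_add(&pvec, page))	// batch full?
 *			__pagevec_release(&pvec);
 *	}
 *	pagevec_release(&pvec);			// flush any remainder
 */
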
  /*
   * pagevec_release() for pages which are known to not be on the LRU
   *
   * This function reinitialises the caller's pagevec.
   */
  void __pagevec_release_nonlru(struct pagevec *pvec)
  {
  	int i;
  	struct pagevec pages_to_free;
  
  	pagevec_init(&pages_to_free, pvec->cold);
  	for (i = 0; i < pagevec_count(pvec); i++) {
  		struct page *page = pvec->pages[i];
  
  		BUG_ON(PageLRU(page));
  		if (put_page_testzero(page))
  			pagevec_add(&pages_to_free, page);
  	}
  	pagevec_free(&pages_to_free);
  	pagevec_reinit(pvec);
  }
  
  /*
   * Add the passed pages to the LRU, then drop the caller's refcount
   * on them.  Reinitialises the caller's pagevec.
   */
  void __pagevec_lru_add(struct pagevec *pvec)
  {
  	int i;
  	struct zone *zone = NULL;
  
  	for (i = 0; i < pagevec_count(pvec); i++) {
  		struct page *page = pvec->pages[i];
  		struct zone *pagezone = page_zone(page);
  
  		if (pagezone != zone) {
  			if (zone)
  				spin_unlock_irq(&zone->lru_lock);
  			zone = pagezone;
  			spin_lock_irq(&zone->lru_lock);
  		}
  		BUG_ON(PageLRU(page));
  		SetPageLRU(page);
  		add_page_to_inactive_list(zone, page);
  	}
  	if (zone)
  		spin_unlock_irq(&zone->lru_lock);
  	release_pages(pvec->pages, pvec->nr, pvec->cold);
  	pagevec_reinit(pvec);
  }
  
  EXPORT_SYMBOL(__pagevec_lru_add);
  
  void __pagevec_lru_add_active(struct pagevec *pvec)
  {
  	int i;
  	struct zone *zone = NULL;
  
  	for (i = 0; i < pagevec_count(pvec); i++) {
  		struct page *page = pvec->pages[i];
  		struct zone *pagezone = page_zone(page);
  
  		if (pagezone != zone) {
  			if (zone)
  				spin_unlock_irq(&zone->lru_lock);
  			zone = pagezone;
  			spin_lock_irq(&zone->lru_lock);
  		}
  		BUG_ON(PageLRU(page));
  		SetPageLRU(page);
  		BUG_ON(PageActive(page));
  		SetPageActive(page);
  		add_page_to_active_list(zone, page);
  	}
  	if (zone)
  		spin_unlock_irq(&zone->lru_lock);
  	release_pages(pvec->pages, pvec->nr, pvec->cold);
  	pagevec_reinit(pvec);
  }
  
  /*
   * Try to drop buffers from the pages in a pagevec
   */
  void pagevec_strip(struct pagevec *pvec)
  {
  	int i;
  
  	for (i = 0; i < pagevec_count(pvec); i++) {
  		struct page *page = pvec->pages[i];
  
  		if (PagePrivate(page) && !TestSetPageLocked(page)) {
  			if (PagePrivate(page))
  				try_to_release_page(page, 0);
  			unlock_page(page);
  		}
  	}
  }
  
  /**
   * pagevec_lookup - gang pagecache lookup
   * @pvec:	Where the resulting pages are placed
   * @mapping:	The address_space to search
   * @start:	The starting page index
   * @nr_pages:	The maximum number of pages
   *
   * pagevec_lookup() will search for and return a group of up to @nr_pages pages
   * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
   * reference against the pages in @pvec.
   *
 * The search returns a group of mapping-contiguous pages with ascending
 * indices.  There may be holes in the indices due to not-present pages.
   *
   * pagevec_lookup() returns the number of pages which were found.
   */
  unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
  		pgoff_t start, unsigned nr_pages)
  {
  	pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
  	return pagevec_count(pvec);
  }
  EXPORT_SYMBOL(pagevec_lookup);
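
/**
 * pagevec_lookup_tag - gang tagged pagecache lookup
 * @pvec:	Where the resulting pages are placed
 * @mapping:	The address_space to search
 * @index:	The starting page index; advanced past the last page found
 * @tag:	The radix-tree tag to match, e.g. PAGECACHE_TAG_DIRTY
 * @nr_pages:	The maximum number of pages
 *
 * Like pagevec_lookup(), but only returns pages tagged with @tag.  Because
 * @index is updated, repeated calls walk forward through the mapping.
 */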
  unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
  		pgoff_t *index, int tag, unsigned nr_pages)
  {
  	pvec->nr = find_get_pages_tag(mapping, index, tag,
  					nr_pages, pvec->pages);
  	return pagevec_count(pvec);
  }
  EXPORT_SYMBOL(pagevec_lookup_tag);
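
/*
 * A minimal sketch of the usual lookup loop (hypothetical caller):
 *
 *	struct pagevec pvec;
 *	pgoff_t next = 0;
 *	int i;
 *
 *	pagevec_init(&pvec, 0);
 *	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 *		for (i = 0; i < pagevec_count(&pvec); i++) {
 *			struct page *page = pvec.pages[i];
 *
 *			next = page->index + 1;
 *			// ... use the page ...
 *		}
 *		pagevec_release(&pvec);
 *	}
 */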
  
  #ifdef CONFIG_SMP
  /*
   * We tolerate a little inaccuracy to avoid ping-ponging the counter between
   * CPUs
   */
  #define ACCT_THRESHOLD	max(16, NR_CPUS * 2)
  
  static DEFINE_PER_CPU(long, committed_space) = 0;
  
  void vm_acct_memory(long pages)
  {
  	long *local;
  
  	preempt_disable();
  	local = &__get_cpu_var(committed_space);
  	*local += pages;
  	if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
  		atomic_add(*local, &vm_committed_space);
  		*local = 0;
  	}
  	preempt_enable();
  }
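
/*
 * vm_unacct_memory() in linux/mm.h is just vm_acct_memory(-pages), so the
 * overcommit bookkeeping done by __vm_enough_memory() funnels through this
 * batching too: the global vm_committed_space counter is only touched once
 * a CPU's local delta exceeds ACCT_THRESHOLD in either direction.
 */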
  
  #ifdef CONFIG_HOTPLUG_CPU
  
  /* Drop the CPU's cached committed space back into the central pool. */
  static int cpu_swap_callback(struct notifier_block *nfb,
  			     unsigned long action,
  			     void *hcpu)
  {
  	long *committed;
  
  	committed = &per_cpu(committed_space, (long)hcpu);
  	if (action == CPU_DEAD) {
  		atomic_add(*committed, &vm_committed_space);
  		*committed = 0;
  		__lru_add_drain((long)hcpu);
  	}
  	return NOTIFY_OK;
  }
  #endif /* CONFIG_HOTPLUG_CPU */
  #endif /* CONFIG_SMP */
  /*
   * Perform any setup for the swap system
   */
  void __init swap_setup(void)
  {
  	unsigned long megs = num_physpages >> (20 - PAGE_SHIFT);
  
  	/* Use a smaller cluster for small-memory machines */
  	if (megs < 16)
  		page_cluster = 2;
  	else
  		page_cluster = 3;
	/*
	 * Right now other parts of the system mean that we
	 * _really_ don't want to cluster much more.
	 */
  	hotcpu_notifier(cpu_swap_callback, 0);
  }