Blame view
mm/swap.c
24.3 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 7 |
/* * linux/mm/swap.c * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ /* |
183ff22bb spelling fixes: mm/ |
8 |
* This file contains the default values for the operation of the |
1da177e4c Linux-2.6.12-rc2 |
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
* Linux VM subsystem. Fine-tuning documentation can be found in * Documentation/sysctl/vm.txt. * Started 18.12.91 * Swap aging added 23.2.95, Stephen Tweedie. * Buffermem limits added 12.3.98, Rik van Riel. */ #include <linux/mm.h> #include <linux/sched.h> #include <linux/kernel_stat.h> #include <linux/swap.h> #include <linux/mman.h> #include <linux/pagemap.h> #include <linux/pagevec.h> #include <linux/init.h> |
b95f1b31b mm: Map most file... |
24 |
#include <linux/export.h> |
1da177e4c Linux-2.6.12-rc2 |
25 |
#include <linux/mm_inline.h> |
1da177e4c Linux-2.6.12-rc2 |
26 27 28 29 |
#include <linux/percpu_counter.h> #include <linux/percpu.h> #include <linux/cpu.h> #include <linux/notifier.h> |
e0bf68dde mm: bdi init hooks |
30 |
#include <linux/backing-dev.h> |
66e1707bc Memory controller... |
31 |
#include <linux/memcontrol.h> |
5a0e3ad6a include cleanup: ... |
32 |
#include <linux/gfp.h> |
a27bb332c aio: don't includ... |
33 |
#include <linux/uio.h> |
1da177e4c Linux-2.6.12-rc2 |
34 |
|
64d6519dd swap: cull unevic... |
35 |
#include "internal.h" |
c6286c983 mm: add tracepoin... |
36 37 |
#define CREATE_TRACE_POINTS #include <trace/events/pagemap.h> |
1da177e4c Linux-2.6.12-rc2 |
38 39 |
/* How many pages do we try to swap or page in/out together? */ int page_cluster; |
13f7f7898 mm: pagevec: defe... |
40 |
static DEFINE_PER_CPU(struct pagevec, lru_add_pvec); |
f84f9504b mm: remove initia... |
41 |
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); |
315601809 mm: deactivate in... |
42 |
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); |
902aaed0d mm: use pagevec t... |
43 |
|
b221385bc [PATCH] mm/: make... |
44 45 46 47 |
/* * This path almost never happens for VM activity - pages are normally * freed via pagevecs. But it gets used by networking. */ |
920c7a5d0 mm: remove fastca... |
48 |
static void __page_cache_release(struct page *page) |
b221385bc [PATCH] mm/: make... |
49 50 |
{ if (PageLRU(page)) { |
b221385bc [PATCH] mm/: make... |
51 |
struct zone *zone = page_zone(page); |
fa9add641 mm/memcg: apply a... |
52 53 |
struct lruvec *lruvec; unsigned long flags; |
b221385bc [PATCH] mm/: make... |
54 55 |
spin_lock_irqsave(&zone->lru_lock, flags); |
fa9add641 mm/memcg: apply a... |
56 |
lruvec = mem_cgroup_page_lruvec(page, zone); |
b221385bc [PATCH] mm/: make... |
57 58 |
VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); |
fa9add641 mm/memcg: apply a... |
59 |
del_page_from_lru_list(page, lruvec, page_off_lru(page)); |
b221385bc [PATCH] mm/: make... |
60 61 |
spin_unlock_irqrestore(&zone->lru_lock, flags); } |
918070634 thp: alter compou... |
62 63 64 65 66 |
} static void __put_single_page(struct page *page) { __page_cache_release(page); |
fc91668ea mm: remove free_h... |
67 |
free_hot_cold_page(page, 0); |
b221385bc [PATCH] mm/: make... |
68 |
} |
918070634 thp: alter compou... |
69 |
static void __put_compound_page(struct page *page) |
1da177e4c Linux-2.6.12-rc2 |
70 |
{ |
918070634 thp: alter compou... |
71 |
compound_page_dtor *dtor; |
1da177e4c Linux-2.6.12-rc2 |
72 |
|
918070634 thp: alter compou... |
73 74 75 76 77 78 79 80 81 |
__page_cache_release(page); dtor = get_compound_page_dtor(page); (*dtor)(page); } static void put_compound_page(struct page *page) { if (unlikely(PageTail(page))) { /* __split_huge_page_refcount can run under us */ |
70b50f94f mm: thp: tail pag... |
82 83 84 85 |
struct page *page_head = compound_trans_head(page); if (likely(page != page_head && get_page_unless_zero(page_head))) { |
918070634 thp: alter compou... |
86 |
unsigned long flags; |
5bf5f03c2 mm: fix slab->pag... |
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
/* * THP can not break up slab pages so avoid taking * compound_lock(). Slab performs non-atomic bit ops * on page->flags for better performance. In particular * slab_unlock() in slub used to be a hot path. It is * still hot on arches that do not support * this_cpu_cmpxchg_double(). */ if (PageSlab(page_head)) { if (PageTail(page)) { if (put_page_testzero(page_head)) VM_BUG_ON(1); atomic_dec(&page->_mapcount); goto skip_lock_tail; } else goto skip_lock; } |
918070634 thp: alter compou... |
106 |
/* |
70b50f94f mm: thp: tail pag... |
107 108 109 110 |
* page_head wasn't a dangling pointer but it * may not be a head page anymore by the time * we obtain the lock. That is ok as long as it * can't be freed from under us. |
918070634 thp: alter compou... |
111 |
*/ |
918070634 thp: alter compou... |
112 113 114 115 |
flags = compound_lock_irqsave(page_head); if (unlikely(!PageTail(page))) { /* __split_huge_page_refcount run before us */ compound_unlock_irqrestore(page_head, flags); |
5bf5f03c2 mm: fix slab->pag... |
116 |
skip_lock: |
918070634 thp: alter compou... |
117 118 |
if (put_page_testzero(page_head)) __put_single_page(page_head); |
5bf5f03c2 mm: fix slab->pag... |
119 |
out_put_single: |
918070634 thp: alter compou... |
120 121 122 123 124 125 126 |
if (put_page_testzero(page)) __put_single_page(page); return; } VM_BUG_ON(page_head != page->first_page); /* * We can release the refcount taken by |
70b50f94f mm: thp: tail pag... |
127 128 129 |
* get_page_unless_zero() now that * __split_huge_page_refcount() is blocked on * the compound_lock. |
918070634 thp: alter compou... |
130 131 132 133 |
*/ if (put_page_testzero(page_head)) VM_BUG_ON(1); /* __split_huge_page_refcount will wait now */ |
70b50f94f mm: thp: tail pag... |
134 135 |
VM_BUG_ON(page_mapcount(page) <= 0); atomic_dec(&page->_mapcount); |
918070634 thp: alter compou... |
136 |
VM_BUG_ON(atomic_read(&page_head->_count) <= 0); |
70b50f94f mm: thp: tail pag... |
137 |
VM_BUG_ON(atomic_read(&page->_count) != 0); |
918070634 thp: alter compou... |
138 |
compound_unlock_irqrestore(page_head, flags); |
5bf5f03c2 mm: fix slab->pag... |
139 140 |
skip_lock_tail: |
a95a82e96 thp: put_page: re... |
141 142 143 144 145 146 |
if (put_page_testzero(page_head)) { if (PageHead(page_head)) __put_compound_page(page_head); else __put_single_page(page_head); } |
918070634 thp: alter compou... |
147 148 149 150 151 152 153 154 155 156 |
} else { /* page_head is a dangling pointer */ VM_BUG_ON(PageTail(page)); goto out_put_single; } } else if (put_page_testzero(page)) { if (PageHead(page)) __put_compound_page(page); else __put_single_page(page); |
1da177e4c Linux-2.6.12-rc2 |
157 |
} |
8519fb30e [PATCH] mm: compo... |
158 159 160 161 162 163 164 |
} void put_page(struct page *page) { if (unlikely(PageCompound(page))) put_compound_page(page); else if (put_page_testzero(page)) |
918070634 thp: alter compou... |
165 |
__put_single_page(page); |
1da177e4c Linux-2.6.12-rc2 |
166 167 |
} EXPORT_SYMBOL(put_page); |
1da177e4c Linux-2.6.12-rc2 |
168 |
|
70b50f94f mm: thp: tail pag... |
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
/* * This function is exported but must not be called by anything other * than get_page(). It implements the slow path of get_page(). */ bool __get_page_tail(struct page *page) { /* * This takes care of get_page() if run on a tail page * returned by one of the get_user_pages/follow_page variants. * get_user_pages/follow_page itself doesn't need the compound * lock because it runs __get_page_tail_foll() under the * proper PT lock that already serializes against * split_huge_page(). */ unsigned long flags; bool got = false; struct page *page_head = compound_trans_head(page); if (likely(page != page_head && get_page_unless_zero(page_head))) { |
5bf5f03c2 mm: fix slab->pag... |
188 189 190 191 192 193 194 195 196 197 198 |
/* Ref to put_compound_page() comment. */ if (PageSlab(page_head)) { if (likely(PageTail(page))) { __get_page_tail_foll(page, false); return true; } else { put_page(page_head); return false; } } |
70b50f94f mm: thp: tail pag... |
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
/* * page_head wasn't a dangling pointer but it * may not be a head page anymore by the time * we obtain the lock. That is ok as long as it * can't be freed from under us. */ flags = compound_lock_irqsave(page_head); /* here __split_huge_page_refcount won't run anymore */ if (likely(PageTail(page))) { __get_page_tail_foll(page, false); got = true; } compound_unlock_irqrestore(page_head, flags); if (unlikely(!got)) put_page(page_head); } return got; } EXPORT_SYMBOL(__get_page_tail); |
1d7ea7324 [PATCH] fuse: fix... |
218 |
/**
 * put_pages_list() - release a list of pages
 * @pages: list of pages threaded on page->lru
 *
 * Empties @pages from the tail end: each page is unlinked from the list
 * and then has one reference dropped.  Currently used by
 * read_cache_pages() and related error recovery code.
 */
void put_pages_list(struct list_head *pages)
{
	struct page *last;

	for (;;) {
		if (list_empty(pages))
			break;

		last = list_entry(pages->prev, struct page, lru);
		list_del(&last->lru);
		page_cache_release(last);
	}
}
EXPORT_SYMBOL(put_pages_list);
18022c5d8 mm: add get_kerne... |
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 |
/* * get_kernel_pages() - pin kernel pages in memory * @kiov: An array of struct kvec structures * @nr_segs: number of segments to pin * @write: pinning for read/write, currently ignored * @pages: array that receives pointers to the pages pinned. * Should be at least nr_segs long. * * Returns number of pages pinned. This may be fewer than the number * requested. If nr_pages is 0 or negative, returns 0. If no pages * were pinned, returns -errno. Each page returned must be released * with a put_page() call when it is finished with. */ int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write, struct page **pages) { int seg; for (seg = 0; seg < nr_segs; seg++) { if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE)) return seg; |
5a178119b mm: add support f... |
257 |
pages[seg] = kmap_to_page(kiov[seg].iov_base); |
18022c5d8 mm: add get_kerne... |
258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 |
page_cache_get(pages[seg]); } return seg; } EXPORT_SYMBOL_GPL(get_kernel_pages); /* * get_kernel_page() - pin a kernel page in memory * @start: starting kernel address * @write: pinning for read/write, currently ignored * @pages: array that receives pointer to the page pinned. * Must be at least nr_segs long. * * Returns 1 if page is pinned. If the page was not pinned, returns * -errno. The page returned must be released with a put_page() call * when it is finished with. */ int get_kernel_page(unsigned long start, int write, struct page **pages) { const struct kvec kiov = { .iov_base = (void *)start, .iov_len = PAGE_SIZE }; return get_kernel_pages(&kiov, 1, write, pages); } EXPORT_SYMBOL_GPL(get_kernel_page); |
3dd7ae8ec mm: simplify code... |
286 |
static void pagevec_lru_move_fn(struct pagevec *pvec, |
fa9add641 mm/memcg: apply a... |
287 288 |
void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg), void *arg) |
902aaed0d mm: use pagevec t... |
289 290 |
{ int i; |
902aaed0d mm: use pagevec t... |
291 |
struct zone *zone = NULL; |
fa9add641 mm/memcg: apply a... |
292 |
struct lruvec *lruvec; |
3dd7ae8ec mm: simplify code... |
293 |
unsigned long flags = 0; |
902aaed0d mm: use pagevec t... |
294 295 296 297 298 299 300 |
for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; struct zone *pagezone = page_zone(page); if (pagezone != zone) { if (zone) |
3dd7ae8ec mm: simplify code... |
301 |
spin_unlock_irqrestore(&zone->lru_lock, flags); |
902aaed0d mm: use pagevec t... |
302 |
zone = pagezone; |
3dd7ae8ec mm: simplify code... |
303 |
spin_lock_irqsave(&zone->lru_lock, flags); |
902aaed0d mm: use pagevec t... |
304 |
} |
3dd7ae8ec mm: simplify code... |
305 |
|
fa9add641 mm/memcg: apply a... |
306 307 |
lruvec = mem_cgroup_page_lruvec(page, zone); (*move_fn)(page, lruvec, arg); |
902aaed0d mm: use pagevec t... |
308 309 |
} if (zone) |
3dd7ae8ec mm: simplify code... |
310 |
spin_unlock_irqrestore(&zone->lru_lock, flags); |
83896fb5e Revert "mm: simpl... |
311 312 |
release_pages(pvec->pages, pvec->nr, pvec->cold); pagevec_reinit(pvec); |
d8505dee1 mm: simplify code... |
313 |
} |
fa9add641 mm/memcg: apply a... |
314 315 |
static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec, void *arg) |
3dd7ae8ec mm: simplify code... |
316 317 |
{ int *pgmoved = arg; |
3dd7ae8ec mm: simplify code... |
318 319 320 |
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { enum lru_list lru = page_lru_base_type(page); |
925b7673c mm: make per-memc... |
321 |
list_move_tail(&page->lru, &lruvec->lists[lru]); |
3dd7ae8ec mm: simplify code... |
322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 |
(*pgmoved)++; } } /* * pagevec_move_tail() must be called with IRQ disabled. * Otherwise this may cause nasty races. */ static void pagevec_move_tail(struct pagevec *pvec) { int pgmoved = 0; pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved); __count_vm_events(PGROTATED, pgmoved); } |
902aaed0d mm: use pagevec t... |
337 |
/* |
1da177e4c Linux-2.6.12-rc2 |
338 339 |
* Writeback is about to end against a page which has been marked for immediate * reclaim. If it still appears to be reclaimable, move it to the tail of the |
902aaed0d mm: use pagevec t... |
340 |
* inactive list. |
1da177e4c Linux-2.6.12-rc2 |
341 |
*/ |
3dd7ae8ec mm: simplify code... |
342 |
void rotate_reclaimable_page(struct page *page) |
1da177e4c Linux-2.6.12-rc2 |
343 |
{ |
ac6aadb24 mm: rotate_reclai... |
344 |
if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && |
894bc3104 Unevictable LRU I... |
345 |
!PageUnevictable(page) && PageLRU(page)) { |
ac6aadb24 mm: rotate_reclai... |
346 347 348 349 350 351 352 353 354 355 |
struct pagevec *pvec; unsigned long flags; page_cache_get(page); local_irq_save(flags); pvec = &__get_cpu_var(lru_rotate_pvecs); if (!pagevec_add(pvec, page)) pagevec_move_tail(pvec); local_irq_restore(flags); } |
1da177e4c Linux-2.6.12-rc2 |
356 |
} |
fa9add641 mm/memcg: apply a... |
357 |
static void update_page_reclaim_stat(struct lruvec *lruvec, |
3e2f41f1f memcg: add zone_r... |
358 359 |
int file, int rotated) { |
fa9add641 mm/memcg: apply a... |
360 |
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; |
3e2f41f1f memcg: add zone_r... |
361 362 363 364 |
reclaim_stat->recent_scanned[file]++; if (rotated) reclaim_stat->recent_rotated[file]++; |
3e2f41f1f memcg: add zone_r... |
365 |
} |
fa9add641 mm/memcg: apply a... |
366 367 |
static void __activate_page(struct page *page, struct lruvec *lruvec, void *arg) |
1da177e4c Linux-2.6.12-rc2 |
368 |
{ |
744ed1442 mm: batch activat... |
369 |
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { |
7a608572a Revert "mm: batch... |
370 371 |
int file = page_is_file_cache(page); int lru = page_lru_base_type(page); |
744ed1442 mm: batch activat... |
372 |
|
fa9add641 mm/memcg: apply a... |
373 |
del_page_from_lru_list(page, lruvec, lru); |
7a608572a Revert "mm: batch... |
374 375 |
SetPageActive(page); lru += LRU_ACTIVE; |
fa9add641 mm/memcg: apply a... |
376 |
add_page_to_lru_list(page, lruvec, lru); |
c6286c983 mm: add tracepoin... |
377 |
trace_mm_lru_activate(page, page_to_pfn(page)); |
4f98a2fee vmscan: split LRU... |
378 |
|
fa9add641 mm/memcg: apply a... |
379 380 |
__count_vm_event(PGACTIVATE); update_page_reclaim_stat(lruvec, file, 1); |
1da177e4c Linux-2.6.12-rc2 |
381 |
} |
eb709b0d0 mm: batch activat... |
382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 |
} #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); static void activate_page_drain(int cpu) { struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu); if (pagevec_count(pvec)) pagevec_lru_move_fn(pvec, __activate_page, NULL); } void activate_page(struct page *page) { if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); page_cache_get(page); if (!pagevec_add(pvec, page)) pagevec_lru_move_fn(pvec, __activate_page, NULL); put_cpu_var(activate_page_pvecs); } } #else static inline void activate_page_drain(int cpu) { } void activate_page(struct page *page) { struct zone *zone = page_zone(page); spin_lock_irq(&zone->lru_lock); |
fa9add641 mm/memcg: apply a... |
417 |
__activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL); |
1da177e4c Linux-2.6.12-rc2 |
418 419 |
spin_unlock_irq(&zone->lru_lock); } |
eb709b0d0 mm: batch activat... |
420 |
#endif |
1da177e4c Linux-2.6.12-rc2 |
421 |
|
059285a25 mm: activate !Pag... |
422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 |
/*
 * Mark @page active if it is currently sitting in this CPU's
 * lru_add_pvec, so that it lands on the active list when the pagevec
 * is drained.  Does nothing if the page is not found there.
 */
static void __lru_cache_activate_page(struct page *page)
{
	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
	int slot = pagevec_count(pvec);

	/*
	 * Search backwards on the optimistic assumption that the page being
	 * activated has just been added to this pagevec. Note that only
	 * the local pagevec is examined as a !PageLRU page could be in the
	 * process of being released, reclaimed, migrated or on a remote
	 * pagevec that is currently being drained. Furthermore, marking
	 * a remote pagevec's page PageActive potentially hits a race where
	 * a page is marked PageActive just after it is added to the inactive
	 * list causing accounting errors and BUG_ON checks to trigger.
	 */
	while (--slot >= 0) {
		if (pvec->pages[slot] != page)
			continue;

		SetPageActive(page);
		break;
	}

	put_cpu_var(lru_add_pvec);
}
1da177e4c Linux-2.6.12-rc2 |
448 449 450 451 452 453 454 |
/* * Mark a page as having seen activity. * * inactive,unreferenced -> inactive,referenced * inactive,referenced -> active,unreferenced * active,unreferenced -> active,referenced */ |
920c7a5d0 mm: remove fastca... |
455 |
void mark_page_accessed(struct page *page) |
1da177e4c Linux-2.6.12-rc2 |
456 |
{ |
894bc3104 Unevictable LRU I... |
457 |
if (!PageActive(page) && !PageUnevictable(page) && |
059285a25 mm: activate !Pag... |
458 459 460 461 462 463 464 465 466 467 468 469 |
PageReferenced(page)) { /* * If the page is on the LRU, queue it for activation via * activate_page_pvecs. Otherwise, assume the page is on a * pagevec, mark it active and it'll be moved to the active * LRU on the next drain. */ if (PageLRU(page)) activate_page(page); else __lru_cache_activate_page(page); |
1da177e4c Linux-2.6.12-rc2 |
470 471 472 473 474 |
ClearPageReferenced(page); } else if (!PageReferenced(page)) { SetPageReferenced(page); } } |
1da177e4c Linux-2.6.12-rc2 |
475 |
EXPORT_SYMBOL(mark_page_accessed); |
d741c9cde mm: fix nonunifor... |
476 |
/* |
13f7f7898 mm: pagevec: defe... |
477 478 479 480 |
* Queue the page for addition to the LRU via pagevec. The decision on whether * to add the page to the [in]active [file|anon] list is deferred until the * pagevec is drained. This gives a chance for the caller of __lru_cache_add() * have the page added to the active list using mark_page_accessed(). |
d741c9cde mm: fix nonunifor... |
481 |
*/ |
c53954a09 mm: remove lru pa... |
482 |
void __lru_cache_add(struct page *page) |
1da177e4c Linux-2.6.12-rc2 |
483 |
{ |
13f7f7898 mm: pagevec: defe... |
484 |
struct pagevec *pvec = &get_cpu_var(lru_add_pvec); |
1da177e4c Linux-2.6.12-rc2 |
485 |
page_cache_get(page); |
d741c9cde mm: fix nonunifor... |
486 |
if (!pagevec_space(pvec)) |
a0b8cab3b mm: remove lru pa... |
487 |
__pagevec_lru_add(pvec); |
d741c9cde mm: fix nonunifor... |
488 |
pagevec_add(pvec, page); |
13f7f7898 mm: pagevec: defe... |
489 |
put_cpu_var(lru_add_pvec); |
1da177e4c Linux-2.6.12-rc2 |
490 |
} |
47846b065 mm: export lru_ca... |
491 |
EXPORT_SYMBOL(__lru_cache_add); |
1da177e4c Linux-2.6.12-rc2 |
492 |
|
f04e9ebbe swap: use an arra... |
493 |
/**
 * lru_cache_add - add a page to a page list
 * @page: the page to be added to the LRU.
 *
 * Sanity-checks the page's flags and then queues it through
 * __lru_cache_add().  The page must not already be on an LRU list and
 * must not be flagged both active and unevictable.
 */
void lru_cache_add(struct page *page)
{
	/*
	 * A single check covers both orderings: the former
	 * "else if (PageUnevictable) VM_BUG_ON(PageActive)" arm could only
	 * be reached after PageActive had already been seen clear.
	 */
	VM_BUG_ON(PageActive(page) && PageUnevictable(page));
	VM_BUG_ON(PageLRU(page));
	__lru_cache_add(page);
}
894bc3104 Unevictable LRU I... |
508 509 510 511 512 513 514 515 516 517 518 519 520 |
/** * add_page_to_unevictable_list - add a page to the unevictable list * @page: the page to be added to the unevictable list * * Add page directly to its zone's unevictable list. To avoid races with * tasks that might be making the page evictable, through eg. munlock, * munmap or exit, while it's not on the lru, we want to add the page * while it's locked or otherwise "invisible" to other tasks. This is * difficult to do when using the pagevec cache, so bypass that. */ void add_page_to_unevictable_list(struct page *page) { struct zone *zone = page_zone(page); |
fa9add641 mm/memcg: apply a... |
521 |
struct lruvec *lruvec; |
894bc3104 Unevictable LRU I... |
522 523 |
spin_lock_irq(&zone->lru_lock); |
fa9add641 mm/memcg: apply a... |
524 |
lruvec = mem_cgroup_page_lruvec(page, zone); |
894bc3104 Unevictable LRU I... |
525 526 |
SetPageUnevictable(page); SetPageLRU(page); |
fa9add641 mm/memcg: apply a... |
527 |
add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE); |
894bc3104 Unevictable LRU I... |
528 529 |
spin_unlock_irq(&zone->lru_lock); } |
902aaed0d mm: use pagevec t... |
530 |
/* |
315601809 mm: deactivate in... |
531 532 533 534 535 |
* If the page can not be invalidated, it is moved to the * inactive list to speed up its reclaim. It is moved to the * head of the list, rather than the tail, to give the flusher * threads some time to write it out, as this is much more * effective than the single-page writeout from reclaim. |
278df9f45 mm: reclaim inval... |
536 537 538 539 540 541 542 543 544 545 546 547 548 549 |
* * If the page isn't page_mapped and dirty/writeback, the page * could reclaim asap using PG_reclaim. * * 1. active, mapped page -> none * 2. active, dirty/writeback page -> inactive, head, PG_reclaim * 3. inactive, mapped page -> none * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim * 5. inactive, clean -> inactive, tail * 6. Others -> none * * In 4, why it moves inactive's head, the VM expects the page would * be write it out by flusher threads as this is much more effective * than the single-page writeout from reclaim. |
315601809 mm: deactivate in... |
550 |
*/ |
fa9add641 mm/memcg: apply a... |
551 552 |
static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, void *arg) |
315601809 mm: deactivate in... |
553 554 |
{ int lru, file; |
278df9f45 mm: reclaim inval... |
555 |
bool active; |
315601809 mm: deactivate in... |
556 |
|
278df9f45 mm: reclaim inval... |
557 |
if (!PageLRU(page)) |
315601809 mm: deactivate in... |
558 |
return; |
bad49d9c8 mm: check PageUne... |
559 560 |
if (PageUnevictable(page)) return; |
315601809 mm: deactivate in... |
561 562 563 |
/* Some processes are using the page */ if (page_mapped(page)) return; |
278df9f45 mm: reclaim inval... |
564 |
active = PageActive(page); |
315601809 mm: deactivate in... |
565 566 |
file = page_is_file_cache(page); lru = page_lru_base_type(page); |
fa9add641 mm/memcg: apply a... |
567 568 |
del_page_from_lru_list(page, lruvec, lru + active); |
315601809 mm: deactivate in... |
569 570 |
ClearPageActive(page); ClearPageReferenced(page); |
fa9add641 mm/memcg: apply a... |
571 |
add_page_to_lru_list(page, lruvec, lru); |
315601809 mm: deactivate in... |
572 |
|
278df9f45 mm: reclaim inval... |
573 574 575 576 577 578 579 580 581 582 583 584 |
if (PageWriteback(page) || PageDirty(page)) { /* * PG_reclaim could be raced with end_page_writeback * It can make readahead confusing. But race window * is _really_ small and it's non-critical problem. */ SetPageReclaim(page); } else { /* * The page's writeback ends up during pagevec * We moves tha page into tail of inactive. */ |
925b7673c mm: make per-memc... |
585 |
list_move_tail(&page->lru, &lruvec->lists[lru]); |
278df9f45 mm: reclaim inval... |
586 587 588 589 590 |
__count_vm_event(PGROTATED); } if (active) __count_vm_event(PGDEACTIVATE); |
fa9add641 mm/memcg: apply a... |
591 |
update_page_reclaim_stat(lruvec, file, 0); |
315601809 mm: deactivate in... |
592 |
} |
315601809 mm: deactivate in... |
593 |
/* |
902aaed0d mm: use pagevec t... |
594 595 596 597 |
* Drain pages out of the cpu's pagevecs. * Either "cpu" is the current CPU, and preemption has already been * disabled; or "cpu" is being hot-unplugged, and is already dead. */ |
f0cb3c76a mm: drain percpu ... |
598 |
void lru_add_drain_cpu(int cpu) |
1da177e4c Linux-2.6.12-rc2 |
599 |
{ |
13f7f7898 mm: pagevec: defe... |
600 |
struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu); |
1da177e4c Linux-2.6.12-rc2 |
601 |
|
13f7f7898 mm: pagevec: defe... |
602 |
if (pagevec_count(pvec)) |
a0b8cab3b mm: remove lru pa... |
603 |
__pagevec_lru_add(pvec); |
902aaed0d mm: use pagevec t... |
604 605 606 607 608 609 610 611 612 613 |
pvec = &per_cpu(lru_rotate_pvecs, cpu); if (pagevec_count(pvec)) { unsigned long flags; /* No harm done if a racing interrupt already did this */ local_irq_save(flags); pagevec_move_tail(pvec); local_irq_restore(flags); } |
315601809 mm: deactivate in... |
614 615 616 |
pvec = &per_cpu(lru_deactivate_pvecs, cpu); if (pagevec_count(pvec)) |
3dd7ae8ec mm: simplify code... |
617 |
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); |
eb709b0d0 mm: batch activat... |
618 619 |
activate_page_drain(cpu); |
315601809 mm: deactivate in... |
620 621 622 623 624 625 626 627 628 629 630 631 |
} /** * deactivate_page - forcefully deactivate a page * @page: page to deactivate * * This function hints the VM that @page is a good reclaim candidate, * for example if its invalidation fails due to the page being dirty * or under writeback. */ void deactivate_page(struct page *page) { |
821ed6bbe mm: filter unevic... |
632 633 634 635 636 637 |
/* * In a workload with many unevictable page such as mprotect, unevictable * page deactivation for accelerating reclaim is pointless. */ if (PageUnevictable(page)) return; |
315601809 mm: deactivate in... |
638 639 640 641 |
if (likely(get_page_unless_zero(page))) { struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); if (!pagevec_add(pvec, page)) |
3dd7ae8ec mm: simplify code... |
642 |
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); |
315601809 mm: deactivate in... |
643 644 |
put_cpu_var(lru_deactivate_pvecs); } |
80bfed904 [PATCH] consolida... |
645 646 647 648 |
} void lru_add_drain(void) { |
f0cb3c76a mm: drain percpu ... |
649 |
lru_add_drain_cpu(get_cpu()); |
80bfed904 [PATCH] consolida... |
650 |
put_cpu(); |
1da177e4c Linux-2.6.12-rc2 |
651 |
} |
c4028958b WorkStruct: make ... |
652 |
static void lru_add_drain_per_cpu(struct work_struct *dummy) |
053837fce [PATCH] mm: migra... |
653 654 655 656 657 658 659 660 661 |
{ lru_add_drain(); } /* * Returns 0 for success */ int lru_add_drain_all(void) { |
c4028958b WorkStruct: make ... |
662 |
return schedule_on_each_cpu(lru_add_drain_per_cpu); |
053837fce [PATCH] mm: migra... |
663 |
} |
1da177e4c Linux-2.6.12-rc2 |
664 |
/* |
1da177e4c Linux-2.6.12-rc2 |
665 666 667 668 669 670 671 |
* Batched page_cache_release(). Decrement the reference count on all the * passed pages. If it fell to zero then remove the page from the LRU and * free it. * * Avoid taking zone->lru_lock if possible, but if it is taken, retain it * for the remainder of the operation. * |
ab33dc09a swap: update func... |
672 673 674 675 |
* The locking in this function is against shrink_inactive_list(): we recheck * the page count inside the lock to see whether shrink_inactive_list() * grabbed the page via the LRU. If it did, give up: shrink_inactive_list() * will free it. |
1da177e4c Linux-2.6.12-rc2 |
676 677 678 679 |
*/ void release_pages(struct page **pages, int nr, int cold) { int i; |
cc59850ef mm: add free_hot_... |
680 |
LIST_HEAD(pages_to_free); |
1da177e4c Linux-2.6.12-rc2 |
681 |
struct zone *zone = NULL; |
fa9add641 mm/memcg: apply a... |
682 |
struct lruvec *lruvec; |
902aaed0d mm: use pagevec t... |
683 |
unsigned long uninitialized_var(flags); |
1da177e4c Linux-2.6.12-rc2 |
684 |
|
1da177e4c Linux-2.6.12-rc2 |
685 686 |
for (i = 0; i < nr; i++) { struct page *page = pages[i]; |
1da177e4c Linux-2.6.12-rc2 |
687 |
|
8519fb30e [PATCH] mm: compo... |
688 689 |
if (unlikely(PageCompound(page))) { if (zone) { |
902aaed0d mm: use pagevec t... |
690 |
spin_unlock_irqrestore(&zone->lru_lock, flags); |
8519fb30e [PATCH] mm: compo... |
691 692 693 694 695 |
zone = NULL; } put_compound_page(page); continue; } |
b5810039a [PATCH] core remo... |
696 |
if (!put_page_testzero(page)) |
1da177e4c Linux-2.6.12-rc2 |
697 |
continue; |
46453a6e1 [PATCH] mm: never... |
698 699 |
if (PageLRU(page)) { struct zone *pagezone = page_zone(page); |
894bc3104 Unevictable LRU I... |
700 |
|
46453a6e1 [PATCH] mm: never... |
701 702 |
if (pagezone != zone) { if (zone) |
902aaed0d mm: use pagevec t... |
703 704 |
spin_unlock_irqrestore(&zone->lru_lock, flags); |
46453a6e1 [PATCH] mm: never... |
705 |
zone = pagezone; |
902aaed0d mm: use pagevec t... |
706 |
spin_lock_irqsave(&zone->lru_lock, flags); |
46453a6e1 [PATCH] mm: never... |
707 |
} |
fa9add641 mm/memcg: apply a... |
708 709 |
lruvec = mem_cgroup_page_lruvec(page, zone); |
725d704ec [PATCH] mm: VM_BU... |
710 |
VM_BUG_ON(!PageLRU(page)); |
674539115 [PATCH] mm: less ... |
711 |
__ClearPageLRU(page); |
fa9add641 mm/memcg: apply a... |
712 |
del_page_from_lru_list(page, lruvec, page_off_lru(page)); |
46453a6e1 [PATCH] mm: never... |
713 |
} |
c53954a09 mm: remove lru pa... |
714 715 |
/* Clear Active bit in case of parallel mark_page_accessed */ ClearPageActive(page); |
cc59850ef mm: add free_hot_... |
716 |
list_add(&page->lru, &pages_to_free); |
1da177e4c Linux-2.6.12-rc2 |
717 718 |
} if (zone) |
902aaed0d mm: use pagevec t... |
719 |
spin_unlock_irqrestore(&zone->lru_lock, flags); |
1da177e4c Linux-2.6.12-rc2 |
720 |
|
cc59850ef mm: add free_hot_... |
721 |
free_hot_cold_page_list(&pages_to_free, cold); |
1da177e4c Linux-2.6.12-rc2 |
722 |
} |
0be8557bc fuse: use release... |
723 |
EXPORT_SYMBOL(release_pages); |
1da177e4c Linux-2.6.12-rc2 |
724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 |
/*
 * The pages which we're about to release may be in the deferred lru-addition
 * queues.  That would prevent them from really being freed right now.  That's
 * OK from a correctness point of view but is inefficient - those pages may be
 * cache-warm and we want to give them back to the page allocator ASAP.
 *
 * So the local CPU's queues are drained first, then the pagevec's pages
 * are released and the pagevec is reset.
 */
void __pagevec_release(struct pagevec *pvec)
{
	int count;

	lru_add_drain();

	count = pagevec_count(pvec);
	release_pages(pvec->pages, count, pvec->cold);
	pagevec_reinit(pvec);
}
EXPORT_SYMBOL(__pagevec_release);
12d271078 memcg: fix split_... |
742 |
#ifdef CONFIG_TRANSPARENT_HUGEPAGE |
71e3aac07 thp: transparent ... |
743 |
/* used by __split_huge_page_refcount() */ |
fa9add641 mm/memcg: apply a... |
744 |
void lru_add_page_tail(struct page *page, struct page *page_tail, |
5bc7b8aca mm: thp: add spli... |
745 |
struct lruvec *lruvec, struct list_head *list) |
71e3aac07 thp: transparent ... |
746 |
{ |
7512102cf memcg: fix GPF wh... |
747 |
int uninitialized_var(active); |
71e3aac07 thp: transparent ... |
748 749 |
enum lru_list lru; const int file = 0; |
71e3aac07 thp: transparent ... |
750 751 752 753 |
VM_BUG_ON(!PageHead(page)); VM_BUG_ON(PageCompound(page_tail)); VM_BUG_ON(PageLRU(page_tail)); |
fa9add641 mm/memcg: apply a... |
754 755 |
VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&lruvec_zone(lruvec)->lru_lock)); |
71e3aac07 thp: transparent ... |
756 |
|
5bc7b8aca mm: thp: add spli... |
757 758 |
if (!list) SetPageLRU(page_tail); |
71e3aac07 thp: transparent ... |
759 |
|
39b5f29ac mm: remove vma ar... |
760 |
if (page_evictable(page_tail)) { |
71e3aac07 thp: transparent ... |
761 762 763 764 765 766 767 768 |
if (PageActive(page)) { SetPageActive(page_tail); active = 1; lru = LRU_ACTIVE_ANON; } else { active = 0; lru = LRU_INACTIVE_ANON; } |
71e3aac07 thp: transparent ... |
769 770 |
} else { SetPageUnevictable(page_tail); |
12d271078 memcg: fix split_... |
771 772 773 774 775 |
lru = LRU_UNEVICTABLE; } if (likely(PageLRU(page))) list_add_tail(&page_tail->lru, &page->lru); |
5bc7b8aca mm: thp: add spli... |
776 777 778 779 780 |
else if (list) { /* page reclaim is reclaiming a huge page */ get_page(page_tail); list_add_tail(&page_tail->lru, list); } else { |
12d271078 memcg: fix split_... |
781 782 783 784 785 786 787 788 |
struct list_head *list_head; /* * Head page has not yet been counted, as an hpage, * so we must account for each subpage individually. * * Use the standard add function to put page_tail on the list, * but then correct its position so they all end up in order. */ |
fa9add641 mm/memcg: apply a... |
789 |
add_page_to_lru_list(page_tail, lruvec, lru); |
12d271078 memcg: fix split_... |
790 791 |
list_head = page_tail->lru.prev; list_move_tail(&page_tail->lru, list_head); |
71e3aac07 thp: transparent ... |
792 |
} |
7512102cf memcg: fix GPF wh... |
793 794 |
if (!PageUnevictable(page)) |
fa9add641 mm/memcg: apply a... |
795 |
update_page_reclaim_stat(lruvec, file, active); |
71e3aac07 thp: transparent ... |
796 |
} |
12d271078 memcg: fix split_... |
797 |
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
71e3aac07 thp: transparent ... |
798 |
|
fa9add641 mm/memcg: apply a... |
799 800 |
static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, void *arg) |
3dd7ae8ec mm: simplify code... |
801 |
{ |
13f7f7898 mm: pagevec: defe... |
802 803 804 |
int file = page_is_file_cache(page); int active = PageActive(page); enum lru_list lru = page_lru(page); |
3dd7ae8ec mm: simplify code... |
805 |
|
3dd7ae8ec mm: simplify code... |
806 807 808 809 |
VM_BUG_ON(PageUnevictable(page)); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); |
fa9add641 mm/memcg: apply a... |
810 811 |
add_page_to_lru_list(page, lruvec, lru); update_page_reclaim_stat(lruvec, file, active); |
c6286c983 mm: add tracepoin... |
812 |
trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page)); |
3dd7ae8ec mm: simplify code... |
813 |
} |
1da177e4c Linux-2.6.12-rc2 |
814 |
/* |
1da177e4c Linux-2.6.12-rc2 |
815 816 817 |
* Add the passed pages to the LRU, then drop the caller's refcount * on them. Reinitialises the caller's pagevec. */ |
a0b8cab3b mm: remove lru pa... |
818 |
void __pagevec_lru_add(struct pagevec *pvec) |
1da177e4c Linux-2.6.12-rc2 |
819 |
{ |
a0b8cab3b mm: remove lru pa... |
820 |
pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL); |
1da177e4c Linux-2.6.12-rc2 |
821 |
} |
5095ae837 mm: fewer undersc... |
822 |
EXPORT_SYMBOL(__pagevec_lru_add); |
1da177e4c Linux-2.6.12-rc2 |
823 |
|
1da177e4c Linux-2.6.12-rc2 |
824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 |
/**
 * pagevec_lookup - gang pagecache lookup
 * @pvec:	Where the resulting pages are placed
 * @mapping:	The address_space to search
 * @start:	The starting page index
 * @nr_pages:	The maximum number of pages
 *
 * Searches @mapping for up to @nr_pages pages, beginning at index @start,
 * and stores them in @pvec.  A reference is taken against each page placed
 * in @pvec.
 *
 * The pages are returned with ascending indexes.  The indexes need not be
 * consecutive: not-present pages leave holes.
 *
 * NOTE(review): @nr_pages is forwarded unchanged, so callers presumably must
 * keep it within the capacity of @pvec->pages - confirm.
 *
 * Return: the number of pages which were found.
 */
unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
		pgoff_t start, unsigned nr_pages)
{
	struct page **slots = pvec->pages;

	pvec->nr = find_get_pages(mapping, start, nr_pages, slots);

	return pagevec_count(pvec);
}
78539fdfa [XFS] Export page... |
846 |
EXPORT_SYMBOL(pagevec_lookup); |
1da177e4c Linux-2.6.12-rc2 |
847 848 849 850 851 852 853 |
/**
 * pagevec_lookup_tag - gang pagecache lookup restricted to a tag
 * @pvec:	Where the resulting pages are placed
 * @mapping:	The address_space to search
 * @index:	Pointer to the page index, handed to find_get_pages_tag()
 * @tag:	The tag handed to find_get_pages_tag()
 * @nr_pages:	The maximum number of pages
 *
 * Fills @pvec through find_get_pages_tag() and records how many entries
 * were stored.
 *
 * NOTE(review): @index is passed by pointer, so find_get_pages_tag()
 * presumably advances it for the next call - confirm against its definition.
 *
 * Return: the number of pages now held in @pvec.
 */
unsigned pagevec_lookup_tag(struct pagevec *pvec,
		struct address_space *mapping, pgoff_t *index, int tag,
		unsigned nr_pages)
{
	struct page **slots = pvec->pages;

	pvec->nr = find_get_pages_tag(mapping, index, tag, nr_pages, slots);

	return pagevec_count(pvec);
}
7f2857018 Export __pagevec_... |
854 |
EXPORT_SYMBOL(pagevec_lookup_tag); |
1da177e4c Linux-2.6.12-rc2 |
855 |
|
1da177e4c Linux-2.6.12-rc2 |
856 857 858 859 860 |
/*
 * Perform any setup for the swap system.
 *
 * With CONFIG_SWAP this initialises the backing_dev_info of the first swap
 * address space and the tree lock and nonlinear-mapping list of every entry
 * in swapper_spaces[].  In all configurations it then picks the default
 * page_cluster from the amount of RAM.
 */
void __init swap_setup(void)
{
	/* totalram_pages converted to MiB: 2^20 bytes / 2^PAGE_SHIFT per page */
	unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
#ifdef CONFIG_SWAP
	int i;

	/*
	 * Do not silently carry on with a half-initialised swap bdi: a
	 * failure here at boot cannot be recovered from later.
	 */
	if (bdi_init(swapper_spaces[0].backing_dev_info))
		panic("Failed to init swap bdi");

	for (i = 0; i < MAX_SWAPFILES; i++) {
		spin_lock_init(&swapper_spaces[i].tree_lock);
		INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
	}
#endif

	/* Use a smaller cluster for small-memory machines */
	if (megs < 16)
		page_cluster = 2;
	else
		page_cluster = 3;
	/*
	 * Right now other parts of the system mean that we
	 * _really_ don't want to cluster much more
	 */
}