Commit da6052f7b33abe55fbfd7d2213815f58c00a88d4
Committed by
Linus Torvalds
1 parent
e5ac9c5aec
Exists in
master
and in
4 other branches
[PATCH] update some mm/ comments
Let's try to keep mm/ comments more useful and up to date. This is a start. Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 3 changed files with 64 additions and 47 deletions Side-by-side Diff
include/linux/mm.h
... | ... | @@ -219,7 +219,8 @@ |
219 | 219 | * Each physical page in the system has a struct page associated with |
220 | 220 | * it to keep track of whatever it is we are using the page for at the |
221 | 221 | * moment. Note that we have no way to track which tasks are using |
222 | - * a page. | |
222 | + * a page, though if it is a pagecache page, rmap structures can tell us | |
223 | + * who is mapping it. | |
223 | 224 | */ |
224 | 225 | struct page { |
225 | 226 | unsigned long flags; /* Atomic flags, some possibly |
... | ... | @@ -299,8 +300,7 @@ |
299 | 300 | */ |
300 | 301 | |
301 | 302 | /* |
302 | - * Drop a ref, return true if the logical refcount fell to zero (the page has | |
303 | - * no users) | |
303 | + * Drop a ref, return true if the refcount fell to zero (the page has no users) | |
304 | 304 | */ |
305 | 305 | static inline int put_page_testzero(struct page *page) |
306 | 306 | { |
307 | 307 | |
308 | 308 | |
309 | 309 | |
310 | 310 | |
311 | 311 | |
312 | 312 | |
313 | 313 | |
314 | 314 | |
315 | 315 | |
... | ... | @@ -356,43 +356,55 @@ |
356 | 356 | * For the non-reserved pages, page_count(page) denotes a reference count. |
357 | 357 | * page_count() == 0 means the page is free. page->lru is then used for |
358 | 358 | * freelist management in the buddy allocator. |
359 | - * page_count() == 1 means the page is used for exactly one purpose | |
360 | - * (e.g. a private data page of one process). | |
359 | + * page_count() > 0 means the page has been allocated. | |
361 | 360 | * |
362 | - * A page may be used for kmalloc() or anyone else who does a | |
363 | - * __get_free_page(). In this case the page_count() is at least 1, and | |
364 | - * all other fields are unused but should be 0 or NULL. The | |
365 | - * management of this page is the responsibility of the one who uses | |
366 | - * it. | |
361 | + * Pages are allocated by the slab allocator in order to provide memory | |
362 | + * to kmalloc and kmem_cache_alloc. In this case, the management of the | |
363 | + * page, and the fields in 'struct page' are the responsibility of mm/slab.c | |
364 | + * unless a particular usage is carefully commented. (the responsibility of | |
365 | + * freeing the kmalloc memory is the caller's, of course). | |
367 | 366 | * |
368 | - * The other pages (we may call them "process pages") are completely | |
367 | + * A page may be used by anyone else who does a __get_free_page(). | |
368 | + * In this case, page_count still tracks the references, and should only | |
369 | + * be used through the normal accessor functions. The top bits of page->flags | |
370 | + * and page->virtual store page management information, but all other fields | |
371 | + * are unused and could be used privately, carefully. The management of this | |
372 | + * page is the responsibility of the one who allocated it, and those who have | |
373 | + * subsequently been given references to it. | |
374 | + * | |
375 | + * The other pages (we may call them "pagecache pages") are completely | |
369 | 376 | * managed by the Linux memory manager: I/O, buffers, swapping etc. |
370 | 377 | * The following discussion applies only to them. |
371 | 378 | * |
372 | - * A page may belong to an inode's memory mapping. In this case, | |
373 | - * page->mapping is the pointer to the inode, and page->index is the | |
374 | - * file offset of the page, in units of PAGE_CACHE_SIZE. | |
379 | + * A pagecache page contains an opaque `private' member, which belongs to the | |
380 | + * page's address_space. Usually, this is the address of a circular list of | |
381 | + * the page's disk buffers. PG_private must be set to tell the VM to call | |
382 | + * into the filesystem to release these pages. | |
375 | 383 | * |
376 | - * A page contains an opaque `private' member, which belongs to the | |
377 | - * page's address_space. Usually, this is the address of a circular | |
378 | - * list of the page's disk buffers. | |
384 | + * A page may belong to an inode's memory mapping. In this case, page->mapping | |
385 | + * is the pointer to the inode, and page->index is the file offset of the page, | |
386 | + * in units of PAGE_CACHE_SIZE. | |
379 | 387 | * |
380 | - * For pages belonging to inodes, the page_count() is the number of | |
381 | - * attaches, plus 1 if `private' contains something, plus one for | |
382 | - * the page cache itself. | |
388 | + * If pagecache pages are not associated with an inode, they are said to be | |
389 | + * anonymous pages. These may become associated with the swapcache, and in that | |
390 | + * case PG_swapcache is set, and page->private is an offset into the swapcache. | |
383 | 391 | * |
384 | - * Instead of keeping dirty/clean pages in per address-space lists, we instead | |
385 | - * now tag pages as dirty/under writeback in the radix tree. | |
392 | + * In either case (swapcache or inode backed), the pagecache itself holds one | |
393 | + * reference to the page. Setting PG_private should also increment the | |
394 | + * refcount. Each user mapping also has a reference to the page. | |
386 | 395 | * |
387 | - * There is also a per-mapping radix tree mapping index to the page | |
388 | - * in memory if present. The tree is rooted at mapping->root. | |
396 | + * The pagecache pages are stored in a per-mapping radix tree, which is | |
397 | + * rooted at mapping->page_tree, and indexed by offset. | |
398 | + * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space | |
399 | + * lists, we instead now tag pages as dirty/writeback in the radix tree. | |
389 | 400 | * |
390 | - * All process pages can do I/O: | |
401 | + * All pagecache pages may be subject to I/O: | |
391 | 402 | * - inode pages may need to be read from disk, |
392 | 403 | * - inode pages which have been modified and are MAP_SHARED may need |
393 | - * to be written to disk, | |
394 | - * - private pages which have been modified may need to be swapped out | |
395 | - * to swap space and (later) to be read back into memory. | |
404 | + * to be written back to the inode on disk, | |
405 | + * - anonymous pages (including MAP_PRIVATE file mappings) which have been | |
406 | + * modified may need to be swapped out to swap space and (later) to be read | |
407 | + * back into memory. | |
396 | 408 | */ |
397 | 409 | |
398 | 410 | /* |
include/linux/page-flags.h
... | ... | @@ -13,25 +13,26 @@ |
13 | 13 | * PG_reserved is set for special pages, which can never be swapped out. Some |
14 | 14 | * of them might not even exist (eg empty_bad_page)... |
15 | 15 | * |
16 | - * The PG_private bitflag is set if page->private contains a valid value. | |
16 | + * The PG_private bitflag is set on pagecache pages if they contain filesystem | |
17 | + * specific data (which is normally at page->private). It can be used by | |
18 | + * private allocations for their own usage. | |
17 | 19 | * |
18 | - * During disk I/O, PG_locked is used. This bit is set before I/O and | |
19 | - * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks | |
20 | - * waiting for the I/O on this page to complete. | |
20 | + * During initiation of disk I/O, PG_locked is set. This bit is set before I/O | |
21 | + * and cleared when writeback _starts_ or when read _completes_. PG_writeback | |
22 | + * is set before writeback starts and cleared when it finishes. | |
21 | 23 | * |
24 | + * PG_locked also pins a page in pagecache, and blocks truncation of the file | |
25 | + * while it is held. | |
26 | + * | |
27 | + * page_waitqueue(page) is a wait queue of all tasks waiting for the page | |
28 | + * to become unlocked. | |
29 | + * | |
22 | 30 | * PG_uptodate tells whether the page's contents are valid. When a read |
23 | 31 | * completes, the page becomes uptodate, unless a disk I/O error happened. |
24 | 32 | * |
25 | - * For choosing which pages to swap out, inode pages carry a PG_referenced bit, | |
26 | - * which is set any time the system accesses that page through the (mapping, | |
27 | - * index) hash table. This referenced bit, together with the referenced bit | |
28 | - * in the page tables, is used to manipulate page->age and move the page across | |
29 | - * the active, inactive_dirty and inactive_clean lists. | |
33 | + * PG_referenced, PG_reclaim are used for page reclaim for anonymous and | |
34 | + * file-backed pagecache (see mm/vmscan.c). | |
30 | 35 | * |
31 | - * Note that the referenced bit, the page->lru list_head and the active, | |
32 | - * inactive_dirty and inactive_clean lists are protected by the | |
33 | - * zone->lru_lock, and *NOT* by the usual PG_locked bit! | |
34 | - * | |
35 | 36 | * PG_error is set to indicate that an I/O error occurred on this page. |
36 | 37 | * |
37 | 38 | * PG_arch_1 is an architecture specific page state bit. The generic code |
... | ... | @@ -42,6 +43,10 @@ |
42 | 43 | * space, they need to be kmapped separately for doing IO on the pages. The |
43 | 44 | * struct page (these bits with information) are always mapped into kernel |
44 | 45 | * address space... |
46 | + * | |
47 | + * PG_buddy is set to indicate that the page is free and in the buddy system | |
48 | + * (see mm/page_alloc.c). | |
49 | + * | |
45 | 50 | */ |
46 | 51 | |
47 | 52 | /* |
... | ... | @@ -74,7 +79,7 @@ |
74 | 79 | #define PG_checked 8 /* kill me in 2.5.<early>. */ |
75 | 80 | #define PG_arch_1 9 |
76 | 81 | #define PG_reserved 10 |
77 | -#define PG_private 11 /* Has something at ->private */ | |
82 | +#define PG_private 11 /* If pagecache, has fs-private data */ | |
78 | 83 | |
79 | 84 | #define PG_writeback 12 /* Page is under writeback */ |
80 | 85 | #define PG_nosave 13 /* Used for system suspend/resume */ |
... | ... | @@ -83,7 +88,7 @@ |
83 | 88 | |
84 | 89 | #define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ |
85 | 90 | #define PG_reclaim 17 /* To be reclaimed asap */ |
86 | -#define PG_nosave_free 18 /* Free, should not be written */ | |
91 | +#define PG_nosave_free 18 /* Used for system suspend/resume */ | |
87 | 92 | #define PG_buddy 19 /* Page is free, on buddy lists */ |
88 | 93 | |
89 | 94 |
mm/filemap.c
... | ... | @@ -599,8 +599,8 @@ |
599 | 599 | * @mapping: the address_space to search |
600 | 600 | * @offset: the page index |
601 | 601 | * |
602 | - * A rather lightweight function, finding and getting a reference to a | |
603 | - * hashed page atomically. | |
602 | + * Is there a pagecache struct page at the given (mapping, offset) tuple? | |
603 | + * If yes, increment its refcount and return it; if no, return NULL. | |
604 | 604 | */ |
605 | 605 | struct page * find_get_page(struct address_space *mapping, unsigned long offset) |
606 | 606 | { |
... | ... | @@ -987,7 +987,7 @@ |
987 | 987 | /* Get exclusive access to the page ... */ |
988 | 988 | lock_page(page); |
989 | 989 | |
990 | - /* Did it get unhashed before we got the lock? */ | |
990 | + /* Did it get truncated before we got the lock? */ | |
991 | 991 | if (!page->mapping) { |
992 | 992 | unlock_page(page); |
993 | 993 | page_cache_release(page); |
... | ... | @@ -1627,7 +1627,7 @@ |
1627 | 1627 | page_not_uptodate: |
1628 | 1628 | lock_page(page); |
1629 | 1629 | |
1630 | - /* Did it get unhashed while we waited for it? */ | |
1630 | + /* Did it get truncated while we waited for it? */ | |
1631 | 1631 | if (!page->mapping) { |
1632 | 1632 | unlock_page(page); |
1633 | 1633 | goto err; |