Commit 20a0307c0396c2edb651401d2f2db193dda2f3c9

Authored by Wu Fengguang
Committed by Linus Torvalds
1 parent a1dd268cf6

mm: introduce PageHuge() for testing huge/gigantic pages

A series of patches to enhance the /proc/pagemap interface and to add a
userspace executable which can be used to present the pagemap data.

Export 10 more flags to end users (and more for kernel developers):

        11. KPF_MMAP            (pseudo flag) memory mapped page
        12. KPF_ANON            (pseudo flag) memory mapped page (anonymous)
        13. KPF_SWAPCACHE       page is in swap cache
        14. KPF_SWAPBACKED      page is swap/RAM backed
        15. KPF_COMPOUND_HEAD   (*)
        16. KPF_COMPOUND_TAIL   (*)
        17. KPF_HUGE            hugeTLB pages
        18. KPF_UNEVICTABLE     page is in the unevictable LRU list
        19. KPF_HWPOISON        hardware detected corruption
        20. KPF_NOPAGE          (pseudo flag) no page frame at the address

        (*) For compound pages, exporting _both_ head/tail info enables
            users to tell where a compound page starts/ends, and its order.

A simple demo of the page-types tool:

# ./page-types -h
page-types [options]
            -r|--raw                  Raw mode, for kernel developers
            -a|--addr    addr-spec    Walk a range of pages
            -b|--bits    bits-spec    Walk pages with specified bits
            -l|--list                 Show page details in ranges
            -L|--list-each            Show page details one by one
            -N|--no-summary           Don't show summary info
            -h|--help                 Show this usage message
addr-spec:
            N                         one page at offset N (unit: pages)
            N+M                       pages range from N to N+M-1
            N,M                       pages range from N to M-1
            N,                        pages range from N to end
            ,M                        pages range from 0 to M-1
bits-spec:
            bit1,bit2                 (flags & (bit1|bit2)) != 0
            bit1,bit2=bit1            (flags & (bit1|bit2)) == bit1
            bit1,~bit2                (flags & (bit1|bit2)) == bit1
            =bit1,bit2                flags == (bit1|bit2)
bit-names:
          locked              error         referenced           uptodate
           dirty                lru             active               slab
       writeback            reclaim              buddy               mmap
       anonymous          swapcache         swapbacked      compound_head
   compound_tail               huge        unevictable           hwpoison
          nopage           reserved(r)         mlocked(r)    mappedtodisk(r)
         private(r)       private_2(r)   owner_private(r)            arch(r)
        uncached(r)       readahead(o)       slob_free(o)     slub_frozen(o)
      slub_debug(o)
                                   (r) raw mode bits  (o) overloaded bits

# ./page-types
             flags      page-count       MB  symbolic-flags                     long-symbolic-flags
0x0000000000000000          487369     1903  _________________________________
0x0000000000000014               5        0  __R_D____________________________  referenced,dirty
0x0000000000000020               1        0  _____l___________________________  lru
0x0000000000000024              34        0  __R__l___________________________  referenced,lru
0x0000000000000028            3838       14  ___U_l___________________________  uptodate,lru
0x0001000000000028              48        0  ___U_l_______________________I___  uptodate,lru,readahead
0x000000000000002c            6478       25  __RU_l___________________________  referenced,uptodate,lru
0x000100000000002c              47        0  __RU_l_______________________I___  referenced,uptodate,lru,readahead
0x0000000000000040            8344       32  ______A__________________________  active
0x0000000000000060               1        0  _____lA__________________________  lru,active
0x0000000000000068             348        1  ___U_lA__________________________  uptodate,lru,active
0x0001000000000068              12        0  ___U_lA______________________I___  uptodate,lru,active,readahead
0x000000000000006c             988        3  __RU_lA__________________________  referenced,uptodate,lru,active
0x000100000000006c              48        0  __RU_lA______________________I___  referenced,uptodate,lru,active,readahead
0x0000000000004078               1        0  ___UDlA_______b__________________  uptodate,dirty,lru,active,swapbacked
0x000000000000407c              34        0  __RUDlA_______b__________________  referenced,uptodate,dirty,lru,active,swapbacked
0x0000000000000400             503        1  __________B______________________  buddy
0x0000000000000804               1        0  __R________M_____________________  referenced,mmap
0x0000000000000828            1029        4  ___U_l_____M_____________________  uptodate,lru,mmap
0x0001000000000828              43        0  ___U_l_____M_________________I___  uptodate,lru,mmap,readahead
0x000000000000082c             382        1  __RU_l_____M_____________________  referenced,uptodate,lru,mmap
0x000100000000082c              12        0  __RU_l_____M_________________I___  referenced,uptodate,lru,mmap,readahead
0x0000000000000868             192        0  ___U_lA____M_____________________  uptodate,lru,active,mmap
0x0001000000000868              12        0  ___U_lA____M_________________I___  uptodate,lru,active,mmap,readahead
0x000000000000086c             800        3  __RU_lA____M_____________________  referenced,uptodate,lru,active,mmap
0x000100000000086c              31        0  __RU_lA____M_________________I___  referenced,uptodate,lru,active,mmap,readahead
0x0000000000004878               2        0  ___UDlA____M__b__________________  uptodate,dirty,lru,active,mmap,swapbacked
0x0000000000001000             492        1  ____________a____________________  anonymous
0x0000000000005808               4        0  ___U_______Ma_b__________________  uptodate,mmap,anonymous,swapbacked
0x0000000000005868            2839       11  ___U_lA____Ma_b__________________  uptodate,lru,active,mmap,anonymous,swapbacked
0x000000000000586c              30        0  __RU_lA____Ma_b__________________  referenced,uptodate,lru,active,mmap,anonymous,swapbacked
             total          513968     2007

# ./page-types -r
             flags      page-count       MB  symbolic-flags                     long-symbolic-flags
0x0000000000000000          468002     1828  _________________________________
0x0000000100000000           19102       74  _____________________r___________  reserved
0x0000000000008000              41        0  _______________H_________________  compound_head
0x0000000000010000             188        0  ________________T________________  compound_tail
0x0000000000008014               1        0  __R_D__________H_________________  referenced,dirty,compound_head
0x0000000000010014               4        0  __R_D___________T________________  referenced,dirty,compound_tail
0x0000000000000020               1        0  _____l___________________________  lru
0x0000000800000024              34        0  __R__l__________________P________  referenced,lru,private
0x0000000000000028            3794       14  ___U_l___________________________  uptodate,lru
0x0001000000000028              46        0  ___U_l_______________________I___  uptodate,lru,readahead
0x0000000400000028              44        0  ___U_l_________________d_________  uptodate,lru,mappedtodisk
0x0001000400000028               2        0  ___U_l_________________d_____I___  uptodate,lru,mappedtodisk,readahead
0x000000000000002c            6434       25  __RU_l___________________________  referenced,uptodate,lru
0x000100000000002c              47        0  __RU_l_______________________I___  referenced,uptodate,lru,readahead
0x000000040000002c              14        0  __RU_l_________________d_________  referenced,uptodate,lru,mappedtodisk
0x000000080000002c              30        0  __RU_l__________________P________  referenced,uptodate,lru,private
0x0000000800000040            8124       31  ______A_________________P________  active,private
0x0000000000000040             219        0  ______A__________________________  active
0x0000000800000060               1        0  _____lA_________________P________  lru,active,private
0x0000000000000068             322        1  ___U_lA__________________________  uptodate,lru,active
0x0001000000000068              12        0  ___U_lA______________________I___  uptodate,lru,active,readahead
0x0000000400000068              13        0  ___U_lA________________d_________  uptodate,lru,active,mappedtodisk
0x0000000800000068              12        0  ___U_lA_________________P________  uptodate,lru,active,private
0x000000000000006c             977        3  __RU_lA__________________________  referenced,uptodate,lru,active
0x000100000000006c              48        0  __RU_lA______________________I___  referenced,uptodate,lru,active,readahead
0x000000040000006c               5        0  __RU_lA________________d_________  referenced,uptodate,lru,active,mappedtodisk
0x000000080000006c               3        0  __RU_lA_________________P________  referenced,uptodate,lru,active,private
0x0000000c0000006c               3        0  __RU_lA________________dP________  referenced,uptodate,lru,active,mappedtodisk,private
0x0000000c00000068               1        0  ___U_lA________________dP________  uptodate,lru,active,mappedtodisk,private
0x0000000000004078               1        0  ___UDlA_______b__________________  uptodate,dirty,lru,active,swapbacked
0x000000000000407c              34        0  __RUDlA_______b__________________  referenced,uptodate,dirty,lru,active,swapbacked
0x0000000000000400             538        2  __________B______________________  buddy
0x0000000000000804               1        0  __R________M_____________________  referenced,mmap
0x0000000000000828            1029        4  ___U_l_____M_____________________  uptodate,lru,mmap
0x0001000000000828              43        0  ___U_l_____M_________________I___  uptodate,lru,mmap,readahead
0x000000000000082c             382        1  __RU_l_____M_____________________  referenced,uptodate,lru,mmap
0x000100000000082c              12        0  __RU_l_____M_________________I___  referenced,uptodate,lru,mmap,readahead
0x0000000000000868             192        0  ___U_lA____M_____________________  uptodate,lru,active,mmap
0x0001000000000868              12        0  ___U_lA____M_________________I___  uptodate,lru,active,mmap,readahead
0x000000000000086c             800        3  __RU_lA____M_____________________  referenced,uptodate,lru,active,mmap
0x000100000000086c              31        0  __RU_lA____M_________________I___  referenced,uptodate,lru,active,mmap,readahead
0x0000000000004878               2        0  ___UDlA____M__b__________________  uptodate,dirty,lru,active,mmap,swapbacked
0x0000000000001000             492        1  ____________a____________________  anonymous
0x0000000000005008               2        0  ___U________a_b__________________  uptodate,anonymous,swapbacked
0x0000000000005808               4        0  ___U_______Ma_b__________________  uptodate,mmap,anonymous,swapbacked
0x000000000000580c               1        0  __RU_______Ma_b__________________  referenced,uptodate,mmap,anonymous,swapbacked
0x0000000000005868            2839       11  ___U_lA____Ma_b__________________  uptodate,lru,active,mmap,anonymous,swapbacked
0x000000000000586c              29        0  __RU_lA____Ma_b__________________  referenced,uptodate,lru,active,mmap,anonymous,swapbacked
             total          513968     2007

# ./page-types --raw --list --no-summary --bits reserved
offset  count   flags
0       15      _____________________r___________
31      4       _____________________r___________
159     97      _____________________r___________
4096    2067    _____________________r___________
6752    2390    _____________________r___________
9355    3       _____________________r___________
9728    14526   _____________________r___________

This patch:

Introduce PageHuge(), which identifies huge/gigantic pages by their
dedicated compound destructor functions.

Also move prep_compound_gigantic_page() to hugetlb.c and make
__free_pages_ok() non-static.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 5 changed files with 73 additions and 55 deletions Side-by-side Diff

... ... @@ -6,6 +6,7 @@
6 6 #include <linux/mmzone.h>
7 7 #include <linux/proc_fs.h>
8 8 #include <linux/seq_file.h>
  9 +#include <linux/hugetlb.h>
9 10 #include <asm/uaccess.h>
10 11 #include "internal.h"
11 12  
include/linux/hugetlb.h
... ... @@ -11,6 +11,8 @@
11 11  
12 12 struct ctl_table;
13 13  
  14 +int PageHuge(struct page *page);
  15 +
14 16 static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
15 17 {
16 18 return vma->vm_flags & VM_HUGETLB;
... ... @@ -60,6 +62,11 @@
60 62 unsigned long address, unsigned long end, pgprot_t newprot);
61 63  
62 64 #else /* !CONFIG_HUGETLB_PAGE */
  65 +
  66 +static inline int PageHuge(struct page *page)
  67 +{
  68 + return 0;
  69 +}
63 70  
64 71 static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
65 72 {
... ... @@ -578,41 +578,6 @@
578 578 hugetlb_put_quota(mapping, 1);
579 579 }
580 580  
581   -/*
582   - * Increment or decrement surplus_huge_pages. Keep node-specific counters
583   - * balanced by operating on them in a round-robin fashion.
584   - * Returns 1 if an adjustment was made.
585   - */
586   -static int adjust_pool_surplus(struct hstate *h, int delta)
587   -{
588   - static int prev_nid;
589   - int nid = prev_nid;
590   - int ret = 0;
591   -
592   - VM_BUG_ON(delta != -1 && delta != 1);
593   - do {
594   - nid = next_node(nid, node_online_map);
595   - if (nid == MAX_NUMNODES)
596   - nid = first_node(node_online_map);
597   -
598   - /* To shrink on this node, there must be a surplus page */
599   - if (delta < 0 && !h->surplus_huge_pages_node[nid])
600   - continue;
601   - /* Surplus cannot exceed the total number of pages */
602   - if (delta > 0 && h->surplus_huge_pages_node[nid] >=
603   - h->nr_huge_pages_node[nid])
604   - continue;
605   -
606   - h->surplus_huge_pages += delta;
607   - h->surplus_huge_pages_node[nid] += delta;
608   - ret = 1;
609   - break;
610   - } while (nid != prev_nid);
611   -
612   - prev_nid = nid;
613   - return ret;
614   -}
615   -
616 581 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
617 582 {
618 583 set_compound_page_dtor(page, free_huge_page);
... ... @@ -623,6 +588,34 @@
623 588 put_page(page); /* free it into the hugepage allocator */
624 589 }
625 590  
  591 +static void prep_compound_gigantic_page(struct page *page, unsigned long order)
  592 +{
  593 + int i;
  594 + int nr_pages = 1 << order;
  595 + struct page *p = page + 1;
  596 +
  597 + /* we rely on prep_new_huge_page to set the destructor */
  598 + set_compound_order(page, order);
  599 + __SetPageHead(page);
  600 + for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
  601 + __SetPageTail(p);
  602 + p->first_page = page;
  603 + }
  604 +}
  605 +
  606 +int PageHuge(struct page *page)
  607 +{
  608 + compound_page_dtor *dtor;
  609 +
  610 + if (!PageCompound(page))
  611 + return 0;
  612 +
  613 + page = compound_head(page);
  614 + dtor = get_compound_page_dtor(page);
  615 +
  616 + return dtor == free_huge_page;
  617 +}
  618 +
626 619 static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
627 620 {
628 621 struct page *page;
... ... @@ -1139,6 +1132,41 @@
1139 1132 {
1140 1133 }
1141 1134 #endif
  1135 +
  1136 +/*
  1137 + * Increment or decrement surplus_huge_pages. Keep node-specific counters
  1138 + * balanced by operating on them in a round-robin fashion.
  1139 + * Returns 1 if an adjustment was made.
  1140 + */
  1141 +static int adjust_pool_surplus(struct hstate *h, int delta)
  1142 +{
  1143 + static int prev_nid;
  1144 + int nid = prev_nid;
  1145 + int ret = 0;
  1146 +
  1147 + VM_BUG_ON(delta != -1 && delta != 1);
  1148 + do {
  1149 + nid = next_node(nid, node_online_map);
  1150 + if (nid == MAX_NUMNODES)
  1151 + nid = first_node(node_online_map);
  1152 +
  1153 + /* To shrink on this node, there must be a surplus page */
  1154 + if (delta < 0 && !h->surplus_huge_pages_node[nid])
  1155 + continue;
  1156 + /* Surplus cannot exceed the total number of pages */
  1157 + if (delta > 0 && h->surplus_huge_pages_node[nid] >=
  1158 + h->nr_huge_pages_node[nid])
  1159 + continue;
  1160 +
  1161 + h->surplus_huge_pages += delta;
  1162 + h->surplus_huge_pages_node[nid] += delta;
  1163 + ret = 1;
  1164 + break;
  1165 + } while (nid != prev_nid);
  1166 +
  1167 + prev_nid = nid;
  1168 + return ret;
  1169 +}
1142 1170  
1143 1171 #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
1144 1172 static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
... ... @@ -16,9 +16,6 @@
16 16 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
17 17 unsigned long floor, unsigned long ceiling);
18 18  
19   -extern void prep_compound_page(struct page *page, unsigned long order);
20   -extern void prep_compound_gigantic_page(struct page *page, unsigned long order);
21   -
22 19 static inline void set_page_count(struct page *page, int v)
23 20 {
24 21 atomic_set(&page->_count, v);
... ... @@ -51,6 +48,8 @@
51 48 */
52 49 extern unsigned long highest_memmap_pfn;
53 50 extern void __free_pages_bootmem(struct page *page, unsigned int order);
  51 +extern void prep_compound_page(struct page *page, unsigned long order);
  52 +
54 53  
55 54 /*
56 55 * function for dealing with page's order in buddy system.
... ... @@ -300,23 +300,6 @@
300 300 }
301 301 }
302 302  
303   -#ifdef CONFIG_HUGETLBFS
304   -void prep_compound_gigantic_page(struct page *page, unsigned long order)
305   -{
306   - int i;
307   - int nr_pages = 1 << order;
308   - struct page *p = page + 1;
309   -
310   - set_compound_page_dtor(page, free_compound_page);
311   - set_compound_order(page, order);
312   - __SetPageHead(page);
313   - for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
314   - __SetPageTail(p);
315   - p->first_page = page;
316   - }
317   -}
318   -#endif
319   -
320 303 static int destroy_compound_page(struct page *page, unsigned long order)
321 304 {
322 305 int i;