Commit 53492b1de46a7576170e865062ffcfc93bb5650b

Authored by Gerald Schaefer
Committed by Martin Schwidefsky
1 parent 2e5061e40a

[S390] System z large page support.

This adds hugetlbfs support on System z, using hardware large page
support if available and software large page emulation on older
hardware. Shared (large) page tables are implemented in software
emulation mode by using page->index of the first tail page of a
compound large page to store the page table information.

Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
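
On hardware without large pages, the emulation described above works as
follows: a "huge" mapping is still written as a segment-table entry, but
its origin points at an ordinary page table that was pre-filled with 256
consecutive 4 KB ptes when the huge page was allocated; the pointer to
that page table is parked in page[1].index. A minimal userspace model of
the idea, with simplified stand-in types and names (the real code is in
arch/s390/mm/hugetlbpage.c below):

#include <stdint.h>
#include <stdlib.h>

#define PAGE_SIZE	4096UL
#define PTRS_PER_PTE	256UL		/* 1 MB huge page / 4 KB base page */
#define SEG_ENTRY_INV	0x20UL		/* stand-in for _SEGMENT_ENTRY_INV */

struct page {				/* simplified stand-in struct page */
	uint64_t phys;			/* physical address of this frame */
	uintptr_t index;		/* plays the role of page[1].index */
};

/* Model of arch_prepare_hugepage(): pre-fill one pte per 4 KB subpage
 * and stash the page table pointer in the first tail page. */
static int prepare_hugepage(struct page *page)
{
	uint64_t *ptep = calloc(PTRS_PER_PTE, sizeof(*ptep));
	unsigned long i;

	if (!ptep)
		return -1;
	for (i = 0; i < PTRS_PER_PTE; i++)
		ptep[i] = page[0].phys + i * PAGE_SIZE;
	page[1].index = (uintptr_t) ptep;
	return 0;
}

/* Model of set_huge_pte_at() in emulation mode: the installed "huge pte"
 * is just the page table origin plus the protection/invalid bits. */
static uint64_t make_segment_entry(struct page *page, uint64_t bits)
{
	return (uint64_t) page[1].index | bits;
}

int main(void)
{
	struct page huge[2] = { { 0x100000, 0 }, { 0, 0 } };

	if (prepare_hugepage(huge))
		return 1;
	make_segment_entry(huge, SEG_ENTRY_INV);	/* not yet mapped */
	free((void *) huge[1].index);
	return 0;
}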

Showing 14 changed files with 437 additions and 42 deletions

arch/s390/kernel/early.c
... ... @@ -268,6 +268,19 @@
268 268 s390_base_pgm_handler_fn = early_pgm_check_handler;
269 269 }
270 270  
  271 +static noinline __init void setup_hpage(void)
  272 +{
  273 +#ifndef CONFIG_DEBUG_PAGEALLOC
  274 + unsigned int facilities;
  275 +
  276 + facilities = stfl();
  277 + if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29)))
  278 + return;
  279 + machine_flags |= MACHINE_FLAG_HPAGE;
  280 + __ctl_set_bit(0, 23);
  281 +#endif
  282 +}
  283 +
271 284 static __init void detect_mvpg(void)
272 285 {
273 286 #ifndef CONFIG_64BIT
... ... @@ -360,6 +373,8 @@
360 373 facilities = stfl();
361 374 if (facilities & (1 << 28))
362 375 machine_flags |= MACHINE_FLAG_IDTE;
  376 + if (facilities & (1 << 23))
  377 + machine_flags |= MACHINE_FLAG_PFMF;
363 378 if (facilities & (1 << 4))
364 379 machine_flags |= MACHINE_FLAG_MVCOS;
365 380 #endif
... ... @@ -388,6 +403,7 @@
388 403 detect_diag9c();
389 404 detect_diag44();
390 405 detect_machine_facilities();
  406 + setup_hpage();
391 407 sclp_read_info_early();
392 408 sclp_facilities_detect();
393 409 memsize = sclp_memory_detect();
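
A note on the magic numbers in setup_hpage() and
detect_machine_facilities(): stfl() returns the first 32 architected
facility bits with facility 0 in the most-significant bit, so facility n
is tested with mask 1 << (31 - n). That makes 1 << 23 facility 8
(enhanced DAT, which also brings the PFMF instruction used below in
clear_page()) and 1 << 28 facility 3 (IDTE); reading 1 << 29 as facility
2 is my interpretation of the architecture, not something the patch
states. A quick sanity check of the mapping:

#include <assert.h>

/* Facility n, numbered from the MSB of the 32-bit stfl() result,
 * corresponds to mask 1 << (31 - n). */
static unsigned int stfl_mask(unsigned int facility_nr)
{
	return 1U << (31 - facility_nr);
}

int main(void)
{
	assert(stfl_mask(8) == 1U << 23);	/* enhanced DAT / PFMF */
	assert(stfl_mask(3) == 1U << 28);	/* IDTE */
	assert(stfl_mask(2) == 1U << 29);	/* see caveat above */
	return 0;
}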
arch/s390/kernel/head64.S
... ... @@ -129,7 +129,7 @@
129 129 # virtual and never return ...
130 130 .align 16
131 131 .Lentry:.quad 0x0000000180000000,_stext
132   -.Lctl: .quad 0x04b50002 # cr0: various things
  132 +.Lctl: .quad 0x04350002 # cr0: various things
133 133 .quad 0 # cr1: primary space segment table
134 134 .quad .Lduct # cr2: dispatchable unit control table
135 135 .quad 0 # cr3: instruction authorization
arch/s390/kernel/setup.c
... ... @@ -749,6 +749,9 @@
749 749 elf_hwcap |= 1UL << 6;
750 750 }
751 751  
  752 + if (MACHINE_HAS_HPAGE)
  753 + elf_hwcap |= 1UL << 7;
  754 +
752 755 switch (cpuinfo->cpu_id.machine) {
753 756 case 0x9672:
754 757 #if !defined(CONFIG_64BIT)
... ... @@ -872,8 +875,9 @@
872 875  
873 876 static int show_cpuinfo(struct seq_file *m, void *v)
874 877 {
875   - static const char *hwcap_str[7] = {
876   - "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp"
  878 + static const char *hwcap_str[8] = {
  879 + "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
  880 + "edat"
877 881 };
878 882 struct cpuinfo_S390 *cpuinfo;
879 883 unsigned long n = (unsigned long) v - 1;
... ... @@ -888,7 +892,7 @@
888 892 num_online_cpus(), loops_per_jiffy/(500000/HZ),
889 893 (loops_per_jiffy/(5000/HZ))%100);
890 894 seq_puts(m, "features\t: ");
891   - for (i = 0; i < 7; i++)
  895 + for (i = 0; i < 8; i++)
892 896 if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
893 897 seq_printf(m, "%s ", hwcap_str[i]);
894 898 seq_puts(m, "\n");
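
With bit 7 set in elf_hwcap and "edat" appended to hwcap_str, the
feature is visible both in /proc/cpuinfo and in the ELF auxiliary vector
of every process. A small userspace check; note that getauxval() is a
later glibc (2.16) convenience used here for brevity, and the
HWCAP_S390_EDAT name is my own:

#include <stdio.h>
#include <sys/auxv.h>

#define HWCAP_S390_EDAT (1UL << 7)	/* mirrors elf_hwcap |= 1UL << 7 */

int main(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	printf("edat (hardware large pages): %s\n",
	       (hwcap & HWCAP_S390_EDAT) ? "yes" : "no");
	return 0;
}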
arch/s390/mm/Makefile
... ... @@ -4,4 +4,5 @@
4 4  
5 5 obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o
6 6 obj-$(CONFIG_CMM) += cmm.o
  7 +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
arch/s390/mm/fault.c
... ... @@ -28,6 +28,7 @@
28 28 #include <linux/hardirq.h>
29 29 #include <linux/kprobes.h>
30 30 #include <linux/uaccess.h>
  31 +#include <linux/hugetlb.h>
31 32 #include <asm/system.h>
32 33 #include <asm/pgtable.h>
33 34 #include <asm/s390_ext.h>
... ... @@ -367,6 +368,8 @@
367 368 }
368 369  
369 370 survive:
  371 + if (is_vm_hugetlb_page(vma))
  372 + address &= HPAGE_MASK;
370 373 /*
371 374 * If for any reason at all we couldn't handle the fault,
372 375 * make sure we exit gracefully rather than endlessly redo
arch/s390/mm/hugetlbpage.c
  1 +/*
  2 + * IBM System z Huge TLB Page Support for Kernel.
  3 + *
  4 + * Copyright 2007 IBM Corp.
  5 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
  6 + */
  7 +
  8 +#include <linux/mm.h>
  9 +#include <linux/hugetlb.h>
  10 +
  11 +
  12 +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
  13 + pte_t *pteptr, pte_t pteval)
  14 +{
  15 + pmd_t *pmdp = (pmd_t *) pteptr;
  16 + pte_t shadow_pteval = pteval;
  17 + unsigned long mask;
  18 +
  19 + if (!MACHINE_HAS_HPAGE) {
  20 + pteptr = (pte_t *) pte_page(pteval)[1].index;
  21 + mask = pte_val(pteval) &
  22 + (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
  23 + pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
  24 + if (mm->context.noexec) {
  25 + pteptr += PTRS_PER_PTE;
  26 + pte_val(shadow_pteval) =
  27 + (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
  28 + }
  29 + }
  30 +
  31 + pmd_val(*pmdp) = pte_val(pteval);
  32 + if (mm->context.noexec) {
  33 + pmdp = get_shadow_table(pmdp);
  34 + pmd_val(*pmdp) = pte_val(shadow_pteval);
  35 + }
  36 +}
  37 +
  38 +int arch_prepare_hugepage(struct page *page)
  39 +{
  40 + unsigned long addr = page_to_phys(page);
  41 + pte_t pte;
  42 + pte_t *ptep;
  43 + int i;
  44 +
  45 + if (MACHINE_HAS_HPAGE)
  46 + return 0;
  47 +
  48 + ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
  49 + if (!ptep)
  50 + return -ENOMEM;
  51 +
  52 + pte = mk_pte(page, PAGE_RW);
  53 + for (i = 0; i < PTRS_PER_PTE; i++) {
  54 + set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
  55 + pte_val(pte) += PAGE_SIZE;
  56 + }
  57 + page[1].index = (unsigned long) ptep;
  58 + return 0;
  59 +}
  60 +
  61 +void arch_release_hugepage(struct page *page)
  62 +{
  63 + pte_t *ptep;
  64 +
  65 + if (MACHINE_HAS_HPAGE)
  66 + return;
  67 +
  68 + ptep = (pte_t *) page[1].index;
  69 + if (!ptep)
  70 + return;
  71 + pte_free(&init_mm, ptep);
  72 + page[1].index = 0;
  73 +}
  74 +
  75 +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
  76 +{
  77 + pgd_t *pgdp;
  78 + pud_t *pudp;
  79 + pmd_t *pmdp = NULL;
  80 +
  81 + pgdp = pgd_offset(mm, addr);
  82 + pudp = pud_alloc(mm, pgdp, addr);
  83 + if (pudp)
  84 + pmdp = pmd_alloc(mm, pudp, addr);
  85 + return (pte_t *) pmdp;
  86 +}
  87 +
  88 +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
  89 +{
  90 + pgd_t *pgdp;
  91 + pud_t *pudp;
  92 + pmd_t *pmdp = NULL;
  93 +
  94 + pgdp = pgd_offset(mm, addr);
  95 + if (pgd_present(*pgdp)) {
  96 + pudp = pud_offset(pgdp, addr);
  97 + if (pud_present(*pudp))
  98 + pmdp = pmd_offset(pudp, addr);
  99 + }
  100 + return (pte_t *) pmdp;
  101 +}
  102 +
  103 +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
  104 +{
  105 + return 0;
  106 +}
  107 +
  108 +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
  109 + int write)
  110 +{
  111 + return ERR_PTR(-EINVAL);
  112 +}
  113 +
  114 +int pmd_huge(pmd_t pmd)
  115 +{
  116 + if (!MACHINE_HAS_HPAGE)
  117 + return 0;
  118 +
  119 + return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
  120 +}
  121 +
  122 +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
  123 + pmd_t *pmdp, int write)
  124 +{
  125 + struct page *page;
  126 +
  127 + if (!MACHINE_HAS_HPAGE)
  128 + return NULL;
  129 +
  130 + page = pmd_page(*pmdp);
  131 + if (page)
  132 + page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
  133 + return page;
  134 +}
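
The new hooks are exercised through the generic hugetlbfs path, so the
usual recipe applies: reserve pages, mount hugetlbfs, and mmap a file
from the mount. A minimal test program; the /mnt/huge mount point is
illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define HPAGE_SIZE (1UL << 20)	/* 1 MB large pages on System z */

int main(void)
{
	int fd = open("/mnt/huge/test", O_CREAT | O_RDWR, 0600);
	void *p;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
		 fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(p, 0xaa, HPAGE_SIZE);	/* touch the whole 1 MB page */
	munmap(p, HPAGE_SIZE);
	close(fd);
	unlink("/mnt/huge/test");
	return 0;
}

Pages have to be reserved beforehand, e.g. with
echo 20 > /proc/sys/vm/nr_hugepages, and the file system mounted with
mount -t hugetlbfs none /mnt/huge.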
arch/s390/mm/init.c
... ... @@ -77,28 +77,6 @@
77 77 printk("%lu pages pagetables\n", global_page_state(NR_PAGETABLE));
78 78 }
79 79  
80   -static void __init setup_ro_region(void)
81   -{
82   - pgd_t *pgd;
83   - pud_t *pud;
84   - pmd_t *pmd;
85   - pte_t *pte;
86   - pte_t new_pte;
87   - unsigned long address, end;
88   -
89   - address = ((unsigned long)&_stext) & PAGE_MASK;
90   - end = PFN_ALIGN((unsigned long)&_eshared);
91   -
92   - for (; address < end; address += PAGE_SIZE) {
93   - pgd = pgd_offset_k(address);
94   - pud = pud_offset(pgd, address);
95   - pmd = pmd_offset(pud, address);
96   - pte = pte_offset_kernel(pmd, address);
97   - new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
98   - *pte = new_pte;
99   - }
100   -}
101   -
102 80 /*
103 81 * paging_init() sets up the page tables
104 82 */
... ... @@ -121,7 +99,6 @@
121 99 clear_table((unsigned long *) init_mm.pgd, pgd_type,
122 100 sizeof(unsigned long)*2048);
123 101 vmem_map_init();
124   - setup_ro_region();
125 102  
126 103 /* enable virtual mapping in kernel mode */
127 104 __ctl_load(S390_lowcore.kernel_asce, 1, 1);
arch/s390/mm/vmem.c
... ... @@ -10,10 +10,12 @@
10 10 #include <linux/mm.h>
11 11 #include <linux/module.h>
12 12 #include <linux/list.h>
  13 +#include <linux/hugetlb.h>
13 14 #include <asm/pgalloc.h>
14 15 #include <asm/pgtable.h>
15 16 #include <asm/setup.h>
16 17 #include <asm/tlbflush.h>
  18 +#include <asm/sections.h>
17 19  
18 20 static DEFINE_MUTEX(vmem_mutex);
19 21  
... ... @@ -113,7 +115,7 @@
113 115 /*
114 116 * Add a physical memory range to the 1:1 mapping.
115 117 */
116   -static int vmem_add_range(unsigned long start, unsigned long size)
  118 +static int vmem_add_range(unsigned long start, unsigned long size, int ro)
117 119 {
118 120 unsigned long address;
119 121 pgd_t *pg_dir;
120 122  
... ... @@ -140,7 +142,19 @@
140 142 pud_populate_kernel(&init_mm, pu_dir, pm_dir);
141 143 }
142 144  
  145 + pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
143 146 pm_dir = pmd_offset(pu_dir, address);
  147 +
  148 +#ifdef __s390x__
  149 + if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
  150 + (address + HPAGE_SIZE <= start + size) &&
  151 + (address >= HPAGE_SIZE)) {
  152 + pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
  153 + pmd_val(*pm_dir) = pte_val(pte);
  154 + address += HPAGE_SIZE - PAGE_SIZE;
  155 + continue;
  156 + }
  157 +#endif
144 158 if (pmd_none(*pm_dir)) {
145 159 pt_dir = vmem_pte_alloc();
146 160 if (!pt_dir)
... ... @@ -149,7 +163,6 @@
149 163 }
150 164  
151 165 pt_dir = pte_offset_kernel(pm_dir, address);
152   - pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
153 166 *pt_dir = pte;
154 167 }
155 168 ret = 0;
... ... @@ -180,6 +193,13 @@
180 193 pm_dir = pmd_offset(pu_dir, address);
181 194 if (pmd_none(*pm_dir))
182 195 continue;
  196 +
  197 + if (pmd_huge(*pm_dir)) {
  198 + pmd_clear_kernel(pm_dir);
  199 + address += HPAGE_SIZE - PAGE_SIZE;
  200 + continue;
  201 + }
  202 +
183 203 pt_dir = pte_offset_kernel(pm_dir, address);
184 204 *pt_dir = pte;
185 205 }
186 206  
... ... @@ -248,14 +268,14 @@
248 268 return ret;
249 269 }
250 270  
251   -static int vmem_add_mem(unsigned long start, unsigned long size)
  271 +static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
252 272 {
253 273 int ret;
254 274  
255 275 ret = vmem_add_mem_map(start, size);
256 276 if (ret)
257 277 return ret;
258   - return vmem_add_range(start, size);
  278 + return vmem_add_range(start, size, ro);
259 279 }
260 280  
261 281 /*
... ... @@ -338,7 +358,7 @@
338 358 if (ret)
339 359 goto out_free;
340 360  
341   - ret = vmem_add_mem(start, size);
  361 + ret = vmem_add_mem(start, size, 0);
342 362 if (ret)
343 363 goto out_remove;
344 364  
345 365  
... ... @@ -374,14 +394,35 @@
374 394 */
375 395 void __init vmem_map_init(void)
376 396 {
  397 + unsigned long ro_start, ro_end;
  398 + unsigned long start, end;
377 399 int i;
378 400  
379 401 INIT_LIST_HEAD(&init_mm.context.crst_list);
380 402 INIT_LIST_HEAD(&init_mm.context.pgtable_list);
381 403 init_mm.context.noexec = 0;
382 404 NODE_DATA(0)->node_mem_map = VMEM_MAP;
383   - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
384   - vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
  405 + ro_start = ((unsigned long)&_stext) & PAGE_MASK;
  406 + ro_end = PFN_ALIGN((unsigned long)&_eshared);
  407 + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
  408 + start = memory_chunk[i].addr;
  409 + end = memory_chunk[i].addr + memory_chunk[i].size;
  410 + if (start >= ro_end || end <= ro_start)
  411 + vmem_add_mem(start, end - start, 0);
  412 + else if (start >= ro_start && end <= ro_end)
  413 + vmem_add_mem(start, end - start, 1);
  414 + else if (start >= ro_start) {
  415 + vmem_add_mem(start, ro_end - start, 1);
  416 + vmem_add_mem(ro_end, end - ro_end, 0);
  417 + } else if (end < ro_end) {
  418 + vmem_add_mem(start, ro_start - start, 0);
  419 + vmem_add_mem(ro_start, end - ro_start, 1);
  420 + } else {
  421 + vmem_add_mem(start, ro_start - start, 0);
  422 + vmem_add_mem(ro_start, ro_end - ro_start, 1);
  423 + vmem_add_mem(ro_end, end - ro_end, 0);
  424 + }
  425 + }
385 426 }
386 427  
387 428 /*
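
The five-way split in vmem_map_init() replaces the removed per-pte
setup_ro_region(): chunks are now mapped with the right protection up
front, so the 1:1 mapping can use large pages wherever a whole 1 MB
range shares one protection. A standalone sketch of the interval
classification, with vmem_add_mem() replaced by a printf:

#include <stdio.h>

static void add(unsigned long start, unsigned long end, int ro)
{
	printf("  map %#lx-%#lx %s\n", start, end, ro ? "RO" : "RW");
}

/* Same case analysis as vmem_map_init() above: split one memory chunk
 * [start, end) against the read-only region [ro_start, ro_end). */
static void map_chunk(unsigned long start, unsigned long end,
		      unsigned long ro_start, unsigned long ro_end)
{
	if (start >= ro_end || end <= ro_start)		/* disjoint */
		add(start, end, 0);
	else if (start >= ro_start && end <= ro_end)	/* inside RO */
		add(start, end, 1);
	else if (start >= ro_start) {			/* tail spills out */
		add(start, ro_end, 1);
		add(ro_end, end, 0);
	} else if (end < ro_end) {			/* head spills out */
		add(start, ro_start, 0);
		add(ro_start, end, 1);
	} else {					/* chunk covers RO */
		add(start, ro_start, 0);
		add(ro_start, ro_end, 1);
		add(ro_end, end, 0);
	}
}

int main(void)
{
	/* one chunk that fully covers a text region at 1-4 MB */
	map_chunk(0x0, 0x10000000, 0x100000, 0x400000);
	return 0;
}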
fs/Kconfig
... ... @@ -1005,7 +1005,8 @@
1005 1005  
1006 1006 config HUGETLBFS
1007 1007 bool "HugeTLB file system support"
1008   - depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN
  1008 + depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
  1009 + (S390 && 64BIT) || BROKEN
1009 1010 help
1010 1011 hugetlbfs is a filesystem backing for HugeTLB pages, based on
1011 1012 ramfs. For architectures that support it, say Y here and read
include/asm-s390/hugetlb.h
  1 +/*
  2 + * IBM System z Huge TLB Page Support for Kernel.
  3 + *
  4 + * Copyright IBM Corp. 2008
  5 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
  6 + */
  7 +
  8 +#ifndef _ASM_S390_HUGETLB_H
  9 +#define _ASM_S390_HUGETLB_H
  10 +
  11 +#include <asm/page.h>
  12 +#include <asm/pgtable.h>
  13 +
  14 +
  15 +#define is_hugepage_only_range(mm, addr, len) 0
  16 +#define hugetlb_free_pgd_range free_pgd_range
  17 +
  18 +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
  19 + pte_t *ptep, pte_t pte);
  20 +
  21 +/*
  22 + * If the arch doesn't supply something else, assume that hugepage
  23 + * size aligned regions are ok without further preparation.
  24 + */
  25 +static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
  26 +{
  27 + if (len & ~HPAGE_MASK)
  28 + return -EINVAL;
  29 + if (addr & ~HPAGE_MASK)
  30 + return -EINVAL;
  31 + return 0;
  32 +}
  33 +
  34 +#define hugetlb_prefault_arch_hook(mm) do { } while (0)
  35 +
  36 +int arch_prepare_hugepage(struct page *page);
  37 +void arch_release_hugepage(struct page *page);
  38 +
  39 +static inline pte_t pte_mkhuge(pte_t pte)
  40 +{
  41 + /*
  42 + * PROT_NONE needs to be remapped from the pte type to the ste type.
  43 + * The HW invalid bit is also different for pte and ste. The pte
  44 + * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE
  45 + * bit, so we don't have to clear it.
  46 + */
  47 + if (pte_val(pte) & _PAGE_INVALID) {
  48 + if (pte_val(pte) & _PAGE_SWT)
  49 + pte_val(pte) |= _HPAGE_TYPE_NONE;
  50 + pte_val(pte) |= _SEGMENT_ENTRY_INV;
  51 + }
  52 + /*
  53 + * Clear SW pte bits SWT and SWX, there are no SW bits in a segment
  54 + * table entry.
  55 + */
  56 + pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX);
  57 + /*
  58 + * Also set the change-override bit because we don't need dirty bit
  59 + * tracking for hugetlbfs pages.
  60 + */
  61 + pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO);
  62 + return pte;
  63 +}
  64 +
  65 +static inline pte_t huge_pte_wrprotect(pte_t pte)
  66 +{
  67 + pte_val(pte) |= _PAGE_RO;
  68 + return pte;
  69 +}
  70 +
  71 +static inline int huge_pte_none(pte_t pte)
  72 +{
  73 + return (pte_val(pte) & _SEGMENT_ENTRY_INV) &&
  74 + !(pte_val(pte) & _SEGMENT_ENTRY_RO);
  75 +}
  76 +
  77 +static inline pte_t huge_ptep_get(pte_t *ptep)
  78 +{
  79 + pte_t pte = *ptep;
  80 + unsigned long mask;
  81 +
  82 + if (!MACHINE_HAS_HPAGE) {
  83 + ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN);
  84 + if (ptep) {
  85 + mask = pte_val(pte) &
  86 + (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
  87 + pte = pte_mkhuge(*ptep);
  88 + pte_val(pte) |= mask;
  89 + }
  90 + }
  91 + return pte;
  92 +}
  93 +
  94 +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
  95 + unsigned long addr, pte_t *ptep)
  96 +{
  97 + pte_t pte = huge_ptep_get(ptep);
  98 +
  99 + pmd_clear((pmd_t *) ptep);
  100 + return pte;
  101 +}
  102 +
  103 +static inline void __pmd_csp(pmd_t *pmdp)
  104 +{
  105 + register unsigned long reg2 asm("2") = pmd_val(*pmdp);
  106 + register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
  107 + _SEGMENT_ENTRY_INV;
  108 + register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
  109 +
  110 + asm volatile(
  111 + " csp %1,%3"
  112 + : "=m" (*pmdp)
  113 + : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
  114 + pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
  115 +}
  116 +
  117 +static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
  118 +{
  119 + unsigned long sto = (unsigned long) pmdp -
  120 + pmd_index(address) * sizeof(pmd_t);
  121 +
  122 + if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
  123 + asm volatile(
  124 + " .insn rrf,0xb98e0000,%2,%3,0,0"
  125 + : "=m" (*pmdp)
  126 + : "m" (*pmdp), "a" (sto),
  127 + "a" ((address & HPAGE_MASK))
  128 + );
  129 + }
  130 + pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
  131 +}
  132 +
  133 +static inline void huge_ptep_invalidate(struct mm_struct *mm,
  134 + unsigned long address, pte_t *ptep)
  135 +{
  136 + pmd_t *pmdp = (pmd_t *) ptep;
  137 +
  138 + if (!MACHINE_HAS_IDTE) {
  139 + __pmd_csp(pmdp);
  140 + if (mm->context.noexec) {
  141 + pmdp = get_shadow_table(pmdp);
  142 + __pmd_csp(pmdp);
  143 + }
  144 + return;
  145 + }
  146 +
  147 + __pmd_idte(address, pmdp);
  148 + if (mm->context.noexec) {
  149 + pmdp = get_shadow_table(pmdp);
  150 + __pmd_idte(address, pmdp);
  151 + }
  152 + return;
  153 +}
  154 +
  155 +#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
  156 +({ \
  157 + int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \
  158 + if (__changed) { \
  159 + huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \
  160 + set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \
  161 + } \
  162 + __changed; \
  163 +})
  164 +
  165 +#define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \
  166 +({ \
  167 + pte_t __pte = huge_ptep_get(__ptep); \
  168 + if (pte_write(__pte)) { \
  169 + if (atomic_read(&(__mm)->mm_users) > 1 || \
  170 + (__mm) != current->active_mm) \
  171 + huge_ptep_invalidate(__mm, __addr, __ptep); \
  172 + set_huge_pte_at(__mm, __addr, __ptep, \
  173 + huge_pte_wrprotect(__pte)); \
  174 + } \
  175 +})
  176 +
  177 +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
  178 + unsigned long address, pte_t *ptep)
  179 +{
  180 + huge_ptep_invalidate(vma->vm_mm, address, ptep);
  181 +}
  182 +
  183 +#endif /* _ASM_S390_HUGETLB_H */
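
The remapping done by pte_mkhuge() can be verified outside the kernel:
pushing the four pte types from pgtable.h (EMPTY 0x400, NONE 0x401, RO
0x200, RW 0x000, with _PAGE_SWT = 0x001 and _PAGE_SWX = 0x002, as I
recall the asm-s390 headers of this era) through the same bit operations
must yield the four _HPAGE_TYPE_* values defined below in pgtable.h. A
small verification sketch:

#include <assert.h>
#include <stdint.h>

#define _PAGE_INVALID		0x400
#define _PAGE_SWT		0x001
#define _PAGE_SWX		0x002
#define _SEGMENT_ENTRY_INV	0x020
#define _SEGMENT_ENTRY_LARGE	0x400
#define _SEGMENT_ENTRY_CO	0x100
#define _HPAGE_TYPE_NONE	0x220

/* Same operations as pte_mkhuge() above, on a plain integer. */
static uint64_t mkhuge(uint64_t pte)
{
	if (pte & _PAGE_INVALID) {
		if (pte & _PAGE_SWT)
			pte |= _HPAGE_TYPE_NONE;
		pte |= _SEGMENT_ENTRY_INV;
	}
	pte &= ~(_PAGE_SWT | _PAGE_SWX);
	pte |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO;
	return pte;
}

int main(void)
{
	/* keep only the INV and RO bits, which encode the segment type */
	assert((mkhuge(0x400) & 0x220) == 0x020);	/* -> _HPAGE_TYPE_EMPTY */
	assert((mkhuge(0x401) & 0x220) == 0x220);	/* -> _HPAGE_TYPE_NONE */
	assert((mkhuge(0x200) & 0x220) == 0x200);	/* -> _HPAGE_TYPE_RO */
	assert((mkhuge(0x000) & 0x220) == 0x000);	/* -> _HPAGE_TYPE_RW */
	return 0;
}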
include/asm-s390/page.h
... ... @@ -19,17 +19,34 @@
19 19 #define PAGE_DEFAULT_ACC 0
20 20 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
21 21  
  22 +#define HPAGE_SHIFT 20
  23 +#define HPAGE_SIZE (1UL << HPAGE_SHIFT)
  24 +#define HPAGE_MASK (~(HPAGE_SIZE - 1))
  25 +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
  26 +
  27 +#define ARCH_HAS_SETCLEAR_HUGE_PTE
  28 +#define ARCH_HAS_HUGE_PTE_TYPE
  29 +#define ARCH_HAS_PREPARE_HUGEPAGE
  30 +#define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH
  31 +
22 32 #include <asm/setup.h>
23 33 #ifndef __ASSEMBLY__
24 34  
25 35 static inline void clear_page(void *page)
26 36 {
27   - register unsigned long reg1 asm ("1") = 0;
28   - register void *reg2 asm ("2") = page;
29   - register unsigned long reg3 asm ("3") = 4096;
30   - asm volatile(
31   - " mvcl 2,0"
32   - : "+d" (reg2), "+d" (reg3) : "d" (reg1) : "memory", "cc");
  37 + if (MACHINE_HAS_PFMF) {
  38 + asm volatile(
  39 + " .insn rre,0xb9af0000,%0,%1"
  40 + : : "d" (0x10000), "a" (page) : "memory", "cc");
  41 + } else {
  42 + register unsigned long reg1 asm ("1") = 0;
  43 + register void *reg2 asm ("2") = page;
  44 + register unsigned long reg3 asm ("3") = 4096;
  45 + asm volatile(
  46 + " mvcl 2,0"
  47 + : "+d" (reg2), "+d" (reg3) : "d" (reg1)
  48 + : "memory", "cc");
  49 + }
33 50 }
34 51  
35 52 static inline void copy_page(void *to, void *from)
include/asm-s390/pgtable.h
... ... @@ -234,6 +234,15 @@
234 234 #define _PAGE_TYPE_EX_RW 0x002
235 235  
236 236 /*
  237 + * Only four types for huge pages, using the invalid bit and protection bit
  238 + * of a segment table entry.
  239 + */
  240 +#define _HPAGE_TYPE_EMPTY 0x020 /* _SEGMENT_ENTRY_INV */
  241 +#define _HPAGE_TYPE_NONE 0x220
  242 +#define _HPAGE_TYPE_RO 0x200 /* _SEGMENT_ENTRY_RO */
  243 +#define _HPAGE_TYPE_RW 0x000
  244 +
  245 +/*
237 246 * PTE type bits are rather complicated. handle_pte_fault uses pte_present,
238 247 * pte_none and pte_file to find out the pte type WITHOUT holding the page
239 248 * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to
... ... @@ -324,6 +333,9 @@
324 333  
325 334 #define _SEGMENT_ENTRY (0)
326 335 #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV)
  336 +
  337 +#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
  338 +#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
327 339  
328 340 #endif /* __s390x__ */
329 341  
include/asm-s390/setup.h
... ... @@ -69,6 +69,8 @@
69 69 #define MACHINE_FLAG_DIAG9C (1UL << 7)
70 70 #define MACHINE_FLAG_MVCOS (1UL << 8)
71 71 #define MACHINE_FLAG_KVM (1UL << 9)
  72 +#define MACHINE_FLAG_HPAGE (1UL << 10)
  73 +#define MACHINE_FLAG_PFMF (1UL << 11)
72 74  
73 75 #define MACHINE_IS_VM (machine_flags & MACHINE_FLAG_VM)
74 76 #define MACHINE_IS_KVM (machine_flags & MACHINE_FLAG_KVM)
... ... @@ -82,6 +84,8 @@
82 84 #define MACHINE_HAS_DIAG44 (1)
83 85 #define MACHINE_HAS_MVPG (machine_flags & MACHINE_FLAG_MVPG)
84 86 #define MACHINE_HAS_MVCOS (0)
  87 +#define MACHINE_HAS_HPAGE (0)
  88 +#define MACHINE_HAS_PFMF (0)
85 89 #else /* __s390x__ */
86 90 #define MACHINE_HAS_IEEE (1)
87 91 #define MACHINE_HAS_CSP (1)
... ... @@ -89,6 +93,8 @@
89 93 #define MACHINE_HAS_DIAG44 (machine_flags & MACHINE_FLAG_DIAG44)
90 94 #define MACHINE_HAS_MVPG (1)
91 95 #define MACHINE_HAS_MVCOS (machine_flags & MACHINE_FLAG_MVCOS)
  96 +#define MACHINE_HAS_HPAGE (machine_flags & MACHINE_FLAG_HPAGE)
  97 +#define MACHINE_HAS_PFMF (machine_flags & MACHINE_FLAG_PFMF)
92 98 #endif /* __s390x__ */
93 99  
94 100 #define MACHINE_HAS_SCLP (!MACHINE_IS_P390)
include/asm-s390/tlbflush.h
... ... @@ -2,6 +2,7 @@
2 2 #define _S390_TLBFLUSH_H
3 3  
4 4 #include <linux/mm.h>
  5 +#include <linux/sched.h>
5 6 #include <asm/processor.h>
6 7 #include <asm/pgalloc.h>
7 8