commit 1ae1c1d09f220ded48ee9a7d91a65e94f95c4af1
Committer: Linus Torvalds
Parent:    274023da1e
thp, s390: architecture backend for thp on s390

This implements the architecture backend for transparent hugepages
on s390.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
6 changed files with 220 additions and 18 deletions
arch/s390/include/asm/hugetlb.h
 /*
  * IBM System z Huge TLB Page Support for Kernel.
  *
  * Copyright IBM Corp. 2008
  * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
  */
 
 #ifndef _ASM_S390_HUGETLB_H
 #define _ASM_S390_HUGETLB_H
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
 
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range			free_pgd_range
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte);
 
 /*
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  */
 static inline int prepare_hugepage_range(struct file *file,
 			unsigned long addr, unsigned long len)
 {
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
 	if (addr & ~HPAGE_MASK)
 		return -EINVAL;
 	return 0;
 }
 
 #define hugetlb_prefault_arch_hook(mm)		do { } while (0)
 #define arch_clear_hugepage_flags(page)	do { } while (0)
 
 int arch_prepare_hugepage(struct page *page);
 void arch_release_hugepage(struct page *page);
 
 static inline pte_t huge_pte_wrprotect(pte_t pte)
 {
 	pte_val(pte) |= _PAGE_RO;
 	return pte;
 }
 
 static inline int huge_pte_none(pte_t pte)
 {
 	return (pte_val(pte) & _SEGMENT_ENTRY_INV) &&
 	       !(pte_val(pte) & _SEGMENT_ENTRY_RO);
 }
 
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
 	pte_t pte = *ptep;
 	unsigned long mask;
 
 	if (!MACHINE_HAS_HPAGE) {
 		ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN);
 		if (ptep) {
 			mask = pte_val(pte) &
 			       (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
 			pte = pte_mkhuge(*ptep);
 			pte_val(pte) |= mask;
 		}
 	}
 	return pte;
 }
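
Note on the function above: when the machine has no hardware large-page
support (!MACHINE_HAS_HPAGE), the "huge pte" is really a segment-table
entry whose origin field points at a software page table, with the
INV/RO status carried in the segment entry itself; huge_ptep_get()
follows the origin and merges those bits back in. A minimal user-space
sketch of that reconstruction, reusing the 64-bit _SEGMENT_ENTRY_*
values from pgtable.h below (the demo values and names are assumptions,
not kernel code):

#include <stdio.h>
#include <stdint.h>

#define _SEGMENT_ENTRY_ORIGIN	(~0x7ffUL)	/* page table origin */
#define _SEGMENT_ENTRY_RO	0x200UL		/* protection bit */
#define _SEGMENT_ENTRY_INV	0x20UL		/* invalid bit */

/* 256 ptes x 8 bytes = 2k; aligned so the origin mask round-trips */
static uint64_t page_table[256] __attribute__((aligned(2048)));

int main(void)
{
	uint64_t seg, mask, pte;

	page_table[0] = 0x12345000UL;	/* pretend pte value */
	seg = ((uintptr_t) page_table & _SEGMENT_ENTRY_ORIGIN) |
	      _SEGMENT_ENTRY_RO;	/* write-protected "huge pte" */

	/* follow the origin, then merge the status bits back in */
	mask = seg & (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
	pte = *(uint64_t *) (uintptr_t) (seg & _SEGMENT_ENTRY_ORIGIN) | mask;

	printf("pte %#lx RO=%d INV=%d\n", (unsigned long) pte,
	       !!(pte & _SEGMENT_ENTRY_RO), !!(pte & _SEGMENT_ENTRY_INV));
	return 0;
}
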
 
 static inline void __pmd_csp(pmd_t *pmdp)
 {
 	register unsigned long reg2 asm("2") = pmd_val(*pmdp);
 	register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
 					       _SEGMENT_ENTRY_INV;
 	register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
 
 	asm volatile(
 		"	csp %1,%3"
 		: "=m" (*pmdp)
 		: "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
-	pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
 }
 
-static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
-{
-	unsigned long sto = (unsigned long) pmdp -
-				pmd_index(address) * sizeof(pmd_t);
-
-	if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
-		asm volatile(
-			"	.insn	rrf,0xb98e0000,%2,%3,0,0"
-			: "=m" (*pmdp)
-			: "m" (*pmdp), "a" (sto),
-			  "a" ((address & HPAGE_MASK))
-		);
-	}
-	pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
-}
-
 static inline void huge_ptep_invalidate(struct mm_struct *mm,
 					unsigned long address, pte_t *ptep)
 {
 	pmd_t *pmdp = (pmd_t *) ptep;
 
 	if (MACHINE_HAS_IDTE)
 		__pmd_idte(address, pmdp);
 	else
 		__pmd_csp(pmdp);
+	pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
 }
 
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 					    unsigned long addr, pte_t *ptep)
 {
 	pte_t pte = huge_ptep_get(ptep);
 
 	huge_ptep_invalidate(mm, addr, ptep);
 	return pte;
 }
 
 #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
 ({									    \
 	int __changed = !pte_same(huge_ptep_get(__ptep), __entry);	    \
 	if (__changed) {						    \
 		huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep);	    \
 		set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry);   \
 	}								    \
 	__changed;							    \
 })
 
 #define huge_ptep_set_wrprotect(__mm, __addr, __ptep)			\
 ({									\
 	pte_t __pte = huge_ptep_get(__ptep);				\
 	if (pte_write(__pte)) {						\
 		huge_ptep_invalidate(__mm, __addr, __ptep);		\
 		set_huge_pte_at(__mm, __addr, __ptep,			\
 				huge_pte_wrprotect(__pte));		\
 	}								\
 })
 
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long address, pte_t *ptep)
 {
 	huge_ptep_invalidate(vma->vm_mm, address, ptep);
 }
 
 #endif /* _ASM_S390_HUGETLB_H */
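
The hunks above move the "mark the segment entry empty" store out of the
flush primitives and into huge_ptep_invalidate(), so it happens exactly
once for both the IDTE and the CSP path; __pmd_idte disappears from this
header (presumably relocated to pgtable.h, which this excerpt does not
show in full) so the THP code can share it. A rough user-space sketch of
the resulting control flow; the stub names and the machine_has_idte flag
are illustrative assumptions:

#include <stdio.h>
#include <stdint.h>

#define _SEGMENT_ENTRY_INV	0x20UL
#define _SEGMENT_ENTRY		0x0UL	/* (0) in the 64-bit definitions */

static int machine_has_idte = 1;	/* stands in for MACHINE_HAS_IDTE */

static void idte_flush(uint64_t *pmdp) { printf("idte %p\n", (void *) pmdp); }
static void csp_flush(uint64_t *pmdp)  { printf("csp %p\n", (void *) pmdp); }

static void invalidate(uint64_t *pmdp)
{
	if (machine_has_idte)
		idte_flush(pmdp);
	else
		csp_flush(pmdp);
	/* the store that used to be duplicated in both primitives */
	*pmdp = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
}

int main(void)
{
	uint64_t pmd = 0x12345000UL;

	invalidate(&pmd);
	printf("pmd after invalidate: %#lx\n", (unsigned long) pmd);
	return 0;
}
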
arch/s390/include/asm/pgtable.h
 /*
  * S390 version
  *	Copyright IBM Corp. 1999, 2000
  *	Author(s): Hartmut Penner (hp@de.ibm.com)
  *		   Ulrich Weigand (weigand@de.ibm.com)
  *		   Martin Schwidefsky (schwidefsky@de.ibm.com)
  *
  * Derived from "include/asm-i386/pgtable.h"
  */
 
 #ifndef _ASM_S390_PGTABLE_H
 #define _ASM_S390_PGTABLE_H
 
 /*
  * The Linux memory management assumes a three-level page table setup. For
  * s390 31 bit we "fold" the mid level into the top-level page table, so
  * that we physically have the same two-level page table as the s390 mmu
  * expects in 31 bit mode. For s390 64 bit we use three of the five levels
  * the hardware provides (region first and region second tables are not
  * used).
  *
  * The "pgd_xxx()" functions are trivial for a folded two-level
  * setup: the pgd is never bad, and a pmd always exists (as it's folded
  * into the pgd entry)
  *
  * This file contains the functions and defines necessary to modify and use
  * the S390 page table tree.
  */
 #ifndef __ASSEMBLY__
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <asm/bug.h>
 #include <asm/page.h>
 
 extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096)));
 extern void paging_init(void);
 extern void vmem_map_init(void);
 extern void fault_init(void);
 
 /*
  * The S390 doesn't have any external MMU info: the kernel page
  * tables contain all the necessary information.
  */
 #define update_mmu_cache(vma, address, ptep)	do { } while (0)
 
 /*
  * ZERO_PAGE is a global shared page that is always zero; used
  * for zero-mapped memory areas etc..
  */
 
 extern unsigned long empty_zero_page;
 extern unsigned long zero_page_mask;
 
 #define ZERO_PAGE(vaddr) \
 	(virt_to_page((void *)(empty_zero_page + \
 	 (((unsigned long)(vaddr)) & zero_page_mask))))
 
 #define is_zero_pfn is_zero_pfn
 static inline int is_zero_pfn(unsigned long pfn)
 {
 	extern unsigned long zero_pfn;
 	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
 	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
 }
 
 #define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))
 
 #endif /* !__ASSEMBLY__ */
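
A small aside on is_zero_pfn() above: because the subtraction is
unsigned, a single comparison tests whether pfn lies in the block of
cache-colored zero pages that starts at zero_pfn (pfns below zero_pfn
wrap around to huge values). A self-contained check with assumed demo
values:

#include <stdio.h>

#define PAGE_SHIFT 12

static unsigned long zero_pfn = 0x1000;		/* assumed for the demo */
static unsigned long zero_page_mask = 0x3000;	/* four colored zero pages */

static int is_zero_pfn(unsigned long pfn)
{
	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

int main(void)
{
	printf("%d %d %d\n",
	       is_zero_pfn(0x1000),	/* 1: first zero page */
	       is_zero_pfn(0x1003),	/* 1: last colored zero page */
	       is_zero_pfn(0x0fff));	/* 0: wraps to a huge offset */
	return 0;
}
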
 
 /*
  * PMD_SHIFT determines the size of the area a second-level page
  * table can map
  * PGDIR_SHIFT determines what a third-level page table entry can map
  */
 #ifndef CONFIG_64BIT
 # define PMD_SHIFT	20
 # define PUD_SHIFT	20
 # define PGDIR_SHIFT	20
 #else /* CONFIG_64BIT */
 # define PMD_SHIFT	20
 # define PUD_SHIFT	31
 # define PGDIR_SHIFT	42
 #endif /* CONFIG_64BIT */
 
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
 #define PUD_SIZE	(1UL << PUD_SHIFT)
 #define PUD_MASK	(~(PUD_SIZE-1))
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
 /*
  * entries per page directory level: the S390 is two-level, so
  * we don't really have any PMD directory physically.
  * for S390 segment-table entries are combined to one PGD
  * that leads to 1024 pte per pgd
  */
 #define PTRS_PER_PTE	256
 #ifndef CONFIG_64BIT
 #define PTRS_PER_PMD	1
 #define PTRS_PER_PUD	1
 #else /* CONFIG_64BIT */
 #define PTRS_PER_PMD	2048
 #define PTRS_PER_PUD	2048
 #endif /* CONFIG_64BIT */
 #define PTRS_PER_PGD	2048
 
 #define FIRST_USER_ADDRESS  0
 
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e))
 #define pmd_ERROR(e) \
 	printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
 #define pud_ERROR(e) \
 	printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
 
 #ifndef __ASSEMBLY__
 /*
  * The vmalloc area will always be on the topmost area of the kernel
  * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc,
  * which should be enough for any sane case.
  * By putting vmalloc at the top, we maximise the gap between physical
  * memory and vmalloc to catch misplaced memory accesses. As a side
  * effect, this also makes sure that 64 bit module code cannot be used
  * as system call address.
  */
 extern unsigned long VMALLOC_START;
 extern unsigned long VMALLOC_END;
 extern struct page *vmemmap;
 
 #define VMEM_MAX_PHYS ((unsigned long) vmemmap)
 
 /*
  * A 31 bit pagetable entry of S390 has following format:
  *  |   PFRA          |    |  OS  |
  * 0                   0IP0
  * 00000000001111111111222222222233
  * 01234567890123456789012345678901
  *
  * I Page-Invalid Bit:    Page is not available for address-translation
  * P Page-Protection Bit: Store access not possible for page
  *
  * A 31 bit segmenttable entry of S390 has following format:
  *  |   P-table origin      |  |PTL
  * 0                         IC
  * 00000000001111111111222222222233
  * 01234567890123456789012345678901
  *
  * I Segment-Invalid Bit:    Segment is not available for address-translation
  * C Common-Segment Bit:     Segment is not private (PoP 3-30)
  * PTL Page-Table-Length:    Page-table length (PTL+1*16 entries -> up to 256)
  *
  * The 31 bit segmenttable origin of S390 has following format:
  *
  *  |S-table origin   |     | STL |
  * X                   **GPS
  * 00000000001111111111222222222233
  * 01234567890123456789012345678901
  *
  * X Space-Switch event:
  * G Segment-Invalid Bit:     *
  * P Private-Space Bit:       Segment is not private (PoP 3-30)
  * S Storage-Alteration:
  * STL Segment-Table-Length:  Segment-table length (STL+1*16 entries -> up to 2048)
  *
  * A 64 bit pagetable entry of S390 has following format:
  * |                         PFRA                       |0IPC|  OS  |
  * 0000000000111111111122222222223333333333444444444455555555556666
  * 0123456789012345678901234567890123456789012345678901234567890123
  *
  * I Page-Invalid Bit:    Page is not available for address-translation
  * P Page-Protection Bit: Store access not possible for page
  * C Change-bit override: HW is not required to set change bit
  *
  * A 64 bit segmenttable entry of S390 has following format:
  * |        P-table origin                              |      TT
  * 0000000000111111111122222222223333333333444444444455555555556666
  * 0123456789012345678901234567890123456789012345678901234567890123
  *
  * I Segment-Invalid Bit:    Segment is not available for address-translation
  * C Common-Segment Bit:     Segment is not private (PoP 3-30)
  * P Page-Protection Bit:    Store access not possible for page
  * TT Type 00
  *
  * A 64 bit region table entry of S390 has following format:
  * |        S-table origin                             |   TF  TTTL
  * 0000000000111111111122222222223333333333444444444455555555556666
  * 0123456789012345678901234567890123456789012345678901234567890123
  *
  * I Segment-Invalid Bit:    Segment is not available for address-translation
  * TT Type 01
  * TF
  * TL Table length
  *
  * The 64 bit regiontable origin of S390 has following format:
  * |      region table origin                          |       DTTL
  * 0000000000111111111122222222223333333333444444444455555555556666
  * 0123456789012345678901234567890123456789012345678901234567890123
  *
  * X Space-Switch event:
  * G Segment-Invalid Bit:
  * P Private-Space Bit:
  * S Storage-Alteration:
  * R Real space
  * TL Table-Length:
  *
  * A storage key has the following format:
  * | ACC |F|R|C|0|
  *  0   3 4 5 6 7
  * ACC: access key
  * F  : fetch protection bit
  * R  : referenced bit
  * C  : changed bit
  */
 
 /* Hardware bits in the page table entry */
 #define _PAGE_CO	0x100		/* HW Change-bit override */
 #define _PAGE_RO	0x200		/* HW read-only bit  */
 #define _PAGE_INVALID	0x400		/* HW invalid bit    */
 
 /* Software bits in the page table entry */
 #define _PAGE_SWT	0x001		/* SW pte type bit t */
 #define _PAGE_SWX	0x002		/* SW pte type bit x */
 #define _PAGE_SWC	0x004		/* SW pte changed bit (for KVM) */
 #define _PAGE_SWR	0x008		/* SW pte referenced bit (for KVM) */
 #define _PAGE_SPECIAL	0x010		/* SW associated with special page */
 #define __HAVE_ARCH_PTE_SPECIAL
 
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR)
 
 /* Six different types of pages. */
 #define _PAGE_TYPE_EMPTY	0x400
 #define _PAGE_TYPE_NONE		0x401
 #define _PAGE_TYPE_SWAP		0x403
 #define _PAGE_TYPE_FILE		0x601	/* bit 0x002 is used for offset !! */
 #define _PAGE_TYPE_RO		0x200
 #define _PAGE_TYPE_RW		0x000
 
 /*
  * Only four types for huge pages, using the invalid bit and protection bit
  * of a segment table entry.
  */
 #define _HPAGE_TYPE_EMPTY	0x020	/* _SEGMENT_ENTRY_INV */
 #define _HPAGE_TYPE_NONE	0x220
 #define _HPAGE_TYPE_RO		0x200	/* _SEGMENT_ENTRY_RO  */
 #define _HPAGE_TYPE_RW		0x000
 
 /*
  * PTE type bits are rather complicated. handle_pte_fault uses pte_present,
  * pte_none and pte_file to find out the pte type WITHOUT holding the page
  * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to
  * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs
  * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards.
  * This change is done while holding the lock, but the intermediate step
  * of a previously valid pte with the hw invalid bit set can be observed by
  * handle_pte_fault. That makes it necessary that all valid pte types with
  * the hw invalid bit set must be distinguishable from the four pte types
  * empty, none, swap and file.
  *
  *			irxt  ipte  irxt
  * _PAGE_TYPE_EMPTY	1000   ->   1000
  * _PAGE_TYPE_NONE	1001   ->   1001
  * _PAGE_TYPE_SWAP	1011   ->   1011
  * _PAGE_TYPE_FILE	11?1   ->   11?1
  * _PAGE_TYPE_RO	0100   ->   1100
  * _PAGE_TYPE_RW	0000   ->   1000
  *
  * pte_none is true for bits combinations 1000, 1010, 1100, 1110
  * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001
  * pte_file is true for bits combinations 1101, 1111
  * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
  */
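
The encoding table above can be checked mechanically. Below is a
runnable user-space restatement of pte_none()/pte_present()/pte_file()
exactly as they appear further down in this file, applied to the six
_PAGE_TYPE_* values (plain C, no kernel types; purely illustrative):

#include <stdio.h>

#define _PAGE_RO	0x200
#define _PAGE_INVALID	0x400
#define _PAGE_SWT	0x001
#define _PAGE_SWX	0x002

#define _PAGE_TYPE_EMPTY	0x400
#define _PAGE_TYPE_NONE		0x401
#define _PAGE_TYPE_SWAP		0x403
#define _PAGE_TYPE_FILE		0x601
#define _PAGE_TYPE_RO		0x200
#define _PAGE_TYPE_RW		0x000

static int pte_none(unsigned long v)
{
	return (v & _PAGE_INVALID) && !(v & _PAGE_SWT);
}

static int pte_present(unsigned long v)
{
	unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX;
	return (v & mask) == _PAGE_TYPE_NONE ||
	       (!(v & _PAGE_INVALID) && !(v & _PAGE_SWT));
}

static int pte_file(unsigned long v)
{
	unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT;
	return (v & mask) == _PAGE_TYPE_FILE;
}

int main(void)
{
	unsigned long types[] = { _PAGE_TYPE_EMPTY, _PAGE_TYPE_NONE,
				  _PAGE_TYPE_SWAP, _PAGE_TYPE_FILE,
				  _PAGE_TYPE_RO, _PAGE_TYPE_RW };
	const char *names[] = { "EMPTY", "NONE", "SWAP", "FILE", "RO", "RW" };
	int i;

	for (i = 0; i < 6; i++)
		printf("%-5s none=%d present=%d file=%d\n", names[i],
		       pte_none(types[i]), pte_present(types[i]),
		       pte_file(types[i]));
	return 0;
}
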
 
 #ifndef CONFIG_64BIT
 
 /* Bits in the segment table address-space-control-element */
 #define _ASCE_SPACE_SWITCH	0x80000000UL	/* space switch event	    */
 #define _ASCE_ORIGIN_MASK	0x7ffff000UL	/* segment table origin	    */
 #define _ASCE_PRIVATE_SPACE	0x100	/* private space control	    */
 #define _ASCE_ALT_EVENT		0x80	/* storage alteration event control */
 #define _ASCE_TABLE_LENGTH	0x7f	/* 128 x 64 entries = 8k	    */
 
 /* Bits in the segment table entry */
 #define _SEGMENT_ENTRY_ORIGIN	0x7fffffc0UL	/* page table origin	    */
 #define _SEGMENT_ENTRY_RO	0x200	/* page protection bit		    */
 #define _SEGMENT_ENTRY_INV	0x20	/* invalid segment table entry	    */
 #define _SEGMENT_ENTRY_COMMON	0x10	/* common segment bit		    */
 #define _SEGMENT_ENTRY_PTL	0x0f	/* page table length		    */
 
 #define _SEGMENT_ENTRY		(_SEGMENT_ENTRY_PTL)
 #define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INV)
 
 /* Page status table bits for virtualization */
 #define RCP_ACC_BITS	0xf0000000UL
 #define RCP_FP_BIT	0x08000000UL
 #define RCP_PCL_BIT	0x00800000UL
 #define RCP_HR_BIT	0x00400000UL
 #define RCP_HC_BIT	0x00200000UL
 #define RCP_GR_BIT	0x00040000UL
 #define RCP_GC_BIT	0x00020000UL
 
 /* User dirty / referenced bit for KVM's migration feature */
 #define KVM_UR_BIT	0x00008000UL
 #define KVM_UC_BIT	0x00004000UL
 
 #else /* CONFIG_64BIT */
 
 /* Bits in the segment/region table address-space-control-element */
 #define _ASCE_ORIGIN		~0xfffUL/* segment table origin		    */
 #define _ASCE_PRIVATE_SPACE	0x100	/* private space control	    */
 #define _ASCE_ALT_EVENT		0x80	/* storage alteration event control */
 #define _ASCE_SPACE_SWITCH	0x40	/* space switch event		    */
 #define _ASCE_REAL_SPACE	0x20	/* real space control		    */
 #define _ASCE_TYPE_MASK		0x0c	/* asce table type mask		    */
 #define _ASCE_TYPE_REGION1	0x0c	/* region first table type	    */
 #define _ASCE_TYPE_REGION2	0x08	/* region second table type	    */
 #define _ASCE_TYPE_REGION3	0x04	/* region third table type	    */
 #define _ASCE_TYPE_SEGMENT	0x00	/* segment table type		    */
 #define _ASCE_TABLE_LENGTH	0x03	/* region table length		    */
 
 /* Bits in the region table entry */
 #define _REGION_ENTRY_ORIGIN	~0xfffUL/* region/segment table origin	    */
 #define _REGION_ENTRY_INV	0x20	/* invalid region table entry	    */
 #define _REGION_ENTRY_TYPE_MASK	0x0c	/* region/segment table type mask   */
 #define _REGION_ENTRY_TYPE_R1	0x0c	/* region first table type	    */
 #define _REGION_ENTRY_TYPE_R2	0x08	/* region second table type	    */
 #define _REGION_ENTRY_TYPE_R3	0x04	/* region third table type	    */
 #define _REGION_ENTRY_LENGTH	0x03	/* region third length		    */
 
 #define _REGION1_ENTRY		(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
 #define _REGION1_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV)
 #define _REGION2_ENTRY		(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
 #define _REGION2_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV)
 #define _REGION3_ENTRY		(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
 #define _REGION3_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV)
 
 /* Bits in the segment table entry */
 #define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* segment table origin		    */
 #define _SEGMENT_ENTRY_RO	0x200	/* page protection bit		    */
 #define _SEGMENT_ENTRY_INV	0x20	/* invalid segment table entry	    */
 
 #define _SEGMENT_ENTRY		(0)
 #define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INV)
 
 #define _SEGMENT_ENTRY_LARGE	0x400	/* STE-format control, large page   */
 #define _SEGMENT_ENTRY_CO	0x100	/* change-recording override	    */
 #define _SEGMENT_ENTRY_SPLIT_BIT 0	/* THP splitting bit number	    */
 #define _SEGMENT_ENTRY_SPLIT	(1UL << _SEGMENT_ENTRY_SPLIT_BIT)
 
+/* Set of bits not changed in pmd_modify */
+#define _SEGMENT_CHG_MASK	(_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
+				 | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
+
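
_SEGMENT_CHG_MASK, added here, is the pmd analogue of _PAGE_CHG_MASK:
the bits a pmd_modify()-style helper must carry over when it changes
protections on a transparent huge page. pmd_modify itself is not part
of this excerpt, so the following user-space sketch of its likely use
is an assumption for illustration:

#include <stdio.h>

#define _SEGMENT_ENTRY_ORIGIN	(~0x7ffUL)
#define _SEGMENT_ENTRY_RO	0x200UL
#define _SEGMENT_ENTRY_LARGE	0x400UL
#define _SEGMENT_ENTRY_CO	0x100UL
#define _SEGMENT_ENTRY_SPLIT	(1UL << 0)

#define _SEGMENT_CHG_MASK	(_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
				 | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)

int main(void)
{
	/* a read-only large-page segment entry */
	unsigned long pmd = 0x12345000UL | _SEGMENT_ENTRY_LARGE |
			    _SEGMENT_ENTRY_RO;
	unsigned long newprot = 0;	/* e.g. make it writable: no RO bit */

	/* keep origin/LARGE/SPLIT/CO, swap in the new protection bits */
	unsigned long modified = (pmd & _SEGMENT_CHG_MASK) | newprot;

	printf("old %#lx -> new %#lx (RO cleared, LARGE kept)\n",
	       pmd, modified);
	return 0;
}
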
 /* Page status table bits for virtualization */
 #define RCP_ACC_BITS	0xf000000000000000UL
 #define RCP_FP_BIT	0x0800000000000000UL
 #define RCP_PCL_BIT	0x0080000000000000UL
 #define RCP_HR_BIT	0x0040000000000000UL
 #define RCP_HC_BIT	0x0020000000000000UL
 #define RCP_GR_BIT	0x0004000000000000UL
 #define RCP_GC_BIT	0x0002000000000000UL
 
 /* User dirty / referenced bit for KVM's migration feature */
 #define KVM_UR_BIT	0x0000800000000000UL
 #define KVM_UC_BIT	0x0000400000000000UL
 
 #endif /* CONFIG_64BIT */
 
 /*
  * A user page table pointer has the space-switch-event bit, the
  * private-space-control bit and the storage-alteration-event-control
  * bit set. A kernel page table pointer doesn't need them.
  */
 #define _ASCE_USER_BITS		(_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \
 				 _ASCE_ALT_EVENT)
 
 /*
  * Page protection definitions.
  */
 #define PAGE_NONE	__pgprot(_PAGE_TYPE_NONE)
 #define PAGE_RO		__pgprot(_PAGE_TYPE_RO)
 #define PAGE_RW		__pgprot(_PAGE_TYPE_RW)
 
 #define PAGE_KERNEL	PAGE_RW
 #define PAGE_COPY	PAGE_RO
 
 /*
  * On s390 the page table entry has an invalid bit and a read-only bit.
  * Read permission implies execute permission and write permission
  * implies read permission.
  */
          /*xwr*/
 #define __P000	PAGE_NONE
 #define __P001	PAGE_RO
 #define __P010	PAGE_RO
 #define __P011	PAGE_RO
 #define __P100	PAGE_RO
 #define __P101	PAGE_RO
 #define __P110	PAGE_RO
 #define __P111	PAGE_RO
 
 #define __S000	PAGE_NONE
 #define __S001	PAGE_RO
 #define __S010	PAGE_RW
 #define __S011	PAGE_RW
 #define __S100	PAGE_RO
 #define __S101	PAGE_RO
 #define __S110	PAGE_RW
 #define __S111	PAGE_RW
 
 static inline int mm_exclusive(struct mm_struct *mm)
 {
 	return likely(mm == current->active_mm &&
 		      atomic_read(&mm->context.attach_count) <= 1);
 }
 
 static inline int mm_has_pgste(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
 	if (unlikely(mm->context.has_pgste))
 		return 1;
 #endif
 	return 0;
 }
 /*
  * pgd/pmd/pte query functions
  */
 #ifndef CONFIG_64BIT
 
 static inline int pgd_present(pgd_t pgd) { return 1; }
 static inline int pgd_none(pgd_t pgd) { return 0; }
 static inline int pgd_bad(pgd_t pgd) { return 0; }
 
 static inline int pud_present(pud_t pud) { return 1; }
 static inline int pud_none(pud_t pud) { return 0; }
 static inline int pud_bad(pud_t pud) { return 0; }
 
 #else /* CONFIG_64BIT */
 
 static inline int pgd_present(pgd_t pgd)
 {
 	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
 		return 1;
 	return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
 }
 
 static inline int pgd_none(pgd_t pgd)
 {
 	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
 		return 0;
 	return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL;
 }
 
 static inline int pgd_bad(pgd_t pgd)
 {
 	/*
 	 * With dynamic page table levels the pgd can be a region table
 	 * entry or a segment table entry. Check for the bits that are
 	 * invalid for either table entry.
 	 */
 	unsigned long mask =
 		~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
 		~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
 	return (pgd_val(pgd) & mask) != 0;
 }
 
 static inline int pud_present(pud_t pud)
 {
 	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
 		return 1;
 	return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
 }
 
 static inline int pud_none(pud_t pud)
 {
 	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
 		return 0;
 	return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL;
 }
 
 static inline int pud_bad(pud_t pud)
 {
 	/*
 	 * With dynamic page table levels the pud can be a region table
 	 * entry or a segment table entry. Check for the bits that are
 	 * invalid for either table entry.
 	 */
 	unsigned long mask =
 		~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV &
 		~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
 	return (pud_val(pud) & mask) != 0;
 }
 
 #endif /* CONFIG_64BIT */
 
 static inline int pmd_present(pmd_t pmd)
 {
 	return (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) != 0UL;
 }
 
 static inline int pmd_none(pmd_t pmd)
 {
 	return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) != 0UL;
 }
 
 static inline int pmd_bad(pmd_t pmd)
 {
 	unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV;
 	return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY;
 }
 
 #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
 extern void pmdp_splitting_flush(struct vm_area_struct *vma,
 				 unsigned long addr, pmd_t *pmdp);
 
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp,
+				 pmd_t entry, int dirty);
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+				  unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMD_WRITE
+static inline int pmd_write(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0;
+}
+
+static inline int pmd_young(pmd_t pmd)
+{
+	return 0;
+}
+
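
Of the helpers added above, only pmd_write() carries inline logic: a
segment entry is writable exactly when its protection (RO) bit is
clear, while pmd_young() simply reports 0. A quick user-space check of
the pmd_write() predicate (plain C restatement, not kernel code):

#include <stdio.h>

#define _SEGMENT_ENTRY_RO	0x200UL

static int pmd_write(unsigned long pmd)
{
	return (pmd & _SEGMENT_ENTRY_RO) == 0;
}

int main(void)
{
	printf("rw entry: pmd_write=%d\n", pmd_write(0x12345000UL));
	printf("ro entry: pmd_write=%d\n",
	       pmd_write(0x12345000UL | _SEGMENT_ENTRY_RO));
	return 0;
}
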
 static inline int pte_none(pte_t pte)
 {
 	return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT);
 }
 
 static inline int pte_present(pte_t pte)
 {
 	unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX;
 	return (pte_val(pte) & mask) == _PAGE_TYPE_NONE ||
 		(!(pte_val(pte) & _PAGE_INVALID) &&
 		 !(pte_val(pte) & _PAGE_SWT));
 }
 
 static inline int pte_file(pte_t pte)
 {
 	unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT;
 	return (pte_val(pte) & mask) == _PAGE_TYPE_FILE;
 }
 
 static inline int pte_special(pte_t pte)
 {
 	return (pte_val(pte) & _PAGE_SPECIAL);
 }
 
 #define __HAVE_ARCH_PTE_SAME
 static inline int pte_same(pte_t a, pte_t b)
 {
 	return pte_val(a) == pte_val(b);
 }
 
 static inline pgste_t pgste_get_lock(pte_t *ptep)
 {
 	unsigned long new = 0;
 #ifdef CONFIG_PGSTE
 	unsigned long old;
 
 	preempt_disable();
 	asm(
 		"	lg	%0,%2\n"
 		"0:	lgr	%1,%0\n"
 		"	nihh	%0,0xff7f\n"	/* clear RCP_PCL_BIT in old */
 		"	oihh	%1,0x0080\n"	/* set RCP_PCL_BIT in new */
 		"	csg	%0,%1,%2\n"
 		"	jl	0b\n"
 		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
 		: "Q" (ptep[PTRS_PER_PTE]) : "cc");
 #endif
 	return __pgste(new);
 }
 
 static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
 	asm(
 		"	nihh	%1,0xff7f\n"	/* clear RCP_PCL_BIT */
 		"	stg	%1,%0\n"
 		: "=Q" (ptep[PTRS_PER_PTE])
 		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc");
 	preempt_enable();
 #endif
 }
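
The two functions above implement a bit spinlock in the pgste: the csg
loop retries until it installs the value with RCP_PCL_BIT set, and the
unlock is a plain store with the bit cleared. A user-space analogue
built on GCC's __atomic builtins (illustrative only; the kernel relies
on the csg instruction and preemption control instead):

#include <stdio.h>
#include <stdint.h>

#define RCP_PCL_BIT 0x0080000000000000UL	/* 64-bit value from above */

static uint64_t pgste_get_lock(uint64_t *pgste)
{
	uint64_t old, new;

	do {
		/* expected value must have the lock bit clear */
		old = __atomic_load_n(pgste, __ATOMIC_RELAXED) & ~RCP_PCL_BIT;
		new = old | RCP_PCL_BIT;
	} while (!__atomic_compare_exchange_n(pgste, &old, new, 0,
					      __ATOMIC_ACQUIRE,
					      __ATOMIC_RELAXED));
	return new;
}

static void pgste_set_unlock(uint64_t *pgste, uint64_t val)
{
	__atomic_store_n(pgste, val & ~RCP_PCL_BIT, __ATOMIC_RELEASE);
}

int main(void)
{
	uint64_t pgste = 0xf000000000000000UL;	/* some access bits set */
	uint64_t held = pgste_get_lock(&pgste);

	printf("locked:   %#llx\n", (unsigned long long) held);
	pgste_set_unlock(&pgste, held);
	printf("unlocked: %#llx\n", (unsigned long long) pgste);
	return 0;
}
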
576 | 600 | ||
577 | static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) | 601 | static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) |
578 | { | 602 | { |
579 | #ifdef CONFIG_PGSTE | 603 | #ifdef CONFIG_PGSTE |
580 | unsigned long address, bits; | 604 | unsigned long address, bits; |
581 | unsigned char skey; | 605 | unsigned char skey; |
582 | 606 | ||
583 | if (!pte_present(*ptep)) | 607 | if (!pte_present(*ptep)) |
584 | return pgste; | 608 | return pgste; |
585 | address = pte_val(*ptep) & PAGE_MASK; | 609 | address = pte_val(*ptep) & PAGE_MASK; |
586 | skey = page_get_storage_key(address); | 610 | skey = page_get_storage_key(address); |
587 | bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); | 611 | bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); |
588 | /* Clear page changed & referenced bit in the storage key */ | 612 | /* Clear page changed & referenced bit in the storage key */ |
589 | if (bits & _PAGE_CHANGED) | 613 | if (bits & _PAGE_CHANGED) |
590 | page_set_storage_key(address, skey ^ bits, 1); | 614 | page_set_storage_key(address, skey ^ bits, 1); |
591 | else if (bits) | 615 | else if (bits) |
592 | page_reset_referenced(address); | 616 | page_reset_referenced(address); |
593 | /* Transfer page changed & referenced bit to guest bits in pgste */ | 617 | /* Transfer page changed & referenced bit to guest bits in pgste */ |
594 | pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ | 618 | pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ |
595 | /* Get host changed & referenced bits from pgste */ | 619 | /* Get host changed & referenced bits from pgste */ |
596 | bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; | 620 | bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; |
597 | /* Clear host bits in pgste. */ | 621 | /* Clear host bits in pgste. */ |
598 | pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); | 622 | pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); |
599 | pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); | 623 | pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); |
600 | /* Copy page access key and fetch protection bit to pgste */ | 624 | /* Copy page access key and fetch protection bit to pgste */ |
601 | pgste_val(pgste) |= | 625 | pgste_val(pgste) |= |
602 | (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; | 626 | (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; |
603 | /* Transfer changed and referenced to kvm user bits */ | 627 | /* Transfer changed and referenced to kvm user bits */ |
604 | pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ | 628 | pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ |
605 | /* Transfer changed & referenced to pte software bits */ | 629 | /* Transfer changed & referenced to pte software bits */ |
606 | pte_val(*ptep) |= bits << 1; /* _PAGE_SWR & _PAGE_SWC */ | 630 | pte_val(*ptep) |= bits << 1; /* _PAGE_SWR & _PAGE_SWC */ |
607 | #endif | 631 | #endif |
608 | return pgste; | 632 | return pgste; |
609 | 633 | ||
610 | } | 634 | } |
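[Editor's note] The shift counts above decode as follows, assuming the usual s390 storage-key layout with _PAGE_REFERENCED == 0x04 and _PAGE_CHANGED == 0x02 (assumed values, not taken from this patch):

	/*
	 * bits << 48 : R -> pgste bit 50 (RCP_GR_BIT), C -> bit 49 (RCP_GC_BIT)
	 * ... >> 52  : host R/C come out of pgste bits 54/53 (RCP_HR/RCP_HC)
	 * bits << 45 : R -> pgste bit 47 (KVM_UR_BIT), C -> bit 46 (KVM_UC_BIT)
	 * bits << 1  : R -> pte 0x08 (_PAGE_SWR), C -> pte 0x04 (_PAGE_SWC)
	 */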
611 | 635 | ||
612 | static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) | 636 | static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) |
613 | { | 637 | { |
614 | #ifdef CONFIG_PGSTE | 638 | #ifdef CONFIG_PGSTE |
615 | int young; | 639 | int young; |
616 | 640 | ||
617 | if (!pte_present(*ptep)) | 641 | if (!pte_present(*ptep)) |
618 | return pgste; | 642 | return pgste; |
619 | young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); | 643 | young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); |
620 | /* Transfer page referenced bit to pte software bit (host view) */ | 644 | /* Transfer page referenced bit to pte software bit (host view) */ |
621 | if (young || (pgste_val(pgste) & RCP_HR_BIT)) | 645 | if (young || (pgste_val(pgste) & RCP_HR_BIT)) |
622 | pte_val(*ptep) |= _PAGE_SWR; | 646 | pte_val(*ptep) |= _PAGE_SWR; |
623 | /* Clear host referenced bit in pgste. */ | 647 | /* Clear host referenced bit in pgste. */ |
624 | pgste_val(pgste) &= ~RCP_HR_BIT; | 648 | pgste_val(pgste) &= ~RCP_HR_BIT; |
625 | /* Transfer page referenced bit to guest bit in pgste */ | 649 | /* Transfer page referenced bit to guest bit in pgste */ |
626 | pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */ | 650 | pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */ |
627 | #endif | 651 | #endif |
628 | return pgste; | 652 | return pgste; |
629 | 653 | ||
630 | } | 654 | } |
631 | 655 | ||
632 | static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) | 656 | static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) |
633 | { | 657 | { |
634 | #ifdef CONFIG_PGSTE | 658 | #ifdef CONFIG_PGSTE |
635 | unsigned long address; | 659 | unsigned long address; |
636 | unsigned long okey, nkey; | 660 | unsigned long okey, nkey; |
637 | 661 | ||
638 | if (!pte_present(entry)) | 662 | if (!pte_present(entry)) |
639 | return; | 663 | return; |
640 | address = pte_val(entry) & PAGE_MASK; | 664 | address = pte_val(entry) & PAGE_MASK; |
641 | okey = nkey = page_get_storage_key(address); | 665 | okey = nkey = page_get_storage_key(address); |
642 | nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); | 666 | nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); |
643 | /* Set page access key and fetch protection bit from pgste */ | 667 | /* Set page access key and fetch protection bit from pgste */ |
644 | nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; | 668 | nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; |
645 | if (okey != nkey) | 669 | if (okey != nkey) |
646 | page_set_storage_key(address, nkey, 1); | 670 | page_set_storage_key(address, nkey, 1); |
647 | #endif | 671 | #endif |
648 | } | 672 | } |
649 | 673 | ||
650 | /** | 674 | /** |
651 | * struct gmap - guest address space | 675 | * struct gmap - guest address space |
652 | * @mm: pointer to the parent mm_struct | 676 | * @mm: pointer to the parent mm_struct |
653 | * @table: pointer to the page directory | 677 | * @table: pointer to the page directory |
654 | * @asce: address space control element for gmap page table | 678 | * @asce: address space control element for gmap page table |
655 | * @crst_list: list of all crst tables used in the guest address space | 679 | * @crst_list: list of all crst tables used in the guest address space |
656 | */ | 680 | */ |
657 | struct gmap { | 681 | struct gmap { |
658 | struct list_head list; | 682 | struct list_head list; |
659 | struct mm_struct *mm; | 683 | struct mm_struct *mm; |
660 | unsigned long *table; | 684 | unsigned long *table; |
661 | unsigned long asce; | 685 | unsigned long asce; |
662 | struct list_head crst_list; | 686 | struct list_head crst_list; |
663 | }; | 687 | }; |
664 | 688 | ||
665 | /** | 689 | /** |
666 | * struct gmap_rmap - reverse mapping for segment table entries | 690 | * struct gmap_rmap - reverse mapping for segment table entries |
667 | * @next: pointer to the next gmap_rmap structure in the list | 691 | * @next: pointer to the next gmap_rmap structure in the list |
668 | * @entry: pointer to a segment table entry | 692 | * @entry: pointer to a segment table entry |
669 | */ | 693 | */ |
670 | struct gmap_rmap { | 694 | struct gmap_rmap { |
671 | struct list_head list; | 695 | struct list_head list; |
672 | unsigned long *entry; | 696 | unsigned long *entry; |
673 | }; | 697 | }; |
674 | 698 | ||
675 | /** | 699 | /** |
676 | * struct gmap_pgtable - gmap information attached to a page table | 700 | * struct gmap_pgtable - gmap information attached to a page table |
677 | * @vmaddr: address of the 1MB segment in the process virtual memory | 701 | * @vmaddr: address of the 1MB segment in the process virtual memory |
678 | * @mapper: list of segment table entries mapping a page table | 702 | * @mapper: list of segment table entries mapping a page table |
679 | */ | 703 | */ |
680 | struct gmap_pgtable { | 704 | struct gmap_pgtable { |
681 | unsigned long vmaddr; | 705 | unsigned long vmaddr; |
682 | struct list_head mapper; | 706 | struct list_head mapper; |
683 | }; | 707 | }; |
684 | 708 | ||
685 | struct gmap *gmap_alloc(struct mm_struct *mm); | 709 | struct gmap *gmap_alloc(struct mm_struct *mm); |
686 | void gmap_free(struct gmap *gmap); | 710 | void gmap_free(struct gmap *gmap); |
687 | void gmap_enable(struct gmap *gmap); | 711 | void gmap_enable(struct gmap *gmap); |
688 | void gmap_disable(struct gmap *gmap); | 712 | void gmap_disable(struct gmap *gmap); |
689 | int gmap_map_segment(struct gmap *gmap, unsigned long from, | 713 | int gmap_map_segment(struct gmap *gmap, unsigned long from, |
690 | unsigned long to, unsigned long length); | 714 | unsigned long to, unsigned long length); |
691 | int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); | 715 | int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); |
692 | unsigned long __gmap_fault(unsigned long address, struct gmap *); | 716 | unsigned long __gmap_fault(unsigned long address, struct gmap *); |
693 | unsigned long gmap_fault(unsigned long address, struct gmap *); | 717 | unsigned long gmap_fault(unsigned long address, struct gmap *); |
694 | void gmap_discard(unsigned long from, unsigned long to, struct gmap *); | 718 | void gmap_discard(unsigned long from, unsigned long to, struct gmap *); |
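[Editor's note] A usage sketch for the gmap API declared above. The caller, the addresses and the return-value conventions are hypothetical, inferred from the prototypes only; error handling is minimal.

	static int gmap_demo(struct mm_struct *mm)
	{
		struct gmap *gmap = gmap_alloc(mm);

		if (!gmap)
			return -ENOMEM;
		/* map 1 MB of host memory at guest address 0 */
		if (gmap_map_segment(gmap, 0x100000, 0x0, 0x100000) == 0) {
			gmap_enable(gmap);		/* switch to the gmap ASCE */
			(void) gmap_fault(0x0, gmap);	/* resolve a guest access */
			gmap_disable(gmap);
			gmap_unmap_segment(gmap, 0x0, 0x100000);
		}
		gmap_free(gmap);
		return 0;
	}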
695 | 719 | ||
696 | /* | 720 | /* |
697 | * Certain architectures need to do special things when PTEs | 721 | * Certain architectures need to do special things when PTEs |
698 | * within a page table are directly modified. Thus, the following | 722 | * within a page table are directly modified. Thus, the following |
699 | * hook is made available. | 723 | * hook is made available. |
700 | */ | 724 | */ |
701 | static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, | 725 | static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, |
702 | pte_t *ptep, pte_t entry) | 726 | pte_t *ptep, pte_t entry) |
703 | { | 727 | { |
704 | pgste_t pgste; | 728 | pgste_t pgste; |
705 | 729 | ||
706 | if (mm_has_pgste(mm)) { | 730 | if (mm_has_pgste(mm)) { |
707 | pgste = pgste_get_lock(ptep); | 731 | pgste = pgste_get_lock(ptep); |
708 | pgste_set_pte(ptep, pgste, entry); | 732 | pgste_set_pte(ptep, pgste, entry); |
709 | *ptep = entry; | 733 | *ptep = entry; |
710 | pgste_set_unlock(ptep, pgste); | 734 | pgste_set_unlock(ptep, pgste); |
711 | } else | 735 | } else |
712 | *ptep = entry; | 736 | *ptep = entry; |
713 | } | 737 | } |
714 | 738 | ||
715 | /* | 739 | /* |
716 | * query functions pte_write/pte_dirty/pte_young only work if | 740 | * query functions pte_write/pte_dirty/pte_young only work if |
717 | * pte_present() is true. Undefined behaviour if not.. | 741 | * pte_present() is true. Undefined behaviour if not.. |
718 | */ | 742 | */ |
719 | static inline int pte_write(pte_t pte) | 743 | static inline int pte_write(pte_t pte) |
720 | { | 744 | { |
721 | return (pte_val(pte) & _PAGE_RO) == 0; | 745 | return (pte_val(pte) & _PAGE_RO) == 0; |
722 | } | 746 | } |
723 | 747 | ||
724 | static inline int pte_dirty(pte_t pte) | 748 | static inline int pte_dirty(pte_t pte) |
725 | { | 749 | { |
726 | #ifdef CONFIG_PGSTE | 750 | #ifdef CONFIG_PGSTE |
727 | if (pte_val(pte) & _PAGE_SWC) | 751 | if (pte_val(pte) & _PAGE_SWC) |
728 | return 1; | 752 | return 1; |
729 | #endif | 753 | #endif |
730 | return 0; | 754 | return 0; |
731 | } | 755 | } |
732 | 756 | ||
733 | static inline int pte_young(pte_t pte) | 757 | static inline int pte_young(pte_t pte) |
734 | { | 758 | { |
735 | #ifdef CONFIG_PGSTE | 759 | #ifdef CONFIG_PGSTE |
736 | if (pte_val(pte) & _PAGE_SWR) | 760 | if (pte_val(pte) & _PAGE_SWR) |
737 | return 1; | 761 | return 1; |
738 | #endif | 762 | #endif |
739 | return 0; | 763 | return 0; |
740 | } | 764 | } |
741 | 765 | ||
742 | /* | 766 | /* |
743 | * pgd/pmd/pte modification functions | 767 | * pgd/pmd/pte modification functions |
744 | */ | 768 | */ |
745 | 769 | ||
746 | static inline void pgd_clear(pgd_t *pgd) | 770 | static inline void pgd_clear(pgd_t *pgd) |
747 | { | 771 | { |
748 | #ifdef CONFIG_64BIT | 772 | #ifdef CONFIG_64BIT |
749 | if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) | 773 | if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) |
750 | pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; | 774 | pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; |
751 | #endif | 775 | #endif |
752 | } | 776 | } |
753 | 777 | ||
754 | static inline void pud_clear(pud_t *pud) | 778 | static inline void pud_clear(pud_t *pud) |
755 | { | 779 | { |
756 | #ifdef CONFIG_64BIT | 780 | #ifdef CONFIG_64BIT |
757 | if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) | 781 | if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) |
758 | pud_val(*pud) = _REGION3_ENTRY_EMPTY; | 782 | pud_val(*pud) = _REGION3_ENTRY_EMPTY; |
759 | #endif | 783 | #endif |
760 | } | 784 | } |
761 | 785 | ||
762 | static inline void pmd_clear(pmd_t *pmdp) | 786 | static inline void pmd_clear(pmd_t *pmdp) |
763 | { | 787 | { |
764 | pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; | 788 | pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; |
765 | } | 789 | } |
766 | 790 | ||
767 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 791 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
768 | { | 792 | { |
769 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | 793 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; |
770 | } | 794 | } |
771 | 795 | ||
772 | /* | 796 | /* |
773 | * The following pte modification functions only work if | 797 | * The following pte modification functions only work if |
774 | * pte_present() is true. Undefined behaviour if not.. | 798 | * pte_present() is true. Undefined behaviour if not.. |
775 | */ | 799 | */ |
776 | static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | 800 | static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) |
777 | { | 801 | { |
778 | pte_val(pte) &= _PAGE_CHG_MASK; | 802 | pte_val(pte) &= _PAGE_CHG_MASK; |
779 | pte_val(pte) |= pgprot_val(newprot); | 803 | pte_val(pte) |= pgprot_val(newprot); |
780 | return pte; | 804 | return pte; |
781 | } | 805 | } |
782 | 806 | ||
783 | static inline pte_t pte_wrprotect(pte_t pte) | 807 | static inline pte_t pte_wrprotect(pte_t pte) |
784 | { | 808 | { |
785 | /* Do not clobber _PAGE_TYPE_NONE pages! */ | 809 | /* Do not clobber _PAGE_TYPE_NONE pages! */ |
786 | if (!(pte_val(pte) & _PAGE_INVALID)) | 810 | if (!(pte_val(pte) & _PAGE_INVALID)) |
787 | pte_val(pte) |= _PAGE_RO; | 811 | pte_val(pte) |= _PAGE_RO; |
788 | return pte; | 812 | return pte; |
789 | } | 813 | } |
790 | 814 | ||
791 | static inline pte_t pte_mkwrite(pte_t pte) | 815 | static inline pte_t pte_mkwrite(pte_t pte) |
792 | { | 816 | { |
793 | pte_val(pte) &= ~_PAGE_RO; | 817 | pte_val(pte) &= ~_PAGE_RO; |
794 | return pte; | 818 | return pte; |
795 | } | 819 | } |
796 | 820 | ||
797 | static inline pte_t pte_mkclean(pte_t pte) | 821 | static inline pte_t pte_mkclean(pte_t pte) |
798 | { | 822 | { |
799 | #ifdef CONFIG_PGSTE | 823 | #ifdef CONFIG_PGSTE |
800 | pte_val(pte) &= ~_PAGE_SWC; | 824 | pte_val(pte) &= ~_PAGE_SWC; |
801 | #endif | 825 | #endif |
802 | return pte; | 826 | return pte; |
803 | } | 827 | } |
804 | 828 | ||
805 | static inline pte_t pte_mkdirty(pte_t pte) | 829 | static inline pte_t pte_mkdirty(pte_t pte) |
806 | { | 830 | { |
807 | return pte; | 831 | return pte; |
808 | } | 832 | } |
809 | 833 | ||
810 | static inline pte_t pte_mkold(pte_t pte) | 834 | static inline pte_t pte_mkold(pte_t pte) |
811 | { | 835 | { |
812 | #ifdef CONFIG_PGSTE | 836 | #ifdef CONFIG_PGSTE |
813 | pte_val(pte) &= ~_PAGE_SWR; | 837 | pte_val(pte) &= ~_PAGE_SWR; |
814 | #endif | 838 | #endif |
815 | return pte; | 839 | return pte; |
816 | } | 840 | } |
817 | 841 | ||
818 | static inline pte_t pte_mkyoung(pte_t pte) | 842 | static inline pte_t pte_mkyoung(pte_t pte) |
819 | { | 843 | { |
820 | return pte; | 844 | return pte; |
821 | } | 845 | } |
822 | 846 | ||
823 | static inline pte_t pte_mkspecial(pte_t pte) | 847 | static inline pte_t pte_mkspecial(pte_t pte) |
824 | { | 848 | { |
825 | pte_val(pte) |= _PAGE_SPECIAL; | 849 | pte_val(pte) |= _PAGE_SPECIAL; |
826 | return pte; | 850 | return pte; |
827 | } | 851 | } |
828 | 852 | ||
829 | #ifdef CONFIG_HUGETLB_PAGE | 853 | #ifdef CONFIG_HUGETLB_PAGE |
830 | static inline pte_t pte_mkhuge(pte_t pte) | 854 | static inline pte_t pte_mkhuge(pte_t pte) |
831 | { | 855 | { |
832 | /* | 856 | /* |
833 | * PROT_NONE needs to be remapped from the pte type to the ste type. | 857 | * PROT_NONE needs to be remapped from the pte type to the ste type. |
834 | * The HW invalid bit is also different for pte and ste. The pte | 858 | * The HW invalid bit is also different for pte and ste. The pte |
835 | * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE | 859 | * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE |
836 | * bit, so we don't have to clear it. | 860 | * bit, so we don't have to clear it. |
837 | */ | 861 | */ |
838 | if (pte_val(pte) & _PAGE_INVALID) { | 862 | if (pte_val(pte) & _PAGE_INVALID) { |
839 | if (pte_val(pte) & _PAGE_SWT) | 863 | if (pte_val(pte) & _PAGE_SWT) |
840 | pte_val(pte) |= _HPAGE_TYPE_NONE; | 864 | pte_val(pte) |= _HPAGE_TYPE_NONE; |
841 | pte_val(pte) |= _SEGMENT_ENTRY_INV; | 865 | pte_val(pte) |= _SEGMENT_ENTRY_INV; |
842 | } | 866 | } |
843 | /* | 867 | /* |
844 | * Clear SW pte bits SWT and SWX, there are no SW bits in a segment | 868 | * Clear SW pte bits SWT and SWX, there are no SW bits in a segment |
845 | * table entry. | 869 | * table entry. |
846 | */ | 870 | */ |
847 | pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX); | 871 | pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX); |
848 | /* | 872 | /* |
849 | * Also set the change-override bit because we don't need dirty bit | 873 | * Also set the change-override bit because we don't need dirty bit |
850 | * tracking for hugetlbfs pages. | 874 | * tracking for hugetlbfs pages. |
851 | */ | 875 | */ |
852 | pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); | 876 | pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); |
853 | return pte; | 877 | return pte; |
854 | } | 878 | } |
855 | #endif | 879 | #endif |
856 | 880 | ||
857 | /* | 881 | /* |
858 | * Get (and clear) the user dirty bit for a pte. | 882 | * Get (and clear) the user dirty bit for a pte. |
859 | */ | 883 | */ |
860 | static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, | 884 | static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, |
861 | pte_t *ptep) | 885 | pte_t *ptep) |
862 | { | 886 | { |
863 | pgste_t pgste; | 887 | pgste_t pgste; |
864 | int dirty = 0; | 888 | int dirty = 0; |
865 | 889 | ||
866 | if (mm_has_pgste(mm)) { | 890 | if (mm_has_pgste(mm)) { |
867 | pgste = pgste_get_lock(ptep); | 891 | pgste = pgste_get_lock(ptep); |
868 | pgste = pgste_update_all(ptep, pgste); | 892 | pgste = pgste_update_all(ptep, pgste); |
869 | dirty = !!(pgste_val(pgste) & KVM_UC_BIT); | 893 | dirty = !!(pgste_val(pgste) & KVM_UC_BIT); |
870 | pgste_val(pgste) &= ~KVM_UC_BIT; | 894 | pgste_val(pgste) &= ~KVM_UC_BIT; |
871 | pgste_set_unlock(ptep, pgste); | 895 | pgste_set_unlock(ptep, pgste); |
872 | return dirty; | 896 | return dirty; |
873 | } | 897 | } |
874 | return dirty; | 898 | return dirty; |
875 | } | 899 | } |
876 | 900 | ||
877 | /* | 901 | /* |
878 | * Get (and clear) the user referenced bit for a pte. | 902 | * Get (and clear) the user referenced bit for a pte. |
879 | */ | 903 | */ |
880 | static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, | 904 | static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, |
881 | pte_t *ptep) | 905 | pte_t *ptep) |
882 | { | 906 | { |
883 | pgste_t pgste; | 907 | pgste_t pgste; |
884 | int young = 0; | 908 | int young = 0; |
885 | 909 | ||
886 | if (mm_has_pgste(mm)) { | 910 | if (mm_has_pgste(mm)) { |
887 | pgste = pgste_get_lock(ptep); | 911 | pgste = pgste_get_lock(ptep); |
888 | pgste = pgste_update_young(ptep, pgste); | 912 | pgste = pgste_update_young(ptep, pgste); |
889 | young = !!(pgste_val(pgste) & KVM_UR_BIT); | 913 | young = !!(pgste_val(pgste) & KVM_UR_BIT); |
890 | pgste_val(pgste) &= ~KVM_UR_BIT; | 914 | pgste_val(pgste) &= ~KVM_UR_BIT; |
891 | pgste_set_unlock(ptep, pgste); | 915 | pgste_set_unlock(ptep, pgste); |
892 | } | 916 | } |
893 | return young; | 917 | return young; |
894 | } | 918 | } |
895 | 919 | ||
896 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG | 920 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
897 | static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, | 921 | static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, |
898 | unsigned long addr, pte_t *ptep) | 922 | unsigned long addr, pte_t *ptep) |
899 | { | 923 | { |
900 | pgste_t pgste; | 924 | pgste_t pgste; |
901 | pte_t pte; | 925 | pte_t pte; |
902 | 926 | ||
903 | if (mm_has_pgste(vma->vm_mm)) { | 927 | if (mm_has_pgste(vma->vm_mm)) { |
904 | pgste = pgste_get_lock(ptep); | 928 | pgste = pgste_get_lock(ptep); |
905 | pgste = pgste_update_young(ptep, pgste); | 929 | pgste = pgste_update_young(ptep, pgste); |
906 | pte = *ptep; | 930 | pte = *ptep; |
907 | *ptep = pte_mkold(pte); | 931 | *ptep = pte_mkold(pte); |
908 | pgste_set_unlock(ptep, pgste); | 932 | pgste_set_unlock(ptep, pgste); |
909 | return pte_young(pte); | 933 | return pte_young(pte); |
910 | } | 934 | } |
911 | return 0; | 935 | return 0; |
912 | } | 936 | } |
913 | 937 | ||
914 | #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH | 938 | #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH |
915 | static inline int ptep_clear_flush_young(struct vm_area_struct *vma, | 939 | static inline int ptep_clear_flush_young(struct vm_area_struct *vma, |
916 | unsigned long address, pte_t *ptep) | 940 | unsigned long address, pte_t *ptep) |
917 | { | 941 | { |
918 | /* No need to flush TLB | 942 | /* No need to flush TLB |
919 | * On s390 reference bits are in storage key and never in TLB | 943 | * On s390 reference bits are in storage key and never in TLB |
920 | * With virtualization we handle the reference bit, without | 944 | * With virtualization we handle the reference bit, without |
921 | * virtualization we can simply return */ | 945 | * virtualization we can simply return */ |
922 | return ptep_test_and_clear_young(vma, address, ptep); | 946 | return ptep_test_and_clear_young(vma, address, ptep); |
923 | } | 947 | } |
924 | 948 | ||
925 | static inline void __ptep_ipte(unsigned long address, pte_t *ptep) | 949 | static inline void __ptep_ipte(unsigned long address, pte_t *ptep) |
926 | { | 950 | { |
927 | if (!(pte_val(*ptep) & _PAGE_INVALID)) { | 951 | if (!(pte_val(*ptep) & _PAGE_INVALID)) { |
928 | #ifndef CONFIG_64BIT | 952 | #ifndef CONFIG_64BIT |
929 | /* pto must point to the start of the segment table */ | 953 | /* pto must point to the start of the segment table */ |
930 | pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); | 954 | pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); |
931 | #else | 955 | #else |
932 | /* ipte in zarch mode can do the math */ | 956 | /* ipte in zarch mode can do the math */ |
933 | pte_t *pto = ptep; | 957 | pte_t *pto = ptep; |
934 | #endif | 958 | #endif |
935 | asm volatile( | 959 | asm volatile( |
936 | " ipte %2,%3" | 960 | " ipte %2,%3" |
937 | : "=m" (*ptep) : "m" (*ptep), | 961 | : "=m" (*ptep) : "m" (*ptep), |
938 | "a" (pto), "a" (address)); | 962 | "a" (pto), "a" (address)); |
939 | } | 963 | } |
940 | } | 964 | } |
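[Editor's note] The 31-bit mask is just table-origin alignment, assuming a 31-bit page table of 256 four-byte entries (1 KB in size, 1 KB aligned):

	/* 0x7ffffc00 == ~(0x400 - 1) & 0x7fffffff:
	 * align ptep down to 1 KB and strip bit 31, which is not part
	 * of a 31-bit address. In z/Architecture mode ipte locates the
	 * entry itself, so ptep is passed unchanged.
	 */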
941 | 965 | ||
942 | /* | 966 | /* |
943 | * This is hard to understand. ptep_get_and_clear and ptep_clear_flush | 967 | * This is hard to understand. ptep_get_and_clear and ptep_clear_flush |
944 | * both clear the TLB for the unmapped pte. The reason is that | 968 | * both clear the TLB for the unmapped pte. The reason is that |
945 | * ptep_get_and_clear is used in common code (e.g. change_pte_range) | 969 | * ptep_get_and_clear is used in common code (e.g. change_pte_range) |
946 | * to modify an active pte. The sequence is | 970 | * to modify an active pte. The sequence is |
947 | * 1) ptep_get_and_clear | 971 | * 1) ptep_get_and_clear |
948 | * 2) set_pte_at | 972 | * 2) set_pte_at |
949 | * 3) flush_tlb_range | 973 | * 3) flush_tlb_range |
950 | * On s390 the tlb needs to get flushed with the modification of the pte | 974 | * On s390 the tlb needs to get flushed with the modification of the pte |
951 | * if the pte is active. The only way this can be implemented is to | 975 | * if the pte is active. The only way this can be implemented is to |
952 | * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range | 976 | * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range |
953 | * is a nop. | 977 | * is a nop. |
954 | */ | 978 | */ |
955 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR | 979 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR |
956 | static inline pte_t ptep_get_and_clear(struct mm_struct *mm, | 980 | static inline pte_t ptep_get_and_clear(struct mm_struct *mm, |
957 | unsigned long address, pte_t *ptep) | 981 | unsigned long address, pte_t *ptep) |
958 | { | 982 | { |
959 | pgste_t pgste; | 983 | pgste_t pgste; |
960 | pte_t pte; | 984 | pte_t pte; |
961 | 985 | ||
962 | mm->context.flush_mm = 1; | 986 | mm->context.flush_mm = 1; |
963 | if (mm_has_pgste(mm)) | 987 | if (mm_has_pgste(mm)) |
964 | pgste = pgste_get_lock(ptep); | 988 | pgste = pgste_get_lock(ptep); |
965 | 989 | ||
966 | pte = *ptep; | 990 | pte = *ptep; |
967 | if (!mm_exclusive(mm)) | 991 | if (!mm_exclusive(mm)) |
968 | __ptep_ipte(address, ptep); | 992 | __ptep_ipte(address, ptep); |
969 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | 993 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; |
970 | 994 | ||
971 | if (mm_has_pgste(mm)) { | 995 | if (mm_has_pgste(mm)) { |
972 | pgste = pgste_update_all(&pte, pgste); | 996 | pgste = pgste_update_all(&pte, pgste); |
973 | pgste_set_unlock(ptep, pgste); | 997 | pgste_set_unlock(ptep, pgste); |
974 | } | 998 | } |
975 | return pte; | 999 | return pte; |
976 | } | 1000 | } |
977 | 1001 | ||
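[Editor's note] A sketch of the generic sequence the comment above describes, modeled loosely on change_pte_range() (simplified, not the actual mm/ code), showing why the flush lives in ptep_get_and_clear() while flush_tlb_range() can be a nop on s390:

	static void change_one_pte(struct vm_area_struct *vma, unsigned long addr,
				   pte_t *ptep, pgprot_t newprot)
	{
		pte_t pte;

		pte = ptep_get_and_clear(vma->vm_mm, addr, ptep); /* TLB flushed here */
		pte = pte_modify(pte, newprot);
		set_pte_at(vma->vm_mm, addr, ptep, pte);
		flush_tlb_range(vma, addr, addr + PAGE_SIZE);	  /* nop on s390 */
	}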
978 | #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION | 1002 | #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION |
979 | static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, | 1003 | static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, |
980 | unsigned long address, | 1004 | unsigned long address, |
981 | pte_t *ptep) | 1005 | pte_t *ptep) |
982 | { | 1006 | { |
983 | pte_t pte; | 1007 | pte_t pte; |
984 | 1008 | ||
985 | mm->context.flush_mm = 1; | 1009 | mm->context.flush_mm = 1; |
986 | if (mm_has_pgste(mm)) | 1010 | if (mm_has_pgste(mm)) |
987 | pgste_get_lock(ptep); | 1011 | pgste_get_lock(ptep); |
988 | 1012 | ||
989 | pte = *ptep; | 1013 | pte = *ptep; |
990 | if (!mm_exclusive(mm)) | 1014 | if (!mm_exclusive(mm)) |
991 | __ptep_ipte(address, ptep); | 1015 | __ptep_ipte(address, ptep); |
992 | return pte; | 1016 | return pte; |
993 | } | 1017 | } |
994 | 1018 | ||
995 | static inline void ptep_modify_prot_commit(struct mm_struct *mm, | 1019 | static inline void ptep_modify_prot_commit(struct mm_struct *mm, |
996 | unsigned long address, | 1020 | unsigned long address, |
997 | pte_t *ptep, pte_t pte) | 1021 | pte_t *ptep, pte_t pte) |
998 | { | 1022 | { |
999 | *ptep = pte; | 1023 | *ptep = pte; |
1000 | if (mm_has_pgste(mm)) | 1024 | if (mm_has_pgste(mm)) |
1001 | pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE)); | 1025 | pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE)); |
1002 | } | 1026 | } |
1003 | 1027 | ||
1004 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH | 1028 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH |
1005 | static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, | 1029 | static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, |
1006 | unsigned long address, pte_t *ptep) | 1030 | unsigned long address, pte_t *ptep) |
1007 | { | 1031 | { |
1008 | pgste_t pgste; | 1032 | pgste_t pgste; |
1009 | pte_t pte; | 1033 | pte_t pte; |
1010 | 1034 | ||
1011 | if (mm_has_pgste(vma->vm_mm)) | 1035 | if (mm_has_pgste(vma->vm_mm)) |
1012 | pgste = pgste_get_lock(ptep); | 1036 | pgste = pgste_get_lock(ptep); |
1013 | 1037 | ||
1014 | pte = *ptep; | 1038 | pte = *ptep; |
1015 | __ptep_ipte(address, ptep); | 1039 | __ptep_ipte(address, ptep); |
1016 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | 1040 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; |
1017 | 1041 | ||
1018 | if (mm_has_pgste(vma->vm_mm)) { | 1042 | if (mm_has_pgste(vma->vm_mm)) { |
1019 | pgste = pgste_update_all(&pte, pgste); | 1043 | pgste = pgste_update_all(&pte, pgste); |
1020 | pgste_set_unlock(ptep, pgste); | 1044 | pgste_set_unlock(ptep, pgste); |
1021 | } | 1045 | } |
1022 | return pte; | 1046 | return pte; |
1023 | } | 1047 | } |
1024 | 1048 | ||
1025 | /* | 1049 | /* |
1026 | * The batched pte unmap code uses ptep_get_and_clear_full to clear the | 1050 | * The batched pte unmap code uses ptep_get_and_clear_full to clear the |
1027 | * ptes. Here an optimization is possible. tlb_gather_mmu flushes all | 1051 | * ptes. Here an optimization is possible. tlb_gather_mmu flushes all |
1028 | * tlbs of an mm if it can guarantee that the ptes of the mm_struct | 1052 | * tlbs of an mm if it can guarantee that the ptes of the mm_struct |
1029 | * cannot be accessed while the batched unmap is running. In this case | 1053 | * cannot be accessed while the batched unmap is running. In this case |
1030 | * full==1 and a simple pte_clear is enough. See tlb.h. | 1054 | * full==1 and a simple pte_clear is enough. See tlb.h. |
1031 | */ | 1055 | */ |
1032 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL | 1056 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL |
1033 | static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, | 1057 | static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, |
1034 | unsigned long address, | 1058 | unsigned long address, |
1035 | pte_t *ptep, int full) | 1059 | pte_t *ptep, int full) |
1036 | { | 1060 | { |
1037 | pgste_t pgste; | 1061 | pgste_t pgste; |
1038 | pte_t pte; | 1062 | pte_t pte; |
1039 | 1063 | ||
1040 | if (mm_has_pgste(mm)) | 1064 | if (mm_has_pgste(mm)) |
1041 | pgste = pgste_get_lock(ptep); | 1065 | pgste = pgste_get_lock(ptep); |
1042 | 1066 | ||
1043 | pte = *ptep; | 1067 | pte = *ptep; |
1044 | if (!full) | 1068 | if (!full) |
1045 | __ptep_ipte(address, ptep); | 1069 | __ptep_ipte(address, ptep); |
1046 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | 1070 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; |
1047 | 1071 | ||
1048 | if (mm_has_pgste(mm)) { | 1072 | if (mm_has_pgste(mm)) { |
1049 | pgste = pgste_update_all(&pte, pgste); | 1073 | pgste = pgste_update_all(&pte, pgste); |
1050 | pgste_set_unlock(ptep, pgste); | 1074 | pgste_set_unlock(ptep, pgste); |
1051 | } | 1075 | } |
1052 | return pte; | 1076 | return pte; |
1053 | } | 1077 | } |
1054 | 1078 | ||
1055 | #define __HAVE_ARCH_PTEP_SET_WRPROTECT | 1079 | #define __HAVE_ARCH_PTEP_SET_WRPROTECT |
1056 | static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, | 1080 | static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, |
1057 | unsigned long address, pte_t *ptep) | 1081 | unsigned long address, pte_t *ptep) |
1058 | { | 1082 | { |
1059 | pgste_t pgste; | 1083 | pgste_t pgste; |
1060 | pte_t pte = *ptep; | 1084 | pte_t pte = *ptep; |
1061 | 1085 | ||
1062 | if (pte_write(pte)) { | 1086 | if (pte_write(pte)) { |
1063 | mm->context.flush_mm = 1; | 1087 | mm->context.flush_mm = 1; |
1064 | if (mm_has_pgste(mm)) | 1088 | if (mm_has_pgste(mm)) |
1065 | pgste = pgste_get_lock(ptep); | 1089 | pgste = pgste_get_lock(ptep); |
1066 | 1090 | ||
1067 | if (!mm_exclusive(mm)) | 1091 | if (!mm_exclusive(mm)) |
1068 | __ptep_ipte(address, ptep); | 1092 | __ptep_ipte(address, ptep); |
1069 | *ptep = pte_wrprotect(pte); | 1093 | *ptep = pte_wrprotect(pte); |
1070 | 1094 | ||
1071 | if (mm_has_pgste(mm)) | 1095 | if (mm_has_pgste(mm)) |
1072 | pgste_set_unlock(ptep, pgste); | 1096 | pgste_set_unlock(ptep, pgste); |
1073 | } | 1097 | } |
1074 | return pte; | 1098 | return pte; |
1075 | } | 1099 | } |
1076 | 1100 | ||
1077 | #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS | 1101 | #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS |
1078 | static inline int ptep_set_access_flags(struct vm_area_struct *vma, | 1102 | static inline int ptep_set_access_flags(struct vm_area_struct *vma, |
1079 | unsigned long address, pte_t *ptep, | 1103 | unsigned long address, pte_t *ptep, |
1080 | pte_t entry, int dirty) | 1104 | pte_t entry, int dirty) |
1081 | { | 1105 | { |
1082 | pgste_t pgste; | 1106 | pgste_t pgste; |
1083 | 1107 | ||
1084 | if (pte_same(*ptep, entry)) | 1108 | if (pte_same(*ptep, entry)) |
1085 | return 0; | 1109 | return 0; |
1086 | if (mm_has_pgste(vma->vm_mm)) | 1110 | if (mm_has_pgste(vma->vm_mm)) |
1087 | pgste = pgste_get_lock(ptep); | 1111 | pgste = pgste_get_lock(ptep); |
1088 | 1112 | ||
1089 | __ptep_ipte(address, ptep); | 1113 | __ptep_ipte(address, ptep); |
1090 | *ptep = entry; | 1114 | *ptep = entry; |
1091 | 1115 | ||
1092 | if (mm_has_pgste(vma->vm_mm)) | 1116 | if (mm_has_pgste(vma->vm_mm)) |
1093 | pgste_set_unlock(ptep, pgste); | 1117 | pgste_set_unlock(ptep, pgste); |
1094 | return 1; | 1118 | return 1; |
1095 | } | 1119 | } |
1096 | 1120 | ||
1097 | /* | 1121 | /* |
1098 | * Conversion functions: convert a page and protection to a page entry, | 1122 | * Conversion functions: convert a page and protection to a page entry, |
1099 | * and a page entry and page directory to the page they refer to. | 1123 | * and a page entry and page directory to the page they refer to. |
1100 | */ | 1124 | */ |
1101 | static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) | 1125 | static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) |
1102 | { | 1126 | { |
1103 | pte_t __pte; | 1127 | pte_t __pte; |
1104 | pte_val(__pte) = physpage + pgprot_val(pgprot); | 1128 | pte_val(__pte) = physpage + pgprot_val(pgprot); |
1105 | return __pte; | 1129 | return __pte; |
1106 | } | 1130 | } |
1107 | 1131 | ||
1108 | static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) | 1132 | static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) |
1109 | { | 1133 | { |
1110 | unsigned long physpage = page_to_phys(page); | 1134 | unsigned long physpage = page_to_phys(page); |
1111 | 1135 | ||
1112 | return mk_pte_phys(physpage, pgprot); | 1136 | return mk_pte_phys(physpage, pgprot); |
1113 | } | 1137 | } |
1114 | 1138 | ||
1115 | #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) | 1139 | #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) |
1116 | #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) | 1140 | #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) |
1117 | #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) | 1141 | #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) |
1118 | #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) | 1142 | #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) |
1119 | 1143 | ||
1120 | #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) | 1144 | #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) |
1121 | #define pgd_offset_k(address) pgd_offset(&init_mm, address) | 1145 | #define pgd_offset_k(address) pgd_offset(&init_mm, address) |
1122 | 1146 | ||
1123 | #ifndef CONFIG_64BIT | 1147 | #ifndef CONFIG_64BIT |
1124 | 1148 | ||
1125 | #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) | 1149 | #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) |
1126 | #define pud_deref(pmd) ({ BUG(); 0UL; }) | 1150 | #define pud_deref(pmd) ({ BUG(); 0UL; }) |
1127 | #define pgd_deref(pmd) ({ BUG(); 0UL; }) | 1151 | #define pgd_deref(pmd) ({ BUG(); 0UL; }) |
1128 | 1152 | ||
1129 | #define pud_offset(pgd, address) ((pud_t *) pgd) | 1153 | #define pud_offset(pgd, address) ((pud_t *) pgd) |
1130 | #define pmd_offset(pud, address) ((pmd_t *) pud + pmd_index(address)) | 1154 | #define pmd_offset(pud, address) ((pmd_t *) pud + pmd_index(address)) |
1131 | 1155 | ||
1132 | #else /* CONFIG_64BIT */ | 1156 | #else /* CONFIG_64BIT */ |
1133 | 1157 | ||
1134 | #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) | 1158 | #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) |
1135 | #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) | 1159 | #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) |
1136 | #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) | 1160 | #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) |
1137 | 1161 | ||
1138 | static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) | 1162 | static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) |
1139 | { | 1163 | { |
1140 | pud_t *pud = (pud_t *) pgd; | 1164 | pud_t *pud = (pud_t *) pgd; |
1141 | if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) | 1165 | if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) |
1142 | pud = (pud_t *) pgd_deref(*pgd); | 1166 | pud = (pud_t *) pgd_deref(*pgd); |
1143 | return pud + pud_index(address); | 1167 | return pud + pud_index(address); |
1144 | } | 1168 | } |
1145 | 1169 | ||
1146 | static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) | 1170 | static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) |
1147 | { | 1171 | { |
1148 | pmd_t *pmd = (pmd_t *) pud; | 1172 | pmd_t *pmd = (pmd_t *) pud; |
1149 | if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) | 1173 | if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) |
1150 | pmd = (pmd_t *) pud_deref(*pud); | 1174 | pmd = (pmd_t *) pud_deref(*pud); |
1151 | return pmd + pmd_index(address); | 1175 | return pmd + pmd_index(address); |
1152 | } | 1176 | } |
1153 | 1177 | ||
1154 | #endif /* CONFIG_64BIT */ | 1178 | #endif /* CONFIG_64BIT */ |
1155 | 1179 | ||
1156 | #define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot)) | 1180 | #define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot)) |
1157 | #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) | 1181 | #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) |
1158 | #define pte_page(x) pfn_to_page(pte_pfn(x)) | 1182 | #define pte_page(x) pfn_to_page(pte_pfn(x)) |
1159 | 1183 | ||
1160 | #define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) | 1184 | #define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) |
1161 | 1185 | ||
1162 | /* Find an entry in the lowest level page table.. */ | 1186 | /* Find an entry in the lowest level page table.. */ |
1163 | #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) | 1187 | #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) |
1164 | #define pte_offset_kernel(pmd, address) pte_offset(pmd,address) | 1188 | #define pte_offset_kernel(pmd, address) pte_offset(pmd,address) |
1165 | #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) | 1189 | #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) |
1166 | #define pte_unmap(pte) do { } while (0) | 1190 | #define pte_unmap(pte) do { } while (0) |
1167 | 1191 | ||
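[Editor's note] Putting the offset helpers above together, a software walk from an address down to its pte looks like this (sketch only; real callers must check presence and take the appropriate locks):

	static pte_t *walk_to_pte(struct mm_struct *mm, unsigned long addr)
	{
		pgd_t *pgd = pgd_offset(mm, addr);
		pud_t *pud = pud_offset(pgd, addr);	/* folded on 31 bit */
		pmd_t *pmd = pmd_offset(pud, addr);

		return pte_offset_map(pmd, addr);
	}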
1192 | static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) | ||
1193 | { | ||
1194 | unsigned long sto = (unsigned long) pmdp - | ||
1195 | pmd_index(address) * sizeof(pmd_t); | ||
1196 | |||
1197 | if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) { | ||
1198 | asm volatile( | ||
1199 | " .insn rrf,0xb98e0000,%2,%3,0,0" | ||
1200 | : "=m" (*pmdp) | ||
1201 | : "m" (*pmdp), "a" (sto), | ||
1202 | "a" ((address & HPAGE_MASK)) | ||
1203 | : "cc" | ||
1204 | ); | ||
1205 | } | ||
1206 | } | ||
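[Editor's note] The sto computation in the new __pmd_idte() rewinds pmdp to the origin of its segment table, which the idte opcode (.insn rrf,0xb98e0000) requires. With 8-byte entries on 64 bit:

	/* pmdp == sto + pmd_index(address) * sizeof(pmd_t), hence
	 * sto = pmdp - ((address >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) * 8
	 */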
1207 | |||
1168 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 1208 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
1169 | #define __HAVE_ARCH_PGTABLE_DEPOSIT | 1209 | #define __HAVE_ARCH_PGTABLE_DEPOSIT |
1170 | extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); | 1210 | extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); |
1171 | 1211 | ||
1172 | #define __HAVE_ARCH_PGTABLE_WITHDRAW | 1212 | #define __HAVE_ARCH_PGTABLE_WITHDRAW |
1173 | extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); | 1213 | extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); |
1174 | 1214 | ||
1175 | static inline int pmd_trans_splitting(pmd_t pmd) | 1215 | static inline int pmd_trans_splitting(pmd_t pmd) |
1176 | { | 1216 | { |
1177 | return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; | 1217 | return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; |
1218 | } | ||
1219 | |||
1220 | static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, | ||
1221 | pmd_t *pmdp, pmd_t entry) | ||
1222 | { | ||
1223 | *pmdp = entry; | ||
1224 | } | ||
1225 | |||
1226 | static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) | ||
1227 | { | ||
1228 | unsigned long pgprot_pmd = 0; | ||
1229 | |||
1230 | if (pgprot_val(pgprot) & _PAGE_INVALID) { | ||
1231 | if (pgprot_val(pgprot) & _PAGE_SWT) | ||
1232 | pgprot_pmd |= _HPAGE_TYPE_NONE; | ||
1233 | pgprot_pmd |= _SEGMENT_ENTRY_INV; | ||
1234 | } | ||
1235 | if (pgprot_val(pgprot) & _PAGE_RO) | ||
1236 | pgprot_pmd |= _SEGMENT_ENTRY_RO; | ||
1237 | return pgprot_pmd; | ||
1238 | } | ||
1239 | |||
1240 | static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) | ||
1241 | { | ||
1242 | pmd_val(pmd) &= _SEGMENT_CHG_MASK; | ||
1243 | pmd_val(pmd) |= massage_pgprot_pmd(newprot); | ||
1244 | return pmd; | ||
1245 | } | ||
1246 | |||
1247 | static inline pmd_t pmd_mkhuge(pmd_t pmd) | ||
1248 | { | ||
1249 | pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; | ||
1250 | return pmd; | ||
1251 | } | ||
1252 | |||
1253 | static inline pmd_t pmd_mkwrite(pmd_t pmd) | ||
1254 | { | ||
1255 | pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO; | ||
1256 | return pmd; | ||
1257 | } | ||
1258 | |||
1259 | static inline pmd_t pmd_wrprotect(pmd_t pmd) | ||
1260 | { | ||
1261 | pmd_val(pmd) |= _SEGMENT_ENTRY_RO; | ||
1262 | return pmd; | ||
1263 | } | ||
1264 | |||
1265 | static inline pmd_t pmd_mkdirty(pmd_t pmd) | ||
1266 | { | ||
1267 | /* No dirty bit in the segment table entry. */ | ||
1268 | return pmd; | ||
1269 | } | ||
1270 | |||
1271 | static inline pmd_t pmd_mkold(pmd_t pmd) | ||
1272 | { | ||
1273 | /* No referenced bit in the segment table entry. */ | ||
1274 | return pmd; | ||
1275 | } | ||
1276 | |||
1277 | static inline pmd_t pmd_mkyoung(pmd_t pmd) | ||
1278 | { | ||
1279 | /* No referenced bit in the segment table entry. */ | ||
1280 | return pmd; | ||
1281 | } | ||
1282 | |||
1283 | #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG | ||
1284 | static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, | ||
1285 | unsigned long address, pmd_t *pmdp) | ||
1286 | { | ||
1287 | unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK; | ||
1288 | long tmp, rc; | ||
1289 | int counter; | ||
1290 | |||
1291 | rc = 0; | ||
1292 | if (MACHINE_HAS_RRBM) { | ||
1293 | counter = PTRS_PER_PTE >> 6; | ||
1294 | asm volatile( | ||
1295 | "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */ | ||
1296 | " ogr %1,%0\n" | ||
1297 | " la %3,0(%4,%3)\n" | ||
1298 | " brct %2,0b\n" | ||
1299 | : "=&d" (tmp), "+&d" (rc), "+d" (counter), | ||
1300 | "+a" (pmd_addr) | ||
1301 | : "a" (64 * 4096UL) : "cc"); | ||
1302 | rc = !!rc; | ||
1303 | } else { | ||
1304 | counter = PTRS_PER_PTE; | ||
1305 | asm volatile( | ||
1306 | "0: rrbe 0,%2\n" | ||
1307 | " la %2,0(%3,%2)\n" | ||
1308 | " brc 12,1f\n" | ||
1309 | " lhi %0,1\n" | ||
1310 | "1: brct %1,0b\n" | ||
1311 | : "+d" (rc), "+d" (counter), "+a" (pmd_addr) | ||
1312 | : "a" (4096UL) : "cc"); | ||
1313 | } | ||
1314 | return rc; | ||
1315 | } | ||
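[Editor's note] The two loop bounds follow from how many pages one instruction covers, assuming PTRS_PER_PTE == 256 and a 1 MB huge page:

	/* rrbm: resets the reference bits of 64 pages per execution, so
	 *       256 >> 6 = 4 iterations advancing pmd_addr by 64 * 4096
	 *       bytes; 4 * 256 KB = 1 MB, the whole segment.
	 * rrbe: one page per execution, 256 iterations of 4096 bytes.
	 */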
1316 | |||
1317 | #define __HAVE_ARCH_PMDP_GET_AND_CLEAR | ||
1318 | static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, | ||
1319 | unsigned long address, pmd_t *pmdp) | ||
1320 | { | ||
1321 | pmd_t pmd = *pmdp; | ||
1322 | |||
1323 | __pmd_idte(address, pmdp); | ||
1324 | pmd_clear(pmdp); | ||
1325 | return pmd; | ||
1326 | } | ||
1327 | |||
1328 | #define __HAVE_ARCH_PMDP_CLEAR_FLUSH | ||
1329 | static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma, | ||
1330 | unsigned long address, pmd_t *pmdp) | ||
1331 | { | ||
1332 | return pmdp_get_and_clear(vma->vm_mm, address, pmdp); | ||
1333 | } | ||
1334 | |||
1335 | #define __HAVE_ARCH_PMDP_INVALIDATE | ||
1336 | static inline void pmdp_invalidate(struct vm_area_struct *vma, | ||
1337 | unsigned long address, pmd_t *pmdp) | ||
1338 | { | ||
1339 | __pmd_idte(address, pmdp); | ||
1340 | } | ||
1341 | |||
1342 | static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) | ||
1343 | { | ||
1344 | pmd_t __pmd; | ||
1345 | pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); | ||
1346 | return __pmd; | ||
1347 | } | ||
1348 | |||
1349 | #define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot)) | ||
1350 | #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) | ||
1351 | |||
1352 | static inline int pmd_trans_huge(pmd_t pmd) | ||
1353 | { | ||
1354 | return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE; | ||
1355 | } | ||
1356 | |||
1357 | static inline int has_transparent_hugepage(void) | ||
1358 | { | ||
1359 | return MACHINE_HAS_HPAGE ? 1 : 0; | ||
1360 | } | ||
1361 | |||
1362 | static inline unsigned long pmd_pfn(pmd_t pmd) | ||
1363 | { | ||
1364 | if (pmd_trans_huge(pmd)) | ||
1365 | return pmd_val(pmd) >> HPAGE_SHIFT; | ||
1366 | else | ||
1367 | return pmd_val(pmd) >> PAGE_SHIFT; | ||
1178 | } | 1368 | } |
1179 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 1369 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
1180 | 1370 | ||
1181 | /* | 1371 | /* |
1182 | * 31 bit swap entry format: | 1372 | * 31 bit swap entry format: |
1183 | * A page-table entry has some bits we have to treat in a special way. | 1373 | * A page-table entry has some bits we have to treat in a special way. |
1184 | * Bits 0, 20 and 23 have to be zero, otherwise a specification | 1374 | * Bits 0, 20 and 23 have to be zero, otherwise a specification |
1185 | * exception will occur instead of a page translation exception. The | 1375 | * exception will occur instead of a page translation exception. The |
1186 | * specification exception has the bad habit of not storing necessary | 1376 | * specification exception has the bad habit of not storing necessary |
1187 | * information in the lowcore. | 1377 | * information in the lowcore. |
1188 | * Bit 21 and bit 22 are the page invalid bit and the page protection | 1378 | * Bit 21 and bit 22 are the page invalid bit and the page protection |
1189 | * bit. We set both to indicate a swapped page. | 1379 | * bit. We set both to indicate a swapped page. |
1190 | * Bit 30 and 31 are used to distinguish the different page types. For | 1380 | * Bit 30 and 31 are used to distinguish the different page types. For |
1191 | * a swapped page these bits need to be zero. | 1381 | * a swapped page these bits need to be zero. |
1192 | * This leaves the bits 1-19 and bits 24-29 to store type and offset. | 1382 | * This leaves the bits 1-19 and bits 24-29 to store type and offset. |
1193 | * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 | 1383 | * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 |
1194 | * plus 24 for the offset. | 1384 | * plus 24 for the offset. |
1195 | * 0| offset |0110|o|type |00| | 1385 | * 0| offset |0110|o|type |00| |
1196 | * 0 0000000001111111111 2222 2 22222 33 | 1386 | * 0 0000000001111111111 2222 2 22222 33 |
1197 | * 0 1234567890123456789 0123 4 56789 01 | 1387 | * 0 1234567890123456789 0123 4 56789 01 |
1198 | * | 1388 | * |
1199 | * 64 bit swap entry format: | 1389 | * 64 bit swap entry format: |
1200 | * A page-table entry has some bits we have to treat in a special way. | 1390 | * A page-table entry has some bits we have to treat in a special way. |
1201 | * Bits 52 and 55 have to be zero, otherwise a specification | 1391 | * Bits 52 and 55 have to be zero, otherwise a specification |
1202 | * exception will occur instead of a page translation exception. The | 1392 | * exception will occur instead of a page translation exception. The |
1203 | * specification exception has the bad habit of not storing necessary | 1393 | * specification exception has the bad habit of not storing necessary |
1204 | * information in the lowcore. | 1394 | * information in the lowcore. |
1205 | * Bit 53 and bit 54 are the page invalid bit and the page protection | 1395 | * Bit 53 and bit 54 are the page invalid bit and the page protection |
1206 | * bit. We set both to indicate a swapped page. | 1396 | * bit. We set both to indicate a swapped page. |
1207 | * Bit 62 and 63 are used to distinguish the different page types. For | 1397 | * Bit 62 and 63 are used to distinguish the different page types. For |
1208 | * a swapped page these bits need to be zero. | 1398 | * a swapped page these bits need to be zero. |
1209 | * This leaves the bits 0-51 and bits 56-61 to store type and offset. | 1399 | * This leaves the bits 0-51 and bits 56-61 to store type and offset. |
1210 | * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 | 1400 | * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 |
1211 | * plus 56 for the offset. | 1401 | * plus 56 for the offset. |
1212 | * | offset |0110|o|type |00| | 1402 | * | offset |0110|o|type |00| |
1213 | * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66 | 1403 | * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66 |
1214 | * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23 | 1404 | * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23 |
1215 | */ | 1405 | */ |
1216 | #ifndef CONFIG_64BIT | 1406 | #ifndef CONFIG_64BIT |
1217 | #define __SWP_OFFSET_MASK (~0UL >> 12) | 1407 | #define __SWP_OFFSET_MASK (~0UL >> 12) |
1218 | #else | 1408 | #else |
1219 | #define __SWP_OFFSET_MASK (~0UL >> 11) | 1409 | #define __SWP_OFFSET_MASK (~0UL >> 11) |
1220 | #endif | 1410 | #endif |
1221 | static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) | 1411 | static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) |
1222 | { | 1412 | { |
1223 | pte_t pte; | 1413 | pte_t pte; |
1224 | offset &= __SWP_OFFSET_MASK; | 1414 | offset &= __SWP_OFFSET_MASK; |
1225 | pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) | | 1415 | pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) | |
1226 | ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); | 1416 | ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); |
1227 | return pte; | 1417 | return pte; |
1228 | } | 1418 | } |
1229 | 1419 | ||
1230 | #define __swp_type(entry) (((entry).val >> 2) & 0x1f) | 1420 | #define __swp_type(entry) (((entry).val >> 2) & 0x1f) |
1231 | #define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1)) | 1421 | #define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1)) |
1232 | #define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) | 1422 | #define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) |
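[Editor's note] A worked round trip through mk_swap_pte() and the decode macros, with hypothetical values type = 3, offset = 0x123 (and assuming _PAGE_TYPE_SWAP occupies none of the type/offset bit positions):

	/* val = _PAGE_TYPE_SWAP
	 *     | (3 << 2)		   type in bits 2..6
	 *     | ((0x123 & 1UL) << 7)	   offset bit 0 in bit 7
	 *     | ((0x123 & ~1UL) << 11)	   offset bits 1.. from bit 12
	 *
	 * __swp_type(val)   = (val >> 2) & 0x1f              = 3
	 * __swp_offset(val) = (val >> 11) | ((val >> 7) & 1) = 0x122 | 1 = 0x123
	 */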
1233 | 1423 | ||
1234 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) | 1424 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) |
1235 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) | 1425 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) |
1236 | 1426 | ||
1237 | #ifndef CONFIG_64BIT | 1427 | #ifndef CONFIG_64BIT |
1238 | # define PTE_FILE_MAX_BITS 26 | 1428 | # define PTE_FILE_MAX_BITS 26 |
1239 | #else /* CONFIG_64BIT */ | 1429 | #else /* CONFIG_64BIT */ |
1240 | # define PTE_FILE_MAX_BITS 59 | 1430 | # define PTE_FILE_MAX_BITS 59 |
1241 | #endif /* CONFIG_64BIT */ | 1431 | #endif /* CONFIG_64BIT */ |
1242 | 1432 | ||
1243 | #define pte_to_pgoff(__pte) \ | 1433 | #define pte_to_pgoff(__pte) \ |
1244 | ((((__pte).pte >> 12) << 7) + (((__pte).pte >> 1) & 0x7f)) | 1434 | ((((__pte).pte >> 12) << 7) + (((__pte).pte >> 1) & 0x7f)) |
1245 | 1435 | ||
1246 | #define pgoff_to_pte(__off) \ | 1436 | #define pgoff_to_pte(__off) \ |
1247 | ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \ | 1437 | ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \ |
1248 | | _PAGE_TYPE_FILE }) | 1438 | | _PAGE_TYPE_FILE }) |
1249 | 1439 | ||
1250 | #endif /* !__ASSEMBLY__ */ | 1440 | #endif /* !__ASSEMBLY__ */ |
1251 | 1441 | ||
1252 | #define kern_addr_valid(addr) (1) | 1442 | #define kern_addr_valid(addr) (1) |
1253 | 1443 | ||
1254 | extern int vmem_add_mapping(unsigned long start, unsigned long size); | 1444 | extern int vmem_add_mapping(unsigned long start, unsigned long size); |
1255 | extern int vmem_remove_mapping(unsigned long start, unsigned long size); | 1445 | extern int vmem_remove_mapping(unsigned long start, unsigned long size); |
1256 | extern int s390_enable_sie(void); | 1446 | extern int s390_enable_sie(void); |
1257 | 1447 | ||
1258 | /* | 1448 | /* |
1259 | * No page table caches to initialise | 1449 | * No page table caches to initialise |
1260 | */ | 1450 | */ |
1261 | #define pgtable_cache_init() do { } while (0) | 1451 | #define pgtable_cache_init() do { } while (0) |
1262 | 1452 | ||
1263 | #include <asm-generic/pgtable.h> | 1453 | #include <asm-generic/pgtable.h> |
1264 | 1454 | ||
1265 | #endif /* _S390_PAGE_H */ | 1455 | #endif /* _S390_PAGE_H */ |
1266 | 1456 |
arch/s390/include/asm/setup.h
1 | /* | 1 | /* |
2 | * S390 version | 2 | * S390 version |
3 | * Copyright IBM Corp. 1999, 2010 | 3 | * Copyright IBM Corp. 1999, 2010 |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #ifndef _ASM_S390_SETUP_H | 6 | #ifndef _ASM_S390_SETUP_H |
7 | #define _ASM_S390_SETUP_H | 7 | #define _ASM_S390_SETUP_H |
8 | 8 | ||
9 | #define COMMAND_LINE_SIZE 4096 | 9 | #define COMMAND_LINE_SIZE 4096 |
10 | 10 | ||
11 | #define ARCH_COMMAND_LINE_SIZE 896 | 11 | #define ARCH_COMMAND_LINE_SIZE 896 |
12 | 12 | ||
13 | #ifdef __KERNEL__ | 13 | #ifdef __KERNEL__ |
14 | 14 | ||
15 | #define PARMAREA 0x10400 | 15 | #define PARMAREA 0x10400 |
16 | #define MEMORY_CHUNKS 256 | 16 | #define MEMORY_CHUNKS 256 |
17 | 17 | ||
18 | #ifndef __ASSEMBLY__ | 18 | #ifndef __ASSEMBLY__ |
19 | 19 | ||
20 | #include <asm/lowcore.h> | 20 | #include <asm/lowcore.h> |
21 | #include <asm/types.h> | 21 | #include <asm/types.h> |
22 | 22 | ||
23 | #ifndef CONFIG_64BIT | 23 | #ifndef CONFIG_64BIT |
24 | #define IPL_DEVICE (*(unsigned long *) (0x10404)) | 24 | #define IPL_DEVICE (*(unsigned long *) (0x10404)) |
25 | #define INITRD_START (*(unsigned long *) (0x1040C)) | 25 | #define INITRD_START (*(unsigned long *) (0x1040C)) |
26 | #define INITRD_SIZE (*(unsigned long *) (0x10414)) | 26 | #define INITRD_SIZE (*(unsigned long *) (0x10414)) |
27 | #define OLDMEM_BASE (*(unsigned long *) (0x1041C)) | 27 | #define OLDMEM_BASE (*(unsigned long *) (0x1041C)) |
28 | #define OLDMEM_SIZE (*(unsigned long *) (0x10424)) | 28 | #define OLDMEM_SIZE (*(unsigned long *) (0x10424)) |
29 | #else /* CONFIG_64BIT */ | 29 | #else /* CONFIG_64BIT */ |
30 | #define IPL_DEVICE (*(unsigned long *) (0x10400)) | 30 | #define IPL_DEVICE (*(unsigned long *) (0x10400)) |
31 | #define INITRD_START (*(unsigned long *) (0x10408)) | 31 | #define INITRD_START (*(unsigned long *) (0x10408)) |
32 | #define INITRD_SIZE (*(unsigned long *) (0x10410)) | 32 | #define INITRD_SIZE (*(unsigned long *) (0x10410)) |
33 | #define OLDMEM_BASE (*(unsigned long *) (0x10418)) | 33 | #define OLDMEM_BASE (*(unsigned long *) (0x10418)) |
34 | #define OLDMEM_SIZE (*(unsigned long *) (0x10420)) | 34 | #define OLDMEM_SIZE (*(unsigned long *) (0x10420)) |
35 | #endif /* CONFIG_64BIT */ | 35 | #endif /* CONFIG_64BIT */ |
36 | #define COMMAND_LINE ((char *) (0x10480)) | 36 | #define COMMAND_LINE ((char *) (0x10480)) |
37 | 37 | ||
38 | #define CHUNK_READ_WRITE 0 | 38 | #define CHUNK_READ_WRITE 0 |
39 | #define CHUNK_READ_ONLY 1 | 39 | #define CHUNK_READ_ONLY 1 |
40 | #define CHUNK_OLDMEM 4 | 40 | #define CHUNK_OLDMEM 4 |
41 | #define CHUNK_CRASHK 5 | 41 | #define CHUNK_CRASHK 5 |
42 | 42 | ||
43 | struct mem_chunk { | 43 | struct mem_chunk { |
44 | unsigned long addr; | 44 | unsigned long addr; |
45 | unsigned long size; | 45 | unsigned long size; |
46 | int type; | 46 | int type; |
47 | }; | 47 | }; |
48 | 48 | ||
49 | extern struct mem_chunk memory_chunk[]; | 49 | extern struct mem_chunk memory_chunk[]; |
50 | extern unsigned long real_memory_size; | 50 | extern unsigned long real_memory_size; |
51 | extern int memory_end_set; | 51 | extern int memory_end_set; |
52 | extern unsigned long memory_end; | 52 | extern unsigned long memory_end; |
53 | 53 | ||
54 | void detect_memory_layout(struct mem_chunk chunk[]); | 54 | void detect_memory_layout(struct mem_chunk chunk[]); |
55 | void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, | 55 | void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, |
56 | unsigned long size, int type); | 56 | unsigned long size, int type); |
57 | 57 | ||
58 | #define PRIMARY_SPACE_MODE 0 | 58 | #define PRIMARY_SPACE_MODE 0 |
59 | #define ACCESS_REGISTER_MODE 1 | 59 | #define ACCESS_REGISTER_MODE 1 |
60 | #define SECONDARY_SPACE_MODE 2 | 60 | #define SECONDARY_SPACE_MODE 2 |
61 | #define HOME_SPACE_MODE 3 | 61 | #define HOME_SPACE_MODE 3 |
62 | 62 | ||
63 | extern unsigned int s390_user_mode; | 63 | extern unsigned int s390_user_mode; |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * Machine features detected in head.S | 66 | * Machine features detected in head.S |
67 | */ | 67 | */ |
68 | 68 | ||
69 | #define MACHINE_FLAG_VM (1UL << 0) | 69 | #define MACHINE_FLAG_VM (1UL << 0) |
70 | #define MACHINE_FLAG_IEEE (1UL << 1) | 70 | #define MACHINE_FLAG_IEEE (1UL << 1) |
71 | #define MACHINE_FLAG_CSP (1UL << 3) | 71 | #define MACHINE_FLAG_CSP (1UL << 3) |
72 | #define MACHINE_FLAG_MVPG (1UL << 4) | 72 | #define MACHINE_FLAG_MVPG (1UL << 4) |
73 | #define MACHINE_FLAG_DIAG44 (1UL << 5) | 73 | #define MACHINE_FLAG_DIAG44 (1UL << 5) |
74 | #define MACHINE_FLAG_IDTE (1UL << 6) | 74 | #define MACHINE_FLAG_IDTE (1UL << 6) |
75 | #define MACHINE_FLAG_DIAG9C (1UL << 7) | 75 | #define MACHINE_FLAG_DIAG9C (1UL << 7) |
76 | #define MACHINE_FLAG_MVCOS (1UL << 8) | 76 | #define MACHINE_FLAG_MVCOS (1UL << 8) |
77 | #define MACHINE_FLAG_KVM (1UL << 9) | 77 | #define MACHINE_FLAG_KVM (1UL << 9) |
78 | #define MACHINE_FLAG_HPAGE (1UL << 10) | 78 | #define MACHINE_FLAG_HPAGE (1UL << 10) |
79 | #define MACHINE_FLAG_PFMF (1UL << 11) | 79 | #define MACHINE_FLAG_PFMF (1UL << 11) |
80 | #define MACHINE_FLAG_LPAR (1UL << 12) | 80 | #define MACHINE_FLAG_LPAR (1UL << 12) |
81 | #define MACHINE_FLAG_SPP (1UL << 13) | 81 | #define MACHINE_FLAG_SPP (1UL << 13) |
82 | #define MACHINE_FLAG_TOPOLOGY (1UL << 14) | 82 | #define MACHINE_FLAG_TOPOLOGY (1UL << 14) |
83 | #define MACHINE_FLAG_TE (1UL << 15) | 83 | #define MACHINE_FLAG_TE (1UL << 15) |
84 | #define MACHINE_FLAG_RRBM (1UL << 16) | ||
84 | 85 | ||
85 | #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) | 86 | #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) |
86 | #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) | 87 | #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) |
87 | #define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR) | 88 | #define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR) |
88 | 89 | ||
89 | #define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C) | 90 | #define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C) |
90 | 91 | ||
91 | #ifndef CONFIG_64BIT | 92 | #ifndef CONFIG_64BIT |
92 | #define MACHINE_HAS_IEEE (S390_lowcore.machine_flags & MACHINE_FLAG_IEEE) | 93 | #define MACHINE_HAS_IEEE (S390_lowcore.machine_flags & MACHINE_FLAG_IEEE) |
93 | #define MACHINE_HAS_CSP (S390_lowcore.machine_flags & MACHINE_FLAG_CSP) | 94 | #define MACHINE_HAS_CSP (S390_lowcore.machine_flags & MACHINE_FLAG_CSP) |
94 | #define MACHINE_HAS_IDTE (0) | 95 | #define MACHINE_HAS_IDTE (0) |
95 | #define MACHINE_HAS_DIAG44 (1) | 96 | #define MACHINE_HAS_DIAG44 (1) |
96 | #define MACHINE_HAS_MVPG (S390_lowcore.machine_flags & MACHINE_FLAG_MVPG) | 97 | #define MACHINE_HAS_MVPG (S390_lowcore.machine_flags & MACHINE_FLAG_MVPG) |
97 | #define MACHINE_HAS_MVCOS (0) | 98 | #define MACHINE_HAS_MVCOS (0) |
98 | #define MACHINE_HAS_HPAGE (0) | 99 | #define MACHINE_HAS_HPAGE (0) |
99 | #define MACHINE_HAS_PFMF (0) | 100 | #define MACHINE_HAS_PFMF (0) |
100 | #define MACHINE_HAS_SPP (0) | 101 | #define MACHINE_HAS_SPP (0) |
101 | #define MACHINE_HAS_TOPOLOGY (0) | 102 | #define MACHINE_HAS_TOPOLOGY (0) |
102 | #define MACHINE_HAS_TE (0) | 103 | #define MACHINE_HAS_TE (0) |
104 | #define MACHINE_HAS_RRBM (0) | ||
103 | #else /* CONFIG_64BIT */ | 105 | #else /* CONFIG_64BIT */ |
104 | #define MACHINE_HAS_IEEE (1) | 106 | #define MACHINE_HAS_IEEE (1) |
105 | #define MACHINE_HAS_CSP (1) | 107 | #define MACHINE_HAS_CSP (1) |
106 | #define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE) | 108 | #define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE) |
107 | #define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44) | 109 | #define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44) |
108 | #define MACHINE_HAS_MVPG (1) | 110 | #define MACHINE_HAS_MVPG (1) |
109 | #define MACHINE_HAS_MVCOS (S390_lowcore.machine_flags & MACHINE_FLAG_MVCOS) | 111 | #define MACHINE_HAS_MVCOS (S390_lowcore.machine_flags & MACHINE_FLAG_MVCOS) |
110 | #define MACHINE_HAS_HPAGE (S390_lowcore.machine_flags & MACHINE_FLAG_HPAGE) | 112 | #define MACHINE_HAS_HPAGE (S390_lowcore.machine_flags & MACHINE_FLAG_HPAGE) |
111 | #define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) | 113 | #define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) |
112 | #define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) | 114 | #define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) |
113 | #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) | 115 | #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) |
114 | #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) | 116 | #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) |
117 | #define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM) | ||
115 | #endif /* CONFIG_64BIT */ | 118 | #endif /* CONFIG_64BIT */ |
116 | 119 | ||
117 | #define ZFCPDUMP_HSA_SIZE (32UL<<20) | 120 | #define ZFCPDUMP_HSA_SIZE (32UL<<20) |
118 | #define ZFCPDUMP_HSA_SIZE_MAX (64UL<<20) | 121 | #define ZFCPDUMP_HSA_SIZE_MAX (64UL<<20) |
119 | 122 | ||
120 | /* | 123 | /* |
121 | * Console mode. Override with conmode= | 124 | * Console mode. Override with conmode= |
122 | */ | 125 | */ |
123 | extern unsigned int console_mode; | 126 | extern unsigned int console_mode; |
124 | extern unsigned int console_devno; | 127 | extern unsigned int console_devno; |
125 | extern unsigned int console_irq; | 128 | extern unsigned int console_irq; |
126 | 129 | ||
127 | extern char vmhalt_cmd[]; | 130 | extern char vmhalt_cmd[]; |
128 | extern char vmpoff_cmd[]; | 131 | extern char vmpoff_cmd[]; |
129 | 132 | ||
130 | #define CONSOLE_IS_UNDEFINED (console_mode == 0) | 133 | #define CONSOLE_IS_UNDEFINED (console_mode == 0) |
131 | #define CONSOLE_IS_SCLP (console_mode == 1) | 134 | #define CONSOLE_IS_SCLP (console_mode == 1) |
132 | #define CONSOLE_IS_3215 (console_mode == 2) | 135 | #define CONSOLE_IS_3215 (console_mode == 2) |
133 | #define CONSOLE_IS_3270 (console_mode == 3) | 136 | #define CONSOLE_IS_3270 (console_mode == 3) |
134 | #define SET_CONSOLE_SCLP do { console_mode = 1; } while (0) | 137 | #define SET_CONSOLE_SCLP do { console_mode = 1; } while (0) |
135 | #define SET_CONSOLE_3215 do { console_mode = 2; } while (0) | 138 | #define SET_CONSOLE_3215 do { console_mode = 2; } while (0) |
136 | #define SET_CONSOLE_3270 do { console_mode = 3; } while (0) | 139 | #define SET_CONSOLE_3270 do { console_mode = 3; } while (0) |
137 | 140 | ||
138 | #define NSS_NAME_SIZE 8 | 141 | #define NSS_NAME_SIZE 8 |
139 | extern char kernel_nss_name[]; | 142 | extern char kernel_nss_name[]; |
140 | 143 | ||
141 | #ifdef CONFIG_PFAULT | 144 | #ifdef CONFIG_PFAULT |
142 | extern int pfault_init(void); | 145 | extern int pfault_init(void); |
143 | extern void pfault_fini(void); | 146 | extern void pfault_fini(void); |
144 | #else /* CONFIG_PFAULT */ | 147 | #else /* CONFIG_PFAULT */ |
145 | #define pfault_init() ({-1;}) | 148 | #define pfault_init() ({-1;}) |
146 | #define pfault_fini() do { } while (0) | 149 | #define pfault_fini() do { } while (0) |
147 | #endif /* CONFIG_PFAULT */ | 150 | #endif /* CONFIG_PFAULT */ |
148 | 151 | ||
149 | extern void cmma_init(void); | 152 | extern void cmma_init(void); |
150 | 153 | ||
151 | extern void (*_machine_restart)(char *command); | 154 | extern void (*_machine_restart)(char *command); |
152 | extern void (*_machine_halt)(void); | 155 | extern void (*_machine_halt)(void); |
153 | extern void (*_machine_power_off)(void); | 156 | extern void (*_machine_power_off)(void); |
154 | 157 | ||
155 | #else /* __ASSEMBLY__ */ | 158 | #else /* __ASSEMBLY__ */ |
156 | 159 | ||
157 | #ifndef CONFIG_64BIT | 160 | #ifndef CONFIG_64BIT |
158 | #define IPL_DEVICE 0x10404 | 161 | #define IPL_DEVICE 0x10404 |
159 | #define INITRD_START 0x1040C | 162 | #define INITRD_START 0x1040C |
160 | #define INITRD_SIZE 0x10414 | 163 | #define INITRD_SIZE 0x10414 |
161 | #define OLDMEM_BASE 0x1041C | 164 | #define OLDMEM_BASE 0x1041C |
162 | #define OLDMEM_SIZE 0x10424 | 165 | #define OLDMEM_SIZE 0x10424 |
163 | #else /* CONFIG_64BIT */ | 166 | #else /* CONFIG_64BIT */ |
164 | #define IPL_DEVICE 0x10400 | 167 | #define IPL_DEVICE 0x10400 |
165 | #define INITRD_START 0x10408 | 168 | #define INITRD_START 0x10408 |
166 | #define INITRD_SIZE 0x10410 | 169 | #define INITRD_SIZE 0x10410 |
167 | #define OLDMEM_BASE 0x10418 | 170 | #define OLDMEM_BASE 0x10418 |
168 | #define OLDMEM_SIZE 0x10420 | 171 | #define OLDMEM_SIZE 0x10420 |
169 | #endif /* CONFIG_64BIT */ | 172 | #endif /* CONFIG_64BIT */ |
170 | #define COMMAND_LINE 0x10480 | 173 | #define COMMAND_LINE 0x10480 |
171 | 174 | ||
172 | #endif /* __ASSEMBLY__ */ | 175 | #endif /* __ASSEMBLY__ */ |
173 | #endif /* __KERNEL__ */ | 176 | #endif /* __KERNEL__ */ |
174 | #endif /* _ASM_S390_SETUP_H */ | 177 | #endif /* _ASM_S390_SETUP_H */ |
175 | 178 |
arch/s390/include/asm/tlb.h
1 | #ifndef _S390_TLB_H | 1 | #ifndef _S390_TLB_H |
2 | #define _S390_TLB_H | 2 | #define _S390_TLB_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * TLB flushing on s390 is complicated. The following requirement | 5 | * TLB flushing on s390 is complicated. The following requirement |
6 | * from the principles of operation is the most arduous: | 6 | * from the principles of operation is the most arduous: |
7 | * | 7 | * |
8 | * "A valid table entry must not be changed while it is attached | 8 | * "A valid table entry must not be changed while it is attached |
9 | * to any CPU and may be used for translation by that CPU except to | 9 | * to any CPU and may be used for translation by that CPU except to |
10 | * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY, | 10 | * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY, |
11 | * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page | 11 | * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page |
12 | * table entry, or (3) make a change by means of a COMPARE AND SWAP | 12 | * table entry, or (3) make a change by means of a COMPARE AND SWAP |
13 | * AND PURGE instruction that purges the TLB." | 13 | * AND PURGE instruction that purges the TLB." |
14 | * | 14 | * |
15 | * The modification of a pte of an active mm struct therefore is | 15 | * The modification of a pte of an active mm struct therefore is |
16 | * a two-step process: i) invalidate the pte, ii) store the new pte. | 16 | * a two-step process: i) invalidate the pte, ii) store the new pte. |
17 | * This is true for the page protection bit as well. | 17 | * This is true for the page protection bit as well. |
18 | * The only possible optimization is to flush at the beginning of | 18 | * The only possible optimization is to flush at the beginning of |
19 | * a tlb_gather_mmu cycle if the mm_struct is currently not in use. | 19 | * a tlb_gather_mmu cycle if the mm_struct is currently not in use. |
20 | * | 20 | * |
21 | * Pages used for the page tables are a different story. FIXME: more | 21 | * Pages used for the page tables are a different story. FIXME: more |
22 | */ | 22 | */ |
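Concretely, the rule quoted above turns every in-place update of a pte that may be attached to a running CPU into an invalidate-then-store sequence. A minimal sketch of the pattern, modeled on the existing __ptep_ipte() helper (illustrative; the real primitives also handle pgste bookkeeping):

        /* Sketch: change a live pte in the two architecturally safe steps. */
        static inline void ptep_invalidate_and_set(unsigned long address,
                                                   pte_t *ptep, pte_t new)
        {
                unsigned long pto = (unsigned long) ptep;

                /* step i: IPTE invalidates the entry and purges the TLB */
                asm volatile(
                        "       ipte    %2,%3"
                        : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
                /* step ii: only now may the new value be stored */
                *ptep = new;
        }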
23 | 23 | ||
24 | #include <linux/mm.h> | 24 | #include <linux/mm.h> |
25 | #include <linux/pagemap.h> | 25 | #include <linux/pagemap.h> |
26 | #include <linux/swap.h> | 26 | #include <linux/swap.h> |
27 | #include <asm/processor.h> | 27 | #include <asm/processor.h> |
28 | #include <asm/pgalloc.h> | 28 | #include <asm/pgalloc.h> |
29 | #include <asm/tlbflush.h> | 29 | #include <asm/tlbflush.h> |
30 | 30 | ||
31 | struct mmu_gather { | 31 | struct mmu_gather { |
32 | struct mm_struct *mm; | 32 | struct mm_struct *mm; |
33 | struct mmu_table_batch *batch; | 33 | struct mmu_table_batch *batch; |
34 | unsigned int fullmm; | 34 | unsigned int fullmm; |
35 | }; | 35 | }; |
36 | 36 | ||
37 | struct mmu_table_batch { | 37 | struct mmu_table_batch { |
38 | struct rcu_head rcu; | 38 | struct rcu_head rcu; |
39 | unsigned int nr; | 39 | unsigned int nr; |
40 | void *tables[0]; | 40 | void *tables[0]; |
41 | }; | 41 | }; |
42 | 42 | ||
43 | #define MAX_TABLE_BATCH \ | 43 | #define MAX_TABLE_BATCH \ |
44 | ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) | 44 | ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) |
45 | 45 | ||
46 | extern void tlb_table_flush(struct mmu_gather *tlb); | 46 | extern void tlb_table_flush(struct mmu_gather *tlb); |
47 | extern void tlb_remove_table(struct mmu_gather *tlb, void *table); | 47 | extern void tlb_remove_table(struct mmu_gather *tlb, void *table); |
48 | 48 | ||
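tlb_remove_table() and tlb_table_flush() implement RCU-deferred freeing of page-table pages: tables are queued in a page-sized mmu_table_batch, and a full batch (or the end of the gather) hands the whole page to call_rcu(). A simplified sketch of that contract; the real implementation in arch/s390/mm/pgtable.c additionally flushes synchronously when the batch page cannot be allocated:

        /* Sketch: queue a table for RCU freeing, flushing when the batch fills. */
        static void tlb_remove_table_sketch(struct mmu_gather *tlb, void *table)
        {
                struct mmu_table_batch **batch = &tlb->batch;

                if (*batch == NULL) {
                        *batch = (struct mmu_table_batch *)
                                __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
                        /* real code: on failure, flush the mm and free 'table' now */
                        (*batch)->nr = 0;
                }
                (*batch)->tables[(*batch)->nr++] = table;
                if ((*batch)->nr == MAX_TABLE_BATCH)
                        tlb_table_flush(tlb);   /* call_rcu() on the full batch */
        }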
49 | static inline void tlb_gather_mmu(struct mmu_gather *tlb, | 49 | static inline void tlb_gather_mmu(struct mmu_gather *tlb, |
50 | struct mm_struct *mm, | 50 | struct mm_struct *mm, |
51 | unsigned int full_mm_flush) | 51 | unsigned int full_mm_flush) |
52 | { | 52 | { |
53 | tlb->mm = mm; | 53 | tlb->mm = mm; |
54 | tlb->fullmm = full_mm_flush; | 54 | tlb->fullmm = full_mm_flush; |
55 | tlb->batch = NULL; | 55 | tlb->batch = NULL; |
56 | if (tlb->fullmm) | 56 | if (tlb->fullmm) |
57 | __tlb_flush_mm(mm); | 57 | __tlb_flush_mm(mm); |
58 | } | 58 | } |
59 | 59 | ||
60 | static inline void tlb_flush_mmu(struct mmu_gather *tlb) | 60 | static inline void tlb_flush_mmu(struct mmu_gather *tlb) |
61 | { | 61 | { |
62 | tlb_table_flush(tlb); | 62 | tlb_table_flush(tlb); |
63 | } | 63 | } |
64 | 64 | ||
65 | static inline void tlb_finish_mmu(struct mmu_gather *tlb, | 65 | static inline void tlb_finish_mmu(struct mmu_gather *tlb, |
66 | unsigned long start, unsigned long end) | 66 | unsigned long start, unsigned long end) |
67 | { | 67 | { |
68 | tlb_table_flush(tlb); | 68 | tlb_table_flush(tlb); |
69 | } | 69 | } |
70 | 70 | ||
71 | /* | 71 | /* |
72 | * Release the page cache reference for a pte removed by | 72 | * Release the page cache reference for a pte removed by |
73 | * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page | 73 | * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page |
74 | * has already been flushed, so just do free_page_and_swap_cache. | 74 | * has already been flushed, so just do free_page_and_swap_cache. |
75 | */ | 75 | */ |
76 | static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) | 76 | static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) |
77 | { | 77 | { |
78 | free_page_and_swap_cache(page); | 78 | free_page_and_swap_cache(page); |
79 | return 1; /* avoid calling tlb_flush_mmu */ | 79 | return 1; /* avoid calling tlb_flush_mmu */ |
80 | } | 80 | } |
81 | 81 | ||
82 | static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) | 82 | static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) |
83 | { | 83 | { |
84 | free_page_and_swap_cache(page); | 84 | free_page_and_swap_cache(page); |
85 | } | 85 | } |
86 | 86 | ||
87 | /* | 87 | /* |
88 | * pte_free_tlb frees a pte table and clears the CRSTE for the | 88 | * pte_free_tlb frees a pte table and clears the CRSTE for the |
89 | * page table from the tlb. | 89 | * page table from the tlb. |
90 | */ | 90 | */ |
91 | static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, | 91 | static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, |
92 | unsigned long address) | 92 | unsigned long address) |
93 | { | 93 | { |
94 | if (!tlb->fullmm) | 94 | if (!tlb->fullmm) |
95 | return page_table_free_rcu(tlb, (unsigned long *) pte); | 95 | return page_table_free_rcu(tlb, (unsigned long *) pte); |
96 | page_table_free(tlb->mm, (unsigned long *) pte); | 96 | page_table_free(tlb->mm, (unsigned long *) pte); |
97 | } | 97 | } |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * pmd_free_tlb frees a pmd table and clears the CRSTE for the | 100 | * pmd_free_tlb frees a pmd table and clears the CRSTE for the |
101 | * segment table entry from the tlb. | 101 | * segment table entry from the tlb. |
102 | * If the mm uses a two level page table the single pmd is freed | 102 | * If the mm uses a two level page table the single pmd is freed |
103 | * as the pgd. pmd_free_tlb checks the asce_limit against 2GB | 103 | * as the pgd. pmd_free_tlb checks the asce_limit against 2GB |
104 | * to avoid the double free of the pmd in this case. | 104 | * to avoid the double free of the pmd in this case. |
105 | */ | 105 | */ |
106 | static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, | 106 | static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, |
107 | unsigned long address) | 107 | unsigned long address) |
108 | { | 108 | { |
109 | #ifdef CONFIG_64BIT | 109 | #ifdef CONFIG_64BIT |
110 | if (tlb->mm->context.asce_limit <= (1UL << 31)) | 110 | if (tlb->mm->context.asce_limit <= (1UL << 31)) |
111 | return; | 111 | return; |
112 | if (!tlb->fullmm) | 112 | if (!tlb->fullmm) |
113 | return tlb_remove_table(tlb, pmd); | 113 | return tlb_remove_table(tlb, pmd); |
114 | crst_table_free(tlb->mm, (unsigned long *) pmd); | 114 | crst_table_free(tlb->mm, (unsigned long *) pmd); |
115 | #endif | 115 | #endif |
116 | } | 116 | } |
117 | 117 | ||
118 | /* | 118 | /* |
119 | * pud_free_tlb frees a pud table and clears the CRSTE for the | 119 | * pud_free_tlb frees a pud table and clears the CRSTE for the |
120 | * region third table entry from the tlb. | 120 | * region third table entry from the tlb. |
121 | * If the mm uses a three level page table the single pud is freed | 121 | * If the mm uses a three level page table the single pud is freed |
122 | * as the pgd. pud_free_tlb checks the asce_limit against 4TB | 122 | * as the pgd. pud_free_tlb checks the asce_limit against 4TB |
123 | * to avoid the double free of the pud in this case. | 123 | * to avoid the double free of the pud in this case. |
124 | */ | 124 | */ |
125 | static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, | 125 | static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, |
126 | unsigned long address) | 126 | unsigned long address) |
127 | { | 127 | { |
128 | #ifdef CONFIG_64BIT | 128 | #ifdef CONFIG_64BIT |
129 | if (tlb->mm->context.asce_limit <= (1UL << 42)) | 129 | if (tlb->mm->context.asce_limit <= (1UL << 42)) |
130 | return; | 130 | return; |
131 | if (!tlb->fullmm) | 131 | if (!tlb->fullmm) |
132 | return tlb_remove_table(tlb, pud); | 132 | return tlb_remove_table(tlb, pud); |
133 | crst_table_free(tlb->mm, (unsigned long *) pud); | 133 | crst_table_free(tlb->mm, (unsigned long *) pud); |
134 | #endif | 134 | #endif |
135 | } | 135 | } |
136 | 136 | ||
137 | #define tlb_start_vma(tlb, vma) do { } while (0) | 137 | #define tlb_start_vma(tlb, vma) do { } while (0) |
138 | #define tlb_end_vma(tlb, vma) do { } while (0) | 138 | #define tlb_end_vma(tlb, vma) do { } while (0) |
139 | #define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0) | 139 | #define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0) |
140 | #define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0) | ||
140 | #define tlb_migrate_finish(mm) do { } while (0) | 141 | #define tlb_migrate_finish(mm) do { } while (0) |
141 | 142 | ||
142 | #endif /* _S390_TLB_H */ | 143 | #endif /* _S390_TLB_H */ |
143 | 144 |
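The functional change in this header is the tlb_remove_pmd_tlb_entry() no-op added above: generic transparent-hugepage teardown invokes it after clearing a huge pmd, but the s390 pmdp primitives purge the TLB with IDTE at clear time, so there is nothing left to defer to the mmu_gather. A hedged sketch of the generic call shape it satisfies (modeled loosely on mm/huge_memory.c):

        /* Sketch: how generic THP code uses the hook; a no-op is correct here. */
        static void zap_huge_pmd_sketch(struct mmu_gather *tlb,
                                        struct vm_area_struct *vma,
                                        pmd_t *pmd, unsigned long addr)
        {
                pmd_t orig_pmd;

                orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd);
                /* s390: the IDTE inside the pmdp primitive has already flushed */
                tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
                /* ... release the huge page, adjust the RSS counters ... */
                (void) orig_pmd;
        }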
arch/s390/kernel/early.c
1 | /* | 1 | /* |
2 | * Copyright IBM Corp. 2007, 2009 | 2 | * Copyright IBM Corp. 2007, 2009 |
3 | * Author(s): Hongjie Yang <hongjie@us.ibm.com>, | 3 | * Author(s): Hongjie Yang <hongjie@us.ibm.com>, |
4 | * Heiko Carstens <heiko.carstens@de.ibm.com> | 4 | * Heiko Carstens <heiko.carstens@de.ibm.com> |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #define KMSG_COMPONENT "setup" | 7 | #define KMSG_COMPONENT "setup" |
8 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | 8 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
9 | 9 | ||
10 | #include <linux/compiler.h> | 10 | #include <linux/compiler.h> |
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/errno.h> | 12 | #include <linux/errno.h> |
13 | #include <linux/string.h> | 13 | #include <linux/string.h> |
14 | #include <linux/ctype.h> | 14 | #include <linux/ctype.h> |
15 | #include <linux/ftrace.h> | 15 | #include <linux/ftrace.h> |
16 | #include <linux/lockdep.h> | 16 | #include <linux/lockdep.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/pfn.h> | 18 | #include <linux/pfn.h> |
19 | #include <linux/uaccess.h> | 19 | #include <linux/uaccess.h> |
20 | #include <linux/kernel.h> | 20 | #include <linux/kernel.h> |
21 | #include <asm/ebcdic.h> | 21 | #include <asm/ebcdic.h> |
22 | #include <asm/ipl.h> | 22 | #include <asm/ipl.h> |
23 | #include <asm/lowcore.h> | 23 | #include <asm/lowcore.h> |
24 | #include <asm/processor.h> | 24 | #include <asm/processor.h> |
25 | #include <asm/sections.h> | 25 | #include <asm/sections.h> |
26 | #include <asm/setup.h> | 26 | #include <asm/setup.h> |
27 | #include <asm/sysinfo.h> | 27 | #include <asm/sysinfo.h> |
28 | #include <asm/cpcmd.h> | 28 | #include <asm/cpcmd.h> |
29 | #include <asm/sclp.h> | 29 | #include <asm/sclp.h> |
30 | #include <asm/facility.h> | 30 | #include <asm/facility.h> |
31 | #include "entry.h" | 31 | #include "entry.h" |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * Create a Kernel NSS if the SAVESYS= parameter is defined | 34 | * Create a Kernel NSS if the SAVESYS= parameter is defined |
35 | */ | 35 | */ |
36 | #define DEFSYS_CMD_SIZE 128 | 36 | #define DEFSYS_CMD_SIZE 128 |
37 | #define SAVESYS_CMD_SIZE 32 | 37 | #define SAVESYS_CMD_SIZE 32 |
38 | 38 | ||
39 | char kernel_nss_name[NSS_NAME_SIZE + 1]; | 39 | char kernel_nss_name[NSS_NAME_SIZE + 1]; |
40 | 40 | ||
41 | static void __init setup_boot_command_line(void); | 41 | static void __init setup_boot_command_line(void); |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Get the TOD clock running. | 44 | * Get the TOD clock running. |
45 | */ | 45 | */ |
46 | static void __init reset_tod_clock(void) | 46 | static void __init reset_tod_clock(void) |
47 | { | 47 | { |
48 | u64 time; | 48 | u64 time; |
49 | 49 | ||
50 | if (store_clock(&time) == 0) | 50 | if (store_clock(&time) == 0) |
51 | return; | 51 | return; |
52 | /* TOD clock not running. Set the clock to Unix Epoch. */ | 52 | /* TOD clock not running. Set the clock to Unix Epoch. */ |
53 | if (set_clock(TOD_UNIX_EPOCH) != 0 || store_clock(&time) != 0) | 53 | if (set_clock(TOD_UNIX_EPOCH) != 0 || store_clock(&time) != 0) |
54 | disabled_wait(0); | 54 | disabled_wait(0); |
55 | 55 | ||
56 | sched_clock_base_cc = TOD_UNIX_EPOCH; | 56 | sched_clock_base_cc = TOD_UNIX_EPOCH; |
57 | S390_lowcore.last_update_clock = sched_clock_base_cc; | 57 | S390_lowcore.last_update_clock = sched_clock_base_cc; |
58 | } | 58 | } |
59 | 59 | ||
60 | #ifdef CONFIG_SHARED_KERNEL | 60 | #ifdef CONFIG_SHARED_KERNEL |
61 | int __init savesys_ipl_nss(char *cmd, const int cmdlen); | 61 | int __init savesys_ipl_nss(char *cmd, const int cmdlen); |
62 | 62 | ||
63 | asm( | 63 | asm( |
64 | " .section .init.text,\"ax\",@progbits\n" | 64 | " .section .init.text,\"ax\",@progbits\n" |
65 | " .align 4\n" | 65 | " .align 4\n" |
66 | " .type savesys_ipl_nss, @function\n" | 66 | " .type savesys_ipl_nss, @function\n" |
67 | "savesys_ipl_nss:\n" | 67 | "savesys_ipl_nss:\n" |
68 | #ifdef CONFIG_64BIT | 68 | #ifdef CONFIG_64BIT |
69 | " stmg 6,15,48(15)\n" | 69 | " stmg 6,15,48(15)\n" |
70 | " lgr 14,3\n" | 70 | " lgr 14,3\n" |
71 | " sam31\n" | 71 | " sam31\n" |
72 | " diag 2,14,0x8\n" | 72 | " diag 2,14,0x8\n" |
73 | " sam64\n" | 73 | " sam64\n" |
74 | " lgr 2,14\n" | 74 | " lgr 2,14\n" |
75 | " lmg 6,15,48(15)\n" | 75 | " lmg 6,15,48(15)\n" |
76 | #else | 76 | #else |
77 | " stm 6,15,24(15)\n" | 77 | " stm 6,15,24(15)\n" |
78 | " lr 14,3\n" | 78 | " lr 14,3\n" |
79 | " diag 2,14,0x8\n" | 79 | " diag 2,14,0x8\n" |
80 | " lr 2,14\n" | 80 | " lr 2,14\n" |
81 | " lm 6,15,24(15)\n" | 81 | " lm 6,15,24(15)\n" |
82 | #endif | 82 | #endif |
83 | " br 14\n" | 83 | " br 14\n" |
84 | " .size savesys_ipl_nss, .-savesys_ipl_nss\n" | 84 | " .size savesys_ipl_nss, .-savesys_ipl_nss\n" |
85 | " .previous\n"); | 85 | " .previous\n"); |
86 | 86 | ||
87 | static __initdata char upper_command_line[COMMAND_LINE_SIZE]; | 87 | static __initdata char upper_command_line[COMMAND_LINE_SIZE]; |
88 | 88 | ||
89 | static noinline __init void create_kernel_nss(void) | 89 | static noinline __init void create_kernel_nss(void) |
90 | { | 90 | { |
91 | unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size; | 91 | unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size; |
92 | #ifdef CONFIG_BLK_DEV_INITRD | 92 | #ifdef CONFIG_BLK_DEV_INITRD |
93 | unsigned int sinitrd_pfn, einitrd_pfn; | 93 | unsigned int sinitrd_pfn, einitrd_pfn; |
94 | #endif | 94 | #endif |
95 | int response; | 95 | int response; |
96 | int hlen; | 96 | int hlen; |
97 | size_t len; | 97 | size_t len; |
98 | char *savesys_ptr; | 98 | char *savesys_ptr; |
99 | char defsys_cmd[DEFSYS_CMD_SIZE]; | 99 | char defsys_cmd[DEFSYS_CMD_SIZE]; |
100 | char savesys_cmd[SAVESYS_CMD_SIZE]; | 100 | char savesys_cmd[SAVESYS_CMD_SIZE]; |
101 | 101 | ||
102 | /* Do nothing if we are not running under VM */ | 102 | /* Do nothing if we are not running under VM */ |
103 | if (!MACHINE_IS_VM) | 103 | if (!MACHINE_IS_VM) |
104 | return; | 104 | return; |
105 | 105 | ||
106 | /* Convert COMMAND_LINE to upper case */ | 106 | /* Convert COMMAND_LINE to upper case */ |
107 | for (i = 0; i < strlen(boot_command_line); i++) | 107 | for (i = 0; i < strlen(boot_command_line); i++) |
108 | upper_command_line[i] = toupper(boot_command_line[i]); | 108 | upper_command_line[i] = toupper(boot_command_line[i]); |
109 | 109 | ||
110 | savesys_ptr = strstr(upper_command_line, "SAVESYS="); | 110 | savesys_ptr = strstr(upper_command_line, "SAVESYS="); |
111 | 111 | ||
112 | if (!savesys_ptr) | 112 | if (!savesys_ptr) |
113 | return; | 113 | return; |
114 | 114 | ||
115 | savesys_ptr += 8; /* Point to the beginning of the NSS name */ | 115 | savesys_ptr += 8; /* Point to the beginning of the NSS name */ |
116 | for (i = 0; i < NSS_NAME_SIZE; i++) { | 116 | for (i = 0; i < NSS_NAME_SIZE; i++) { |
117 | if (savesys_ptr[i] == ' ' || savesys_ptr[i] == '\0') | 117 | if (savesys_ptr[i] == ' ' || savesys_ptr[i] == '\0') |
118 | break; | 118 | break; |
119 | kernel_nss_name[i] = savesys_ptr[i]; | 119 | kernel_nss_name[i] = savesys_ptr[i]; |
120 | } | 120 | } |
121 | 121 | ||
122 | stext_pfn = PFN_DOWN(__pa(&_stext)); | 122 | stext_pfn = PFN_DOWN(__pa(&_stext)); |
123 | eshared_pfn = PFN_DOWN(__pa(&_eshared)); | 123 | eshared_pfn = PFN_DOWN(__pa(&_eshared)); |
124 | end_pfn = PFN_UP(__pa(&_end)); | 124 | end_pfn = PFN_UP(__pa(&_end)); |
125 | min_size = end_pfn << 2; | 125 | min_size = end_pfn << 2; |
126 | 126 | ||
127 | hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE, | 127 | hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE, |
128 | "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", | 128 | "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", |
129 | kernel_nss_name, stext_pfn - 1, stext_pfn, | 129 | kernel_nss_name, stext_pfn - 1, stext_pfn, |
130 | eshared_pfn - 1, eshared_pfn, end_pfn); | 130 | eshared_pfn - 1, eshared_pfn, end_pfn); |
131 | 131 | ||
132 | #ifdef CONFIG_BLK_DEV_INITRD | 132 | #ifdef CONFIG_BLK_DEV_INITRD |
133 | if (INITRD_START && INITRD_SIZE) { | 133 | if (INITRD_START && INITRD_SIZE) { |
134 | sinitrd_pfn = PFN_DOWN(__pa(INITRD_START)); | 134 | sinitrd_pfn = PFN_DOWN(__pa(INITRD_START)); |
135 | einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE)); | 135 | einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE)); |
136 | min_size = einitrd_pfn << 2; | 136 | min_size = einitrd_pfn << 2; |
137 | hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, | 137 | hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, |
138 | " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn); | 138 | " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn); |
139 | } | 139 | } |
140 | #endif | 140 | #endif |
141 | 141 | ||
142 | snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, | 142 | snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, |
143 | " EW MINSIZE=%.7iK PARMREGS=0-13", min_size); | 143 | " EW MINSIZE=%.7iK PARMREGS=0-13", min_size); |
144 | defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0'; | 144 | defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0'; |
145 | snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s", | 145 | snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s", |
146 | kernel_nss_name, kernel_nss_name); | 146 | kernel_nss_name, kernel_nss_name); |
147 | savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0'; | 147 | savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0'; |
148 | 148 | ||
149 | __cpcmd(defsys_cmd, NULL, 0, &response); | 149 | __cpcmd(defsys_cmd, NULL, 0, &response); |
150 | 150 | ||
151 | if (response != 0) { | 151 | if (response != 0) { |
152 | pr_err("Defining the Linux kernel NSS failed with rc=%d\n", | 152 | pr_err("Defining the Linux kernel NSS failed with rc=%d\n", |
153 | response); | 153 | response); |
154 | kernel_nss_name[0] = '\0'; | 154 | kernel_nss_name[0] = '\0'; |
155 | return; | 155 | return; |
156 | } | 156 | } |
157 | 157 | ||
158 | len = strlen(savesys_cmd); | 158 | len = strlen(savesys_cmd); |
159 | ASCEBC(savesys_cmd, len); | 159 | ASCEBC(savesys_cmd, len); |
160 | response = savesys_ipl_nss(savesys_cmd, len); | 160 | response = savesys_ipl_nss(savesys_cmd, len); |
161 | 161 | ||
162 | /* On success: response is equal to the command size, | 162 | /* On success: response is equal to the command size, |
163 | * max SAVESYS_CMD_SIZE | 163 | * max SAVESYS_CMD_SIZE |
164 | * On error: response contains the numeric portion of the CP error message: | 164 | * On error: response contains the numeric portion of the CP error message: |
165 | * for SAVESYS it will be >= 263; | 165 | * for SAVESYS it will be >= 263; |
166 | * for a missing privilege class, it will be 1 | 166 | * for a missing privilege class, it will be 1 |
167 | */ | 167 | */ |
168 | if (response > SAVESYS_CMD_SIZE || response == 1) { | 168 | if (response > SAVESYS_CMD_SIZE || response == 1) { |
169 | pr_err("Saving the Linux kernel NSS failed with rc=%d\n", | 169 | pr_err("Saving the Linux kernel NSS failed with rc=%d\n", |
170 | response); | 170 | response); |
171 | kernel_nss_name[0] = '\0'; | 171 | kernel_nss_name[0] = '\0'; |
172 | return; | 172 | return; |
173 | } | 173 | } |
174 | 174 | ||
175 | /* re-initialize cputime accounting. */ | 175 | /* re-initialize cputime accounting. */ |
176 | sched_clock_base_cc = get_clock(); | 176 | sched_clock_base_cc = get_clock(); |
177 | S390_lowcore.last_update_clock = sched_clock_base_cc; | 177 | S390_lowcore.last_update_clock = sched_clock_base_cc; |
178 | S390_lowcore.last_update_timer = 0x7fffffffffffffffULL; | 178 | S390_lowcore.last_update_timer = 0x7fffffffffffffffULL; |
179 | S390_lowcore.user_timer = 0; | 179 | S390_lowcore.user_timer = 0; |
180 | S390_lowcore.system_timer = 0; | 180 | S390_lowcore.system_timer = 0; |
181 | asm volatile("SPT 0(%0)" : : "a" (&S390_lowcore.last_update_timer)); | 181 | asm volatile("SPT 0(%0)" : : "a" (&S390_lowcore.last_update_timer)); |
182 | 182 | ||
183 | /* re-setup boot command line with new ipl vm parms */ | 183 | /* re-setup boot command line with new ipl vm parms */ |
184 | ipl_update_parameters(); | 184 | ipl_update_parameters(); |
185 | setup_boot_command_line(); | 185 | setup_boot_command_line(); |
186 | 186 | ||
187 | ipl_flags = IPL_NSS_VALID; | 187 | ipl_flags = IPL_NSS_VALID; |
188 | } | 188 | } |
189 | 189 | ||
190 | #else /* CONFIG_SHARED_KERNEL */ | 190 | #else /* CONFIG_SHARED_KERNEL */ |
191 | 191 | ||
192 | static inline void create_kernel_nss(void) { } | 192 | static inline void create_kernel_nss(void) { } |
193 | 193 | ||
194 | #endif /* CONFIG_SHARED_KERNEL */ | 194 | #endif /* CONFIG_SHARED_KERNEL */ |
195 | 195 | ||
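For reference, the buffers assembled above hold ordinary CP commands. With purely illustrative pfn values (kernel text starting at pfn 0x100, _eshared at pfn 0xAE0, _end at pfn 0xC00, NSS name LINUX1, no initrd), the generated pair would come out roughly as:

        DEFSYS LINUX1 00000-000FF EW 00100-00ADF SR 00AE0-00C00 EW MINSIZE=0012288K PARMREGS=0-13
        SAVESYS LINUX1
         IPL LINUX1

The EW/SR ranges mark the exclusive-write and shared-read-only segments of the saved system, and MINSIZE is the image size in KB (end_pfn << 2).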
196 | /* | 196 | /* |
197 | * Clear bss memory | 197 | * Clear bss memory |
198 | */ | 198 | */ |
199 | static noinline __init void clear_bss_section(void) | 199 | static noinline __init void clear_bss_section(void) |
200 | { | 200 | { |
201 | memset(__bss_start, 0, __bss_stop - __bss_start); | 201 | memset(__bss_start, 0, __bss_stop - __bss_start); |
202 | } | 202 | } |
203 | 203 | ||
204 | /* | 204 | /* |
205 | * Initialize storage key for kernel pages | 205 | * Initialize storage key for kernel pages |
206 | */ | 206 | */ |
207 | static noinline __init void init_kernel_storage_key(void) | 207 | static noinline __init void init_kernel_storage_key(void) |
208 | { | 208 | { |
209 | unsigned long end_pfn, init_pfn; | 209 | unsigned long end_pfn, init_pfn; |
210 | 210 | ||
211 | end_pfn = PFN_UP(__pa(&_end)); | 211 | end_pfn = PFN_UP(__pa(&_end)); |
212 | 212 | ||
213 | for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) | 213 | for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) |
214 | page_set_storage_key(init_pfn << PAGE_SHIFT, | 214 | page_set_storage_key(init_pfn << PAGE_SHIFT, |
215 | PAGE_DEFAULT_KEY, 0); | 215 | PAGE_DEFAULT_KEY, 0); |
216 | } | 216 | } |
217 | 217 | ||
218 | static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); | 218 | static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); |
219 | 219 | ||
220 | static noinline __init void detect_machine_type(void) | 220 | static noinline __init void detect_machine_type(void) |
221 | { | 221 | { |
222 | struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; | 222 | struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; |
223 | 223 | ||
224 | /* Check current-configuration-level */ | 224 | /* Check current-configuration-level */ |
225 | if (stsi(NULL, 0, 0, 0) <= 2) { | 225 | if (stsi(NULL, 0, 0, 0) <= 2) { |
226 | S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR; | 226 | S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR; |
227 | return; | 227 | return; |
228 | } | 228 | } |
229 | /* Get virtual-machine cpu information. */ | 229 | /* Get virtual-machine cpu information. */ |
230 | if (stsi(vmms, 3, 2, 2) || !vmms->count) | 230 | if (stsi(vmms, 3, 2, 2) || !vmms->count) |
231 | return; | 231 | return; |
232 | 232 | ||
233 | /* Running under KVM? If not, we assume z/VM */ | 233 | /* Running under KVM? If not, we assume z/VM */ |
234 | if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) | 234 | if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) |
235 | S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; | 235 | S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; |
236 | else | 236 | else |
237 | S390_lowcore.machine_flags |= MACHINE_FLAG_VM; | 237 | S390_lowcore.machine_flags |= MACHINE_FLAG_VM; |
238 | } | 238 | } |
239 | 239 | ||
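The three magic bytes compared against the control-program identifier are EBCDIC, since STSI returns EBCDIC strings: 0xd2 0xe5 0xd4 spells "KVM". A small hedged sketch of decoding the identifier with the existing EBCASC helper (for logging only; not part of this commit):

        /* Sketch: decode the EBCDIC control-program identifier to ASCII. */
        static void show_cpi(struct sysinfo_3_2_2 *vmms)
        {
                char cpi[17];

                memcpy(cpi, vmms->vm[0].cpi, 16);
                cpi[16] = '\0';
                EBCASC(cpi, 16);        /* "\xd2\xe5\xd4..." becomes "KVM..." */
        }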
240 | static __init void setup_topology(void) | 240 | static __init void setup_topology(void) |
241 | { | 241 | { |
242 | #ifdef CONFIG_64BIT | 242 | #ifdef CONFIG_64BIT |
243 | int max_mnest; | 243 | int max_mnest; |
244 | 244 | ||
245 | if (!test_facility(11)) | 245 | if (!test_facility(11)) |
246 | return; | 246 | return; |
247 | S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; | 247 | S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; |
248 | for (max_mnest = 6; max_mnest > 1; max_mnest--) { | 248 | for (max_mnest = 6; max_mnest > 1; max_mnest--) { |
249 | if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) | 249 | if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) |
250 | break; | 250 | break; |
251 | } | 251 | } |
252 | topology_max_mnest = max_mnest; | 252 | topology_max_mnest = max_mnest; |
253 | #endif | 253 | #endif |
254 | } | 254 | } |
255 | 255 | ||
256 | static void early_pgm_check_handler(void) | 256 | static void early_pgm_check_handler(void) |
257 | { | 257 | { |
258 | const struct exception_table_entry *fixup; | 258 | const struct exception_table_entry *fixup; |
259 | unsigned long addr; | 259 | unsigned long addr; |
260 | 260 | ||
261 | addr = S390_lowcore.program_old_psw.addr; | 261 | addr = S390_lowcore.program_old_psw.addr; |
262 | fixup = search_exception_tables(addr & PSW_ADDR_INSN); | 262 | fixup = search_exception_tables(addr & PSW_ADDR_INSN); |
263 | if (!fixup) | 263 | if (!fixup) |
264 | disabled_wait(0); | 264 | disabled_wait(0); |
265 | S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE; | 265 | S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE; |
266 | } | 266 | } |
267 | 267 | ||
268 | static noinline __init void setup_lowcore_early(void) | 268 | static noinline __init void setup_lowcore_early(void) |
269 | { | 269 | { |
270 | psw_t psw; | 270 | psw_t psw; |
271 | 271 | ||
272 | psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; | 272 | psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; |
273 | psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; | 273 | psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; |
274 | S390_lowcore.external_new_psw = psw; | 274 | S390_lowcore.external_new_psw = psw; |
275 | psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; | 275 | psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; |
276 | S390_lowcore.program_new_psw = psw; | 276 | S390_lowcore.program_new_psw = psw; |
277 | s390_base_pgm_handler_fn = early_pgm_check_handler; | 277 | s390_base_pgm_handler_fn = early_pgm_check_handler; |
278 | } | 278 | } |
279 | 279 | ||
280 | static noinline __init void setup_facility_list(void) | 280 | static noinline __init void setup_facility_list(void) |
281 | { | 281 | { |
282 | stfle(S390_lowcore.stfle_fac_list, | 282 | stfle(S390_lowcore.stfle_fac_list, |
283 | ARRAY_SIZE(S390_lowcore.stfle_fac_list)); | 283 | ARRAY_SIZE(S390_lowcore.stfle_fac_list)); |
284 | } | 284 | } |
285 | 285 | ||
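setup_facility_list() caches the STFLE result in the lowcore once, so that every later test_facility(nr) is a cheap bit test against that copy; facility bits are numbered MSB-first within the list. A sketch mirroring the asm/facility.h helper (illustrative):

        /* Sketch: test facility bit 'nr' against the cached stfle list. */
        static inline int test_facility_sketch(unsigned long nr)
        {
                unsigned char *ptr = (unsigned char *) S390_lowcore.stfle_fac_list;

                return (ptr[nr >> 3] & (0x80 >> (nr & 7))) != 0;
        }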
286 | static noinline __init void setup_hpage(void) | 286 | static noinline __init void setup_hpage(void) |
287 | { | 287 | { |
288 | if (!test_facility(2) || !test_facility(8)) | 288 | if (!test_facility(2) || !test_facility(8)) |
289 | return; | 289 | return; |
290 | S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE; | 290 | S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE; |
291 | __ctl_set_bit(0, 23); | 291 | __ctl_set_bit(0, 23); |
292 | } | 292 | } |
293 | 293 | ||
294 | static __init void detect_mvpg(void) | 294 | static __init void detect_mvpg(void) |
295 | { | 295 | { |
296 | #ifndef CONFIG_64BIT | 296 | #ifndef CONFIG_64BIT |
297 | int rc; | 297 | int rc; |
298 | 298 | ||
299 | asm volatile( | 299 | asm volatile( |
300 | " la 0,0\n" | 300 | " la 0,0\n" |
301 | " mvpg %2,%2\n" | 301 | " mvpg %2,%2\n" |
302 | "0: la %0,0\n" | 302 | "0: la %0,0\n" |
303 | "1:\n" | 303 | "1:\n" |
304 | EX_TABLE(0b,1b) | 304 | EX_TABLE(0b,1b) |
305 | : "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0"); | 305 | : "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0"); |
306 | if (!rc) | 306 | if (!rc) |
307 | S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG; | 307 | S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG; |
308 | #endif | 308 | #endif |
309 | } | 309 | } |
310 | 310 | ||
311 | static __init void detect_ieee(void) | 311 | static __init void detect_ieee(void) |
312 | { | 312 | { |
313 | #ifndef CONFIG_64BIT | 313 | #ifndef CONFIG_64BIT |
314 | int rc, tmp; | 314 | int rc, tmp; |
315 | 315 | ||
316 | asm volatile( | 316 | asm volatile( |
317 | " efpc %1,0\n" | 317 | " efpc %1,0\n" |
318 | "0: la %0,0\n" | 318 | "0: la %0,0\n" |
319 | "1:\n" | 319 | "1:\n" |
320 | EX_TABLE(0b,1b) | 320 | EX_TABLE(0b,1b) |
321 | : "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc"); | 321 | : "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc"); |
322 | if (!rc) | 322 | if (!rc) |
323 | S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE; | 323 | S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE; |
324 | #endif | 324 | #endif |
325 | } | 325 | } |
326 | 326 | ||
327 | static __init void detect_csp(void) | 327 | static __init void detect_csp(void) |
328 | { | 328 | { |
329 | #ifndef CONFIG_64BIT | 329 | #ifndef CONFIG_64BIT |
330 | int rc; | 330 | int rc; |
331 | 331 | ||
332 | asm volatile( | 332 | asm volatile( |
333 | " la 0,0\n" | 333 | " la 0,0\n" |
334 | " la 1,0\n" | 334 | " la 1,0\n" |
335 | " la 2,4\n" | 335 | " la 2,4\n" |
336 | " csp 0,2\n" | 336 | " csp 0,2\n" |
337 | "0: la %0,0\n" | 337 | "0: la %0,0\n" |
338 | "1:\n" | 338 | "1:\n" |
339 | EX_TABLE(0b,1b) | 339 | EX_TABLE(0b,1b) |
340 | : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2"); | 340 | : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2"); |
341 | if (!rc) | 341 | if (!rc) |
342 | S390_lowcore.machine_flags |= MACHINE_FLAG_CSP; | 342 | S390_lowcore.machine_flags |= MACHINE_FLAG_CSP; |
343 | #endif | 343 | #endif |
344 | } | 344 | } |
345 | 345 | ||
346 | static __init void detect_diag9c(void) | 346 | static __init void detect_diag9c(void) |
347 | { | 347 | { |
348 | unsigned int cpu_address; | 348 | unsigned int cpu_address; |
349 | int rc; | 349 | int rc; |
350 | 350 | ||
351 | cpu_address = stap(); | 351 | cpu_address = stap(); |
352 | asm volatile( | 352 | asm volatile( |
353 | " diag %2,0,0x9c\n" | 353 | " diag %2,0,0x9c\n" |
354 | "0: la %0,0\n" | 354 | "0: la %0,0\n" |
355 | "1:\n" | 355 | "1:\n" |
356 | EX_TABLE(0b,1b) | 356 | EX_TABLE(0b,1b) |
357 | : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); | 357 | : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); |
358 | if (!rc) | 358 | if (!rc) |
359 | S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; | 359 | S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; |
360 | } | 360 | } |
361 | 361 | ||
362 | static __init void detect_diag44(void) | 362 | static __init void detect_diag44(void) |
363 | { | 363 | { |
364 | #ifdef CONFIG_64BIT | 364 | #ifdef CONFIG_64BIT |
365 | int rc; | 365 | int rc; |
366 | 366 | ||
367 | asm volatile( | 367 | asm volatile( |
368 | " diag 0,0,0x44\n" | 368 | " diag 0,0,0x44\n" |
369 | "0: la %0,0\n" | 369 | "0: la %0,0\n" |
370 | "1:\n" | 370 | "1:\n" |
371 | EX_TABLE(0b,1b) | 371 | EX_TABLE(0b,1b) |
372 | : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); | 372 | : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); |
373 | if (!rc) | 373 | if (!rc) |
374 | S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44; | 374 | S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44; |
375 | #endif | 375 | #endif |
376 | } | 376 | } |
377 | 377 | ||
378 | static __init void detect_machine_facilities(void) | 378 | static __init void detect_machine_facilities(void) |
379 | { | 379 | { |
380 | #ifdef CONFIG_64BIT | 380 | #ifdef CONFIG_64BIT |
381 | if (test_facility(3)) | 381 | if (test_facility(3)) |
382 | S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; | 382 | S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; |
383 | if (test_facility(8)) | 383 | if (test_facility(8)) |
384 | S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF; | 384 | S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF; |
385 | if (test_facility(27)) | 385 | if (test_facility(27)) |
386 | S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; | 386 | S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; |
387 | if (test_facility(40)) | 387 | if (test_facility(40)) |
388 | S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; | 388 | S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; |
389 | if (test_facility(50) && test_facility(73)) | 389 | if (test_facility(50) && test_facility(73)) |
390 | S390_lowcore.machine_flags |= MACHINE_FLAG_TE; | 390 | S390_lowcore.machine_flags |= MACHINE_FLAG_TE; |
391 | if (test_facility(66)) | ||
392 | S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM; | ||
391 | #endif | 393 | #endif |
392 | } | 394 | } |
393 | 395 | ||
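Facility 66 is the reset-reference-bits-multiple (RRBM) facility, and the new MACHINE_FLAG_RRBM is what the thp backend keys on: with RRBM the referenced bits of the 256 4K frames backing a 1MB segment can be cleared 64 at a time rather than with one rrbe per page. A rough sketch of a consumer branching on the cached flag (rrbm_reset_range() is a hypothetical wrapper; page_reset_referenced() is the existing rrbe-based helper from asm/page.h):

        /* Sketch: test-and-clear "young" for every 4K frame of a 1MB segment. */
        static inline int segment_test_and_clear_young(unsigned long pmd_addr)
        {
                int young = 0;
                int i;

                if (MACHINE_HAS_RRBM)
                        return rrbm_reset_range(pmd_addr, PTRS_PER_PTE); /* hypothetical */
                for (i = 0; i < PTRS_PER_PTE; i++, pmd_addr += PAGE_SIZE)
                        young |= page_reset_referenced(pmd_addr);
                return young;
        }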
394 | static __init void rescue_initrd(void) | 396 | static __init void rescue_initrd(void) |
395 | { | 397 | { |
396 | #ifdef CONFIG_BLK_DEV_INITRD | 398 | #ifdef CONFIG_BLK_DEV_INITRD |
397 | unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); | 399 | unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); |
398 | /* | 400 | /* |
399 | * Just like in the case of IPL from the VM reader, we make sure there is | 401 | * Just like in the case of IPL from the VM reader, we make sure there is |
400 | * a gap of 4MB between the end of the kernel and the start of the initrd. | 402 | * a gap of 4MB between the end of the kernel and the start of the initrd. |
401 | * That way we can also be sure that saving an NSS will succeed, | 403 | * That way we can also be sure that saving an NSS will succeed, |
402 | * which however only requires different segments. | 404 | * which however only requires different segments. |
403 | */ | 405 | */ |
404 | if (!INITRD_START || !INITRD_SIZE) | 406 | if (!INITRD_START || !INITRD_SIZE) |
405 | return; | 407 | return; |
406 | if (INITRD_START >= min_initrd_addr) | 408 | if (INITRD_START >= min_initrd_addr) |
407 | return; | 409 | return; |
408 | memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); | 410 | memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); |
409 | INITRD_START = min_initrd_addr; | 411 | INITRD_START = min_initrd_addr; |
410 | #endif | 412 | #endif |
411 | } | 413 | } |
412 | 414 | ||
413 | /* Set up boot command line */ | 415 | /* Set up boot command line */ |
414 | static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) | 416 | static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) |
415 | { | 417 | { |
416 | char *parm, *delim; | 418 | char *parm, *delim; |
417 | size_t rc, len; | 419 | size_t rc, len; |
418 | 420 | ||
419 | len = strlen(boot_command_line); | 421 | len = strlen(boot_command_line); |
420 | 422 | ||
421 | delim = boot_command_line + len; /* '\0' character position */ | 423 | delim = boot_command_line + len; /* '\0' character position */ |
422 | parm = boot_command_line + len + 1; /* append right after '\0' */ | 424 | parm = boot_command_line + len + 1; /* append right after '\0' */ |
423 | 425 | ||
424 | rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1); | 426 | rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1); |
425 | if (rc) { | 427 | if (rc) { |
426 | if (*parm == '=') | 428 | if (*parm == '=') |
427 | memmove(boot_command_line, parm + 1, rc); | 429 | memmove(boot_command_line, parm + 1, rc); |
428 | else | 430 | else |
429 | *delim = ' '; /* replace '\0' with space */ | 431 | *delim = ' '; /* replace '\0' with space */ |
430 | } | 432 | } |
431 | } | 433 | } |
432 | 434 | ||
433 | static inline int has_ebcdic_char(const char *str) | 435 | static inline int has_ebcdic_char(const char *str) |
434 | { | 436 | { |
435 | int i; | 437 | int i; |
436 | 438 | ||
437 | for (i = 0; str[i]; i++) | 439 | for (i = 0; str[i]; i++) |
438 | if (str[i] & 0x80) | 440 | if (str[i] & 0x80) |
439 | return 1; | 441 | return 1; |
440 | return 0; | 442 | return 0; |
441 | } | 443 | } |
442 | 444 | ||
443 | static void __init setup_boot_command_line(void) | 445 | static void __init setup_boot_command_line(void) |
444 | { | 446 | { |
445 | COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; | 447 | COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; |
446 | /* convert arch command line to ascii if necessary */ | 448 | /* convert arch command line to ascii if necessary */ |
447 | if (has_ebcdic_char(COMMAND_LINE)) | 449 | if (has_ebcdic_char(COMMAND_LINE)) |
448 | EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); | 450 | EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); |
449 | /* copy arch command line */ | 451 | /* copy arch command line */ |
450 | strlcpy(boot_command_line, strstrip(COMMAND_LINE), | 452 | strlcpy(boot_command_line, strstrip(COMMAND_LINE), |
451 | ARCH_COMMAND_LINE_SIZE); | 453 | ARCH_COMMAND_LINE_SIZE); |
452 | 454 | ||
453 | /* append IPL PARM data to the boot command line */ | 455 | /* append IPL PARM data to the boot command line */ |
454 | if (MACHINE_IS_VM) | 456 | if (MACHINE_IS_VM) |
455 | append_to_cmdline(append_ipl_vmparm); | 457 | append_to_cmdline(append_ipl_vmparm); |
456 | 458 | ||
457 | append_to_cmdline(append_ipl_scpdata); | 459 | append_to_cmdline(append_ipl_scpdata); |
458 | } | 460 | } |
459 | 461 | ||
460 | /* | 462 | /* |
461 | * Save ipl parameters, clear bss memory, initialize storage keys | 463 | * Save ipl parameters, clear bss memory, initialize storage keys |
462 | * and create a kernel NSS at startup if the SAVESYS= parm is defined | 464 | * and create a kernel NSS at startup if the SAVESYS= parm is defined |
463 | */ | 465 | */ |
464 | void __init startup_init(void) | 466 | void __init startup_init(void) |
465 | { | 467 | { |
466 | reset_tod_clock(); | 468 | reset_tod_clock(); |
467 | ipl_save_parameters(); | 469 | ipl_save_parameters(); |
468 | rescue_initrd(); | 470 | rescue_initrd(); |
469 | clear_bss_section(); | 471 | clear_bss_section(); |
470 | init_kernel_storage_key(); | 472 | init_kernel_storage_key(); |
471 | lockdep_init(); | 473 | lockdep_init(); |
472 | lockdep_off(); | 474 | lockdep_off(); |
473 | setup_lowcore_early(); | 475 | setup_lowcore_early(); |
474 | setup_facility_list(); | 476 | setup_facility_list(); |
475 | detect_machine_type(); | 477 | detect_machine_type(); |
476 | ipl_update_parameters(); | 478 | ipl_update_parameters(); |
477 | setup_boot_command_line(); | 479 | setup_boot_command_line(); |
478 | create_kernel_nss(); | 480 | create_kernel_nss(); |
479 | detect_mvpg(); | 481 | detect_mvpg(); |
480 | detect_ieee(); | 482 | detect_ieee(); |
481 | detect_csp(); | 483 | detect_csp(); |
482 | detect_diag9c(); | 484 | detect_diag9c(); |
483 | detect_diag44(); | 485 | detect_diag44(); |
484 | detect_machine_facilities(); | 486 | detect_machine_facilities(); |
485 | setup_hpage(); | 487 | setup_hpage(); |
486 | setup_topology(); | 488 | setup_topology(); |
487 | sclp_facilities_detect(); | 489 | sclp_facilities_detect(); |
488 | detect_memory_layout(memory_chunk); | 490 | detect_memory_layout(memory_chunk); |
489 | #ifdef CONFIG_DYNAMIC_FTRACE | 491 | #ifdef CONFIG_DYNAMIC_FTRACE |
490 | S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; | 492 | S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; |
491 | #endif | 493 | #endif |
492 | lockdep_on(); | 494 | lockdep_on(); |
493 | } | 495 | } |
494 | 496 |
arch/s390/mm/pgtable.c
1 | /* | 1 | /* |
2 | * Copyright IBM Corp. 2007, 2011 | 2 | * Copyright IBM Corp. 2007, 2011 |
3 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | 3 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
8 | #include <linux/errno.h> | 8 | #include <linux/errno.h> |
9 | #include <linux/gfp.h> | 9 | #include <linux/gfp.h> |
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/swap.h> | 11 | #include <linux/swap.h> |
12 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
13 | #include <linux/highmem.h> | 13 | #include <linux/highmem.h> |
14 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
15 | #include <linux/spinlock.h> | 15 | #include <linux/spinlock.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/quicklist.h> | 17 | #include <linux/quicklist.h> |
18 | #include <linux/rcupdate.h> | 18 | #include <linux/rcupdate.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | 20 | ||
21 | #include <asm/pgtable.h> | 21 | #include <asm/pgtable.h> |
22 | #include <asm/pgalloc.h> | 22 | #include <asm/pgalloc.h> |
23 | #include <asm/tlb.h> | 23 | #include <asm/tlb.h> |
24 | #include <asm/tlbflush.h> | 24 | #include <asm/tlbflush.h> |
25 | #include <asm/mmu_context.h> | 25 | #include <asm/mmu_context.h> |
26 | 26 | ||
27 | #ifndef CONFIG_64BIT | 27 | #ifndef CONFIG_64BIT |
28 | #define ALLOC_ORDER 1 | 28 | #define ALLOC_ORDER 1 |
29 | #define FRAG_MASK 0x0f | 29 | #define FRAG_MASK 0x0f |
30 | #else | 30 | #else |
31 | #define ALLOC_ORDER 2 | 31 | #define ALLOC_ORDER 2 |
32 | #define FRAG_MASK 0x03 | 32 | #define FRAG_MASK 0x03 |
33 | #endif | 33 | #endif |
34 | 34 | ||
35 | 35 | ||
36 | unsigned long *crst_table_alloc(struct mm_struct *mm) | 36 | unsigned long *crst_table_alloc(struct mm_struct *mm) |
37 | { | 37 | { |
38 | struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); | 38 | struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); |
39 | 39 | ||
40 | if (!page) | 40 | if (!page) |
41 | return NULL; | 41 | return NULL; |
42 | return (unsigned long *) page_to_phys(page); | 42 | return (unsigned long *) page_to_phys(page); |
43 | } | 43 | } |
44 | 44 | ||
45 | void crst_table_free(struct mm_struct *mm, unsigned long *table) | 45 | void crst_table_free(struct mm_struct *mm, unsigned long *table) |
46 | { | 46 | { |
47 | free_pages((unsigned long) table, ALLOC_ORDER); | 47 | free_pages((unsigned long) table, ALLOC_ORDER); |
48 | } | 48 | } |
49 | 49 | ||
50 | #ifdef CONFIG_64BIT | 50 | #ifdef CONFIG_64BIT |
51 | int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) | 51 | int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) |
52 | { | 52 | { |
53 | unsigned long *table, *pgd; | 53 | unsigned long *table, *pgd; |
54 | unsigned long entry; | 54 | unsigned long entry; |
55 | 55 | ||
56 | BUG_ON(limit > (1UL << 53)); | 56 | BUG_ON(limit > (1UL << 53)); |
57 | repeat: | 57 | repeat: |
58 | table = crst_table_alloc(mm); | 58 | table = crst_table_alloc(mm); |
59 | if (!table) | 59 | if (!table) |
60 | return -ENOMEM; | 60 | return -ENOMEM; |
61 | spin_lock_bh(&mm->page_table_lock); | 61 | spin_lock_bh(&mm->page_table_lock); |
62 | if (mm->context.asce_limit < limit) { | 62 | if (mm->context.asce_limit < limit) { |
63 | pgd = (unsigned long *) mm->pgd; | 63 | pgd = (unsigned long *) mm->pgd; |
64 | if (mm->context.asce_limit <= (1UL << 31)) { | 64 | if (mm->context.asce_limit <= (1UL << 31)) { |
65 | entry = _REGION3_ENTRY_EMPTY; | 65 | entry = _REGION3_ENTRY_EMPTY; |
66 | mm->context.asce_limit = 1UL << 42; | 66 | mm->context.asce_limit = 1UL << 42; |
67 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | 67 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | |
68 | _ASCE_USER_BITS | | 68 | _ASCE_USER_BITS | |
69 | _ASCE_TYPE_REGION3; | 69 | _ASCE_TYPE_REGION3; |
70 | } else { | 70 | } else { |
71 | entry = _REGION2_ENTRY_EMPTY; | 71 | entry = _REGION2_ENTRY_EMPTY; |
72 | mm->context.asce_limit = 1UL << 53; | 72 | mm->context.asce_limit = 1UL << 53; |
73 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | 73 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | |
74 | _ASCE_USER_BITS | | 74 | _ASCE_USER_BITS | |
75 | _ASCE_TYPE_REGION2; | 75 | _ASCE_TYPE_REGION2; |
76 | } | 76 | } |
77 | crst_table_init(table, entry); | 77 | crst_table_init(table, entry); |
78 | pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); | 78 | pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); |
79 | mm->pgd = (pgd_t *) table; | 79 | mm->pgd = (pgd_t *) table; |
80 | mm->task_size = mm->context.asce_limit; | 80 | mm->task_size = mm->context.asce_limit; |
81 | table = NULL; | 81 | table = NULL; |
82 | } | 82 | } |
83 | spin_unlock_bh(&mm->page_table_lock); | 83 | spin_unlock_bh(&mm->page_table_lock); |
84 | if (table) | 84 | if (table) |
85 | crst_table_free(mm, table); | 85 | crst_table_free(mm, table); |
86 | if (mm->context.asce_limit < limit) | 86 | if (mm->context.asce_limit < limit) |
87 | goto repeat; | 87 | goto repeat; |
88 | return 0; | 88 | return 0; |
89 | } | 89 | } |
90 | 90 | ||
91 | void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) | 91 | void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) |
92 | { | 92 | { |
93 | pgd_t *pgd; | 93 | pgd_t *pgd; |
94 | 94 | ||
95 | while (mm->context.asce_limit > limit) { | 95 | while (mm->context.asce_limit > limit) { |
96 | pgd = mm->pgd; | 96 | pgd = mm->pgd; |
97 | switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { | 97 | switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { |
98 | case _REGION_ENTRY_TYPE_R2: | 98 | case _REGION_ENTRY_TYPE_R2: |
99 | mm->context.asce_limit = 1UL << 42; | 99 | mm->context.asce_limit = 1UL << 42; |
100 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | 100 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | |
101 | _ASCE_USER_BITS | | 101 | _ASCE_USER_BITS | |
102 | _ASCE_TYPE_REGION3; | 102 | _ASCE_TYPE_REGION3; |
103 | break; | 103 | break; |
104 | case _REGION_ENTRY_TYPE_R3: | 104 | case _REGION_ENTRY_TYPE_R3: |
105 | mm->context.asce_limit = 1UL << 31; | 105 | mm->context.asce_limit = 1UL << 31; |
106 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | | 106 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | |
107 | _ASCE_USER_BITS | | 107 | _ASCE_USER_BITS | |
108 | _ASCE_TYPE_SEGMENT; | 108 | _ASCE_TYPE_SEGMENT; |
109 | break; | 109 | break; |
110 | default: | 110 | default: |
111 | BUG(); | 111 | BUG(); |
112 | } | 112 | } |
113 | mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); | 113 | mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); |
114 | mm->task_size = mm->context.asce_limit; | 114 | mm->task_size = mm->context.asce_limit; |
115 | crst_table_free(mm, (unsigned long *) pgd); | 115 | crst_table_free(mm, (unsigned long *) pgd); |
116 | } | 116 | } |
117 | } | 117 | } |
118 | #endif | 118 | #endif |
119 | 119 | ||
120 | #ifdef CONFIG_PGSTE | 120 | #ifdef CONFIG_PGSTE |
121 | 121 | ||
122 | /** | 122 | /** |
123 | * gmap_alloc - allocate a guest address space | 123 | * gmap_alloc - allocate a guest address space |
124 | * @mm: pointer to the parent mm_struct | 124 | * @mm: pointer to the parent mm_struct |
125 | * | 125 | * |
126 | * Returns a guest address space structure. | 126 | * Returns a guest address space structure. |
127 | */ | 127 | */ |
128 | struct gmap *gmap_alloc(struct mm_struct *mm) | 128 | struct gmap *gmap_alloc(struct mm_struct *mm) |
129 | { | 129 | { |
130 | struct gmap *gmap; | 130 | struct gmap *gmap; |
131 | struct page *page; | 131 | struct page *page; |
132 | unsigned long *table; | 132 | unsigned long *table; |
133 | 133 | ||
134 | gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); | 134 | gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); |
135 | if (!gmap) | 135 | if (!gmap) |
136 | goto out; | 136 | goto out; |
137 | INIT_LIST_HEAD(&gmap->crst_list); | 137 | INIT_LIST_HEAD(&gmap->crst_list); |
138 | gmap->mm = mm; | 138 | gmap->mm = mm; |
139 | page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); | 139 | page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); |
140 | if (!page) | 140 | if (!page) |
141 | goto out_free; | 141 | goto out_free; |
142 | list_add(&page->lru, &gmap->crst_list); | 142 | list_add(&page->lru, &gmap->crst_list); |
143 | table = (unsigned long *) page_to_phys(page); | 143 | table = (unsigned long *) page_to_phys(page); |
144 | crst_table_init(table, _REGION1_ENTRY_EMPTY); | 144 | crst_table_init(table, _REGION1_ENTRY_EMPTY); |
145 | gmap->table = table; | 145 | gmap->table = table; |
146 | gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | | 146 | gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | |
147 | _ASCE_USER_BITS | __pa(table); | 147 | _ASCE_USER_BITS | __pa(table); |
148 | list_add(&gmap->list, &mm->context.gmap_list); | 148 | list_add(&gmap->list, &mm->context.gmap_list); |
149 | return gmap; | 149 | return gmap; |
150 | 150 | ||
151 | out_free: | 151 | out_free: |
152 | kfree(gmap); | 152 | kfree(gmap); |
153 | out: | 153 | out: |
154 | return NULL; | 154 | return NULL; |
155 | } | 155 | } |
156 | EXPORT_SYMBOL_GPL(gmap_alloc); | 156 | EXPORT_SYMBOL_GPL(gmap_alloc); |
157 | 157 | ||
158 | static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) | 158 | static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) |
159 | { | 159 | { |
160 | struct gmap_pgtable *mp; | 160 | struct gmap_pgtable *mp; |
161 | struct gmap_rmap *rmap; | 161 | struct gmap_rmap *rmap; |
162 | struct page *page; | 162 | struct page *page; |
163 | 163 | ||
164 | if (*table & _SEGMENT_ENTRY_INV) | 164 | if (*table & _SEGMENT_ENTRY_INV) |
165 | return 0; | 165 | return 0; |
166 | page = pfn_to_page(*table >> PAGE_SHIFT); | 166 | page = pfn_to_page(*table >> PAGE_SHIFT); |
167 | mp = (struct gmap_pgtable *) page->index; | 167 | mp = (struct gmap_pgtable *) page->index; |
168 | list_for_each_entry(rmap, &mp->mapper, list) { | 168 | list_for_each_entry(rmap, &mp->mapper, list) { |
169 | if (rmap->entry != table) | 169 | if (rmap->entry != table) |
170 | continue; | 170 | continue; |
171 | list_del(&rmap->list); | 171 | list_del(&rmap->list); |
172 | kfree(rmap); | 172 | kfree(rmap); |
173 | break; | 173 | break; |
174 | } | 174 | } |
175 | *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; | 175 | *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; |
176 | return 1; | 176 | return 1; |
177 | } | 177 | } |
178 | 178 | ||
179 | static void gmap_flush_tlb(struct gmap *gmap) | 179 | static void gmap_flush_tlb(struct gmap *gmap) |
180 | { | 180 | { |
181 | if (MACHINE_HAS_IDTE) | 181 | if (MACHINE_HAS_IDTE) |
182 | __tlb_flush_idte((unsigned long) gmap->table | | 182 | __tlb_flush_idte((unsigned long) gmap->table | |
183 | _ASCE_TYPE_REGION1); | 183 | _ASCE_TYPE_REGION1); |
184 | else | 184 | else |
185 | __tlb_flush_global(); | 185 | __tlb_flush_global(); |
186 | } | 186 | } |
187 | 187 | ||
188 | /** | 188 | /** |
189 | * gmap_free - free a guest address space | 189 | * gmap_free - free a guest address space |
190 | * @gmap: pointer to the guest address space structure | 190 | * @gmap: pointer to the guest address space structure |
191 | */ | 191 | */ |
192 | void gmap_free(struct gmap *gmap) | 192 | void gmap_free(struct gmap *gmap) |
193 | { | 193 | { |
194 | struct page *page, *next; | 194 | struct page *page, *next; |
195 | unsigned long *table; | 195 | unsigned long *table; |
196 | int i; | 196 | int i; |
197 | 197 | ||
198 | 198 | ||
199 | /* Flush tlb. */ | 199 | /* Flush tlb. */ |
200 | if (MACHINE_HAS_IDTE) | 200 | if (MACHINE_HAS_IDTE) |
201 | __tlb_flush_idte((unsigned long) gmap->table | | 201 | __tlb_flush_idte((unsigned long) gmap->table | |
202 | _ASCE_TYPE_REGION1); | 202 | _ASCE_TYPE_REGION1); |
203 | else | 203 | else |
204 | __tlb_flush_global(); | 204 | __tlb_flush_global(); |
205 | 205 | ||
206 | /* Free all segment & region tables. */ | 206 | /* Free all segment & region tables. */ |
207 | down_read(&gmap->mm->mmap_sem); | 207 | down_read(&gmap->mm->mmap_sem); |
208 | spin_lock(&gmap->mm->page_table_lock); | 208 | spin_lock(&gmap->mm->page_table_lock); |
209 | list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { | 209 | list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { |
210 | table = (unsigned long *) page_to_phys(page); | 210 | table = (unsigned long *) page_to_phys(page); |
211 | if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) | 211 | if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) |
212 | /* Remove gmap rmap structures for segment table. */ | 212 | /* Remove gmap rmap structures for segment table. */ |
213 | for (i = 0; i < PTRS_PER_PMD; i++, table++) | 213 | for (i = 0; i < PTRS_PER_PMD; i++, table++) |
214 | gmap_unlink_segment(gmap, table); | 214 | gmap_unlink_segment(gmap, table); |
215 | __free_pages(page, ALLOC_ORDER); | 215 | __free_pages(page, ALLOC_ORDER); |
216 | } | 216 | } |
217 | spin_unlock(&gmap->mm->page_table_lock); | 217 | spin_unlock(&gmap->mm->page_table_lock); |
218 | up_read(&gmap->mm->mmap_sem); | 218 | up_read(&gmap->mm->mmap_sem); |
219 | list_del(&gmap->list); | 219 | list_del(&gmap->list); |
220 | kfree(gmap); | 220 | kfree(gmap); |
221 | } | 221 | } |
222 | EXPORT_SYMBOL_GPL(gmap_free); | 222 | EXPORT_SYMBOL_GPL(gmap_free); |
223 | 223 | ||
224 | /** | 224 | /** |
225 | * gmap_enable - switch primary space to the guest address space | 225 | * gmap_enable - switch primary space to the guest address space |
226 | * @gmap: pointer to the guest address space structure | 226 | * @gmap: pointer to the guest address space structure |
227 | */ | 227 | */ |
228 | void gmap_enable(struct gmap *gmap) | 228 | void gmap_enable(struct gmap *gmap) |
229 | { | 229 | { |
230 | S390_lowcore.gmap = (unsigned long) gmap; | 230 | S390_lowcore.gmap = (unsigned long) gmap; |
231 | } | 231 | } |
232 | EXPORT_SYMBOL_GPL(gmap_enable); | 232 | EXPORT_SYMBOL_GPL(gmap_enable); |
233 | 233 | ||
234 | /** | 234 | /** |
235 | * gmap_disable - switch back to the standard primary address space | 235 | * gmap_disable - switch back to the standard primary address space |
236 | * @gmap: pointer to the guest address space structure | 236 | * @gmap: pointer to the guest address space structure |
237 | */ | 237 | */ |
238 | void gmap_disable(struct gmap *gmap) | 238 | void gmap_disable(struct gmap *gmap) |
239 | { | 239 | { |
240 | S390_lowcore.gmap = 0UL; | 240 | S390_lowcore.gmap = 0UL; |
241 | } | 241 | } |
242 | EXPORT_SYMBOL_GPL(gmap_disable); | 242 | EXPORT_SYMBOL_GPL(gmap_disable); |
243 | 243 | ||
244 | /* | 244 | /* |
245 | * gmap_alloc_table is assumed to be called with mmap_sem held | 245 | * gmap_alloc_table is assumed to be called with mmap_sem held |
246 | */ | 246 | */ |
247 | static int gmap_alloc_table(struct gmap *gmap, | 247 | static int gmap_alloc_table(struct gmap *gmap, |
248 | unsigned long *table, unsigned long init) | 248 | unsigned long *table, unsigned long init) |
249 | { | 249 | { |
250 | struct page *page; | 250 | struct page *page; |
251 | unsigned long *new; | 251 | unsigned long *new; |
252 | 252 | ||
253 | /* since we don't free the gmap table until gmap_free we can unlock */ | 253 | /* since we don't free the gmap table until gmap_free we can unlock */ |
254 | spin_unlock(&gmap->mm->page_table_lock); | 254 | spin_unlock(&gmap->mm->page_table_lock); |
255 | page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); | 255 | page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); |
256 | spin_lock(&gmap->mm->page_table_lock); | 256 | spin_lock(&gmap->mm->page_table_lock); |
257 | if (!page) | 257 | if (!page) |
258 | return -ENOMEM; | 258 | return -ENOMEM; |
259 | new = (unsigned long *) page_to_phys(page); | 259 | new = (unsigned long *) page_to_phys(page); |
260 | crst_table_init(new, init); | 260 | crst_table_init(new, init); |
261 | if (*table & _REGION_ENTRY_INV) { | 261 | if (*table & _REGION_ENTRY_INV) { |
262 | list_add(&page->lru, &gmap->crst_list); | 262 | list_add(&page->lru, &gmap->crst_list); |
263 | *table = (unsigned long) new | _REGION_ENTRY_LENGTH | | 263 | *table = (unsigned long) new | _REGION_ENTRY_LENGTH | |
264 | (*table & _REGION_ENTRY_TYPE_MASK); | 264 | (*table & _REGION_ENTRY_TYPE_MASK); |
265 | } else | 265 | } else |
266 | __free_pages(page, ALLOC_ORDER); | 266 | __free_pages(page, ALLOC_ORDER); |
267 | return 0; | 267 | return 0; |
268 | } | 268 | } |
269 | 269 | ||
270 | /** | 270 | /** |
271 | * gmap_unmap_segment - unmap segment from the guest address space | 271 | * gmap_unmap_segment - unmap segment from the guest address space |
272 | * @gmap: pointer to the guest address space structure | 272 | * @gmap: pointer to the guest address space structure |
273 | * @addr: address in the guest address space | 273 | * @addr: address in the guest address space |
274 | * @len: length of the memory area to unmap | 274 | * @len: length of the memory area to unmap |
275 | * | 275 | * |
276 | * Returns 0 if the unmap succeeded, -EINVAL if not. | 276 | * Returns 0 if the unmap succeeded, -EINVAL if not. |
277 | */ | 277 | */ |
278 | int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) | 278 | int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) |
279 | { | 279 | { |
280 | unsigned long *table; | 280 | unsigned long *table; |
281 | unsigned long off; | 281 | unsigned long off; |
282 | int flush; | 282 | int flush; |
283 | 283 | ||
284 | if ((to | len) & (PMD_SIZE - 1)) | 284 | if ((to | len) & (PMD_SIZE - 1)) |
285 | return -EINVAL; | 285 | return -EINVAL; |
286 | if (len == 0 || to + len < to) | 286 | if (len == 0 || to + len < to) |
287 | return -EINVAL; | 287 | return -EINVAL; |
288 | 288 | ||
289 | flush = 0; | 289 | flush = 0; |
290 | down_read(&gmap->mm->mmap_sem); | 290 | down_read(&gmap->mm->mmap_sem); |
291 | spin_lock(&gmap->mm->page_table_lock); | 291 | spin_lock(&gmap->mm->page_table_lock); |
292 | for (off = 0; off < len; off += PMD_SIZE) { | 292 | for (off = 0; off < len; off += PMD_SIZE) { |
293 | /* Walk the guest addr space page table */ | 293 | /* Walk the guest addr space page table */ |
294 | table = gmap->table + (((to + off) >> 53) & 0x7ff); | 294 | table = gmap->table + (((to + off) >> 53) & 0x7ff); |
295 | if (*table & _REGION_ENTRY_INV) | 295 | if (*table & _REGION_ENTRY_INV) |
296 | goto out; | 296 | goto out; |
297 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 297 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); |
298 | table = table + (((to + off) >> 42) & 0x7ff); | 298 | table = table + (((to + off) >> 42) & 0x7ff); |
299 | if (*table & _REGION_ENTRY_INV) | 299 | if (*table & _REGION_ENTRY_INV) |
300 | goto out; | 300 | goto out; |
301 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 301 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); |
302 | table = table + (((to + off) >> 31) & 0x7ff); | 302 | table = table + (((to + off) >> 31) & 0x7ff); |
303 | if (*table & _REGION_ENTRY_INV) | 303 | if (*table & _REGION_ENTRY_INV) |
304 | goto out; | 304 | goto out; |
305 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 305 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); |
306 | table = table + (((to + off) >> 20) & 0x7ff); | 306 | table = table + (((to + off) >> 20) & 0x7ff); |
307 | 307 | ||
308 | /* Clear segment table entry in guest address space. */ | 308 | /* Clear segment table entry in guest address space. */ |
309 | flush |= gmap_unlink_segment(gmap, table); | 309 | flush |= gmap_unlink_segment(gmap, table); |
310 | *table = _SEGMENT_ENTRY_INV; | 310 | *table = _SEGMENT_ENTRY_INV; |
311 | } | 311 | } |
312 | out: | 312 | out: |
313 | spin_unlock(&gmap->mm->page_table_lock); | 313 | spin_unlock(&gmap->mm->page_table_lock); |
314 | up_read(&gmap->mm->mmap_sem); | 314 | up_read(&gmap->mm->mmap_sem); |
315 | if (flush) | 315 | if (flush) |
316 | gmap_flush_tlb(gmap); | 316 | gmap_flush_tlb(gmap); |
317 | return 0; | 317 | return 0; |
318 | } | 318 | } |
319 | EXPORT_SYMBOL_GPL(gmap_unmap_segment); | 319 | EXPORT_SYMBOL_GPL(gmap_unmap_segment); |
320 | 320 | ||
321 | /** | 321 | /** |
322 | * gmap_map_segment - map a segment to the guest address space | 322 | * gmap_map_segment - map a segment to the guest address space |
323 | * @gmap: pointer to the guest address space structure | 323 | * @gmap: pointer to the guest address space structure |
324 | * @from: source address in the parent address space | 324 | * @from: source address in the parent address space |
325 | * @to: target address in the guest address space | 325 | * @to: target address in the guest address space |
326 | * | 326 | * |
327 | * Returns 0 if the map succeeded, -EINVAL or -ENOMEM if not. | 327 | * Returns 0 if the map succeeded, -EINVAL or -ENOMEM if not. |
328 | */ | 328 | */ |
329 | int gmap_map_segment(struct gmap *gmap, unsigned long from, | 329 | int gmap_map_segment(struct gmap *gmap, unsigned long from, |
330 | unsigned long to, unsigned long len) | 330 | unsigned long to, unsigned long len) |
331 | { | 331 | { |
332 | unsigned long *table; | 332 | unsigned long *table; |
333 | unsigned long off; | 333 | unsigned long off; |
334 | int flush; | 334 | int flush; |
335 | 335 | ||
336 | if ((from | to | len) & (PMD_SIZE - 1)) | 336 | if ((from | to | len) & (PMD_SIZE - 1)) |
337 | return -EINVAL; | 337 | return -EINVAL; |
338 | if (len == 0 || from + len > PGDIR_SIZE || | 338 | if (len == 0 || from + len > PGDIR_SIZE || |
339 | from + len < from || to + len < to) | 339 | from + len < from || to + len < to) |
340 | return -EINVAL; | 340 | return -EINVAL; |
341 | 341 | ||
342 | flush = 0; | 342 | flush = 0; |
343 | down_read(&gmap->mm->mmap_sem); | 343 | down_read(&gmap->mm->mmap_sem); |
344 | spin_lock(&gmap->mm->page_table_lock); | 344 | spin_lock(&gmap->mm->page_table_lock); |
345 | for (off = 0; off < len; off += PMD_SIZE) { | 345 | for (off = 0; off < len; off += PMD_SIZE) { |
346 | /* Walk the gmap address space page table */ | 346 | /* Walk the gmap address space page table */ |
347 | table = gmap->table + (((to + off) >> 53) & 0x7ff); | 347 | table = gmap->table + (((to + off) >> 53) & 0x7ff); |
348 | if ((*table & _REGION_ENTRY_INV) && | 348 | if ((*table & _REGION_ENTRY_INV) && |
349 | gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) | 349 | gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) |
350 | goto out_unmap; | 350 | goto out_unmap; |
351 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 351 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); |
352 | table = table + (((to + off) >> 42) & 0x7ff); | 352 | table = table + (((to + off) >> 42) & 0x7ff); |
353 | if ((*table & _REGION_ENTRY_INV) && | 353 | if ((*table & _REGION_ENTRY_INV) && |
354 | gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) | 354 | gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) |
355 | goto out_unmap; | 355 | goto out_unmap; |
356 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 356 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); |
357 | table = table + (((to + off) >> 31) & 0x7ff); | 357 | table = table + (((to + off) >> 31) & 0x7ff); |
358 | if ((*table & _REGION_ENTRY_INV) && | 358 | if ((*table & _REGION_ENTRY_INV) && |
359 | gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) | 359 | gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) |
360 | goto out_unmap; | 360 | goto out_unmap; |
361 | table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); | 361 | table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); |
362 | table = table + (((to + off) >> 20) & 0x7ff); | 362 | table = table + (((to + off) >> 20) & 0x7ff); |
363 | 363 | ||
364 | /* Store 'from' address in an invalid segment table entry. */ | 364 | /* Store 'from' address in an invalid segment table entry. */ |
365 | flush |= gmap_unlink_segment(gmap, table); | 365 | flush |= gmap_unlink_segment(gmap, table); |
366 | *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); | 366 | *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); |
367 | } | 367 | } |
368 | spin_unlock(&gmap->mm->page_table_lock); | 368 | spin_unlock(&gmap->mm->page_table_lock); |
369 | up_read(&gmap->mm->mmap_sem); | 369 | up_read(&gmap->mm->mmap_sem); |
370 | if (flush) | 370 | if (flush) |
371 | gmap_flush_tlb(gmap); | 371 | gmap_flush_tlb(gmap); |
372 | return 0; | 372 | return 0; |
373 | 373 | ||
374 | out_unmap: | 374 | out_unmap: |
375 | spin_unlock(&gmap->mm->page_table_lock); | 375 | spin_unlock(&gmap->mm->page_table_lock); |
376 | up_read(&gmap->mm->mmap_sem); | 376 | up_read(&gmap->mm->mmap_sem); |
377 | gmap_unmap_segment(gmap, to, len); | 377 | gmap_unmap_segment(gmap, to, len); |
378 | return -ENOMEM; | 378 | return -ENOMEM; |
379 | } | 379 | } |
380 | EXPORT_SYMBOL_GPL(gmap_map_segment); | 380 | EXPORT_SYMBOL_GPL(gmap_map_segment); |
381 | 381 | ||
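The four chained lookups above (shifts 53/42/31/20, each masked with 0x7ff) walk 2048-entry region-first, region-second, region-third and segment tables; the same walk reappears in __gmap_fault() and gmap_discard() below. A minimal userspace model of just the index arithmetic (function name and output format are illustrative only):

#include <stdio.h>

/*
 * Userspace model of the guest-table walk used by gmap_map_segment()
 * and gmap_unmap_segment() above: four levels, 2048 entries each,
 * indexed by bit fields of the guest address. No kernel API here.
 */
static void gmap_walk_indices(unsigned long long addr)
{
	unsigned r1 = (addr >> 53) & 0x7ff;	/* region-first index */
	unsigned r2 = (addr >> 42) & 0x7ff;	/* region-second index */
	unsigned r3 = (addr >> 31) & 0x7ff;	/* region-third index */
	unsigned sx = (addr >> 20) & 0x7ff;	/* segment index (1 MB units) */

	printf("%#llx -> r1=%u r2=%u r3=%u sx=%u\n", addr, r1, r2, r3, sx);
}

int main(void)
{
	gmap_walk_indices(0x0ULL);
	gmap_walk_indices(0x100000ULL);		/* next 1 MB segment */
	gmap_walk_indices(1ULL << 31);		/* next region-third entry */
	return 0;
}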
382 | /* | 382 | /* |
383 | * this function is assumed to be called with mmap_sem held | 383 | * this function is assumed to be called with mmap_sem held |
384 | */ | 384 | */ |
385 | unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) | 385 | unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) |
386 | { | 386 | { |
387 | unsigned long *table, vmaddr, segment; | 387 | unsigned long *table, vmaddr, segment; |
388 | struct mm_struct *mm; | 388 | struct mm_struct *mm; |
389 | struct gmap_pgtable *mp; | 389 | struct gmap_pgtable *mp; |
390 | struct gmap_rmap *rmap; | 390 | struct gmap_rmap *rmap; |
391 | struct vm_area_struct *vma; | 391 | struct vm_area_struct *vma; |
392 | struct page *page; | 392 | struct page *page; |
393 | pgd_t *pgd; | 393 | pgd_t *pgd; |
394 | pud_t *pud; | 394 | pud_t *pud; |
395 | pmd_t *pmd; | 395 | pmd_t *pmd; |
396 | 396 | ||
397 | current->thread.gmap_addr = address; | 397 | current->thread.gmap_addr = address; |
398 | mm = gmap->mm; | 398 | mm = gmap->mm; |
399 | /* Walk the gmap address space page table */ | 399 | /* Walk the gmap address space page table */ |
400 | table = gmap->table + ((address >> 53) & 0x7ff); | 400 | table = gmap->table + ((address >> 53) & 0x7ff); |
401 | if (unlikely(*table & _REGION_ENTRY_INV)) | 401 | if (unlikely(*table & _REGION_ENTRY_INV)) |
402 | return -EFAULT; | 402 | return -EFAULT; |
403 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 403 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); |
404 | table = table + ((address >> 42) & 0x7ff); | 404 | table = table + ((address >> 42) & 0x7ff); |
405 | if (unlikely(*table & _REGION_ENTRY_INV)) | 405 | if (unlikely(*table & _REGION_ENTRY_INV)) |
406 | return -EFAULT; | 406 | return -EFAULT; |
407 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 407 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); |
408 | table = table + ((address >> 31) & 0x7ff); | 408 | table = table + ((address >> 31) & 0x7ff); |
409 | if (unlikely(*table & _REGION_ENTRY_INV)) | 409 | if (unlikely(*table & _REGION_ENTRY_INV)) |
410 | return -EFAULT; | 410 | return -EFAULT; |
411 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 411 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); |
412 | table = table + ((address >> 20) & 0x7ff); | 412 | table = table + ((address >> 20) & 0x7ff); |
413 | 413 | ||
414 | /* Convert the gmap address to an mm address. */ | 414 | /* Convert the gmap address to an mm address. */ |
415 | segment = *table; | 415 | segment = *table; |
416 | if (likely(!(segment & _SEGMENT_ENTRY_INV))) { | 416 | if (likely(!(segment & _SEGMENT_ENTRY_INV))) { |
417 | page = pfn_to_page(segment >> PAGE_SHIFT); | 417 | page = pfn_to_page(segment >> PAGE_SHIFT); |
418 | mp = (struct gmap_pgtable *) page->index; | 418 | mp = (struct gmap_pgtable *) page->index; |
419 | return mp->vmaddr | (address & ~PMD_MASK); | 419 | return mp->vmaddr | (address & ~PMD_MASK); |
420 | } else if (segment & _SEGMENT_ENTRY_RO) { | 420 | } else if (segment & _SEGMENT_ENTRY_RO) { |
421 | vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; | 421 | vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; |
422 | vma = find_vma(mm, vmaddr); | 422 | vma = find_vma(mm, vmaddr); |
423 | if (!vma || vma->vm_start > vmaddr) | 423 | if (!vma || vma->vm_start > vmaddr) |
424 | return -EFAULT; | 424 | return -EFAULT; |
425 | 425 | ||
426 | /* Walk the parent mm page table */ | 426 | /* Walk the parent mm page table */ |
427 | pgd = pgd_offset(mm, vmaddr); | 427 | pgd = pgd_offset(mm, vmaddr); |
428 | pud = pud_alloc(mm, pgd, vmaddr); | 428 | pud = pud_alloc(mm, pgd, vmaddr); |
429 | if (!pud) | 429 | if (!pud) |
430 | return -ENOMEM; | 430 | return -ENOMEM; |
431 | pmd = pmd_alloc(mm, pud, vmaddr); | 431 | pmd = pmd_alloc(mm, pud, vmaddr); |
432 | if (!pmd) | 432 | if (!pmd) |
433 | return -ENOMEM; | 433 | return -ENOMEM; |
434 | if (!pmd_present(*pmd) && | 434 | if (!pmd_present(*pmd) && |
435 | __pte_alloc(mm, vma, pmd, vmaddr)) | 435 | __pte_alloc(mm, vma, pmd, vmaddr)) |
436 | return -ENOMEM; | 436 | return -ENOMEM; |
437 | /* pmd now points to a valid segment table entry. */ | 437 | /* pmd now points to a valid segment table entry. */ |
438 | rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); | 438 | rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); |
439 | if (!rmap) | 439 | if (!rmap) |
440 | return -ENOMEM; | 440 | return -ENOMEM; |
441 | /* Link gmap segment table entry location to page table. */ | 441 | /* Link gmap segment table entry location to page table. */ |
442 | page = pmd_page(*pmd); | 442 | page = pmd_page(*pmd); |
443 | mp = (struct gmap_pgtable *) page->index; | 443 | mp = (struct gmap_pgtable *) page->index; |
444 | rmap->entry = table; | 444 | rmap->entry = table; |
445 | spin_lock(&mm->page_table_lock); | 445 | spin_lock(&mm->page_table_lock); |
446 | list_add(&rmap->list, &mp->mapper); | 446 | list_add(&rmap->list, &mp->mapper); |
447 | spin_unlock(&mm->page_table_lock); | 447 | spin_unlock(&mm->page_table_lock); |
448 | /* Set gmap segment table entry to page table. */ | 448 | /* Set gmap segment table entry to page table. */ |
449 | *table = pmd_val(*pmd) & PAGE_MASK; | 449 | *table = pmd_val(*pmd) & PAGE_MASK; |
450 | return vmaddr | (address & ~PMD_MASK); | 450 | return vmaddr | (address & ~PMD_MASK); |
451 | } | 451 | } |
452 | return -EFAULT; | 452 | return -EFAULT; |
453 | } | 453 | } |
454 | 454 | ||
455 | unsigned long gmap_fault(unsigned long address, struct gmap *gmap) | 455 | unsigned long gmap_fault(unsigned long address, struct gmap *gmap) |
456 | { | 456 | { |
457 | unsigned long rc; | 457 | unsigned long rc; |
458 | 458 | ||
459 | down_read(&gmap->mm->mmap_sem); | 459 | down_read(&gmap->mm->mmap_sem); |
460 | rc = __gmap_fault(address, gmap); | 460 | rc = __gmap_fault(address, gmap); |
461 | up_read(&gmap->mm->mmap_sem); | 461 | up_read(&gmap->mm->mmap_sem); |
462 | 462 | ||
463 | return rc; | 463 | return rc; |
464 | } | 464 | } |
465 | EXPORT_SYMBOL_GPL(gmap_fault); | 465 | EXPORT_SYMBOL_GPL(gmap_fault); |
466 | 466 | ||
467 | void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) | 467 | void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) |
468 | { | 468 | { |
469 | 469 | ||
470 | unsigned long *table, address, size; | 470 | unsigned long *table, address, size; |
471 | struct vm_area_struct *vma; | 471 | struct vm_area_struct *vma; |
472 | struct gmap_pgtable *mp; | 472 | struct gmap_pgtable *mp; |
473 | struct page *page; | 473 | struct page *page; |
474 | 474 | ||
475 | down_read(&gmap->mm->mmap_sem); | 475 | down_read(&gmap->mm->mmap_sem); |
476 | address = from; | 476 | address = from; |
477 | while (address < to) { | 477 | while (address < to) { |
478 | /* Walk the gmap address space page table */ | 478 | /* Walk the gmap address space page table */ |
479 | table = gmap->table + ((address >> 53) & 0x7ff); | 479 | table = gmap->table + ((address >> 53) & 0x7ff); |
480 | if (unlikely(*table & _REGION_ENTRY_INV)) { | 480 | if (unlikely(*table & _REGION_ENTRY_INV)) { |
481 | address = (address + PMD_SIZE) & PMD_MASK; | 481 | address = (address + PMD_SIZE) & PMD_MASK; |
482 | continue; | 482 | continue; |
483 | } | 483 | } |
484 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 484 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); |
485 | table = table + ((address >> 42) & 0x7ff); | 485 | table = table + ((address >> 42) & 0x7ff); |
486 | if (unlikely(*table & _REGION_ENTRY_INV)) { | 486 | if (unlikely(*table & _REGION_ENTRY_INV)) { |
487 | address = (address + PMD_SIZE) & PMD_MASK; | 487 | address = (address + PMD_SIZE) & PMD_MASK; |
488 | continue; | 488 | continue; |
489 | } | 489 | } |
490 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 490 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); |
491 | table = table + ((address >> 31) & 0x7ff); | 491 | table = table + ((address >> 31) & 0x7ff); |
492 | if (unlikely(*table & _REGION_ENTRY_INV)) { | 492 | if (unlikely(*table & _REGION_ENTRY_INV)) { |
493 | address = (address + PMD_SIZE) & PMD_MASK; | 493 | address = (address + PMD_SIZE) & PMD_MASK; |
494 | continue; | 494 | continue; |
495 | } | 495 | } |
496 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); | 496 | table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); |
497 | table = table + ((address >> 20) & 0x7ff); | 497 | table = table + ((address >> 20) & 0x7ff); |
498 | if (unlikely(*table & _SEGMENT_ENTRY_INV)) { | 498 | if (unlikely(*table & _SEGMENT_ENTRY_INV)) { |
499 | address = (address + PMD_SIZE) & PMD_MASK; | 499 | address = (address + PMD_SIZE) & PMD_MASK; |
500 | continue; | 500 | continue; |
501 | } | 501 | } |
502 | page = pfn_to_page(*table >> PAGE_SHIFT); | 502 | page = pfn_to_page(*table >> PAGE_SHIFT); |
503 | mp = (struct gmap_pgtable *) page->index; | 503 | mp = (struct gmap_pgtable *) page->index; |
504 | vma = find_vma(gmap->mm, mp->vmaddr); | 504 | vma = find_vma(gmap->mm, mp->vmaddr); |
505 | size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); | 505 | size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); |
506 | zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), | 506 | zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), |
507 | size, NULL); | 507 | size, NULL); |
508 | address = (address + PMD_SIZE) & PMD_MASK; | 508 | address = (address + PMD_SIZE) & PMD_MASK; |
509 | } | 509 | } |
510 | up_read(&gmap->mm->mmap_sem); | 510 | up_read(&gmap->mm->mmap_sem); |
511 | } | 511 | } |
512 | EXPORT_SYMBOL_GPL(gmap_discard); | 512 | EXPORT_SYMBOL_GPL(gmap_discard); |
513 | 513 | ||
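The size computation in gmap_discard() clamps each zap to the end of the current 1 MB segment, so a range that starts or ends unaligned is still handled segment by segment. A small standalone model of that clamping, assuming the s390 1 MB PMD_SIZE (constants re-declared here for illustration):

#include <stdio.h>

#define PMD_SIZE	(1UL << 20)	/* 1 MB segments, as on s390 */
#define PMD_MASK	(~(PMD_SIZE - 1))

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long from = 0x180123UL, to = 0x400000UL;

	for (unsigned long address = from; address < to;
	     address = (address + PMD_SIZE) & PMD_MASK) {
		/* same expression as in gmap_discard() above */
		unsigned long size = min_ul(to - address,
					    PMD_SIZE - (address & ~PMD_MASK));
		printf("zap %#lx..%#lx (%#lx bytes)\n",
		       address, address + size, size);
	}
	return 0;
}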
514 | void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) | 514 | void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) |
515 | { | 515 | { |
516 | struct gmap_rmap *rmap, *next; | 516 | struct gmap_rmap *rmap, *next; |
517 | struct gmap_pgtable *mp; | 517 | struct gmap_pgtable *mp; |
518 | struct page *page; | 518 | struct page *page; |
519 | int flush; | 519 | int flush; |
520 | 520 | ||
521 | flush = 0; | 521 | flush = 0; |
522 | spin_lock(&mm->page_table_lock); | 522 | spin_lock(&mm->page_table_lock); |
523 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 523 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
524 | mp = (struct gmap_pgtable *) page->index; | 524 | mp = (struct gmap_pgtable *) page->index; |
525 | list_for_each_entry_safe(rmap, next, &mp->mapper, list) { | 525 | list_for_each_entry_safe(rmap, next, &mp->mapper, list) { |
526 | *rmap->entry = | 526 | *rmap->entry = |
527 | _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; | 527 | _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; |
528 | list_del(&rmap->list); | 528 | list_del(&rmap->list); |
529 | kfree(rmap); | 529 | kfree(rmap); |
530 | flush = 1; | 530 | flush = 1; |
531 | } | 531 | } |
532 | spin_unlock(&mm->page_table_lock); | 532 | spin_unlock(&mm->page_table_lock); |
533 | if (flush) | 533 | if (flush) |
534 | __tlb_flush_global(); | 534 | __tlb_flush_global(); |
535 | } | 535 | } |
536 | 536 | ||
537 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, | 537 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, |
538 | unsigned long vmaddr) | 538 | unsigned long vmaddr) |
539 | { | 539 | { |
540 | struct page *page; | 540 | struct page *page; |
541 | unsigned long *table; | 541 | unsigned long *table; |
542 | struct gmap_pgtable *mp; | 542 | struct gmap_pgtable *mp; |
543 | 543 | ||
544 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); | 544 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); |
545 | if (!page) | 545 | if (!page) |
546 | return NULL; | 546 | return NULL; |
547 | mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT); | 547 | mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT); |
548 | if (!mp) { | 548 | if (!mp) { |
549 | __free_page(page); | 549 | __free_page(page); |
550 | return NULL; | 550 | return NULL; |
551 | } | 551 | } |
552 | pgtable_page_ctor(page); | 552 | pgtable_page_ctor(page); |
553 | mp->vmaddr = vmaddr & PMD_MASK; | 553 | mp->vmaddr = vmaddr & PMD_MASK; |
554 | INIT_LIST_HEAD(&mp->mapper); | 554 | INIT_LIST_HEAD(&mp->mapper); |
555 | page->index = (unsigned long) mp; | 555 | page->index = (unsigned long) mp; |
556 | atomic_set(&page->_mapcount, 3); /* = FRAG_MASK: both 2K halves in use */ | 556 | atomic_set(&page->_mapcount, 3); /* = FRAG_MASK: both 2K halves in use */ |
557 | table = (unsigned long *) page_to_phys(page); | 557 | table = (unsigned long *) page_to_phys(page); |
558 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); | 558 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); |
559 | clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); | 559 | clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); |
560 | return table; | 560 | return table; |
561 | } | 561 | } |
562 | 562 | ||
563 | static inline void page_table_free_pgste(unsigned long *table) | 563 | static inline void page_table_free_pgste(unsigned long *table) |
564 | { | 564 | { |
565 | struct page *page; | 565 | struct page *page; |
566 | struct gmap_pgtable *mp; | 566 | struct gmap_pgtable *mp; |
567 | 567 | ||
568 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 568 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
569 | mp = (struct gmap_pgtable *) page->index; | 569 | mp = (struct gmap_pgtable *) page->index; |
570 | BUG_ON(!list_empty(&mp->mapper)); | 570 | BUG_ON(!list_empty(&mp->mapper)); |
571 | pgtable_page_dtor(page); | 571 | pgtable_page_dtor(page); |
572 | atomic_set(&page->_mapcount, -1); | 572 | atomic_set(&page->_mapcount, -1); |
573 | kfree(mp); | 573 | kfree(mp); |
574 | __free_page(page); | 574 | __free_page(page); |
575 | } | 575 | } |
576 | 576 | ||
577 | #else /* CONFIG_PGSTE */ | 577 | #else /* CONFIG_PGSTE */ |
578 | 578 | ||
579 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, | 579 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, |
580 | unsigned long vmaddr) | 580 | unsigned long vmaddr) |
581 | { | 581 | { |
582 | return NULL; | 582 | return NULL; |
583 | } | 583 | } |
584 | 584 | ||
585 | static inline void page_table_free_pgste(unsigned long *table) | 585 | static inline void page_table_free_pgste(unsigned long *table) |
586 | { | 586 | { |
587 | } | 587 | } |
588 | 588 | ||
589 | static inline void gmap_unmap_notifier(struct mm_struct *mm, | 589 | static inline void gmap_unmap_notifier(struct mm_struct *mm, |
590 | unsigned long *table) | 590 | unsigned long *table) |
591 | { | 591 | { |
592 | } | 592 | } |
593 | 593 | ||
594 | #endif /* CONFIG_PGSTE */ | 594 | #endif /* CONFIG_PGSTE */ |
595 | 595 | ||
596 | static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) | 596 | static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) |
597 | { | 597 | { |
598 | unsigned int old, new; | 598 | unsigned int old, new; |
599 | 599 | ||
600 | do { | 600 | do { |
601 | old = atomic_read(v); | 601 | old = atomic_read(v); |
602 | new = old ^ bits; | 602 | new = old ^ bits; |
603 | } while (atomic_cmpxchg(v, old, new) != old); | 603 | } while (atomic_cmpxchg(v, old, new) != old); |
604 | return new; | 604 | return new; |
605 | } | 605 | } |
606 | 606 | ||
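atomic_xor_bits() is a classic compare-and-swap retry loop: recompute old ^ bits until the cmpxchg lands, then return the new mask. An equivalent userspace rendering with C11 atomics (a sketch, not the kernel primitive):

#include <stdatomic.h>
#include <stdio.h>

/*
 * Userspace model of atomic_xor_bits() above: retry the
 * compare-and-swap until the XOR is applied atomically, and return
 * the new value. The kernel version works on atomic_t instead.
 */
static unsigned int atomic_xor_bits_model(atomic_uint *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_load(v);
		new = old ^ bits;
	} while (!atomic_compare_exchange_weak(v, &old, new));
	return new;
}

int main(void)
{
	atomic_uint v = 0x3;

	printf("%#x\n", atomic_xor_bits_model(&v, 0x1));	/* prints 0x2 */
	return 0;
}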
607 | /* | 607 | /* |
608 | * page table entry allocation/free routines. | 608 | * page table entry allocation/free routines. |
609 | */ | 609 | */ |
610 | unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) | 610 | unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) |
611 | { | 611 | { |
612 | unsigned long *uninitialized_var(table); | 612 | unsigned long *uninitialized_var(table); |
613 | struct page *uninitialized_var(page); | 613 | struct page *uninitialized_var(page); |
614 | unsigned int mask, bit; | 614 | unsigned int mask, bit; |
615 | 615 | ||
616 | if (mm_has_pgste(mm)) | 616 | if (mm_has_pgste(mm)) |
617 | return page_table_alloc_pgste(mm, vmaddr); | 617 | return page_table_alloc_pgste(mm, vmaddr); |
618 | /* Allocate fragments of a 4K page as 1K/2K page table */ | 618 | /* Allocate fragments of a 4K page as 1K/2K page table */ |
619 | spin_lock_bh(&mm->context.list_lock); | 619 | spin_lock_bh(&mm->context.list_lock); |
620 | mask = FRAG_MASK; | 620 | mask = FRAG_MASK; |
621 | if (!list_empty(&mm->context.pgtable_list)) { | 621 | if (!list_empty(&mm->context.pgtable_list)) { |
622 | page = list_first_entry(&mm->context.pgtable_list, | 622 | page = list_first_entry(&mm->context.pgtable_list, |
623 | struct page, lru); | 623 | struct page, lru); |
624 | table = (unsigned long *) page_to_phys(page); | 624 | table = (unsigned long *) page_to_phys(page); |
625 | mask = atomic_read(&page->_mapcount); | 625 | mask = atomic_read(&page->_mapcount); |
626 | mask = mask | (mask >> 4); | 626 | mask = mask | (mask >> 4); |
627 | } | 627 | } |
628 | if ((mask & FRAG_MASK) == FRAG_MASK) { | 628 | if ((mask & FRAG_MASK) == FRAG_MASK) { |
629 | spin_unlock_bh(&mm->context.list_lock); | 629 | spin_unlock_bh(&mm->context.list_lock); |
630 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); | 630 | page = alloc_page(GFP_KERNEL|__GFP_REPEAT); |
631 | if (!page) | 631 | if (!page) |
632 | return NULL; | 632 | return NULL; |
633 | pgtable_page_ctor(page); | 633 | pgtable_page_ctor(page); |
634 | atomic_set(&page->_mapcount, 1); | 634 | atomic_set(&page->_mapcount, 1); |
635 | table = (unsigned long *) page_to_phys(page); | 635 | table = (unsigned long *) page_to_phys(page); |
636 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); | 636 | clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); |
637 | spin_lock_bh(&mm->context.list_lock); | 637 | spin_lock_bh(&mm->context.list_lock); |
638 | list_add(&page->lru, &mm->context.pgtable_list); | 638 | list_add(&page->lru, &mm->context.pgtable_list); |
639 | } else { | 639 | } else { |
640 | for (bit = 1; mask & bit; bit <<= 1) | 640 | for (bit = 1; mask & bit; bit <<= 1) |
641 | table += PTRS_PER_PTE; | 641 | table += PTRS_PER_PTE; |
642 | mask = atomic_xor_bits(&page->_mapcount, bit); | 642 | mask = atomic_xor_bits(&page->_mapcount, bit); |
643 | if ((mask & FRAG_MASK) == FRAG_MASK) | 643 | if ((mask & FRAG_MASK) == FRAG_MASK) |
644 | list_del(&page->lru); | 644 | list_del(&page->lru); |
645 | } | 645 | } |
646 | spin_unlock_bh(&mm->context.list_lock); | 646 | spin_unlock_bh(&mm->context.list_lock); |
647 | return table; | 647 | return table; |
648 | } | 648 | } |
649 | 649 | ||
650 | void page_table_free(struct mm_struct *mm, unsigned long *table) | 650 | void page_table_free(struct mm_struct *mm, unsigned long *table) |
651 | { | 651 | { |
652 | struct page *page; | 652 | struct page *page; |
653 | unsigned int bit, mask; | 653 | unsigned int bit, mask; |
654 | 654 | ||
655 | if (mm_has_pgste(mm)) { | 655 | if (mm_has_pgste(mm)) { |
656 | gmap_unmap_notifier(mm, table); | 656 | gmap_unmap_notifier(mm, table); |
657 | return page_table_free_pgste(table); | 657 | return page_table_free_pgste(table); |
658 | } | 658 | } |
659 | /* Free 1K/2K page table fragment of a 4K page */ | 659 | /* Free 1K/2K page table fragment of a 4K page */ |
660 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 660 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
661 | bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); | 661 | bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); |
662 | spin_lock_bh(&mm->context.list_lock); | 662 | spin_lock_bh(&mm->context.list_lock); |
663 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) | 663 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) |
664 | list_del(&page->lru); | 664 | list_del(&page->lru); |
665 | mask = atomic_xor_bits(&page->_mapcount, bit); | 665 | mask = atomic_xor_bits(&page->_mapcount, bit); |
666 | if (mask & FRAG_MASK) | 666 | if (mask & FRAG_MASK) |
667 | list_add(&page->lru, &mm->context.pgtable_list); | 667 | list_add(&page->lru, &mm->context.pgtable_list); |
668 | spin_unlock_bh(&mm->context.list_lock); | 668 | spin_unlock_bh(&mm->context.list_lock); |
669 | if (mask == 0) { | 669 | if (mask == 0) { |
670 | pgtable_page_dtor(page); | 670 | pgtable_page_dtor(page); |
671 | atomic_set(&page->_mapcount, -1); | 671 | atomic_set(&page->_mapcount, -1); |
672 | __free_page(page); | 672 | __free_page(page); |
673 | } | 673 | } |
674 | } | 674 | } |
675 | 675 | ||
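The allocator above carves each 4K page into 1K/2K page-table fragments and tracks which fragments are in use as a bitmask stashed in page->_mapcount (the upper nibble marks fragments pending RCU free, which is why the alloc path folds the mask with mask >> 4). A toy model of the bitmask bookkeeping, assuming the 64-bit FRAG_MASK of 0x03, i.e. two 2K fragments per page:

#include <stdio.h>

/*
 * Userspace model of the fragment bookkeeping in page_table_alloc()
 * and page_table_free() above. FRAG_MASK 0x03 (two 2K fragments) is
 * assumed for illustration; it is configuration-dependent.
 */
#define FRAG_MASK 0x3U

static unsigned int mask;	/* stands in for page->_mapcount */

static int frag_alloc(void)
{
	int idx = 0;

	for (unsigned int bit = 1; bit & FRAG_MASK; bit <<= 1, idx++) {
		if (!(mask & bit)) {
			mask ^= bit;	/* claim this fragment */
			return idx;
		}
	}
	return -1;	/* all fragments used: allocate a new page */
}

static void frag_free(int idx)
{
	mask ^= 1U << idx;	/* release the fragment's bit */
}

int main(void)
{
	int a = frag_alloc(), b = frag_alloc();

	printf("alloc %d %d, page full: %d\n", a, b, frag_alloc());
	frag_free(a);
	printf("after free, alloc %d\n", frag_alloc());
	return 0;
}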
676 | static void __page_table_free_rcu(void *table, unsigned bit) | 676 | static void __page_table_free_rcu(void *table, unsigned bit) |
677 | { | 677 | { |
678 | struct page *page; | 678 | struct page *page; |
679 | 679 | ||
680 | if (bit == FRAG_MASK) | 680 | if (bit == FRAG_MASK) |
681 | return page_table_free_pgste(table); | 681 | return page_table_free_pgste(table); |
682 | /* Free 1K/2K page table fragment of a 4K page */ | 682 | /* Free 1K/2K page table fragment of a 4K page */ |
683 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 683 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
684 | if (atomic_xor_bits(&page->_mapcount, bit) == 0) { | 684 | if (atomic_xor_bits(&page->_mapcount, bit) == 0) { |
685 | pgtable_page_dtor(page); | 685 | pgtable_page_dtor(page); |
686 | atomic_set(&page->_mapcount, -1); | 686 | atomic_set(&page->_mapcount, -1); |
687 | __free_page(page); | 687 | __free_page(page); |
688 | } | 688 | } |
689 | } | 689 | } |
690 | 690 | ||
691 | void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) | 691 | void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) |
692 | { | 692 | { |
693 | struct mm_struct *mm; | 693 | struct mm_struct *mm; |
694 | struct page *page; | 694 | struct page *page; |
695 | unsigned int bit, mask; | 695 | unsigned int bit, mask; |
696 | 696 | ||
697 | mm = tlb->mm; | 697 | mm = tlb->mm; |
698 | if (mm_has_pgste(mm)) { | 698 | if (mm_has_pgste(mm)) { |
699 | gmap_unmap_notifier(mm, table); | 699 | gmap_unmap_notifier(mm, table); |
700 | table = (unsigned long *) (__pa(table) | FRAG_MASK); | 700 | table = (unsigned long *) (__pa(table) | FRAG_MASK); |
701 | tlb_remove_table(tlb, table); | 701 | tlb_remove_table(tlb, table); |
702 | return; | 702 | return; |
703 | } | 703 | } |
704 | bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); | 704 | bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); |
705 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | 705 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
706 | spin_lock_bh(&mm->context.list_lock); | 706 | spin_lock_bh(&mm->context.list_lock); |
707 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) | 707 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) |
708 | list_del(&page->lru); | 708 | list_del(&page->lru); |
709 | mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4)); | 709 | mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4)); |
710 | if (mask & FRAG_MASK) | 710 | if (mask & FRAG_MASK) |
711 | list_add_tail(&page->lru, &mm->context.pgtable_list); | 711 | list_add_tail(&page->lru, &mm->context.pgtable_list); |
712 | spin_unlock_bh(&mm->context.list_lock); | 712 | spin_unlock_bh(&mm->context.list_lock); |
713 | table = (unsigned long *) (__pa(table) | (bit << 4)); | 713 | table = (unsigned long *) (__pa(table) | (bit << 4)); |
714 | tlb_remove_table(tlb, table); | 714 | tlb_remove_table(tlb, table); |
715 | } | 715 | } |
716 | 716 | ||
717 | void __tlb_remove_table(void *_table) | 717 | void __tlb_remove_table(void *_table) |
718 | { | 718 | { |
719 | const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK; | 719 | const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK; |
720 | void *table = (void *)((unsigned long) _table & ~mask); | 720 | void *table = (void *)((unsigned long) _table & ~mask); |
721 | unsigned type = (unsigned long) _table & mask; | 721 | unsigned type = (unsigned long) _table & mask; |
722 | 722 | ||
723 | if (type) | 723 | if (type) |
724 | __page_table_free_rcu(table, type); | 724 | __page_table_free_rcu(table, type); |
725 | else | 725 | else |
726 | free_pages((unsigned long) table, ALLOC_ORDER); | 726 | free_pages((unsigned long) table, ALLOC_ORDER); |
727 | } | 727 | } |
728 | 728 | ||
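page_table_free_rcu() and __tlb_remove_table() smuggle the fragment type through the RCU batch by tagging the low bits of the page-aligned table address: bit << 4 marks a pending fragment, FRAG_MASK marks a full pgste table, and zero means a whole CRST page. A userspace sketch of that pointer tagging (FRAG_MASK again assumed to be 0x03):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define FRAG_MASK 0x3UL
#define TYPE_MASK ((FRAG_MASK << 4) | FRAG_MASK)	/* as in __tlb_remove_table() */

int main(void)
{
	/* 4K-aligned allocation, so the low bits are free to carry tags */
	void *table = aligned_alloc(4096, 4096);
	uintptr_t tagged = (uintptr_t)table | (1UL << 4);	/* "bit << 4" */

	void *untagged = (void *)(tagged & ~TYPE_MASK);
	unsigned type = tagged & TYPE_MASK;

	printf("table=%p untagged=%p type=%#x\n", table, untagged, type);
	free(untagged);
	return 0;
}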
729 | static void tlb_remove_table_smp_sync(void *arg) | 729 | static void tlb_remove_table_smp_sync(void *arg) |
730 | { | 730 | { |
731 | /* Simply deliver the interrupt */ | 731 | /* Simply deliver the interrupt */ |
732 | } | 732 | } |
733 | 733 | ||
734 | static void tlb_remove_table_one(void *table) | 734 | static void tlb_remove_table_one(void *table) |
735 | { | 735 | { |
736 | /* | 736 | /* |
737 | * This isn't an RCU grace period and hence the page-tables cannot be | 737 | * This isn't an RCU grace period and hence the page-tables cannot be |
738 | * assumed to be actually RCU-freed. | 738 | * assumed to be actually RCU-freed. |
739 | * | 739 | * |
740 | * It is however sufficient for software page-table walkers that rely | 740 | * It is however sufficient for software page-table walkers that rely |
741 | * on IRQ disabling. See the comment near struct mmu_table_batch. | 741 | * on IRQ disabling. See the comment near struct mmu_table_batch. |
742 | */ | 742 | */ |
743 | smp_call_function(tlb_remove_table_smp_sync, NULL, 1); | 743 | smp_call_function(tlb_remove_table_smp_sync, NULL, 1); |
744 | __tlb_remove_table(table); | 744 | __tlb_remove_table(table); |
745 | } | 745 | } |
746 | 746 | ||
747 | static void tlb_remove_table_rcu(struct rcu_head *head) | 747 | static void tlb_remove_table_rcu(struct rcu_head *head) |
748 | { | 748 | { |
749 | struct mmu_table_batch *batch; | 749 | struct mmu_table_batch *batch; |
750 | int i; | 750 | int i; |
751 | 751 | ||
752 | batch = container_of(head, struct mmu_table_batch, rcu); | 752 | batch = container_of(head, struct mmu_table_batch, rcu); |
753 | 753 | ||
754 | for (i = 0; i < batch->nr; i++) | 754 | for (i = 0; i < batch->nr; i++) |
755 | __tlb_remove_table(batch->tables[i]); | 755 | __tlb_remove_table(batch->tables[i]); |
756 | 756 | ||
757 | free_page((unsigned long)batch); | 757 | free_page((unsigned long)batch); |
758 | } | 758 | } |
759 | 759 | ||
760 | void tlb_table_flush(struct mmu_gather *tlb) | 760 | void tlb_table_flush(struct mmu_gather *tlb) |
761 | { | 761 | { |
762 | struct mmu_table_batch **batch = &tlb->batch; | 762 | struct mmu_table_batch **batch = &tlb->batch; |
763 | 763 | ||
764 | if (*batch) { | 764 | if (*batch) { |
765 | __tlb_flush_mm(tlb->mm); | 765 | __tlb_flush_mm(tlb->mm); |
766 | call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); | 766 | call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); |
767 | *batch = NULL; | 767 | *batch = NULL; |
768 | } | 768 | } |
769 | } | 769 | } |
770 | 770 | ||
771 | void tlb_remove_table(struct mmu_gather *tlb, void *table) | 771 | void tlb_remove_table(struct mmu_gather *tlb, void *table) |
772 | { | 772 | { |
773 | struct mmu_table_batch **batch = &tlb->batch; | 773 | struct mmu_table_batch **batch = &tlb->batch; |
774 | 774 | ||
775 | if (*batch == NULL) { | 775 | if (*batch == NULL) { |
776 | *batch = (struct mmu_table_batch *) | 776 | *batch = (struct mmu_table_batch *) |
777 | __get_free_page(GFP_NOWAIT | __GFP_NOWARN); | 777 | __get_free_page(GFP_NOWAIT | __GFP_NOWARN); |
778 | if (*batch == NULL) { | 778 | if (*batch == NULL) { |
779 | __tlb_flush_mm(tlb->mm); | 779 | __tlb_flush_mm(tlb->mm); |
780 | tlb_remove_table_one(table); | 780 | tlb_remove_table_one(table); |
781 | return; | 781 | return; |
782 | } | 782 | } |
783 | (*batch)->nr = 0; | 783 | (*batch)->nr = 0; |
784 | } | 784 | } |
785 | (*batch)->tables[(*batch)->nr++] = table; | 785 | (*batch)->tables[(*batch)->nr++] = table; |
786 | if ((*batch)->nr == MAX_TABLE_BATCH) | 786 | if ((*batch)->nr == MAX_TABLE_BATCH) |
787 | tlb_table_flush(tlb); | 787 | tlb_table_flush(tlb); |
788 | } | 788 | } |
789 | 789 | ||
790 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 790 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
791 | void thp_split_vma(struct vm_area_struct *vma) | 791 | void thp_split_vma(struct vm_area_struct *vma) |
792 | { | 792 | { |
793 | unsigned long addr; | 793 | unsigned long addr; |
794 | struct page *page; | 794 | struct page *page; |
795 | 795 | ||
796 | for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { | 796 | for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { |
797 | page = follow_page(vma, addr, FOLL_SPLIT); /* FOLL_SPLIT splits any THP mapped here */ | 797 | page = follow_page(vma, addr, FOLL_SPLIT); /* FOLL_SPLIT splits any THP mapped here */ |
798 | } | 798 | } |
799 | } | 799 | } |
800 | 800 | ||
801 | void thp_split_mm(struct mm_struct *mm) | 801 | void thp_split_mm(struct mm_struct *mm) |
802 | { | 802 | { |
803 | struct vm_area_struct *vma = mm->mmap; | 803 | struct vm_area_struct *vma = mm->mmap; |
804 | 804 | ||
805 | while (vma != NULL) { | 805 | while (vma != NULL) { |
806 | thp_split_vma(vma); | 806 | thp_split_vma(vma); |
807 | vma->vm_flags &= ~VM_HUGEPAGE; | 807 | vma->vm_flags &= ~VM_HUGEPAGE; |
808 | vma->vm_flags |= VM_NOHUGEPAGE; | 808 | vma->vm_flags |= VM_NOHUGEPAGE; |
809 | vma = vma->vm_next; | 809 | vma = vma->vm_next; |
810 | } | 810 | } |
811 | } | 811 | } |
812 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 812 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
813 | 813 | ||
814 | /* | 814 | /* |
815 | * switch on pgstes for the current userspace process (for kvm) | 815 | * switch on pgstes for the current userspace process (for kvm) |
816 | */ | 816 | */ |
817 | int s390_enable_sie(void) | 817 | int s390_enable_sie(void) |
818 | { | 818 | { |
819 | struct task_struct *tsk = current; | 819 | struct task_struct *tsk = current; |
820 | struct mm_struct *mm, *old_mm; | 820 | struct mm_struct *mm, *old_mm; |
821 | 821 | ||
822 | /* Do we have a switched amode? If not, we cannot do SIE */ | 822 | /* Do we have a switched amode? If not, we cannot do SIE */ |
823 | if (s390_user_mode == HOME_SPACE_MODE) | 823 | if (s390_user_mode == HOME_SPACE_MODE) |
824 | return -EINVAL; | 824 | return -EINVAL; |
825 | 825 | ||
826 | /* Do we have pgstes? If yes, we are done */ | 826 | /* Do we have pgstes? If yes, we are done */ |
827 | if (mm_has_pgste(tsk->mm)) | 827 | if (mm_has_pgste(tsk->mm)) |
828 | return 0; | 828 | return 0; |
829 | 829 | ||
830 | /* let's check if we are allowed to replace the mm */ | 830 | /* let's check if we are allowed to replace the mm */ |
831 | task_lock(tsk); | 831 | task_lock(tsk); |
832 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | 832 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || |
833 | #ifdef CONFIG_AIO | 833 | #ifdef CONFIG_AIO |
834 | !hlist_empty(&tsk->mm->ioctx_list) || | 834 | !hlist_empty(&tsk->mm->ioctx_list) || |
835 | #endif | 835 | #endif |
836 | tsk->mm != tsk->active_mm) { | 836 | tsk->mm != tsk->active_mm) { |
837 | task_unlock(tsk); | 837 | task_unlock(tsk); |
838 | return -EINVAL; | 838 | return -EINVAL; |
839 | } | 839 | } |
840 | task_unlock(tsk); | 840 | task_unlock(tsk); |
841 | 841 | ||
842 | /* we copy the mm and let dup_mm create the page tables with pgstes */ | 842 | /* we copy the mm and let dup_mm create the page tables with pgstes */ |
843 | tsk->mm->context.alloc_pgste = 1; | 843 | tsk->mm->context.alloc_pgste = 1; |
844 | /* make sure that both mms have a correct rss state */ | 844 | /* make sure that both mms have a correct rss state */ |
845 | sync_mm_rss(tsk->mm); | 845 | sync_mm_rss(tsk->mm); |
846 | mm = dup_mm(tsk); | 846 | mm = dup_mm(tsk); |
847 | tsk->mm->context.alloc_pgste = 0; | 847 | tsk->mm->context.alloc_pgste = 0; |
848 | if (!mm) | 848 | if (!mm) |
849 | return -ENOMEM; | 849 | return -ENOMEM; |
850 | 850 | ||
851 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 851 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
852 | /* split thp mappings and disable thp for future mappings */ | 852 | /* split thp mappings and disable thp for future mappings */ |
853 | thp_split_mm(mm); | 853 | thp_split_mm(mm); |
854 | mm->def_flags |= VM_NOHUGEPAGE; | 854 | mm->def_flags |= VM_NOHUGEPAGE; |
855 | #endif | 855 | #endif |
856 | 856 | ||
857 | /* Now let's check again if something happened */ | 857 | /* Now let's check again if something happened */ |
858 | task_lock(tsk); | 858 | task_lock(tsk); |
859 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | 859 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || |
860 | #ifdef CONFIG_AIO | 860 | #ifdef CONFIG_AIO |
861 | !hlist_empty(&tsk->mm->ioctx_list) || | 861 | !hlist_empty(&tsk->mm->ioctx_list) || |
862 | #endif | 862 | #endif |
863 | tsk->mm != tsk->active_mm) { | 863 | tsk->mm != tsk->active_mm) { |
864 | mmput(mm); | 864 | mmput(mm); |
865 | task_unlock(tsk); | 865 | task_unlock(tsk); |
866 | return -EINVAL; | 866 | return -EINVAL; |
867 | } | 867 | } |
868 | 868 | ||
869 | /* ok, we are alone. No ptrace, no threads, etc. */ | 869 | /* ok, we are alone. No ptrace, no threads, etc. */ |
870 | old_mm = tsk->mm; | 870 | old_mm = tsk->mm; |
871 | tsk->mm = tsk->active_mm = mm; | 871 | tsk->mm = tsk->active_mm = mm; |
872 | preempt_disable(); | 872 | preempt_disable(); |
873 | update_mm(mm, tsk); | 873 | update_mm(mm, tsk); |
874 | atomic_inc(&mm->context.attach_count); | 874 | atomic_inc(&mm->context.attach_count); |
875 | atomic_dec(&old_mm->context.attach_count); | 875 | atomic_dec(&old_mm->context.attach_count); |
876 | cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); | 876 | cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); |
877 | preempt_enable(); | 877 | preempt_enable(); |
878 | task_unlock(tsk); | 878 | task_unlock(tsk); |
879 | mmput(old_mm); | 879 | mmput(old_mm); |
880 | return 0; | 880 | return 0; |
881 | } | 881 | } |
882 | EXPORT_SYMBOL_GPL(s390_enable_sie); | 882 | EXPORT_SYMBOL_GPL(s390_enable_sie); |
883 | 883 | ||
884 | #if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION) | 884 | #if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION) |
885 | bool kernel_page_present(struct page *page) | 885 | bool kernel_page_present(struct page *page) |
886 | { | 886 | { |
887 | unsigned long addr; | 887 | unsigned long addr; |
888 | int cc; | 888 | int cc; |
889 | 889 | ||
890 | addr = page_to_phys(page); | 890 | addr = page_to_phys(page); |
891 | asm volatile( | 891 | asm volatile( |
892 | " lra %1,0(%1)\n" | 892 | " lra %1,0(%1)\n" |
893 | " ipm %0\n" | 893 | " ipm %0\n" |
894 | " srl %0,28" | 894 | " srl %0,28" |
895 | : "=d" (cc), "+a" (addr) : : "cc"); | 895 | : "=d" (cc), "+a" (addr) : : "cc"); |
896 | return cc == 0; | 896 | return cc == 0; |
897 | } | 897 | } |
898 | #endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */ | 898 | #endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */ |
899 | 899 | ||
900 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 900 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
901 | int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, | ||
902 | pmd_t *pmdp) | ||
903 | { | ||
904 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
905 | /* No need to flush TLB; | ||
906 | * on s390, reference bits are kept in the storage key and never in the TLB */ | ||
907 | return pmdp_test_and_clear_young(vma, address, pmdp); | ||
908 | } | ||
909 | |||
910 | int pmdp_set_access_flags(struct vm_area_struct *vma, | ||
911 | unsigned long address, pmd_t *pmdp, | ||
912 | pmd_t entry, int dirty) | ||
913 | { | ||
914 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
915 | |||
916 | if (pmd_same(*pmdp, entry)) | ||
917 | return 0; | ||
918 | pmdp_invalidate(vma, address, pmdp); | ||
919 | set_pmd_at(vma->vm_mm, address, pmdp, entry); | ||
920 | return 1; | ||
921 | } | ||
922 | |||
901 | static void pmdp_splitting_flush_sync(void *arg) | 923 | static void pmdp_splitting_flush_sync(void *arg) |
902 | { | 924 | { |
903 | /* Simply deliver the interrupt */ | 925 | /* Simply deliver the interrupt */ |
904 | } | 926 | } |
905 | 927 | ||
906 | void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, | 928 | void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, |
907 | pmd_t *pmdp) | 929 | pmd_t *pmdp) |
908 | { | 930 | { |
909 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | 931 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); |
910 | if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT, | 932 | if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT, |
911 | (unsigned long *) pmdp)) { | 933 | (unsigned long *) pmdp)) { |
912 | /* need to serialize against gup-fast (IRQ disabled) */ | 934 | /* need to serialize against gup-fast (IRQ disabled) */ |
913 | smp_call_function(pmdp_splitting_flush_sync, NULL, 1); | 935 | smp_call_function(pmdp_splitting_flush_sync, NULL, 1); |
914 | } | 936 | } |
915 | } | 937 | } |
916 | 938 | ||
917 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) | 939 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) |
918 | { | 940 | { |
919 | struct list_head *lh = (struct list_head *) pgtable; | 941 | struct list_head *lh = (struct list_head *) pgtable; |
920 | 942 | ||
921 | assert_spin_locked(&mm->page_table_lock); | 943 | assert_spin_locked(&mm->page_table_lock); |
922 | 944 | ||
923 | /* FIFO */ | 945 | /* FIFO */ |
924 | if (!mm->pmd_huge_pte) | 946 | if (!mm->pmd_huge_pte) |
925 | INIT_LIST_HEAD(lh); | 947 | INIT_LIST_HEAD(lh); |
926 | else | 948 | else |
927 | list_add(lh, (struct list_head *) mm->pmd_huge_pte); | 949 | list_add(lh, (struct list_head *) mm->pmd_huge_pte); |
928 | mm->pmd_huge_pte = pgtable; | 950 | mm->pmd_huge_pte = pgtable; |
929 | } | 951 | } |
930 | 952 | ||
931 | pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) | 953 | pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) |
932 | { | 954 | { |
933 | struct list_head *lh; | 955 | struct list_head *lh; |
934 | pgtable_t pgtable; | 956 | pgtable_t pgtable; |
935 | pte_t *ptep; | 957 | pte_t *ptep; |
936 | 958 | ||
937 | assert_spin_locked(&mm->page_table_lock); | 959 | assert_spin_locked(&mm->page_table_lock); |
938 | 960 | ||
939 | /* FIFO */ | 961 | /* FIFO */ |
940 | pgtable = mm->pmd_huge_pte; | 962 | pgtable = mm->pmd_huge_pte; |
941 | lh = (struct list_head *) pgtable; | 963 | lh = (struct list_head *) pgtable; |
942 | if (list_empty(lh)) | 964 | if (list_empty(lh)) |
943 | mm->pmd_huge_pte = NULL; | 965 | mm->pmd_huge_pte = NULL; |
944 | else { | 966 | else { |
945 | mm->pmd_huge_pte = (pgtable_t) lh->next; | 967 | mm->pmd_huge_pte = (pgtable_t) lh->next; |
946 | list_del(lh); | 968 | list_del(lh); |
947 | } | 969 | } |
948 | ptep = (pte_t *) pgtable; | 970 | ptep = (pte_t *) pgtable; |
949 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | 971 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; |
950 | ptep++; | 972 | ptep++; |
951 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | 973 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; |
952 | return pgtable; | 974 | return pgtable; |
953 | } | 975 | } |
954 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 976 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
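The deposit/withdraw pair stores pre-allocated page tables for THP by reusing the first 16 bytes of each deposited table as an intrusive list node: that memory holds no live ptes while deposited, and withdraw clears the two borrowed slots before handing the table back. A minimal userspace model of the trick, with the two list helpers re-derived here (everything below is illustrative, not kernel API):

#include <stdio.h>
#include <string.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static void list_add(struct list_head *new, struct list_head *head)
{
	/* insert new right after head, as the kernel helper does */
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

static void list_del(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
}

int main(void)
{
	/* stand-ins for two 2K page tables; unsigned long keeps alignment */
	unsigned long pt_a[256], pt_b[256];
	void *pmd_huge_pte = NULL;	/* per-mm deposit anchor */

	/* deposit pt_a, then pt_b (mirrors pgtable_trans_huge_deposit) */
	INIT_LIST_HEAD((struct list_head *)pt_a);
	pmd_huge_pte = pt_a;
	list_add((struct list_head *)pt_b, (struct list_head *)pmd_huge_pte);
	pmd_huge_pte = pt_b;

	/* withdraw one table (mirrors pgtable_trans_huge_withdraw) */
	struct list_head *lh = pmd_huge_pte;
	pmd_huge_pte = (lh->next == lh) ? NULL : lh->next;
	list_del(lh);
	memset(lh, 0, sizeof(*lh));	/* like resetting the two borrowed ptes */
	printf("withdrew %p, anchor now %p\n", (void *)lh, pmd_huge_pte);
	return 0;
}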
955 | 977 |