Commit 5c1f6ee9a31cbdac90bbb8ae1ba4475031ac74b4

Authored by Aneesh Kumar K.V
Committed by Benjamin Herrenschmidt
1 parent d614bb0412

powerpc: Reduce PTE table memory wastage

We allocate one page for the last level of the Linux page table. With THP
and a large page size of 16MB, that means we waste a large part of that
page. To map a 16MB area with a 64K base page size, we only need 2K of
PTE space. This patch reduces the wastage by sharing the page allocated
for the last level of the Linux page table among multiple pmd entries. We
call these smaller chunks PTE page fragments, and the allocated page a
PTE page.
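
As a rough back-of-the-envelope check (illustrative arithmetic only,
assuming the usual 8-byte pte_t on 64-bit powerpc):

    16MB huge page / 64K base page size = 256 PTEs per last-level table
    256 PTEs * 8 bytes per pte_t        = 2K of useful PTE space

so a full 64K page per pmd entry leaves roughly 62K unused.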

In order to support systems which don't have 64K HPTE support, we also
add another 2K to the PTE page fragment. The second half of the fragment
is used for storing the slot and secondary bit information of an HPTE.
With this we now have a 4K PTE fragment.
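
A rough sketch of the resulting layout; the figures follow from the
arithmetic above rather than from the patch itself:

    4K PTE fragment:  [ 2K of Linux PTEs (256 x 8 bytes) ][ 2K of HPTE slot/secondary info ]
    64K PTE page:     16 such fragments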

We use a simple approach to share the PTE page. On allocation, we bump
the PTE page refcount to 16 and share the PTE page with the next 16 pte
alloc requests. This should help the node locality of the PTE page
fragments, assuming that the immediate pte alloc requests will mostly
come from the same NUMA node. We don't try to reuse freed PTE page
fragments, so we could still be wasting some space.
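
A minimal sketch of that sharing scheme, for orientation only; the
function and macro names below are illustrative (only the pte_frag field
in mm_context_t is taken from the patch), and the real implementation in
the diff below handles more cases:

/* Sketch only: assumes the usual kernel context (<linux/mm.h>, <linux/gfp.h>). */
#define PTE_FRAG_SIZE	4096	/* 2K of PTEs + 2K of HPTE slot info */
#define PTE_FRAG_NR	(PAGE_SIZE / PTE_FRAG_SIZE)	/* 16 with 64K pages */

/* Hand out the next fragment of the PTE page cached in the mm context. */
static void *get_pte_frag_from_cache(struct mm_struct *mm)
{
	void *ret;

	spin_lock(&mm->page_table_lock);
	ret = mm->context.pte_frag;
	if (ret) {
		void *next = ret + PTE_FRAG_SIZE;

		/* Stop caching once all fragments of this page are used. */
		if (!((unsigned long)next & ~PAGE_MASK))
			next = NULL;
		mm->context.pte_frag = next;
	}
	spin_unlock(&mm->page_table_lock);
	return ret;
}

/* Allocate a fresh PTE page and cache it for the following requests. */
static void *alloc_pte_frag_page(struct mm_struct *mm)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

	if (!page)
		return NULL;
	/*
	 * One reference per fragment, so the page is only freed once
	 * every fragment carved out of it has been released.
	 */
	atomic_set(&page->_count, PTE_FRAG_NR);
	spin_lock(&mm->page_table_lock);
	mm->context.pte_frag = page_address(page) + PTE_FRAG_SIZE;
	spin_unlock(&mm->page_table_lock);
	return page_address(page);
}

A pte alloc request would then try get_pte_frag_from_cache() first and
fall back to alloc_pte_frag_page(), with the matching free path dropping
one page reference per fragment.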

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Showing 7 changed files with 195 additions and 58 deletions

arch/powerpc/include/asm/mmu-book3e.h
1 #ifndef _ASM_POWERPC_MMU_BOOK3E_H_ 1 #ifndef _ASM_POWERPC_MMU_BOOK3E_H_
2 #define _ASM_POWERPC_MMU_BOOK3E_H_ 2 #define _ASM_POWERPC_MMU_BOOK3E_H_
3 /* 3 /*
4 * Freescale Book-E/Book-3e (ISA 2.06+) MMU support 4 * Freescale Book-E/Book-3e (ISA 2.06+) MMU support
5 */ 5 */
6 6
7 /* Book-3e defined page sizes */ 7 /* Book-3e defined page sizes */
8 #define BOOK3E_PAGESZ_1K 0 8 #define BOOK3E_PAGESZ_1K 0
9 #define BOOK3E_PAGESZ_2K 1 9 #define BOOK3E_PAGESZ_2K 1
10 #define BOOK3E_PAGESZ_4K 2 10 #define BOOK3E_PAGESZ_4K 2
11 #define BOOK3E_PAGESZ_8K 3 11 #define BOOK3E_PAGESZ_8K 3
12 #define BOOK3E_PAGESZ_16K 4 12 #define BOOK3E_PAGESZ_16K 4
13 #define BOOK3E_PAGESZ_32K 5 13 #define BOOK3E_PAGESZ_32K 5
14 #define BOOK3E_PAGESZ_64K 6 14 #define BOOK3E_PAGESZ_64K 6
15 #define BOOK3E_PAGESZ_128K 7 15 #define BOOK3E_PAGESZ_128K 7
16 #define BOOK3E_PAGESZ_256K 8 16 #define BOOK3E_PAGESZ_256K 8
17 #define BOOK3E_PAGESZ_512K 9 17 #define BOOK3E_PAGESZ_512K 9
18 #define BOOK3E_PAGESZ_1M 10 18 #define BOOK3E_PAGESZ_1M 10
19 #define BOOK3E_PAGESZ_2M 11 19 #define BOOK3E_PAGESZ_2M 11
20 #define BOOK3E_PAGESZ_4M 12 20 #define BOOK3E_PAGESZ_4M 12
21 #define BOOK3E_PAGESZ_8M 13 21 #define BOOK3E_PAGESZ_8M 13
22 #define BOOK3E_PAGESZ_16M 14 22 #define BOOK3E_PAGESZ_16M 14
23 #define BOOK3E_PAGESZ_32M 15 23 #define BOOK3E_PAGESZ_32M 15
24 #define BOOK3E_PAGESZ_64M 16 24 #define BOOK3E_PAGESZ_64M 16
25 #define BOOK3E_PAGESZ_128M 17 25 #define BOOK3E_PAGESZ_128M 17
26 #define BOOK3E_PAGESZ_256M 18 26 #define BOOK3E_PAGESZ_256M 18
27 #define BOOK3E_PAGESZ_512M 19 27 #define BOOK3E_PAGESZ_512M 19
28 #define BOOK3E_PAGESZ_1GB 20 28 #define BOOK3E_PAGESZ_1GB 20
29 #define BOOK3E_PAGESZ_2GB 21 29 #define BOOK3E_PAGESZ_2GB 21
30 #define BOOK3E_PAGESZ_4GB 22 30 #define BOOK3E_PAGESZ_4GB 22
31 #define BOOK3E_PAGESZ_8GB 23 31 #define BOOK3E_PAGESZ_8GB 23
32 #define BOOK3E_PAGESZ_16GB 24 32 #define BOOK3E_PAGESZ_16GB 24
33 #define BOOK3E_PAGESZ_32GB 25 33 #define BOOK3E_PAGESZ_32GB 25
34 #define BOOK3E_PAGESZ_64GB 26 34 #define BOOK3E_PAGESZ_64GB 26
35 #define BOOK3E_PAGESZ_128GB 27 35 #define BOOK3E_PAGESZ_128GB 27
36 #define BOOK3E_PAGESZ_256GB 28 36 #define BOOK3E_PAGESZ_256GB 28
37 #define BOOK3E_PAGESZ_512GB 29 37 #define BOOK3E_PAGESZ_512GB 29
38 #define BOOK3E_PAGESZ_1TB 30 38 #define BOOK3E_PAGESZ_1TB 30
39 #define BOOK3E_PAGESZ_2TB 31 39 #define BOOK3E_PAGESZ_2TB 31
40 40
41 /* MAS registers bit definitions */ 41 /* MAS registers bit definitions */
42 42
43 #define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000) 43 #define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000)
44 #define MAS0_ESEL_MASK 0x0FFF0000 44 #define MAS0_ESEL_MASK 0x0FFF0000
45 #define MAS0_ESEL_SHIFT 16 45 #define MAS0_ESEL_SHIFT 16
46 #define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK) 46 #define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK)
47 #define MAS0_NV(x) ((x) & 0x00000FFF) 47 #define MAS0_NV(x) ((x) & 0x00000FFF)
48 #define MAS0_HES 0x00004000 48 #define MAS0_HES 0x00004000
49 #define MAS0_WQ_ALLWAYS 0x00000000 49 #define MAS0_WQ_ALLWAYS 0x00000000
50 #define MAS0_WQ_COND 0x00001000 50 #define MAS0_WQ_COND 0x00001000
51 #define MAS0_WQ_CLR_RSRV 0x00002000 51 #define MAS0_WQ_CLR_RSRV 0x00002000
52 52
53 #define MAS1_VALID 0x80000000 53 #define MAS1_VALID 0x80000000
54 #define MAS1_IPROT 0x40000000 54 #define MAS1_IPROT 0x40000000
55 #define MAS1_TID(x) (((x) << 16) & 0x3FFF0000) 55 #define MAS1_TID(x) (((x) << 16) & 0x3FFF0000)
56 #define MAS1_IND 0x00002000 56 #define MAS1_IND 0x00002000
57 #define MAS1_TS 0x00001000 57 #define MAS1_TS 0x00001000
58 #define MAS1_TSIZE_MASK 0x00000f80 58 #define MAS1_TSIZE_MASK 0x00000f80
59 #define MAS1_TSIZE_SHIFT 7 59 #define MAS1_TSIZE_SHIFT 7
60 #define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK) 60 #define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK)
61 61
62 #define MAS2_EPN (~0xFFFUL) 62 #define MAS2_EPN (~0xFFFUL)
63 #define MAS2_X0 0x00000040 63 #define MAS2_X0 0x00000040
64 #define MAS2_X1 0x00000020 64 #define MAS2_X1 0x00000020
65 #define MAS2_W 0x00000010 65 #define MAS2_W 0x00000010
66 #define MAS2_I 0x00000008 66 #define MAS2_I 0x00000008
67 #define MAS2_M 0x00000004 67 #define MAS2_M 0x00000004
68 #define MAS2_G 0x00000002 68 #define MAS2_G 0x00000002
69 #define MAS2_E 0x00000001 69 #define MAS2_E 0x00000001
70 #define MAS2_WIMGE_MASK 0x0000001f 70 #define MAS2_WIMGE_MASK 0x0000001f
71 #define MAS2_EPN_MASK(size) (~0 << (size + 10)) 71 #define MAS2_EPN_MASK(size) (~0 << (size + 10))
72 #define MAS2_VAL(addr, size, flags) ((addr) & MAS2_EPN_MASK(size) | (flags)) 72 #define MAS2_VAL(addr, size, flags) ((addr) & MAS2_EPN_MASK(size) | (flags))
73 73
74 #define MAS3_RPN 0xFFFFF000 74 #define MAS3_RPN 0xFFFFF000
75 #define MAS3_U0 0x00000200 75 #define MAS3_U0 0x00000200
76 #define MAS3_U1 0x00000100 76 #define MAS3_U1 0x00000100
77 #define MAS3_U2 0x00000080 77 #define MAS3_U2 0x00000080
78 #define MAS3_U3 0x00000040 78 #define MAS3_U3 0x00000040
79 #define MAS3_UX 0x00000020 79 #define MAS3_UX 0x00000020
80 #define MAS3_SX 0x00000010 80 #define MAS3_SX 0x00000010
81 #define MAS3_UW 0x00000008 81 #define MAS3_UW 0x00000008
82 #define MAS3_SW 0x00000004 82 #define MAS3_SW 0x00000004
83 #define MAS3_UR 0x00000002 83 #define MAS3_UR 0x00000002
84 #define MAS3_SR 0x00000001 84 #define MAS3_SR 0x00000001
85 #define MAS3_BAP_MASK 0x0000003f 85 #define MAS3_BAP_MASK 0x0000003f
86 #define MAS3_SPSIZE 0x0000003e 86 #define MAS3_SPSIZE 0x0000003e
87 #define MAS3_SPSIZE_SHIFT 1 87 #define MAS3_SPSIZE_SHIFT 1
88 88
89 #define MAS4_TLBSELD(x) MAS0_TLBSEL(x) 89 #define MAS4_TLBSELD(x) MAS0_TLBSEL(x)
90 #define MAS4_INDD 0x00008000 /* Default IND */ 90 #define MAS4_INDD 0x00008000 /* Default IND */
91 #define MAS4_TSIZED(x) MAS1_TSIZE(x) 91 #define MAS4_TSIZED(x) MAS1_TSIZE(x)
92 #define MAS4_X0D 0x00000040 92 #define MAS4_X0D 0x00000040
93 #define MAS4_X1D 0x00000020 93 #define MAS4_X1D 0x00000020
94 #define MAS4_WD 0x00000010 94 #define MAS4_WD 0x00000010
95 #define MAS4_ID 0x00000008 95 #define MAS4_ID 0x00000008
96 #define MAS4_MD 0x00000004 96 #define MAS4_MD 0x00000004
97 #define MAS4_GD 0x00000002 97 #define MAS4_GD 0x00000002
98 #define MAS4_ED 0x00000001 98 #define MAS4_ED 0x00000001
99 #define MAS4_WIMGED_MASK 0x0000001f /* Default WIMGE */ 99 #define MAS4_WIMGED_MASK 0x0000001f /* Default WIMGE */
100 #define MAS4_WIMGED_SHIFT 0 100 #define MAS4_WIMGED_SHIFT 0
101 #define MAS4_VLED MAS4_X1D /* Default VLE */ 101 #define MAS4_VLED MAS4_X1D /* Default VLE */
102 #define MAS4_ACMD 0x000000c0 /* Default ACM */ 102 #define MAS4_ACMD 0x000000c0 /* Default ACM */
103 #define MAS4_ACMD_SHIFT 6 103 #define MAS4_ACMD_SHIFT 6
104 #define MAS4_TSIZED_MASK 0x00000f80 /* Default TSIZE */ 104 #define MAS4_TSIZED_MASK 0x00000f80 /* Default TSIZE */
105 #define MAS4_TSIZED_SHIFT 7 105 #define MAS4_TSIZED_SHIFT 7
106 106
107 #define MAS5_SGS 0x80000000 107 #define MAS5_SGS 0x80000000
108 108
109 #define MAS6_SPID0 0x3FFF0000 109 #define MAS6_SPID0 0x3FFF0000
110 #define MAS6_SPID1 0x00007FFE 110 #define MAS6_SPID1 0x00007FFE
111 #define MAS6_ISIZE(x) MAS1_TSIZE(x) 111 #define MAS6_ISIZE(x) MAS1_TSIZE(x)
112 #define MAS6_SAS 0x00000001 112 #define MAS6_SAS 0x00000001
113 #define MAS6_SPID MAS6_SPID0 113 #define MAS6_SPID MAS6_SPID0
114 #define MAS6_SIND 0x00000002 /* Indirect page */ 114 #define MAS6_SIND 0x00000002 /* Indirect page */
115 #define MAS6_SIND_SHIFT 1 115 #define MAS6_SIND_SHIFT 1
116 #define MAS6_SPID_MASK 0x3fff0000 116 #define MAS6_SPID_MASK 0x3fff0000
117 #define MAS6_SPID_SHIFT 16 117 #define MAS6_SPID_SHIFT 16
118 #define MAS6_ISIZE_MASK 0x00000f80 118 #define MAS6_ISIZE_MASK 0x00000f80
119 #define MAS6_ISIZE_SHIFT 7 119 #define MAS6_ISIZE_SHIFT 7
120 120
121 #define MAS7_RPN 0xFFFFFFFF 121 #define MAS7_RPN 0xFFFFFFFF
122 122
123 #define MAS8_TGS 0x80000000 /* Guest space */ 123 #define MAS8_TGS 0x80000000 /* Guest space */
124 #define MAS8_VF 0x40000000 /* Virtualization Fault */ 124 #define MAS8_VF 0x40000000 /* Virtualization Fault */
125 #define MAS8_TLPID 0x000000ff 125 #define MAS8_TLPID 0x000000ff
126 126
127 /* Bit definitions for MMUCFG */ 127 /* Bit definitions for MMUCFG */
128 #define MMUCFG_MAVN 0x00000003 /* MMU Architecture Version Number */ 128 #define MMUCFG_MAVN 0x00000003 /* MMU Architecture Version Number */
129 #define MMUCFG_MAVN_V1 0x00000000 /* v1.0 */ 129 #define MMUCFG_MAVN_V1 0x00000000 /* v1.0 */
130 #define MMUCFG_MAVN_V2 0x00000001 /* v2.0 */ 130 #define MMUCFG_MAVN_V2 0x00000001 /* v2.0 */
131 #define MMUCFG_NTLBS 0x0000000c /* Number of TLBs */ 131 #define MMUCFG_NTLBS 0x0000000c /* Number of TLBs */
132 #define MMUCFG_PIDSIZE 0x000007c0 /* PID Reg Size */ 132 #define MMUCFG_PIDSIZE 0x000007c0 /* PID Reg Size */
133 #define MMUCFG_TWC 0x00008000 /* TLB Write Conditional (v2.0) */ 133 #define MMUCFG_TWC 0x00008000 /* TLB Write Conditional (v2.0) */
134 #define MMUCFG_LRAT 0x00010000 /* LRAT Supported (v2.0) */ 134 #define MMUCFG_LRAT 0x00010000 /* LRAT Supported (v2.0) */
135 #define MMUCFG_RASIZE 0x00fe0000 /* Real Addr Size */ 135 #define MMUCFG_RASIZE 0x00fe0000 /* Real Addr Size */
136 #define MMUCFG_LPIDSIZE 0x0f000000 /* LPID Reg Size */ 136 #define MMUCFG_LPIDSIZE 0x0f000000 /* LPID Reg Size */
137 137
138 /* Bit definitions for MMUCSR0 */ 138 /* Bit definitions for MMUCSR0 */
139 #define MMUCSR0_TLB1FI 0x00000002 /* TLB1 Flash invalidate */ 139 #define MMUCSR0_TLB1FI 0x00000002 /* TLB1 Flash invalidate */
140 #define MMUCSR0_TLB0FI 0x00000004 /* TLB0 Flash invalidate */ 140 #define MMUCSR0_TLB0FI 0x00000004 /* TLB0 Flash invalidate */
141 #define MMUCSR0_TLB2FI 0x00000040 /* TLB2 Flash invalidate */ 141 #define MMUCSR0_TLB2FI 0x00000040 /* TLB2 Flash invalidate */
142 #define MMUCSR0_TLB3FI 0x00000020 /* TLB3 Flash invalidate */ 142 #define MMUCSR0_TLB3FI 0x00000020 /* TLB3 Flash invalidate */
143 #define MMUCSR0_TLBFI (MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \ 143 #define MMUCSR0_TLBFI (MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \
144 MMUCSR0_TLB2FI | MMUCSR0_TLB3FI) 144 MMUCSR0_TLB2FI | MMUCSR0_TLB3FI)
145 #define MMUCSR0_TLB0PS 0x00000780 /* TLB0 Page Size */ 145 #define MMUCSR0_TLB0PS 0x00000780 /* TLB0 Page Size */
146 #define MMUCSR0_TLB1PS 0x00007800 /* TLB1 Page Size */ 146 #define MMUCSR0_TLB1PS 0x00007800 /* TLB1 Page Size */
147 #define MMUCSR0_TLB2PS 0x00078000 /* TLB2 Page Size */ 147 #define MMUCSR0_TLB2PS 0x00078000 /* TLB2 Page Size */
148 #define MMUCSR0_TLB3PS 0x00780000 /* TLB3 Page Size */ 148 #define MMUCSR0_TLB3PS 0x00780000 /* TLB3 Page Size */
149 149
150 /* MMUCFG bits */ 150 /* MMUCFG bits */
151 #define MMUCFG_MAVN_NASK 0x00000003 151 #define MMUCFG_MAVN_NASK 0x00000003
152 #define MMUCFG_MAVN_V1_0 0x00000000 152 #define MMUCFG_MAVN_V1_0 0x00000000
153 #define MMUCFG_MAVN_V2_0 0x00000001 153 #define MMUCFG_MAVN_V2_0 0x00000001
154 #define MMUCFG_NTLB_MASK 0x0000000c 154 #define MMUCFG_NTLB_MASK 0x0000000c
155 #define MMUCFG_NTLB_SHIFT 2 155 #define MMUCFG_NTLB_SHIFT 2
156 #define MMUCFG_PIDSIZE_MASK 0x000007c0 156 #define MMUCFG_PIDSIZE_MASK 0x000007c0
157 #define MMUCFG_PIDSIZE_SHIFT 6 157 #define MMUCFG_PIDSIZE_SHIFT 6
158 #define MMUCFG_TWC 0x00008000 158 #define MMUCFG_TWC 0x00008000
159 #define MMUCFG_LRAT 0x00010000 159 #define MMUCFG_LRAT 0x00010000
160 #define MMUCFG_RASIZE_MASK 0x00fe0000 160 #define MMUCFG_RASIZE_MASK 0x00fe0000
161 #define MMUCFG_RASIZE_SHIFT 17 161 #define MMUCFG_RASIZE_SHIFT 17
162 #define MMUCFG_LPIDSIZE_MASK 0x0f000000 162 #define MMUCFG_LPIDSIZE_MASK 0x0f000000
163 #define MMUCFG_LPIDSIZE_SHIFT 24 163 #define MMUCFG_LPIDSIZE_SHIFT 24
164 164
165 /* TLBnCFG encoding */ 165 /* TLBnCFG encoding */
166 #define TLBnCFG_N_ENTRY 0x00000fff /* number of entries */ 166 #define TLBnCFG_N_ENTRY 0x00000fff /* number of entries */
167 #define TLBnCFG_HES 0x00002000 /* HW select supported */ 167 #define TLBnCFG_HES 0x00002000 /* HW select supported */
168 #define TLBnCFG_IPROT 0x00008000 /* IPROT supported */ 168 #define TLBnCFG_IPROT 0x00008000 /* IPROT supported */
169 #define TLBnCFG_GTWE 0x00010000 /* Guest can write */ 169 #define TLBnCFG_GTWE 0x00010000 /* Guest can write */
170 #define TLBnCFG_IND 0x00020000 /* IND entries supported */ 170 #define TLBnCFG_IND 0x00020000 /* IND entries supported */
171 #define TLBnCFG_PT 0x00040000 /* Can load from page table */ 171 #define TLBnCFG_PT 0x00040000 /* Can load from page table */
172 #define TLBnCFG_MINSIZE 0x00f00000 /* Minimum Page Size (v1.0) */ 172 #define TLBnCFG_MINSIZE 0x00f00000 /* Minimum Page Size (v1.0) */
173 #define TLBnCFG_MINSIZE_SHIFT 20 173 #define TLBnCFG_MINSIZE_SHIFT 20
174 #define TLBnCFG_MAXSIZE 0x000f0000 /* Maximum Page Size (v1.0) */ 174 #define TLBnCFG_MAXSIZE 0x000f0000 /* Maximum Page Size (v1.0) */
175 #define TLBnCFG_MAXSIZE_SHIFT 16 175 #define TLBnCFG_MAXSIZE_SHIFT 16
176 #define TLBnCFG_ASSOC 0xff000000 /* Associativity */ 176 #define TLBnCFG_ASSOC 0xff000000 /* Associativity */
177 #define TLBnCFG_ASSOC_SHIFT 24 177 #define TLBnCFG_ASSOC_SHIFT 24
178 178
179 /* TLBnPS encoding */ 179 /* TLBnPS encoding */
180 #define TLBnPS_4K 0x00000004 180 #define TLBnPS_4K 0x00000004
181 #define TLBnPS_8K 0x00000008 181 #define TLBnPS_8K 0x00000008
182 #define TLBnPS_16K 0x00000010 182 #define TLBnPS_16K 0x00000010
183 #define TLBnPS_32K 0x00000020 183 #define TLBnPS_32K 0x00000020
184 #define TLBnPS_64K 0x00000040 184 #define TLBnPS_64K 0x00000040
185 #define TLBnPS_128K 0x00000080 185 #define TLBnPS_128K 0x00000080
186 #define TLBnPS_256K 0x00000100 186 #define TLBnPS_256K 0x00000100
187 #define TLBnPS_512K 0x00000200 187 #define TLBnPS_512K 0x00000200
188 #define TLBnPS_1M 0x00000400 188 #define TLBnPS_1M 0x00000400
189 #define TLBnPS_2M 0x00000800 189 #define TLBnPS_2M 0x00000800
190 #define TLBnPS_4M 0x00001000 190 #define TLBnPS_4M 0x00001000
191 #define TLBnPS_8M 0x00002000 191 #define TLBnPS_8M 0x00002000
192 #define TLBnPS_16M 0x00004000 192 #define TLBnPS_16M 0x00004000
193 #define TLBnPS_32M 0x00008000 193 #define TLBnPS_32M 0x00008000
194 #define TLBnPS_64M 0x00010000 194 #define TLBnPS_64M 0x00010000
195 #define TLBnPS_128M 0x00020000 195 #define TLBnPS_128M 0x00020000
196 #define TLBnPS_256M 0x00040000 196 #define TLBnPS_256M 0x00040000
197 #define TLBnPS_512M 0x00080000 197 #define TLBnPS_512M 0x00080000
198 #define TLBnPS_1G 0x00100000 198 #define TLBnPS_1G 0x00100000
199 #define TLBnPS_2G 0x00200000 199 #define TLBnPS_2G 0x00200000
200 #define TLBnPS_4G 0x00400000 200 #define TLBnPS_4G 0x00400000
201 #define TLBnPS_8G 0x00800000 201 #define TLBnPS_8G 0x00800000
202 #define TLBnPS_16G 0x01000000 202 #define TLBnPS_16G 0x01000000
203 #define TLBnPS_32G 0x02000000 203 #define TLBnPS_32G 0x02000000
204 #define TLBnPS_64G 0x04000000 204 #define TLBnPS_64G 0x04000000
205 #define TLBnPS_128G 0x08000000 205 #define TLBnPS_128G 0x08000000
206 #define TLBnPS_256G 0x10000000 206 #define TLBnPS_256G 0x10000000
207 207
208 /* tlbilx action encoding */ 208 /* tlbilx action encoding */
209 #define TLBILX_T_ALL 0 209 #define TLBILX_T_ALL 0
210 #define TLBILX_T_TID 1 210 #define TLBILX_T_TID 1
211 #define TLBILX_T_FULLMATCH 3 211 #define TLBILX_T_FULLMATCH 3
212 #define TLBILX_T_CLASS0 4 212 #define TLBILX_T_CLASS0 4
213 #define TLBILX_T_CLASS1 5 213 #define TLBILX_T_CLASS1 5
214 #define TLBILX_T_CLASS2 6 214 #define TLBILX_T_CLASS2 6
215 #define TLBILX_T_CLASS3 7 215 #define TLBILX_T_CLASS3 7
216 216
217 #ifndef __ASSEMBLY__ 217 #ifndef __ASSEMBLY__
218 218
219 extern unsigned int tlbcam_index; 219 extern unsigned int tlbcam_index;
220 220
221 typedef struct { 221 typedef struct {
222 unsigned int id; 222 unsigned int id;
223 unsigned int active; 223 unsigned int active;
224 unsigned long vdso_base; 224 unsigned long vdso_base;
225 #ifdef CONFIG_PPC_ICSWX 225 #ifdef CONFIG_PPC_ICSWX
226 struct spinlock *cop_lockp; /* guard cop related stuff */ 226 struct spinlock *cop_lockp; /* guard cop related stuff */
227 unsigned long acop; /* mask of enabled coprocessor types */ 227 unsigned long acop; /* mask of enabled coprocessor types */
228 #endif /* CONFIG_PPC_ICSWX */ 228 #endif /* CONFIG_PPC_ICSWX */
229 #ifdef CONFIG_PPC_MM_SLICES 229 #ifdef CONFIG_PPC_MM_SLICES
230 u64 low_slices_psize; /* SLB page size encodings */ 230 u64 low_slices_psize; /* SLB page size encodings */
231 u64 high_slices_psize; /* 4 bits per slice for now */ 231 u64 high_slices_psize; /* 4 bits per slice for now */
232 u16 user_psize; /* page size index */ 232 u16 user_psize; /* page size index */
233 #endif 233 #endif
234 #ifdef CONFIG_PPC_64K_PAGES
235 /* for 4K PTE fragment support */
236 void *pte_frag;
237 #endif
234 } mm_context_t; 238 } mm_context_t;
235 239
236 /* Page size definitions, common between 32 and 64-bit 240 /* Page size definitions, common between 32 and 64-bit
237 * 241 *
238 * shift : is the "PAGE_SHIFT" value for that page size 242 * shift : is the "PAGE_SHIFT" value for that page size
239 * penc : is the pte encoding mask 243 * penc : is the pte encoding mask
240 * 244 *
241 */ 245 */
242 struct mmu_psize_def 246 struct mmu_psize_def
243 { 247 {
244 unsigned int shift; /* number of bits */ 248 unsigned int shift; /* number of bits */
245 unsigned int enc; /* PTE encoding */ 249 unsigned int enc; /* PTE encoding */
246 unsigned int ind; /* Corresponding indirect page size shift */ 250 unsigned int ind; /* Corresponding indirect page size shift */
247 unsigned int flags; 251 unsigned int flags;
248 #define MMU_PAGE_SIZE_DIRECT 0x1 /* Supported as a direct size */ 252 #define MMU_PAGE_SIZE_DIRECT 0x1 /* Supported as a direct size */
249 #define MMU_PAGE_SIZE_INDIRECT 0x2 /* Supported as an indirect size */ 253 #define MMU_PAGE_SIZE_INDIRECT 0x2 /* Supported as an indirect size */
250 }; 254 };
251 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; 255 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
252 256
253 /* The page sizes use the same names as 64-bit hash but are 257 /* The page sizes use the same names as 64-bit hash but are
254 * constants 258 * constants
255 */ 259 */
256 #if defined(CONFIG_PPC_4K_PAGES) 260 #if defined(CONFIG_PPC_4K_PAGES)
257 #define mmu_virtual_psize MMU_PAGE_4K 261 #define mmu_virtual_psize MMU_PAGE_4K
258 #elif defined(CONFIG_PPC_64K_PAGES) 262 #elif defined(CONFIG_PPC_64K_PAGES)
259 #define mmu_virtual_psize MMU_PAGE_64K 263 #define mmu_virtual_psize MMU_PAGE_64K
260 #else 264 #else
261 #error Unsupported page size 265 #error Unsupported page size
262 #endif 266 #endif
263 267
264 extern int mmu_linear_psize; 268 extern int mmu_linear_psize;
265 extern int mmu_vmemmap_psize; 269 extern int mmu_vmemmap_psize;
266 270
267 #ifdef CONFIG_PPC64 271 #ifdef CONFIG_PPC64
268 extern unsigned long linear_map_top; 272 extern unsigned long linear_map_top;
269 273
270 /* 274 /*
271 * 64-bit booke platforms don't load the tlb in the tlb miss handler code. 275 * 64-bit booke platforms don't load the tlb in the tlb miss handler code.
272 * HUGETLB_NEED_PRELOAD handles this - it causes huge_ptep_set_access_flags to 276 * HUGETLB_NEED_PRELOAD handles this - it causes huge_ptep_set_access_flags to
273 * return 1, indicating that the tlb requires preloading. 277 * return 1, indicating that the tlb requires preloading.
274 */ 278 */
275 #define HUGETLB_NEED_PRELOAD 279 #define HUGETLB_NEED_PRELOAD
276 #endif 280 #endif
277 281
278 #endif /* !__ASSEMBLY__ */ 282 #endif /* !__ASSEMBLY__ */
279 283
280 #endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */ 284 #endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */
281 285
arch/powerpc/include/asm/mmu-hash64.h
1 #ifndef _ASM_POWERPC_MMU_HASH64_H_ 1 #ifndef _ASM_POWERPC_MMU_HASH64_H_
2 #define _ASM_POWERPC_MMU_HASH64_H_ 2 #define _ASM_POWERPC_MMU_HASH64_H_
3 /* 3 /*
4 * PowerPC64 memory management structures 4 * PowerPC64 memory management structures
5 * 5 *
6 * Dave Engebretsen & Mike Corrigan <{engebret|mikejc}@us.ibm.com> 6 * Dave Engebretsen & Mike Corrigan <{engebret|mikejc}@us.ibm.com>
7 * PPC64 rework. 7 * PPC64 rework.
8 * 8 *
9 * This program is free software; you can redistribute it and/or 9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License 10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version 11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version. 12 * 2 of the License, or (at your option) any later version.
13 */ 13 */
14 14
15 #include <asm/asm-compat.h> 15 #include <asm/asm-compat.h>
16 #include <asm/page.h> 16 #include <asm/page.h>
17 17
18 /* 18 /*
19 * This is necessary to get the definition of PGTABLE_RANGE which we 19 * This is necessary to get the definition of PGTABLE_RANGE which we
20 * need for various slices related matters. Note that this isn't the 20 * need for various slices related matters. Note that this isn't the
21 * complete pgtable.h but only a portion of it. 21 * complete pgtable.h but only a portion of it.
22 */ 22 */
23 #include <asm/pgtable-ppc64.h> 23 #include <asm/pgtable-ppc64.h>
24 #include <asm/bug.h> 24 #include <asm/bug.h>
25 25
26 /* 26 /*
27 * Segment table 27 * Segment table
28 */ 28 */
29 29
30 #define STE_ESID_V 0x80 30 #define STE_ESID_V 0x80
31 #define STE_ESID_KS 0x20 31 #define STE_ESID_KS 0x20
32 #define STE_ESID_KP 0x10 32 #define STE_ESID_KP 0x10
33 #define STE_ESID_N 0x08 33 #define STE_ESID_N 0x08
34 34
35 #define STE_VSID_SHIFT 12 35 #define STE_VSID_SHIFT 12
36 36
37 /* Location of cpu0's segment table */ 37 /* Location of cpu0's segment table */
38 #define STAB0_PAGE 0x8 38 #define STAB0_PAGE 0x8
39 #define STAB0_OFFSET (STAB0_PAGE << 12) 39 #define STAB0_OFFSET (STAB0_PAGE << 12)
40 #define STAB0_PHYS_ADDR (STAB0_OFFSET + PHYSICAL_START) 40 #define STAB0_PHYS_ADDR (STAB0_OFFSET + PHYSICAL_START)
41 41
42 #ifndef __ASSEMBLY__ 42 #ifndef __ASSEMBLY__
43 extern char initial_stab[]; 43 extern char initial_stab[];
44 #endif /* ! __ASSEMBLY */ 44 #endif /* ! __ASSEMBLY */
45 45
46 /* 46 /*
47 * SLB 47 * SLB
48 */ 48 */
49 49
50 #define SLB_NUM_BOLTED 3 50 #define SLB_NUM_BOLTED 3
51 #define SLB_CACHE_ENTRIES 8 51 #define SLB_CACHE_ENTRIES 8
52 #define SLB_MIN_SIZE 32 52 #define SLB_MIN_SIZE 32
53 53
54 /* Bits in the SLB ESID word */ 54 /* Bits in the SLB ESID word */
55 #define SLB_ESID_V ASM_CONST(0x0000000008000000) /* valid */ 55 #define SLB_ESID_V ASM_CONST(0x0000000008000000) /* valid */
56 56
57 /* Bits in the SLB VSID word */ 57 /* Bits in the SLB VSID word */
58 #define SLB_VSID_SHIFT 12 58 #define SLB_VSID_SHIFT 12
59 #define SLB_VSID_SHIFT_1T 24 59 #define SLB_VSID_SHIFT_1T 24
60 #define SLB_VSID_SSIZE_SHIFT 62 60 #define SLB_VSID_SSIZE_SHIFT 62
61 #define SLB_VSID_B ASM_CONST(0xc000000000000000) 61 #define SLB_VSID_B ASM_CONST(0xc000000000000000)
62 #define SLB_VSID_B_256M ASM_CONST(0x0000000000000000) 62 #define SLB_VSID_B_256M ASM_CONST(0x0000000000000000)
63 #define SLB_VSID_B_1T ASM_CONST(0x4000000000000000) 63 #define SLB_VSID_B_1T ASM_CONST(0x4000000000000000)
64 #define SLB_VSID_KS ASM_CONST(0x0000000000000800) 64 #define SLB_VSID_KS ASM_CONST(0x0000000000000800)
65 #define SLB_VSID_KP ASM_CONST(0x0000000000000400) 65 #define SLB_VSID_KP ASM_CONST(0x0000000000000400)
66 #define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */ 66 #define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */
67 #define SLB_VSID_L ASM_CONST(0x0000000000000100) 67 #define SLB_VSID_L ASM_CONST(0x0000000000000100)
68 #define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */ 68 #define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */
69 #define SLB_VSID_LP ASM_CONST(0x0000000000000030) 69 #define SLB_VSID_LP ASM_CONST(0x0000000000000030)
70 #define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000) 70 #define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000)
71 #define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010) 71 #define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010)
72 #define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020) 72 #define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020)
73 #define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030) 73 #define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030)
74 #define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP) 74 #define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP)
75 75
76 #define SLB_VSID_KERNEL (SLB_VSID_KP) 76 #define SLB_VSID_KERNEL (SLB_VSID_KP)
77 #define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C) 77 #define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C)
78 78
79 #define SLBIE_C (0x08000000) 79 #define SLBIE_C (0x08000000)
80 #define SLBIE_SSIZE_SHIFT 25 80 #define SLBIE_SSIZE_SHIFT 25
81 81
82 /* 82 /*
83 * Hash table 83 * Hash table
84 */ 84 */
85 85
86 #define HPTES_PER_GROUP 8 86 #define HPTES_PER_GROUP 8
87 87
88 #define HPTE_V_SSIZE_SHIFT 62 88 #define HPTE_V_SSIZE_SHIFT 62
89 #define HPTE_V_AVPN_SHIFT 7 89 #define HPTE_V_AVPN_SHIFT 7
90 #define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80) 90 #define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80)
91 #define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) 91 #define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
92 #define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL)) 92 #define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL))
93 #define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) 93 #define HPTE_V_BOLTED ASM_CONST(0x0000000000000010)
94 #define HPTE_V_LOCK ASM_CONST(0x0000000000000008) 94 #define HPTE_V_LOCK ASM_CONST(0x0000000000000008)
95 #define HPTE_V_LARGE ASM_CONST(0x0000000000000004) 95 #define HPTE_V_LARGE ASM_CONST(0x0000000000000004)
96 #define HPTE_V_SECONDARY ASM_CONST(0x0000000000000002) 96 #define HPTE_V_SECONDARY ASM_CONST(0x0000000000000002)
97 #define HPTE_V_VALID ASM_CONST(0x0000000000000001) 97 #define HPTE_V_VALID ASM_CONST(0x0000000000000001)
98 98
99 #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) 99 #define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
100 #define HPTE_R_TS ASM_CONST(0x4000000000000000) 100 #define HPTE_R_TS ASM_CONST(0x4000000000000000)
101 #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) 101 #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000)
102 #define HPTE_R_RPN_SHIFT 12 102 #define HPTE_R_RPN_SHIFT 12
103 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) 103 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000)
104 #define HPTE_R_PP ASM_CONST(0x0000000000000003) 104 #define HPTE_R_PP ASM_CONST(0x0000000000000003)
105 #define HPTE_R_N ASM_CONST(0x0000000000000004) 105 #define HPTE_R_N ASM_CONST(0x0000000000000004)
106 #define HPTE_R_G ASM_CONST(0x0000000000000008) 106 #define HPTE_R_G ASM_CONST(0x0000000000000008)
107 #define HPTE_R_M ASM_CONST(0x0000000000000010) 107 #define HPTE_R_M ASM_CONST(0x0000000000000010)
108 #define HPTE_R_I ASM_CONST(0x0000000000000020) 108 #define HPTE_R_I ASM_CONST(0x0000000000000020)
109 #define HPTE_R_W ASM_CONST(0x0000000000000040) 109 #define HPTE_R_W ASM_CONST(0x0000000000000040)
110 #define HPTE_R_WIMG ASM_CONST(0x0000000000000078) 110 #define HPTE_R_WIMG ASM_CONST(0x0000000000000078)
111 #define HPTE_R_C ASM_CONST(0x0000000000000080) 111 #define HPTE_R_C ASM_CONST(0x0000000000000080)
112 #define HPTE_R_R ASM_CONST(0x0000000000000100) 112 #define HPTE_R_R ASM_CONST(0x0000000000000100)
113 #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) 113 #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00)
114 114
115 #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) 115 #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000)
116 #define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000) 116 #define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000)
117 117
118 /* Values for PP (assumes Ks=0, Kp=1) */ 118 /* Values for PP (assumes Ks=0, Kp=1) */
119 #define PP_RWXX 0 /* Supervisor read/write, User none */ 119 #define PP_RWXX 0 /* Supervisor read/write, User none */
120 #define PP_RWRX 1 /* Supervisor read/write, User read */ 120 #define PP_RWRX 1 /* Supervisor read/write, User read */
121 #define PP_RWRW 2 /* Supervisor read/write, User read/write */ 121 #define PP_RWRW 2 /* Supervisor read/write, User read/write */
122 #define PP_RXRX 3 /* Supervisor read, User read */ 122 #define PP_RXRX 3 /* Supervisor read, User read */
123 #define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */ 123 #define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */
124 124
125 /* Fields for tlbiel instruction in architecture 2.06 */ 125 /* Fields for tlbiel instruction in architecture 2.06 */
126 #define TLBIEL_INVAL_SEL_MASK 0xc00 /* invalidation selector */ 126 #define TLBIEL_INVAL_SEL_MASK 0xc00 /* invalidation selector */
127 #define TLBIEL_INVAL_PAGE 0x000 /* invalidate a single page */ 127 #define TLBIEL_INVAL_PAGE 0x000 /* invalidate a single page */
128 #define TLBIEL_INVAL_SET_LPID 0x800 /* invalidate a set for current LPID */ 128 #define TLBIEL_INVAL_SET_LPID 0x800 /* invalidate a set for current LPID */
129 #define TLBIEL_INVAL_SET 0xc00 /* invalidate a set for all LPIDs */ 129 #define TLBIEL_INVAL_SET 0xc00 /* invalidate a set for all LPIDs */
130 #define TLBIEL_INVAL_SET_MASK 0xfff000 /* set number to inval. */ 130 #define TLBIEL_INVAL_SET_MASK 0xfff000 /* set number to inval. */
131 #define TLBIEL_INVAL_SET_SHIFT 12 131 #define TLBIEL_INVAL_SET_SHIFT 12
132 132
133 #define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */ 133 #define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */
134 134
135 #ifndef __ASSEMBLY__ 135 #ifndef __ASSEMBLY__
136 136
137 struct hash_pte { 137 struct hash_pte {
138 unsigned long v; 138 unsigned long v;
139 unsigned long r; 139 unsigned long r;
140 }; 140 };
141 141
142 extern struct hash_pte *htab_address; 142 extern struct hash_pte *htab_address;
143 extern unsigned long htab_size_bytes; 143 extern unsigned long htab_size_bytes;
144 extern unsigned long htab_hash_mask; 144 extern unsigned long htab_hash_mask;
145 145
146 /* 146 /*
147 * Page size definition 147 * Page size definition
148 * 148 *
149 * shift : is the "PAGE_SHIFT" value for that page size 149 * shift : is the "PAGE_SHIFT" value for that page size
150 * sllp : is a bit mask with the value of SLB L || LP to be or'ed 150 * sllp : is a bit mask with the value of SLB L || LP to be or'ed
151 * directly to a slbmte "vsid" value 151 * directly to a slbmte "vsid" value
152 * penc : is the HPTE encoding mask for the "LP" field: 152 * penc : is the HPTE encoding mask for the "LP" field:
153 * 153 *
154 */ 154 */
155 struct mmu_psize_def 155 struct mmu_psize_def
156 { 156 {
157 unsigned int shift; /* number of bits */ 157 unsigned int shift; /* number of bits */
158 unsigned int penc; /* HPTE encoding */ 158 unsigned int penc; /* HPTE encoding */
159 unsigned int tlbiel; /* tlbiel supported for that page size */ 159 unsigned int tlbiel; /* tlbiel supported for that page size */
160 unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */ 160 unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
161 unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */ 161 unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
162 }; 162 };
163 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; 163 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
164 164
165 static inline int shift_to_mmu_psize(unsigned int shift) 165 static inline int shift_to_mmu_psize(unsigned int shift)
166 { 166 {
167 int psize; 167 int psize;
168 168
169 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) 169 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
170 if (mmu_psize_defs[psize].shift == shift) 170 if (mmu_psize_defs[psize].shift == shift)
171 return psize; 171 return psize;
172 return -1; 172 return -1;
173 } 173 }
174 174
175 static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) 175 static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
176 { 176 {
177 if (mmu_psize_defs[mmu_psize].shift) 177 if (mmu_psize_defs[mmu_psize].shift)
178 return mmu_psize_defs[mmu_psize].shift; 178 return mmu_psize_defs[mmu_psize].shift;
179 BUG(); 179 BUG();
180 } 180 }
181 181
182 #endif /* __ASSEMBLY__ */ 182 #endif /* __ASSEMBLY__ */
183 183
184 /* 184 /*
185 * Segment sizes. 185 * Segment sizes.
186 * These are the values used by hardware in the B field of 186 * These are the values used by hardware in the B field of
187 * SLB entries and the first dword of MMU hashtable entries. 187 * SLB entries and the first dword of MMU hashtable entries.
188 * The B field is 2 bits; the values 2 and 3 are unused and reserved. 188 * The B field is 2 bits; the values 2 and 3 are unused and reserved.
189 */ 189 */
190 #define MMU_SEGSIZE_256M 0 190 #define MMU_SEGSIZE_256M 0
191 #define MMU_SEGSIZE_1T 1 191 #define MMU_SEGSIZE_1T 1
192 192
193 /* 193 /*
194 * encode page number shift. 194 * encode page number shift.
195 * in order to fit the 78 bit va in a 64 bit variable we shift the va by 195 * in order to fit the 78 bit va in a 64 bit variable we shift the va by
196 * 12 bits. This enable us to address upto 76 bit va. 196 * 12 bits. This enable us to address upto 76 bit va.
197 * For hpt hash from a va we can ignore the page size bits of va and for 197 * For hpt hash from a va we can ignore the page size bits of va and for
198 * hpte encoding we ignore up to 23 bits of va. So ignoring lower 12 bits ensure 198 * hpte encoding we ignore up to 23 bits of va. So ignoring lower 12 bits ensure
199 * we work in all cases including 4k page size. 199 * we work in all cases including 4k page size.
200 */ 200 */
201 #define VPN_SHIFT 12 201 #define VPN_SHIFT 12
202 202
203 #ifndef __ASSEMBLY__ 203 #ifndef __ASSEMBLY__
204 204
205 static inline int segment_shift(int ssize) 205 static inline int segment_shift(int ssize)
206 { 206 {
207 if (ssize == MMU_SEGSIZE_256M) 207 if (ssize == MMU_SEGSIZE_256M)
208 return SID_SHIFT; 208 return SID_SHIFT;
209 return SID_SHIFT_1T; 209 return SID_SHIFT_1T;
210 } 210 }
211 211
212 /* 212 /*
213 * The current system page and segment sizes 213 * The current system page and segment sizes
214 */ 214 */
215 extern int mmu_linear_psize; 215 extern int mmu_linear_psize;
216 extern int mmu_virtual_psize; 216 extern int mmu_virtual_psize;
217 extern int mmu_vmalloc_psize; 217 extern int mmu_vmalloc_psize;
218 extern int mmu_vmemmap_psize; 218 extern int mmu_vmemmap_psize;
219 extern int mmu_io_psize; 219 extern int mmu_io_psize;
220 extern int mmu_kernel_ssize; 220 extern int mmu_kernel_ssize;
221 extern int mmu_highuser_ssize; 221 extern int mmu_highuser_ssize;
222 extern u16 mmu_slb_size; 222 extern u16 mmu_slb_size;
223 extern unsigned long tce_alloc_start, tce_alloc_end; 223 extern unsigned long tce_alloc_start, tce_alloc_end;
224 224
225 /* 225 /*
226 * If the processor supports 64k normal pages but not 64k cache 226 * If the processor supports 64k normal pages but not 64k cache
227 * inhibited pages, we have to be prepared to switch processes 227 * inhibited pages, we have to be prepared to switch processes
228 * to use 4k pages when they create cache-inhibited mappings. 228 * to use 4k pages when they create cache-inhibited mappings.
229 * If this is the case, mmu_ci_restrictions will be set to 1. 229 * If this is the case, mmu_ci_restrictions will be set to 1.
230 */ 230 */
231 extern int mmu_ci_restrictions; 231 extern int mmu_ci_restrictions;
232 232
233 /* 233 /*
234 * This computes the AVPN and B fields of the first dword of a HPTE, 234 * This computes the AVPN and B fields of the first dword of a HPTE,
235 * for use when we want to match an existing PTE. The bottom 7 bits 235 * for use when we want to match an existing PTE. The bottom 7 bits
236 * of the returned value are zero. 236 * of the returned value are zero.
237 */ 237 */
238 static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize, 238 static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
239 int ssize) 239 int ssize)
240 { 240 {
241 unsigned long v; 241 unsigned long v;
242 /* 242 /*
243 * The AVA field omits the low-order 23 bits of the 78 bits VA. 243 * The AVA field omits the low-order 23 bits of the 78 bits VA.
244 * These bits are not needed in the PTE, because the 244 * These bits are not needed in the PTE, because the
245 * low-order b of these bits are part of the byte offset 245 * low-order b of these bits are part of the byte offset
246 * into the virtual page and, if b < 23, the high-order 246 * into the virtual page and, if b < 23, the high-order
247 * 23-b of these bits are always used in selecting the 247 * 23-b of these bits are always used in selecting the
248 * PTEGs to be searched 248 * PTEGs to be searched
249 */ 249 */
250 v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); 250 v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
251 v <<= HPTE_V_AVPN_SHIFT; 251 v <<= HPTE_V_AVPN_SHIFT;
252 v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; 252 v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
253 return v; 253 return v;
254 } 254 }
255 255
256 /* 256 /*
257 * This function sets the AVPN and L fields of the HPTE appropriately 257 * This function sets the AVPN and L fields of the HPTE appropriately
258 * for the page size 258 * for the page size
259 */ 259 */
260 static inline unsigned long hpte_encode_v(unsigned long vpn, 260 static inline unsigned long hpte_encode_v(unsigned long vpn,
261 int psize, int ssize) 261 int psize, int ssize)
262 { 262 {
263 unsigned long v; 263 unsigned long v;
264 v = hpte_encode_avpn(vpn, psize, ssize); 264 v = hpte_encode_avpn(vpn, psize, ssize);
265 if (psize != MMU_PAGE_4K) 265 if (psize != MMU_PAGE_4K)
266 v |= HPTE_V_LARGE; 266 v |= HPTE_V_LARGE;
267 return v; 267 return v;
268 } 268 }
269 269
270 /* 270 /*
271 * This function sets the ARPN, and LP fields of the HPTE appropriately 271 * This function sets the ARPN, and LP fields of the HPTE appropriately
272 * for the page size. We assume the pa is already "clean" that is properly 272 * for the page size. We assume the pa is already "clean" that is properly
273 * aligned for the requested page size 273 * aligned for the requested page size
274 */ 274 */
275 static inline unsigned long hpte_encode_r(unsigned long pa, int psize) 275 static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
276 { 276 {
277 unsigned long r; 277 unsigned long r;
278 278
279 /* A 4K page needs no special encoding */ 279 /* A 4K page needs no special encoding */
280 if (psize == MMU_PAGE_4K) 280 if (psize == MMU_PAGE_4K)
281 return pa & HPTE_R_RPN; 281 return pa & HPTE_R_RPN;
282 else { 282 else {
283 unsigned int penc = mmu_psize_defs[psize].penc; 283 unsigned int penc = mmu_psize_defs[psize].penc;
284 unsigned int shift = mmu_psize_defs[psize].shift; 284 unsigned int shift = mmu_psize_defs[psize].shift;
285 return (pa & ~((1ul << shift) - 1)) | (penc << 12); 285 return (pa & ~((1ul << shift) - 1)) | (penc << 12);
286 } 286 }
287 return r; 287 return r;
288 } 288 }
289 289
290 /* 290 /*
291 * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size. 291 * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size.
292 */ 292 */
293 static inline unsigned long hpt_vpn(unsigned long ea, 293 static inline unsigned long hpt_vpn(unsigned long ea,
294 unsigned long vsid, int ssize) 294 unsigned long vsid, int ssize)
295 { 295 {
296 unsigned long mask; 296 unsigned long mask;
297 int s_shift = segment_shift(ssize); 297 int s_shift = segment_shift(ssize);
298 298
299 mask = (1ul << (s_shift - VPN_SHIFT)) - 1; 299 mask = (1ul << (s_shift - VPN_SHIFT)) - 1;
300 return (vsid << (s_shift - VPN_SHIFT)) | ((ea >> VPN_SHIFT) & mask); 300 return (vsid << (s_shift - VPN_SHIFT)) | ((ea >> VPN_SHIFT) & mask);
301 } 301 }
302 302
303 /* 303 /*
304 * This hashes a virtual address 304 * This hashes a virtual address
305 */ 305 */
306 static inline unsigned long hpt_hash(unsigned long vpn, 306 static inline unsigned long hpt_hash(unsigned long vpn,
307 unsigned int shift, int ssize) 307 unsigned int shift, int ssize)
308 { 308 {
309 int mask; 309 int mask;
310 unsigned long hash, vsid; 310 unsigned long hash, vsid;
311 311
312 /* VPN_SHIFT can be atmost 12 */ 312 /* VPN_SHIFT can be atmost 12 */
313 if (ssize == MMU_SEGSIZE_256M) { 313 if (ssize == MMU_SEGSIZE_256M) {
314 mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1; 314 mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1;
315 hash = (vpn >> (SID_SHIFT - VPN_SHIFT)) ^ 315 hash = (vpn >> (SID_SHIFT - VPN_SHIFT)) ^
316 ((vpn & mask) >> (shift - VPN_SHIFT)); 316 ((vpn & mask) >> (shift - VPN_SHIFT));
317 } else { 317 } else {
318 mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1; 318 mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1;
319 vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT); 319 vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT);
320 hash = vsid ^ (vsid << 25) ^ 320 hash = vsid ^ (vsid << 25) ^
321 ((vpn & mask) >> (shift - VPN_SHIFT)) ; 321 ((vpn & mask) >> (shift - VPN_SHIFT)) ;
322 } 322 }
323 return hash & 0x7fffffffffUL; 323 return hash & 0x7fffffffffUL;
324 } 324 }
325 325
326 extern int __hash_page_4K(unsigned long ea, unsigned long access, 326 extern int __hash_page_4K(unsigned long ea, unsigned long access,
327 unsigned long vsid, pte_t *ptep, unsigned long trap, 327 unsigned long vsid, pte_t *ptep, unsigned long trap,
328 unsigned int local, int ssize, int subpage_prot); 328 unsigned int local, int ssize, int subpage_prot);
329 extern int __hash_page_64K(unsigned long ea, unsigned long access, 329 extern int __hash_page_64K(unsigned long ea, unsigned long access,
330 unsigned long vsid, pte_t *ptep, unsigned long trap, 330 unsigned long vsid, pte_t *ptep, unsigned long trap,
331 unsigned int local, int ssize); 331 unsigned int local, int ssize);
332 struct mm_struct; 332 struct mm_struct;
333 unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); 333 unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap);
334 extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); 334 extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
335 int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, 335 int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
336 pte_t *ptep, unsigned long trap, int local, int ssize, 336 pte_t *ptep, unsigned long trap, int local, int ssize,
337 unsigned int shift, unsigned int mmu_psize); 337 unsigned int shift, unsigned int mmu_psize);
338 extern void hash_failure_debug(unsigned long ea, unsigned long access, 338 extern void hash_failure_debug(unsigned long ea, unsigned long access,
339 unsigned long vsid, unsigned long trap, 339 unsigned long vsid, unsigned long trap,
340 int ssize, int psize, unsigned long pte); 340 int ssize, int psize, unsigned long pte);
341 extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, 341 extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
342 unsigned long pstart, unsigned long prot, 342 unsigned long pstart, unsigned long prot,
343 int psize, int ssize); 343 int psize, int ssize);
344 extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); 344 extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
345 extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); 345 extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
346 346
347 extern void hpte_init_native(void); 347 extern void hpte_init_native(void);
348 extern void hpte_init_lpar(void); 348 extern void hpte_init_lpar(void);
349 extern void hpte_init_beat(void); 349 extern void hpte_init_beat(void);
350 extern void hpte_init_beat_v3(void); 350 extern void hpte_init_beat_v3(void);
351 351
352 extern void stabs_alloc(void); 352 extern void stabs_alloc(void);
353 extern void slb_initialize(void); 353 extern void slb_initialize(void);
354 extern void slb_flush_and_rebolt(void); 354 extern void slb_flush_and_rebolt(void);
355 extern void stab_initialize(unsigned long stab); 355 extern void stab_initialize(unsigned long stab);
356 356
357 extern void slb_vmalloc_update(void); 357 extern void slb_vmalloc_update(void);
358 extern void slb_set_size(u16 size); 358 extern void slb_set_size(u16 size);
359 #endif /* __ASSEMBLY__ */ 359 #endif /* __ASSEMBLY__ */
360 360
361 /* 361 /*
362 * VSID allocation (256MB segment) 362 * VSID allocation (256MB segment)
363 * 363 *
364 * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated 364 * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated
365 * from mmu context id and effective segment id of the address. 365 * from mmu context id and effective segment id of the address.
366 * 366 *
367 * For user processes max context id is limited to ((1ul << 19) - 5) 367 * For user processes max context id is limited to ((1ul << 19) - 5)
368 * for kernel space, we use the top 4 context ids to map address as below 368 * for kernel space, we use the top 4 context ids to map address as below
369 * NOTE: each context only support 64TB now. 369 * NOTE: each context only support 64TB now.
370 * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] 370 * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ]
371 * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] 371 * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ]
372 * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] 372 * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ]
373 * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] 373 * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ]
374 * 374 *
375 * The proto-VSIDs are then scrambled into real VSIDs with the 375 * The proto-VSIDs are then scrambled into real VSIDs with the
376 * multiplicative hash: 376 * multiplicative hash:
377 * 377 *
378 * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS 378 * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS
379 * 379 *
380 * VSID_MULTIPLIER is prime, so in particular it is 380 * VSID_MULTIPLIER is prime, so in particular it is
381 * co-prime to VSID_MODULUS, making this a 1:1 scrambling function. 381 * co-prime to VSID_MODULUS, making this a 1:1 scrambling function.
382 * Because the modulus is 2^n-1 we can compute it efficiently without 382 * Because the modulus is 2^n-1 we can compute it efficiently without
383 * a divide or extra multiply (see below). The scramble function gives 383 * a divide or extra multiply (see below). The scramble function gives
384 * robust scattering in the hash table (at least based on some initial 384 * robust scattering in the hash table (at least based on some initial
385 * results). 385 * results).
386 * 386 *
387 * We also consider VSID 0 special. We use VSID 0 for slb entries mapping 387 * We also consider VSID 0 special. We use VSID 0 for slb entries mapping
388 * bad address. This enables us to consolidate bad address handling in 388 * bad address. This enables us to consolidate bad address handling in
389 * hash_page. 389 * hash_page.
390 * 390 *
391 * We also need to avoid the last segment of the last context, because that 391 * We also need to avoid the last segment of the last context, because that
392 * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 392 * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
393 * because of the modulo operation in vsid scramble. But the vmemmap 393 * because of the modulo operation in vsid scramble. But the vmemmap
394 * (which is what uses region 0xf) will never be close to 64TB in size 394 * (which is what uses region 0xf) will never be close to 64TB in size
395 * (it's 56 bytes per page of system memory). 395 * (it's 56 bytes per page of system memory).
396 */ 396 */
397 397
398 #define CONTEXT_BITS 19 398 #define CONTEXT_BITS 19
399 #define ESID_BITS 18 399 #define ESID_BITS 18
400 #define ESID_BITS_1T 6 400 #define ESID_BITS_1T 6
401 401
402 /* 402 /*
403 * 256MB segment 403 * 256MB segment
404 * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments 404 * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
405 * available for user + kernel mapping. The top 4 contexts are used for 405 * available for user + kernel mapping. The top 4 contexts are used for
406 * kernel mapping. Each segment contains 2^28 bytes. Each 406 * kernel mapping. Each segment contains 2^28 bytes. Each
407 * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts 407 * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts
408 * (19 == 37 + 28 - 46). 408 * (19 == 37 + 28 - 46).
409 */ 409 */
410 #define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 5) 410 #define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 5)
411 411
412 /* 412 /*
413 * This should be computed such that protovosid * vsid_mulitplier 413 * This should be computed such that protovosid * vsid_mulitplier
414 * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus 414 * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
415 */ 415 */
416 #define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ 416 #define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */
417 #define VSID_BITS_256M (CONTEXT_BITS + ESID_BITS) 417 #define VSID_BITS_256M (CONTEXT_BITS + ESID_BITS)
418 #define VSID_MODULUS_256M ((1UL<<VSID_BITS_256M)-1) 418 #define VSID_MODULUS_256M ((1UL<<VSID_BITS_256M)-1)
419 419
420 #define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */ 420 #define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */
421 #define VSID_BITS_1T (CONTEXT_BITS + ESID_BITS_1T) 421 #define VSID_BITS_1T (CONTEXT_BITS + ESID_BITS_1T)
422 #define VSID_MODULUS_1T ((1UL<<VSID_BITS_1T)-1) 422 #define VSID_MODULUS_1T ((1UL<<VSID_BITS_1T)-1)
423 423
424 424
425 #define USER_VSID_RANGE (1UL << (ESID_BITS + SID_SHIFT)) 425 #define USER_VSID_RANGE (1UL << (ESID_BITS + SID_SHIFT))
426 426
427 /* 427 /*
428 * This macro generates asm code to compute the VSID scramble 428 * This macro generates asm code to compute the VSID scramble
429 * function. Used in slb_allocate() and do_stab_bolted. The function 429 * function. Used in slb_allocate() and do_stab_bolted. The function
430 * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS 430 * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
431 * 431 *
432 * rt = register continaing the proto-VSID and into which the 432 * rt = register continaing the proto-VSID and into which the
433 * VSID will be stored 433 * VSID will be stored
434 * rx = scratch register (clobbered) 434 * rx = scratch register (clobbered)
435 * 435 *
436 * - rt and rx must be different registers 436 * - rt and rx must be different registers
437 * - The answer will end up in the low VSID_BITS bits of rt. The higher 437 * - The answer will end up in the low VSID_BITS bits of rt. The higher
438 * bits may contain other garbage, so you may need to mask the 438 * bits may contain other garbage, so you may need to mask the
439 * result. 439 * result.
440 */ 440 */
441 #define ASM_VSID_SCRAMBLE(rt, rx, size) \ 441 #define ASM_VSID_SCRAMBLE(rt, rx, size) \
442 lis rx,VSID_MULTIPLIER_##size@h; \ 442 lis rx,VSID_MULTIPLIER_##size@h; \
443 ori rx,rx,VSID_MULTIPLIER_##size@l; \ 443 ori rx,rx,VSID_MULTIPLIER_##size@l; \
444 mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \ 444 mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \
445 \ 445 \
446 srdi rx,rt,VSID_BITS_##size; \ 446 srdi rx,rt,VSID_BITS_##size; \
447 clrldi rt,rt,(64-VSID_BITS_##size); \ 447 clrldi rt,rt,(64-VSID_BITS_##size); \
448 add rt,rt,rx; /* add high and low bits */ \ 448 add rt,rt,rx; /* add high and low bits */ \
449 /* NOTE: explanation based on VSID_BITS_##size = 36 \ 449 /* NOTE: explanation based on VSID_BITS_##size = 36 \
450 * Now, r3 == VSID (mod 2^36-1), and lies between 0 and \ 450 * Now, r3 == VSID (mod 2^36-1), and lies between 0 and \
451 * 2^36-1+2^28-1. That in particular means that if r3 >= \ 451 * 2^36-1+2^28-1. That in particular means that if r3 >= \
452 * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \ 452 * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \
453 * the bit clear, r3 already has the answer we want, if it \ 453 * the bit clear, r3 already has the answer we want, if it \
454 * doesn't, the answer is the low 36 bits of r3+1. So in all \ 454 * doesn't, the answer is the low 36 bits of r3+1. So in all \
455 * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\ 455 * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\
456 addi rx,rt,1; \ 456 addi rx,rt,1; \
457 srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \ 457 srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \
458 add rt,rt,rx 458 add rt,rt,rx
459 459
460 /* 4 bits per slice and we have one slice per 1TB */ 460 /* 4 bits per slice and we have one slice per 1TB */
461 #define SLICE_ARRAY_SIZE (PGTABLE_RANGE >> 41) 461 #define SLICE_ARRAY_SIZE (PGTABLE_RANGE >> 41)
462 462
463 #ifndef __ASSEMBLY__ 463 #ifndef __ASSEMBLY__
464 464
465 #ifdef CONFIG_PPC_SUBPAGE_PROT 465 #ifdef CONFIG_PPC_SUBPAGE_PROT
466 /* 466 /*
467 * For the sub-page protection option, we extend the PGD with one of 467 * For the sub-page protection option, we extend the PGD with one of
468 * these. Basically we have a 3-level tree, with the top level being 468 * these. Basically we have a 3-level tree, with the top level being
469 * the protptrs array. To optimize speed and memory consumption when 469 * the protptrs array. To optimize speed and memory consumption when
470 * only addresses < 4GB are being protected, pointers to the first 470 * only addresses < 4GB are being protected, pointers to the first
471 * four pages of sub-page protection words are stored in the low_prot 471 * four pages of sub-page protection words are stored in the low_prot
472 * array. 472 * array.
473 * Each page of sub-page protection words protects 1GB (4 bytes 473 * Each page of sub-page protection words protects 1GB (4 bytes
474 * protects 64k). For the 3-level tree, each page of pointers then 474 * protects 64k). For the 3-level tree, each page of pointers then
475 * protects 8TB. 475 * protects 8TB.
476 */ 476 */
477 struct subpage_prot_table { 477 struct subpage_prot_table {
478 unsigned long maxaddr; /* only addresses < this are protected */ 478 unsigned long maxaddr; /* only addresses < this are protected */
479 unsigned int **protptrs[2]; 479 unsigned int **protptrs[2];
480 unsigned int *low_prot[4]; 480 unsigned int *low_prot[4];
481 }; 481 };
482 482
483 #define SBP_L1_BITS (PAGE_SHIFT - 2) 483 #define SBP_L1_BITS (PAGE_SHIFT - 2)
484 #define SBP_L2_BITS (PAGE_SHIFT - 3) 484 #define SBP_L2_BITS (PAGE_SHIFT - 3)
485 #define SBP_L1_COUNT (1 << SBP_L1_BITS) 485 #define SBP_L1_COUNT (1 << SBP_L1_BITS)
486 #define SBP_L2_COUNT (1 << SBP_L2_BITS) 486 #define SBP_L2_COUNT (1 << SBP_L2_BITS)
487 #define SBP_L2_SHIFT (PAGE_SHIFT + SBP_L1_BITS) 487 #define SBP_L2_SHIFT (PAGE_SHIFT + SBP_L1_BITS)
488 #define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS) 488 #define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS)
489 489
490 extern void subpage_prot_free(struct mm_struct *mm); 490 extern void subpage_prot_free(struct mm_struct *mm);
491 extern void subpage_prot_init_new_context(struct mm_struct *mm); 491 extern void subpage_prot_init_new_context(struct mm_struct *mm);
492 #else 492 #else
493 static inline void subpage_prot_free(struct mm_struct *mm) {} 493 static inline void subpage_prot_free(struct mm_struct *mm) {}
494 static inline void subpage_prot_init_new_context(struct mm_struct *mm) { } 494 static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
495 #endif /* CONFIG_PPC_SUBPAGE_PROT */ 495 #endif /* CONFIG_PPC_SUBPAGE_PROT */
496 496
497 typedef unsigned long mm_context_id_t; 497 typedef unsigned long mm_context_id_t;
498 struct spinlock; 498 struct spinlock;
499 499
500 typedef struct { 500 typedef struct {
501 mm_context_id_t id; 501 mm_context_id_t id;
502 u16 user_psize; /* page size index */ 502 u16 user_psize; /* page size index */
503 503
504 #ifdef CONFIG_PPC_MM_SLICES 504 #ifdef CONFIG_PPC_MM_SLICES
505 u64 low_slices_psize; /* SLB page size encodings */ 505 u64 low_slices_psize; /* SLB page size encodings */
506 unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; 506 unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
507 #else 507 #else
508 u16 sllp; /* SLB page size encoding */ 508 u16 sllp; /* SLB page size encoding */
509 #endif 509 #endif
510 unsigned long vdso_base; 510 unsigned long vdso_base;
511 #ifdef CONFIG_PPC_SUBPAGE_PROT 511 #ifdef CONFIG_PPC_SUBPAGE_PROT
512 struct subpage_prot_table spt; 512 struct subpage_prot_table spt;
513 #endif /* CONFIG_PPC_SUBPAGE_PROT */ 513 #endif /* CONFIG_PPC_SUBPAGE_PROT */
514 #ifdef CONFIG_PPC_ICSWX 514 #ifdef CONFIG_PPC_ICSWX
515 struct spinlock *cop_lockp; /* guard acop and cop_pid */ 515 struct spinlock *cop_lockp; /* guard acop and cop_pid */
516 unsigned long acop; /* mask of enabled coprocessor types */ 516 unsigned long acop; /* mask of enabled coprocessor types */
517 unsigned int cop_pid; /* pid value used with coprocessors */ 517 unsigned int cop_pid; /* pid value used with coprocessors */
518 #endif /* CONFIG_PPC_ICSWX */ 518 #endif /* CONFIG_PPC_ICSWX */
519 #ifdef CONFIG_PPC_64K_PAGES
520 /* for 4K PTE fragment support */
521 void *pte_frag;
522 #endif
519 } mm_context_t; 523 } mm_context_t;
520 524
521 525
522 #if 0 526 #if 0
523 /* 527 /*
524 * The code below is equivalent to this function for arguments 528 * The code below is equivalent to this function for arguments
525 * < 2^VSID_BITS, which is all this should ever be called 529 * < 2^VSID_BITS, which is all this should ever be called
526 * with. However gcc is not clever enough to compute the 530 * with. However gcc is not clever enough to compute the
527 * modulus (2^n-1) without a second multiply. 531 * modulus (2^n-1) without a second multiply.
528 */ 532 */
529 #define vsid_scramble(protovsid, size) \ 533 #define vsid_scramble(protovsid, size) \
530 ((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size)) 534 ((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
531 535
532 #else /* 1 */ 536 #else /* 1 */
533 #define vsid_scramble(protovsid, size) \ 537 #define vsid_scramble(protovsid, size) \
534 ({ \ 538 ({ \
535 unsigned long x; \ 539 unsigned long x; \
536 x = (protovsid) * VSID_MULTIPLIER_##size; \ 540 x = (protovsid) * VSID_MULTIPLIER_##size; \
537 x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \ 541 x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
538 (x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \ 542 (x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
539 }) 543 })
540 #endif /* 1 */ 544 #endif /* 1 */
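The #if 0 comment above notes that gcc cannot turn a modulus by 2^n - 1 into the cheap fold used here. A tiny stand-alone illustration of that identity (hypothetical helper, not part of this header): since 2^n is congruent to 1 modulo 2^n - 1, the high bits can simply be added back into the low bits, and the second step only has to absorb the case where the folded value still reaches or exceeds the modulus.

static inline unsigned long mod_2n_minus_1(unsigned long x, unsigned int n)
{
	unsigned long m = (1UL << n) - 1;

	x = (x >> n) + (x & m);		 /* fold: 2^n == 1 (mod m) */
	return (x + ((x + 1) >> n)) & m; /* map the leftover m..2m range down */
}

This is the same shape as vsid_scramble() above and as the ASM_VSID_SCRAMBLE comment earlier in this file.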
541 545
542 /* Returns the segment size indicator for a user address */ 546 /* Returns the segment size indicator for a user address */
543 static inline int user_segment_size(unsigned long addr) 547 static inline int user_segment_size(unsigned long addr)
544 { 548 {
545 /* Use 1T segments if possible for addresses >= 1T */ 549 /* Use 1T segments if possible for addresses >= 1T */
546 if (addr >= (1UL << SID_SHIFT_1T)) 550 if (addr >= (1UL << SID_SHIFT_1T))
547 return mmu_highuser_ssize; 551 return mmu_highuser_ssize;
548 return MMU_SEGSIZE_256M; 552 return MMU_SEGSIZE_256M;
549 } 553 }
550 554
551 static inline unsigned long get_vsid(unsigned long context, unsigned long ea, 555 static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
552 int ssize) 556 int ssize)
553 { 557 {
554 /* 558 /*
555 * Bad address. We return VSID 0 for that 559 * Bad address. We return VSID 0 for that
556 */ 560 */
557 if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) 561 if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
558 return 0; 562 return 0;
559 563
560 if (ssize == MMU_SEGSIZE_256M) 564 if (ssize == MMU_SEGSIZE_256M)
561 return vsid_scramble((context << ESID_BITS) 565 return vsid_scramble((context << ESID_BITS)
562 | (ea >> SID_SHIFT), 256M); 566 | (ea >> SID_SHIFT), 256M);
563 return vsid_scramble((context << ESID_BITS_1T) 567 return vsid_scramble((context << ESID_BITS_1T)
564 | (ea >> SID_SHIFT_1T), 1T); 568 | (ea >> SID_SHIFT_1T), 1T);
565 } 569 }
566 570
567 /* 571 /*
568 * This is only valid for addresses >= PAGE_OFFSET 572 * This is only valid for addresses >= PAGE_OFFSET
569 * 573 *
570 * For kernel space, we use the top 4 context ids to map address as below 574 * For kernel space, we use the top 4 context ids to map address as below
571 * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] 575 * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ]
572 * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] 576 * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ]
573 * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] 577 * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ]
574 * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] 578 * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ]
575 */ 579 */
576 static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) 580 static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
577 { 581 {
578 unsigned long context; 582 unsigned long context;
579 583
580 /* 584 /*
581 * kernel takes the top 4 contexts from the available range 585 * kernel takes the top 4 contexts from the available range
582 */ 586 */
583 context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1; 587 context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1;
584 return get_vsid(context, ea, ssize); 588 return get_vsid(context, ea, ssize);
585 } 589 }
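Worked check of the table above: context = MAX_USER_CONTEXT + ((ea >> 60) - 0xc) + 1, so
	ea = 0xc... -> MAX_USER_CONTEXT + 1
	ea = 0xd... -> MAX_USER_CONTEXT + 2
	ea = 0xe... -> MAX_USER_CONTEXT + 3
	ea = 0xf... -> MAX_USER_CONTEXT + 4
which lands exactly on the four ids 0x7fffc..0x7ffff listed in the comment (i.e. the top user context here is 0x7fffb).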
586 #endif /* __ASSEMBLY__ */ 590 #endif /* __ASSEMBLY__ */
587 591
588 #endif /* _ASM_POWERPC_MMU_HASH64_H_ */ 592 #endif /* _ASM_POWERPC_MMU_HASH64_H_ */
589 593
arch/powerpc/include/asm/page.h
1 #ifndef _ASM_POWERPC_PAGE_H 1 #ifndef _ASM_POWERPC_PAGE_H
2 #define _ASM_POWERPC_PAGE_H 2 #define _ASM_POWERPC_PAGE_H
3 3
4 /* 4 /*
5 * Copyright (C) 2001,2005 IBM Corporation. 5 * Copyright (C) 2001,2005 IBM Corporation.
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 */ 11 */
12 12
13 #ifndef __ASSEMBLY__ 13 #ifndef __ASSEMBLY__
14 #include <linux/types.h> 14 #include <linux/types.h>
15 #else 15 #else
16 #include <asm/types.h> 16 #include <asm/types.h>
17 #endif 17 #endif
18 #include <asm/asm-compat.h> 18 #include <asm/asm-compat.h>
19 #include <asm/kdump.h> 19 #include <asm/kdump.h>
20 20
21 /* 21 /*
22 * On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages 22 * On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages
23 * on PPC44x). For PPC64 we support either 4K or 64K software 23 * on PPC44x). For PPC64 we support either 4K or 64K software
24 * page size. When using 64K pages however, whether we are really supporting 24 * page size. When using 64K pages however, whether we are really supporting
25 * 64K pages in HW or not is irrelevant to those definitions. 25 * 64K pages in HW or not is irrelevant to those definitions.
26 */ 26 */
27 #if defined(CONFIG_PPC_256K_PAGES) 27 #if defined(CONFIG_PPC_256K_PAGES)
28 #define PAGE_SHIFT 18 28 #define PAGE_SHIFT 18
29 #elif defined(CONFIG_PPC_64K_PAGES) 29 #elif defined(CONFIG_PPC_64K_PAGES)
30 #define PAGE_SHIFT 16 30 #define PAGE_SHIFT 16
31 #elif defined(CONFIG_PPC_16K_PAGES) 31 #elif defined(CONFIG_PPC_16K_PAGES)
32 #define PAGE_SHIFT 14 32 #define PAGE_SHIFT 14
33 #else 33 #else
34 #define PAGE_SHIFT 12 34 #define PAGE_SHIFT 12
35 #endif 35 #endif
36 36
37 #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) 37 #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
38 38
39 #ifndef __ASSEMBLY__ 39 #ifndef __ASSEMBLY__
40 #ifdef CONFIG_HUGETLB_PAGE 40 #ifdef CONFIG_HUGETLB_PAGE
41 extern unsigned int HPAGE_SHIFT; 41 extern unsigned int HPAGE_SHIFT;
42 #else 42 #else
43 #define HPAGE_SHIFT PAGE_SHIFT 43 #define HPAGE_SHIFT PAGE_SHIFT
44 #endif 44 #endif
45 #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) 45 #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
46 #define HPAGE_MASK (~(HPAGE_SIZE - 1)) 46 #define HPAGE_MASK (~(HPAGE_SIZE - 1))
47 #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 47 #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
48 #define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) 48 #define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1)
49 #endif 49 #endif
50 50
51 /* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ 51 /* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
52 #define __HAVE_ARCH_GATE_AREA 1 52 #define __HAVE_ARCH_GATE_AREA 1
53 53
54 /* 54 /*
55 * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we 55 * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we
56 * assign PAGE_MASK to a larger type it gets extended the way we want 56 * assign PAGE_MASK to a larger type it gets extended the way we want
57 * (i.e. with 1s in the high bits) 57 * (i.e. with 1s in the high bits)
58 */ 58 */
59 #define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) 59 #define PAGE_MASK (~((1 << PAGE_SHIFT) - 1))
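A small illustration of the subtlety spelled out above (the helper is hypothetical; values assume 64K pages): because (1 << PAGE_SHIFT) is an int, PAGE_MASK is the negative int ~0xffff, and converting it to a 64-bit type sign-extends it to 0xffffffffffff0000, so the high bits of a 64-bit address survive the mask.

static inline unsigned long page_base(unsigned long addr)
{
	/* 0xc000000012345678 & PAGE_MASK == 0xc000000012340000 with 64K pages */
	return addr & PAGE_MASK;
}

Had PAGE_MASK been built from an unsigned 32-bit constant, the same expression would silently clear the top 32 bits.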
60 60
61 /* 61 /*
62 * KERNELBASE is the virtual address of the start of the kernel, it's often 62 * KERNELBASE is the virtual address of the start of the kernel, it's often
63 * the same as PAGE_OFFSET, but _might not be_. 63 * the same as PAGE_OFFSET, but _might not be_.
64 * 64 *
65 * The kdump dump kernel is one example where KERNELBASE != PAGE_OFFSET. 65 * The kdump dump kernel is one example where KERNELBASE != PAGE_OFFSET.
66 * 66 *
67 * PAGE_OFFSET is the virtual address of the start of lowmem. 67 * PAGE_OFFSET is the virtual address of the start of lowmem.
68 * 68 *
69 * PHYSICAL_START is the physical address of the start of the kernel. 69 * PHYSICAL_START is the physical address of the start of the kernel.
70 * 70 *
71 * MEMORY_START is the physical address of the start of lowmem. 71 * MEMORY_START is the physical address of the start of lowmem.
72 * 72 *
73 * KERNELBASE, PAGE_OFFSET, and PHYSICAL_START are all configurable on 73 * KERNELBASE, PAGE_OFFSET, and PHYSICAL_START are all configurable on
74 * ppc32 and based on how they are set we determine MEMORY_START. 74 * ppc32 and based on how they are set we determine MEMORY_START.
75 * 75 *
76 * For the linear mapping the following equation should be true: 76 * For the linear mapping the following equation should be true:
77 * KERNELBASE - PAGE_OFFSET = PHYSICAL_START - MEMORY_START 77 * KERNELBASE - PAGE_OFFSET = PHYSICAL_START - MEMORY_START
78 * 78 *
79 * Also, KERNELBASE >= PAGE_OFFSET and PHYSICAL_START >= MEMORY_START 79 * Also, KERNELBASE >= PAGE_OFFSET and PHYSICAL_START >= MEMORY_START
80 * 80 *
81 * There are two ways to determine a physical address from a virtual one: 81 * There are two ways to determine a physical address from a virtual one:
82 * va = pa + PAGE_OFFSET - MEMORY_START 82 * va = pa + PAGE_OFFSET - MEMORY_START
83 * va = pa + KERNELBASE - PHYSICAL_START 83 * va = pa + KERNELBASE - PHYSICAL_START
84 * 84 *
85 * If you want to know something's offset from the start of the kernel you 85 * If you want to know something's offset from the start of the kernel you
86 * should subtract KERNELBASE. 86 * should subtract KERNELBASE.
87 * 87 *
88 * If you want to test if something's a kernel address, use is_kernel_addr(). 88 * If you want to test if something's a kernel address, use is_kernel_addr().
89 */ 89 */
90 90
91 #define KERNELBASE ASM_CONST(CONFIG_KERNEL_START) 91 #define KERNELBASE ASM_CONST(CONFIG_KERNEL_START)
92 #define PAGE_OFFSET ASM_CONST(CONFIG_PAGE_OFFSET) 92 #define PAGE_OFFSET ASM_CONST(CONFIG_PAGE_OFFSET)
93 #define LOAD_OFFSET ASM_CONST((CONFIG_KERNEL_START-CONFIG_PHYSICAL_START)) 93 #define LOAD_OFFSET ASM_CONST((CONFIG_KERNEL_START-CONFIG_PHYSICAL_START))
94 94
95 #if defined(CONFIG_NONSTATIC_KERNEL) 95 #if defined(CONFIG_NONSTATIC_KERNEL)
96 #ifndef __ASSEMBLY__ 96 #ifndef __ASSEMBLY__
97 97
98 extern phys_addr_t memstart_addr; 98 extern phys_addr_t memstart_addr;
99 extern phys_addr_t kernstart_addr; 99 extern phys_addr_t kernstart_addr;
100 100
101 #ifdef CONFIG_RELOCATABLE_PPC32 101 #ifdef CONFIG_RELOCATABLE_PPC32
102 extern long long virt_phys_offset; 102 extern long long virt_phys_offset;
103 #endif 103 #endif
104 104
105 #endif /* __ASSEMBLY__ */ 105 #endif /* __ASSEMBLY__ */
106 #define PHYSICAL_START kernstart_addr 106 #define PHYSICAL_START kernstart_addr
107 107
108 #else /* !CONFIG_NONSTATIC_KERNEL */ 108 #else /* !CONFIG_NONSTATIC_KERNEL */
109 #define PHYSICAL_START ASM_CONST(CONFIG_PHYSICAL_START) 109 #define PHYSICAL_START ASM_CONST(CONFIG_PHYSICAL_START)
110 #endif 110 #endif
111 111
112 /* See Description below for VIRT_PHYS_OFFSET */ 112 /* See Description below for VIRT_PHYS_OFFSET */
113 #ifdef CONFIG_RELOCATABLE_PPC32 113 #ifdef CONFIG_RELOCATABLE_PPC32
114 #define VIRT_PHYS_OFFSET virt_phys_offset 114 #define VIRT_PHYS_OFFSET virt_phys_offset
115 #else 115 #else
116 #define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START) 116 #define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
117 #endif 117 #endif
118 118
119 119
120 #ifdef CONFIG_PPC64 120 #ifdef CONFIG_PPC64
121 #define MEMORY_START 0UL 121 #define MEMORY_START 0UL
122 #elif defined(CONFIG_NONSTATIC_KERNEL) 122 #elif defined(CONFIG_NONSTATIC_KERNEL)
123 #define MEMORY_START memstart_addr 123 #define MEMORY_START memstart_addr
124 #else 124 #else
125 #define MEMORY_START (PHYSICAL_START + PAGE_OFFSET - KERNELBASE) 125 #define MEMORY_START (PHYSICAL_START + PAGE_OFFSET - KERNELBASE)
126 #endif 126 #endif
127 127
128 #ifdef CONFIG_FLATMEM 128 #ifdef CONFIG_FLATMEM
129 #define ARCH_PFN_OFFSET ((unsigned long)(MEMORY_START >> PAGE_SHIFT)) 129 #define ARCH_PFN_OFFSET ((unsigned long)(MEMORY_START >> PAGE_SHIFT))
130 #define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr) 130 #define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr)
131 #endif 131 #endif
132 132
133 #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) 133 #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
134 #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) 134 #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
135 #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) 135 #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
136 136
137 /* 137 /*
138 * On Book-E parts we need __va to parse the device tree and we can't 138 * On Book-E parts we need __va to parse the device tree and we can't
139 * determine MEMORY_START until then. However we can determine PHYSICAL_START 139 * determine MEMORY_START until then. However we can determine PHYSICAL_START
140 * from information at hand (program counter, TLB lookup). 140 * from information at hand (program counter, TLB lookup).
141 * 141 *
142 * On BookE with RELOCATABLE (RELOCATABLE_PPC32) 142 * On BookE with RELOCATABLE (RELOCATABLE_PPC32)
143 * 143 *
144 * With RELOCATABLE_PPC32, we support loading the kernel at any physical 144 * With RELOCATABLE_PPC32, we support loading the kernel at any physical
145 * address without any restriction on the page alignment. 145 * address without any restriction on the page alignment.
146 * 146 *
147 * We find the runtime address of _stext and relocate ourselves based on 147 * We find the runtime address of _stext and relocate ourselves based on
148 * the following calculation: 148 * the following calculation:
149 * 149 *
150 * virtual_base = ALIGN_DOWN(KERNELBASE,256M) + 150 * virtual_base = ALIGN_DOWN(KERNELBASE,256M) +
151 * MODULO(_stext.run,256M) 151 * MODULO(_stext.run,256M)
152 * and create the following mapping: 152 * and create the following mapping:
153 * 153 *
154 * ALIGN_DOWN(_stext.run,256M) => ALIGN_DOWN(KERNELBASE,256M) 154 * ALIGN_DOWN(_stext.run,256M) => ALIGN_DOWN(KERNELBASE,256M)
155 * 155 *
156 * When we process relocations, we cannot depend on the 156 * When we process relocations, we cannot depend on the
157 * existing equation for the __va()/__pa() translations: 157 * existing equation for the __va()/__pa() translations:
158 * 158 *
159 * __va(x) = (x) - PHYSICAL_START + KERNELBASE 159 * __va(x) = (x) - PHYSICAL_START + KERNELBASE
160 * 160 *
161 * Where: 161 * Where:
162 * PHYSICAL_START = kernstart_addr = Physical address of _stext 162 * PHYSICAL_START = kernstart_addr = Physical address of _stext
163 * KERNELBASE = Compiled virtual address of _stext. 163 * KERNELBASE = Compiled virtual address of _stext.
164 * 164 *
165 * This formula holds true iff the kernel load address is TLB page aligned. 165 * This formula holds true iff the kernel load address is TLB page aligned.
166 * 166 *
167 * In our case, we need to also account for the shift in the kernel Virtual 167 * In our case, we need to also account for the shift in the kernel Virtual
168 * address. 168 * address.
169 * 169 *
170 * E.g., 170 * E.g.,
171 * 171 *
172 * Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as PAGE_OFFSET). 172 * Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as PAGE_OFFSET).
173 * In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M 173 * In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M
174 * 174 *
175 * Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000 175 * Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000
176 * = 0xbc100000 , which is wrong. 176 * = 0xbc100000 , which is wrong.
177 * 177 *
178 * Rather, it should be : 0xc0000000 + 0x100000 = 0xc0100000 178 * Rather, it should be : 0xc0000000 + 0x100000 = 0xc0100000
179 * according to our mapping. 179 * according to our mapping.
180 * 180 *
181 * Hence we use the following formula to get the translations right: 181 * Hence we use the following formula to get the translations right:
182 * 182 *
183 * __va(x) = (x) - [ PHYSICAL_START - Effective KERNELBASE ] 183 * __va(x) = (x) - [ PHYSICAL_START - Effective KERNELBASE ]
184 * 184 *
185 * Where : 185 * Where :
186 * PHYSICAL_START = dynamic load address (kernstart_addr variable) 186 * PHYSICAL_START = dynamic load address (kernstart_addr variable)
187 * Effective KERNELBASE = virtual_base = 187 * Effective KERNELBASE = virtual_base =
188 * = ALIGN_DOWN(KERNELBASE,256M) + 188 * = ALIGN_DOWN(KERNELBASE,256M) +
189 * MODULO(PHYSICAL_START,256M) 189 * MODULO(PHYSICAL_START,256M)
190 * 190 *
191 * To make the cost of __va() / __pa() more light weight, we introduce 191 * To make the cost of __va() / __pa() more light weight, we introduce
192 * a new variable virt_phys_offset, which will hold : 192 * a new variable virt_phys_offset, which will hold :
193 * 193 *
194 * virt_phys_offset = Effective KERNELBASE - PHYSICAL_START 194 * virt_phys_offset = Effective KERNELBASE - PHYSICAL_START
195 * = ALIGN_DOWN(KERNELBASE,256M) - 195 * = ALIGN_DOWN(KERNELBASE,256M) -
196 * ALIGN_DOWN(PHYSICAL_START,256M) 196 * ALIGN_DOWN(PHYSICAL_START,256M)
197 * 197 *
198 * Hence : 198 * Hence :
199 * 199 *
200 * __va(x) = x - PHYSICAL_START + Effective KERNELBASE 200 * __va(x) = x - PHYSICAL_START + Effective KERNELBASE
201 * = x + virt_phys_offset 201 * = x + virt_phys_offset
202 * 202 *
203 * and 203 * and
204 * __pa(x) = x + PHYSICAL_START - Effective KERNELBASE 204 * __pa(x) = x + PHYSICAL_START - Effective KERNELBASE
205 * = x - virt_phys_offset 205 * = x - virt_phys_offset
206 * 206 *
207 * On non-Book-E PPC64 PAGE_OFFSET and MEMORY_START are constants so use 207 * On non-Book-E PPC64 PAGE_OFFSET and MEMORY_START are constants so use
208 * the other definitions for __va & __pa. 208 * the other definitions for __va & __pa.
209 */ 209 */
210 #ifdef CONFIG_BOOKE 210 #ifdef CONFIG_BOOKE
211 #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET)) 211 #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
212 #define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET) 212 #define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET)
213 #else 213 #else
214 #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START)) 214 #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START))
215 #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + MEMORY_START) 215 #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + MEMORY_START)
216 #endif 216 #endif
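Restating the worked example from the comment above as arithmetic (illustrative values only): with the kernel loaded at 64MB and KERNELBASE = PAGE_OFFSET = 0xc0000000,

	Effective KERNELBASE = ALIGN_DOWN(0xc0000000, 256M) + MODULO(0x04000000, 256M)
	                     = 0xc0000000 + 0x04000000 = 0xc4000000
	virt_phys_offset     = 0xc4000000 - 0x04000000 = 0xc0000000

	__va(0x00100000) = 0x00100000 + 0xc0000000 = 0xc0100000
	__pa(0xc0100000) = 0xc0100000 - 0xc0000000 = 0x00100000

which is the 0xc0100000 the comment expects, whereas the non-relocatable formula would have produced the wrong 0xbc100000.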
217 217
218 /* 218 /*
219 * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI, 219 * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
220 * and needs to be executable. This means the whole heap ends 220 * and needs to be executable. This means the whole heap ends
221 * up being executable. 221 * up being executable.
222 */ 222 */
223 #define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ 223 #define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
224 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 224 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
225 225
226 #define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ 226 #define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
227 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 227 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
228 228
229 #ifdef __powerpc64__ 229 #ifdef __powerpc64__
230 #include <asm/page_64.h> 230 #include <asm/page_64.h>
231 #else 231 #else
232 #include <asm/page_32.h> 232 #include <asm/page_32.h>
233 #endif 233 #endif
234 234
235 /* align addr on a size boundary - adjust address up/down if needed */ 235 /* align addr on a size boundary - adjust address up/down if needed */
236 #define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) 236 #define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1)))
237 #define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) 237 #define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1)))
238 238
239 /* align addr on a size boundary - adjust address up if needed */ 239 /* align addr on a size boundary - adjust address up if needed */
240 #define _ALIGN(addr,size) _ALIGN_UP(addr,size) 240 #define _ALIGN(addr,size) _ALIGN_UP(addr,size)
241 241
242 /* 242 /*
243 * Don't compare things with KERNELBASE or PAGE_OFFSET to test for 243 * Don't compare things with KERNELBASE or PAGE_OFFSET to test for
244 * "kernelness", use is_kernel_addr() - it should do what you want. 244 * "kernelness", use is_kernel_addr() - it should do what you want.
245 */ 245 */
246 #ifdef CONFIG_PPC_BOOK3E_64 246 #ifdef CONFIG_PPC_BOOK3E_64
247 #define is_kernel_addr(x) ((x) >= 0x8000000000000000ul) 247 #define is_kernel_addr(x) ((x) >= 0x8000000000000000ul)
248 #else 248 #else
249 #define is_kernel_addr(x) ((x) >= PAGE_OFFSET) 249 #define is_kernel_addr(x) ((x) >= PAGE_OFFSET)
250 #endif 250 #endif
251 251
252 #ifndef CONFIG_PPC_BOOK3S_64 252 #ifndef CONFIG_PPC_BOOK3S_64
253 /* 253 /*
254 * Use the top bit of the higher-level page table entries to indicate whether 254 * Use the top bit of the higher-level page table entries to indicate whether
255 * the entries we point to contain hugepages. This works because we know that 255 * the entries we point to contain hugepages. This works because we know that
256 * the page tables live in kernel space. If we ever decide to support having 256 * the page tables live in kernel space. If we ever decide to support having
257 * page tables at arbitrary addresses, this breaks and will have to change. 257 * page tables at arbitrary addresses, this breaks and will have to change.
258 */ 258 */
259 #ifdef CONFIG_PPC64 259 #ifdef CONFIG_PPC64
260 #define PD_HUGE 0x8000000000000000 260 #define PD_HUGE 0x8000000000000000
261 #else 261 #else
262 #define PD_HUGE 0x80000000 262 #define PD_HUGE 0x80000000
263 #endif 263 #endif
264 #endif /* CONFIG_PPC_BOOK3S_64 */ 264 #endif /* CONFIG_PPC_BOOK3S_64 */
265 265
266 /* 266 /*
267 * Some number of bits at the level of the page table that points to 267 * Some number of bits at the level of the page table that points to
268 * a hugepte are used to encode the size. This masks those bits. 268 * a hugepte are used to encode the size. This masks those bits.
269 */ 269 */
270 #define HUGEPD_SHIFT_MASK 0x3f 270 #define HUGEPD_SHIFT_MASK 0x3f
271 271
272 #ifndef __ASSEMBLY__ 272 #ifndef __ASSEMBLY__
273 273
274 #undef STRICT_MM_TYPECHECKS 274 #undef STRICT_MM_TYPECHECKS
275 275
276 #ifdef STRICT_MM_TYPECHECKS 276 #ifdef STRICT_MM_TYPECHECKS
277 /* These are used to make use of C type-checking. */ 277 /* These are used to make use of C type-checking. */
278 278
279 /* PTE level */ 279 /* PTE level */
280 typedef struct { pte_basic_t pte; } pte_t; 280 typedef struct { pte_basic_t pte; } pte_t;
281 #define pte_val(x) ((x).pte) 281 #define pte_val(x) ((x).pte)
282 #define __pte(x) ((pte_t) { (x) }) 282 #define __pte(x) ((pte_t) { (x) })
283 283
284 /* 64k pages additionally define a bigger "real PTE" type that gathers 284 /* 64k pages additionally define a bigger "real PTE" type that gathers
285 * the "second half" part of the PTE for pseudo 64k pages 285 * the "second half" part of the PTE for pseudo 64k pages
286 */ 286 */
287 #if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) 287 #if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
288 typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; 288 typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
289 #else 289 #else
290 typedef struct { pte_t pte; } real_pte_t; 290 typedef struct { pte_t pte; } real_pte_t;
291 #endif 291 #endif
292 292
293 /* PMD level */ 293 /* PMD level */
294 #ifdef CONFIG_PPC64 294 #ifdef CONFIG_PPC64
295 typedef struct { unsigned long pmd; } pmd_t; 295 typedef struct { unsigned long pmd; } pmd_t;
296 #define pmd_val(x) ((x).pmd) 296 #define pmd_val(x) ((x).pmd)
297 #define __pmd(x) ((pmd_t) { (x) }) 297 #define __pmd(x) ((pmd_t) { (x) })
298 298
299 /* PUD level exists only on 4k pages */ 299 /* PUD level exists only on 4k pages */
300 #ifndef CONFIG_PPC_64K_PAGES 300 #ifndef CONFIG_PPC_64K_PAGES
301 typedef struct { unsigned long pud; } pud_t; 301 typedef struct { unsigned long pud; } pud_t;
302 #define pud_val(x) ((x).pud) 302 #define pud_val(x) ((x).pud)
303 #define __pud(x) ((pud_t) { (x) }) 303 #define __pud(x) ((pud_t) { (x) })
304 #endif /* !CONFIG_PPC_64K_PAGES */ 304 #endif /* !CONFIG_PPC_64K_PAGES */
305 #endif /* CONFIG_PPC64 */ 305 #endif /* CONFIG_PPC64 */
306 306
307 /* PGD level */ 307 /* PGD level */
308 typedef struct { unsigned long pgd; } pgd_t; 308 typedef struct { unsigned long pgd; } pgd_t;
309 #define pgd_val(x) ((x).pgd) 309 #define pgd_val(x) ((x).pgd)
310 #define __pgd(x) ((pgd_t) { (x) }) 310 #define __pgd(x) ((pgd_t) { (x) })
311 311
312 /* Page protection bits */ 312 /* Page protection bits */
313 typedef struct { unsigned long pgprot; } pgprot_t; 313 typedef struct { unsigned long pgprot; } pgprot_t;
314 #define pgprot_val(x) ((x).pgprot) 314 #define pgprot_val(x) ((x).pgprot)
315 #define __pgprot(x) ((pgprot_t) { (x) }) 315 #define __pgprot(x) ((pgprot_t) { (x) })
316 316
317 #else 317 #else
318 318
319 /* 319 /*
320 * .. while these make it easier on the compiler 320 * .. while these make it easier on the compiler
321 */ 321 */
322 322
323 typedef pte_basic_t pte_t; 323 typedef pte_basic_t pte_t;
324 #define pte_val(x) (x) 324 #define pte_val(x) (x)
325 #define __pte(x) (x) 325 #define __pte(x) (x)
326 326
327 #if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) 327 #if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
328 typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; 328 typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
329 #else 329 #else
330 typedef pte_t real_pte_t; 330 typedef pte_t real_pte_t;
331 #endif 331 #endif
332 332
333 333
334 #ifdef CONFIG_PPC64 334 #ifdef CONFIG_PPC64
335 typedef unsigned long pmd_t; 335 typedef unsigned long pmd_t;
336 #define pmd_val(x) (x) 336 #define pmd_val(x) (x)
337 #define __pmd(x) (x) 337 #define __pmd(x) (x)
338 338
339 #ifndef CONFIG_PPC_64K_PAGES 339 #ifndef CONFIG_PPC_64K_PAGES
340 typedef unsigned long pud_t; 340 typedef unsigned long pud_t;
341 #define pud_val(x) (x) 341 #define pud_val(x) (x)
342 #define __pud(x) (x) 342 #define __pud(x) (x)
343 #endif /* !CONFIG_PPC_64K_PAGES */ 343 #endif /* !CONFIG_PPC_64K_PAGES */
344 #endif /* CONFIG_PPC64 */ 344 #endif /* CONFIG_PPC64 */
345 345
346 typedef unsigned long pgd_t; 346 typedef unsigned long pgd_t;
347 #define pgd_val(x) (x) 347 #define pgd_val(x) (x)
348 #define pgprot_val(x) (x) 348 #define pgprot_val(x) (x)
349 349
350 typedef unsigned long pgprot_t; 350 typedef unsigned long pgprot_t;
351 #define __pgd(x) (x) 351 #define __pgd(x) (x)
352 #define __pgprot(x) (x) 352 #define __pgprot(x) (x)
353 353
354 #endif 354 #endif
355 355
356 typedef struct { signed long pd; } hugepd_t; 356 typedef struct { signed long pd; } hugepd_t;
357 357
358 #ifdef CONFIG_HUGETLB_PAGE 358 #ifdef CONFIG_HUGETLB_PAGE
359 #ifdef CONFIG_PPC_BOOK3S_64 359 #ifdef CONFIG_PPC_BOOK3S_64
360 static inline int hugepd_ok(hugepd_t hpd) 360 static inline int hugepd_ok(hugepd_t hpd)
361 { 361 {
362 /* 362 /*
363 * hugepd pointer, bottom two bits == 00 and next 4 bits 363 * hugepd pointer, bottom two bits == 00 and next 4 bits
364 * indicate size of table 364 * indicate size of table
365 */ 365 */
366 return (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0)); 366 return (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0));
367 } 367 }
368 #else 368 #else
369 static inline int hugepd_ok(hugepd_t hpd) 369 static inline int hugepd_ok(hugepd_t hpd)
370 { 370 {
371 return (hpd.pd > 0); 371 return (hpd.pd > 0);
372 } 372 }
373 #endif 373 #endif
374 374
375 #define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep)))) 375 #define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep))))
376 int pgd_huge(pgd_t pgd); 376 int pgd_huge(pgd_t pgd);
377 #else /* CONFIG_HUGETLB_PAGE */ 377 #else /* CONFIG_HUGETLB_PAGE */
378 #define is_hugepd(pdep) 0 378 #define is_hugepd(pdep) 0
379 #define pgd_huge(pgd) 0 379 #define pgd_huge(pgd) 0
380 #endif /* CONFIG_HUGETLB_PAGE */ 380 #endif /* CONFIG_HUGETLB_PAGE */
381 381
382 struct page; 382 struct page;
383 extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); 383 extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
384 extern void copy_user_page(void *to, void *from, unsigned long vaddr, 384 extern void copy_user_page(void *to, void *from, unsigned long vaddr,
385 struct page *p); 385 struct page *p);
386 extern int page_is_ram(unsigned long pfn); 386 extern int page_is_ram(unsigned long pfn);
387 extern int devmem_is_allowed(unsigned long pfn); 387 extern int devmem_is_allowed(unsigned long pfn);
388 388
389 #ifdef CONFIG_PPC_SMLPAR 389 #ifdef CONFIG_PPC_SMLPAR
390 void arch_free_page(struct page *page, int order); 390 void arch_free_page(struct page *page, int order);
391 #define HAVE_ARCH_FREE_PAGE 391 #define HAVE_ARCH_FREE_PAGE
392 #endif 392 #endif
393 393
394 struct vm_area_struct; 394 struct vm_area_struct;
395 395
396 #ifdef CONFIG_PPC_64K_PAGES
397 typedef pte_t *pgtable_t;
398 #else
396 typedef struct page *pgtable_t; 399 typedef struct page *pgtable_t;
400 #endif
397 401
398 #include <asm-generic/memory_model.h> 402 #include <asm-generic/memory_model.h>
399 #endif /* __ASSEMBLY__ */ 403 #endif /* __ASSEMBLY__ */
400 404
401 #endif /* _ASM_POWERPC_PAGE_H */ 405 #endif /* _ASM_POWERPC_PAGE_H */
402 406
arch/powerpc/include/asm/pgalloc-64.h
1 #ifndef _ASM_POWERPC_PGALLOC_64_H 1 #ifndef _ASM_POWERPC_PGALLOC_64_H
2 #define _ASM_POWERPC_PGALLOC_64_H 2 #define _ASM_POWERPC_PGALLOC_64_H
3 /* 3 /*
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License 5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version. 7 * 2 of the License, or (at your option) any later version.
8 */ 8 */
9 9
10 #include <linux/slab.h> 10 #include <linux/slab.h>
11 #include <linux/cpumask.h> 11 #include <linux/cpumask.h>
12 #include <linux/percpu.h> 12 #include <linux/percpu.h>
13 13
14 struct vmemmap_backing { 14 struct vmemmap_backing {
15 struct vmemmap_backing *list; 15 struct vmemmap_backing *list;
16 unsigned long phys; 16 unsigned long phys;
17 unsigned long virt_addr; 17 unsigned long virt_addr;
18 }; 18 };
19 19
20 /* 20 /*
21 * Functions that deal with pagetables that could be at any level of 21 * Functions that deal with pagetables that could be at any level of
22 * the table need to be passed an "index_size" so they know how to 22 * the table need to be passed an "index_size" so they know how to
23 * handle allocation. For PTE pages (which are linked to a struct 23 * handle allocation. For PTE pages (which are linked to a struct
24 * page for now, and drawn from the main get_free_pages() pool), the 24 * page for now, and drawn from the main get_free_pages() pool), the
25 * allocation size will be (2^index_size * sizeof(pointer)) and 25 * allocation size will be (2^index_size * sizeof(pointer)) and
26 * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). 26 * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
27 * 27 *
28 * The maximum index size needs to be big enough to allow any 28 * The maximum index size needs to be big enough to allow any
29 * pagetable sizes we need, but small enough to fit in the low bits of 29 * pagetable sizes we need, but small enough to fit in the low bits of
30 * any page table pointer. In other words all pagetables, even tiny 30 * any page table pointer. In other words all pagetables, even tiny
31 * ones, must be aligned to allow at least enough low 0 bits to 31 * ones, must be aligned to allow at least enough low 0 bits to
32 * contain this value. This value is also used as a mask, so it must 32 * contain this value. This value is also used as a mask, so it must
33 * be one less than a power of two. 33 * be one less than a power of two.
34 */ 34 */
35 #define MAX_PGTABLE_INDEX_SIZE 0xf 35 #define MAX_PGTABLE_INDEX_SIZE 0xf
36 36
37 extern struct kmem_cache *pgtable_cache[]; 37 extern struct kmem_cache *pgtable_cache[];
38 #define PGT_CACHE(shift) ({ \ 38 #define PGT_CACHE(shift) ({ \
39 BUG_ON(!(shift)); \ 39 BUG_ON(!(shift)); \
40 pgtable_cache[(shift) - 1]; \ 40 pgtable_cache[(shift) - 1]; \
41 }) 41 })
42 42
43 static inline pgd_t *pgd_alloc(struct mm_struct *mm) 43 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
44 { 44 {
45 return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); 45 return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
46 } 46 }
47 47
48 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) 48 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
49 { 49 {
50 kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); 50 kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
51 } 51 }
52 52
53 #ifndef CONFIG_PPC_64K_PAGES 53 #ifndef CONFIG_PPC_64K_PAGES
54 54
55 #define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) 55 #define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD)
56 56
57 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) 57 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
58 { 58 {
59 return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), 59 return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
60 GFP_KERNEL|__GFP_REPEAT); 60 GFP_KERNEL|__GFP_REPEAT);
61 } 61 }
62 62
63 static inline void pud_free(struct mm_struct *mm, pud_t *pud) 63 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
64 { 64 {
65 kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); 65 kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
66 } 66 }
67 67
68 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) 68 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
69 { 69 {
70 pud_set(pud, (unsigned long)pmd); 70 pud_set(pud, (unsigned long)pmd);
71 } 71 }
72 72
73 #define pmd_populate(mm, pmd, pte_page) \ 73 #define pmd_populate(mm, pmd, pte_page) \
74 pmd_populate_kernel(mm, pmd, page_address(pte_page)) 74 pmd_populate_kernel(mm, pmd, page_address(pte_page))
75 #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte)) 75 #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte))
76 #define pmd_pgtable(pmd) pmd_page(pmd) 76 #define pmd_pgtable(pmd) pmd_page(pmd)
77 77
78 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, 78 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
79 unsigned long address) 79 unsigned long address)
80 { 80 {
81 return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); 81 return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
82 } 82 }
83 83
84 static inline pgtable_t pte_alloc_one(struct mm_struct *mm, 84 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
85 unsigned long address) 85 unsigned long address)
86 { 86 {
87 struct page *page; 87 struct page *page;
88 pte_t *pte; 88 pte_t *pte;
89 89
90 pte = pte_alloc_one_kernel(mm, address); 90 pte = pte_alloc_one_kernel(mm, address);
91 if (!pte) 91 if (!pte)
92 return NULL; 92 return NULL;
93 page = virt_to_page(pte); 93 page = virt_to_page(pte);
94 pgtable_page_ctor(page); 94 pgtable_page_ctor(page);
95 return page; 95 return page;
96 } 96 }
97 97
98 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) 98 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
99 { 99 {
100 free_page((unsigned long)pte); 100 free_page((unsigned long)pte);
101 } 101 }
102 102
103 static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) 103 static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
104 { 104 {
105 pgtable_page_dtor(ptepage); 105 pgtable_page_dtor(ptepage);
106 __free_page(ptepage); 106 __free_page(ptepage);
107 } 107 }
108 108
109 static inline void pgtable_free(void *table, unsigned index_size) 109 static inline void pgtable_free(void *table, unsigned index_size)
110 { 110 {
111 if (!index_size) 111 if (!index_size)
112 free_page((unsigned long)table); 112 free_page((unsigned long)table);
113 else { 113 else {
114 BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE); 114 BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
115 kmem_cache_free(PGT_CACHE(index_size), table); 115 kmem_cache_free(PGT_CACHE(index_size), table);
116 } 116 }
117 } 117 }
118 118
119 #ifdef CONFIG_SMP 119 #ifdef CONFIG_SMP
120 static inline void pgtable_free_tlb(struct mmu_gather *tlb, 120 static inline void pgtable_free_tlb(struct mmu_gather *tlb,
121 void *table, int shift) 121 void *table, int shift)
122 { 122 {
123 unsigned long pgf = (unsigned long)table; 123 unsigned long pgf = (unsigned long)table;
124 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); 124 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
125 pgf |= shift; 125 pgf |= shift;
126 tlb_remove_table(tlb, (void *)pgf); 126 tlb_remove_table(tlb, (void *)pgf);
127 } 127 }
128 128
129 static inline void __tlb_remove_table(void *_table) 129 static inline void __tlb_remove_table(void *_table)
130 { 130 {
131 void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); 131 void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
132 unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; 132 unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
133 133
134 pgtable_free(table, shift); 134 pgtable_free(table, shift);
135 } 135 }
136 #else /* !CONFIG_SMP */ 136 #else /* !CONFIG_SMP */
137 static inline void pgtable_free_tlb(struct mmu_gather *tlb, 137 static inline void pgtable_free_tlb(struct mmu_gather *tlb,
138 void *table, int shift) 138 void *table, int shift)
139 { 139 {
140 pgtable_free(table, shift); 140 pgtable_free(table, shift);
141 } 141 }
142 #endif /* CONFIG_SMP */ 142 #endif /* CONFIG_SMP */
143 143
144 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, 144 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
145 unsigned long address) 145 unsigned long address)
146 { 146 {
147 struct page *page = page_address(table); 147 struct page *page = page_address(table);
148 148
149 tlb_flush_pgtable(tlb, address); 149 tlb_flush_pgtable(tlb, address);
150 pgtable_page_dtor(page); 150 pgtable_page_dtor(page);
151 pgtable_free_tlb(tlb, page, 0); 151 pgtable_free_tlb(tlb, page, 0);
152 } 152 }
153 153
154 #else /* if CONFIG_PPC_64K_PAGES */ 154 #else /* if CONFIG_PPC_64K_PAGES */
155 /*
156 * we support 16 fragments per PTE page.
157 */
158 #define PTE_FRAG_NR 16
159 /*
160 * We use a 2K PTE page fragment and another 2K for storing
161 * real_pte_t hash index
162 */
163 #define PTE_FRAG_SIZE_SHIFT 12
164 #define PTE_FRAG_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
155 165
166 extern pte_t *page_table_alloc(struct mm_struct *, unsigned long, int);
167 extern void page_table_free(struct mm_struct *, unsigned long *, int);
168 extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
169 #ifdef CONFIG_SMP
170 extern void __tlb_remove_table(void *_table);
171 #endif
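page_table_alloc() and page_table_free() are implemented elsewhere in this patch (the mm/ side is not part of this hunk). The following is only a rough sketch, under stated assumptions, of the sharing scheme the commit describes: with PTRS_PER_PTE = 256 on the 64K-page layout, PTE_FRAG_SIZE = 2 * 256 * 8 = 4K, so a 64K page yields PTE_FRAG_NR = 16 fragments. The helper name is invented, and locking, the kernel/user distinction (the int argument above) and pgtable_page_ctor() handling are all omitted.

static pte_t *pte_fragment_alloc_sketch(struct mm_struct *mm)
{
	void *frag = mm->context.pte_frag;
	struct page *page;

	if (frag) {
		void *next = frag + PTE_FRAG_SIZE;

		/* forget the page once its last fragment is handed out */
		mm->context.pte_frag =
			((unsigned long)next & ~PAGE_MASK) ? next : NULL;
		return (pte_t *)frag;
	}

	page = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
	if (!page)
		return NULL;
	/* one reference per fragment, so the page is freed on the 16th put */
	atomic_set(&page->_count, PTE_FRAG_NR);

	mm->context.pte_frag = page_address(page) + PTE_FRAG_SIZE;
	return (pte_t *)page_address(page);
}

page_table_free() would then drop one reference per freed fragment; freed fragments are not reused, which is the small amount of waste the commit message accepts in exchange for NUMA locality of consecutive allocations.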
172
156 #define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd) 173 #define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd)
157 174
158 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, 175 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
159 pte_t *pte) 176 pte_t *pte)
160 { 177 {
161 pmd_set(pmd, (unsigned long)pte); 178 pmd_set(pmd, (unsigned long)pte);
162 } 179 }
163 180
164 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, 181 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
165 pgtable_t pte_page) 182 pgtable_t pte_page)
166 { 183 {
167 pmd_populate_kernel(mm, pmd, page_address(pte_page)); 184 pmd_set(pmd, (unsigned long)pte_page);
168 } 185 }
169 186
170 static inline pgtable_t pmd_pgtable(pmd_t pmd) 187 static inline pgtable_t pmd_pgtable(pmd_t pmd)
171 { 188 {
172 return pmd_page(pmd); 189 return (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE);
173 } 190 }
174 191
175 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, 192 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
176 unsigned long address) 193 unsigned long address)
177 { 194 {
178 return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); 195 return (pte_t *)page_table_alloc(mm, address, 1);
179 } 196 }
180 197
181 static inline pgtable_t pte_alloc_one(struct mm_struct *mm, 198 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
182 unsigned long address) 199 unsigned long address)
183 { 200 {
184 struct page *page; 201 return (pgtable_t)page_table_alloc(mm, address, 0);
185 pte_t *pte;
186
187 pte = pte_alloc_one_kernel(mm, address);
188 if (!pte)
189 return NULL;
190 page = virt_to_page(pte);
191 pgtable_page_ctor(page);
192 return page;
193 } 202 }
194 203
195 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) 204 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
196 { 205 {
197 free_page((unsigned long)pte); 206 page_table_free(mm, (unsigned long *)pte, 1);
198 } 207 }
199 208
200 static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) 209 static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
201 { 210 {
202 pgtable_page_dtor(ptepage); 211 page_table_free(mm, (unsigned long *)ptepage, 0);
203 __free_page(ptepage);
204 } 212 }
205 213
206 static inline void pgtable_free(void *table, unsigned index_size)
207 {
208 if (!index_size)
209 free_page((unsigned long)table);
210 else {
211 BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
212 kmem_cache_free(PGT_CACHE(index_size), table);
213 }
214 }
215
216 #ifdef CONFIG_SMP
217 static inline void pgtable_free_tlb(struct mmu_gather *tlb,
218 void *table, int shift)
219 {
220 unsigned long pgf = (unsigned long)table;
221 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
222 pgf |= shift;
223 tlb_remove_table(tlb, (void *)pgf);
224 }
225
226 static inline void __tlb_remove_table(void *_table)
227 {
228 void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
229 unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
230
231 pgtable_free(table, shift);
232 }
233 #else /* !CONFIG_SMP */
234 static inline void pgtable_free_tlb(struct mmu_gather *tlb,
235 void *table, int shift)
236 {
237 pgtable_free(table, shift);
238 }
239 #endif /* CONFIG_SMP */
240
241 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, 214 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
242 unsigned long address) 215 unsigned long address)
243 { 216 {
244 struct page *page = page_address(table);
245
246 tlb_flush_pgtable(tlb, address); 217 tlb_flush_pgtable(tlb, address);
247 pgtable_page_dtor(page); 218 pgtable_free_tlb(tlb, table, 0);
248 pgtable_free_tlb(tlb, page, 0);
249 } 219 }
250
251 #endif /* CONFIG_PPC_64K_PAGES */ 220 #endif /* CONFIG_PPC_64K_PAGES */
252 221
253 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) 222 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
254 { 223 {
255 return kmem_cache_alloc(PGT_CACHE(PMD_INDEX_SIZE), 224 return kmem_cache_alloc(PGT_CACHE(PMD_INDEX_SIZE),
256 GFP_KERNEL|__GFP_REPEAT); 225 GFP_KERNEL|__GFP_REPEAT);
257 } 226 }
258 227
259 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) 228 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
arch/powerpc/kernel/setup_64.c
1 /* 1 /*
2 * 2 *
3 * Common boot and setup code. 3 * Common boot and setup code.
4 * 4 *
5 * Copyright (C) 2001 PPC64 Team, IBM Corp 5 * Copyright (C) 2001 PPC64 Team, IBM Corp
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 */ 11 */
12 12
13 #undef DEBUG 13 #undef DEBUG
14 14
15 #include <linux/export.h> 15 #include <linux/export.h>
16 #include <linux/string.h> 16 #include <linux/string.h>
17 #include <linux/sched.h> 17 #include <linux/sched.h>
18 #include <linux/init.h> 18 #include <linux/init.h>
19 #include <linux/kernel.h> 19 #include <linux/kernel.h>
20 #include <linux/reboot.h> 20 #include <linux/reboot.h>
21 #include <linux/delay.h> 21 #include <linux/delay.h>
22 #include <linux/initrd.h> 22 #include <linux/initrd.h>
23 #include <linux/seq_file.h> 23 #include <linux/seq_file.h>
24 #include <linux/ioport.h> 24 #include <linux/ioport.h>
25 #include <linux/console.h> 25 #include <linux/console.h>
26 #include <linux/utsname.h> 26 #include <linux/utsname.h>
27 #include <linux/tty.h> 27 #include <linux/tty.h>
28 #include <linux/root_dev.h> 28 #include <linux/root_dev.h>
29 #include <linux/notifier.h> 29 #include <linux/notifier.h>
30 #include <linux/cpu.h> 30 #include <linux/cpu.h>
31 #include <linux/unistd.h> 31 #include <linux/unistd.h>
32 #include <linux/serial.h> 32 #include <linux/serial.h>
33 #include <linux/serial_8250.h> 33 #include <linux/serial_8250.h>
34 #include <linux/bootmem.h> 34 #include <linux/bootmem.h>
35 #include <linux/pci.h> 35 #include <linux/pci.h>
36 #include <linux/lockdep.h> 36 #include <linux/lockdep.h>
37 #include <linux/memblock.h> 37 #include <linux/memblock.h>
38 #include <linux/hugetlb.h> 38 #include <linux/hugetlb.h>
39 39
40 #include <asm/io.h> 40 #include <asm/io.h>
41 #include <asm/kdump.h> 41 #include <asm/kdump.h>
42 #include <asm/prom.h> 42 #include <asm/prom.h>
43 #include <asm/processor.h> 43 #include <asm/processor.h>
44 #include <asm/pgtable.h> 44 #include <asm/pgtable.h>
45 #include <asm/smp.h> 45 #include <asm/smp.h>
46 #include <asm/elf.h> 46 #include <asm/elf.h>
47 #include <asm/machdep.h> 47 #include <asm/machdep.h>
48 #include <asm/paca.h> 48 #include <asm/paca.h>
49 #include <asm/time.h> 49 #include <asm/time.h>
50 #include <asm/cputable.h> 50 #include <asm/cputable.h>
51 #include <asm/sections.h> 51 #include <asm/sections.h>
52 #include <asm/btext.h> 52 #include <asm/btext.h>
53 #include <asm/nvram.h> 53 #include <asm/nvram.h>
54 #include <asm/setup.h> 54 #include <asm/setup.h>
55 #include <asm/rtas.h> 55 #include <asm/rtas.h>
56 #include <asm/iommu.h> 56 #include <asm/iommu.h>
57 #include <asm/serial.h> 57 #include <asm/serial.h>
58 #include <asm/cache.h> 58 #include <asm/cache.h>
59 #include <asm/page.h> 59 #include <asm/page.h>
60 #include <asm/mmu.h> 60 #include <asm/mmu.h>
61 #include <asm/firmware.h> 61 #include <asm/firmware.h>
62 #include <asm/xmon.h> 62 #include <asm/xmon.h>
63 #include <asm/udbg.h> 63 #include <asm/udbg.h>
64 #include <asm/kexec.h> 64 #include <asm/kexec.h>
65 #include <asm/mmu_context.h> 65 #include <asm/mmu_context.h>
66 #include <asm/code-patching.h> 66 #include <asm/code-patching.h>
67 #include <asm/kvm_ppc.h> 67 #include <asm/kvm_ppc.h>
68 #include <asm/hugetlb.h> 68 #include <asm/hugetlb.h>
69 69
70 #include "setup.h" 70 #include "setup.h"
71 71
72 #ifdef DEBUG 72 #ifdef DEBUG
73 #define DBG(fmt...) udbg_printf(fmt) 73 #define DBG(fmt...) udbg_printf(fmt)
74 #else 74 #else
75 #define DBG(fmt...) 75 #define DBG(fmt...)
76 #endif 76 #endif
77 77
78 int boot_cpuid = 0; 78 int boot_cpuid = 0;
79 int __initdata spinning_secondaries; 79 int __initdata spinning_secondaries;
80 u64 ppc64_pft_size; 80 u64 ppc64_pft_size;
81 81
82 /* Pick defaults since we might want to patch instructions 82 /* Pick defaults since we might want to patch instructions
83 * before we've read this from the device tree. 83 * before we've read this from the device tree.
84 */ 84 */
85 struct ppc64_caches ppc64_caches = { 85 struct ppc64_caches ppc64_caches = {
86 .dline_size = 0x40, 86 .dline_size = 0x40,
87 .log_dline_size = 6, 87 .log_dline_size = 6,
88 .iline_size = 0x40, 88 .iline_size = 0x40,
89 .log_iline_size = 6 89 .log_iline_size = 6
90 }; 90 };
91 EXPORT_SYMBOL_GPL(ppc64_caches); 91 EXPORT_SYMBOL_GPL(ppc64_caches);
92 92
93 /* 93 /*
94 * These are used in binfmt_elf.c to put aux entries on the stack 94 * These are used in binfmt_elf.c to put aux entries on the stack
95 * for each elf executable being started. 95 * for each elf executable being started.
96 */ 96 */
97 int dcache_bsize; 97 int dcache_bsize;
98 int icache_bsize; 98 int icache_bsize;
99 int ucache_bsize; 99 int ucache_bsize;
100 100
101 #ifdef CONFIG_SMP 101 #ifdef CONFIG_SMP
102 102
103 static char *smt_enabled_cmdline; 103 static char *smt_enabled_cmdline;
104 104
105 /* Look for ibm,smt-enabled OF option */ 105 /* Look for ibm,smt-enabled OF option */
106 static void check_smt_enabled(void) 106 static void check_smt_enabled(void)
107 { 107 {
108 struct device_node *dn; 108 struct device_node *dn;
109 const char *smt_option; 109 const char *smt_option;
110 110
111 /* Default to enabling all threads */ 111 /* Default to enabling all threads */
112 smt_enabled_at_boot = threads_per_core; 112 smt_enabled_at_boot = threads_per_core;
113 113
114 /* Allow the command line to overrule the OF option */ 114 /* Allow the command line to overrule the OF option */
115 if (smt_enabled_cmdline) { 115 if (smt_enabled_cmdline) {
116 if (!strcmp(smt_enabled_cmdline, "on")) 116 if (!strcmp(smt_enabled_cmdline, "on"))
117 smt_enabled_at_boot = threads_per_core; 117 smt_enabled_at_boot = threads_per_core;
118 else if (!strcmp(smt_enabled_cmdline, "off")) 118 else if (!strcmp(smt_enabled_cmdline, "off"))
119 smt_enabled_at_boot = 0; 119 smt_enabled_at_boot = 0;
120 else { 120 else {
121 long smt; 121 long smt;
122 int rc; 122 int rc;
123 123
124 rc = strict_strtol(smt_enabled_cmdline, 10, &smt); 124 rc = strict_strtol(smt_enabled_cmdline, 10, &smt);
125 if (!rc) 125 if (!rc)
126 smt_enabled_at_boot = 126 smt_enabled_at_boot =
127 min(threads_per_core, (int)smt); 127 min(threads_per_core, (int)smt);
128 } 128 }
129 } else { 129 } else {
130 dn = of_find_node_by_path("/options"); 130 dn = of_find_node_by_path("/options");
131 if (dn) { 131 if (dn) {
132 smt_option = of_get_property(dn, "ibm,smt-enabled", 132 smt_option = of_get_property(dn, "ibm,smt-enabled",
133 NULL); 133 NULL);
134 134
135 if (smt_option) { 135 if (smt_option) {
136 if (!strcmp(smt_option, "on")) 136 if (!strcmp(smt_option, "on"))
137 smt_enabled_at_boot = threads_per_core; 137 smt_enabled_at_boot = threads_per_core;
138 else if (!strcmp(smt_option, "off")) 138 else if (!strcmp(smt_option, "off"))
139 smt_enabled_at_boot = 0; 139 smt_enabled_at_boot = 0;
140 } 140 }
141 141
142 of_node_put(dn); 142 of_node_put(dn);
143 } 143 }
144 } 144 }
145 } 145 }
146 146
147 /* Look for smt-enabled= cmdline option */ 147 /* Look for smt-enabled= cmdline option */
148 static int __init early_smt_enabled(char *p) 148 static int __init early_smt_enabled(char *p)
149 { 149 {
150 smt_enabled_cmdline = p; 150 smt_enabled_cmdline = p;
151 return 0; 151 return 0;
152 } 152 }
153 early_param("smt-enabled", early_smt_enabled); 153 early_param("smt-enabled", early_smt_enabled);
154 154
155 #else 155 #else
156 #define check_smt_enabled() 156 #define check_smt_enabled()
157 #endif /* CONFIG_SMP */ 157 #endif /* CONFIG_SMP */
158 158
159 /** Fix up paca fields required for the boot cpu */ 159 /** Fix up paca fields required for the boot cpu */
160 static void fixup_boot_paca(void) 160 static void fixup_boot_paca(void)
161 { 161 {
162 /* The boot cpu is started */ 162 /* The boot cpu is started */
163 get_paca()->cpu_start = 1; 163 get_paca()->cpu_start = 1;
164 /* Allow percpu accesses to work until we setup percpu data */ 164 /* Allow percpu accesses to work until we setup percpu data */
165 get_paca()->data_offset = 0; 165 get_paca()->data_offset = 0;
166 } 166 }
167 167
168 /* 168 /*
169 * Early initialization entry point. This is called by head.S 169 * Early initialization entry point. This is called by head.S
170 * with MMU translation disabled. We rely on the "feature" of 170 * with MMU translation disabled. We rely on the "feature" of
171 * the CPU that ignores the top 2 bits of the address in real 171 * the CPU that ignores the top 2 bits of the address in real
172 * mode so we can access kernel globals normally provided we 172 * mode so we can access kernel globals normally provided we
173 * only toy with things in the RMO region. From here, we do 173 * only toy with things in the RMO region. From here, we do
174 * some early parsing of the device-tree to set up our MEMBLOCK 174 * some early parsing of the device-tree to set up our MEMBLOCK
175 * data structures, and allocate & initialize the hash table 175 * data structures, and allocate & initialize the hash table
176 * and segment tables so we can start running with translation 176 * and segment tables so we can start running with translation
177 * enabled. 177 * enabled.
178 * 178 *
179 * It is this function which will call the probe() callback of 179 * It is this function which will call the probe() callback of
180 * the various platform types and copy the matching one to the 180 * the various platform types and copy the matching one to the
181 * global ppc_md structure. Your platform can eventually do 181 * global ppc_md structure. Your platform can eventually do
182 * some very early initializations from the probe() routine, but 182 * some very early initializations from the probe() routine, but
183 * this is not recommended, be very careful as, for example, the 183 * this is not recommended, be very careful as, for example, the
184 * device-tree is not accessible via normal means at this point. 184 * device-tree is not accessible via normal means at this point.
185 */ 185 */
186 186
187 void __init early_setup(unsigned long dt_ptr) 187 void __init early_setup(unsigned long dt_ptr)
188 { 188 {
189 static __initdata struct paca_struct boot_paca; 189 static __initdata struct paca_struct boot_paca;
190 190
191 /* -------- printk is _NOT_ safe to use here ! ------- */ 191 /* -------- printk is _NOT_ safe to use here ! ------- */
192 192
193 /* Identify CPU type */ 193 /* Identify CPU type */
194 identify_cpu(0, mfspr(SPRN_PVR)); 194 identify_cpu(0, mfspr(SPRN_PVR));
195 195
196 /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ 196 /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
197 initialise_paca(&boot_paca, 0); 197 initialise_paca(&boot_paca, 0);
198 setup_paca(&boot_paca); 198 setup_paca(&boot_paca);
199 fixup_boot_paca(); 199 fixup_boot_paca();
200 200
201 /* Initialize lockdep early or else spinlocks will blow */ 201 /* Initialize lockdep early or else spinlocks will blow */
202 lockdep_init(); 202 lockdep_init();
203 203
204 /* -------- printk is now safe to use ------- */ 204 /* -------- printk is now safe to use ------- */
205 205
206 /* Enable early debugging if any specified (see udbg.h) */ 206 /* Enable early debugging if any specified (see udbg.h) */
207 udbg_early_init(); 207 udbg_early_init();
208 208
209 DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr); 209 DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
210 210
211 /* 211 /*
212 * Do early initialization using the flattened device 212 * Do early initialization using the flattened device
213 * tree, such as retrieving the physical memory map or 213 * tree, such as retrieving the physical memory map or
214 * calculating/retrieving the hash table size. 214 * calculating/retrieving the hash table size.
215 */ 215 */
216 early_init_devtree(__va(dt_ptr)); 216 early_init_devtree(__va(dt_ptr));
217 217
218 /* Now we know the logical id of our boot cpu, setup the paca. */ 218 /* Now we know the logical id of our boot cpu, setup the paca. */
219 setup_paca(&paca[boot_cpuid]); 219 setup_paca(&paca[boot_cpuid]);
220 fixup_boot_paca(); 220 fixup_boot_paca();
221 221
222 /* Probe the machine type */ 222 /* Probe the machine type */
223 probe_machine(); 223 probe_machine();
224 224
225 setup_kdump_trampoline(); 225 setup_kdump_trampoline();
226 226
227 DBG("Found, Initializing memory management...\n"); 227 DBG("Found, Initializing memory management...\n");
228 228
229 /* Initialize the hash table or TLB handling */ 229 /* Initialize the hash table or TLB handling */
230 early_init_mmu(); 230 early_init_mmu();
231 231
232 /* 232 /*
233 * Reserve any gigantic pages requested on the command line. 233 * Reserve any gigantic pages requested on the command line.
234 * memblock needs to have been initialized by the time this is 234 * memblock needs to have been initialized by the time this is
235 * called since this will reserve memory. 235 * called since this will reserve memory.
236 */ 236 */
237 reserve_hugetlb_gpages(); 237 reserve_hugetlb_gpages();
238 238
239 DBG(" <- early_setup()\n"); 239 DBG(" <- early_setup()\n");
240 } 240 }
241 241
242 #ifdef CONFIG_SMP 242 #ifdef CONFIG_SMP
243 void early_setup_secondary(void) 243 void early_setup_secondary(void)
244 { 244 {
245 /* Mark interrupts enabled in PACA */ 245 /* Mark interrupts enabled in PACA */
246 get_paca()->soft_enabled = 0; 246 get_paca()->soft_enabled = 0;
247 247
248 /* Initialize the hash table or TLB handling */ 248 /* Initialize the hash table or TLB handling */
249 early_init_mmu_secondary(); 249 early_init_mmu_secondary();
250 } 250 }
251 251
252 #endif /* CONFIG_SMP */ 252 #endif /* CONFIG_SMP */
253 253
254 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) 254 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
255 void smp_release_cpus(void) 255 void smp_release_cpus(void)
256 { 256 {
257 unsigned long *ptr; 257 unsigned long *ptr;
258 int i; 258 int i;
259 259
260 DBG(" -> smp_release_cpus()\n"); 260 DBG(" -> smp_release_cpus()\n");
261 261
262 /* All secondary cpus are spinning on a common spinloop, release them 262 /* All secondary cpus are spinning on a common spinloop, release them
263 * all now so they can start to spin on their individual paca 263 * all now so they can start to spin on their individual paca
264 * spinloops. For non SMP kernels, the secondary cpus never get out 264 * spinloops. For non SMP kernels, the secondary cpus never get out
265 * of the common spinloop. 265 * of the common spinloop.
266 */ 266 */
267 267
268 ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop 268 ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
269 - PHYSICAL_START); 269 - PHYSICAL_START);
270 *ptr = __pa(generic_secondary_smp_init); 270 *ptr = __pa(generic_secondary_smp_init);
271 271
272 /* And wait a bit for them to catch up */ 272 /* And wait a bit for them to catch up */
273 for (i = 0; i < 100000; i++) { 273 for (i = 0; i < 100000; i++) {
274 mb(); 274 mb();
275 HMT_low(); 275 HMT_low();
276 if (spinning_secondaries == 0) 276 if (spinning_secondaries == 0)
277 break; 277 break;
278 udelay(1); 278 udelay(1);
279 } 279 }
280 DBG("spinning_secondaries = %d\n", spinning_secondaries); 280 DBG("spinning_secondaries = %d\n", spinning_secondaries);
281 281
282 DBG(" <- smp_release_cpus()\n"); 282 DBG(" <- smp_release_cpus()\n");
283 } 283 }
284 #endif /* CONFIG_SMP || CONFIG_KEXEC */ 284 #endif /* CONFIG_SMP || CONFIG_KEXEC */
285 285
286 /* 286 /*
287 * Initialize some remaining members of the ppc64_caches and systemcfg 287 * Initialize some remaining members of the ppc64_caches and systemcfg
288 * structures 288 * structures
289 * (at least until we get rid of them completely). This is mostly some 289 * (at least until we get rid of them completely). This is mostly some
290 * cache information about the CPU that will be used by cache flush 290 * cache information about the CPU that will be used by cache flush
291 * routines and/or provided to userland 291 * routines and/or provided to userland
292 */ 292 */
293 static void __init initialize_cache_info(void) 293 static void __init initialize_cache_info(void)
294 { 294 {
295 struct device_node *np; 295 struct device_node *np;
296 unsigned long num_cpus = 0; 296 unsigned long num_cpus = 0;
297 297
298 DBG(" -> initialize_cache_info()\n"); 298 DBG(" -> initialize_cache_info()\n");
299 299
300 for_each_node_by_type(np, "cpu") { 300 for_each_node_by_type(np, "cpu") {
301 num_cpus += 1; 301 num_cpus += 1;
302 302
303 /* 303 /*
304 * We're assuming *all* of the CPUs have the same 304 * We're assuming *all* of the CPUs have the same
305 * d-cache and i-cache sizes... -Peter 305 * d-cache and i-cache sizes... -Peter
306 */ 306 */
307 if (num_cpus == 1) { 307 if (num_cpus == 1) {
308 const u32 *sizep, *lsizep; 308 const u32 *sizep, *lsizep;
309 u32 size, lsize; 309 u32 size, lsize;
310 310
311 size = 0; 311 size = 0;
312 lsize = cur_cpu_spec->dcache_bsize; 312 lsize = cur_cpu_spec->dcache_bsize;
313 sizep = of_get_property(np, "d-cache-size", NULL); 313 sizep = of_get_property(np, "d-cache-size", NULL);
314 if (sizep != NULL) 314 if (sizep != NULL)
315 size = *sizep; 315 size = *sizep;
316 lsizep = of_get_property(np, "d-cache-block-size", 316 lsizep = of_get_property(np, "d-cache-block-size",
317 NULL); 317 NULL);
318 /* fallback if block size missing */ 318 /* fallback if block size missing */
319 if (lsizep == NULL) 319 if (lsizep == NULL)
320 lsizep = of_get_property(np, 320 lsizep = of_get_property(np,
321 "d-cache-line-size", 321 "d-cache-line-size",
322 NULL); 322 NULL);
323 if (lsizep != NULL) 323 if (lsizep != NULL)
324 lsize = *lsizep; 324 lsize = *lsizep;
325 if (sizep == 0 || lsizep == 0) 325 if (sizep == 0 || lsizep == 0)
326 DBG("Argh, can't find dcache properties ! " 326 DBG("Argh, can't find dcache properties ! "
327 "sizep: %p, lsizep: %p\n", sizep, lsizep); 327 "sizep: %p, lsizep: %p\n", sizep, lsizep);
328 328
329 ppc64_caches.dsize = size; 329 ppc64_caches.dsize = size;
330 ppc64_caches.dline_size = lsize; 330 ppc64_caches.dline_size = lsize;
331 ppc64_caches.log_dline_size = __ilog2(lsize); 331 ppc64_caches.log_dline_size = __ilog2(lsize);
332 ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; 332 ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
333 333
334 size = 0; 334 size = 0;
335 lsize = cur_cpu_spec->icache_bsize; 335 lsize = cur_cpu_spec->icache_bsize;
336 sizep = of_get_property(np, "i-cache-size", NULL); 336 sizep = of_get_property(np, "i-cache-size", NULL);
337 if (sizep != NULL) 337 if (sizep != NULL)
338 size = *sizep; 338 size = *sizep;
339 lsizep = of_get_property(np, "i-cache-block-size", 339 lsizep = of_get_property(np, "i-cache-block-size",
340 NULL); 340 NULL);
341 if (lsizep == NULL) 341 if (lsizep == NULL)
342 lsizep = of_get_property(np, 342 lsizep = of_get_property(np,
343 "i-cache-line-size", 343 "i-cache-line-size",
344 NULL); 344 NULL);
345 if (lsizep != NULL) 345 if (lsizep != NULL)
346 lsize = *lsizep; 346 lsize = *lsizep;
347 if (sizep == 0 || lsizep == 0) 347 if (sizep == 0 || lsizep == 0)
348 DBG("Argh, can't find icache properties ! " 348 DBG("Argh, can't find icache properties ! "
349 "sizep: %p, lsizep: %p\n", sizep, lsizep); 349 "sizep: %p, lsizep: %p\n", sizep, lsizep);
350 350
351 ppc64_caches.isize = size; 351 ppc64_caches.isize = size;
352 ppc64_caches.iline_size = lsize; 352 ppc64_caches.iline_size = lsize;
353 ppc64_caches.log_iline_size = __ilog2(lsize); 353 ppc64_caches.log_iline_size = __ilog2(lsize);
354 ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; 354 ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
355 } 355 }
356 } 356 }
357 357
358 DBG(" <- initialize_cache_info()\n"); 358 DBG(" <- initialize_cache_info()\n");
359 } 359 }
360 360
361 361
362 /* 362 /*
363 * Do some initial setup of the system. The parameters are those which 363 * Do some initial setup of the system. The parameters are those which
364 * were passed in from the bootloader. 364 * were passed in from the bootloader.
365 */ 365 */
366 void __init setup_system(void) 366 void __init setup_system(void)
367 { 367 {
368 DBG(" -> setup_system()\n"); 368 DBG(" -> setup_system()\n");
369 369
370 /* Apply the CPUs-specific and firmware specific fixups to kernel 370 /* Apply the CPUs-specific and firmware specific fixups to kernel
371 * text (nop out sections not relevant to this CPU or this firmware) 371 * text (nop out sections not relevant to this CPU or this firmware)
372 */ 372 */
373 do_feature_fixups(cur_cpu_spec->cpu_features, 373 do_feature_fixups(cur_cpu_spec->cpu_features,
374 &__start___ftr_fixup, &__stop___ftr_fixup); 374 &__start___ftr_fixup, &__stop___ftr_fixup);
375 do_feature_fixups(cur_cpu_spec->mmu_features, 375 do_feature_fixups(cur_cpu_spec->mmu_features,
376 &__start___mmu_ftr_fixup, &__stop___mmu_ftr_fixup); 376 &__start___mmu_ftr_fixup, &__stop___mmu_ftr_fixup);
377 do_feature_fixups(powerpc_firmware_features, 377 do_feature_fixups(powerpc_firmware_features,
378 &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); 378 &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
379 do_lwsync_fixups(cur_cpu_spec->cpu_features, 379 do_lwsync_fixups(cur_cpu_spec->cpu_features,
380 &__start___lwsync_fixup, &__stop___lwsync_fixup); 380 &__start___lwsync_fixup, &__stop___lwsync_fixup);
381 do_final_fixups(); 381 do_final_fixups();
382 382
383 /* 383 /*
384 * Unflatten the device-tree passed by prom_init or kexec 384 * Unflatten the device-tree passed by prom_init or kexec
385 */ 385 */
386 unflatten_device_tree(); 386 unflatten_device_tree();
387 387
388 /* 388 /*
389 * Fill the ppc64_caches & systemcfg structures with information 389 * Fill the ppc64_caches & systemcfg structures with information
390 * retrieved from the device-tree. 390 * retrieved from the device-tree.
391 */ 391 */
392 initialize_cache_info(); 392 initialize_cache_info();
393 393
394 #ifdef CONFIG_PPC_RTAS 394 #ifdef CONFIG_PPC_RTAS
395 /* 395 /*
396 * Initialize RTAS if available 396 * Initialize RTAS if available
397 */ 397 */
398 rtas_initialize(); 398 rtas_initialize();
399 #endif /* CONFIG_PPC_RTAS */ 399 #endif /* CONFIG_PPC_RTAS */
400 400
401 /* 401 /*
402 * Check if we have an initrd provided via the device-tree 402 * Check if we have an initrd provided via the device-tree
403 */ 403 */
404 check_for_initrd(); 404 check_for_initrd();
405 405
406 /* 406 /*
407 * Do some platform-specific early initializations, which include 407 * Do some platform-specific early initializations, which include
408 * setting up the hash table pointers. It also sets up some interrupt-mapping 408 * setting up the hash table pointers. It also sets up some interrupt-mapping
409 * related options that will be used by finish_device_tree() 409 * related options that will be used by finish_device_tree()
410 */ 410 */
411 if (ppc_md.init_early) 411 if (ppc_md.init_early)
412 ppc_md.init_early(); 412 ppc_md.init_early();
413 413
414 /* 414 /*
415 * We can discover serial ports now since the above did setup the 415 * We can discover serial ports now since the above did setup the
416 * hash table management for us, thus ioremap works. We do that early 416 * hash table management for us, thus ioremap works. We do that early
417 * so that further code can be debugged 417 * so that further code can be debugged
418 */ 418 */
419 find_legacy_serial_ports(); 419 find_legacy_serial_ports();
420 420
421 /* 421 /*
422 * Register early console 422 * Register early console
423 */ 423 */
424 register_early_udbg_console(); 424 register_early_udbg_console();
425 425
426 /* 426 /*
427 * Initialize xmon 427 * Initialize xmon
428 */ 428 */
429 xmon_setup(); 429 xmon_setup();
430 430
431 smp_setup_cpu_maps(); 431 smp_setup_cpu_maps();
432 check_smt_enabled(); 432 check_smt_enabled();
433 433
434 #ifdef CONFIG_SMP 434 #ifdef CONFIG_SMP
435 /* Release secondary cpus out of their spinloops at 0x60 now that 435 /* Release secondary cpus out of their spinloops at 0x60 now that
436 * we can map physical -> logical CPU ids 436 * we can map physical -> logical CPU ids
437 */ 437 */
438 smp_release_cpus(); 438 smp_release_cpus();
439 #endif 439 #endif
440 440
441 printk("Starting Linux PPC64 %s\n", init_utsname()->version); 441 printk("Starting Linux PPC64 %s\n", init_utsname()->version);
442 442
443 printk("-----------------------------------------------------\n"); 443 printk("-----------------------------------------------------\n");
444 printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); 444 printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
445 printk("physicalMemorySize = 0x%llx\n", memblock_phys_mem_size()); 445 printk("physicalMemorySize = 0x%llx\n", memblock_phys_mem_size());
446 if (ppc64_caches.dline_size != 0x80) 446 if (ppc64_caches.dline_size != 0x80)
447 printk("ppc64_caches.dcache_line_size = 0x%x\n", 447 printk("ppc64_caches.dcache_line_size = 0x%x\n",
448 ppc64_caches.dline_size); 448 ppc64_caches.dline_size);
449 if (ppc64_caches.iline_size != 0x80) 449 if (ppc64_caches.iline_size != 0x80)
450 printk("ppc64_caches.icache_line_size = 0x%x\n", 450 printk("ppc64_caches.icache_line_size = 0x%x\n",
451 ppc64_caches.iline_size); 451 ppc64_caches.iline_size);
452 #ifdef CONFIG_PPC_STD_MMU_64 452 #ifdef CONFIG_PPC_STD_MMU_64
453 if (htab_address) 453 if (htab_address)
454 printk("htab_address = 0x%p\n", htab_address); 454 printk("htab_address = 0x%p\n", htab_address);
455 printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); 455 printk("htab_hash_mask = 0x%lx\n", htab_hash_mask);
456 #endif /* CONFIG_PPC_STD_MMU_64 */ 456 #endif /* CONFIG_PPC_STD_MMU_64 */
457 if (PHYSICAL_START > 0) 457 if (PHYSICAL_START > 0)
458 printk("physical_start = 0x%llx\n", 458 printk("physical_start = 0x%llx\n",
459 (unsigned long long)PHYSICAL_START); 459 (unsigned long long)PHYSICAL_START);
460 printk("-----------------------------------------------------\n"); 460 printk("-----------------------------------------------------\n");
461 461
462 DBG(" <- setup_system()\n"); 462 DBG(" <- setup_system()\n");
463 } 463 }
464 464
465 /* This returns the limit below which memory accesses to the linear 465 /* This returns the limit below which memory accesses to the linear
466 * mapping are guaranteed not to cause a TLB or SLB miss. This is 466 * mapping are guaranteed not to cause a TLB or SLB miss. This is
467 * used to allocate interrupt or emergency stacks for which our 467 * used to allocate interrupt or emergency stacks for which our
468 * exception entry path doesn't deal with being interrupted. 468 * exception entry path doesn't deal with being interrupted.
469 */ 469 */
470 static u64 safe_stack_limit(void) 470 static u64 safe_stack_limit(void)
471 { 471 {
472 #ifdef CONFIG_PPC_BOOK3E 472 #ifdef CONFIG_PPC_BOOK3E
473 /* Freescale BookE bolts the entire linear mapping */ 473 /* Freescale BookE bolts the entire linear mapping */
474 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) 474 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
475 return linear_map_top; 475 return linear_map_top;
476 /* Other BookE, we assume the first GB is bolted */ 476 /* Other BookE, we assume the first GB is bolted */
477 return 1ul << 30; 477 return 1ul << 30;
478 #else 478 #else
479 /* BookS, the first segment is bolted */ 479 /* BookS, the first segment is bolted */
480 if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) 480 if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
481 return 1UL << SID_SHIFT_1T; 481 return 1UL << SID_SHIFT_1T;
482 return 1UL << SID_SHIFT; 482 return 1UL << SID_SHIFT;
483 #endif 483 #endif
484 } 484 }
485 485
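For context on the Book3S branch of safe_stack_limit() above: SID_SHIFT and SID_SHIFT_1T are the powerpc segment-size shifts, commonly 28 (256MB segments) and 40 (1TB segments), so the returned limit is simply the size of the first, bolted segment. A one-off userspace check of that arithmetic, with the shift values taken as assumptions rather than from this hunk:

#include <stdio.h>

/* Assumed powerpc segment-size shifts; not defined in this hunk. */
#define SID_SHIFT     28   /* 256MB segments */
#define SID_SHIFT_1T  40   /* 1TB segments */

int main(void)
{
	printf("256MB segment: 0x%llx bytes\n", 1ULL << SID_SHIFT);
	printf("1TB segment:   0x%llx bytes\n", 1ULL << SID_SHIFT_1T);
	return 0;
}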
486 static void __init irqstack_early_init(void) 486 static void __init irqstack_early_init(void)
487 { 487 {
488 u64 limit = safe_stack_limit(); 488 u64 limit = safe_stack_limit();
489 unsigned int i; 489 unsigned int i;
490 490
491 /* 491 /*
492 * Interrupt stacks must be in the first segment since we 492 * Interrupt stacks must be in the first segment since we
493 * cannot afford to take SLB misses on them. 493 * cannot afford to take SLB misses on them.
494 */ 494 */
495 for_each_possible_cpu(i) { 495 for_each_possible_cpu(i) {
496 softirq_ctx[i] = (struct thread_info *) 496 softirq_ctx[i] = (struct thread_info *)
497 __va(memblock_alloc_base(THREAD_SIZE, 497 __va(memblock_alloc_base(THREAD_SIZE,
498 THREAD_SIZE, limit)); 498 THREAD_SIZE, limit));
499 hardirq_ctx[i] = (struct thread_info *) 499 hardirq_ctx[i] = (struct thread_info *)
500 __va(memblock_alloc_base(THREAD_SIZE, 500 __va(memblock_alloc_base(THREAD_SIZE,
501 THREAD_SIZE, limit)); 501 THREAD_SIZE, limit));
502 } 502 }
503 } 503 }
504 504
505 #ifdef CONFIG_PPC_BOOK3E 505 #ifdef CONFIG_PPC_BOOK3E
506 static void __init exc_lvl_early_init(void) 506 static void __init exc_lvl_early_init(void)
507 { 507 {
508 extern unsigned int interrupt_base_book3e; 508 extern unsigned int interrupt_base_book3e;
509 extern unsigned int exc_debug_debug_book3e; 509 extern unsigned int exc_debug_debug_book3e;
510 510
511 unsigned int i; 511 unsigned int i;
512 512
513 for_each_possible_cpu(i) { 513 for_each_possible_cpu(i) {
514 critirq_ctx[i] = (struct thread_info *) 514 critirq_ctx[i] = (struct thread_info *)
515 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); 515 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
516 dbgirq_ctx[i] = (struct thread_info *) 516 dbgirq_ctx[i] = (struct thread_info *)
517 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); 517 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
518 mcheckirq_ctx[i] = (struct thread_info *) 518 mcheckirq_ctx[i] = (struct thread_info *)
519 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); 519 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
520 } 520 }
521 521
522 if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) 522 if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
523 patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1, 523 patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1,
524 (unsigned long)&exc_debug_debug_book3e, 0); 524 (unsigned long)&exc_debug_debug_book3e, 0);
525 } 525 }
526 #else 526 #else
527 #define exc_lvl_early_init() 527 #define exc_lvl_early_init()
528 #endif 528 #endif
529 529
530 /* 530 /*
531 * Stack space used when we detect a bad kernel stack pointer, and 531 * Stack space used when we detect a bad kernel stack pointer, and
532 * early in SMP boots before relocation is enabled. 532 * early in SMP boots before relocation is enabled.
533 */ 533 */
534 static void __init emergency_stack_init(void) 534 static void __init emergency_stack_init(void)
535 { 535 {
536 u64 limit; 536 u64 limit;
537 unsigned int i; 537 unsigned int i;
538 538
539 /* 539 /*
540 * Emergency stacks must be under 256MB, we cannot afford to take 540 * Emergency stacks must be under 256MB, we cannot afford to take
541 * SLB misses on them. The ABI also requires them to be 128-byte 541 * SLB misses on them. The ABI also requires them to be 128-byte
542 * aligned. 542 * aligned.
543 * 543 *
544 * Since we use these as temporary stacks during secondary CPU 544 * Since we use these as temporary stacks during secondary CPU
545 * bringup, we need to get at them in real mode. This means they 545 * bringup, we need to get at them in real mode. This means they
546 * must also be within the RMO region. 546 * must also be within the RMO region.
547 */ 547 */
548 limit = min(safe_stack_limit(), ppc64_rma_size); 548 limit = min(safe_stack_limit(), ppc64_rma_size);
549 549
550 for_each_possible_cpu(i) { 550 for_each_possible_cpu(i) {
551 unsigned long sp; 551 unsigned long sp;
552 sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); 552 sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
553 sp += THREAD_SIZE; 553 sp += THREAD_SIZE;
554 paca[i].emergency_sp = __va(sp); 554 paca[i].emergency_sp = __va(sp);
555 } 555 }
556 } 556 }
557 557
558 /* 558 /*
559 * Called from start_kernel, this initializes bootmem, which is used 559 * Called from start_kernel, this initializes bootmem, which is used
560 * to manage page allocation until mem_init is called. 560 * to manage page allocation until mem_init is called.
561 */ 561 */
562 void __init setup_arch(char **cmdline_p) 562 void __init setup_arch(char **cmdline_p)
563 { 563 {
564 ppc64_boot_msg(0x12, "Setup Arch"); 564 ppc64_boot_msg(0x12, "Setup Arch");
565 565
566 *cmdline_p = cmd_line; 566 *cmdline_p = cmd_line;
567 567
568 /* 568 /*
569 * Set cache line size based on type of cpu as a default. 569 * Set cache line size based on type of cpu as a default.
570 * Systems with OF can look in the properties on the cpu node(s) 570 * Systems with OF can look in the properties on the cpu node(s)
571 * for a possibly more accurate value. 571 * for a possibly more accurate value.
572 */ 572 */
573 dcache_bsize = ppc64_caches.dline_size; 573 dcache_bsize = ppc64_caches.dline_size;
574 icache_bsize = ppc64_caches.iline_size; 574 icache_bsize = ppc64_caches.iline_size;
575 575
576 /* reboot on panic */ 576 /* reboot on panic */
577 panic_timeout = 180; 577 panic_timeout = 180;
578 578
579 if (ppc_md.panic) 579 if (ppc_md.panic)
580 setup_panic(); 580 setup_panic();
581 581
582 init_mm.start_code = (unsigned long)_stext; 582 init_mm.start_code = (unsigned long)_stext;
583 init_mm.end_code = (unsigned long) _etext; 583 init_mm.end_code = (unsigned long) _etext;
584 init_mm.end_data = (unsigned long) _edata; 584 init_mm.end_data = (unsigned long) _edata;
585 init_mm.brk = klimit; 585 init_mm.brk = klimit;
586 586 #ifdef CONFIG_PPC_64K_PAGES
587 init_mm.context.pte_frag = NULL;
588 #endif
587 irqstack_early_init(); 589 irqstack_early_init();
588 exc_lvl_early_init(); 590 exc_lvl_early_init();
589 emergency_stack_init(); 591 emergency_stack_init();
590 592
591 #ifdef CONFIG_PPC_STD_MMU_64 593 #ifdef CONFIG_PPC_STD_MMU_64
592 stabs_alloc(); 594 stabs_alloc();
593 #endif 595 #endif
594 /* set up the bootmem stuff with available memory */ 596 /* set up the bootmem stuff with available memory */
595 do_init_bootmem(); 597 do_init_bootmem();
596 sparse_init(); 598 sparse_init();
597 599
598 #ifdef CONFIG_DUMMY_CONSOLE 600 #ifdef CONFIG_DUMMY_CONSOLE
599 conswitchp = &dummy_con; 601 conswitchp = &dummy_con;
600 #endif 602 #endif
601 603
602 if (ppc_md.setup_arch) 604 if (ppc_md.setup_arch)
603 ppc_md.setup_arch(); 605 ppc_md.setup_arch();
604 606
605 paging_init(); 607 paging_init();
606 608
607 /* Initialize the MMU context management stuff */ 609 /* Initialize the MMU context management stuff */
608 mmu_context_init(); 610 mmu_context_init();
609 611
610 kvm_linear_init(); 612 kvm_linear_init();
611 613
612 /* Interrupt code needs to be 64K-aligned */ 614 /* Interrupt code needs to be 64K-aligned */
613 if ((unsigned long)_stext & 0xffff) 615 if ((unsigned long)_stext & 0xffff)
614 panic("Kernelbase not 64K-aligned (0x%lx)!\n", 616 panic("Kernelbase not 64K-aligned (0x%lx)!\n",
615 (unsigned long)_stext); 617 (unsigned long)_stext);
616 618
617 ppc64_boot_msg(0x15, "Setup Done"); 619 ppc64_boot_msg(0x15, "Setup Done");
618 } 620 }
619 621
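The only change this commit makes to setup_arch() is the CONFIG_PPC_64K_PAGES hunk that clears init_mm.context.pte_frag, so the kernel's own mm starts with no partially consumed PTE page. A quick sanity check of the fragment geometry, assuming the values implied by the commit description (64K pages, 4K fragments made of 2K of PTEs plus 2K of HPTE slot data); the real PTE_FRAG_* macros are defined elsewhere in the patch:

#include <assert.h>
#include <stdio.h>

/* Values implied by the commit description; assumptions, not the
 * kernel's actual header definitions. */
#define PAGE_SHIFT          16                       /* 64K pages */
#define PAGE_SIZE           (1UL << PAGE_SHIFT)
#define PTE_FRAG_SIZE_SHIFT 12                       /* 4K fragments */
#define PTE_FRAG_SIZE       (1UL << PTE_FRAG_SIZE_SHIFT)
#define PTE_FRAG_NR         (PAGE_SIZE >> PTE_FRAG_SIZE_SHIFT)

int main(void)
{
	/* One 64K PTE page serves 16 pmd entries before a new page is needed. */
	assert(PTE_FRAG_NR == 16);
	printf("%lu fragments of %lu bytes per %lu-byte PTE page\n",
	       PTE_FRAG_NR, PTE_FRAG_SIZE, PAGE_SIZE);
	return 0;
}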
620 622
621 /* ToDo: do something useful if ppc_md is not yet setup. */ 623 /* ToDo: do something useful if ppc_md is not yet setup. */
622 #define PPC64_LINUX_FUNCTION 0x0f000000 624 #define PPC64_LINUX_FUNCTION 0x0f000000
623 #define PPC64_IPL_MESSAGE 0xc0000000 625 #define PPC64_IPL_MESSAGE 0xc0000000
624 #define PPC64_TERM_MESSAGE 0xb0000000 626 #define PPC64_TERM_MESSAGE 0xb0000000
625 627
626 static void ppc64_do_msg(unsigned int src, const char *msg) 628 static void ppc64_do_msg(unsigned int src, const char *msg)
627 { 629 {
628 if (ppc_md.progress) { 630 if (ppc_md.progress) {
629 char buf[128]; 631 char buf[128];
630 632
631 sprintf(buf, "%08X\n", src); 633 sprintf(buf, "%08X\n", src);
632 ppc_md.progress(buf, 0); 634 ppc_md.progress(buf, 0);
633 snprintf(buf, 128, "%s", msg); 635 snprintf(buf, 128, "%s", msg);
634 ppc_md.progress(buf, 0); 636 ppc_md.progress(buf, 0);
635 } 637 }
636 } 638 }
637 639
638 /* Print a boot progress message. */ 640 /* Print a boot progress message. */
639 void ppc64_boot_msg(unsigned int src, const char *msg) 641 void ppc64_boot_msg(unsigned int src, const char *msg)
640 { 642 {
641 ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg); 643 ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg);
642 printk("[boot]%04x %s\n", src, msg); 644 printk("[boot]%04x %s\n", src, msg);
643 } 645 }
644 646
645 #ifdef CONFIG_SMP 647 #ifdef CONFIG_SMP
646 #define PCPU_DYN_SIZE () 648 #define PCPU_DYN_SIZE ()
647 649
648 static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) 650 static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
649 { 651 {
650 return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align, 652 return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
651 __pa(MAX_DMA_ADDRESS)); 653 __pa(MAX_DMA_ADDRESS));
652 } 654 }
653 655
654 static void __init pcpu_fc_free(void *ptr, size_t size) 656 static void __init pcpu_fc_free(void *ptr, size_t size)
655 { 657 {
656 free_bootmem(__pa(ptr), size); 658 free_bootmem(__pa(ptr), size);
657 } 659 }
658 660
659 static int pcpu_cpu_distance(unsigned int from, unsigned int to) 661 static int pcpu_cpu_distance(unsigned int from, unsigned int to)
660 { 662 {
661 if (cpu_to_node(from) == cpu_to_node(to)) 663 if (cpu_to_node(from) == cpu_to_node(to))
662 return LOCAL_DISTANCE; 664 return LOCAL_DISTANCE;
663 else 665 else
664 return REMOTE_DISTANCE; 666 return REMOTE_DISTANCE;
665 } 667 }
666 668
667 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 669 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
668 EXPORT_SYMBOL(__per_cpu_offset); 670 EXPORT_SYMBOL(__per_cpu_offset);
669 671
670 void __init setup_per_cpu_areas(void) 672 void __init setup_per_cpu_areas(void)
671 { 673 {
672 const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; 674 const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
673 size_t atom_size; 675 size_t atom_size;
674 unsigned long delta; 676 unsigned long delta;
675 unsigned int cpu; 677 unsigned int cpu;
676 int rc; 678 int rc;
677 679
678 /* 680 /*
679 * Linear mapping is one of 4K, 1M and 16M. For 4K, no need 681 * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
680 * to group units. For larger mappings, use 1M atom which 682 * to group units. For larger mappings, use 1M atom which
681 * should be large enough to contain a number of units. 683 * should be large enough to contain a number of units.
682 */ 684 */
683 if (mmu_linear_psize == MMU_PAGE_4K) 685 if (mmu_linear_psize == MMU_PAGE_4K)
684 atom_size = PAGE_SIZE; 686 atom_size = PAGE_SIZE;
685 else 687 else
686 atom_size = 1 << 20; 688 atom_size = 1 << 20;
687 689
688 rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, 690 rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
689 pcpu_fc_alloc, pcpu_fc_free); 691 pcpu_fc_alloc, pcpu_fc_free);
690 if (rc < 0) 692 if (rc < 0)
691 panic("cannot initialize percpu area (err=%d)", rc); 693 panic("cannot initialize percpu area (err=%d)", rc);
692 694
693 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; 695 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
694 for_each_possible_cpu(cpu) { 696 for_each_possible_cpu(cpu) {
695 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; 697 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
696 paca[cpu].data_offset = __per_cpu_offset[cpu]; 698 paca[cpu].data_offset = __per_cpu_offset[cpu];
697 } 699 }
698 } 700 }
699 #endif 701 #endif
700 702
701 703
702 #ifdef CONFIG_PPC_INDIRECT_IO 704 #ifdef CONFIG_PPC_INDIRECT_IO
703 struct ppc_pci_io ppc_pci_io; 705 struct ppc_pci_io ppc_pci_io;
704 EXPORT_SYMBOL(ppc_pci_io); 706 EXPORT_SYMBOL(ppc_pci_io);
705 #endif /* CONFIG_PPC_INDIRECT_IO */ 707 #endif /* CONFIG_PPC_INDIRECT_IO */
706 708
707 709
arch/powerpc/mm/mmu_context_hash64.c
1 /* 1 /*
2 * MMU context allocation for 64-bit kernels. 2 * MMU context allocation for 64-bit kernels.
3 * 3 *
4 * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org> 4 * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 * 10 *
11 */ 11 */
12 12
13 #include <linux/sched.h> 13 #include <linux/sched.h>
14 #include <linux/kernel.h> 14 #include <linux/kernel.h>
15 #include <linux/errno.h> 15 #include <linux/errno.h>
16 #include <linux/string.h> 16 #include <linux/string.h>
17 #include <linux/types.h> 17 #include <linux/types.h>
18 #include <linux/mm.h> 18 #include <linux/mm.h>
19 #include <linux/spinlock.h> 19 #include <linux/spinlock.h>
20 #include <linux/idr.h> 20 #include <linux/idr.h>
21 #include <linux/export.h> 21 #include <linux/export.h>
22 #include <linux/gfp.h> 22 #include <linux/gfp.h>
23 #include <linux/slab.h> 23 #include <linux/slab.h>
24 24
25 #include <asm/mmu_context.h> 25 #include <asm/mmu_context.h>
26 #include <asm/pgalloc.h>
26 27
27 #include "icswx.h" 28 #include "icswx.h"
28 29
29 static DEFINE_SPINLOCK(mmu_context_lock); 30 static DEFINE_SPINLOCK(mmu_context_lock);
30 static DEFINE_IDA(mmu_context_ida); 31 static DEFINE_IDA(mmu_context_ida);
31 32
32 int __init_new_context(void) 33 int __init_new_context(void)
33 { 34 {
34 int index; 35 int index;
35 int err; 36 int err;
36 37
37 again: 38 again:
38 if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL)) 39 if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
39 return -ENOMEM; 40 return -ENOMEM;
40 41
41 spin_lock(&mmu_context_lock); 42 spin_lock(&mmu_context_lock);
42 err = ida_get_new_above(&mmu_context_ida, 1, &index); 43 err = ida_get_new_above(&mmu_context_ida, 1, &index);
43 spin_unlock(&mmu_context_lock); 44 spin_unlock(&mmu_context_lock);
44 45
45 if (err == -EAGAIN) 46 if (err == -EAGAIN)
46 goto again; 47 goto again;
47 else if (err) 48 else if (err)
48 return err; 49 return err;
49 50
50 if (index > MAX_USER_CONTEXT) { 51 if (index > MAX_USER_CONTEXT) {
51 spin_lock(&mmu_context_lock); 52 spin_lock(&mmu_context_lock);
52 ida_remove(&mmu_context_ida, index); 53 ida_remove(&mmu_context_ida, index);
53 spin_unlock(&mmu_context_lock); 54 spin_unlock(&mmu_context_lock);
54 return -ENOMEM; 55 return -ENOMEM;
55 } 56 }
56 57
57 return index; 58 return index;
58 } 59 }
59 EXPORT_SYMBOL_GPL(__init_new_context); 60 EXPORT_SYMBOL_GPL(__init_new_context);
60 61
61 int init_new_context(struct task_struct *tsk, struct mm_struct *mm) 62 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
62 { 63 {
63 int index; 64 int index;
64 65
65 index = __init_new_context(); 66 index = __init_new_context();
66 if (index < 0) 67 if (index < 0)
67 return index; 68 return index;
68 69
69 /* The old code would re-promote on fork, we don't do that 70 /* The old code would re-promote on fork, we don't do that
70 * when using slices as it could cause problem promoting slices 71 * when using slices as it could cause problem promoting slices
71 * that have been forced down to 4K 72 * that have been forced down to 4K
72 */ 73 */
73 if (slice_mm_new_context(mm)) 74 if (slice_mm_new_context(mm))
74 slice_set_user_psize(mm, mmu_virtual_psize); 75 slice_set_user_psize(mm, mmu_virtual_psize);
75 subpage_prot_init_new_context(mm); 76 subpage_prot_init_new_context(mm);
76 mm->context.id = index; 77 mm->context.id = index;
77 #ifdef CONFIG_PPC_ICSWX 78 #ifdef CONFIG_PPC_ICSWX
78 mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL); 79 mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
79 if (!mm->context.cop_lockp) { 80 if (!mm->context.cop_lockp) {
80 __destroy_context(index); 81 __destroy_context(index);
81 subpage_prot_free(mm); 82 subpage_prot_free(mm);
82 mm->context.id = MMU_NO_CONTEXT; 83 mm->context.id = MMU_NO_CONTEXT;
83 return -ENOMEM; 84 return -ENOMEM;
84 } 85 }
85 spin_lock_init(mm->context.cop_lockp); 86 spin_lock_init(mm->context.cop_lockp);
86 #endif /* CONFIG_PPC_ICSWX */ 87 #endif /* CONFIG_PPC_ICSWX */
87 88
89 #ifdef CONFIG_PPC_64K_PAGES
90 mm->context.pte_frag = NULL;
91 #endif
88 return 0; 92 return 0;
89 } 93 }
90 94
91 void __destroy_context(int context_id) 95 void __destroy_context(int context_id)
92 { 96 {
93 spin_lock(&mmu_context_lock); 97 spin_lock(&mmu_context_lock);
94 ida_remove(&mmu_context_ida, context_id); 98 ida_remove(&mmu_context_ida, context_id);
95 spin_unlock(&mmu_context_lock); 99 spin_unlock(&mmu_context_lock);
96 } 100 }
97 EXPORT_SYMBOL_GPL(__destroy_context); 101 EXPORT_SYMBOL_GPL(__destroy_context);
98 102
103 #ifdef CONFIG_PPC_64K_PAGES
104 static void destroy_pagetable_page(struct mm_struct *mm)
105 {
106 int count;
107 void *pte_frag;
108 struct page *page;
109
110 pte_frag = mm->context.pte_frag;
111 if (!pte_frag)
112 return;
113
114 page = virt_to_page(pte_frag);
115 /* drop all the pending references */
116 count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
117 /* We allow PTE_FRAG_NR fragments from a PTE page */
118 count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count);
119 if (!count) {
120 pgtable_page_dtor(page);
121 free_hot_cold_page(page, 0);
122 }
123 }
124
125 #else
126 static inline void destroy_pagetable_page(struct mm_struct *mm)
127 {
128 return;
129 }
130 #endif
131
132
99 void destroy_context(struct mm_struct *mm) 133 void destroy_context(struct mm_struct *mm)
100 { 134 {
135
101 #ifdef CONFIG_PPC_ICSWX 136 #ifdef CONFIG_PPC_ICSWX
102 drop_cop(mm->context.acop, mm); 137 drop_cop(mm->context.acop, mm);
103 kfree(mm->context.cop_lockp); 138 kfree(mm->context.cop_lockp);
104 mm->context.cop_lockp = NULL; 139 mm->context.cop_lockp = NULL;
105 #endif /* CONFIG_PPC_ICSWX */ 140 #endif /* CONFIG_PPC_ICSWX */
141
142 destroy_pagetable_page(mm);
106 __destroy_context(mm->context.id); 143 __destroy_context(mm->context.id);
107 subpage_prot_free(mm); 144 subpage_prot_free(mm);
108 mm->context.id = MMU_NO_CONTEXT; 145 mm->context.id = MMU_NO_CONTEXT;
109 } 146 }
110 147
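The arithmetic in destroy_pagetable_page() above is worth spelling out: the offset stored in the low bits of mm->context.pte_frag says how many 4K fragments were already handed out, so exit-time cleanup only drops the references that were charged at allocation time but never used. A standalone model of that bookkeeping, with a plain integer standing in for page->_count and the constants assumed from the commit description:

#include <assert.h>
#include <stdio.h>

#define PAGE_MASK           (~0xffffUL)  /* assumed 64K pages */
#define PTE_FRAG_SIZE_SHIFT 12           /* assumed 4K fragments */
#define PTE_FRAG_NR         16

/* References left on a PTE page after destroy_pagetable_page(), given the
 * cursor value in mm->context.pte_frag and how many handed-out fragments
 * were already released through page_table_free(). */
static int refs_after_destroy(unsigned long pte_frag, int frags_freed)
{
	int handed_out = (pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
	int count = PTE_FRAG_NR;            /* charged by __alloc_for_cache() */

	count -= frags_freed;               /* per-fragment puts */
	count -= PTE_FRAG_NR - handed_out;  /* the destroy-time drop above */
	return count;
}

int main(void)
{
	/* Cursor at offset 0x3000: three fragments handed out, none freed yet. */
	assert(refs_after_destroy(0x10003000UL, 0) == 3);
	/* Same cursor, all three users already freed: the page is gone. */
	assert(refs_after_destroy(0x10003000UL, 3) == 0);
	printf("refcount model ok\n");
	return 0;
}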
arch/powerpc/mm/pgtable_64.c
1 /* 1 /*
2 * This file contains ioremap and related functions for 64-bit machines. 2 * This file contains ioremap and related functions for 64-bit machines.
3 * 3 *
4 * Derived from arch/ppc64/mm/init.c 4 * Derived from arch/ppc64/mm/init.c
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) 5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 * 6 *
7 * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org) 7 * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
8 * and Cort Dougan (PReP) (cort@cs.nmt.edu) 8 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
9 * Copyright (C) 1996 Paul Mackerras 9 * Copyright (C) 1996 Paul Mackerras
10 * 10 *
11 * Derived from "arch/i386/mm/init.c" 11 * Derived from "arch/i386/mm/init.c"
12 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 12 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
13 * 13 *
14 * Dave Engebretsen <engebret@us.ibm.com> 14 * Dave Engebretsen <engebret@us.ibm.com>
15 * Rework for PPC64 port. 15 * Rework for PPC64 port.
16 * 16 *
17 * This program is free software; you can redistribute it and/or 17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License 18 * modify it under the terms of the GNU General Public License
19 * as published by the Free Software Foundation; either version 19 * as published by the Free Software Foundation; either version
20 * 2 of the License, or (at your option) any later version. 20 * 2 of the License, or (at your option) any later version.
21 * 21 *
22 */ 22 */
23 23
24 #include <linux/signal.h> 24 #include <linux/signal.h>
25 #include <linux/sched.h> 25 #include <linux/sched.h>
26 #include <linux/kernel.h> 26 #include <linux/kernel.h>
27 #include <linux/errno.h> 27 #include <linux/errno.h>
28 #include <linux/string.h> 28 #include <linux/string.h>
29 #include <linux/export.h> 29 #include <linux/export.h>
30 #include <linux/types.h> 30 #include <linux/types.h>
31 #include <linux/mman.h> 31 #include <linux/mman.h>
32 #include <linux/mm.h> 32 #include <linux/mm.h>
33 #include <linux/swap.h> 33 #include <linux/swap.h>
34 #include <linux/stddef.h> 34 #include <linux/stddef.h>
35 #include <linux/vmalloc.h> 35 #include <linux/vmalloc.h>
36 #include <linux/init.h> 36 #include <linux/init.h>
37 #include <linux/bootmem.h> 37 #include <linux/bootmem.h>
38 #include <linux/memblock.h> 38 #include <linux/memblock.h>
39 #include <linux/slab.h> 39 #include <linux/slab.h>
40 40
41 #include <asm/pgalloc.h> 41 #include <asm/pgalloc.h>
42 #include <asm/page.h> 42 #include <asm/page.h>
43 #include <asm/prom.h> 43 #include <asm/prom.h>
44 #include <asm/io.h> 44 #include <asm/io.h>
45 #include <asm/mmu_context.h> 45 #include <asm/mmu_context.h>
46 #include <asm/pgtable.h> 46 #include <asm/pgtable.h>
47 #include <asm/mmu.h> 47 #include <asm/mmu.h>
48 #include <asm/smp.h> 48 #include <asm/smp.h>
49 #include <asm/machdep.h> 49 #include <asm/machdep.h>
50 #include <asm/tlb.h> 50 #include <asm/tlb.h>
51 #include <asm/processor.h> 51 #include <asm/processor.h>
52 #include <asm/cputable.h> 52 #include <asm/cputable.h>
53 #include <asm/sections.h> 53 #include <asm/sections.h>
54 #include <asm/firmware.h> 54 #include <asm/firmware.h>
55 55
56 #include "mmu_decl.h" 56 #include "mmu_decl.h"
57 57
58 /* Some sanity checking */ 58 /* Some sanity checking */
59 #if TASK_SIZE_USER64 > PGTABLE_RANGE 59 #if TASK_SIZE_USER64 > PGTABLE_RANGE
60 #error TASK_SIZE_USER64 exceeds pagetable range 60 #error TASK_SIZE_USER64 exceeds pagetable range
61 #endif 61 #endif
62 62
63 #ifdef CONFIG_PPC_STD_MMU_64 63 #ifdef CONFIG_PPC_STD_MMU_64
64 #if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT)) 64 #if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
65 #error TASK_SIZE_USER64 exceeds user VSID range 65 #error TASK_SIZE_USER64 exceeds user VSID range
66 #endif 66 #endif
67 #endif 67 #endif
68 68
69 unsigned long ioremap_bot = IOREMAP_BASE; 69 unsigned long ioremap_bot = IOREMAP_BASE;
70 70
71 #ifdef CONFIG_PPC_MMU_NOHASH 71 #ifdef CONFIG_PPC_MMU_NOHASH
72 static void *early_alloc_pgtable(unsigned long size) 72 static void *early_alloc_pgtable(unsigned long size)
73 { 73 {
74 void *pt; 74 void *pt;
75 75
76 if (init_bootmem_done) 76 if (init_bootmem_done)
77 pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS)); 77 pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS));
78 else 78 else
79 pt = __va(memblock_alloc_base(size, size, 79 pt = __va(memblock_alloc_base(size, size,
80 __pa(MAX_DMA_ADDRESS))); 80 __pa(MAX_DMA_ADDRESS)));
81 memset(pt, 0, size); 81 memset(pt, 0, size);
82 82
83 return pt; 83 return pt;
84 } 84 }
85 #endif /* CONFIG_PPC_MMU_NOHASH */ 85 #endif /* CONFIG_PPC_MMU_NOHASH */
86 86
87 /* 87 /*
88 * map_kernel_page currently only called by __ioremap 88 * map_kernel_page currently only called by __ioremap
89 * map_kernel_page adds an entry to the ioremap page table 89 * map_kernel_page adds an entry to the ioremap page table
90 * and adds an entry to the HPT, possibly bolting it 90 * and adds an entry to the HPT, possibly bolting it
91 */ 91 */
92 int map_kernel_page(unsigned long ea, unsigned long pa, int flags) 92 int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
93 { 93 {
94 pgd_t *pgdp; 94 pgd_t *pgdp;
95 pud_t *pudp; 95 pud_t *pudp;
96 pmd_t *pmdp; 96 pmd_t *pmdp;
97 pte_t *ptep; 97 pte_t *ptep;
98 98
99 if (slab_is_available()) { 99 if (slab_is_available()) {
100 pgdp = pgd_offset_k(ea); 100 pgdp = pgd_offset_k(ea);
101 pudp = pud_alloc(&init_mm, pgdp, ea); 101 pudp = pud_alloc(&init_mm, pgdp, ea);
102 if (!pudp) 102 if (!pudp)
103 return -ENOMEM; 103 return -ENOMEM;
104 pmdp = pmd_alloc(&init_mm, pudp, ea); 104 pmdp = pmd_alloc(&init_mm, pudp, ea);
105 if (!pmdp) 105 if (!pmdp)
106 return -ENOMEM; 106 return -ENOMEM;
107 ptep = pte_alloc_kernel(pmdp, ea); 107 ptep = pte_alloc_kernel(pmdp, ea);
108 if (!ptep) 108 if (!ptep)
109 return -ENOMEM; 109 return -ENOMEM;
110 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, 110 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
111 __pgprot(flags))); 111 __pgprot(flags)));
112 } else { 112 } else {
113 #ifdef CONFIG_PPC_MMU_NOHASH 113 #ifdef CONFIG_PPC_MMU_NOHASH
114 /* Warning ! This will blow up if bootmem is not initialized 114 /* Warning ! This will blow up if bootmem is not initialized
115 * which our ppc64 code is keen to do; we'll need to 115 * which our ppc64 code is keen to do; we'll need to
116 * fix it and/or be more careful 116 * fix it and/or be more careful
117 */ 117 */
118 pgdp = pgd_offset_k(ea); 118 pgdp = pgd_offset_k(ea);
119 #ifdef PUD_TABLE_SIZE 119 #ifdef PUD_TABLE_SIZE
120 if (pgd_none(*pgdp)) { 120 if (pgd_none(*pgdp)) {
121 pudp = early_alloc_pgtable(PUD_TABLE_SIZE); 121 pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
122 BUG_ON(pudp == NULL); 122 BUG_ON(pudp == NULL);
123 pgd_populate(&init_mm, pgdp, pudp); 123 pgd_populate(&init_mm, pgdp, pudp);
124 } 124 }
125 #endif /* PUD_TABLE_SIZE */ 125 #endif /* PUD_TABLE_SIZE */
126 pudp = pud_offset(pgdp, ea); 126 pudp = pud_offset(pgdp, ea);
127 if (pud_none(*pudp)) { 127 if (pud_none(*pudp)) {
128 pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); 128 pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
129 BUG_ON(pmdp == NULL); 129 BUG_ON(pmdp == NULL);
130 pud_populate(&init_mm, pudp, pmdp); 130 pud_populate(&init_mm, pudp, pmdp);
131 } 131 }
132 pmdp = pmd_offset(pudp, ea); 132 pmdp = pmd_offset(pudp, ea);
133 if (!pmd_present(*pmdp)) { 133 if (!pmd_present(*pmdp)) {
134 ptep = early_alloc_pgtable(PAGE_SIZE); 134 ptep = early_alloc_pgtable(PAGE_SIZE);
135 BUG_ON(ptep == NULL); 135 BUG_ON(ptep == NULL);
136 pmd_populate_kernel(&init_mm, pmdp, ptep); 136 pmd_populate_kernel(&init_mm, pmdp, ptep);
137 } 137 }
138 ptep = pte_offset_kernel(pmdp, ea); 138 ptep = pte_offset_kernel(pmdp, ea);
139 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, 139 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
140 __pgprot(flags))); 140 __pgprot(flags)));
141 #else /* CONFIG_PPC_MMU_NOHASH */ 141 #else /* CONFIG_PPC_MMU_NOHASH */
142 /* 142 /*
143 * If the mm subsystem is not fully up, we cannot create a 143 * If the mm subsystem is not fully up, we cannot create a
144 * linux page table entry for this mapping. Simply bolt an 144 * linux page table entry for this mapping. Simply bolt an
145 * entry in the hardware page table. 145 * entry in the hardware page table.
146 * 146 *
147 */ 147 */
148 if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags, 148 if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
149 mmu_io_psize, mmu_kernel_ssize)) { 149 mmu_io_psize, mmu_kernel_ssize)) {
150 printk(KERN_ERR "Failed to do bolted mapping IO " 150 printk(KERN_ERR "Failed to do bolted mapping IO "
151 "memory at %016lx !\n", pa); 151 "memory at %016lx !\n", pa);
152 return -ENOMEM; 152 return -ENOMEM;
153 } 153 }
154 #endif /* !CONFIG_PPC_MMU_NOHASH */ 154 #endif /* !CONFIG_PPC_MMU_NOHASH */
155 } 155 }
156 return 0; 156 return 0;
157 } 157 }
158 158
159 159
160 /** 160 /**
161 * __ioremap_at - Low level function to establish the page tables 161 * __ioremap_at - Low level function to establish the page tables
162 * for an IO mapping 162 * for an IO mapping
163 */ 163 */
164 void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, 164 void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
165 unsigned long flags) 165 unsigned long flags)
166 { 166 {
167 unsigned long i; 167 unsigned long i;
168 168
169 /* Make sure we have the base flags */ 169 /* Make sure we have the base flags */
170 if ((flags & _PAGE_PRESENT) == 0) 170 if ((flags & _PAGE_PRESENT) == 0)
171 flags |= pgprot_val(PAGE_KERNEL); 171 flags |= pgprot_val(PAGE_KERNEL);
172 172
173 /* Non-cacheable page cannot be coherent */ 173 /* Non-cacheable page cannot be coherent */
174 if (flags & _PAGE_NO_CACHE) 174 if (flags & _PAGE_NO_CACHE)
175 flags &= ~_PAGE_COHERENT; 175 flags &= ~_PAGE_COHERENT;
176 176
177 /* We don't support the 4K PFN hack with ioremap */ 177 /* We don't support the 4K PFN hack with ioremap */
178 if (flags & _PAGE_4K_PFN) 178 if (flags & _PAGE_4K_PFN)
179 return NULL; 179 return NULL;
180 180
181 WARN_ON(pa & ~PAGE_MASK); 181 WARN_ON(pa & ~PAGE_MASK);
182 WARN_ON(((unsigned long)ea) & ~PAGE_MASK); 182 WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
183 WARN_ON(size & ~PAGE_MASK); 183 WARN_ON(size & ~PAGE_MASK);
184 184
185 for (i = 0; i < size; i += PAGE_SIZE) 185 for (i = 0; i < size; i += PAGE_SIZE)
186 if (map_kernel_page((unsigned long)ea+i, pa+i, flags)) 186 if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
187 return NULL; 187 return NULL;
188 188
189 return (void __iomem *)ea; 189 return (void __iomem *)ea;
190 } 190 }
191 191
192 /** 192 /**
193 * __iounmap_at - Low level function to tear down the page tables 193 * __iounmap_at - Low level function to tear down the page tables
194 * for an IO mapping. This is used for mappings that 194 * for an IO mapping. This is used for mappings that
195 * are manipulated manually, like partial unmapping of 195 * are manipulated manually, like partial unmapping of
196 * PCI IOs or ISA space. 196 * PCI IOs or ISA space.
197 */ 197 */
198 void __iounmap_at(void *ea, unsigned long size) 198 void __iounmap_at(void *ea, unsigned long size)
199 { 199 {
200 WARN_ON(((unsigned long)ea) & ~PAGE_MASK); 200 WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
201 WARN_ON(size & ~PAGE_MASK); 201 WARN_ON(size & ~PAGE_MASK);
202 202
203 unmap_kernel_range((unsigned long)ea, size); 203 unmap_kernel_range((unsigned long)ea, size);
204 } 204 }
205 205
206 void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, 206 void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
207 unsigned long flags, void *caller) 207 unsigned long flags, void *caller)
208 { 208 {
209 phys_addr_t paligned; 209 phys_addr_t paligned;
210 void __iomem *ret; 210 void __iomem *ret;
211 211
212 /* 212 /*
213 * Choose an address to map it to. 213 * Choose an address to map it to.
214 * Once the imalloc system is running, we use it. 214 * Once the imalloc system is running, we use it.
215 * Before that, we map using addresses going 215 * Before that, we map using addresses going
216 * up from ioremap_bot. imalloc will use 216 * up from ioremap_bot. imalloc will use
217 * the addresses from ioremap_bot through 217 * the addresses from ioremap_bot through
218 * IMALLOC_END 218 * IMALLOC_END
219 * 219 *
220 */ 220 */
221 paligned = addr & PAGE_MASK; 221 paligned = addr & PAGE_MASK;
222 size = PAGE_ALIGN(addr + size) - paligned; 222 size = PAGE_ALIGN(addr + size) - paligned;
223 223
224 if ((size == 0) || (paligned == 0)) 224 if ((size == 0) || (paligned == 0))
225 return NULL; 225 return NULL;
226 226
227 if (mem_init_done) { 227 if (mem_init_done) {
228 struct vm_struct *area; 228 struct vm_struct *area;
229 229
230 area = __get_vm_area_caller(size, VM_IOREMAP, 230 area = __get_vm_area_caller(size, VM_IOREMAP,
231 ioremap_bot, IOREMAP_END, 231 ioremap_bot, IOREMAP_END,
232 caller); 232 caller);
233 if (area == NULL) 233 if (area == NULL)
234 return NULL; 234 return NULL;
235 235
236 area->phys_addr = paligned; 236 area->phys_addr = paligned;
237 ret = __ioremap_at(paligned, area->addr, size, flags); 237 ret = __ioremap_at(paligned, area->addr, size, flags);
238 if (!ret) 238 if (!ret)
239 vunmap(area->addr); 239 vunmap(area->addr);
240 } else { 240 } else {
241 ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags); 241 ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
242 if (ret) 242 if (ret)
243 ioremap_bot += size; 243 ioremap_bot += size;
244 } 244 }
245 245
246 if (ret) 246 if (ret)
247 ret += addr & ~PAGE_MASK; 247 ret += addr & ~PAGE_MASK;
248 return ret; 248 return ret;
249 } 249 }
250 250
251 void __iomem * __ioremap(phys_addr_t addr, unsigned long size, 251 void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
252 unsigned long flags) 252 unsigned long flags)
253 { 253 {
254 return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); 254 return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
255 } 255 }
256 256
257 void __iomem * ioremap(phys_addr_t addr, unsigned long size) 257 void __iomem * ioremap(phys_addr_t addr, unsigned long size)
258 { 258 {
259 unsigned long flags = _PAGE_NO_CACHE | _PAGE_GUARDED; 259 unsigned long flags = _PAGE_NO_CACHE | _PAGE_GUARDED;
260 void *caller = __builtin_return_address(0); 260 void *caller = __builtin_return_address(0);
261 261
262 if (ppc_md.ioremap) 262 if (ppc_md.ioremap)
263 return ppc_md.ioremap(addr, size, flags, caller); 263 return ppc_md.ioremap(addr, size, flags, caller);
264 return __ioremap_caller(addr, size, flags, caller); 264 return __ioremap_caller(addr, size, flags, caller);
265 } 265 }
266 266
267 void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size) 267 void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
268 { 268 {
269 unsigned long flags = _PAGE_NO_CACHE; 269 unsigned long flags = _PAGE_NO_CACHE;
270 void *caller = __builtin_return_address(0); 270 void *caller = __builtin_return_address(0);
271 271
272 if (ppc_md.ioremap) 272 if (ppc_md.ioremap)
273 return ppc_md.ioremap(addr, size, flags, caller); 273 return ppc_md.ioremap(addr, size, flags, caller);
274 return __ioremap_caller(addr, size, flags, caller); 274 return __ioremap_caller(addr, size, flags, caller);
275 } 275 }
276 276
277 void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, 277 void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
278 unsigned long flags) 278 unsigned long flags)
279 { 279 {
280 void *caller = __builtin_return_address(0); 280 void *caller = __builtin_return_address(0);
281 281
282 /* writeable implies dirty for kernel addresses */ 282 /* writeable implies dirty for kernel addresses */
283 if (flags & _PAGE_RW) 283 if (flags & _PAGE_RW)
284 flags |= _PAGE_DIRTY; 284 flags |= _PAGE_DIRTY;
285 285
286 /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ 286 /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
287 flags &= ~(_PAGE_USER | _PAGE_EXEC); 287 flags &= ~(_PAGE_USER | _PAGE_EXEC);
288 288
289 #ifdef _PAGE_BAP_SR 289 #ifdef _PAGE_BAP_SR
290 /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format 290 /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
291 * which means that we just cleared supervisor access... oops ;-) This 291 * which means that we just cleared supervisor access... oops ;-) This
292 * restores it 292 * restores it
293 */ 293 */
294 flags |= _PAGE_BAP_SR; 294 flags |= _PAGE_BAP_SR;
295 #endif 295 #endif
296 296
297 if (ppc_md.ioremap) 297 if (ppc_md.ioremap)
298 return ppc_md.ioremap(addr, size, flags, caller); 298 return ppc_md.ioremap(addr, size, flags, caller);
299 return __ioremap_caller(addr, size, flags, caller); 299 return __ioremap_caller(addr, size, flags, caller);
300 } 300 }
301 301
302 302
303 /* 303 /*
304 * Unmap an IO region and remove it from imalloc'd list. 304 * Unmap an IO region and remove it from imalloc'd list.
305 * Access to IO memory should be serialized by driver. 305 * Access to IO memory should be serialized by driver.
306 */ 306 */
307 void __iounmap(volatile void __iomem *token) 307 void __iounmap(volatile void __iomem *token)
308 { 308 {
309 void *addr; 309 void *addr;
310 310
311 if (!mem_init_done) 311 if (!mem_init_done)
312 return; 312 return;
313 313
314 addr = (void *) ((unsigned long __force) 314 addr = (void *) ((unsigned long __force)
315 PCI_FIX_ADDR(token) & PAGE_MASK); 315 PCI_FIX_ADDR(token) & PAGE_MASK);
316 if ((unsigned long)addr < ioremap_bot) { 316 if ((unsigned long)addr < ioremap_bot) {
317 printk(KERN_WARNING "Attempt to iounmap early bolted mapping" 317 printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
318 " at 0x%p\n", addr); 318 " at 0x%p\n", addr);
319 return; 319 return;
320 } 320 }
321 vunmap(addr); 321 vunmap(addr);
322 } 322 }
323 323
324 void iounmap(volatile void __iomem *token) 324 void iounmap(volatile void __iomem *token)
325 { 325 {
326 if (ppc_md.iounmap) 326 if (ppc_md.iounmap)
327 ppc_md.iounmap(token); 327 ppc_md.iounmap(token);
328 else 328 else
329 __iounmap(token); 329 __iounmap(token);
330 } 330 }
331 331
332 EXPORT_SYMBOL(ioremap); 332 EXPORT_SYMBOL(ioremap);
333 EXPORT_SYMBOL(ioremap_wc); 333 EXPORT_SYMBOL(ioremap_wc);
334 EXPORT_SYMBOL(ioremap_prot); 334 EXPORT_SYMBOL(ioremap_prot);
335 EXPORT_SYMBOL(__ioremap); 335 EXPORT_SYMBOL(__ioremap);
336 EXPORT_SYMBOL(__ioremap_at); 336 EXPORT_SYMBOL(__ioremap_at);
337 EXPORT_SYMBOL(iounmap); 337 EXPORT_SYMBOL(iounmap);
338 EXPORT_SYMBOL(__iounmap); 338 EXPORT_SYMBOL(__iounmap);
339 EXPORT_SYMBOL(__iounmap_at); 339 EXPORT_SYMBOL(__iounmap_at);
340
341 #ifdef CONFIG_PPC_64K_PAGES
342 static pte_t *get_from_cache(struct mm_struct *mm)
343 {
344 void *pte_frag, *ret;
345
346 spin_lock(&mm->page_table_lock);
347 ret = mm->context.pte_frag;
348 if (ret) {
349 pte_frag = ret + PTE_FRAG_SIZE;
350 /*
351 * If we have taken up all the fragments mark PTE page NULL
352 */
353 if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
354 pte_frag = NULL;
355 mm->context.pte_frag = pte_frag;
356 }
357 spin_unlock(&mm->page_table_lock);
358 return (pte_t *)ret;
359 }
360
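A minimal userspace model of the cursor arithmetic in get_from_cache(): each call hands back the current fragment and advances mm->context.pte_frag by PTE_FRAG_SIZE; once the offset wraps to zero the 64K page is exhausted and the cursor is cleared so the next request allocates a fresh page. Locking is omitted and the PTE page is simulated with aligned_alloc(); only the pointer arithmetic mirrors the kernel code, and the sizes are the values implied by the commit description:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE     (64 * 1024)   /* assumed 64K pages */
#define PAGE_MASK     (~(uintptr_t)(PAGE_SIZE - 1))
#define PTE_FRAG_SIZE 4096          /* assumed 4K fragments */

static void *pte_frag;              /* models mm->context.pte_frag */

static void *get_from_cache(void)
{
	void *ret = pte_frag;

	if (ret) {
		void *next = (char *)ret + PTE_FRAG_SIZE;
		/* Offset wrapped to 0: every fragment has been handed out. */
		if (((uintptr_t)next & ~PAGE_MASK) == 0)
			next = NULL;
		pte_frag = next;
	}
	return ret;
}

int main(void)
{
	void *page = aligned_alloc(PAGE_SIZE, PAGE_SIZE); /* stands in for alloc_page() */
	int served = 1;   /* fragment 0 went to the caller that allocated the page */

	if (!page)
		return 1;
	pte_frag = (char *)page + PTE_FRAG_SIZE;  /* as __alloc_for_cache() does */

	while (get_from_cache())
		served++;
	printf("fragments served per page: %d\n", served);  /* prints 16 */

	free(page);
	return 0;
}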
361 static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
362 {
363 void *ret = NULL;
364 struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
365 __GFP_REPEAT | __GFP_ZERO);
366 if (!page)
367 return NULL;
368
369 ret = page_address(page);
370 spin_lock(&mm->page_table_lock);
371 /*
372 * If we find pte_frag already set, we return
373 * the allocated page with a single fragment
374 * count.
375 */
376 if (likely(!mm->context.pte_frag)) {
377 atomic_set(&page->_count, PTE_FRAG_NR);
378 mm->context.pte_frag = ret + PTE_FRAG_SIZE;
379 }
380 spin_unlock(&mm->page_table_lock);
381
382 if (!kernel)
383 pgtable_page_ctor(page);
384
385 return (pte_t *)ret;
386 }
387
388 pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
389 {
390 pte_t *pte;
391
392 pte = get_from_cache(mm);
393 if (pte)
394 return pte;
395
396 return __alloc_for_cache(mm, kernel);
397 }
398
399 void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
400 {
401 struct page *page = virt_to_page(table);
402 if (put_page_testzero(page)) {
403 if (!kernel)
404 pgtable_page_dtor(page);
405 free_hot_cold_page(page, 0);
406 }
407 }
408
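Taken together, the allocation and free paths form a simple reference-counting protocol: __alloc_for_cache() charges the freshly allocated page with PTE_FRAG_NR references, every pmd that received a fragment eventually calls page_table_free(), and whichever put drops the count to zero runs the destructor and releases the page. A tiny model of that lifecycle with a plain counter in place of page->_count:

#include <assert.h>
#include <stdio.h>

#define PTE_FRAG_NR 16   /* fragments per 64K PTE page, per the commit message */

struct pte_page { int count; int freed; };

/* __alloc_for_cache(): charge all fragments up front. */
static void frag_alloc(struct pte_page *p) { p->count = PTE_FRAG_NR; p->freed = 0; }

/* page_table_free() / put_page_testzero(): the last put frees the page. */
static void frag_put(struct pte_page *p)
{
	if (--p->count == 0)
		p->freed = 1;   /* pgtable_page_dtor() + free_hot_cold_page() */
}

int main(void)
{
	struct pte_page page;
	int i;

	frag_alloc(&page);
	for (i = 0; i < PTE_FRAG_NR; i++)
		frag_put(&page);                    /* one put per fragment user */

	assert(page.count == 0 && page.freed);      /* freed once, on the last put */
	printf("page released after %d puts\n", PTE_FRAG_NR);
	return 0;
}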
409 #ifdef CONFIG_SMP
410 static void page_table_free_rcu(void *table)
411 {
412 struct page *page = virt_to_page(table);
413 if (put_page_testzero(page)) {
414 pgtable_page_dtor(page);
415 free_hot_cold_page(page, 0);
416 }
417 }
418
419 void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
420 {
421 unsigned long pgf = (unsigned long)table;
422
423 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
424 pgf |= shift;
425 tlb_remove_table(tlb, (void *)pgf);
426 }
427
428 void __tlb_remove_table(void *_table)
429 {
430 void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
431 unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
432
433 if (!shift)
434 /* PTE page needs special handling */
435 page_table_free_rcu(table);
436 else {
437 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
438 kmem_cache_free(PGT_CACHE(shift), table);
439 }
440 }
441 #else
442 void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
443 {
444 if (!shift) {
445 /* PTE page needs special handling */
446 struct page *page = virt_to_page(table);
447 if (put_page_testzero(page)) {
448 pgtable_page_dtor(page);
449 free_hot_cold_page(page, 0);
450 }
451 } else {
452 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
453 kmem_cache_free(PGT_CACHE(shift), table);
454 }
455 }
456 #endif
457 #endif /* CONFIG_PPC_64K_PAGES */
340 458
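pgtable_free_tlb() and __tlb_remove_table() round out the picture: page-table allocations are aligned well beyond MAX_PGTABLE_INDEX_SIZE, so the index-size "shift" can be stashed in the low bits of the table pointer when it is queued for deferred freeing and peeled off again on the other side, with shift 0 reserved for PTE fragments. A standalone sketch of that encoding; the mask value 0xf is an assumption about MAX_PGTABLE_INDEX_SIZE, not taken from this hunk:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_PGTABLE_INDEX_SIZE 0xf   /* assumed mask value */

/* pgtable_free_tlb(): smuggle the index size into the pointer's low bits. */
static uintptr_t encode(void *table, unsigned int shift)
{
	assert(shift <= MAX_PGTABLE_INDEX_SIZE);
	assert(((uintptr_t)table & MAX_PGTABLE_INDEX_SIZE) == 0);  /* aligned */
	return (uintptr_t)table | shift;
}

/* __tlb_remove_table(): peel the shift back off; 0 means PTE fragment. */
static void decode(uintptr_t pgf, void **table, unsigned int *shift)
{
	*table = (void *)(pgf & ~(uintptr_t)MAX_PGTABLE_INDEX_SIZE);
	*shift = pgf & MAX_PGTABLE_INDEX_SIZE;
}

int main(void)
{
	void *frag = (void *)(uintptr_t)0x201000;  /* 4K-aligned, low bits free */
	void *table;
	unsigned int shift;

	decode(encode(frag, 0), &table, &shift);
	assert(table == frag && shift == 0);   /* shift 0: PTE fragment path */

	decode(encode(frag, 9), &table, &shift);
	assert(table == frag && shift == 9);   /* e.g. a pmd/pgd index size */

	printf("encode/decode round-trips ok\n");
	return 0;
}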