Commit cbc578cfb6aa459b4d880f4a058eff373b32a5fb

Authored by Bob Picco
Committed by Greg Kroah-Hartman
1 parent a5fb600211

sparc64: find_node adjustment

[ Upstream commit 3dee9df54836d5f844f3d58281d3f3e6331b467f ]

We have seen an issue with guest boot into an LDOM that causes early boot failures
because there are no matching rules for the node identity of the memory. I analyzed
this on my T4 and concluded there might not be a solution. I saw the issue in
mainline too when booting into the control/primary domain - with guests
configured. Note, this could be a firmware bug on some older machines.

A full explanation of the issues follows. Should we not find a matching BEST
latency group for a real address (RA), then we will assume node 0. On the T4-2
here, with the information provided, I can't see an alternative.
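Concretely, the no-match path in find_node() now warns once and falls back to
node 0 instead of returning -1 (the full context is in the diff below):

	/* The following condition has been observed on LDOM guests.*/
	WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node"
		" rule. Some physical memory will be owned by node 0.");
	return 0;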

Technically the LDOM shown below should match the MBLOCK to the favorable
latency group. However, other factors must be considered too: were the memory
controllers configured for "fine"-grained or "coarse"-grained interleave on the
T4? Also, should a "group" MD node be considered a NUMA node?

There has to be at least one Machine Description (MD) "group" and hence one
NUMA node. A group can have one or more latency groups (lg) - that is, more
than one memory controller. The current code chooses the smallest latency as
the most favorable per group. The latency and lg information is in MLGROUP
below. MBLOCK is the base and size of the RAs for the machine, as fetched from
the OBP /memory "available" property. My machine has one MBLOCK, but more would
be possible - with holes?
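
To illustrate the matching machinery, here is a minimal user-space sketch of
what ra_to_pa()/find_node() in init_64.c do. The struct and function names
below are illustrative (the kernel's are mdesc_mblock, node_mem_mask, ra_to_pa
and find_node), and the values in main() are taken from the T4-2 LDOM data
shown further down:

	#include <stdio.h>

	/* Assumes a 64-bit unsigned long, as on sparc64. */
	struct mblock    { unsigned long base, size, offset; }; /* RA range + RA-to-PA offset */
	struct node_rule { unsigned long mask, val; };          /* per-NUMA-node mask/match pair */

	/* Translate an RA to a PA via the covering mblock, then test one node rule. */
	static int ra_matches_node(unsigned long ra, const struct mblock *m,
				   const struct node_rule *r)
	{
		unsigned long pa = ra;

		if (ra >= m->base && ra < m->base + m->size)
			pa += m->offset;
		return (pa & r->mask) == r->val;
	}

	int main(void)
	{
		struct mblock m = { 0x20000000UL, 0x280000000UL, 0x0UL };  /* MBLOCK[0] */
		struct node_rule n0 = { 0x200000000UL, 0x200000000UL };    /* NUMA NODE[0] */

		printf("RA 0x20400000  matches node 0: %d\n",
		       ra_matches_node(0x20400000UL, &m, &n0));   /* prints 0: no rule matches */
		printf("RA 0x200000000 matches node 0: %d\n",
		       ra_matches_node(0x200000000UL, &m, &n0));  /* prints 1: matches */
		return 0;
	}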

For a T4-2 the following information has been gathered:
with LDOM guest
MEMBLOCK configuration:
 memory size = 0x27f870000
 memory.cnt  = 0x3
 memory[0x0]    [0x00000020400000-0x0000029fc67fff], 0x27f868000 bytes
 memory[0x1]    [0x0000029fd8a000-0x0000029fd8bfff], 0x2000 bytes
 memory[0x2]    [0x0000029fd92000-0x0000029fd97fff], 0x6000 bytes
 reserved.cnt  = 0x2
 reserved[0x0]  [0x00000020800000-0x000000216c15c0], 0xec15c1 bytes
 reserved[0x1]  [0x00000024800000-0x0000002c180c1e], 0x7980c1f bytes
MBLOCK[0]: base[20000000] size[280000000] offset[0]
(note: "base" and "size" reported in "MBLOCK" encompass the "memory[X]" values)
(note: (RA + offset) & mask == val is the formula to detect a match for a
memory controller. Should there be no match in find_node(), a return value of
-1 resulted for the node - BAD. See the worked check after this listing.)

There is one group. It has these forward links:
MLGROUP[1]: node[545] latency[1f7e8] match[200000000] mask[200000000]
MLGROUP[2]: node[54d] latency[2de60] match[0] mask[200000000]
NUMA NODE[0]: node[545] mask[200000000] val[200000000] (latency[1f7e8])
(note: "val" is the best lg's (smallest latency) "match")

no LDOM guest - bare metal
MEMBLOCK configuration:
 memory size = 0xfdf2d0000
 memory.cnt  = 0x3
 memory[0x0]    [0x00000020400000-0x00000fff6adfff], 0xfdf2ae000 bytes
 memory[0x1]    [0x00000fff6d2000-0x00000fff6e7fff], 0x16000 bytes
 memory[0x2]    [0x00000fff766000-0x00000fff771fff], 0xc000 bytes
 reserved.cnt  = 0x2
 reserved[0x0]  [0x00000020800000-0x00000021a04580], 0x1204581 bytes
 reserved[0x1]  [0x00000024800000-0x0000002c7d29fc], 0x7fd29fd bytes
MBLOCK[0]: base[20000000] size[fe0000000] offset[0]

There are two groups:
group node[16d5]
MLGROUP[0]: node[1765] latency[1f7e8] match[0] mask[200000000]
MLGROUP[3]: node[177d] latency[2de60] match[200000000] mask[200000000]
NUMA NODE[0]: node[1765] mask[200000000] val[0] (latency[1f7e8])
group node[171d]
MLGROUP[2]: node[1775] latency[2de60] match[0] mask[200000000]
MLGROUP[1]: node[176d] latency[1f7e8] match[200000000] mask[200000000]
NUMA NODE[1]: node[176d] mask[200000000] val[200000000] (latency[1f7e8])
(note: for this two-"group" bare-metal machine, half of the memory is in group
one's lg and half is in group two's lg).

Cc: sparclinux@vger.kernel.org
Signed-off-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Showing 1 changed file with 4 additions and 1 deletion (inline diff)

arch/sparc/mm/init_64.c
1 /* 1 /*
2 * arch/sparc64/mm/init.c 2 * arch/sparc64/mm/init.c
3 * 3 *
4 * Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu) 4 * Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu)
5 * Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz) 5 * Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
6 */ 6 */
7 7
8 #include <linux/module.h> 8 #include <linux/module.h>
9 #include <linux/kernel.h> 9 #include <linux/kernel.h>
10 #include <linux/sched.h> 10 #include <linux/sched.h>
11 #include <linux/string.h> 11 #include <linux/string.h>
12 #include <linux/init.h> 12 #include <linux/init.h>
13 #include <linux/bootmem.h> 13 #include <linux/bootmem.h>
14 #include <linux/mm.h> 14 #include <linux/mm.h>
15 #include <linux/hugetlb.h> 15 #include <linux/hugetlb.h>
16 #include <linux/initrd.h> 16 #include <linux/initrd.h>
17 #include <linux/swap.h> 17 #include <linux/swap.h>
18 #include <linux/pagemap.h> 18 #include <linux/pagemap.h>
19 #include <linux/poison.h> 19 #include <linux/poison.h>
20 #include <linux/fs.h> 20 #include <linux/fs.h>
21 #include <linux/seq_file.h> 21 #include <linux/seq_file.h>
22 #include <linux/kprobes.h> 22 #include <linux/kprobes.h>
23 #include <linux/cache.h> 23 #include <linux/cache.h>
24 #include <linux/sort.h> 24 #include <linux/sort.h>
25 #include <linux/percpu.h> 25 #include <linux/percpu.h>
26 #include <linux/memblock.h> 26 #include <linux/memblock.h>
27 #include <linux/mmzone.h> 27 #include <linux/mmzone.h>
28 #include <linux/gfp.h> 28 #include <linux/gfp.h>
29 29
30 #include <asm/head.h> 30 #include <asm/head.h>
31 #include <asm/page.h> 31 #include <asm/page.h>
32 #include <asm/pgalloc.h> 32 #include <asm/pgalloc.h>
33 #include <asm/pgtable.h> 33 #include <asm/pgtable.h>
34 #include <asm/oplib.h> 34 #include <asm/oplib.h>
35 #include <asm/iommu.h> 35 #include <asm/iommu.h>
36 #include <asm/io.h> 36 #include <asm/io.h>
37 #include <asm/uaccess.h> 37 #include <asm/uaccess.h>
38 #include <asm/mmu_context.h> 38 #include <asm/mmu_context.h>
39 #include <asm/tlbflush.h> 39 #include <asm/tlbflush.h>
40 #include <asm/dma.h> 40 #include <asm/dma.h>
41 #include <asm/starfire.h> 41 #include <asm/starfire.h>
42 #include <asm/tlb.h> 42 #include <asm/tlb.h>
43 #include <asm/spitfire.h> 43 #include <asm/spitfire.h>
44 #include <asm/sections.h> 44 #include <asm/sections.h>
45 #include <asm/tsb.h> 45 #include <asm/tsb.h>
46 #include <asm/hypervisor.h> 46 #include <asm/hypervisor.h>
47 #include <asm/prom.h> 47 #include <asm/prom.h>
48 #include <asm/mdesc.h> 48 #include <asm/mdesc.h>
49 #include <asm/cpudata.h> 49 #include <asm/cpudata.h>
50 #include <asm/setup.h> 50 #include <asm/setup.h>
51 #include <asm/irq.h> 51 #include <asm/irq.h>
52 52
53 #include "init_64.h" 53 #include "init_64.h"
54 54
55 unsigned long kern_linear_pte_xor[4] __read_mostly; 55 unsigned long kern_linear_pte_xor[4] __read_mostly;
56 56
57 /* A bitmap, two bits for every 256MB of physical memory. These two 57 /* A bitmap, two bits for every 256MB of physical memory. These two
58 * bits determine what page size we use for kernel linear 58 * bits determine what page size we use for kernel linear
59 * translations. They form an index into kern_linear_pte_xor[]. The 59 * translations. They form an index into kern_linear_pte_xor[]. The
60 * value in the indexed slot is XOR'd with the TLB miss virtual 60 * value in the indexed slot is XOR'd with the TLB miss virtual
61 * address to form the resulting TTE. The mapping is: 61 * address to form the resulting TTE. The mapping is:
62 * 62 *
63 * 0 ==> 4MB 63 * 0 ==> 4MB
64 * 1 ==> 256MB 64 * 1 ==> 256MB
65 * 2 ==> 2GB 65 * 2 ==> 2GB
66 * 3 ==> 16GB 66 * 3 ==> 16GB
67 * 67 *
68 * All sun4v chips support 256MB pages. Only SPARC-T4 and later 68 * All sun4v chips support 256MB pages. Only SPARC-T4 and later
69 * support 2GB pages, and hopefully future cpus will support the 16GB 69 * support 2GB pages, and hopefully future cpus will support the 16GB
70 * pages as well. For slots 2 and 3, we encode a 256MB TTE xor there 70 * pages as well. For slots 2 and 3, we encode a 256MB TTE xor there
71 * if these larger page sizes are not supported by the cpu. 71 * if these larger page sizes are not supported by the cpu.
72 * 72 *
73 * It would be nice to determine this from the machine description 73 * It would be nice to determine this from the machine description
74 * 'cpu' properties, but we need to have this table setup before the 74 * 'cpu' properties, but we need to have this table setup before the
75 * MDESC is initialized. 75 * MDESC is initialized.
76 */ 76 */
77 unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; 77 unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
78 78
79 #ifndef CONFIG_DEBUG_PAGEALLOC 79 #ifndef CONFIG_DEBUG_PAGEALLOC
80 /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings. 80 /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
81 * Space is allocated for this right after the trap table in 81 * Space is allocated for this right after the trap table in
82 * arch/sparc64/kernel/head.S 82 * arch/sparc64/kernel/head.S
83 */ 83 */
84 extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; 84 extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
85 #endif 85 #endif
86 86
87 static unsigned long cpu_pgsz_mask; 87 static unsigned long cpu_pgsz_mask;
88 88
89 #define MAX_BANKS 32 89 #define MAX_BANKS 32
90 90
91 static struct linux_prom64_registers pavail[MAX_BANKS]; 91 static struct linux_prom64_registers pavail[MAX_BANKS];
92 static int pavail_ents; 92 static int pavail_ents;
93 93
94 static int cmp_p64(const void *a, const void *b) 94 static int cmp_p64(const void *a, const void *b)
95 { 95 {
96 const struct linux_prom64_registers *x = a, *y = b; 96 const struct linux_prom64_registers *x = a, *y = b;
97 97
98 if (x->phys_addr > y->phys_addr) 98 if (x->phys_addr > y->phys_addr)
99 return 1; 99 return 1;
100 if (x->phys_addr < y->phys_addr) 100 if (x->phys_addr < y->phys_addr)
101 return -1; 101 return -1;
102 return 0; 102 return 0;
103 } 103 }
104 104
105 static void __init read_obp_memory(const char *property, 105 static void __init read_obp_memory(const char *property,
106 struct linux_prom64_registers *regs, 106 struct linux_prom64_registers *regs,
107 int *num_ents) 107 int *num_ents)
108 { 108 {
109 phandle node = prom_finddevice("/memory"); 109 phandle node = prom_finddevice("/memory");
110 int prop_size = prom_getproplen(node, property); 110 int prop_size = prom_getproplen(node, property);
111 int ents, ret, i; 111 int ents, ret, i;
112 112
113 ents = prop_size / sizeof(struct linux_prom64_registers); 113 ents = prop_size / sizeof(struct linux_prom64_registers);
114 if (ents > MAX_BANKS) { 114 if (ents > MAX_BANKS) {
115 prom_printf("The machine has more %s property entries than " 115 prom_printf("The machine has more %s property entries than "
116 "this kernel can support (%d).\n", 116 "this kernel can support (%d).\n",
117 property, MAX_BANKS); 117 property, MAX_BANKS);
118 prom_halt(); 118 prom_halt();
119 } 119 }
120 120
121 ret = prom_getproperty(node, property, (char *) regs, prop_size); 121 ret = prom_getproperty(node, property, (char *) regs, prop_size);
122 if (ret == -1) { 122 if (ret == -1) {
123 prom_printf("Couldn't get %s property from /memory.\n", 123 prom_printf("Couldn't get %s property from /memory.\n",
124 property); 124 property);
125 prom_halt(); 125 prom_halt();
126 } 126 }
127 127
128 /* Sanitize what we got from the firmware, by page aligning 128 /* Sanitize what we got from the firmware, by page aligning
129 * everything. 129 * everything.
130 */ 130 */
131 for (i = 0; i < ents; i++) { 131 for (i = 0; i < ents; i++) {
132 unsigned long base, size; 132 unsigned long base, size;
133 133
134 base = regs[i].phys_addr; 134 base = regs[i].phys_addr;
135 size = regs[i].reg_size; 135 size = regs[i].reg_size;
136 136
137 size &= PAGE_MASK; 137 size &= PAGE_MASK;
138 if (base & ~PAGE_MASK) { 138 if (base & ~PAGE_MASK) {
139 unsigned long new_base = PAGE_ALIGN(base); 139 unsigned long new_base = PAGE_ALIGN(base);
140 140
141 size -= new_base - base; 141 size -= new_base - base;
142 if ((long) size < 0L) 142 if ((long) size < 0L)
143 size = 0UL; 143 size = 0UL;
144 base = new_base; 144 base = new_base;
145 } 145 }
146 if (size == 0UL) { 146 if (size == 0UL) {
147 /* If it is empty, simply get rid of it. 147 /* If it is empty, simply get rid of it.
148 * This simplifies the logic of the other 148 * This simplifies the logic of the other
149 * functions that process these arrays. 149 * functions that process these arrays.
150 */ 150 */
151 memmove(&regs[i], &regs[i + 1], 151 memmove(&regs[i], &regs[i + 1],
152 (ents - i - 1) * sizeof(regs[0])); 152 (ents - i - 1) * sizeof(regs[0]));
153 i--; 153 i--;
154 ents--; 154 ents--;
155 continue; 155 continue;
156 } 156 }
157 regs[i].phys_addr = base; 157 regs[i].phys_addr = base;
158 regs[i].reg_size = size; 158 regs[i].reg_size = size;
159 } 159 }
160 160
161 *num_ents = ents; 161 *num_ents = ents;
162 162
163 sort(regs, ents, sizeof(struct linux_prom64_registers), 163 sort(regs, ents, sizeof(struct linux_prom64_registers),
164 cmp_p64, NULL); 164 cmp_p64, NULL);
165 } 165 }
166 166
167 unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES / 167 unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES /
168 sizeof(unsigned long)]; 168 sizeof(unsigned long)];
169 EXPORT_SYMBOL(sparc64_valid_addr_bitmap); 169 EXPORT_SYMBOL(sparc64_valid_addr_bitmap);
170 170
171 /* Kernel physical address base and size in bytes. */ 171 /* Kernel physical address base and size in bytes. */
172 unsigned long kern_base __read_mostly; 172 unsigned long kern_base __read_mostly;
173 unsigned long kern_size __read_mostly; 173 unsigned long kern_size __read_mostly;
174 174
175 /* Initial ramdisk setup */ 175 /* Initial ramdisk setup */
176 extern unsigned long sparc_ramdisk_image64; 176 extern unsigned long sparc_ramdisk_image64;
177 extern unsigned int sparc_ramdisk_image; 177 extern unsigned int sparc_ramdisk_image;
178 extern unsigned int sparc_ramdisk_size; 178 extern unsigned int sparc_ramdisk_size;
179 179
180 struct page *mem_map_zero __read_mostly; 180 struct page *mem_map_zero __read_mostly;
181 EXPORT_SYMBOL(mem_map_zero); 181 EXPORT_SYMBOL(mem_map_zero);
182 182
183 unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly; 183 unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly;
184 184
185 unsigned long sparc64_kern_pri_context __read_mostly; 185 unsigned long sparc64_kern_pri_context __read_mostly;
186 unsigned long sparc64_kern_pri_nuc_bits __read_mostly; 186 unsigned long sparc64_kern_pri_nuc_bits __read_mostly;
187 unsigned long sparc64_kern_sec_context __read_mostly; 187 unsigned long sparc64_kern_sec_context __read_mostly;
188 188
189 int num_kernel_image_mappings; 189 int num_kernel_image_mappings;
190 190
191 #ifdef CONFIG_DEBUG_DCFLUSH 191 #ifdef CONFIG_DEBUG_DCFLUSH
192 atomic_t dcpage_flushes = ATOMIC_INIT(0); 192 atomic_t dcpage_flushes = ATOMIC_INIT(0);
193 #ifdef CONFIG_SMP 193 #ifdef CONFIG_SMP
194 atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0); 194 atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0);
195 #endif 195 #endif
196 #endif 196 #endif
197 197
198 inline void flush_dcache_page_impl(struct page *page) 198 inline void flush_dcache_page_impl(struct page *page)
199 { 199 {
200 BUG_ON(tlb_type == hypervisor); 200 BUG_ON(tlb_type == hypervisor);
201 #ifdef CONFIG_DEBUG_DCFLUSH 201 #ifdef CONFIG_DEBUG_DCFLUSH
202 atomic_inc(&dcpage_flushes); 202 atomic_inc(&dcpage_flushes);
203 #endif 203 #endif
204 204
205 #ifdef DCACHE_ALIASING_POSSIBLE 205 #ifdef DCACHE_ALIASING_POSSIBLE
206 __flush_dcache_page(page_address(page), 206 __flush_dcache_page(page_address(page),
207 ((tlb_type == spitfire) && 207 ((tlb_type == spitfire) &&
208 page_mapping(page) != NULL)); 208 page_mapping(page) != NULL));
209 #else 209 #else
210 if (page_mapping(page) != NULL && 210 if (page_mapping(page) != NULL &&
211 tlb_type == spitfire) 211 tlb_type == spitfire)
212 __flush_icache_page(__pa(page_address(page))); 212 __flush_icache_page(__pa(page_address(page)));
213 #endif 213 #endif
214 } 214 }
215 215
216 #define PG_dcache_dirty PG_arch_1 216 #define PG_dcache_dirty PG_arch_1
217 #define PG_dcache_cpu_shift 32UL 217 #define PG_dcache_cpu_shift 32UL
218 #define PG_dcache_cpu_mask \ 218 #define PG_dcache_cpu_mask \
219 ((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL) 219 ((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL)
220 220
221 #define dcache_dirty_cpu(page) \ 221 #define dcache_dirty_cpu(page) \
222 (((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask) 222 (((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask)
223 223
224 static inline void set_dcache_dirty(struct page *page, int this_cpu) 224 static inline void set_dcache_dirty(struct page *page, int this_cpu)
225 { 225 {
226 unsigned long mask = this_cpu; 226 unsigned long mask = this_cpu;
227 unsigned long non_cpu_bits; 227 unsigned long non_cpu_bits;
228 228
229 non_cpu_bits = ~(PG_dcache_cpu_mask << PG_dcache_cpu_shift); 229 non_cpu_bits = ~(PG_dcache_cpu_mask << PG_dcache_cpu_shift);
230 mask = (mask << PG_dcache_cpu_shift) | (1UL << PG_dcache_dirty); 230 mask = (mask << PG_dcache_cpu_shift) | (1UL << PG_dcache_dirty);
231 231
232 __asm__ __volatile__("1:\n\t" 232 __asm__ __volatile__("1:\n\t"
233 "ldx [%2], %%g7\n\t" 233 "ldx [%2], %%g7\n\t"
234 "and %%g7, %1, %%g1\n\t" 234 "and %%g7, %1, %%g1\n\t"
235 "or %%g1, %0, %%g1\n\t" 235 "or %%g1, %0, %%g1\n\t"
236 "casx [%2], %%g7, %%g1\n\t" 236 "casx [%2], %%g7, %%g1\n\t"
237 "cmp %%g7, %%g1\n\t" 237 "cmp %%g7, %%g1\n\t"
238 "bne,pn %%xcc, 1b\n\t" 238 "bne,pn %%xcc, 1b\n\t"
239 " nop" 239 " nop"
240 : /* no outputs */ 240 : /* no outputs */
241 : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) 241 : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
242 : "g1", "g7"); 242 : "g1", "g7");
243 } 243 }
244 244
245 static inline void clear_dcache_dirty_cpu(struct page *page, unsigned long cpu) 245 static inline void clear_dcache_dirty_cpu(struct page *page, unsigned long cpu)
246 { 246 {
247 unsigned long mask = (1UL << PG_dcache_dirty); 247 unsigned long mask = (1UL << PG_dcache_dirty);
248 248
249 __asm__ __volatile__("! test_and_clear_dcache_dirty\n" 249 __asm__ __volatile__("! test_and_clear_dcache_dirty\n"
250 "1:\n\t" 250 "1:\n\t"
251 "ldx [%2], %%g7\n\t" 251 "ldx [%2], %%g7\n\t"
252 "srlx %%g7, %4, %%g1\n\t" 252 "srlx %%g7, %4, %%g1\n\t"
253 "and %%g1, %3, %%g1\n\t" 253 "and %%g1, %3, %%g1\n\t"
254 "cmp %%g1, %0\n\t" 254 "cmp %%g1, %0\n\t"
255 "bne,pn %%icc, 2f\n\t" 255 "bne,pn %%icc, 2f\n\t"
256 " andn %%g7, %1, %%g1\n\t" 256 " andn %%g7, %1, %%g1\n\t"
257 "casx [%2], %%g7, %%g1\n\t" 257 "casx [%2], %%g7, %%g1\n\t"
258 "cmp %%g7, %%g1\n\t" 258 "cmp %%g7, %%g1\n\t"
259 "bne,pn %%xcc, 1b\n\t" 259 "bne,pn %%xcc, 1b\n\t"
260 " nop\n" 260 " nop\n"
261 "2:" 261 "2:"
262 : /* no outputs */ 262 : /* no outputs */
263 : "r" (cpu), "r" (mask), "r" (&page->flags), 263 : "r" (cpu), "r" (mask), "r" (&page->flags),
264 "i" (PG_dcache_cpu_mask), 264 "i" (PG_dcache_cpu_mask),
265 "i" (PG_dcache_cpu_shift) 265 "i" (PG_dcache_cpu_shift)
266 : "g1", "g7"); 266 : "g1", "g7");
267 } 267 }
268 268
269 static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte) 269 static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte)
270 { 270 {
271 unsigned long tsb_addr = (unsigned long) ent; 271 unsigned long tsb_addr = (unsigned long) ent;
272 272
273 if (tlb_type == cheetah_plus || tlb_type == hypervisor) 273 if (tlb_type == cheetah_plus || tlb_type == hypervisor)
274 tsb_addr = __pa(tsb_addr); 274 tsb_addr = __pa(tsb_addr);
275 275
276 __tsb_insert(tsb_addr, tag, pte); 276 __tsb_insert(tsb_addr, tag, pte);
277 } 277 }
278 278
279 unsigned long _PAGE_ALL_SZ_BITS __read_mostly; 279 unsigned long _PAGE_ALL_SZ_BITS __read_mostly;
280 280
281 static void flush_dcache(unsigned long pfn) 281 static void flush_dcache(unsigned long pfn)
282 { 282 {
283 struct page *page; 283 struct page *page;
284 284
285 page = pfn_to_page(pfn); 285 page = pfn_to_page(pfn);
286 if (page) { 286 if (page) {
287 unsigned long pg_flags; 287 unsigned long pg_flags;
288 288
289 pg_flags = page->flags; 289 pg_flags = page->flags;
290 if (pg_flags & (1UL << PG_dcache_dirty)) { 290 if (pg_flags & (1UL << PG_dcache_dirty)) {
291 int cpu = ((pg_flags >> PG_dcache_cpu_shift) & 291 int cpu = ((pg_flags >> PG_dcache_cpu_shift) &
292 PG_dcache_cpu_mask); 292 PG_dcache_cpu_mask);
293 int this_cpu = get_cpu(); 293 int this_cpu = get_cpu();
294 294
295 /* This is just to optimize away some function calls 295 /* This is just to optimize away some function calls
296 * in the SMP case. 296 * in the SMP case.
297 */ 297 */
298 if (cpu == this_cpu) 298 if (cpu == this_cpu)
299 flush_dcache_page_impl(page); 299 flush_dcache_page_impl(page);
300 else 300 else
301 smp_flush_dcache_page_impl(page, cpu); 301 smp_flush_dcache_page_impl(page, cpu);
302 302
303 clear_dcache_dirty_cpu(page, cpu); 303 clear_dcache_dirty_cpu(page, cpu);
304 304
305 put_cpu(); 305 put_cpu();
306 } 306 }
307 } 307 }
308 } 308 }
309 309
310 /* mm->context.lock must be held */ 310 /* mm->context.lock must be held */
311 static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index, 311 static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index,
312 unsigned long tsb_hash_shift, unsigned long address, 312 unsigned long tsb_hash_shift, unsigned long address,
313 unsigned long tte) 313 unsigned long tte)
314 { 314 {
315 struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb; 315 struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
316 unsigned long tag; 316 unsigned long tag;
317 317
318 if (unlikely(!tsb)) 318 if (unlikely(!tsb))
319 return; 319 return;
320 320
321 tsb += ((address >> tsb_hash_shift) & 321 tsb += ((address >> tsb_hash_shift) &
322 (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL)); 322 (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
323 tag = (address >> 22UL); 323 tag = (address >> 22UL);
324 tsb_insert(tsb, tag, tte); 324 tsb_insert(tsb, tag, tte);
325 } 325 }
326 326
327 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) 327 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
328 static inline bool is_hugetlb_pte(pte_t pte) 328 static inline bool is_hugetlb_pte(pte_t pte)
329 { 329 {
330 if ((tlb_type == hypervisor && 330 if ((tlb_type == hypervisor &&
331 (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || 331 (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
332 (tlb_type != hypervisor && 332 (tlb_type != hypervisor &&
333 (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) 333 (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U))
334 return true; 334 return true;
335 return false; 335 return false;
336 } 336 }
337 #endif 337 #endif
338 338
339 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) 339 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
340 { 340 {
341 struct mm_struct *mm; 341 struct mm_struct *mm;
342 unsigned long flags; 342 unsigned long flags;
343 pte_t pte = *ptep; 343 pte_t pte = *ptep;
344 344
345 if (tlb_type != hypervisor) { 345 if (tlb_type != hypervisor) {
346 unsigned long pfn = pte_pfn(pte); 346 unsigned long pfn = pte_pfn(pte);
347 347
348 if (pfn_valid(pfn)) 348 if (pfn_valid(pfn))
349 flush_dcache(pfn); 349 flush_dcache(pfn);
350 } 350 }
351 351
352 mm = vma->vm_mm; 352 mm = vma->vm_mm;
353 353
354 /* Don't insert a non-valid PTE into the TSB, we'll deadlock. */ 354 /* Don't insert a non-valid PTE into the TSB, we'll deadlock. */
355 if (!pte_accessible(mm, pte)) 355 if (!pte_accessible(mm, pte))
356 return; 356 return;
357 357
358 spin_lock_irqsave(&mm->context.lock, flags); 358 spin_lock_irqsave(&mm->context.lock, flags);
359 359
360 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) 360 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
361 if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) 361 if (mm->context.huge_pte_count && is_hugetlb_pte(pte))
362 __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, 362 __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
363 address, pte_val(pte)); 363 address, pte_val(pte));
364 else 364 else
365 #endif 365 #endif
366 __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT, 366 __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
367 address, pte_val(pte)); 367 address, pte_val(pte));
368 368
369 spin_unlock_irqrestore(&mm->context.lock, flags); 369 spin_unlock_irqrestore(&mm->context.lock, flags);
370 } 370 }
371 371
372 void flush_dcache_page(struct page *page) 372 void flush_dcache_page(struct page *page)
373 { 373 {
374 struct address_space *mapping; 374 struct address_space *mapping;
375 int this_cpu; 375 int this_cpu;
376 376
377 if (tlb_type == hypervisor) 377 if (tlb_type == hypervisor)
378 return; 378 return;
379 379
380 /* Do not bother with the expensive D-cache flush if it 380 /* Do not bother with the expensive D-cache flush if it
381 * is merely the zero page. The 'bigcore' testcase in GDB 381 * is merely the zero page. The 'bigcore' testcase in GDB
382 * causes this case to run millions of times. 382 * causes this case to run millions of times.
383 */ 383 */
384 if (page == ZERO_PAGE(0)) 384 if (page == ZERO_PAGE(0))
385 return; 385 return;
386 386
387 this_cpu = get_cpu(); 387 this_cpu = get_cpu();
388 388
389 mapping = page_mapping(page); 389 mapping = page_mapping(page);
390 if (mapping && !mapping_mapped(mapping)) { 390 if (mapping && !mapping_mapped(mapping)) {
391 int dirty = test_bit(PG_dcache_dirty, &page->flags); 391 int dirty = test_bit(PG_dcache_dirty, &page->flags);
392 if (dirty) { 392 if (dirty) {
393 int dirty_cpu = dcache_dirty_cpu(page); 393 int dirty_cpu = dcache_dirty_cpu(page);
394 394
395 if (dirty_cpu == this_cpu) 395 if (dirty_cpu == this_cpu)
396 goto out; 396 goto out;
397 smp_flush_dcache_page_impl(page, dirty_cpu); 397 smp_flush_dcache_page_impl(page, dirty_cpu);
398 } 398 }
399 set_dcache_dirty(page, this_cpu); 399 set_dcache_dirty(page, this_cpu);
400 } else { 400 } else {
401 /* We could delay the flush for the !page_mapping 401 /* We could delay the flush for the !page_mapping
402 * case too. But that case is for exec env/arg 402 * case too. But that case is for exec env/arg
403 * pages and those are %99 certainly going to get 403 * pages and those are %99 certainly going to get
404 * faulted into the tlb (and thus flushed) anyways. 404 * faulted into the tlb (and thus flushed) anyways.
405 */ 405 */
406 flush_dcache_page_impl(page); 406 flush_dcache_page_impl(page);
407 } 407 }
408 408
409 out: 409 out:
410 put_cpu(); 410 put_cpu();
411 } 411 }
412 EXPORT_SYMBOL(flush_dcache_page); 412 EXPORT_SYMBOL(flush_dcache_page);
413 413
414 void __kprobes flush_icache_range(unsigned long start, unsigned long end) 414 void __kprobes flush_icache_range(unsigned long start, unsigned long end)
415 { 415 {
416 /* Cheetah and Hypervisor platform cpus have coherent I-cache. */ 416 /* Cheetah and Hypervisor platform cpus have coherent I-cache. */
417 if (tlb_type == spitfire) { 417 if (tlb_type == spitfire) {
418 unsigned long kaddr; 418 unsigned long kaddr;
419 419
420 /* This code only runs on Spitfire cpus so this is 420 /* This code only runs on Spitfire cpus so this is
421 * why we can assume _PAGE_PADDR_4U. 421 * why we can assume _PAGE_PADDR_4U.
422 */ 422 */
423 for (kaddr = start; kaddr < end; kaddr += PAGE_SIZE) { 423 for (kaddr = start; kaddr < end; kaddr += PAGE_SIZE) {
424 unsigned long paddr, mask = _PAGE_PADDR_4U; 424 unsigned long paddr, mask = _PAGE_PADDR_4U;
425 425
426 if (kaddr >= PAGE_OFFSET) 426 if (kaddr >= PAGE_OFFSET)
427 paddr = kaddr & mask; 427 paddr = kaddr & mask;
428 else { 428 else {
429 pgd_t *pgdp = pgd_offset_k(kaddr); 429 pgd_t *pgdp = pgd_offset_k(kaddr);
430 pud_t *pudp = pud_offset(pgdp, kaddr); 430 pud_t *pudp = pud_offset(pgdp, kaddr);
431 pmd_t *pmdp = pmd_offset(pudp, kaddr); 431 pmd_t *pmdp = pmd_offset(pudp, kaddr);
432 pte_t *ptep = pte_offset_kernel(pmdp, kaddr); 432 pte_t *ptep = pte_offset_kernel(pmdp, kaddr);
433 433
434 paddr = pte_val(*ptep) & mask; 434 paddr = pte_val(*ptep) & mask;
435 } 435 }
436 __flush_icache_page(paddr); 436 __flush_icache_page(paddr);
437 } 437 }
438 } 438 }
439 } 439 }
440 EXPORT_SYMBOL(flush_icache_range); 440 EXPORT_SYMBOL(flush_icache_range);
441 441
442 void mmu_info(struct seq_file *m) 442 void mmu_info(struct seq_file *m)
443 { 443 {
444 static const char *pgsz_strings[] = { 444 static const char *pgsz_strings[] = {
445 "8K", "64K", "512K", "4MB", "32MB", 445 "8K", "64K", "512K", "4MB", "32MB",
446 "256MB", "2GB", "16GB", 446 "256MB", "2GB", "16GB",
447 }; 447 };
448 int i, printed; 448 int i, printed;
449 449
450 if (tlb_type == cheetah) 450 if (tlb_type == cheetah)
451 seq_printf(m, "MMU Type\t: Cheetah\n"); 451 seq_printf(m, "MMU Type\t: Cheetah\n");
452 else if (tlb_type == cheetah_plus) 452 else if (tlb_type == cheetah_plus)
453 seq_printf(m, "MMU Type\t: Cheetah+\n"); 453 seq_printf(m, "MMU Type\t: Cheetah+\n");
454 else if (tlb_type == spitfire) 454 else if (tlb_type == spitfire)
455 seq_printf(m, "MMU Type\t: Spitfire\n"); 455 seq_printf(m, "MMU Type\t: Spitfire\n");
456 else if (tlb_type == hypervisor) 456 else if (tlb_type == hypervisor)
457 seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n"); 457 seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n");
458 else 458 else
459 seq_printf(m, "MMU Type\t: ???\n"); 459 seq_printf(m, "MMU Type\t: ???\n");
460 460
461 seq_printf(m, "MMU PGSZs\t: "); 461 seq_printf(m, "MMU PGSZs\t: ");
462 printed = 0; 462 printed = 0;
463 for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) { 463 for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) {
464 if (cpu_pgsz_mask & (1UL << i)) { 464 if (cpu_pgsz_mask & (1UL << i)) {
465 seq_printf(m, "%s%s", 465 seq_printf(m, "%s%s",
466 printed ? "," : "", pgsz_strings[i]); 466 printed ? "," : "", pgsz_strings[i]);
467 printed++; 467 printed++;
468 } 468 }
469 } 469 }
470 seq_putc(m, '\n'); 470 seq_putc(m, '\n');
471 471
472 #ifdef CONFIG_DEBUG_DCFLUSH 472 #ifdef CONFIG_DEBUG_DCFLUSH
473 seq_printf(m, "DCPageFlushes\t: %d\n", 473 seq_printf(m, "DCPageFlushes\t: %d\n",
474 atomic_read(&dcpage_flushes)); 474 atomic_read(&dcpage_flushes));
475 #ifdef CONFIG_SMP 475 #ifdef CONFIG_SMP
476 seq_printf(m, "DCPageFlushesXC\t: %d\n", 476 seq_printf(m, "DCPageFlushesXC\t: %d\n",
477 atomic_read(&dcpage_flushes_xcall)); 477 atomic_read(&dcpage_flushes_xcall));
478 #endif /* CONFIG_SMP */ 478 #endif /* CONFIG_SMP */
479 #endif /* CONFIG_DEBUG_DCFLUSH */ 479 #endif /* CONFIG_DEBUG_DCFLUSH */
480 } 480 }
481 481
482 struct linux_prom_translation prom_trans[512] __read_mostly; 482 struct linux_prom_translation prom_trans[512] __read_mostly;
483 unsigned int prom_trans_ents __read_mostly; 483 unsigned int prom_trans_ents __read_mostly;
484 484
485 unsigned long kern_locked_tte_data; 485 unsigned long kern_locked_tte_data;
486 486
487 /* The obp translations are saved based on 8k pagesize, since obp can 487 /* The obp translations are saved based on 8k pagesize, since obp can
488 * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS -> 488 * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
489 * HI_OBP_ADDRESS range are handled in ktlb.S. 489 * HI_OBP_ADDRESS range are handled in ktlb.S.
490 */ 490 */
491 static inline int in_obp_range(unsigned long vaddr) 491 static inline int in_obp_range(unsigned long vaddr)
492 { 492 {
493 return (vaddr >= LOW_OBP_ADDRESS && 493 return (vaddr >= LOW_OBP_ADDRESS &&
494 vaddr < HI_OBP_ADDRESS); 494 vaddr < HI_OBP_ADDRESS);
495 } 495 }
496 496
497 static int cmp_ptrans(const void *a, const void *b) 497 static int cmp_ptrans(const void *a, const void *b)
498 { 498 {
499 const struct linux_prom_translation *x = a, *y = b; 499 const struct linux_prom_translation *x = a, *y = b;
500 500
501 if (x->virt > y->virt) 501 if (x->virt > y->virt)
502 return 1; 502 return 1;
503 if (x->virt < y->virt) 503 if (x->virt < y->virt)
504 return -1; 504 return -1;
505 return 0; 505 return 0;
506 } 506 }
507 507
508 /* Read OBP translations property into 'prom_trans[]'. */ 508 /* Read OBP translations property into 'prom_trans[]'. */
509 static void __init read_obp_translations(void) 509 static void __init read_obp_translations(void)
510 { 510 {
511 int n, node, ents, first, last, i; 511 int n, node, ents, first, last, i;
512 512
513 node = prom_finddevice("/virtual-memory"); 513 node = prom_finddevice("/virtual-memory");
514 n = prom_getproplen(node, "translations"); 514 n = prom_getproplen(node, "translations");
515 if (unlikely(n == 0 || n == -1)) { 515 if (unlikely(n == 0 || n == -1)) {
516 prom_printf("prom_mappings: Couldn't get size.\n"); 516 prom_printf("prom_mappings: Couldn't get size.\n");
517 prom_halt(); 517 prom_halt();
518 } 518 }
519 if (unlikely(n > sizeof(prom_trans))) { 519 if (unlikely(n > sizeof(prom_trans))) {
520 prom_printf("prom_mappings: Size %d is too big.\n", n); 520 prom_printf("prom_mappings: Size %d is too big.\n", n);
521 prom_halt(); 521 prom_halt();
522 } 522 }
523 523
524 if ((n = prom_getproperty(node, "translations", 524 if ((n = prom_getproperty(node, "translations",
525 (char *)&prom_trans[0], 525 (char *)&prom_trans[0],
526 sizeof(prom_trans))) == -1) { 526 sizeof(prom_trans))) == -1) {
527 prom_printf("prom_mappings: Couldn't get property.\n"); 527 prom_printf("prom_mappings: Couldn't get property.\n");
528 prom_halt(); 528 prom_halt();
529 } 529 }
530 530
531 n = n / sizeof(struct linux_prom_translation); 531 n = n / sizeof(struct linux_prom_translation);
532 532
533 ents = n; 533 ents = n;
534 534
535 sort(prom_trans, ents, sizeof(struct linux_prom_translation), 535 sort(prom_trans, ents, sizeof(struct linux_prom_translation),
536 cmp_ptrans, NULL); 536 cmp_ptrans, NULL);
537 537
538 /* Now kick out all the non-OBP entries. */ 538 /* Now kick out all the non-OBP entries. */
539 for (i = 0; i < ents; i++) { 539 for (i = 0; i < ents; i++) {
540 if (in_obp_range(prom_trans[i].virt)) 540 if (in_obp_range(prom_trans[i].virt))
541 break; 541 break;
542 } 542 }
543 first = i; 543 first = i;
544 for (; i < ents; i++) { 544 for (; i < ents; i++) {
545 if (!in_obp_range(prom_trans[i].virt)) 545 if (!in_obp_range(prom_trans[i].virt))
546 break; 546 break;
547 } 547 }
548 last = i; 548 last = i;
549 549
550 for (i = 0; i < (last - first); i++) { 550 for (i = 0; i < (last - first); i++) {
551 struct linux_prom_translation *src = &prom_trans[i + first]; 551 struct linux_prom_translation *src = &prom_trans[i + first];
552 struct linux_prom_translation *dest = &prom_trans[i]; 552 struct linux_prom_translation *dest = &prom_trans[i];
553 553
554 *dest = *src; 554 *dest = *src;
555 } 555 }
556 for (; i < ents; i++) { 556 for (; i < ents; i++) {
557 struct linux_prom_translation *dest = &prom_trans[i]; 557 struct linux_prom_translation *dest = &prom_trans[i];
558 dest->virt = dest->size = dest->data = 0x0UL; 558 dest->virt = dest->size = dest->data = 0x0UL;
559 } 559 }
560 560
561 prom_trans_ents = last - first; 561 prom_trans_ents = last - first;
562 562
563 if (tlb_type == spitfire) { 563 if (tlb_type == spitfire) {
564 /* Clear diag TTE bits. */ 564 /* Clear diag TTE bits. */
565 for (i = 0; i < prom_trans_ents; i++) 565 for (i = 0; i < prom_trans_ents; i++)
566 prom_trans[i].data &= ~0x0003fe0000000000UL; 566 prom_trans[i].data &= ~0x0003fe0000000000UL;
567 } 567 }
568 568
569 /* Force execute bit on. */ 569 /* Force execute bit on. */
570 for (i = 0; i < prom_trans_ents; i++) 570 for (i = 0; i < prom_trans_ents; i++)
571 prom_trans[i].data |= (tlb_type == hypervisor ? 571 prom_trans[i].data |= (tlb_type == hypervisor ?
572 _PAGE_EXEC_4V : _PAGE_EXEC_4U); 572 _PAGE_EXEC_4V : _PAGE_EXEC_4U);
573 } 573 }
574 574
575 static void __init hypervisor_tlb_lock(unsigned long vaddr, 575 static void __init hypervisor_tlb_lock(unsigned long vaddr,
576 unsigned long pte, 576 unsigned long pte,
577 unsigned long mmu) 577 unsigned long mmu)
578 { 578 {
579 unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu); 579 unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu);
580 580
581 if (ret != 0) { 581 if (ret != 0) {
582 prom_printf("hypervisor_tlb_lock[%lx:%x:%lx:%lx]: " 582 prom_printf("hypervisor_tlb_lock[%lx:%x:%lx:%lx]: "
583 "errors with %lx\n", vaddr, 0, pte, mmu, ret); 583 "errors with %lx\n", vaddr, 0, pte, mmu, ret);
584 prom_halt(); 584 prom_halt();
585 } 585 }
586 } 586 }
587 587
588 static unsigned long kern_large_tte(unsigned long paddr); 588 static unsigned long kern_large_tte(unsigned long paddr);
589 589
590 static void __init remap_kernel(void) 590 static void __init remap_kernel(void)
591 { 591 {
592 unsigned long phys_page, tte_vaddr, tte_data; 592 unsigned long phys_page, tte_vaddr, tte_data;
593 int i, tlb_ent = sparc64_highest_locked_tlbent(); 593 int i, tlb_ent = sparc64_highest_locked_tlbent();
594 594
595 tte_vaddr = (unsigned long) KERNBASE; 595 tte_vaddr = (unsigned long) KERNBASE;
596 phys_page = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB; 596 phys_page = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB;
597 tte_data = kern_large_tte(phys_page); 597 tte_data = kern_large_tte(phys_page);
598 598
599 kern_locked_tte_data = tte_data; 599 kern_locked_tte_data = tte_data;
600 600
601 /* Now lock us into the TLBs via Hypervisor or OBP. */ 601 /* Now lock us into the TLBs via Hypervisor or OBP. */
602 if (tlb_type == hypervisor) { 602 if (tlb_type == hypervisor) {
603 for (i = 0; i < num_kernel_image_mappings; i++) { 603 for (i = 0; i < num_kernel_image_mappings; i++) {
604 hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU); 604 hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU);
605 hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU); 605 hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU);
606 tte_vaddr += 0x400000; 606 tte_vaddr += 0x400000;
607 tte_data += 0x400000; 607 tte_data += 0x400000;
608 } 608 }
609 } else { 609 } else {
610 for (i = 0; i < num_kernel_image_mappings; i++) { 610 for (i = 0; i < num_kernel_image_mappings; i++) {
611 prom_dtlb_load(tlb_ent - i, tte_data, tte_vaddr); 611 prom_dtlb_load(tlb_ent - i, tte_data, tte_vaddr);
612 prom_itlb_load(tlb_ent - i, tte_data, tte_vaddr); 612 prom_itlb_load(tlb_ent - i, tte_data, tte_vaddr);
613 tte_vaddr += 0x400000; 613 tte_vaddr += 0x400000;
614 tte_data += 0x400000; 614 tte_data += 0x400000;
615 } 615 }
616 sparc64_highest_unlocked_tlb_ent = tlb_ent - i; 616 sparc64_highest_unlocked_tlb_ent = tlb_ent - i;
617 } 617 }
618 if (tlb_type == cheetah_plus) { 618 if (tlb_type == cheetah_plus) {
619 sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 | 619 sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 |
620 CTX_CHEETAH_PLUS_NUC); 620 CTX_CHEETAH_PLUS_NUC);
621 sparc64_kern_pri_nuc_bits = CTX_CHEETAH_PLUS_NUC; 621 sparc64_kern_pri_nuc_bits = CTX_CHEETAH_PLUS_NUC;
622 sparc64_kern_sec_context = CTX_CHEETAH_PLUS_CTX0; 622 sparc64_kern_sec_context = CTX_CHEETAH_PLUS_CTX0;
623 } 623 }
624 } 624 }
625 625
626 626
627 static void __init inherit_prom_mappings(void) 627 static void __init inherit_prom_mappings(void)
628 { 628 {
629 /* Now fixup OBP's idea about where we really are mapped. */ 629 /* Now fixup OBP's idea about where we really are mapped. */
630 printk("Remapping the kernel... "); 630 printk("Remapping the kernel... ");
631 remap_kernel(); 631 remap_kernel();
632 printk("done.\n"); 632 printk("done.\n");
633 } 633 }
634 634
635 void prom_world(int enter) 635 void prom_world(int enter)
636 { 636 {
637 if (!enter) 637 if (!enter)
638 set_fs(get_fs()); 638 set_fs(get_fs());
639 639
640 __asm__ __volatile__("flushw"); 640 __asm__ __volatile__("flushw");
641 } 641 }
642 642
643 void __flush_dcache_range(unsigned long start, unsigned long end) 643 void __flush_dcache_range(unsigned long start, unsigned long end)
644 { 644 {
645 unsigned long va; 645 unsigned long va;
646 646
647 if (tlb_type == spitfire) { 647 if (tlb_type == spitfire) {
648 int n = 0; 648 int n = 0;
649 649
650 for (va = start; va < end; va += 32) { 650 for (va = start; va < end; va += 32) {
651 spitfire_put_dcache_tag(va & 0x3fe0, 0x0); 651 spitfire_put_dcache_tag(va & 0x3fe0, 0x0);
652 if (++n >= 512) 652 if (++n >= 512)
653 break; 653 break;
654 } 654 }
655 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { 655 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
656 start = __pa(start); 656 start = __pa(start);
657 end = __pa(end); 657 end = __pa(end);
658 for (va = start; va < end; va += 32) 658 for (va = start; va < end; va += 32)
659 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" 659 __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
660 "membar #Sync" 660 "membar #Sync"
661 : /* no outputs */ 661 : /* no outputs */
662 : "r" (va), 662 : "r" (va),
663 "i" (ASI_DCACHE_INVALIDATE)); 663 "i" (ASI_DCACHE_INVALIDATE));
664 } 664 }
665 } 665 }
666 EXPORT_SYMBOL(__flush_dcache_range); 666 EXPORT_SYMBOL(__flush_dcache_range);
667 667
668 /* get_new_mmu_context() uses "cache + 1". */ 668 /* get_new_mmu_context() uses "cache + 1". */
669 DEFINE_SPINLOCK(ctx_alloc_lock); 669 DEFINE_SPINLOCK(ctx_alloc_lock);
670 unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1; 670 unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
671 #define MAX_CTX_NR (1UL << CTX_NR_BITS) 671 #define MAX_CTX_NR (1UL << CTX_NR_BITS)
672 #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR) 672 #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR)
673 DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR); 673 DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
674 674
675 /* Caller does TLB context flushing on local CPU if necessary. 675 /* Caller does TLB context flushing on local CPU if necessary.
676 * The caller also ensures that CTX_VALID(mm->context) is false. 676 * The caller also ensures that CTX_VALID(mm->context) is false.
677 * 677 *
678 * We must be careful about boundary cases so that we never 678 * We must be careful about boundary cases so that we never
679 * let the user have CTX 0 (nucleus) or we ever use a CTX 679 * let the user have CTX 0 (nucleus) or we ever use a CTX
680 * version of zero (and thus NO_CONTEXT would not be caught 680 * version of zero (and thus NO_CONTEXT would not be caught
681 * by version mis-match tests in mmu_context.h). 681 * by version mis-match tests in mmu_context.h).
682 * 682 *
683 * Always invoked with interrupts disabled. 683 * Always invoked with interrupts disabled.
684 */ 684 */
685 void get_new_mmu_context(struct mm_struct *mm) 685 void get_new_mmu_context(struct mm_struct *mm)
686 { 686 {
687 unsigned long ctx, new_ctx; 687 unsigned long ctx, new_ctx;
688 unsigned long orig_pgsz_bits; 688 unsigned long orig_pgsz_bits;
689 int new_version; 689 int new_version;
690 690
691 spin_lock(&ctx_alloc_lock); 691 spin_lock(&ctx_alloc_lock);
692 orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); 692 orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
693 ctx = (tlb_context_cache + 1) & CTX_NR_MASK; 693 ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
694 new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); 694 new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
695 new_version = 0; 695 new_version = 0;
696 if (new_ctx >= (1 << CTX_NR_BITS)) { 696 if (new_ctx >= (1 << CTX_NR_BITS)) {
697 new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); 697 new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
698 if (new_ctx >= ctx) { 698 if (new_ctx >= ctx) {
699 int i; 699 int i;
700 new_ctx = (tlb_context_cache & CTX_VERSION_MASK) + 700 new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
701 CTX_FIRST_VERSION; 701 CTX_FIRST_VERSION;
702 if (new_ctx == 1) 702 if (new_ctx == 1)
703 new_ctx = CTX_FIRST_VERSION; 703 new_ctx = CTX_FIRST_VERSION;
704 704
705 /* Don't call memset, for 16 entries that's just 705 /* Don't call memset, for 16 entries that's just
706 * plain silly... 706 * plain silly...
707 */ 707 */
708 mmu_context_bmap[0] = 3; 708 mmu_context_bmap[0] = 3;
709 mmu_context_bmap[1] = 0; 709 mmu_context_bmap[1] = 0;
710 mmu_context_bmap[2] = 0; 710 mmu_context_bmap[2] = 0;
711 mmu_context_bmap[3] = 0; 711 mmu_context_bmap[3] = 0;
712 for (i = 4; i < CTX_BMAP_SLOTS; i += 4) { 712 for (i = 4; i < CTX_BMAP_SLOTS; i += 4) {
713 mmu_context_bmap[i + 0] = 0; 713 mmu_context_bmap[i + 0] = 0;
714 mmu_context_bmap[i + 1] = 0; 714 mmu_context_bmap[i + 1] = 0;
715 mmu_context_bmap[i + 2] = 0; 715 mmu_context_bmap[i + 2] = 0;
716 mmu_context_bmap[i + 3] = 0; 716 mmu_context_bmap[i + 3] = 0;
717 } 717 }
718 new_version = 1; 718 new_version = 1;
719 goto out; 719 goto out;
720 } 720 }
721 } 721 }
722 mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63)); 722 mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
723 new_ctx |= (tlb_context_cache & CTX_VERSION_MASK); 723 new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
724 out: 724 out:
725 tlb_context_cache = new_ctx; 725 tlb_context_cache = new_ctx;
726 mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; 726 mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
727 spin_unlock(&ctx_alloc_lock); 727 spin_unlock(&ctx_alloc_lock);
728 728
729 if (unlikely(new_version)) 729 if (unlikely(new_version))
730 smp_new_mmu_context_version(); 730 smp_new_mmu_context_version();
731 } 731 }
732 732
733 static int numa_enabled = 1; 733 static int numa_enabled = 1;
734 static int numa_debug; 734 static int numa_debug;
735 735
736 static int __init early_numa(char *p) 736 static int __init early_numa(char *p)
737 { 737 {
738 if (!p) 738 if (!p)
739 return 0; 739 return 0;
740 740
741 if (strstr(p, "off")) 741 if (strstr(p, "off"))
742 numa_enabled = 0; 742 numa_enabled = 0;
743 743
744 if (strstr(p, "debug")) 744 if (strstr(p, "debug"))
745 numa_debug = 1; 745 numa_debug = 1;
746 746
747 return 0; 747 return 0;
748 } 748 }
749 early_param("numa", early_numa); 749 early_param("numa", early_numa);
750 750
751 #define numadbg(f, a...) \ 751 #define numadbg(f, a...) \
752 do { if (numa_debug) \ 752 do { if (numa_debug) \
753 printk(KERN_INFO f, ## a); \ 753 printk(KERN_INFO f, ## a); \
754 } while (0) 754 } while (0)
755 755
756 static void __init find_ramdisk(unsigned long phys_base) 756 static void __init find_ramdisk(unsigned long phys_base)
757 { 757 {
758 #ifdef CONFIG_BLK_DEV_INITRD 758 #ifdef CONFIG_BLK_DEV_INITRD
759 if (sparc_ramdisk_image || sparc_ramdisk_image64) { 759 if (sparc_ramdisk_image || sparc_ramdisk_image64) {
760 unsigned long ramdisk_image; 760 unsigned long ramdisk_image;
761 761
762 /* Older versions of the bootloader only supported a 762 /* Older versions of the bootloader only supported a
763 * 32-bit physical address for the ramdisk image 763 * 32-bit physical address for the ramdisk image
764 * location, stored at sparc_ramdisk_image. Newer 764 * location, stored at sparc_ramdisk_image. Newer
765 * SILO versions set sparc_ramdisk_image to zero and 765 * SILO versions set sparc_ramdisk_image to zero and
766 * provide a full 64-bit physical address at 766 * provide a full 64-bit physical address at
767 * sparc_ramdisk_image64. 767 * sparc_ramdisk_image64.
768 */ 768 */
769 ramdisk_image = sparc_ramdisk_image; 769 ramdisk_image = sparc_ramdisk_image;
770 if (!ramdisk_image) 770 if (!ramdisk_image)
771 ramdisk_image = sparc_ramdisk_image64; 771 ramdisk_image = sparc_ramdisk_image64;
772 772
773 /* Another bootloader quirk. The bootloader normalizes 773 /* Another bootloader quirk. The bootloader normalizes
774 * the physical address to KERNBASE, so we have to 774 * the physical address to KERNBASE, so we have to
775 * factor that back out and add in the lowest valid 775 * factor that back out and add in the lowest valid
776 * physical page address to get the true physical address. 776 * physical page address to get the true physical address.
777 */ 777 */
778 ramdisk_image -= KERNBASE; 778 ramdisk_image -= KERNBASE;
779 ramdisk_image += phys_base; 779 ramdisk_image += phys_base;
780 780
781 numadbg("Found ramdisk at physical address 0x%lx, size %u\n", 781 numadbg("Found ramdisk at physical address 0x%lx, size %u\n",
782 ramdisk_image, sparc_ramdisk_size); 782 ramdisk_image, sparc_ramdisk_size);
783 783
784 initrd_start = ramdisk_image; 784 initrd_start = ramdisk_image;
785 initrd_end = ramdisk_image + sparc_ramdisk_size; 785 initrd_end = ramdisk_image + sparc_ramdisk_size;
786 786
787 memblock_reserve(initrd_start, sparc_ramdisk_size); 787 memblock_reserve(initrd_start, sparc_ramdisk_size);
788 788
789 initrd_start += PAGE_OFFSET; 789 initrd_start += PAGE_OFFSET;
790 initrd_end += PAGE_OFFSET; 790 initrd_end += PAGE_OFFSET;
791 } 791 }
792 #endif 792 #endif
793 } 793 }
794 794
795 struct node_mem_mask { 795 struct node_mem_mask {
796 unsigned long mask; 796 unsigned long mask;
797 unsigned long val; 797 unsigned long val;
798 }; 798 };
799 static struct node_mem_mask node_masks[MAX_NUMNODES]; 799 static struct node_mem_mask node_masks[MAX_NUMNODES];
800 static int num_node_masks; 800 static int num_node_masks;
801 801
802 #ifdef CONFIG_NEED_MULTIPLE_NODES 802 #ifdef CONFIG_NEED_MULTIPLE_NODES
803 803
804 int numa_cpu_lookup_table[NR_CPUS]; 804 int numa_cpu_lookup_table[NR_CPUS];
805 cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; 805 cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
806 806
807 struct mdesc_mblock { 807 struct mdesc_mblock {
808 u64 base; 808 u64 base;
809 u64 size; 809 u64 size;
810 u64 offset; /* RA-to-PA */ 810 u64 offset; /* RA-to-PA */
811 }; 811 };
812 static struct mdesc_mblock *mblocks; 812 static struct mdesc_mblock *mblocks;
813 static int num_mblocks; 813 static int num_mblocks;
814 814
815 static unsigned long ra_to_pa(unsigned long addr) 815 static unsigned long ra_to_pa(unsigned long addr)
816 { 816 {
817 int i; 817 int i;
818 818
819 for (i = 0; i < num_mblocks; i++) { 819 for (i = 0; i < num_mblocks; i++) {
820 struct mdesc_mblock *m = &mblocks[i]; 820 struct mdesc_mblock *m = &mblocks[i];
821 821
822 if (addr >= m->base && 822 if (addr >= m->base &&
823 addr < (m->base + m->size)) { 823 addr < (m->base + m->size)) {
824 addr += m->offset; 824 addr += m->offset;
825 break; 825 break;
826 } 826 }
827 } 827 }
828 return addr; 828 return addr;
829 } 829 }
830 830
831 static int find_node(unsigned long addr) 831 static int find_node(unsigned long addr)
832 { 832 {
833 int i; 833 int i;
834 834
835 addr = ra_to_pa(addr); 835 addr = ra_to_pa(addr);
836 for (i = 0; i < num_node_masks; i++) { 836 for (i = 0; i < num_node_masks; i++) {
837 struct node_mem_mask *p = &node_masks[i]; 837 struct node_mem_mask *p = &node_masks[i];
838 838
839 if ((addr & p->mask) == p->val) 839 if ((addr & p->mask) == p->val)
840 return i; 840 return i;
841 } 841 }
842 return -1; 842 /* The following condition has been observed on LDOM guests.*/
843 WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node"
844 " rule. Some physical memory will be owned by node 0.");
845 return 0;
843 } 846 }
844 847
845 static u64 memblock_nid_range(u64 start, u64 end, int *nid) 848 static u64 memblock_nid_range(u64 start, u64 end, int *nid)
846 { 849 {
847 *nid = find_node(start); 850 *nid = find_node(start);
848 start += PAGE_SIZE; 851 start += PAGE_SIZE;
849 while (start < end) { 852 while (start < end) {
850 int n = find_node(start); 853 int n = find_node(start);
851 854
852 if (n != *nid) 855 if (n != *nid)
853 break; 856 break;
854 start += PAGE_SIZE; 857 start += PAGE_SIZE;
855 } 858 }
856 859
857 if (start > end) 860 if (start > end)
858 start = end; 861 start = end;
859 862
860 return start; 863 return start;
861 } 864 }
862 #endif 865 #endif
863 866
864 /* This must be invoked after performing all of the necessary 867 /* This must be invoked after performing all of the necessary
865 * memblock_set_node() calls for 'nid'. We need to be able to get 868 * memblock_set_node() calls for 'nid'. We need to be able to get
866 * correct data from get_pfn_range_for_nid(). 869 * correct data from get_pfn_range_for_nid().
867 */ 870 */
868 static void __init allocate_node_data(int nid) 871 static void __init allocate_node_data(int nid)
869 { 872 {
870 struct pglist_data *p; 873 struct pglist_data *p;
871 unsigned long start_pfn, end_pfn; 874 unsigned long start_pfn, end_pfn;
872 #ifdef CONFIG_NEED_MULTIPLE_NODES 875 #ifdef CONFIG_NEED_MULTIPLE_NODES
873 unsigned long paddr; 876 unsigned long paddr;
874 877
875 paddr = memblock_alloc_try_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid); 878 paddr = memblock_alloc_try_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid);
876 if (!paddr) { 879 if (!paddr) {
877 prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); 880 prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid);
878 prom_halt(); 881 prom_halt();
879 } 882 }
880 NODE_DATA(nid) = __va(paddr); 883 NODE_DATA(nid) = __va(paddr);
	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));

	NODE_DATA(nid)->node_id = nid;
#endif

	p = NODE_DATA(nid);

	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
	p->node_start_pfn = start_pfn;
	p->node_spanned_pages = end_pfn - start_pfn;
}

static void init_node_masks_nonnuma(void)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES
	int i;
#endif

	numadbg("Initializing tables for non-numa.\n");

	node_masks[0].mask = node_masks[0].val = 0;
	num_node_masks = 1;

#ifdef CONFIG_NEED_MULTIPLE_NODES
	for (i = 0; i < NR_CPUS; i++)
		numa_cpu_lookup_table[i] = 0;

	cpumask_setall(&numa_cpumask_lookup_table[0]);
#endif
}

#ifdef CONFIG_NEED_MULTIPLE_NODES
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);

struct mdesc_mlgroup {
	u64	node;
	u64	latency;
	u64	match;
	u64	mask;
};
static struct mdesc_mlgroup *mlgroups;
static int num_mlgroups;

static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
				   u32 cfg_handle)
{
	u64 arc;

	mdesc_for_each_arc(arc, md, pio, MDESC_ARC_TYPE_FWD) {
		u64 target = mdesc_arc_target(md, arc);
		const u64 *val;

		val = mdesc_get_property(md, target,
					 "cfg-handle", NULL);
		if (val && *val == cfg_handle)
			return 0;
	}
	return -ENODEV;
}

static int scan_arcs_for_cfg_handle(struct mdesc_handle *md, u64 grp,
				    u32 cfg_handle)
{
	u64 arc, candidate, best_latency = ~(u64)0;

	candidate = MDESC_NODE_NULL;
	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
		u64 target = mdesc_arc_target(md, arc);
		const char *name = mdesc_node_name(md, target);
		const u64 *val;

		if (strcmp(name, "pio-latency-group"))
			continue;

		val = mdesc_get_property(md, target, "latency", NULL);
		if (!val)
			continue;

		if (*val < best_latency) {
			candidate = target;
			best_latency = *val;
		}
	}

	if (candidate == MDESC_NODE_NULL)
		return -ENODEV;

	return scan_pio_for_cfg_handle(md, candidate, cfg_handle);
}

int of_node_to_nid(struct device_node *dp)
{
	const struct linux_prom64_registers *regs;
	struct mdesc_handle *md;
	u32 cfg_handle;
	int count, nid;
	u64 grp;

	/* This is the right thing to do on currently supported
	 * SUN4U NUMA platforms as well, as the PCI controller does
	 * not sit behind any particular memory controller.
	 */
	if (!mlgroups)
		return -1;

	regs = of_get_property(dp, "reg", NULL);
	if (!regs)
		return -1;

	cfg_handle = (regs->phys_addr >> 32UL) & 0x0fffffff;

	md = mdesc_grab();

	count = 0;
	nid = -1;
	mdesc_for_each_node_by_name(md, grp, "group") {
		if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) {
			nid = count;
			break;
		}
		count++;
	}

	mdesc_release(md);

	return nid;
}
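/* Editor's illustration, not part of this file or patch: a standalone sketch
 * of the arithmetic of_node_to_nid() uses to derive the PCI cfg-handle it
 * searches for in the machine description.  The "reg" phys_addr value below
 * is hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t phys_addr = 0x0000040000000000ULL;	/* made-up "reg" phys_addr */
	uint32_t cfg_handle = (phys_addr >> 32) & 0x0fffffff;

	/* Each MD "group" node is then scanned in order; the first group whose
	 * lowest-latency pio-latency-group leads to this cfg-handle supplies
	 * the NUMA node id by its position in the scan.
	 */
	printf("cfg_handle = 0x%x\n", (unsigned int)cfg_handle);
	return 0;
}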

static void __init add_node_ranges(void)
{
	struct memblock_region *reg;

	for_each_memblock(memory, reg) {
		unsigned long size = reg->size;
		unsigned long start, end;

		start = reg->base;
		end = start + size;
		while (start < end) {
			unsigned long this_end;
			int nid;

			this_end = memblock_nid_range(start, end, &nid);

			numadbg("Setting memblock NUMA node nid[%d] "
				"start[%lx] end[%lx]\n",
				nid, start, this_end);

			memblock_set_node(start, this_end - start,
					  &memblock.memory, nid);
			start = this_end;
		}
	}
}

static int __init grab_mlgroups(struct mdesc_handle *md)
{
	unsigned long paddr;
	int count = 0;
	u64 node;

	mdesc_for_each_node_by_name(md, node, "memory-latency-group")
		count++;
	if (!count)
		return -ENOENT;

	paddr = memblock_alloc(count * sizeof(struct mdesc_mlgroup),
			       SMP_CACHE_BYTES);
	if (!paddr)
		return -ENOMEM;

	mlgroups = __va(paddr);
	num_mlgroups = count;

	count = 0;
	mdesc_for_each_node_by_name(md, node, "memory-latency-group") {
		struct mdesc_mlgroup *m = &mlgroups[count++];
		const u64 *val;

		m->node = node;

		val = mdesc_get_property(md, node, "latency", NULL);
		m->latency = *val;
		val = mdesc_get_property(md, node, "address-match", NULL);
		m->match = *val;
		val = mdesc_get_property(md, node, "address-mask", NULL);
		m->mask = *val;

		numadbg("MLGROUP[%d]: node[%llx] latency[%llx] "
			"match[%llx] mask[%llx]\n",
			count - 1, m->node, m->latency, m->match, m->mask);
	}

	return 0;
}

static int __init grab_mblocks(struct mdesc_handle *md)
{
	unsigned long paddr;
	int count = 0;
	u64 node;

	mdesc_for_each_node_by_name(md, node, "mblock")
		count++;
	if (!count)
		return -ENOENT;

	paddr = memblock_alloc(count * sizeof(struct mdesc_mblock),
			       SMP_CACHE_BYTES);
	if (!paddr)
		return -ENOMEM;

	mblocks = __va(paddr);
	num_mblocks = count;

	count = 0;
	mdesc_for_each_node_by_name(md, node, "mblock") {
		struct mdesc_mblock *m = &mblocks[count++];
		const u64 *val;

		val = mdesc_get_property(md, node, "base", NULL);
		m->base = *val;
		val = mdesc_get_property(md, node, "size", NULL);
		m->size = *val;
		val = mdesc_get_property(md, node,
					 "address-congruence-offset", NULL);

		/* The address-congruence-offset property is optional.
		 * Explicitly zero it to identify this case.
		 */
		if (val)
			m->offset = *val;
		else
			m->offset = 0UL;

		numadbg("MBLOCK[%d]: base[%llx] size[%llx] offset[%llx]\n",
			count - 1, m->base, m->size, m->offset);
	}

	return 0;
}

static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
					       u64 grp, cpumask_t *mask)
{
	u64 arc;

	cpumask_clear(mask);

	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) {
		u64 target = mdesc_arc_target(md, arc);
		const char *name = mdesc_node_name(md, target);
		const u64 *id;

		if (strcmp(name, "cpu"))
			continue;
		id = mdesc_get_property(md, target, "id", NULL);
		if (*id < nr_cpu_ids)
			cpumask_set_cpu(*id, mask);
	}
}

static struct mdesc_mlgroup * __init find_mlgroup(u64 node)
{
	int i;

	for (i = 0; i < num_mlgroups; i++) {
		struct mdesc_mlgroup *m = &mlgroups[i];
		if (m->node == node)
			return m;
	}
	return NULL;
}

static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
				      int index)
{
	struct mdesc_mlgroup *candidate = NULL;
	u64 arc, best_latency = ~(u64)0;
	struct node_mem_mask *n;

	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
		u64 target = mdesc_arc_target(md, arc);
		struct mdesc_mlgroup *m = find_mlgroup(target);
		if (!m)
			continue;
		if (m->latency < best_latency) {
			candidate = m;
			best_latency = m->latency;
		}
	}
	if (!candidate)
		return -ENOENT;

	if (num_node_masks != index) {
		printk(KERN_ERR "Inconsistent NUMA state, "
		       "index[%d] != num_node_masks[%d]\n",
		       index, num_node_masks);
		return -EINVAL;
	}

	n = &node_masks[num_node_masks++];

	n->mask = candidate->mask;
	n->val = candidate->match;

	numadbg("NUMA NODE[%d]: mask[%lx] val[%lx] (latency[%llx])\n",
		index, n->mask, n->val, candidate->latency);

	return 0;
}
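/* Editor's illustration, not part of this file or patch: a simplified,
 * standalone sketch of how a { mask, val } pair per NUMA node, taken from
 * that node's best memory-latency-group as above, classifies a real address
 * via (addr & mask) == val.  Table values and names here are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

struct mem_mask { uint64_t mask, val; };

static int lookup_node(const struct mem_mask *tbl, int n, uint64_t addr)
{
	for (int i = 0; i < n; i++)
		if ((addr & tbl[i].mask) == tbl[i].val)
			return i;
	return 0;	/* unmatched: default to node 0 in this sketch */
}

int main(void)
{
	struct mem_mask tbl[] = {
		{ 0x200000000ULL, 0x000000000ULL },	/* node 0 */
		{ 0x200000000ULL, 0x200000000ULL },	/* node 1 */
	};

	printf("%d\n", lookup_node(tbl, 2, 0x020400000ULL));	/* -> 0 */
	printf("%d\n", lookup_node(tbl, 2, 0x220400000ULL));	/* -> 1 */
	return 0;
}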

static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
					 int index)
{
	cpumask_t mask;
	int cpu;

	numa_parse_mdesc_group_cpus(md, grp, &mask);

	for_each_cpu(cpu, &mask)
		numa_cpu_lookup_table[cpu] = index;
	cpumask_copy(&numa_cpumask_lookup_table[index], &mask);

	if (numa_debug) {
		printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index);
		for_each_cpu(cpu, &mask)
			printk("%d ", cpu);
		printk("]\n");
	}

	return numa_attach_mlgroup(md, grp, index);
}

static int __init numa_parse_mdesc(void)
{
	struct mdesc_handle *md = mdesc_grab();
	int i, err, count;
	u64 node;

	node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
	if (node == MDESC_NODE_NULL) {
		mdesc_release(md);
		return -ENOENT;
	}

	err = grab_mblocks(md);
	if (err < 0)
		goto out;

	err = grab_mlgroups(md);
	if (err < 0)
		goto out;

	count = 0;
	mdesc_for_each_node_by_name(md, node, "group") {
		err = numa_parse_mdesc_group(md, node, count);
		if (err < 0)
			break;
		count++;
	}

	add_node_ranges();

	for (i = 0; i < num_node_masks; i++) {
		allocate_node_data(i);
		node_set_online(i);
	}

	err = 0;
out:
	mdesc_release(md);
	return err;
}

static int __init numa_parse_jbus(void)
{
	unsigned long cpu, index;

	/* NUMA node id is encoded in bits 36 and higher, and there is
	 * a 1-to-1 mapping from CPU ID to NUMA node ID.
	 */
	index = 0;
	for_each_present_cpu(cpu) {
		numa_cpu_lookup_table[cpu] = index;
		cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu));
		node_masks[index].mask = ~((1UL << 36UL) - 1UL);
		node_masks[index].val = cpu << 36UL;

		index++;
	}
	num_node_masks = index;

	add_node_ranges();

	for (index = 0; index < num_node_masks; index++) {
		allocate_node_data(index);
		node_set_online(index);
	}

	return 0;
}
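/* Editor's illustration, not part of this file or patch: under the JBUS
 * scheme above the node id occupies physical-address bits 36 and up, so the
 * owning node of an address is simply addr >> 36, and each CPU's
 * { mask, val } pair matches exactly that slice of the address space.
 * The address below is hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t mask = ~((1ULL << 36) - 1);
	uint64_t addr = (2ULL << 36) | 0x1000;	/* made-up address on node 2 */

	printf("node = %llu\n", (unsigned long long)(addr >> 36));
	printf("matches node 2? %d\n", (addr & mask) == (2ULL << 36));
	return 0;
}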

static int __init numa_parse_sun4u(void)
{
	if (tlb_type == cheetah || tlb_type == cheetah_plus) {
		unsigned long ver;

		__asm__ ("rdpr %%ver, %0" : "=r" (ver));
		if ((ver >> 32UL) == __JALAPENO_ID ||
		    (ver >> 32UL) == __SERRANO_ID)
			return numa_parse_jbus();
	}
	return -1;
}

static int __init bootmem_init_numa(void)
{
	int err = -1;

	numadbg("bootmem_init_numa()\n");

	if (numa_enabled) {
		if (tlb_type == hypervisor)
			err = numa_parse_mdesc();
		else
			err = numa_parse_sun4u();
	}
	return err;
}

#else

static int bootmem_init_numa(void)
{
	return -1;
}

#endif

static void __init bootmem_init_nonnuma(void)
{
	unsigned long top_of_ram = memblock_end_of_DRAM();
	unsigned long total_ram = memblock_phys_mem_size();

	numadbg("bootmem_init_nonnuma()\n");

	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
	       top_of_ram, total_ram);
	printk(KERN_INFO "Memory hole size: %ldMB\n",
	       (top_of_ram - total_ram) >> 20);

	init_node_masks_nonnuma();
	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
	allocate_node_data(0);
	node_set_online(0);
}

static unsigned long __init bootmem_init(unsigned long phys_base)
{
	unsigned long end_pfn;

	end_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
	max_pfn = max_low_pfn = end_pfn;
	min_low_pfn = (phys_base >> PAGE_SHIFT);

	if (bootmem_init_numa() < 0)
		bootmem_init_nonnuma();

	/* Dump memblock with node info. */
	memblock_dump_all();

	/* XXX cpu notifier XXX */

	sparse_memory_present_with_active_regions(MAX_NUMNODES);
	sparse_init();

	return end_pfn;
}

static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
static int pall_ents __initdata;

#ifdef CONFIG_DEBUG_PAGEALLOC
static unsigned long __ref kernel_map_range(unsigned long pstart,
					    unsigned long pend, pgprot_t prot)
{
	unsigned long vstart = PAGE_OFFSET + pstart;
	unsigned long vend = PAGE_OFFSET + pend;
	unsigned long alloc_bytes = 0UL;

	if ((vstart & ~PAGE_MASK) || (vend & ~PAGE_MASK)) {
		prom_printf("kernel_map: Unaligned physmem[%lx:%lx]\n",
			    vstart, vend);
		prom_halt();
	}

	while (vstart < vend) {
		unsigned long this_end, paddr = __pa(vstart);
		pgd_t *pgd = pgd_offset_k(vstart);
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;

		pud = pud_offset(pgd, vstart);
		if (pud_none(*pud)) {
			pmd_t *new;

			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
			alloc_bytes += PAGE_SIZE;
			pud_populate(&init_mm, pud, new);
		}

		pmd = pmd_offset(pud, vstart);
		if (!pmd_present(*pmd)) {
			pte_t *new;

			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
			alloc_bytes += PAGE_SIZE;
			pmd_populate_kernel(&init_mm, pmd, new);
		}

		pte = pte_offset_kernel(pmd, vstart);
		this_end = (vstart + PMD_SIZE) & PMD_MASK;
		if (this_end > vend)
			this_end = vend;

		while (vstart < this_end) {
			pte_val(*pte) = (paddr | pgprot_val(prot));

			vstart += PAGE_SIZE;
			paddr += PAGE_SIZE;
			pte++;
		}
	}

	return alloc_bytes;
}

extern unsigned int kvmap_linear_patch[1];
#endif /* CONFIG_DEBUG_PAGEALLOC */

static void __init kpte_set_val(unsigned long index, unsigned long val)
{
	unsigned long *ptr = kpte_linear_bitmap;

	val <<= ((index % (BITS_PER_LONG / 2)) * 2);
	ptr += (index / (BITS_PER_LONG / 2));

	*ptr |= val;
}

static const unsigned long kpte_shift_min = 28;	/* 256MB */
static const unsigned long kpte_shift_max = 34;	/* 16GB */
static const unsigned long kpte_shift_incr = 3;

static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end,
					   unsigned long shift)
{
	unsigned long size = (1UL << shift);
	unsigned long mask = (size - 1UL);
	unsigned long remains = end - start;
	unsigned long val;

	if (remains < size || (start & mask))
		return start;

	/* VAL maps:
	 *
	 * shift 28 --> kern_linear_pte_xor index 1
	 * shift 31 --> kern_linear_pte_xor index 2
	 * shift 34 --> kern_linear_pte_xor index 3
	 */
	val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1;

	remains &= ~mask;
	if (shift != kpte_shift_max)
		remains = size;

	while (remains) {
		unsigned long index = start >> kpte_shift_min;

		kpte_set_val(index, val);

		start += 1UL << kpte_shift_min;
		remains -= 1UL << kpte_shift_min;
	}

	return start;
}
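/* Editor's illustration, not part of this file or patch: the helper above
 * records, for every 256MB chunk of the linear mapping, which
 * kern_linear_pte_xor entry to use, packed two bits per chunk into
 * kpte_linear_bitmap.  The shift-to-index mapping is ((shift - 28) / 3) + 1,
 * and on a 64-bit long each bitmap word holds 32 two-bit slots.
 */
#include <stdio.h>

int main(void)
{
	for (unsigned long shift = 28; shift <= 34; shift += 3)
		printf("shift %lu -> kern_linear_pte_xor index %lu\n",
		       shift, ((shift - 28) / 3) + 1);

	/* Packing: chunk index i lands in word i / 32, at bit offset (i % 32) * 2. */
	unsigned long i = 37;
	printf("chunk %lu -> word %lu, bit offset %lu\n", i, i / 32, (i % 32) * 2);
	return 0;
}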

static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
{
	unsigned long smallest_size, smallest_mask;
	unsigned long s;

	smallest_size = (1UL << kpte_shift_min);
	smallest_mask = (smallest_size - 1UL);

	while (start < end) {
		unsigned long orig_start = start;

		for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) {
			start = kpte_mark_using_shift(start, end, s);

			if (start != orig_start)
				break;
		}

		if (start == orig_start)
			start = (start + smallest_size) & ~smallest_mask;
	}
}

static void __init init_kpte_bitmap(void)
{
	unsigned long i;

	for (i = 0; i < pall_ents; i++) {
		unsigned long phys_start, phys_end;

		phys_start = pall[i].phys_addr;
		phys_end = phys_start + pall[i].reg_size;

		mark_kpte_bitmap(phys_start, phys_end);
	}
}

static void __init kernel_physical_mapping_init(void)
{
#ifdef CONFIG_DEBUG_PAGEALLOC
	unsigned long i, mem_alloced = 0UL;

	for (i = 0; i < pall_ents; i++) {
		unsigned long phys_start, phys_end;

		phys_start = pall[i].phys_addr;
		phys_end = phys_start + pall[i].reg_size;

		mem_alloced += kernel_map_range(phys_start, phys_end,
						PAGE_KERNEL);
	}

	printk("Allocated %ld bytes for kernel page tables.\n",
	       mem_alloced);

	kvmap_linear_patch[0] = 0x01000000; /* nop */
	flushi(&kvmap_linear_patch[0]);

	__flush_tlb_all();
#endif
}

#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT;
	unsigned long phys_end = phys_start + (numpages * PAGE_SIZE);

	kernel_map_range(phys_start, phys_end,
			 (enable ? PAGE_KERNEL : __pgprot(0)));

	flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
			       PAGE_OFFSET + phys_end);

	/* we should perform an IPI and flush all tlbs,
	 * but that can deadlock->flush only current cpu.
	 */
	__flush_tlb_kernel_range(PAGE_OFFSET + phys_start,
				 PAGE_OFFSET + phys_end);
}
#endif

unsigned long __init find_ecache_flush_span(unsigned long size)
{
	int i;

	for (i = 0; i < pavail_ents; i++) {
		if (pavail[i].reg_size >= size)
			return pavail[i].phys_addr;
	}

	return ~0UL;
}

unsigned long PAGE_OFFSET;
EXPORT_SYMBOL(PAGE_OFFSET);

static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits)
{
	unsigned long final_shift;
	unsigned int val = *insn;
	unsigned int cnt;

	/* We are patching in ilog2(max_supported_phys_address), and
	 * we are doing so in a manner similar to a relocation addend.
	 * That is, we are adding the shift value to whatever value
	 * is in the shift instruction count field already.
	 */
	cnt = (val & 0x3f);
	val &= ~0x3f;

	/* If we are trying to shift >= 64 bits, clear the destination
	 * register.  This can happen when phys_bits ends up being equal
	 * to MAX_PHYS_ADDRESS_BITS.
	 */
	final_shift = (cnt + (64 - phys_bits));
	if (final_shift >= 64) {
		unsigned int rd = (val >> 25) & 0x1f;

		val = 0x80100000 | (rd << 25);
	} else {
		val |= final_shift;
	}
	*insn = val;

	__asm__ __volatile__("flush %0"
			     : /* no outputs */
			     : "r" (insn));
}
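/* Editor's illustration, not part of this file or patch: the arithmetic above
 * treats the shift count already present in the instruction as an addend.
 * With the hypothetical values below, a pre-existing count of 0 and 47
 * physical address bits patch in a shift of 17; a count large enough to push
 * the total to 64 or more would take the "clear the destination register"
 * path instead.
 */
#include <stdio.h>

int main(void)
{
	unsigned int cnt = 0;			/* count field already in the insn */
	unsigned long phys_bits = 47;		/* made-up max physical bits */
	unsigned long final_shift = cnt + (64 - phys_bits);

	if (final_shift >= 64)
		printf("would clear the destination register\n");
	else
		printf("patched shift count = %lu\n", final_shift);
	return 0;
}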

static void __init page_offset_shift_patch(unsigned long phys_bits)
{
	extern unsigned int __page_offset_shift_patch;
	extern unsigned int __page_offset_shift_patch_end;
	unsigned int *p;

	p = &__page_offset_shift_patch;
	while (p < &__page_offset_shift_patch_end) {
		unsigned int *insn = (unsigned int *)(unsigned long)*p;

		page_offset_shift_patch_one(insn, phys_bits);

		p++;
	}
}

static void __init setup_page_offset(void)
{
	unsigned long max_phys_bits = 40;

	if (tlb_type == cheetah || tlb_type == cheetah_plus) {
		max_phys_bits = 42;
	} else if (tlb_type == hypervisor) {
		switch (sun4v_chip_type) {
		case SUN4V_CHIP_NIAGARA1:
		case SUN4V_CHIP_NIAGARA2:
			max_phys_bits = 39;
			break;
		case SUN4V_CHIP_NIAGARA3:
			max_phys_bits = 43;
			break;
		case SUN4V_CHIP_NIAGARA4:
		case SUN4V_CHIP_NIAGARA5:
		case SUN4V_CHIP_SPARC64X:
		default:
			max_phys_bits = 47;
			break;
		}
	}

	if (max_phys_bits > MAX_PHYS_ADDRESS_BITS) {
		prom_printf("MAX_PHYS_ADDRESS_BITS is too small, need %lu\n",
			    max_phys_bits);
		prom_halt();
	}

	PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits);

	pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
		PAGE_OFFSET, max_phys_bits);

	page_offset_shift_patch(max_phys_bits);
}

static void __init tsb_phys_patch(void)
{
	struct tsb_ldquad_phys_patch_entry *pquad;
	struct tsb_phys_patch_entry *p;

	pquad = &__tsb_ldquad_phys_patch;
	while (pquad < &__tsb_ldquad_phys_patch_end) {
		unsigned long addr = pquad->addr;

		if (tlb_type == hypervisor)
			*(unsigned int *) addr = pquad->sun4v_insn;
		else
			*(unsigned int *) addr = pquad->sun4u_insn;
		wmb();
		__asm__ __volatile__("flush %0"
				     : /* no outputs */
				     : "r" (addr));

		pquad++;
	}

	p = &__tsb_phys_patch;
	while (p < &__tsb_phys_patch_end) {
		unsigned long addr = p->addr;

		*(unsigned int *) addr = p->insn;
		wmb();
		__asm__ __volatile__("flush %0"
				     : /* no outputs */
				     : "r" (addr));

		p++;
	}
}

/* Don't mark as init, we give this to the Hypervisor. */
#ifndef CONFIG_DEBUG_PAGEALLOC
#define NUM_KTSB_DESCR	2
#else
#define NUM_KTSB_DESCR	1
#endif
static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR];
extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa)
{
	pa >>= KTSB_PHYS_SHIFT;

	while (start < end) {
		unsigned int *ia = (unsigned int *)(unsigned long)*start;

		ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10);
		__asm__ __volatile__("flush %0" : : "r" (ia));

		ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff);
		__asm__ __volatile__("flush %0" : : "r" (ia + 1));

		start++;
	}
}
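/* Editor's illustration, not part of this file or patch: patch_one_ktsb_phys()
 * splits the (already right-shifted) TSB physical address across two
 * instructions, the upper bits into a 22-bit field and the low 10 bits into
 * another.  The check below just confirms the two pieces recombine to the
 * original value; the address and the stand-in shift are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t pa = 0x12345678000ULL >> 12;	/* assume a 12-bit stand-in for KTSB_PHYS_SHIFT */
	uint64_t hi = (pa >> 10) & 0x3fffff;	/* goes into the 22-bit field */
	uint64_t lo = pa & 0x3ff;		/* goes into the 10-bit field */

	printf("recombined ok? %d\n", ((hi << 10) | lo) == pa);
	return 0;
}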

static void ktsb_phys_patch(void)
{
	extern unsigned int __swapper_tsb_phys_patch;
	extern unsigned int __swapper_tsb_phys_patch_end;
	unsigned long ktsb_pa;

	ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE);
	patch_one_ktsb_phys(&__swapper_tsb_phys_patch,
			    &__swapper_tsb_phys_patch_end, ktsb_pa);
#ifndef CONFIG_DEBUG_PAGEALLOC
	{
	extern unsigned int __swapper_4m_tsb_phys_patch;
	extern unsigned int __swapper_4m_tsb_phys_patch_end;
	ktsb_pa = (kern_base +
		   ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
	patch_one_ktsb_phys(&__swapper_4m_tsb_phys_patch,
			    &__swapper_4m_tsb_phys_patch_end, ktsb_pa);
	}
#endif
}

static void __init sun4v_ktsb_init(void)
{
	unsigned long ktsb_pa;

	/* First KTSB for PAGE_SIZE mappings. */
	ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE);

	switch (PAGE_SIZE) {
	case 8 * 1024:
	default:
		ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_8K;
		ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_8K;
		break;

	case 64 * 1024:
		ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_64K;
		ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_64K;
		break;

	case 512 * 1024:
		ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_512K;
		ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_512K;
		break;

	case 4 * 1024 * 1024:
		ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_4MB;
		ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_4MB;
		break;
	}

	ktsb_descr[0].assoc = 1;
	ktsb_descr[0].num_ttes = KERNEL_TSB_NENTRIES;
	ktsb_descr[0].ctx_idx = 0;
	ktsb_descr[0].tsb_base = ktsb_pa;
	ktsb_descr[0].resv = 0;

#ifndef CONFIG_DEBUG_PAGEALLOC
	/* Second KTSB for 4MB/256MB/2GB/16GB mappings. */
	ktsb_pa = (kern_base +
		   ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));

	ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB;
	ktsb_descr[1].pgsz_mask = ((HV_PGSZ_MASK_4MB |
				    HV_PGSZ_MASK_256MB |
				    HV_PGSZ_MASK_2GB |
				    HV_PGSZ_MASK_16GB) &
				   cpu_pgsz_mask);
	ktsb_descr[1].assoc = 1;
	ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES;
	ktsb_descr[1].ctx_idx = 0;
	ktsb_descr[1].tsb_base = ktsb_pa;
	ktsb_descr[1].resv = 0;
#endif
}

void sun4v_ktsb_register(void)
{
	unsigned long pa, ret;

	pa = kern_base + ((unsigned long)&ktsb_descr[0] - KERNBASE);

	ret = sun4v_mmu_tsb_ctx0(NUM_KTSB_DESCR, pa);
	if (ret != 0) {
		prom_printf("hypervisor_mmu_tsb_ctx0[%lx]: "
			    "errors with %lx\n", pa, ret);
		prom_halt();
	}
}

static void __init sun4u_linear_pte_xor_finalize(void)
{
#ifndef CONFIG_DEBUG_PAGEALLOC
	/* This is where we would add Panther support for
	 * 32MB and 256MB pages.
	 */
#endif
}

static void __init sun4v_linear_pte_xor_finalize(void)
{
#ifndef CONFIG_DEBUG_PAGEALLOC
	if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
		kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
			PAGE_OFFSET;
		kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
					   _PAGE_P_4V | _PAGE_W_4V);
	} else {
		kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
	}

	if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
		kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
			PAGE_OFFSET;
		kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V |
					   _PAGE_P_4V | _PAGE_W_4V);
	} else {
		kern_linear_pte_xor[2] = kern_linear_pte_xor[1];
	}

	if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
		kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
			PAGE_OFFSET;
		kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V |
					   _PAGE_P_4V | _PAGE_W_4V);
	} else {
		kern_linear_pte_xor[3] = kern_linear_pte_xor[2];
	}
#endif
}

/* paging_init() sets up the page tables */

static unsigned long last_valid_pfn;
pgd_t swapper_pg_dir[PTRS_PER_PGD];

static void sun4u_pgprot_init(void);
static void sun4v_pgprot_init(void);

void __init paging_init(void)
{
	unsigned long end_pfn, shift, phys_base;
	unsigned long real_end, i;
	int node;

	setup_page_offset();

	/* These build time checks make sure that the dcache_dirty_cpu()
1869 * page->flags usage will work. 1872 * page->flags usage will work.
1870 * 1873 *
1871 * When a page gets marked as dcache-dirty, we store the 1874 * When a page gets marked as dcache-dirty, we store the
1872 * cpu number starting at bit 32 in the page->flags. Also, 1875 * cpu number starting at bit 32 in the page->flags. Also,
1873 * functions like clear_dcache_dirty_cpu use the cpu mask 1876 * functions like clear_dcache_dirty_cpu use the cpu mask
1874 * in 13-bit signed-immediate instruction fields. 1877 * in 13-bit signed-immediate instruction fields.
1875 */ 1878 */
1876 1879
1877 /* 1880 /*
1878 * Page flags must not reach into upper 32 bits that are used 1881 * Page flags must not reach into upper 32 bits that are used
1879 * for the cpu number 1882 * for the cpu number
1880 */ 1883 */
1881 BUILD_BUG_ON(NR_PAGEFLAGS > 32); 1884 BUILD_BUG_ON(NR_PAGEFLAGS > 32);
1882 1885
1883 /* 1886 /*
1884 * The bit fields placed in the high range must not reach below 1887 * The bit fields placed in the high range must not reach below
1885 * the 32 bit boundary. Otherwise we cannot place the cpu field 1888 * the 32 bit boundary. Otherwise we cannot place the cpu field
1886 * at the 32 bit boundary. 1889 * at the 32 bit boundary.
1887 */ 1890 */
1888 BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH + 1891 BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH +
1889 ilog2(roundup_pow_of_two(NR_CPUS)) > 32); 1892 ilog2(roundup_pow_of_two(NR_CPUS)) > 32);
1890 1893
1891 BUILD_BUG_ON(NR_CPUS > 4096); 1894 BUILD_BUG_ON(NR_CPUS > 4096);
1892 1895
1893 kern_base = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB; 1896 kern_base = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB;
1894 kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; 1897 kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
1895 1898
1896 /* Invalidate both kernel TSBs. */ 1899 /* Invalidate both kernel TSBs. */
1897 memset(swapper_tsb, 0x40, sizeof(swapper_tsb)); 1900 memset(swapper_tsb, 0x40, sizeof(swapper_tsb));
1898 #ifndef CONFIG_DEBUG_PAGEALLOC 1901 #ifndef CONFIG_DEBUG_PAGEALLOC
1899 memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); 1902 memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
1900 #endif 1903 #endif
1901 1904
1902 if (tlb_type == hypervisor) 1905 if (tlb_type == hypervisor)
1903 sun4v_pgprot_init(); 1906 sun4v_pgprot_init();
1904 else 1907 else
1905 sun4u_pgprot_init(); 1908 sun4u_pgprot_init();
1906 1909
1907 if (tlb_type == cheetah_plus || 1910 if (tlb_type == cheetah_plus ||
1908 tlb_type == hypervisor) { 1911 tlb_type == hypervisor) {
1909 tsb_phys_patch(); 1912 tsb_phys_patch();
1910 ktsb_phys_patch(); 1913 ktsb_phys_patch();
1911 } 1914 }
1912 1915
1913 if (tlb_type == hypervisor) 1916 if (tlb_type == hypervisor)
1914 sun4v_patch_tlb_handlers(); 1917 sun4v_patch_tlb_handlers();
1915 1918
1916 /* Find available physical memory... 1919 /* Find available physical memory...
1917 * 1920 *
1918 * Read it twice in order to work around a bug in openfirmware. 1921 * Read it twice in order to work around a bug in openfirmware.
1919 * The call to grab this table itself can cause openfirmware to 1922 * The call to grab this table itself can cause openfirmware to
1920 * allocate memory, which in turn can take away some space from 1923 * allocate memory, which in turn can take away some space from
1921 * the list of available memory. Reading it twice makes sure 1924 * the list of available memory. Reading it twice makes sure
1922 * we really do get the final value. 1925 * we really do get the final value.
1923 */ 1926 */
1924 read_obp_translations(); 1927 read_obp_translations();
1925 read_obp_memory("reg", &pall[0], &pall_ents); 1928 read_obp_memory("reg", &pall[0], &pall_ents);
1926 read_obp_memory("available", &pavail[0], &pavail_ents); 1929 read_obp_memory("available", &pavail[0], &pavail_ents);
1927 read_obp_memory("available", &pavail[0], &pavail_ents); 1930 read_obp_memory("available", &pavail[0], &pavail_ents);
1928 1931
1929 phys_base = 0xffffffffffffffffUL; 1932 phys_base = 0xffffffffffffffffUL;
1930 for (i = 0; i < pavail_ents; i++) { 1933 for (i = 0; i < pavail_ents; i++) {
1931 phys_base = min(phys_base, pavail[i].phys_addr); 1934 phys_base = min(phys_base, pavail[i].phys_addr);
1932 memblock_add(pavail[i].phys_addr, pavail[i].reg_size); 1935 memblock_add(pavail[i].phys_addr, pavail[i].reg_size);
1933 } 1936 }
1934 1937
1935 memblock_reserve(kern_base, kern_size); 1938 memblock_reserve(kern_base, kern_size);
1936 1939
1937 find_ramdisk(phys_base); 1940 find_ramdisk(phys_base);
1938 1941
1939 memblock_enforce_memory_limit(cmdline_memory_size); 1942 memblock_enforce_memory_limit(cmdline_memory_size);
1940 1943
1941 memblock_allow_resize(); 1944 memblock_allow_resize();
1942 memblock_dump_all(); 1945 memblock_dump_all();
1943 1946
1944 set_bit(0, mmu_context_bmap); 1947 set_bit(0, mmu_context_bmap);
1945 1948
1946 shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE); 1949 shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE);
1947 1950
1948 real_end = (unsigned long)_end; 1951 real_end = (unsigned long)_end;
1949 num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << ILOG2_4MB); 1952 num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << ILOG2_4MB);
1950 printk("Kernel: Using %d locked TLB entries for main kernel image.\n", 1953 printk("Kernel: Using %d locked TLB entries for main kernel image.\n",
1951 num_kernel_image_mappings); 1954 num_kernel_image_mappings);
1952 1955
1953 /* Set kernel pgd to upper alias so physical page computations 1956 /* Set kernel pgd to upper alias so physical page computations
1954 * work. 1957 * work.
1955 */ 1958 */
	init_mm.pgd += ((shift) / (sizeof(pgd_t)));

	memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir));

	/* Now can init the kernel/bad page tables. */
	pud_set(pud_offset(&swapper_pg_dir[0], 0),
		swapper_low_pmd_dir + (shift / sizeof(pgd_t)));

	inherit_prom_mappings();

	init_kpte_bitmap();

	/* Ok, we can use our TLB miss and window trap handlers safely. */
	setup_tba();

	__flush_tlb_all();

	prom_build_devicetree();
	of_populate_present_mask();
#ifndef CONFIG_SMP
	of_fill_in_cpu_data();
#endif

	if (tlb_type == hypervisor) {
		sun4v_mdesc_init();
		mdesc_populate_present_mask(cpu_all_mask);
#ifndef CONFIG_SMP
		mdesc_fill_in_cpu_data(cpu_all_mask);
#endif
		mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask);

		sun4v_linear_pte_xor_finalize();

		sun4v_ktsb_init();
		sun4v_ktsb_register();
	} else {
		unsigned long impl, ver;

		cpu_pgsz_mask = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
				 HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);

		__asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
		impl = ((ver >> 32) & 0xffff);
		if (impl == PANTHER_IMPL)
			cpu_pgsz_mask |= (HV_PGSZ_MASK_32MB |
					  HV_PGSZ_MASK_256MB);

		sun4u_linear_pte_xor_finalize();
	}

	/* Flush the TLBs and the 4M TSB so that the updated linear
	 * pte XOR settings are realized for all mappings.
	 */
	__flush_tlb_all();
#ifndef CONFIG_DEBUG_PAGEALLOC
	memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
#endif
	__flush_tlb_all();

	/* Setup bootmem... */
	last_valid_pfn = end_pfn = bootmem_init(phys_base);

	/* Once the OF device tree and MDESC have been setup, we know
	 * the list of possible cpus.  Therefore we can allocate the
	 * IRQ stacks.
	 */
	for_each_possible_cpu(i) {
		node = cpu_to_node(i);

		softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
							THREAD_SIZE,
							THREAD_SIZE, 0);
		hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
							THREAD_SIZE,
							THREAD_SIZE, 0);
	}

	kernel_physical_mapping_init();

	{
		unsigned long max_zone_pfns[MAX_NR_ZONES];

		memset(max_zone_pfns, 0, sizeof(max_zone_pfns));

		max_zone_pfns[ZONE_NORMAL] = end_pfn;

		free_area_init_nodes(max_zone_pfns);
	}

	printk("Booting Linux...\n");
}

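/* A physical address counts as valid here if it falls inside one of the
 * OBP "available" ranges recorded in pavail[], inside the kernel image,
 * or (when configured) inside the initial ramdisk.
 */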
int page_in_phys_avail(unsigned long paddr)
{
	int i;

	paddr &= PAGE_MASK;

	for (i = 0; i < pavail_ents; i++) {
		unsigned long start, end;

		start = pavail[i].phys_addr;
		end = start + pavail[i].reg_size;

		if (paddr >= start && paddr < end)
			return 1;
	}
	if (paddr >= kern_base && paddr < (kern_base + kern_size))
		return 1;
#ifdef CONFIG_BLK_DEV_INITRD
	if (paddr >= __pa(initrd_start) &&
	    paddr < __pa(PAGE_ALIGN(initrd_end)))
		return 1;
#endif

	return 0;
}

static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
static int pavail_rescan_ents __initdata;

/* Certain OBP calls, such as fetching "available" properties, can
 * claim physical memory.  So, along with initializing the valid
 * address bitmap, what we do here is refetch the physical available
 * memory list again, and make sure it provides at least as much
 * memory as 'pavail' does.
 */
static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap)
{
	int i;

	read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents);

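	/* Each bit in the bitmap stands for one 4MB chunk of the linear
	 * mapping (hence the ILOG2_4MB shift below); a chunk is marked
	 * valid only if the rescanned "available" list still covers the
	 * page being examined.
	 */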
	for (i = 0; i < pavail_ents; i++) {
		unsigned long old_start, old_end;

		old_start = pavail[i].phys_addr;
		old_end = old_start + pavail[i].reg_size;
		while (old_start < old_end) {
			int n;

			for (n = 0; n < pavail_rescan_ents; n++) {
				unsigned long new_start, new_end;

				new_start = pavail_rescan[n].phys_addr;
				new_end = new_start +
					pavail_rescan[n].reg_size;

				if (new_start <= old_start &&
				    new_end >= (old_start + PAGE_SIZE)) {
					set_bit(old_start >> ILOG2_4MB, bitmap);
					goto do_next_page;
				}
			}

			prom_printf("mem_init: Lost memory in pavail\n");
			prom_printf("mem_init: OLD start[%lx] size[%lx]\n",
				    pavail[i].phys_addr,
				    pavail[i].reg_size);
			prom_printf("mem_init: NEW start[%lx] size[%lx]\n",
				    pavail_rescan[i].phys_addr,
				    pavail_rescan[i].reg_size);
			prom_printf("mem_init: Cannot continue, aborting.\n");
			prom_halt();

		do_next_page:
			old_start += PAGE_SIZE;
		}
	}
}

static void __init patch_tlb_miss_handler_bitmap(void)
{
	extern unsigned int valid_addr_bitmap_insn[];
	extern unsigned int valid_addr_bitmap_patch[];

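	/* The second instruction word is patched before the first, with a
	 * barrier in between and a flushi() at the end, so the TLB miss
	 * handler never executes a half-updated instruction pair.
	 */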
	valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1];
	mb();
	valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0];
	flushi(&valid_addr_bitmap_insn[0]);
}

static void __init register_page_bootmem_info(void)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES
	int i;

	for_each_online_node(i)
		if (NODE_DATA(i)->node_spanned_pages)
			register_page_bootmem_info_node(NODE_DATA(i));
#endif
}
void __init mem_init(void)
{
	unsigned long addr, last;

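	/* Mark the 4MB chunks backing the kernel image itself as valid
	 * first; the rest is filled in from OBP's "available" list below.
	 */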
	addr = PAGE_OFFSET + kern_base;
	last = PAGE_ALIGN(kern_size) + addr;
	while (addr < last) {
		set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap);
		addr += PAGE_SIZE;
	}

	setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap);
	patch_tlb_miss_handler_bitmap();

	high_memory = __va(last_valid_pfn << PAGE_SHIFT);

	register_page_bootmem_info();
	free_all_bootmem();

	/*
	 * Set up the zero page, mark it reserved, so that page count
	 * is not manipulated when freeing the page from user ptes.
	 */
	mem_map_zero = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0);
	if (mem_map_zero == NULL) {
		prom_printf("paging_init: Cannot alloc zero page.\n");
		prom_halt();
	}
	mark_page_reserved(mem_map_zero);

	mem_init_print_info(NULL);

	if (tlb_type == cheetah || tlb_type == cheetah_plus)
		cheetah_ecache_flush_init();
}

void free_initmem(void)
{
	unsigned long addr, initend;
	int do_free = 1;

	/* If the physical memory maps were trimmed by kernel command
	 * line options, don't even try freeing this initmem stuff up.
	 * The kernel image could have been in the trimmed out region
	 * and if so the freeing below will free invalid page structs.
	 */
	if (cmdline_memory_size)
		do_free = 0;

	/*
	 * The init section is aligned to 8k in vmlinux.lds. Page align for >8k pagesizes.
	 */
	addr = PAGE_ALIGN((unsigned long)(__init_begin));
	initend = (unsigned long)(__init_end) & PAGE_MASK;
	for (; addr < initend; addr += PAGE_SIZE) {
		unsigned long page;

		page = (addr +
			((unsigned long) __va(kern_base)) -
			((unsigned long) KERNBASE));
		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);

		if (do_free)
			free_reserved_page(virt_to_page(page));
	}
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
			   "initrd");
}
#endif

#define _PAGE_CACHE_4U	(_PAGE_CP_4U | _PAGE_CV_4U)
#define _PAGE_CACHE_4V	(_PAGE_CP_4V | _PAGE_CV_4V)
#define __DIRTY_BITS_4U	 (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
#define __DIRTY_BITS_4V	 (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)

pgprot_t PAGE_KERNEL __read_mostly;
EXPORT_SYMBOL(PAGE_KERNEL);

pgprot_t PAGE_KERNEL_LOCKED __read_mostly;
pgprot_t PAGE_COPY __read_mostly;

pgprot_t PAGE_SHARED __read_mostly;
EXPORT_SYMBOL(PAGE_SHARED);

unsigned long pg_iobits __read_mostly;

unsigned long _PAGE_IE __read_mostly;
EXPORT_SYMBOL(_PAGE_IE);

unsigned long _PAGE_E __read_mostly;
EXPORT_SYMBOL(_PAGE_E);

unsigned long _PAGE_CACHE __read_mostly;
EXPORT_SYMBOL(_PAGE_CACHE);

#ifdef CONFIG_SPARSEMEM_VMEMMAP
unsigned long vmemmap_table[VMEMMAP_SIZE];

static long __meminitdata addr_start, addr_end;
static int __meminitdata node_start;

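/* The virtual memmap is backed by 4MB mappings: vmemmap_table keeps one
 * software TTE per VMEMMAP_CHUNK, allocated on demand here and then used
 * to service kernel TLB misses on the vmemmap region.
 */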
int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
			       int node)
{
	unsigned long phys_start = (vstart - VMEMMAP_BASE);
	unsigned long phys_end = (vend - VMEMMAP_BASE);
	unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK;
	unsigned long end = VMEMMAP_ALIGN(phys_end);
	unsigned long pte_base;

	pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
		    _PAGE_CP_4U | _PAGE_CV_4U |
		    _PAGE_P_4U | _PAGE_W_4U);
	if (tlb_type == hypervisor)
		pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
			    _PAGE_CP_4V | _PAGE_CV_4V |
			    _PAGE_P_4V | _PAGE_W_4V);

	for (; addr < end; addr += VMEMMAP_CHUNK) {
		unsigned long *vmem_pp =
			vmemmap_table + (addr >> VMEMMAP_CHUNK_SHIFT);
		void *block;

		if (!(*vmem_pp & _PAGE_VALID)) {
			block = vmemmap_alloc_block(1UL << ILOG2_4MB, node);
			if (!block)
				return -ENOMEM;

			*vmem_pp = pte_base | __pa(block);

			/* check to see if we have contiguous blocks */
			if (addr_end != addr || node_start != node) {
				if (addr_start)
					printk(KERN_DEBUG " [%lx-%lx] on node %d\n",
					       addr_start, addr_end-1, node_start);
				addr_start = addr;
				node_start = node;
			}
			addr_end = addr + VMEMMAP_CHUNK;
		}
	}
	return 0;
}

void __meminit vmemmap_populate_print_last(void)
{
	if (addr_start) {
		printk(KERN_DEBUG " [%lx-%lx] on node %d\n",
		       addr_start, addr_end-1, node_start);
		addr_start = 0;
		addr_end = 0;
		node_start = 0;
	}
}

void vmemmap_free(unsigned long start, unsigned long end)
{
}

#endif /* CONFIG_SPARSEMEM_VMEMMAP */

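/* Populate the sixteen protection_map[] slots, indexed by a vma's
 * read/write/execute/shared permission bits: the private half uses the
 * copy-on-write protections (page_copy) where the shared half uses
 * page_shared.
 */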
static void prot_init_common(unsigned long page_none,
			     unsigned long page_shared,
			     unsigned long page_copy,
			     unsigned long page_readonly,
			     unsigned long page_exec_bit)
{
	PAGE_COPY = __pgprot(page_copy);
	PAGE_SHARED = __pgprot(page_shared);

	protection_map[0x0] = __pgprot(page_none);
	protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit);
	protection_map[0x2] = __pgprot(page_copy & ~page_exec_bit);
	protection_map[0x3] = __pgprot(page_copy & ~page_exec_bit);
	protection_map[0x4] = __pgprot(page_readonly);
	protection_map[0x5] = __pgprot(page_readonly);
	protection_map[0x6] = __pgprot(page_copy);
	protection_map[0x7] = __pgprot(page_copy);
	protection_map[0x8] = __pgprot(page_none);
	protection_map[0x9] = __pgprot(page_readonly & ~page_exec_bit);
	protection_map[0xa] = __pgprot(page_shared & ~page_exec_bit);
	protection_map[0xb] = __pgprot(page_shared & ~page_exec_bit);
	protection_map[0xc] = __pgprot(page_readonly);
	protection_map[0xd] = __pgprot(page_readonly);
	protection_map[0xe] = __pgprot(page_shared);
	protection_map[0xf] = __pgprot(page_shared);
}

static void __init sun4u_pgprot_init(void)
{
	unsigned long page_none, page_shared, page_copy, page_readonly;
	unsigned long page_exec_bit;
	int i;

	PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
				_PAGE_CACHE_4U | _PAGE_P_4U |
				__ACCESS_BITS_4U | __DIRTY_BITS_4U |
				_PAGE_EXEC_4U);
	PAGE_KERNEL_LOCKED = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
				       _PAGE_CACHE_4U | _PAGE_P_4U |
				       __ACCESS_BITS_4U | __DIRTY_BITS_4U |
				       _PAGE_EXEC_4U | _PAGE_L_4U);

	_PAGE_IE = _PAGE_IE_4U;
	_PAGE_E = _PAGE_E_4U;
	_PAGE_CACHE = _PAGE_CACHE_4U;

	pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4U | __DIRTY_BITS_4U |
		     __ACCESS_BITS_4U | _PAGE_E_4U);

#ifdef CONFIG_DEBUG_PAGEALLOC
	kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
#else
	kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
		PAGE_OFFSET;
#endif
	kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
				   _PAGE_P_4U | _PAGE_W_4U);

	for (i = 1; i < 4; i++)
		kern_linear_pte_xor[i] = kern_linear_pte_xor[0];

	_PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
			     _PAGE_SZ64K_4U | _PAGE_SZ8K_4U |
			     _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U);


	page_none = _PAGE_PRESENT_4U | _PAGE_ACCESSED_4U | _PAGE_CACHE_4U;
	page_shared = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
		       __ACCESS_BITS_4U | _PAGE_WRITE_4U | _PAGE_EXEC_4U);
	page_copy = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
		     __ACCESS_BITS_4U | _PAGE_EXEC_4U);
	page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
			 __ACCESS_BITS_4U | _PAGE_EXEC_4U);

	page_exec_bit = _PAGE_EXEC_4U;

	prot_init_common(page_none, page_shared, page_copy, page_readonly,
			 page_exec_bit);
}

static void __init sun4v_pgprot_init(void)
{
	unsigned long page_none, page_shared, page_copy, page_readonly;
	unsigned long page_exec_bit;
	int i;

	PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
				_PAGE_CACHE_4V | _PAGE_P_4V |
				__ACCESS_BITS_4V | __DIRTY_BITS_4V |
				_PAGE_EXEC_4V);
	PAGE_KERNEL_LOCKED = PAGE_KERNEL;

	_PAGE_IE = _PAGE_IE_4V;
	_PAGE_E = _PAGE_E_4V;
	_PAGE_CACHE = _PAGE_CACHE_4V;

#ifdef CONFIG_DEBUG_PAGEALLOC
	kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
#else
	kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
		PAGE_OFFSET;
#endif
	kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
				   _PAGE_P_4V | _PAGE_W_4V);

	for (i = 1; i < 4; i++)
		kern_linear_pte_xor[i] = kern_linear_pte_xor[0];

	pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
		     __ACCESS_BITS_4V | _PAGE_E_4V);

	_PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V |
			     _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V |
			     _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
			     _PAGE_SZ64K_4V | _PAGE_SZ8K_4V);

	page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V;
	page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
		       __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V);
	page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
		     __ACCESS_BITS_4V | _PAGE_EXEC_4V);
	page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V |
			 __ACCESS_BITS_4V | _PAGE_EXEC_4V);

	page_exec_bit = _PAGE_EXEC_4V;

	prot_init_common(page_none, page_shared, page_copy, page_readonly,
			 page_exec_bit);
}

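/* Translate a page size in bytes into the matching TTE size-field bits,
 * using the sun4v encodings under the hypervisor and the sun4u encodings
 * otherwise; unrecognized sizes fall back to 8K.
 */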
unsigned long pte_sz_bits(unsigned long sz)
{
	if (tlb_type == hypervisor) {
		switch (sz) {
		case 8 * 1024:
		default:
			return _PAGE_SZ8K_4V;
		case 64 * 1024:
			return _PAGE_SZ64K_4V;
		case 512 * 1024:
			return _PAGE_SZ512K_4V;
		case 4 * 1024 * 1024:
			return _PAGE_SZ4MB_4V;
		}
	} else {
		switch (sz) {
		case 8 * 1024:
		default:
			return _PAGE_SZ8K_4U;
		case 64 * 1024:
			return _PAGE_SZ64K_4U;
		case 512 * 1024:
			return _PAGE_SZ512K_4U;
		case 4 * 1024 * 1024:
			return _PAGE_SZ4MB_4U;
		}
	}
}

pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space, unsigned long page_size)
{
	pte_t pte;

	pte_val(pte) = page | pgprot_val(pgprot_noncached(prot));
	pte_val(pte) |= (((unsigned long)space) << 32);
	pte_val(pte) |= pte_sz_bits(page_size);

	return pte;
}

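/* Build the 4MB TTE used for the kernel's linear mapping; the sun4u
 * variant sets the lock bit (_PAGE_L_4U), the sun4v variant omits it.
 */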
static unsigned long kern_large_tte(unsigned long paddr)
{
	unsigned long val;

	val = (_PAGE_VALID | _PAGE_SZ4MB_4U |
	       _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U |
	       _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U);
	if (tlb_type == hypervisor)
		val = (_PAGE_VALID | _PAGE_SZ4MB_4V |
		       _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V |
		       _PAGE_EXEC_4V | _PAGE_W_4V);

	return val | paddr;
}

/* If not locked, zap it. */
void __flush_tlb_all(void)
{
	unsigned long pstate;
	int i;

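	/* Interrupts are disabled via PSTATE_IE for the duration of the
	 * flush; the saved %pstate is written back at the end.
	 */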
	__asm__ __volatile__("flushw\n\t"
			     "rdpr %%pstate, %0\n\t"
			     "wrpr %0, %1, %%pstate"
			     : "=r" (pstate)
			     : "i" (PSTATE_IE));
	if (tlb_type == hypervisor) {
		sun4v_mmu_demap_all();
	} else if (tlb_type == spitfire) {
		for (i = 0; i < 64; i++) {
			/* Spitfire Errata #32 workaround */
			/* NOTE: Always runs on spitfire, so no
			 * cheetah+ page size encodings.
			 */
			__asm__ __volatile__("stxa %0, [%1] %2\n\t"
					     "flush %%g6"
					     : /* No outputs */
					     : "r" (0),
					     "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));

			if (!(spitfire_get_dtlb_data(i) & _PAGE_L_4U)) {
				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
						     "membar #Sync"
						     : /* no outputs */
						     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
				spitfire_put_dtlb_data(i, 0x0UL);
			}

			/* Spitfire Errata #32 workaround */
			/* NOTE: Always runs on spitfire, so no
			 * cheetah+ page size encodings.
			 */
			__asm__ __volatile__("stxa %0, [%1] %2\n\t"
					     "flush %%g6"
					     : /* No outputs */
					     : "r" (0),
					     "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));

			if (!(spitfire_get_itlb_data(i) & _PAGE_L_4U)) {
				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
						     "membar #Sync"
						     : /* no outputs */
						     : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
				spitfire_put_itlb_data(i, 0x0UL);
			}
		}
	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
		cheetah_flush_dtlb_all();
		cheetah_flush_itlb_all();
	}
	__asm__ __volatile__("wrpr %0, 0, %%pstate"
			     : : "r" (pstate));
}

pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
			    unsigned long address)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
				       __GFP_REPEAT | __GFP_ZERO);
	pte_t *pte = NULL;

	if (page)
		pte = (pte_t *) page_address(page);

	return pte;
}

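/* User PTE pages additionally go through pgtable_page_ctor() so the
 * generic code can attach its per-page state (such as the split
 * page-table lock); kernel PTE pages above skip it and are released
 * with a plain free_page().
 */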
pgtable_t pte_alloc_one(struct mm_struct *mm,
			unsigned long address)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
				       __GFP_REPEAT | __GFP_ZERO);
	if (!page)
		return NULL;
	if (!pgtable_page_ctor(page)) {
		free_hot_cold_page(page, 0);
		return NULL;
	}
	return (pte_t *) page_address(page);
}

void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
	free_page((unsigned long)pte);
}

static void __pte_free(pgtable_t pte)
{
	struct page *page = virt_to_page(pte);

	pgtable_page_dtor(page);
	__free_page(page);
}

void pte_free(struct mm_struct *mm, pgtable_t pte)
{
	__pte_free(pte);
}

void pgtable_free(void *table, bool is_page)
{
	if (is_page)
		__pte_free(table);
	else
		kmem_cache_free(pgtable_cache, table);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
			  pmd_t *pmd)
{
	unsigned long pte, flags;
	struct mm_struct *mm;
	pmd_t entry = *pmd;

	if (!pmd_large(entry) || !pmd_young(entry))
		return;

	pte = pmd_val(entry);

	/* Don't insert a non-valid PMD into the TSB, we'll deadlock.  */
	if (!(pte & _PAGE_VALID))
		return;

	/* We are fabricating 8MB pages using 4MB real hw pages.  */
	pte |= (addr & (1UL << REAL_HPAGE_SHIFT));

	mm = vma->vm_mm;

	spin_lock_irqsave(&mm->context.lock, flags);

	if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
					addr, pte);

	spin_unlock_irqrestore(&mm->context.lock, flags);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
static void context_reload(void *__data)
{
	struct mm_struct *mm = __data;

	if (mm == current->mm)
		load_secondary_context(mm);
}

void hugetlb_setup(struct pt_regs *regs)
{
	struct mm_struct *mm = current->mm;
	struct tsb_config *tp;

	if (in_atomic() || !mm) {
		const struct exception_table_entry *entry;

		entry = search_exception_tables(regs->tpc);
		if (entry) {
			regs->tpc = entry->fixup;
			regs->tnpc = regs->tpc + 4;
			return;
		}
		pr_alert("Unexpected HugeTLB setup in atomic context.\n");
		die_if_kernel("HugeTSB in atomic", regs);
	}

	tp = &mm->context.tsb_block[MM_TSB_HUGE];
	if (likely(tp->tsb == NULL))
		tsb_grow(mm, MM_TSB_HUGE, 0);

	tsb_context_switch(mm);
	smp_tsb_sync(mm);

	/* On UltraSPARC-III+ and later, configure the second half of
	 * the Data-TLB for huge pages.
	 */
	if (tlb_type == cheetah_plus) {
		unsigned long ctx;

		spin_lock(&ctx_alloc_lock);
		ctx = mm->context.sparc64_ctx_val;
		ctx &= ~CTX_PGSZ_MASK;
		ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
		ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;

		if (ctx != mm->context.sparc64_ctx_val) {
			/* When changing the page size fields, we
			 * must perform a context flush so that no
			 * stale entries match.  This flush must
			 * occur with the original context register
			 * settings.
			 */
			do_flush_tlb_mm(mm);

			/* Reload the context register of all processors
			 * also executing in this address space.
			 */
			mm->context.sparc64_ctx_val = ctx;
			on_each_cpu(context_reload, mm, 0);
		}
		spin_unlock(&ctx_alloc_lock);
	}
}
#endif

#ifdef CONFIG_SMP
#define do_flush_tlb_kernel_range	smp_flush_tlb_kernel_range
#else
#define do_flush_tlb_kernel_range	__flush_tlb_kernel_range
#endif

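/* Kernel-range flushes are split around the [LOW_OBP_ADDRESS,
 * HI_OBP_ADDRESS) window so that the firmware's OBP mappings are never
 * flushed out.
 */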
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (start < HI_OBP_ADDRESS && end > LOW_OBP_ADDRESS) {
		if (start < LOW_OBP_ADDRESS) {
			flush_tsb_kernel_range(start, LOW_OBP_ADDRESS);
			do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS);
		}
		if (end > HI_OBP_ADDRESS) {
			flush_tsb_kernel_range(end, HI_OBP_ADDRESS);
			do_flush_tlb_kernel_range(end, HI_OBP_ADDRESS);
		}
	} else {
		flush_tsb_kernel_range(start, end);
		do_flush_tlb_kernel_range(start, end);
	}
}