Commit a581c2a4697ee264699b364399b73477af408e00

Authored by Heiko Carstens
Committed by Linus Torvalds
1 parent e2c2770096

[PATCH] add __[start|end]_rodata sections to asm-generic/sections.h

Add __start_rodata and __end_rodata to sections.h to avoid extern
declarations.  Needed by s390 code (see following patch).
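
(The asm-generic/sections.h hunk itself is not reproduced in the listing
below; as a minimal sketch, assuming the array-of-char linker-symbol style
used for the other section markers in that header, the added declarations
would look like:

	extern char __start_rodata[], __end_rodata[];	/* sketch: .rodata section bounds */

Declaring them as arrays is also why the i386 hunk below can drop the '&'
in front of them.)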

[akpm@osdl.org: update architectures]
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andi Kleen <ak@muc.de>
Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 4 changed files with 10 additions and 11 deletions

arch/i386/mm/init.c
1 /* 1 /*
2 * linux/arch/i386/mm/init.c 2 * linux/arch/i386/mm/init.c
3 * 3 *
4 * Copyright (C) 1995 Linus Torvalds 4 * Copyright (C) 1995 Linus Torvalds
5 * 5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 */ 7 */
8 8
9 #include <linux/module.h> 9 #include <linux/module.h>
10 #include <linux/signal.h> 10 #include <linux/signal.h>
11 #include <linux/sched.h> 11 #include <linux/sched.h>
12 #include <linux/kernel.h> 12 #include <linux/kernel.h>
13 #include <linux/errno.h> 13 #include <linux/errno.h>
14 #include <linux/string.h> 14 #include <linux/string.h>
15 #include <linux/types.h> 15 #include <linux/types.h>
16 #include <linux/ptrace.h> 16 #include <linux/ptrace.h>
17 #include <linux/mman.h> 17 #include <linux/mman.h>
18 #include <linux/mm.h> 18 #include <linux/mm.h>
19 #include <linux/hugetlb.h> 19 #include <linux/hugetlb.h>
20 #include <linux/swap.h> 20 #include <linux/swap.h>
21 #include <linux/smp.h> 21 #include <linux/smp.h>
22 #include <linux/init.h> 22 #include <linux/init.h>
23 #include <linux/highmem.h> 23 #include <linux/highmem.h>
24 #include <linux/pagemap.h> 24 #include <linux/pagemap.h>
25 #include <linux/poison.h> 25 #include <linux/poison.h>
26 #include <linux/bootmem.h> 26 #include <linux/bootmem.h>
27 #include <linux/slab.h> 27 #include <linux/slab.h>
28 #include <linux/proc_fs.h> 28 #include <linux/proc_fs.h>
29 #include <linux/efi.h> 29 #include <linux/efi.h>
30 #include <linux/memory_hotplug.h> 30 #include <linux/memory_hotplug.h>
31 #include <linux/initrd.h> 31 #include <linux/initrd.h>
32 #include <linux/cpumask.h> 32 #include <linux/cpumask.h>
33 33
34 #include <asm/processor.h> 34 #include <asm/processor.h>
35 #include <asm/system.h> 35 #include <asm/system.h>
36 #include <asm/uaccess.h> 36 #include <asm/uaccess.h>
37 #include <asm/pgtable.h> 37 #include <asm/pgtable.h>
38 #include <asm/dma.h> 38 #include <asm/dma.h>
39 #include <asm/fixmap.h> 39 #include <asm/fixmap.h>
40 #include <asm/e820.h> 40 #include <asm/e820.h>
41 #include <asm/apic.h> 41 #include <asm/apic.h>
42 #include <asm/tlb.h> 42 #include <asm/tlb.h>
43 #include <asm/tlbflush.h> 43 #include <asm/tlbflush.h>
44 #include <asm/sections.h> 44 #include <asm/sections.h>
45 45
46 unsigned int __VMALLOC_RESERVE = 128 << 20; 46 unsigned int __VMALLOC_RESERVE = 128 << 20;
47 47
48 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 48 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
49 unsigned long highstart_pfn, highend_pfn; 49 unsigned long highstart_pfn, highend_pfn;
50 50
51 static int noinline do_test_wp_bit(void); 51 static int noinline do_test_wp_bit(void);
52 52
53 /* 53 /*
54 * Creates a middle page table and puts a pointer to it in the 54 * Creates a middle page table and puts a pointer to it in the
55 * given global directory entry. This only returns the gd entry 55 * given global directory entry. This only returns the gd entry
56 * in non-PAE compilation mode, since the middle layer is folded. 56 * in non-PAE compilation mode, since the middle layer is folded.
57 */ 57 */
58 static pmd_t * __init one_md_table_init(pgd_t *pgd) 58 static pmd_t * __init one_md_table_init(pgd_t *pgd)
59 { 59 {
60 pud_t *pud; 60 pud_t *pud;
61 pmd_t *pmd_table; 61 pmd_t *pmd_table;
62 62
63 #ifdef CONFIG_X86_PAE 63 #ifdef CONFIG_X86_PAE
64 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); 64 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
65 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 65 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
66 pud = pud_offset(pgd, 0); 66 pud = pud_offset(pgd, 0);
67 if (pmd_table != pmd_offset(pud, 0)) 67 if (pmd_table != pmd_offset(pud, 0))
68 BUG(); 68 BUG();
69 #else 69 #else
70 pud = pud_offset(pgd, 0); 70 pud = pud_offset(pgd, 0);
71 pmd_table = pmd_offset(pud, 0); 71 pmd_table = pmd_offset(pud, 0);
72 #endif 72 #endif
73 73
74 return pmd_table; 74 return pmd_table;
75 } 75 }
76 76
77 /* 77 /*
78 * Create a page table and place a pointer to it in a middle page 78 * Create a page table and place a pointer to it in a middle page
79 * directory entry. 79 * directory entry.
80 */ 80 */
81 static pte_t * __init one_page_table_init(pmd_t *pmd) 81 static pte_t * __init one_page_table_init(pmd_t *pmd)
82 { 82 {
83 if (pmd_none(*pmd)) { 83 if (pmd_none(*pmd)) {
84 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); 84 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
85 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 85 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
86 if (page_table != pte_offset_kernel(pmd, 0)) 86 if (page_table != pte_offset_kernel(pmd, 0))
87 BUG(); 87 BUG();
88 88
89 return page_table; 89 return page_table;
90 } 90 }
91 91
92 return pte_offset_kernel(pmd, 0); 92 return pte_offset_kernel(pmd, 0);
93 } 93 }
94 94
95 /* 95 /*
96 * This function initializes a certain range of kernel virtual memory 96 * This function initializes a certain range of kernel virtual memory
97 * with new bootmem page tables, everywhere page tables are missing in 97 * with new bootmem page tables, everywhere page tables are missing in
98 * the given range. 98 * the given range.
99 */ 99 */
100 100
101 /* 101 /*
102 * NOTE: The pagetables are allocated contiguous on the physical space 102 * NOTE: The pagetables are allocated contiguous on the physical space
103 * so we can cache the place of the first one and move around without 103 * so we can cache the place of the first one and move around without
104 * checking the pgd every time. 104 * checking the pgd every time.
105 */ 105 */
106 static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) 106 static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
107 { 107 {
108 pgd_t *pgd; 108 pgd_t *pgd;
109 pud_t *pud; 109 pud_t *pud;
110 pmd_t *pmd; 110 pmd_t *pmd;
111 int pgd_idx, pmd_idx; 111 int pgd_idx, pmd_idx;
112 unsigned long vaddr; 112 unsigned long vaddr;
113 113
114 vaddr = start; 114 vaddr = start;
115 pgd_idx = pgd_index(vaddr); 115 pgd_idx = pgd_index(vaddr);
116 pmd_idx = pmd_index(vaddr); 116 pmd_idx = pmd_index(vaddr);
117 pgd = pgd_base + pgd_idx; 117 pgd = pgd_base + pgd_idx;
118 118
119 for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { 119 for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
120 if (pgd_none(*pgd)) 120 if (pgd_none(*pgd))
121 one_md_table_init(pgd); 121 one_md_table_init(pgd);
122 pud = pud_offset(pgd, vaddr); 122 pud = pud_offset(pgd, vaddr);
123 pmd = pmd_offset(pud, vaddr); 123 pmd = pmd_offset(pud, vaddr);
124 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { 124 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
125 if (pmd_none(*pmd)) 125 if (pmd_none(*pmd))
126 one_page_table_init(pmd); 126 one_page_table_init(pmd);
127 127
128 vaddr += PMD_SIZE; 128 vaddr += PMD_SIZE;
129 } 129 }
130 pmd_idx = 0; 130 pmd_idx = 0;
131 } 131 }
132 } 132 }
133 133
134 static inline int is_kernel_text(unsigned long addr) 134 static inline int is_kernel_text(unsigned long addr)
135 { 135 {
136 if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) 136 if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
137 return 1; 137 return 1;
138 return 0; 138 return 0;
139 } 139 }
140 140
141 /* 141 /*
142 * This maps the physical memory to kernel virtual address space, a total 142 * This maps the physical memory to kernel virtual address space, a total
143 * of max_low_pfn pages, by creating page tables starting from address 143 * of max_low_pfn pages, by creating page tables starting from address
144 * PAGE_OFFSET. 144 * PAGE_OFFSET.
145 */ 145 */
146 static void __init kernel_physical_mapping_init(pgd_t *pgd_base) 146 static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
147 { 147 {
148 unsigned long pfn; 148 unsigned long pfn;
149 pgd_t *pgd; 149 pgd_t *pgd;
150 pmd_t *pmd; 150 pmd_t *pmd;
151 pte_t *pte; 151 pte_t *pte;
152 int pgd_idx, pmd_idx, pte_ofs; 152 int pgd_idx, pmd_idx, pte_ofs;
153 153
154 pgd_idx = pgd_index(PAGE_OFFSET); 154 pgd_idx = pgd_index(PAGE_OFFSET);
155 pgd = pgd_base + pgd_idx; 155 pgd = pgd_base + pgd_idx;
156 pfn = 0; 156 pfn = 0;
157 157
158 for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { 158 for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
159 pmd = one_md_table_init(pgd); 159 pmd = one_md_table_init(pgd);
160 if (pfn >= max_low_pfn) 160 if (pfn >= max_low_pfn)
161 continue; 161 continue;
162 for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { 162 for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
163 unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET; 163 unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
164 164
165 /* Map with big pages if possible, otherwise create normal page tables. */ 165 /* Map with big pages if possible, otherwise create normal page tables. */
166 if (cpu_has_pse) { 166 if (cpu_has_pse) {
167 unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; 167 unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
168 168
169 if (is_kernel_text(address) || is_kernel_text(address2)) 169 if (is_kernel_text(address) || is_kernel_text(address2))
170 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); 170 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
171 else 171 else
172 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); 172 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
173 pfn += PTRS_PER_PTE; 173 pfn += PTRS_PER_PTE;
174 } else { 174 } else {
175 pte = one_page_table_init(pmd); 175 pte = one_page_table_init(pmd);
176 176
177 for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { 177 for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
178 if (is_kernel_text(address)) 178 if (is_kernel_text(address))
179 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); 179 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
180 else 180 else
181 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); 181 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
182 } 182 }
183 } 183 }
184 } 184 }
185 } 185 }
186 } 186 }
187 187
188 static inline int page_kills_ppro(unsigned long pagenr) 188 static inline int page_kills_ppro(unsigned long pagenr)
189 { 189 {
190 if (pagenr >= 0x70000 && pagenr <= 0x7003F) 190 if (pagenr >= 0x70000 && pagenr <= 0x7003F)
191 return 1; 191 return 1;
192 return 0; 192 return 0;
193 } 193 }
194 194
195 extern int is_available_memory(efi_memory_desc_t *); 195 extern int is_available_memory(efi_memory_desc_t *);
196 196
197 int page_is_ram(unsigned long pagenr) 197 int page_is_ram(unsigned long pagenr)
198 { 198 {
199 int i; 199 int i;
200 unsigned long addr, end; 200 unsigned long addr, end;
201 201
202 if (efi_enabled) { 202 if (efi_enabled) {
203 efi_memory_desc_t *md; 203 efi_memory_desc_t *md;
204 void *p; 204 void *p;
205 205
206 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 206 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
207 md = p; 207 md = p;
208 if (!is_available_memory(md)) 208 if (!is_available_memory(md))
209 continue; 209 continue;
210 addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT; 210 addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
211 end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT; 211 end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;
212 212
213 if ((pagenr >= addr) && (pagenr < end)) 213 if ((pagenr >= addr) && (pagenr < end))
214 return 1; 214 return 1;
215 } 215 }
216 return 0; 216 return 0;
217 } 217 }
218 218
219 for (i = 0; i < e820.nr_map; i++) { 219 for (i = 0; i < e820.nr_map; i++) {
220 220
221 if (e820.map[i].type != E820_RAM) /* not usable memory */ 221 if (e820.map[i].type != E820_RAM) /* not usable memory */
222 continue; 222 continue;
223 /* 223 /*
224 * !!!FIXME!!! Some BIOSen report areas as RAM that 224 * !!!FIXME!!! Some BIOSen report areas as RAM that
225 * are not. Notably the 640->1Mb area. We need a sanity 225 * are not. Notably the 640->1Mb area. We need a sanity
226 * check here. 226 * check here.
227 */ 227 */
228 addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; 228 addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
229 end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT; 229 end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
230 if ((pagenr >= addr) && (pagenr < end)) 230 if ((pagenr >= addr) && (pagenr < end))
231 return 1; 231 return 1;
232 } 232 }
233 return 0; 233 return 0;
234 } 234 }
235 235
236 #ifdef CONFIG_HIGHMEM 236 #ifdef CONFIG_HIGHMEM
237 pte_t *kmap_pte; 237 pte_t *kmap_pte;
238 pgprot_t kmap_prot; 238 pgprot_t kmap_prot;
239 239
240 #define kmap_get_fixmap_pte(vaddr) \ 240 #define kmap_get_fixmap_pte(vaddr) \
241 pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr)) 241 pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))
242 242
243 static void __init kmap_init(void) 243 static void __init kmap_init(void)
244 { 244 {
245 unsigned long kmap_vstart; 245 unsigned long kmap_vstart;
246 246
247 /* cache the first kmap pte */ 247 /* cache the first kmap pte */
248 kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); 248 kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
249 kmap_pte = kmap_get_fixmap_pte(kmap_vstart); 249 kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
250 250
251 kmap_prot = PAGE_KERNEL; 251 kmap_prot = PAGE_KERNEL;
252 } 252 }
253 253
254 static void __init permanent_kmaps_init(pgd_t *pgd_base) 254 static void __init permanent_kmaps_init(pgd_t *pgd_base)
255 { 255 {
256 pgd_t *pgd; 256 pgd_t *pgd;
257 pud_t *pud; 257 pud_t *pud;
258 pmd_t *pmd; 258 pmd_t *pmd;
259 pte_t *pte; 259 pte_t *pte;
260 unsigned long vaddr; 260 unsigned long vaddr;
261 261
262 vaddr = PKMAP_BASE; 262 vaddr = PKMAP_BASE;
263 page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); 263 page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
264 264
265 pgd = swapper_pg_dir + pgd_index(vaddr); 265 pgd = swapper_pg_dir + pgd_index(vaddr);
266 pud = pud_offset(pgd, vaddr); 266 pud = pud_offset(pgd, vaddr);
267 pmd = pmd_offset(pud, vaddr); 267 pmd = pmd_offset(pud, vaddr);
268 pte = pte_offset_kernel(pmd, vaddr); 268 pte = pte_offset_kernel(pmd, vaddr);
269 pkmap_page_table = pte; 269 pkmap_page_table = pte;
270 } 270 }
271 271
272 static void __meminit free_new_highpage(struct page *page) 272 static void __meminit free_new_highpage(struct page *page)
273 { 273 {
274 init_page_count(page); 274 init_page_count(page);
275 __free_page(page); 275 __free_page(page);
276 totalhigh_pages++; 276 totalhigh_pages++;
277 } 277 }
278 278
279 void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) 279 void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
280 { 280 {
281 if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { 281 if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
282 ClearPageReserved(page); 282 ClearPageReserved(page);
283 free_new_highpage(page); 283 free_new_highpage(page);
284 } else 284 } else
285 SetPageReserved(page); 285 SetPageReserved(page);
286 } 286 }
287 287
288 static int add_one_highpage_hotplug(struct page *page, unsigned long pfn) 288 static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
289 { 289 {
290 free_new_highpage(page); 290 free_new_highpage(page);
291 totalram_pages++; 291 totalram_pages++;
292 #ifdef CONFIG_FLATMEM 292 #ifdef CONFIG_FLATMEM
293 max_mapnr = max(pfn, max_mapnr); 293 max_mapnr = max(pfn, max_mapnr);
294 #endif 294 #endif
295 num_physpages++; 295 num_physpages++;
296 return 0; 296 return 0;
297 } 297 }
298 298
299 /* 299 /*
300 * Not currently handling the NUMA case. 300 * Not currently handling the NUMA case.
301 * Assuming single node and all memory that 301 * Assuming single node and all memory that
302 * has been added dynamically that would be 302 * has been added dynamically that would be
303 * onlined here is in HIGHMEM 303 * onlined here is in HIGHMEM
304 */ 304 */
305 void online_page(struct page *page) 305 void online_page(struct page *page)
306 { 306 {
307 ClearPageReserved(page); 307 ClearPageReserved(page);
308 add_one_highpage_hotplug(page, page_to_pfn(page)); 308 add_one_highpage_hotplug(page, page_to_pfn(page));
309 } 309 }
310 310
311 311
312 #ifdef CONFIG_NUMA 312 #ifdef CONFIG_NUMA
313 extern void set_highmem_pages_init(int); 313 extern void set_highmem_pages_init(int);
314 #else 314 #else
315 static void __init set_highmem_pages_init(int bad_ppro) 315 static void __init set_highmem_pages_init(int bad_ppro)
316 { 316 {
317 int pfn; 317 int pfn;
318 for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) 318 for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
319 add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); 319 add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
320 totalram_pages += totalhigh_pages; 320 totalram_pages += totalhigh_pages;
321 } 321 }
322 #endif /* CONFIG_FLATMEM */ 322 #endif /* CONFIG_FLATMEM */
323 323
324 #else 324 #else
325 #define kmap_init() do { } while (0) 325 #define kmap_init() do { } while (0)
326 #define permanent_kmaps_init(pgd_base) do { } while (0) 326 #define permanent_kmaps_init(pgd_base) do { } while (0)
327 #define set_highmem_pages_init(bad_ppro) do { } while (0) 327 #define set_highmem_pages_init(bad_ppro) do { } while (0)
328 #endif /* CONFIG_HIGHMEM */ 328 #endif /* CONFIG_HIGHMEM */
329 329
330 unsigned long long __PAGE_KERNEL = _PAGE_KERNEL; 330 unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
331 EXPORT_SYMBOL(__PAGE_KERNEL); 331 EXPORT_SYMBOL(__PAGE_KERNEL);
332 unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; 332 unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
333 333
334 #ifdef CONFIG_NUMA 334 #ifdef CONFIG_NUMA
335 extern void __init remap_numa_kva(void); 335 extern void __init remap_numa_kva(void);
336 #else 336 #else
337 #define remap_numa_kva() do {} while (0) 337 #define remap_numa_kva() do {} while (0)
338 #endif 338 #endif
339 339
340 static void __init pagetable_init (void) 340 static void __init pagetable_init (void)
341 { 341 {
342 unsigned long vaddr; 342 unsigned long vaddr;
343 pgd_t *pgd_base = swapper_pg_dir; 343 pgd_t *pgd_base = swapper_pg_dir;
344 344
345 #ifdef CONFIG_X86_PAE 345 #ifdef CONFIG_X86_PAE
346 int i; 346 int i;
347 /* Init entries of the first-level page table to the zero page */ 347 /* Init entries of the first-level page table to the zero page */
348 for (i = 0; i < PTRS_PER_PGD; i++) 348 for (i = 0; i < PTRS_PER_PGD; i++)
349 set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); 349 set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
350 #endif 350 #endif
351 351
352 /* Enable PSE if available */ 352 /* Enable PSE if available */
353 if (cpu_has_pse) { 353 if (cpu_has_pse) {
354 set_in_cr4(X86_CR4_PSE); 354 set_in_cr4(X86_CR4_PSE);
355 } 355 }
356 356
357 /* Enable PGE if available */ 357 /* Enable PGE if available */
358 if (cpu_has_pge) { 358 if (cpu_has_pge) {
359 set_in_cr4(X86_CR4_PGE); 359 set_in_cr4(X86_CR4_PGE);
360 __PAGE_KERNEL |= _PAGE_GLOBAL; 360 __PAGE_KERNEL |= _PAGE_GLOBAL;
361 __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; 361 __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
362 } 362 }
363 363
364 kernel_physical_mapping_init(pgd_base); 364 kernel_physical_mapping_init(pgd_base);
365 remap_numa_kva(); 365 remap_numa_kva();
366 366
367 /* 367 /*
368 * Fixed mappings, only the page table structure has to be 368 * Fixed mappings, only the page table structure has to be
369 * created - mappings will be set by set_fixmap(): 369 * created - mappings will be set by set_fixmap():
370 */ 370 */
371 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; 371 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
372 page_table_range_init(vaddr, 0, pgd_base); 372 page_table_range_init(vaddr, 0, pgd_base);
373 373
374 permanent_kmaps_init(pgd_base); 374 permanent_kmaps_init(pgd_base);
375 375
376 #ifdef CONFIG_X86_PAE 376 #ifdef CONFIG_X86_PAE
377 /* 377 /*
378 * Add low memory identity-mappings - SMP needs it when 378 * Add low memory identity-mappings - SMP needs it when
379 * starting up on an AP from real-mode. In the non-PAE 379 * starting up on an AP from real-mode. In the non-PAE
380 * case we already have these mappings through head.S. 380 * case we already have these mappings through head.S.
381 * All user-space mappings are explicitly cleared after 381 * All user-space mappings are explicitly cleared after
382 * SMP startup. 382 * SMP startup.
383 */ 383 */
384 set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]); 384 set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
385 #endif 385 #endif
386 } 386 }
387 387
388 #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) 388 #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
389 /* 389 /*
390 * Swap suspend & friends need this for resume because things like the intel-agp 390 * Swap suspend & friends need this for resume because things like the intel-agp
391 * driver might have split up a kernel 4MB mapping. 391 * driver might have split up a kernel 4MB mapping.
392 */ 392 */
393 char __nosavedata swsusp_pg_dir[PAGE_SIZE] 393 char __nosavedata swsusp_pg_dir[PAGE_SIZE]
394 __attribute__ ((aligned (PAGE_SIZE))); 394 __attribute__ ((aligned (PAGE_SIZE)));
395 395
396 static inline void save_pg_dir(void) 396 static inline void save_pg_dir(void)
397 { 397 {
398 memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE); 398 memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
399 } 399 }
400 #else 400 #else
401 static inline void save_pg_dir(void) 401 static inline void save_pg_dir(void)
402 { 402 {
403 } 403 }
404 #endif 404 #endif
405 405
406 void zap_low_mappings (void) 406 void zap_low_mappings (void)
407 { 407 {
408 int i; 408 int i;
409 409
410 save_pg_dir(); 410 save_pg_dir();
411 411
412 /* 412 /*
413 * Zap initial low-memory mappings. 413 * Zap initial low-memory mappings.
414 * 414 *
415 * Note that "pgd_clear()" doesn't do it for 415 * Note that "pgd_clear()" doesn't do it for
416 * us, because pgd_clear() is a no-op on i386. 416 * us, because pgd_clear() is a no-op on i386.
417 */ 417 */
418 for (i = 0; i < USER_PTRS_PER_PGD; i++) 418 for (i = 0; i < USER_PTRS_PER_PGD; i++)
419 #ifdef CONFIG_X86_PAE 419 #ifdef CONFIG_X86_PAE
420 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); 420 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
421 #else 421 #else
422 set_pgd(swapper_pg_dir+i, __pgd(0)); 422 set_pgd(swapper_pg_dir+i, __pgd(0));
423 #endif 423 #endif
424 flush_tlb_all(); 424 flush_tlb_all();
425 } 425 }
426 426
427 static int disable_nx __initdata = 0; 427 static int disable_nx __initdata = 0;
428 u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; 428 u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
429 429
430 /* 430 /*
431 * noexec = on|off 431 * noexec = on|off
432 * 432 *
433 * Control non executable mappings. 433 * Control non executable mappings.
434 * 434 *
435 * on Enable 435 * on Enable
436 * off Disable 436 * off Disable
437 */ 437 */
438 void __init noexec_setup(const char *str) 438 void __init noexec_setup(const char *str)
439 { 439 {
440 if (!strncmp(str, "on",2) && cpu_has_nx) { 440 if (!strncmp(str, "on",2) && cpu_has_nx) {
441 __supported_pte_mask |= _PAGE_NX; 441 __supported_pte_mask |= _PAGE_NX;
442 disable_nx = 0; 442 disable_nx = 0;
443 } else if (!strncmp(str,"off",3)) { 443 } else if (!strncmp(str,"off",3)) {
444 disable_nx = 1; 444 disable_nx = 1;
445 __supported_pte_mask &= ~_PAGE_NX; 445 __supported_pte_mask &= ~_PAGE_NX;
446 } 446 }
447 } 447 }
448 448
449 int nx_enabled = 0; 449 int nx_enabled = 0;
450 #ifdef CONFIG_X86_PAE 450 #ifdef CONFIG_X86_PAE
451 451
452 static void __init set_nx(void) 452 static void __init set_nx(void)
453 { 453 {
454 unsigned int v[4], l, h; 454 unsigned int v[4], l, h;
455 455
456 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { 456 if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
457 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); 457 cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
458 if ((v[3] & (1 << 20)) && !disable_nx) { 458 if ((v[3] & (1 << 20)) && !disable_nx) {
459 rdmsr(MSR_EFER, l, h); 459 rdmsr(MSR_EFER, l, h);
460 l |= EFER_NX; 460 l |= EFER_NX;
461 wrmsr(MSR_EFER, l, h); 461 wrmsr(MSR_EFER, l, h);
462 nx_enabled = 1; 462 nx_enabled = 1;
463 __supported_pte_mask |= _PAGE_NX; 463 __supported_pte_mask |= _PAGE_NX;
464 } 464 }
465 } 465 }
466 } 466 }
467 467
468 /* 468 /*
469 * Enables/disables executability of a given kernel page and 469 * Enables/disables executability of a given kernel page and
470 * returns the previous setting. 470 * returns the previous setting.
471 */ 471 */
472 int __init set_kernel_exec(unsigned long vaddr, int enable) 472 int __init set_kernel_exec(unsigned long vaddr, int enable)
473 { 473 {
474 pte_t *pte; 474 pte_t *pte;
475 int ret = 1; 475 int ret = 1;
476 476
477 if (!nx_enabled) 477 if (!nx_enabled)
478 goto out; 478 goto out;
479 479
480 pte = lookup_address(vaddr); 480 pte = lookup_address(vaddr);
481 BUG_ON(!pte); 481 BUG_ON(!pte);
482 482
483 if (!pte_exec_kernel(*pte)) 483 if (!pte_exec_kernel(*pte))
484 ret = 0; 484 ret = 0;
485 485
486 if (enable) 486 if (enable)
487 pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); 487 pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
488 else 488 else
489 pte->pte_high |= 1 << (_PAGE_BIT_NX - 32); 489 pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
490 __flush_tlb_all(); 490 __flush_tlb_all();
491 out: 491 out:
492 return ret; 492 return ret;
493 } 493 }
494 494
495 #endif 495 #endif
496 496
497 /* 497 /*
498 * paging_init() sets up the page tables - note that the first 8MB are 498 * paging_init() sets up the page tables - note that the first 8MB are
499 * already mapped by head.S. 499 * already mapped by head.S.
500 * 500 *
501 * This routines also unmaps the page at virtual kernel address 0, so 501 * This routines also unmaps the page at virtual kernel address 0, so
502 * that we can trap those pesky NULL-reference errors in the kernel. 502 * that we can trap those pesky NULL-reference errors in the kernel.
503 */ 503 */
504 void __init paging_init(void) 504 void __init paging_init(void)
505 { 505 {
506 #ifdef CONFIG_X86_PAE 506 #ifdef CONFIG_X86_PAE
507 set_nx(); 507 set_nx();
508 if (nx_enabled) 508 if (nx_enabled)
509 printk("NX (Execute Disable) protection: active\n"); 509 printk("NX (Execute Disable) protection: active\n");
510 #endif 510 #endif
511 511
512 pagetable_init(); 512 pagetable_init();
513 513
514 load_cr3(swapper_pg_dir); 514 load_cr3(swapper_pg_dir);
515 515
516 #ifdef CONFIG_X86_PAE 516 #ifdef CONFIG_X86_PAE
517 /* 517 /*
518 * We will bail out later - printk doesn't work right now so 518 * We will bail out later - printk doesn't work right now so
519 * the user would just see a hanging kernel. 519 * the user would just see a hanging kernel.
520 */ 520 */
521 if (cpu_has_pae) 521 if (cpu_has_pae)
522 set_in_cr4(X86_CR4_PAE); 522 set_in_cr4(X86_CR4_PAE);
523 #endif 523 #endif
524 __flush_tlb_all(); 524 __flush_tlb_all();
525 525
526 kmap_init(); 526 kmap_init();
527 } 527 }
528 528
529 /* 529 /*
530 * Test if the WP bit works in supervisor mode. It isn't supported on 386's 530 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
531 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This 531 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
532 * used to involve black magic jumps to work around some nasty CPU bugs, 532 * used to involve black magic jumps to work around some nasty CPU bugs,
533 * but fortunately the switch to using exceptions got rid of all that. 533 * but fortunately the switch to using exceptions got rid of all that.
534 */ 534 */
535 535
536 static void __init test_wp_bit(void) 536 static void __init test_wp_bit(void)
537 { 537 {
538 printk("Checking if this processor honours the WP bit even in supervisor mode... "); 538 printk("Checking if this processor honours the WP bit even in supervisor mode... ");
539 539
540 /* Any page-aligned address will do, the test is non-destructive */ 540 /* Any page-aligned address will do, the test is non-destructive */
541 __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); 541 __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
542 boot_cpu_data.wp_works_ok = do_test_wp_bit(); 542 boot_cpu_data.wp_works_ok = do_test_wp_bit();
543 clear_fixmap(FIX_WP_TEST); 543 clear_fixmap(FIX_WP_TEST);
544 544
545 if (!boot_cpu_data.wp_works_ok) { 545 if (!boot_cpu_data.wp_works_ok) {
546 printk("No.\n"); 546 printk("No.\n");
547 #ifdef CONFIG_X86_WP_WORKS_OK 547 #ifdef CONFIG_X86_WP_WORKS_OK
548 panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!"); 548 panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
549 #endif 549 #endif
550 } else { 550 } else {
551 printk("Ok.\n"); 551 printk("Ok.\n");
552 } 552 }
553 } 553 }
554 554
555 static void __init set_max_mapnr_init(void) 555 static void __init set_max_mapnr_init(void)
556 { 556 {
557 #ifdef CONFIG_HIGHMEM 557 #ifdef CONFIG_HIGHMEM
558 num_physpages = highend_pfn; 558 num_physpages = highend_pfn;
559 #else 559 #else
560 num_physpages = max_low_pfn; 560 num_physpages = max_low_pfn;
561 #endif 561 #endif
562 #ifdef CONFIG_FLATMEM 562 #ifdef CONFIG_FLATMEM
563 max_mapnr = num_physpages; 563 max_mapnr = num_physpages;
564 #endif 564 #endif
565 } 565 }
566 566
567 static struct kcore_list kcore_mem, kcore_vmalloc; 567 static struct kcore_list kcore_mem, kcore_vmalloc;
568 568
569 void __init mem_init(void) 569 void __init mem_init(void)
570 { 570 {
571 extern int ppro_with_ram_bug(void); 571 extern int ppro_with_ram_bug(void);
572 int codesize, reservedpages, datasize, initsize; 572 int codesize, reservedpages, datasize, initsize;
573 int tmp; 573 int tmp;
574 int bad_ppro; 574 int bad_ppro;
575 575
576 #ifdef CONFIG_FLATMEM 576 #ifdef CONFIG_FLATMEM
577 if (!mem_map) 577 if (!mem_map)
578 BUG(); 578 BUG();
579 #endif 579 #endif
580 580
581 bad_ppro = ppro_with_ram_bug(); 581 bad_ppro = ppro_with_ram_bug();
582 582
583 #ifdef CONFIG_HIGHMEM 583 #ifdef CONFIG_HIGHMEM
584 /* check that fixmap and pkmap do not overlap */ 584 /* check that fixmap and pkmap do not overlap */
585 if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { 585 if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
586 printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n"); 586 printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n");
587 printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", 587 printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
588 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START); 588 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);
589 BUG(); 589 BUG();
590 } 590 }
591 #endif 591 #endif
592 592
593 set_max_mapnr_init(); 593 set_max_mapnr_init();
594 594
595 #ifdef CONFIG_HIGHMEM 595 #ifdef CONFIG_HIGHMEM
596 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 596 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
597 #else 597 #else
598 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 598 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
599 #endif 599 #endif
600 600
601 /* this will put all low memory onto the freelists */ 601 /* this will put all low memory onto the freelists */
602 totalram_pages += free_all_bootmem(); 602 totalram_pages += free_all_bootmem();
603 603
604 reservedpages = 0; 604 reservedpages = 0;
605 for (tmp = 0; tmp < max_low_pfn; tmp++) 605 for (tmp = 0; tmp < max_low_pfn; tmp++)
606 /* 606 /*
607 * Only count reserved RAM pages 607 * Only count reserved RAM pages
608 */ 608 */
609 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) 609 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
610 reservedpages++; 610 reservedpages++;
611 611
612 set_highmem_pages_init(bad_ppro); 612 set_highmem_pages_init(bad_ppro);
613 613
614 codesize = (unsigned long) &_etext - (unsigned long) &_text; 614 codesize = (unsigned long) &_etext - (unsigned long) &_text;
615 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 615 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
616 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 616 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
617 617
618 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); 618 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
619 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, 619 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
620 VMALLOC_END-VMALLOC_START); 620 VMALLOC_END-VMALLOC_START);
621 621
622 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", 622 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
623 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 623 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
624 num_physpages << (PAGE_SHIFT-10), 624 num_physpages << (PAGE_SHIFT-10),
625 codesize >> 10, 625 codesize >> 10,
626 reservedpages << (PAGE_SHIFT-10), 626 reservedpages << (PAGE_SHIFT-10),
627 datasize >> 10, 627 datasize >> 10,
628 initsize >> 10, 628 initsize >> 10,
629 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) 629 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
630 ); 630 );
631 631
632 #ifdef CONFIG_X86_PAE 632 #ifdef CONFIG_X86_PAE
633 if (!cpu_has_pae) 633 if (!cpu_has_pae)
634 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); 634 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
635 #endif 635 #endif
636 if (boot_cpu_data.wp_works_ok < 0) 636 if (boot_cpu_data.wp_works_ok < 0)
637 test_wp_bit(); 637 test_wp_bit();
638 638
639 /* 639 /*
640 * Subtle. SMP is doing it's boot stuff late (because it has to 640 * Subtle. SMP is doing it's boot stuff late (because it has to
641 * fork idle threads) - but it also needs low mappings for the 641 * fork idle threads) - but it also needs low mappings for the
642 * protected-mode entry to work. We zap these entries only after 642 * protected-mode entry to work. We zap these entries only after
643 * the WP-bit has been tested. 643 * the WP-bit has been tested.
644 */ 644 */
645 #ifndef CONFIG_SMP 645 #ifndef CONFIG_SMP
646 zap_low_mappings(); 646 zap_low_mappings();
647 #endif 647 #endif
648 } 648 }
649 649
650 /* 650 /*
651 * this is for the non-NUMA, single node SMP system case. 651 * this is for the non-NUMA, single node SMP system case.
652 * Specifically, in the case of x86, we will always add 652 * Specifically, in the case of x86, we will always add
653 * memory to the highmem for now. 653 * memory to the highmem for now.
654 */ 654 */
655 #ifdef CONFIG_MEMORY_HOTPLUG 655 #ifdef CONFIG_MEMORY_HOTPLUG
656 #ifndef CONFIG_NEED_MULTIPLE_NODES 656 #ifndef CONFIG_NEED_MULTIPLE_NODES
657 int arch_add_memory(int nid, u64 start, u64 size) 657 int arch_add_memory(int nid, u64 start, u64 size)
658 { 658 {
659 struct pglist_data *pgdata = &contig_page_data; 659 struct pglist_data *pgdata = &contig_page_data;
660 struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; 660 struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
661 unsigned long start_pfn = start >> PAGE_SHIFT; 661 unsigned long start_pfn = start >> PAGE_SHIFT;
662 unsigned long nr_pages = size >> PAGE_SHIFT; 662 unsigned long nr_pages = size >> PAGE_SHIFT;
663 663
664 return __add_pages(zone, start_pfn, nr_pages); 664 return __add_pages(zone, start_pfn, nr_pages);
665 } 665 }
666 666
667 int remove_memory(u64 start, u64 size) 667 int remove_memory(u64 start, u64 size)
668 { 668 {
669 return -EINVAL; 669 return -EINVAL;
670 } 670 }
671 #endif 671 #endif
672 #endif 672 #endif
673 673
674 kmem_cache_t *pgd_cache; 674 kmem_cache_t *pgd_cache;
675 kmem_cache_t *pmd_cache; 675 kmem_cache_t *pmd_cache;
676 676
677 void __init pgtable_cache_init(void) 677 void __init pgtable_cache_init(void)
678 { 678 {
679 if (PTRS_PER_PMD > 1) { 679 if (PTRS_PER_PMD > 1) {
680 pmd_cache = kmem_cache_create("pmd", 680 pmd_cache = kmem_cache_create("pmd",
681 PTRS_PER_PMD*sizeof(pmd_t), 681 PTRS_PER_PMD*sizeof(pmd_t),
682 PTRS_PER_PMD*sizeof(pmd_t), 682 PTRS_PER_PMD*sizeof(pmd_t),
683 0, 683 0,
684 pmd_ctor, 684 pmd_ctor,
685 NULL); 685 NULL);
686 if (!pmd_cache) 686 if (!pmd_cache)
687 panic("pgtable_cache_init(): cannot create pmd cache"); 687 panic("pgtable_cache_init(): cannot create pmd cache");
688 } 688 }
689 pgd_cache = kmem_cache_create("pgd", 689 pgd_cache = kmem_cache_create("pgd",
690 PTRS_PER_PGD*sizeof(pgd_t), 690 PTRS_PER_PGD*sizeof(pgd_t),
691 PTRS_PER_PGD*sizeof(pgd_t), 691 PTRS_PER_PGD*sizeof(pgd_t),
692 0, 692 0,
693 pgd_ctor, 693 pgd_ctor,
694 PTRS_PER_PMD == 1 ? pgd_dtor : NULL); 694 PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
695 if (!pgd_cache) 695 if (!pgd_cache)
696 panic("pgtable_cache_init(): Cannot create pgd cache"); 696 panic("pgtable_cache_init(): Cannot create pgd cache");
697 } 697 }
698 698
699 /* 699 /*
700 * This function cannot be __init, since exceptions don't work in that 700 * This function cannot be __init, since exceptions don't work in that
701 * section. Put this after the callers, so that it cannot be inlined. 701 * section. Put this after the callers, so that it cannot be inlined.
702 */ 702 */
703 static int noinline do_test_wp_bit(void) 703 static int noinline do_test_wp_bit(void)
704 { 704 {
705 char tmp_reg; 705 char tmp_reg;
706 int flag; 706 int flag;
707 707
708 __asm__ __volatile__( 708 __asm__ __volatile__(
709 " movb %0,%1 \n" 709 " movb %0,%1 \n"
710 "1: movb %1,%0 \n" 710 "1: movb %1,%0 \n"
711 " xorl %2,%2 \n" 711 " xorl %2,%2 \n"
712 "2: \n" 712 "2: \n"
713 ".section __ex_table,\"a\"\n" 713 ".section __ex_table,\"a\"\n"
714 " .align 4 \n" 714 " .align 4 \n"
715 " .long 1b,2b \n" 715 " .long 1b,2b \n"
716 ".previous \n" 716 ".previous \n"
717 :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)), 717 :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
718 "=q" (tmp_reg), 718 "=q" (tmp_reg),
719 "=r" (flag) 719 "=r" (flag)
720 :"2" (1) 720 :"2" (1)
721 :"memory"); 721 :"memory");
722 722
723 return flag; 723 return flag;
724 } 724 }
725 725
 #ifdef CONFIG_DEBUG_RODATA

-extern char __start_rodata, __end_rodata;
 void mark_rodata_ro(void)
 {
-	unsigned long addr = (unsigned long)&__start_rodata;
+	unsigned long addr = (unsigned long)__start_rodata;

-	for (; addr < (unsigned long)&__end_rodata; addr += PAGE_SIZE)
+	for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
 		change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);

-	printk ("Write protecting the kernel read-only data: %luk\n",
-		(unsigned long)(&__end_rodata - &__start_rodata) >> 10);
+	printk("Write protecting the kernel read-only data: %uk\n",
+		(__end_rodata - __start_rodata) >> 10);

 	/*
 	 * change_page_attr() requires a global_flush_tlb() call after it.
 	 * We do this after the printk so that if something went wrong in the
 	 * change, the printk gets out at least to give a better debug hint
 	 * of who is the culprit.
 	 */
 	global_flush_tlb();
 }
 #endif
748 747
749 void free_init_pages(char *what, unsigned long begin, unsigned long end) 748 void free_init_pages(char *what, unsigned long begin, unsigned long end)
750 { 749 {
751 unsigned long addr; 750 unsigned long addr;
752 751
753 for (addr = begin; addr < end; addr += PAGE_SIZE) { 752 for (addr = begin; addr < end; addr += PAGE_SIZE) {
754 ClearPageReserved(virt_to_page(addr)); 753 ClearPageReserved(virt_to_page(addr));
755 init_page_count(virt_to_page(addr)); 754 init_page_count(virt_to_page(addr));
756 memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); 755 memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
757 free_page(addr); 756 free_page(addr);
758 totalram_pages++; 757 totalram_pages++;
759 } 758 }
760 printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); 759 printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
761 } 760 }
762 761
763 void free_initmem(void) 762 void free_initmem(void)
764 { 763 {
765 free_init_pages("unused kernel memory", 764 free_init_pages("unused kernel memory",
766 (unsigned long)(&__init_begin), 765 (unsigned long)(&__init_begin),
767 (unsigned long)(&__init_end)); 766 (unsigned long)(&__init_end));
768 } 767 }
769 768
770 #ifdef CONFIG_BLK_DEV_INITRD 769 #ifdef CONFIG_BLK_DEV_INITRD
771 void free_initrd_mem(unsigned long start, unsigned long end) 770 void free_initrd_mem(unsigned long start, unsigned long end)
772 { 771 {
773 free_init_pages("initrd memory", start, end); 772 free_init_pages("initrd memory", start, end);
774 } 773 }
775 #endif 774 #endif
776 775
777 776
arch/parisc/mm/init.c
1 /* 1 /*
2 * linux/arch/parisc/mm/init.c 2 * linux/arch/parisc/mm/init.c
3 * 3 *
4 * Copyright (C) 1995 Linus Torvalds 4 * Copyright (C) 1995 Linus Torvalds
5 * Copyright 1999 SuSE GmbH 5 * Copyright 1999 SuSE GmbH
6 * changed by Philipp Rumpf 6 * changed by Philipp Rumpf
7 * Copyright 1999 Philipp Rumpf (prumpf@tux.org) 7 * Copyright 1999 Philipp Rumpf (prumpf@tux.org)
8 * Copyright 2004 Randolph Chung (tausq@debian.org) 8 * Copyright 2004 Randolph Chung (tausq@debian.org)
9 * Copyright 2006 Helge Deller (deller@gmx.de) 9 * Copyright 2006 Helge Deller (deller@gmx.de)
10 * 10 *
11 */ 11 */
12 12
13 13
14 #include <linux/module.h> 14 #include <linux/module.h>
15 #include <linux/mm.h> 15 #include <linux/mm.h>
16 #include <linux/bootmem.h> 16 #include <linux/bootmem.h>
17 #include <linux/delay.h> 17 #include <linux/delay.h>
18 #include <linux/init.h> 18 #include <linux/init.h>
19 #include <linux/pci.h> /* for hppa_dma_ops and pcxl_dma_ops */ 19 #include <linux/pci.h> /* for hppa_dma_ops and pcxl_dma_ops */
20 #include <linux/initrd.h> 20 #include <linux/initrd.h>
21 #include <linux/swap.h> 21 #include <linux/swap.h>
22 #include <linux/unistd.h> 22 #include <linux/unistd.h>
23 #include <linux/nodemask.h> /* for node_online_map */ 23 #include <linux/nodemask.h> /* for node_online_map */
24 #include <linux/pagemap.h> /* for release_pages and page_cache_release */ 24 #include <linux/pagemap.h> /* for release_pages and page_cache_release */
25 25
26 #include <asm/pgalloc.h> 26 #include <asm/pgalloc.h>
27 #include <asm/tlb.h> 27 #include <asm/tlb.h>
 #include <asm/pdc_chassis.h>
 #include <asm/mmzone.h>
+#include <asm/sections.h>

 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

33 extern char _text; /* start of kernel code, defined by linker */ 34 extern char _text; /* start of kernel code, defined by linker */
34 extern int data_start; 35 extern int data_start;
35 extern char _end; /* end of BSS, defined by linker */ 36 extern char _end; /* end of BSS, defined by linker */
36 extern char __init_begin, __init_end; 37 extern char __init_begin, __init_end;
37 38
38 #ifdef CONFIG_DISCONTIGMEM 39 #ifdef CONFIG_DISCONTIGMEM
39 struct node_map_data node_data[MAX_NUMNODES] __read_mostly; 40 struct node_map_data node_data[MAX_NUMNODES] __read_mostly;
40 bootmem_data_t bmem_data[MAX_NUMNODES] __read_mostly; 41 bootmem_data_t bmem_data[MAX_NUMNODES] __read_mostly;
41 unsigned char pfnnid_map[PFNNID_MAP_MAX] __read_mostly; 42 unsigned char pfnnid_map[PFNNID_MAP_MAX] __read_mostly;
42 #endif 43 #endif
43 44
44 static struct resource data_resource = { 45 static struct resource data_resource = {
45 .name = "Kernel data", 46 .name = "Kernel data",
46 .flags = IORESOURCE_BUSY | IORESOURCE_MEM, 47 .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
47 }; 48 };
48 49
49 static struct resource code_resource = { 50 static struct resource code_resource = {
50 .name = "Kernel code", 51 .name = "Kernel code",
51 .flags = IORESOURCE_BUSY | IORESOURCE_MEM, 52 .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
52 }; 53 };
53 54
54 static struct resource pdcdata_resource = { 55 static struct resource pdcdata_resource = {
55 .name = "PDC data (Page Zero)", 56 .name = "PDC data (Page Zero)",
56 .start = 0, 57 .start = 0,
57 .end = 0x9ff, 58 .end = 0x9ff,
58 .flags = IORESOURCE_BUSY | IORESOURCE_MEM, 59 .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
59 }; 60 };
60 61
61 static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __read_mostly; 62 static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __read_mostly;
62 63
63 /* The following array is initialized from the firmware specific 64 /* The following array is initialized from the firmware specific
64 * information retrieved in kernel/inventory.c. 65 * information retrieved in kernel/inventory.c.
65 */ 66 */
66 67
67 physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly; 68 physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly;
68 int npmem_ranges __read_mostly; 69 int npmem_ranges __read_mostly;
69 70
70 #ifdef __LP64__ 71 #ifdef __LP64__
71 #define MAX_MEM (~0UL) 72 #define MAX_MEM (~0UL)
72 #else /* !__LP64__ */ 73 #else /* !__LP64__ */
73 #define MAX_MEM (3584U*1024U*1024U) 74 #define MAX_MEM (3584U*1024U*1024U)
74 #endif /* !__LP64__ */ 75 #endif /* !__LP64__ */
75 76
76 static unsigned long mem_limit __read_mostly = MAX_MEM; 77 static unsigned long mem_limit __read_mostly = MAX_MEM;
77 78
78 static void __init mem_limit_func(void) 79 static void __init mem_limit_func(void)
79 { 80 {
80 char *cp, *end; 81 char *cp, *end;
81 unsigned long limit; 82 unsigned long limit;
82 extern char saved_command_line[]; 83 extern char saved_command_line[];
83 84
84 /* We need this before __setup() functions are called */ 85 /* We need this before __setup() functions are called */
85 86
86 limit = MAX_MEM; 87 limit = MAX_MEM;
87 for (cp = saved_command_line; *cp; ) { 88 for (cp = saved_command_line; *cp; ) {
88 if (memcmp(cp, "mem=", 4) == 0) { 89 if (memcmp(cp, "mem=", 4) == 0) {
89 cp += 4; 90 cp += 4;
90 limit = memparse(cp, &end); 91 limit = memparse(cp, &end);
91 if (end != cp) 92 if (end != cp)
92 break; 93 break;
93 cp = end; 94 cp = end;
94 } else { 95 } else {
95 while (*cp != ' ' && *cp) 96 while (*cp != ' ' && *cp)
96 ++cp; 97 ++cp;
97 while (*cp == ' ') 98 while (*cp == ' ')
98 ++cp; 99 ++cp;
99 } 100 }
100 } 101 }
101 102
102 if (limit < mem_limit) 103 if (limit < mem_limit)
103 mem_limit = limit; 104 mem_limit = limit;
104 } 105 }
105 106
106 #define MAX_GAP (0x40000000UL >> PAGE_SHIFT) 107 #define MAX_GAP (0x40000000UL >> PAGE_SHIFT)
107 108
108 static void __init setup_bootmem(void) 109 static void __init setup_bootmem(void)
109 { 110 {
110 unsigned long bootmap_size; 111 unsigned long bootmap_size;
111 unsigned long mem_max; 112 unsigned long mem_max;
112 unsigned long bootmap_pages; 113 unsigned long bootmap_pages;
113 unsigned long bootmap_start_pfn; 114 unsigned long bootmap_start_pfn;
114 unsigned long bootmap_pfn; 115 unsigned long bootmap_pfn;
115 #ifndef CONFIG_DISCONTIGMEM 116 #ifndef CONFIG_DISCONTIGMEM
116 physmem_range_t pmem_holes[MAX_PHYSMEM_RANGES - 1]; 117 physmem_range_t pmem_holes[MAX_PHYSMEM_RANGES - 1];
117 int npmem_holes; 118 int npmem_holes;
118 #endif 119 #endif
119 int i, sysram_resource_count; 120 int i, sysram_resource_count;
120 121
121 disable_sr_hashing(); /* Turn off space register hashing */ 122 disable_sr_hashing(); /* Turn off space register hashing */
122 123
123 /* 124 /*
124 * Sort the ranges. Since the number of ranges is typically 125 * Sort the ranges. Since the number of ranges is typically
125 * small, and performance is not an issue here, just do 126 * small, and performance is not an issue here, just do
126 * a simple insertion sort. 127 * a simple insertion sort.
127 */ 128 */
128 129
129 for (i = 1; i < npmem_ranges; i++) { 130 for (i = 1; i < npmem_ranges; i++) {
130 int j; 131 int j;
131 132
132 for (j = i; j > 0; j--) { 133 for (j = i; j > 0; j--) {
133 unsigned long tmp; 134 unsigned long tmp;
134 135
135 if (pmem_ranges[j-1].start_pfn < 136 if (pmem_ranges[j-1].start_pfn <
136 pmem_ranges[j].start_pfn) { 137 pmem_ranges[j].start_pfn) {
137 138
138 break; 139 break;
139 } 140 }
140 tmp = pmem_ranges[j-1].start_pfn; 141 tmp = pmem_ranges[j-1].start_pfn;
141 pmem_ranges[j-1].start_pfn = pmem_ranges[j].start_pfn; 142 pmem_ranges[j-1].start_pfn = pmem_ranges[j].start_pfn;
142 pmem_ranges[j].start_pfn = tmp; 143 pmem_ranges[j].start_pfn = tmp;
143 tmp = pmem_ranges[j-1].pages; 144 tmp = pmem_ranges[j-1].pages;
144 pmem_ranges[j-1].pages = pmem_ranges[j].pages; 145 pmem_ranges[j-1].pages = pmem_ranges[j].pages;
145 pmem_ranges[j].pages = tmp; 146 pmem_ranges[j].pages = tmp;
146 } 147 }
147 } 148 }
148 149
149 #ifndef CONFIG_DISCONTIGMEM 150 #ifndef CONFIG_DISCONTIGMEM
150 /* 151 /*
151 * Throw out ranges that are too far apart (controlled by 152 * Throw out ranges that are too far apart (controlled by
152 * MAX_GAP). 153 * MAX_GAP).
153 */ 154 */
154 155
155 for (i = 1; i < npmem_ranges; i++) { 156 for (i = 1; i < npmem_ranges; i++) {
156 if (pmem_ranges[i].start_pfn - 157 if (pmem_ranges[i].start_pfn -
157 (pmem_ranges[i-1].start_pfn + 158 (pmem_ranges[i-1].start_pfn +
158 pmem_ranges[i-1].pages) > MAX_GAP) { 159 pmem_ranges[i-1].pages) > MAX_GAP) {
159 npmem_ranges = i; 160 npmem_ranges = i;
160 printk("Large gap in memory detected (%ld pages). " 161 printk("Large gap in memory detected (%ld pages). "
161 "Consider turning on CONFIG_DISCONTIGMEM\n", 162 "Consider turning on CONFIG_DISCONTIGMEM\n",
162 pmem_ranges[i].start_pfn - 163 pmem_ranges[i].start_pfn -
163 (pmem_ranges[i-1].start_pfn + 164 (pmem_ranges[i-1].start_pfn +
164 pmem_ranges[i-1].pages)); 165 pmem_ranges[i-1].pages));
165 break; 166 break;
166 } 167 }
167 } 168 }
168 #endif 169 #endif
169 170
170 if (npmem_ranges > 1) { 171 if (npmem_ranges > 1) {
171 172
172 /* Print the memory ranges */ 173 /* Print the memory ranges */
173 174
174 printk(KERN_INFO "Memory Ranges:\n"); 175 printk(KERN_INFO "Memory Ranges:\n");
175 176
176 for (i = 0; i < npmem_ranges; i++) { 177 for (i = 0; i < npmem_ranges; i++) {
177 unsigned long start; 178 unsigned long start;
178 unsigned long size; 179 unsigned long size;
179 180
180 size = (pmem_ranges[i].pages << PAGE_SHIFT); 181 size = (pmem_ranges[i].pages << PAGE_SHIFT);
181 start = (pmem_ranges[i].start_pfn << PAGE_SHIFT); 182 start = (pmem_ranges[i].start_pfn << PAGE_SHIFT);
182 printk(KERN_INFO "%2d) Start 0x%016lx End 0x%016lx Size %6ld MB\n", 183 printk(KERN_INFO "%2d) Start 0x%016lx End 0x%016lx Size %6ld MB\n",
183 i,start, start + (size - 1), size >> 20); 184 i,start, start + (size - 1), size >> 20);
184 } 185 }
185 } 186 }
186 187
187 sysram_resource_count = npmem_ranges; 188 sysram_resource_count = npmem_ranges;
188 for (i = 0; i < sysram_resource_count; i++) { 189 for (i = 0; i < sysram_resource_count; i++) {
189 struct resource *res = &sysram_resources[i]; 190 struct resource *res = &sysram_resources[i];
190 res->name = "System RAM"; 191 res->name = "System RAM";
191 res->start = pmem_ranges[i].start_pfn << PAGE_SHIFT; 192 res->start = pmem_ranges[i].start_pfn << PAGE_SHIFT;
192 res->end = res->start + (pmem_ranges[i].pages << PAGE_SHIFT)-1; 193 res->end = res->start + (pmem_ranges[i].pages << PAGE_SHIFT)-1;
193 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; 194 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
194 request_resource(&iomem_resource, res); 195 request_resource(&iomem_resource, res);
195 } 196 }
196 197
197 /* 198 /*
198 * For 32 bit kernels we limit the amount of memory we can 199 * For 32 bit kernels we limit the amount of memory we can
199 * support, in order to preserve enough kernel address space 200 * support, in order to preserve enough kernel address space
200 * for other purposes. For 64 bit kernels we don't normally 201 * for other purposes. For 64 bit kernels we don't normally
201 * limit the memory, but this mechanism can be used to 202 * limit the memory, but this mechanism can be used to
202 * artificially limit the amount of memory (and it is written 203 * artificially limit the amount of memory (and it is written
203 * to work with multiple memory ranges). 204 * to work with multiple memory ranges).
204 */ 205 */
205 206
206 mem_limit_func(); /* check for "mem=" argument */ 207 mem_limit_func(); /* check for "mem=" argument */
207 208
208 mem_max = 0; 209 mem_max = 0;
209 num_physpages = 0; 210 num_physpages = 0;
210 for (i = 0; i < npmem_ranges; i++) { 211 for (i = 0; i < npmem_ranges; i++) {
211 unsigned long rsize; 212 unsigned long rsize;
212 213
213 rsize = pmem_ranges[i].pages << PAGE_SHIFT; 214 rsize = pmem_ranges[i].pages << PAGE_SHIFT;
214 if ((mem_max + rsize) > mem_limit) { 215 if ((mem_max + rsize) > mem_limit) {
215 printk(KERN_WARNING "Memory truncated to %ld MB\n", mem_limit >> 20); 216 printk(KERN_WARNING "Memory truncated to %ld MB\n", mem_limit >> 20);
216 if (mem_max == mem_limit) 217 if (mem_max == mem_limit)
217 npmem_ranges = i; 218 npmem_ranges = i;
218 else { 219 else {
219 pmem_ranges[i].pages = (mem_limit >> PAGE_SHIFT) 220 pmem_ranges[i].pages = (mem_limit >> PAGE_SHIFT)
220 - (mem_max >> PAGE_SHIFT); 221 - (mem_max >> PAGE_SHIFT);
221 npmem_ranges = i + 1; 222 npmem_ranges = i + 1;
222 mem_max = mem_limit; 223 mem_max = mem_limit;
223 } 224 }
224 num_physpages += pmem_ranges[i].pages; 225 num_physpages += pmem_ranges[i].pages;
225 break; 226 break;
226 } 227 }
227 num_physpages += pmem_ranges[i].pages; 228 num_physpages += pmem_ranges[i].pages;
228 mem_max += rsize; 229 mem_max += rsize;
229 } 230 }
230 231
231 printk(KERN_INFO "Total Memory: %ld MB\n",mem_max >> 20); 232 printk(KERN_INFO "Total Memory: %ld MB\n",mem_max >> 20);
232 233
233 #ifndef CONFIG_DISCONTIGMEM 234 #ifndef CONFIG_DISCONTIGMEM
234 /* Merge the ranges, keeping track of the holes */ 235 /* Merge the ranges, keeping track of the holes */
235 236
236 { 237 {
237 unsigned long end_pfn; 238 unsigned long end_pfn;
238 unsigned long hole_pages; 239 unsigned long hole_pages;
239 240
240 npmem_holes = 0; 241 npmem_holes = 0;
241 end_pfn = pmem_ranges[0].start_pfn + pmem_ranges[0].pages; 242 end_pfn = pmem_ranges[0].start_pfn + pmem_ranges[0].pages;
242 for (i = 1; i < npmem_ranges; i++) { 243 for (i = 1; i < npmem_ranges; i++) {
243 244
244 hole_pages = pmem_ranges[i].start_pfn - end_pfn; 245 hole_pages = pmem_ranges[i].start_pfn - end_pfn;
245 if (hole_pages) { 246 if (hole_pages) {
246 pmem_holes[npmem_holes].start_pfn = end_pfn; 247 pmem_holes[npmem_holes].start_pfn = end_pfn;
247 pmem_holes[npmem_holes++].pages = hole_pages; 248 pmem_holes[npmem_holes++].pages = hole_pages;
248 end_pfn += hole_pages; 249 end_pfn += hole_pages;
249 } 250 }
250 end_pfn += pmem_ranges[i].pages; 251 end_pfn += pmem_ranges[i].pages;
251 } 252 }
252 253
253 pmem_ranges[0].pages = end_pfn - pmem_ranges[0].start_pfn; 254 pmem_ranges[0].pages = end_pfn - pmem_ranges[0].start_pfn;
254 npmem_ranges = 1; 255 npmem_ranges = 1;
255 } 256 }
256 #endif 257 #endif
257 258
258 bootmap_pages = 0; 259 bootmap_pages = 0;
259 for (i = 0; i < npmem_ranges; i++) 260 for (i = 0; i < npmem_ranges; i++)
260 bootmap_pages += bootmem_bootmap_pages(pmem_ranges[i].pages); 261 bootmap_pages += bootmem_bootmap_pages(pmem_ranges[i].pages);
261 262
262 bootmap_start_pfn = PAGE_ALIGN(__pa((unsigned long) &_end)) >> PAGE_SHIFT; 263 bootmap_start_pfn = PAGE_ALIGN(__pa((unsigned long) &_end)) >> PAGE_SHIFT;
263 264
264 #ifdef CONFIG_DISCONTIGMEM 265 #ifdef CONFIG_DISCONTIGMEM
265 for (i = 0; i < MAX_PHYSMEM_RANGES; i++) { 266 for (i = 0; i < MAX_PHYSMEM_RANGES; i++) {
266 memset(NODE_DATA(i), 0, sizeof(pg_data_t)); 267 memset(NODE_DATA(i), 0, sizeof(pg_data_t));
267 NODE_DATA(i)->bdata = &bmem_data[i]; 268 NODE_DATA(i)->bdata = &bmem_data[i];
268 } 269 }
269 memset(pfnnid_map, 0xff, sizeof(pfnnid_map)); 270 memset(pfnnid_map, 0xff, sizeof(pfnnid_map));
270 271
271 for (i = 0; i < npmem_ranges; i++) 272 for (i = 0; i < npmem_ranges; i++)
272 node_set_online(i); 273 node_set_online(i);
273 #endif 274 #endif
274 275
275 /* 276 /*
276 * Initialize and free the full range of memory in each range. 277 * Initialize and free the full range of memory in each range.
277 * Note that the only writing these routines do is to the bootmap, 278 * Note that the only writing these routines do is to the bootmap,
278 * and we've made sure to locate the bootmap properly so that they 279 * and we've made sure to locate the bootmap properly so that they
279 * won't be writing over anything important. 280 * won't be writing over anything important.
280 */ 281 */
281 282
282 bootmap_pfn = bootmap_start_pfn; 283 bootmap_pfn = bootmap_start_pfn;
283 max_pfn = 0; 284 max_pfn = 0;
284 for (i = 0; i < npmem_ranges; i++) { 285 for (i = 0; i < npmem_ranges; i++) {
285 unsigned long start_pfn; 286 unsigned long start_pfn;
286 unsigned long npages; 287 unsigned long npages;
287 288
288 start_pfn = pmem_ranges[i].start_pfn; 289 start_pfn = pmem_ranges[i].start_pfn;
289 npages = pmem_ranges[i].pages; 290 npages = pmem_ranges[i].pages;
290 291
291 bootmap_size = init_bootmem_node(NODE_DATA(i), 292 bootmap_size = init_bootmem_node(NODE_DATA(i),
292 bootmap_pfn, 293 bootmap_pfn,
293 start_pfn, 294 start_pfn,
294 (start_pfn + npages) ); 295 (start_pfn + npages) );
295 free_bootmem_node(NODE_DATA(i), 296 free_bootmem_node(NODE_DATA(i),
296 (start_pfn << PAGE_SHIFT), 297 (start_pfn << PAGE_SHIFT),
297 (npages << PAGE_SHIFT) ); 298 (npages << PAGE_SHIFT) );
298 bootmap_pfn += (bootmap_size + PAGE_SIZE - 1) >> PAGE_SHIFT; 299 bootmap_pfn += (bootmap_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
299 if ((start_pfn + npages) > max_pfn) 300 if ((start_pfn + npages) > max_pfn)
300 max_pfn = start_pfn + npages; 301 max_pfn = start_pfn + npages;
301 } 302 }
302 303
303 /* IOMMU is always used to access "high mem" on those boxes 304 /* IOMMU is always used to access "high mem" on those boxes
304 * that can support enough mem that a PCI device couldn't 305 * that can support enough mem that a PCI device couldn't
305 * directly DMA to any physical addresses. 306 * directly DMA to any physical addresses.
306 * ISA DMA support will need to revisit this. 307 * ISA DMA support will need to revisit this.
307 */ 308 */
308 max_low_pfn = max_pfn; 309 max_low_pfn = max_pfn;
309 310
310 if ((bootmap_pfn - bootmap_start_pfn) != bootmap_pages) { 311 if ((bootmap_pfn - bootmap_start_pfn) != bootmap_pages) {
311 printk(KERN_WARNING "WARNING! bootmap sizing is messed up!\n"); 312 printk(KERN_WARNING "WARNING! bootmap sizing is messed up!\n");
312 BUG(); 313 BUG();
313 } 314 }
314 315
315 /* reserve PAGE0 pdc memory, kernel text/data/bss & bootmap */ 316 /* reserve PAGE0 pdc memory, kernel text/data/bss & bootmap */
316 317
317 #define PDC_CONSOLE_IO_IODC_SIZE 32768 318 #define PDC_CONSOLE_IO_IODC_SIZE 32768
318 319
319 reserve_bootmem_node(NODE_DATA(0), 0UL, 320 reserve_bootmem_node(NODE_DATA(0), 0UL,
320 (unsigned long)(PAGE0->mem_free + PDC_CONSOLE_IO_IODC_SIZE)); 321 (unsigned long)(PAGE0->mem_free + PDC_CONSOLE_IO_IODC_SIZE));
321 reserve_bootmem_node(NODE_DATA(0),__pa((unsigned long)&_text), 322 reserve_bootmem_node(NODE_DATA(0),__pa((unsigned long)&_text),
322 (unsigned long)(&_end - &_text)); 323 (unsigned long)(&_end - &_text));
323 reserve_bootmem_node(NODE_DATA(0), (bootmap_start_pfn << PAGE_SHIFT), 324 reserve_bootmem_node(NODE_DATA(0), (bootmap_start_pfn << PAGE_SHIFT),
324 ((bootmap_pfn - bootmap_start_pfn) << PAGE_SHIFT)); 325 ((bootmap_pfn - bootmap_start_pfn) << PAGE_SHIFT));
325 326
326 #ifndef CONFIG_DISCONTIGMEM 327 #ifndef CONFIG_DISCONTIGMEM
327 328
328 /* reserve the holes */ 329 /* reserve the holes */
329 330
330 for (i = 0; i < npmem_holes; i++) { 331 for (i = 0; i < npmem_holes; i++) {
331 reserve_bootmem_node(NODE_DATA(0), 332 reserve_bootmem_node(NODE_DATA(0),
332 (pmem_holes[i].start_pfn << PAGE_SHIFT), 333 (pmem_holes[i].start_pfn << PAGE_SHIFT),
333 (pmem_holes[i].pages << PAGE_SHIFT)); 334 (pmem_holes[i].pages << PAGE_SHIFT));
334 } 335 }
335 #endif 336 #endif
336 337
337 #ifdef CONFIG_BLK_DEV_INITRD 338 #ifdef CONFIG_BLK_DEV_INITRD
338 if (initrd_start) { 339 if (initrd_start) {
339 printk(KERN_INFO "initrd: %08lx-%08lx\n", initrd_start, initrd_end); 340 printk(KERN_INFO "initrd: %08lx-%08lx\n", initrd_start, initrd_end);
340 if (__pa(initrd_start) < mem_max) { 341 if (__pa(initrd_start) < mem_max) {
341 unsigned long initrd_reserve; 342 unsigned long initrd_reserve;
342 343
343 if (__pa(initrd_end) > mem_max) { 344 if (__pa(initrd_end) > mem_max) {
344 initrd_reserve = mem_max - __pa(initrd_start); 345 initrd_reserve = mem_max - __pa(initrd_start);
345 } else { 346 } else {
346 initrd_reserve = initrd_end - initrd_start; 347 initrd_reserve = initrd_end - initrd_start;
347 } 348 }
348 initrd_below_start_ok = 1; 349 initrd_below_start_ok = 1;
349 printk(KERN_INFO "initrd: reserving %08lx-%08lx (mem_max %08lx)\n", __pa(initrd_start), __pa(initrd_start) + initrd_reserve, mem_max); 350 printk(KERN_INFO "initrd: reserving %08lx-%08lx (mem_max %08lx)\n", __pa(initrd_start), __pa(initrd_start) + initrd_reserve, mem_max);
350 351
351 reserve_bootmem_node(NODE_DATA(0),__pa(initrd_start), initrd_reserve); 352 reserve_bootmem_node(NODE_DATA(0),__pa(initrd_start), initrd_reserve);
352 } 353 }
353 } 354 }
354 #endif 355 #endif
355 356
356 data_resource.start = virt_to_phys(&data_start); 357 data_resource.start = virt_to_phys(&data_start);
357 data_resource.end = virt_to_phys(&_end)-1; 358 data_resource.end = virt_to_phys(&_end)-1;
358 code_resource.start = virt_to_phys(&_text); 359 code_resource.start = virt_to_phys(&_text);
359 code_resource.end = virt_to_phys(&data_start)-1; 360 code_resource.end = virt_to_phys(&data_start)-1;
360 361
361 /* We don't know which region the kernel will be in, so try 362 /* We don't know which region the kernel will be in, so try
362 * all of them. 363 * all of them.
363 */ 364 */
364 for (i = 0; i < sysram_resource_count; i++) { 365 for (i = 0; i < sysram_resource_count; i++) {
365 struct resource *res = &sysram_resources[i]; 366 struct resource *res = &sysram_resources[i];
366 request_resource(res, &code_resource); 367 request_resource(res, &code_resource);
367 request_resource(res, &data_resource); 368 request_resource(res, &data_resource);
368 } 369 }
369 request_resource(&sysram_resources[0], &pdcdata_resource); 370 request_resource(&sysram_resources[0], &pdcdata_resource);
370 } 371 }
371 372
372 void free_initmem(void) 373 void free_initmem(void)
373 { 374 {
374 unsigned long addr, init_begin, init_end; 375 unsigned long addr, init_begin, init_end;
375 376
376 printk(KERN_INFO "Freeing unused kernel memory: "); 377 printk(KERN_INFO "Freeing unused kernel memory: ");
377 378
378 #ifdef CONFIG_DEBUG_KERNEL 379 #ifdef CONFIG_DEBUG_KERNEL
379 /* Attempt to catch anyone trying to execute code here 380 /* Attempt to catch anyone trying to execute code here
380 * by filling the page with BRK insns. 381 * by filling the page with BRK insns.
381 * 382 *
382 * If we disable interrupts for all CPUs, then IPI stops working. 383 * If we disable interrupts for all CPUs, then IPI stops working.
383 * Kinda breaks the global cache flushing. 384 * Kinda breaks the global cache flushing.
384 */ 385 */
385 local_irq_disable(); 386 local_irq_disable();
386 387
387 memset(&__init_begin, 0x00, 388 memset(&__init_begin, 0x00,
388 (unsigned long)&__init_end - (unsigned long)&__init_begin); 389 (unsigned long)&__init_end - (unsigned long)&__init_begin);
389 390
390 flush_data_cache(); 391 flush_data_cache();
391 asm volatile("sync" : : ); 392 asm volatile("sync" : : );
392 flush_icache_range((unsigned long)&__init_begin, (unsigned long)&__init_end); 393 flush_icache_range((unsigned long)&__init_begin, (unsigned long)&__init_end);
393 asm volatile("sync" : : ); 394 asm volatile("sync" : : );
394 395
395 local_irq_enable(); 396 local_irq_enable();
396 #endif 397 #endif
397 398
398 /* align __init_begin and __init_end to page size, 399 /* align __init_begin and __init_end to page size,
399 ignoring linker script where we might have tried to save RAM */ 400 ignoring linker script where we might have tried to save RAM */
400 init_begin = PAGE_ALIGN((unsigned long)(&__init_begin)); 401 init_begin = PAGE_ALIGN((unsigned long)(&__init_begin));
401 init_end = PAGE_ALIGN((unsigned long)(&__init_end)); 402 init_end = PAGE_ALIGN((unsigned long)(&__init_end));
402 for (addr = init_begin; addr < init_end; addr += PAGE_SIZE) { 403 for (addr = init_begin; addr < init_end; addr += PAGE_SIZE) {
403 ClearPageReserved(virt_to_page(addr)); 404 ClearPageReserved(virt_to_page(addr));
404 init_page_count(virt_to_page(addr)); 405 init_page_count(virt_to_page(addr));
405 free_page(addr); 406 free_page(addr);
406 num_physpages++; 407 num_physpages++;
407 totalram_pages++; 408 totalram_pages++;
408 } 409 }
409 410
410 /* set up a new LED state on systems shipped with an LED State panel */ 411 /* set up a new LED state on systems shipped with an LED State panel */
411 pdc_chassis_send_status(PDC_CHASSIS_DIRECT_BCOMPLETE); 412 pdc_chassis_send_status(PDC_CHASSIS_DIRECT_BCOMPLETE);
412 413
413 printk("%luk freed\n", (init_end - init_begin) >> 10); 414 printk("%luk freed\n", (init_end - init_begin) >> 10);
414 } 415 }
415 416
416 417
417 #ifdef CONFIG_DEBUG_RODATA 418 #ifdef CONFIG_DEBUG_RODATA
418 void mark_rodata_ro(void) 419 void mark_rodata_ro(void)
419 { 420 {
420 extern char __start_rodata, __end_rodata;
421 /* rodata memory was already mapped with KERNEL_RO access rights by 421 /* rodata memory was already mapped with KERNEL_RO access rights by
422 pagetable_init() and map_pages(). No need to do additional stuff here */ 422 pagetable_init() and map_pages(). No need to do additional stuff here */
423 printk (KERN_INFO "Write protecting the kernel read-only data: %luk\n", 423 printk (KERN_INFO "Write protecting the kernel read-only data: %luk\n",
424 (unsigned long)(&__end_rodata - &__start_rodata) >> 10); 424 (unsigned long)(__end_rodata - __start_rodata) >> 10);
425 } 425 }
426 #endif 426 #endif
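
   In the hunk above, the function-local extern declaration is removed and the
   address-of operators are dropped from the subtraction, consistent with the
   rodata section markers being declared as arrays in a shared header. A minimal
   userspace sketch of the array-style arithmetic (the names and the 4K size are
   made up, not real kernel values):

       /* Illustrative only: array-style section markers decay to pointers,
        * so no '&' is needed when computing the section size. */
       #include <stdio.h>

       static char rodata_image[4096];   /* stand-in for the .rodata contents */

       /* Hypothetical markers mirroring the shared-header style. */
       static char * const start_marker = rodata_image;
       static char * const end_marker   = rodata_image + sizeof(rodata_image);

       int main(void)
       {
               unsigned long size = (unsigned long)(end_marker - start_marker);
               printf("Write protecting the kernel read-only data: %luk\n", size >> 10);
               return 0;
       }
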
427 427
428 428
429 /* 429 /*
430 * Just an arbitrary offset to serve as a "hole" between mapping areas 430 * Just an arbitrary offset to serve as a "hole" between mapping areas
431 * (between top of physical memory and a potential pcxl dma mapping 431 * (between top of physical memory and a potential pcxl dma mapping
432 * area, and below the vmalloc mapping area). 432 * area, and below the vmalloc mapping area).
433 * 433 *
434 * The current 32K value just means that there will be a 32K "hole" 434 * The current 32K value just means that there will be a 32K "hole"
435 * between mapping areas. That means that any out-of-bounds memory 435 * between mapping areas. That means that any out-of-bounds memory
436 * accesses will hopefully be caught. The vmalloc() routines leave 436 * accesses will hopefully be caught. The vmalloc() routines leave
437 * a hole of 4kB between each vmalloced area for the same reason. 437 * a hole of 4kB between each vmalloced area for the same reason.
438 */ 438 */
439 439
440 /* Leave room for gateway page expansion */ 440 /* Leave room for gateway page expansion */
441 #if KERNEL_MAP_START < GATEWAY_PAGE_SIZE 441 #if KERNEL_MAP_START < GATEWAY_PAGE_SIZE
442 #error KERNEL_MAP_START is in gateway reserved region 442 #error KERNEL_MAP_START is in gateway reserved region
443 #endif 443 #endif
444 #define MAP_START (KERNEL_MAP_START) 444 #define MAP_START (KERNEL_MAP_START)
445 445
446 #define VM_MAP_OFFSET (32*1024) 446 #define VM_MAP_OFFSET (32*1024)
447 #define SET_MAP_OFFSET(x) ((void *)(((unsigned long)(x) + VM_MAP_OFFSET) \ 447 #define SET_MAP_OFFSET(x) ((void *)(((unsigned long)(x) + VM_MAP_OFFSET) \
448 & ~(VM_MAP_OFFSET-1))) 448 & ~(VM_MAP_OFFSET-1)))
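
   As a quick check of what SET_MAP_OFFSET() computes, the standalone sketch
   below (sample addresses are arbitrary) prints the rounded results; the output
   is always 32K-aligned and strictly above the input, which is what produces
   the hole described in the comment above.

       /* Userspace check (illustrative): what SET_MAP_OFFSET() produces. */
       #include <stdio.h>

       #define VM_MAP_OFFSET (32*1024)
       #define SET_MAP_OFFSET(x) ((void *)(((unsigned long)(x) + VM_MAP_OFFSET) \
                                           & ~(VM_MAP_OFFSET-1)))

       int main(void)
       {
               unsigned long samples[] = { 0, 1, 0x8000, 0x8001, 0x123456 };
               for (int i = 0; i < 5; i++)
                       /* Result is 32K-aligned and strictly above the input. */
                       printf("%#lx -> %p\n", samples[i], SET_MAP_OFFSET(samples[i]));
               return 0;
       }
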
449 449
450 void *vmalloc_start __read_mostly; 450 void *vmalloc_start __read_mostly;
451 EXPORT_SYMBOL(vmalloc_start); 451 EXPORT_SYMBOL(vmalloc_start);
452 452
453 #ifdef CONFIG_PA11 453 #ifdef CONFIG_PA11
454 unsigned long pcxl_dma_start __read_mostly; 454 unsigned long pcxl_dma_start __read_mostly;
455 #endif 455 #endif
456 456
457 void __init mem_init(void) 457 void __init mem_init(void)
458 { 458 {
459 high_memory = __va((max_pfn << PAGE_SHIFT)); 459 high_memory = __va((max_pfn << PAGE_SHIFT));
460 460
461 #ifndef CONFIG_DISCONTIGMEM 461 #ifndef CONFIG_DISCONTIGMEM
462 max_mapnr = page_to_pfn(virt_to_page(high_memory - 1)) + 1; 462 max_mapnr = page_to_pfn(virt_to_page(high_memory - 1)) + 1;
463 totalram_pages += free_all_bootmem(); 463 totalram_pages += free_all_bootmem();
464 #else 464 #else
465 { 465 {
466 int i; 466 int i;
467 467
468 for (i = 0; i < npmem_ranges; i++) 468 for (i = 0; i < npmem_ranges; i++)
469 totalram_pages += free_all_bootmem_node(NODE_DATA(i)); 469 totalram_pages += free_all_bootmem_node(NODE_DATA(i));
470 } 470 }
471 #endif 471 #endif
472 472
473 printk(KERN_INFO "Memory: %luk available\n", num_physpages << (PAGE_SHIFT-10)); 473 printk(KERN_INFO "Memory: %luk available\n", num_physpages << (PAGE_SHIFT-10));
474 474
475 #ifdef CONFIG_PA11 475 #ifdef CONFIG_PA11
476 if (hppa_dma_ops == &pcxl_dma_ops) { 476 if (hppa_dma_ops == &pcxl_dma_ops) {
477 pcxl_dma_start = (unsigned long)SET_MAP_OFFSET(MAP_START); 477 pcxl_dma_start = (unsigned long)SET_MAP_OFFSET(MAP_START);
478 vmalloc_start = SET_MAP_OFFSET(pcxl_dma_start + PCXL_DMA_MAP_SIZE); 478 vmalloc_start = SET_MAP_OFFSET(pcxl_dma_start + PCXL_DMA_MAP_SIZE);
479 } else { 479 } else {
480 pcxl_dma_start = 0; 480 pcxl_dma_start = 0;
481 vmalloc_start = SET_MAP_OFFSET(MAP_START); 481 vmalloc_start = SET_MAP_OFFSET(MAP_START);
482 } 482 }
483 #else 483 #else
484 vmalloc_start = SET_MAP_OFFSET(MAP_START); 484 vmalloc_start = SET_MAP_OFFSET(MAP_START);
485 #endif 485 #endif
486 486
487 } 487 }
488 488
489 unsigned long *empty_zero_page __read_mostly; 489 unsigned long *empty_zero_page __read_mostly;
490 490
491 void show_mem(void) 491 void show_mem(void)
492 { 492 {
493 int i,free = 0,total = 0,reserved = 0; 493 int i,free = 0,total = 0,reserved = 0;
494 int shared = 0, cached = 0; 494 int shared = 0, cached = 0;
495 495
496 printk(KERN_INFO "Mem-info:\n"); 496 printk(KERN_INFO "Mem-info:\n");
497 show_free_areas(); 497 show_free_areas();
498 printk(KERN_INFO "Free swap: %6ldkB\n", 498 printk(KERN_INFO "Free swap: %6ldkB\n",
499 nr_swap_pages<<(PAGE_SHIFT-10)); 499 nr_swap_pages<<(PAGE_SHIFT-10));
500 #ifndef CONFIG_DISCONTIGMEM 500 #ifndef CONFIG_DISCONTIGMEM
501 i = max_mapnr; 501 i = max_mapnr;
502 while (i-- > 0) { 502 while (i-- > 0) {
503 total++; 503 total++;
504 if (PageReserved(mem_map+i)) 504 if (PageReserved(mem_map+i))
505 reserved++; 505 reserved++;
506 else if (PageSwapCache(mem_map+i)) 506 else if (PageSwapCache(mem_map+i))
507 cached++; 507 cached++;
508 else if (!page_count(&mem_map[i])) 508 else if (!page_count(&mem_map[i]))
509 free++; 509 free++;
510 else 510 else
511 shared += page_count(&mem_map[i]) - 1; 511 shared += page_count(&mem_map[i]) - 1;
512 } 512 }
513 #else 513 #else
514 for (i = 0; i < npmem_ranges; i++) { 514 for (i = 0; i < npmem_ranges; i++) {
515 int j; 515 int j;
516 516
517 for (j = node_start_pfn(i); j < node_end_pfn(i); j++) { 517 for (j = node_start_pfn(i); j < node_end_pfn(i); j++) {
518 struct page *p; 518 struct page *p;
519 unsigned long flags; 519 unsigned long flags;
520 520
521 pgdat_resize_lock(NODE_DATA(i), &flags); 521 pgdat_resize_lock(NODE_DATA(i), &flags);
522 p = nid_page_nr(i, j) - node_start_pfn(i); 522 p = nid_page_nr(i, j) - node_start_pfn(i);
523 523
524 total++; 524 total++;
525 if (PageReserved(p)) 525 if (PageReserved(p))
526 reserved++; 526 reserved++;
527 else if (PageSwapCache(p)) 527 else if (PageSwapCache(p))
528 cached++; 528 cached++;
529 else if (!page_count(p)) 529 else if (!page_count(p))
530 free++; 530 free++;
531 else 531 else
532 shared += page_count(p) - 1; 532 shared += page_count(p) - 1;
533 pgdat_resize_unlock(NODE_DATA(i), &flags); 533 pgdat_resize_unlock(NODE_DATA(i), &flags);
534 } 534 }
535 } 535 }
536 #endif 536 #endif
537 printk(KERN_INFO "%d pages of RAM\n", total); 537 printk(KERN_INFO "%d pages of RAM\n", total);
538 printk(KERN_INFO "%d reserved pages\n", reserved); 538 printk(KERN_INFO "%d reserved pages\n", reserved);
539 printk(KERN_INFO "%d pages shared\n", shared); 539 printk(KERN_INFO "%d pages shared\n", shared);
540 printk(KERN_INFO "%d pages swap cached\n", cached); 540 printk(KERN_INFO "%d pages swap cached\n", cached);
541 541
542 542
543 #ifdef CONFIG_DISCONTIGMEM 543 #ifdef CONFIG_DISCONTIGMEM
544 { 544 {
545 struct zonelist *zl; 545 struct zonelist *zl;
546 int i, j, k; 546 int i, j, k;
547 547
548 for (i = 0; i < npmem_ranges; i++) { 548 for (i = 0; i < npmem_ranges; i++) {
549 for (j = 0; j < MAX_NR_ZONES; j++) { 549 for (j = 0; j < MAX_NR_ZONES; j++) {
550 zl = NODE_DATA(i)->node_zonelists + j; 550 zl = NODE_DATA(i)->node_zonelists + j;
551 551
552 printk("Zone list for zone %d on node %d: ", j, i); 552 printk("Zone list for zone %d on node %d: ", j, i);
553 for (k = 0; zl->zones[k] != NULL; k++) 553 for (k = 0; zl->zones[k] != NULL; k++)
554 printk("[%d/%s] ", zl->zones[k]->zone_pgdat->node_id, zl->zones[k]->name); 554 printk("[%d/%s] ", zl->zones[k]->zone_pgdat->node_id, zl->zones[k]->name);
555 printk("\n"); 555 printk("\n");
556 } 556 }
557 } 557 }
558 } 558 }
559 #endif 559 #endif
560 } 560 }
561 561
562 562
563 static void __init map_pages(unsigned long start_vaddr, unsigned long start_paddr, unsigned long size, pgprot_t pgprot) 563 static void __init map_pages(unsigned long start_vaddr, unsigned long start_paddr, unsigned long size, pgprot_t pgprot)
564 { 564 {
565 pgd_t *pg_dir; 565 pgd_t *pg_dir;
566 pmd_t *pmd; 566 pmd_t *pmd;
567 pte_t *pg_table; 567 pte_t *pg_table;
568 unsigned long end_paddr; 568 unsigned long end_paddr;
569 unsigned long start_pmd; 569 unsigned long start_pmd;
570 unsigned long start_pte; 570 unsigned long start_pte;
571 unsigned long tmp1; 571 unsigned long tmp1;
572 unsigned long tmp2; 572 unsigned long tmp2;
573 unsigned long address; 573 unsigned long address;
574 unsigned long ro_start; 574 unsigned long ro_start;
575 unsigned long ro_end; 575 unsigned long ro_end;
576 unsigned long fv_addr; 576 unsigned long fv_addr;
577 unsigned long gw_addr; 577 unsigned long gw_addr;
578 extern const unsigned long fault_vector_20; 578 extern const unsigned long fault_vector_20;
579 extern void * const linux_gateway_page; 579 extern void * const linux_gateway_page;
580 580
581 ro_start = __pa((unsigned long)&_text); 581 ro_start = __pa((unsigned long)&_text);
582 ro_end = __pa((unsigned long)&data_start); 582 ro_end = __pa((unsigned long)&data_start);
583 fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK; 583 fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK;
584 gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK; 584 gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK;
585 585
586 end_paddr = start_paddr + size; 586 end_paddr = start_paddr + size;
587 587
588 pg_dir = pgd_offset_k(start_vaddr); 588 pg_dir = pgd_offset_k(start_vaddr);
589 589
590 #if PTRS_PER_PMD == 1 590 #if PTRS_PER_PMD == 1
591 start_pmd = 0; 591 start_pmd = 0;
592 #else 592 #else
593 start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)); 593 start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
594 #endif 594 #endif
595 start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)); 595 start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
596 596
597 address = start_paddr; 597 address = start_paddr;
598 while (address < end_paddr) { 598 while (address < end_paddr) {
599 #if PTRS_PER_PMD == 1 599 #if PTRS_PER_PMD == 1
600 pmd = (pmd_t *)__pa(pg_dir); 600 pmd = (pmd_t *)__pa(pg_dir);
601 #else 601 #else
602 pmd = (pmd_t *)pgd_address(*pg_dir); 602 pmd = (pmd_t *)pgd_address(*pg_dir);
603 603
604 /* 604 /*
605 * pmd is physical at this point 605 * pmd is physical at this point
606 */ 606 */
607 607
608 if (!pmd) { 608 if (!pmd) {
609 pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE << PMD_ORDER); 609 pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE << PMD_ORDER);
610 pmd = (pmd_t *) __pa(pmd); 610 pmd = (pmd_t *) __pa(pmd);
611 } 611 }
612 612
613 pgd_populate(NULL, pg_dir, __va(pmd)); 613 pgd_populate(NULL, pg_dir, __va(pmd));
614 #endif 614 #endif
615 pg_dir++; 615 pg_dir++;
616 616
617 /* now change pmd to kernel virtual addresses */ 617 /* now change pmd to kernel virtual addresses */
618 618
619 pmd = (pmd_t *)__va(pmd) + start_pmd; 619 pmd = (pmd_t *)__va(pmd) + start_pmd;
620 for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++,pmd++) { 620 for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++,pmd++) {
621 621
622 /* 622 /*
623 * pg_table is physical at this point 623 * pg_table is physical at this point
624 */ 624 */
625 625
626 pg_table = (pte_t *)pmd_address(*pmd); 626 pg_table = (pte_t *)pmd_address(*pmd);
627 if (!pg_table) { 627 if (!pg_table) {
628 pg_table = (pte_t *) 628 pg_table = (pte_t *)
629 alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE); 629 alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE);
630 pg_table = (pte_t *) __pa(pg_table); 630 pg_table = (pte_t *) __pa(pg_table);
631 } 631 }
632 632
633 pmd_populate_kernel(NULL, pmd, __va(pg_table)); 633 pmd_populate_kernel(NULL, pmd, __va(pg_table));
634 634
635 /* now change pg_table to kernel virtual addresses */ 635 /* now change pg_table to kernel virtual addresses */
636 636
637 pg_table = (pte_t *) __va(pg_table) + start_pte; 637 pg_table = (pte_t *) __va(pg_table) + start_pte;
638 for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++,pg_table++) { 638 for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++,pg_table++) {
639 pte_t pte; 639 pte_t pte;
640 640
641 /* 641 /*
642 * Map the fault vector writable so we can 642 * Map the fault vector writable so we can
643 * write the HPMC checksum. 643 * write the HPMC checksum.
644 */ 644 */
645 #if defined(CONFIG_PARISC_PAGE_SIZE_4KB) 645 #if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
646 if (address >= ro_start && address < ro_end 646 if (address >= ro_start && address < ro_end
647 && address != fv_addr 647 && address != fv_addr
648 && address != gw_addr) 648 && address != gw_addr)
649 pte = __mk_pte(address, PAGE_KERNEL_RO); 649 pte = __mk_pte(address, PAGE_KERNEL_RO);
650 else 650 else
651 #endif 651 #endif
652 pte = __mk_pte(address, pgprot); 652 pte = __mk_pte(address, pgprot);
653 653
654 if (address >= end_paddr) 654 if (address >= end_paddr)
655 pte_val(pte) = 0; 655 pte_val(pte) = 0;
656 656
657 set_pte(pg_table, pte); 657 set_pte(pg_table, pte);
658 658
659 address += PAGE_SIZE; 659 address += PAGE_SIZE;
660 } 660 }
661 start_pte = 0; 661 start_pte = 0;
662 662
663 if (address >= end_paddr) 663 if (address >= end_paddr)
664 break; 664 break;
665 } 665 }
666 start_pmd = 0; 666 start_pmd = 0;
667 } 667 }
668 } 668 }
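
   map_pages() above derives its starting pmd and pte indices from the virtual
   address with the usual shift-and-mask pattern. A standalone sketch of that
   index arithmetic, using hypothetical constants rather than a real parisc
   configuration:

       /* Illustrative index arithmetic (constants are assumptions): how a
        * virtual address is split into pmd and pte indices. */
       #include <stdio.h>

       #define PAGE_SHIFT    12
       #define PTRS_PER_PTE  1024
       #define PMD_SHIFT     (PAGE_SHIFT + 10)   /* each pmd entry covers 1024 pages */
       #define PTRS_PER_PMD  512

       int main(void)
       {
               unsigned long vaddr = 0x10480000UL;   /* arbitrary example address */
               unsigned long pmd_idx = (vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
               unsigned long pte_idx = (vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);

               printf("vaddr %#lx -> pmd index %lu, pte index %lu\n",
                      vaddr, pmd_idx, pte_idx);
               return 0;
       }
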
669 669
670 /* 670 /*
671 * pagetable_init() sets up the page tables 671 * pagetable_init() sets up the page tables
672 * 672 *
673 * Note that gateway_init() places the Linux gateway page at page 0. 673 * Note that gateway_init() places the Linux gateway page at page 0.
674 * Since gateway pages cannot be dereferenced this has the desirable 674 * Since gateway pages cannot be dereferenced this has the desirable
675 * side effect of trapping those pesky NULL-reference errors in the 675 * side effect of trapping those pesky NULL-reference errors in the
676 * kernel. 676 * kernel.
677 */ 677 */
678 static void __init pagetable_init(void) 678 static void __init pagetable_init(void)
679 { 679 {
680 int range; 680 int range;
681 681
682 /* Map each physical memory range to its kernel vaddr */ 682 /* Map each physical memory range to its kernel vaddr */
683 683
684 for (range = 0; range < npmem_ranges; range++) { 684 for (range = 0; range < npmem_ranges; range++) {
685 unsigned long start_paddr; 685 unsigned long start_paddr;
686 unsigned long end_paddr; 686 unsigned long end_paddr;
687 unsigned long size; 687 unsigned long size;
688 688
689 start_paddr = pmem_ranges[range].start_pfn << PAGE_SHIFT; 689 start_paddr = pmem_ranges[range].start_pfn << PAGE_SHIFT;
690 end_paddr = start_paddr + (pmem_ranges[range].pages << PAGE_SHIFT); 690 end_paddr = start_paddr + (pmem_ranges[range].pages << PAGE_SHIFT);
691 size = pmem_ranges[range].pages << PAGE_SHIFT; 691 size = pmem_ranges[range].pages << PAGE_SHIFT;
692 692
693 map_pages((unsigned long)__va(start_paddr), start_paddr, 693 map_pages((unsigned long)__va(start_paddr), start_paddr,
694 size, PAGE_KERNEL); 694 size, PAGE_KERNEL);
695 } 695 }
696 696
697 #ifdef CONFIG_BLK_DEV_INITRD 697 #ifdef CONFIG_BLK_DEV_INITRD
698 if (initrd_end && initrd_end > mem_limit) { 698 if (initrd_end && initrd_end > mem_limit) {
699 printk(KERN_INFO "initrd: mapping %08lx-%08lx\n", initrd_start, initrd_end); 699 printk(KERN_INFO "initrd: mapping %08lx-%08lx\n", initrd_start, initrd_end);
700 map_pages(initrd_start, __pa(initrd_start), 700 map_pages(initrd_start, __pa(initrd_start),
701 initrd_end - initrd_start, PAGE_KERNEL); 701 initrd_end - initrd_start, PAGE_KERNEL);
702 } 702 }
703 #endif 703 #endif
704 704
705 empty_zero_page = alloc_bootmem_pages(PAGE_SIZE); 705 empty_zero_page = alloc_bootmem_pages(PAGE_SIZE);
706 memset(empty_zero_page, 0, PAGE_SIZE); 706 memset(empty_zero_page, 0, PAGE_SIZE);
707 } 707 }
708 708
709 static void __init gateway_init(void) 709 static void __init gateway_init(void)
710 { 710 {
711 unsigned long linux_gateway_page_addr; 711 unsigned long linux_gateway_page_addr;
712 /* FIXME: This is 'const' in order to trick the compiler 712 /* FIXME: This is 'const' in order to trick the compiler
713 into not treating it as DP-relative data. */ 713 into not treating it as DP-relative data. */
714 extern void * const linux_gateway_page; 714 extern void * const linux_gateway_page;
715 715
716 linux_gateway_page_addr = LINUX_GATEWAY_ADDR & PAGE_MASK; 716 linux_gateway_page_addr = LINUX_GATEWAY_ADDR & PAGE_MASK;
717 717
718 /* 718 /*
719 * Setup Linux Gateway page. 719 * Setup Linux Gateway page.
720 * 720 *
721 * The Linux gateway page will reside in kernel space (on virtual 721 * The Linux gateway page will reside in kernel space (on virtual
722 * page 0), so it doesn't need to be aliased into user space. 722 * page 0), so it doesn't need to be aliased into user space.
723 */ 723 */
724 724
725 map_pages(linux_gateway_page_addr, __pa(&linux_gateway_page), 725 map_pages(linux_gateway_page_addr, __pa(&linux_gateway_page),
726 PAGE_SIZE, PAGE_GATEWAY); 726 PAGE_SIZE, PAGE_GATEWAY);
727 } 727 }
728 728
729 #ifdef CONFIG_HPUX 729 #ifdef CONFIG_HPUX
730 void 730 void
731 map_hpux_gateway_page(struct task_struct *tsk, struct mm_struct *mm) 731 map_hpux_gateway_page(struct task_struct *tsk, struct mm_struct *mm)
732 { 732 {
733 pgd_t *pg_dir; 733 pgd_t *pg_dir;
734 pmd_t *pmd; 734 pmd_t *pmd;
735 pte_t *pg_table; 735 pte_t *pg_table;
736 unsigned long start_pmd; 736 unsigned long start_pmd;
737 unsigned long start_pte; 737 unsigned long start_pte;
738 unsigned long address; 738 unsigned long address;
739 unsigned long hpux_gw_page_addr; 739 unsigned long hpux_gw_page_addr;
740 /* FIXME: This is 'const' in order to trick the compiler 740 /* FIXME: This is 'const' in order to trick the compiler
741 into not treating it as DP-relative data. */ 741 into not treating it as DP-relative data. */
742 extern void * const hpux_gateway_page; 742 extern void * const hpux_gateway_page;
743 743
744 hpux_gw_page_addr = HPUX_GATEWAY_ADDR & PAGE_MASK; 744 hpux_gw_page_addr = HPUX_GATEWAY_ADDR & PAGE_MASK;
745 745
746 /* 746 /*
747 * Setup HP-UX Gateway page. 747 * Setup HP-UX Gateway page.
748 * 748 *
749 * The HP-UX gateway page resides in the user address space, 749 * The HP-UX gateway page resides in the user address space,
750 * so it needs to be aliased into each process. 750 * so it needs to be aliased into each process.
751 */ 751 */
752 752
753 pg_dir = pgd_offset(mm,hpux_gw_page_addr); 753 pg_dir = pgd_offset(mm,hpux_gw_page_addr);
754 754
755 #if PTRS_PER_PMD == 1 755 #if PTRS_PER_PMD == 1
756 start_pmd = 0; 756 start_pmd = 0;
757 #else 757 #else
758 start_pmd = ((hpux_gw_page_addr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)); 758 start_pmd = ((hpux_gw_page_addr >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
759 #endif 759 #endif
760 start_pte = ((hpux_gw_page_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)); 760 start_pte = ((hpux_gw_page_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
761 761
762 address = __pa(&hpux_gateway_page); 762 address = __pa(&hpux_gateway_page);
763 #if PTRS_PER_PMD == 1 763 #if PTRS_PER_PMD == 1
764 pmd = (pmd_t *)__pa(pg_dir); 764 pmd = (pmd_t *)__pa(pg_dir);
765 #else 765 #else
766 pmd = (pmd_t *) pgd_address(*pg_dir); 766 pmd = (pmd_t *) pgd_address(*pg_dir);
767 767
768 /* 768 /*
769 * pmd is physical at this point 769 * pmd is physical at this point
770 */ 770 */
771 771
772 if (!pmd) { 772 if (!pmd) {
773 pmd = (pmd_t *) get_zeroed_page(GFP_KERNEL); 773 pmd = (pmd_t *) get_zeroed_page(GFP_KERNEL);
774 pmd = (pmd_t *) __pa(pmd); 774 pmd = (pmd_t *) __pa(pmd);
775 } 775 }
776 776
777 __pgd_val_set(*pg_dir, PxD_FLAG_PRESENT | PxD_FLAG_VALID | (unsigned long) pmd); 777 __pgd_val_set(*pg_dir, PxD_FLAG_PRESENT | PxD_FLAG_VALID | (unsigned long) pmd);
778 #endif 778 #endif
779 /* now change pmd to kernel virtual addresses */ 779 /* now change pmd to kernel virtual addresses */
780 780
781 pmd = (pmd_t *)__va(pmd) + start_pmd; 781 pmd = (pmd_t *)__va(pmd) + start_pmd;
782 782
783 /* 783 /*
784 * pg_table is physical at this point 784 * pg_table is physical at this point
785 */ 785 */
786 786
787 pg_table = (pte_t *) pmd_address(*pmd); 787 pg_table = (pte_t *) pmd_address(*pmd);
788 if (!pg_table) 788 if (!pg_table)
789 pg_table = (pte_t *) __pa(get_zeroed_page(GFP_KERNEL)); 789 pg_table = (pte_t *) __pa(get_zeroed_page(GFP_KERNEL));
790 790
791 __pmd_val_set(*pmd, PxD_FLAG_PRESENT | PxD_FLAG_VALID | (unsigned long) pg_table); 791 __pmd_val_set(*pmd, PxD_FLAG_PRESENT | PxD_FLAG_VALID | (unsigned long) pg_table);
792 792
793 /* now change pg_table to kernel virtual addresses */ 793 /* now change pg_table to kernel virtual addresses */
794 794
795 pg_table = (pte_t *) __va(pg_table) + start_pte; 795 pg_table = (pte_t *) __va(pg_table) + start_pte;
796 set_pte(pg_table, __mk_pte(address, PAGE_GATEWAY)); 796 set_pte(pg_table, __mk_pte(address, PAGE_GATEWAY));
797 } 797 }
798 EXPORT_SYMBOL(map_hpux_gateway_page); 798 EXPORT_SYMBOL(map_hpux_gateway_page);
799 #endif 799 #endif
800 800
801 void __init paging_init(void) 801 void __init paging_init(void)
802 { 802 {
803 int i; 803 int i;
804 804
805 setup_bootmem(); 805 setup_bootmem();
806 pagetable_init(); 806 pagetable_init();
807 gateway_init(); 807 gateway_init();
808 flush_cache_all_local(); /* start with known state */ 808 flush_cache_all_local(); /* start with known state */
809 flush_tlb_all_local(NULL); 809 flush_tlb_all_local(NULL);
810 810
811 for (i = 0; i < npmem_ranges; i++) { 811 for (i = 0; i < npmem_ranges; i++) {
812 unsigned long zones_size[MAX_NR_ZONES] = { 0, 0, 0 }; 812 unsigned long zones_size[MAX_NR_ZONES] = { 0, 0, 0 };
813 813
814 /* We have an IOMMU, so all memory can go into a single 814 /* We have an IOMMU, so all memory can go into a single
815 ZONE_DMA zone. */ 815 ZONE_DMA zone. */
816 zones_size[ZONE_DMA] = pmem_ranges[i].pages; 816 zones_size[ZONE_DMA] = pmem_ranges[i].pages;
817 817
818 #ifdef CONFIG_DISCONTIGMEM 818 #ifdef CONFIG_DISCONTIGMEM
819 /* Need to initialize the pfnnid_map before we can initialize 819 /* Need to initialize the pfnnid_map before we can initialize
820 the zone */ 820 the zone */
821 { 821 {
822 int j; 822 int j;
823 for (j = (pmem_ranges[i].start_pfn >> PFNNID_SHIFT); 823 for (j = (pmem_ranges[i].start_pfn >> PFNNID_SHIFT);
824 j <= ((pmem_ranges[i].start_pfn + pmem_ranges[i].pages) >> PFNNID_SHIFT); 824 j <= ((pmem_ranges[i].start_pfn + pmem_ranges[i].pages) >> PFNNID_SHIFT);
825 j++) { 825 j++) {
826 pfnnid_map[j] = i; 826 pfnnid_map[j] = i;
827 } 827 }
828 } 828 }
829 #endif 829 #endif
830 830
831 free_area_init_node(i, NODE_DATA(i), zones_size, 831 free_area_init_node(i, NODE_DATA(i), zones_size,
832 pmem_ranges[i].start_pfn, NULL); 832 pmem_ranges[i].start_pfn, NULL);
833 } 833 }
834 } 834 }
835 835
836 #ifdef CONFIG_PA20 836 #ifdef CONFIG_PA20
837 837
838 /* 838 /*
839 * Currently, all PA20 chips have 18 bit protection id's, which is the 839 * Currently, all PA20 chips have 18 bit protection id's, which is the
840 * limiting factor (space ids are 32 bits). 840 * limiting factor (space ids are 32 bits).
841 */ 841 */
842 842
843 #define NR_SPACE_IDS 262144 843 #define NR_SPACE_IDS 262144
844 844
845 #else 845 #else
846 846
847 /* 847 /*
848 * Currently we have a one-to-one relationship between space id's and 848 * Currently we have a one-to-one relationship between space id's and
849 * protection id's. Older parisc chips (PCXS, PCXT, PCXL, PCXL2) only 849 * protection id's. Older parisc chips (PCXS, PCXT, PCXL, PCXL2) only
850 * support 15 bit protection id's, so that is the limiting factor. 850 * support 15 bit protection id's, so that is the limiting factor.
851 * PCXT' has 18 bit protection id's, but only 16 bit spaceids, so it's 851 * PCXT' has 18 bit protection id's, but only 16 bit spaceids, so it's
852 * probably not worth the effort for a special case here. 852 * probably not worth the effort for a special case here.
853 */ 853 */
854 854
855 #define NR_SPACE_IDS 32768 855 #define NR_SPACE_IDS 32768
856 856
857 #endif /* !CONFIG_PA20 */ 857 #endif /* !CONFIG_PA20 */
858 858
859 #define RECYCLE_THRESHOLD (NR_SPACE_IDS / 2) 859 #define RECYCLE_THRESHOLD (NR_SPACE_IDS / 2)
860 #define SID_ARRAY_SIZE (NR_SPACE_IDS / (8 * sizeof(long))) 860 #define SID_ARRAY_SIZE (NR_SPACE_IDS / (8 * sizeof(long)))
861 861
862 static unsigned long space_id[SID_ARRAY_SIZE] = { 1 }; /* disallow space 0 */ 862 static unsigned long space_id[SID_ARRAY_SIZE] = { 1 }; /* disallow space 0 */
863 static unsigned long dirty_space_id[SID_ARRAY_SIZE]; 863 static unsigned long dirty_space_id[SID_ARRAY_SIZE];
864 static unsigned long space_id_index; 864 static unsigned long space_id_index;
865 static unsigned long free_space_ids = NR_SPACE_IDS - 1; 865 static unsigned long free_space_ids = NR_SPACE_IDS - 1;
866 static unsigned long dirty_space_ids = 0; 866 static unsigned long dirty_space_ids = 0;
867 867
868 static DEFINE_SPINLOCK(sid_lock); 868 static DEFINE_SPINLOCK(sid_lock);
869 869
870 unsigned long alloc_sid(void) 870 unsigned long alloc_sid(void)
871 { 871 {
872 unsigned long index; 872 unsigned long index;
873 873
874 spin_lock(&sid_lock); 874 spin_lock(&sid_lock);
875 875
876 if (free_space_ids == 0) { 876 if (free_space_ids == 0) {
877 if (dirty_space_ids != 0) { 877 if (dirty_space_ids != 0) {
878 spin_unlock(&sid_lock); 878 spin_unlock(&sid_lock);
879 flush_tlb_all(); /* flush_tlb_all() calls recycle_sids() */ 879 flush_tlb_all(); /* flush_tlb_all() calls recycle_sids() */
880 spin_lock(&sid_lock); 880 spin_lock(&sid_lock);
881 } 881 }
882 BUG_ON(free_space_ids == 0); 882 BUG_ON(free_space_ids == 0);
883 } 883 }
884 884
885 free_space_ids--; 885 free_space_ids--;
886 886
887 index = find_next_zero_bit(space_id, NR_SPACE_IDS, space_id_index); 887 index = find_next_zero_bit(space_id, NR_SPACE_IDS, space_id_index);
888 space_id[index >> SHIFT_PER_LONG] |= (1L << (index & (BITS_PER_LONG - 1))); 888 space_id[index >> SHIFT_PER_LONG] |= (1L << (index & (BITS_PER_LONG - 1)));
889 space_id_index = index; 889 space_id_index = index;
890 890
891 spin_unlock(&sid_lock); 891 spin_unlock(&sid_lock);
892 892
893 return index << SPACEID_SHIFT; 893 return index << SPACEID_SHIFT;
894 } 894 }
895 895
896 void free_sid(unsigned long spaceid) 896 void free_sid(unsigned long spaceid)
897 { 897 {
898 unsigned long index = spaceid >> SPACEID_SHIFT; 898 unsigned long index = spaceid >> SPACEID_SHIFT;
899 unsigned long *dirty_space_offset; 899 unsigned long *dirty_space_offset;
900 900
901 dirty_space_offset = dirty_space_id + (index >> SHIFT_PER_LONG); 901 dirty_space_offset = dirty_space_id + (index >> SHIFT_PER_LONG);
902 index &= (BITS_PER_LONG - 1); 902 index &= (BITS_PER_LONG - 1);
903 903
904 spin_lock(&sid_lock); 904 spin_lock(&sid_lock);
905 905
906 BUG_ON(*dirty_space_offset & (1L << index)); /* attempt to free space id twice */ 906 BUG_ON(*dirty_space_offset & (1L << index)); /* attempt to free space id twice */
907 907
908 *dirty_space_offset |= (1L << index); 908 *dirty_space_offset |= (1L << index);
909 dirty_space_ids++; 909 dirty_space_ids++;
910 910
911 spin_unlock(&sid_lock); 911 spin_unlock(&sid_lock);
912 } 912 }
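
   alloc_sid() and free_sid() above treat space_id[] and dirty_space_id[] as
   plain bitmaps, splitting an index into a word offset and a bit position. A
   minimal userspace sketch of the same word/bit split (array size and sample
   indices are made up):

       /* Illustrative bitmap sketch mirroring the word/bit split used above. */
       #include <stdio.h>
       #include <limits.h>

       #define NR_IDS  256
       #define BITS_PER_LONG (sizeof(long) * CHAR_BIT)

       static unsigned long ids[NR_IDS / (8 * sizeof(long))];

       static void set_id(unsigned long index)
       {
               ids[index / BITS_PER_LONG] |= 1UL << (index % BITS_PER_LONG);
       }

       static int test_id(unsigned long index)
       {
               return !!(ids[index / BITS_PER_LONG] & (1UL << (index % BITS_PER_LONG)));
       }

       int main(void)
       {
               set_id(0);      /* like "disallow space 0" */
               set_id(67);
               printf("id 0: %d, id 66: %d, id 67: %d\n",
                      test_id(0), test_id(66), test_id(67));
               return 0;
       }
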
913 913
914 914
915 #ifdef CONFIG_SMP 915 #ifdef CONFIG_SMP
916 static void get_dirty_sids(unsigned long *ndirtyptr,unsigned long *dirty_array) 916 static void get_dirty_sids(unsigned long *ndirtyptr,unsigned long *dirty_array)
917 { 917 {
918 int i; 918 int i;
919 919
920 /* NOTE: sid_lock must be held upon entry */ 920 /* NOTE: sid_lock must be held upon entry */
921 921
922 *ndirtyptr = dirty_space_ids; 922 *ndirtyptr = dirty_space_ids;
923 if (dirty_space_ids != 0) { 923 if (dirty_space_ids != 0) {
924 for (i = 0; i < SID_ARRAY_SIZE; i++) { 924 for (i = 0; i < SID_ARRAY_SIZE; i++) {
925 dirty_array[i] = dirty_space_id[i]; 925 dirty_array[i] = dirty_space_id[i];
926 dirty_space_id[i] = 0; 926 dirty_space_id[i] = 0;
927 } 927 }
928 dirty_space_ids = 0; 928 dirty_space_ids = 0;
929 } 929 }
930 930
931 return; 931 return;
932 } 932 }
933 933
934 static void recycle_sids(unsigned long ndirty,unsigned long *dirty_array) 934 static void recycle_sids(unsigned long ndirty,unsigned long *dirty_array)
935 { 935 {
936 int i; 936 int i;
937 937
938 /* NOTE: sid_lock must be held upon entry */ 938 /* NOTE: sid_lock must be held upon entry */
939 939
940 if (ndirty != 0) { 940 if (ndirty != 0) {
941 for (i = 0; i < SID_ARRAY_SIZE; i++) { 941 for (i = 0; i < SID_ARRAY_SIZE; i++) {
942 space_id[i] ^= dirty_array[i]; 942 space_id[i] ^= dirty_array[i];
943 } 943 }
944 944
945 free_space_ids += ndirty; 945 free_space_ids += ndirty;
946 space_id_index = 0; 946 space_id_index = 0;
947 } 947 }
948 } 948 }
949 949
950 #else /* CONFIG_SMP */ 950 #else /* CONFIG_SMP */
951 951
952 static void recycle_sids(void) 952 static void recycle_sids(void)
953 { 953 {
954 int i; 954 int i;
955 955
956 /* NOTE: sid_lock must be held upon entry */ 956 /* NOTE: sid_lock must be held upon entry */
957 957
958 if (dirty_space_ids != 0) { 958 if (dirty_space_ids != 0) {
959 for (i = 0; i < SID_ARRAY_SIZE; i++) { 959 for (i = 0; i < SID_ARRAY_SIZE; i++) {
960 space_id[i] ^= dirty_space_id[i]; 960 space_id[i] ^= dirty_space_id[i];
961 dirty_space_id[i] = 0; 961 dirty_space_id[i] = 0;
962 } 962 }
963 963
964 free_space_ids += dirty_space_ids; 964 free_space_ids += dirty_space_ids;
965 dirty_space_ids = 0; 965 dirty_space_ids = 0;
966 space_id_index = 0; 966 space_id_index = 0;
967 } 967 }
968 } 968 }
969 #endif 969 #endif
970 970
971 /* 971 /*
972 * flush_tlb_all() calls recycle_sids(), since whenever the entire tlb is 972 * flush_tlb_all() calls recycle_sids(), since whenever the entire tlb is
973 * purged, we can safely reuse the space ids that were released but 973 * purged, we can safely reuse the space ids that were released but
974 * not flushed from the tlb. 974 * not flushed from the tlb.
975 */ 975 */
976 976
977 #ifdef CONFIG_SMP 977 #ifdef CONFIG_SMP
978 978
979 static unsigned long recycle_ndirty; 979 static unsigned long recycle_ndirty;
980 static unsigned long recycle_dirty_array[SID_ARRAY_SIZE]; 980 static unsigned long recycle_dirty_array[SID_ARRAY_SIZE];
981 static unsigned int recycle_inuse; 981 static unsigned int recycle_inuse;
982 982
983 void flush_tlb_all(void) 983 void flush_tlb_all(void)
984 { 984 {
985 int do_recycle; 985 int do_recycle;
986 986
987 do_recycle = 0; 987 do_recycle = 0;
988 spin_lock(&sid_lock); 988 spin_lock(&sid_lock);
989 if (dirty_space_ids > RECYCLE_THRESHOLD) { 989 if (dirty_space_ids > RECYCLE_THRESHOLD) {
990 BUG_ON(recycle_inuse); /* FIXME: Use a semaphore/wait queue here */ 990 BUG_ON(recycle_inuse); /* FIXME: Use a semaphore/wait queue here */
991 get_dirty_sids(&recycle_ndirty,recycle_dirty_array); 991 get_dirty_sids(&recycle_ndirty,recycle_dirty_array);
992 recycle_inuse++; 992 recycle_inuse++;
993 do_recycle++; 993 do_recycle++;
994 } 994 }
995 spin_unlock(&sid_lock); 995 spin_unlock(&sid_lock);
996 on_each_cpu(flush_tlb_all_local, NULL, 1, 1); 996 on_each_cpu(flush_tlb_all_local, NULL, 1, 1);
997 if (do_recycle) { 997 if (do_recycle) {
998 spin_lock(&sid_lock); 998 spin_lock(&sid_lock);
999 recycle_sids(recycle_ndirty,recycle_dirty_array); 999 recycle_sids(recycle_ndirty,recycle_dirty_array);
1000 recycle_inuse = 0; 1000 recycle_inuse = 0;
1001 spin_unlock(&sid_lock); 1001 spin_unlock(&sid_lock);
1002 } 1002 }
1003 } 1003 }
1004 #else 1004 #else
1005 void flush_tlb_all(void) 1005 void flush_tlb_all(void)
1006 { 1006 {
1007 spin_lock(&sid_lock); 1007 spin_lock(&sid_lock);
1008 flush_tlb_all_local(NULL); 1008 flush_tlb_all_local(NULL);
1009 recycle_sids(); 1009 recycle_sids();
1010 spin_unlock(&sid_lock); 1010 spin_unlock(&sid_lock);
1011 } 1011 }
1012 #endif 1012 #endif
1013 1013
1014 #ifdef CONFIG_BLK_DEV_INITRD 1014 #ifdef CONFIG_BLK_DEV_INITRD
1015 void free_initrd_mem(unsigned long start, unsigned long end) 1015 void free_initrd_mem(unsigned long start, unsigned long end)
1016 { 1016 {
1017 if (start >= end) 1017 if (start >= end)
1018 return; 1018 return;
1019 printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); 1019 printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
1020 for (; start < end; start += PAGE_SIZE) { 1020 for (; start < end; start += PAGE_SIZE) {
1021 ClearPageReserved(virt_to_page(start)); 1021 ClearPageReserved(virt_to_page(start));
1022 init_page_count(virt_to_page(start)); 1022 init_page_count(virt_to_page(start));
1023 free_page(start); 1023 free_page(start);
1024 num_physpages++; 1024 num_physpages++;
1025 totalram_pages++; 1025 totalram_pages++;
1026 } 1026 }
1027 } 1027 }
1028 #endif 1028 #endif
arch/x86_64/mm/init.c
1 /* 1 /*
2 * linux/arch/x86_64/mm/init.c 2 * linux/arch/x86_64/mm/init.c
3 * 3 *
4 * Copyright (C) 1995 Linus Torvalds 4 * Copyright (C) 1995 Linus Torvalds
5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> 5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
6 * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de> 6 * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
7 */ 7 */
8 8
9 #include <linux/signal.h> 9 #include <linux/signal.h>
10 #include <linux/sched.h> 10 #include <linux/sched.h>
11 #include <linux/kernel.h> 11 #include <linux/kernel.h>
12 #include <linux/errno.h> 12 #include <linux/errno.h>
13 #include <linux/string.h> 13 #include <linux/string.h>
14 #include <linux/types.h> 14 #include <linux/types.h>
15 #include <linux/ptrace.h> 15 #include <linux/ptrace.h>
16 #include <linux/mman.h> 16 #include <linux/mman.h>
17 #include <linux/mm.h> 17 #include <linux/mm.h>
18 #include <linux/swap.h> 18 #include <linux/swap.h>
19 #include <linux/smp.h> 19 #include <linux/smp.h>
20 #include <linux/init.h> 20 #include <linux/init.h>
21 #include <linux/pagemap.h> 21 #include <linux/pagemap.h>
22 #include <linux/bootmem.h> 22 #include <linux/bootmem.h>
23 #include <linux/proc_fs.h> 23 #include <linux/proc_fs.h>
24 #include <linux/pci.h> 24 #include <linux/pci.h>
25 #include <linux/poison.h> 25 #include <linux/poison.h>
26 #include <linux/dma-mapping.h> 26 #include <linux/dma-mapping.h>
27 #include <linux/module.h> 27 #include <linux/module.h>
28 #include <linux/memory_hotplug.h> 28 #include <linux/memory_hotplug.h>
29 29
30 #include <asm/processor.h> 30 #include <asm/processor.h>
31 #include <asm/system.h> 31 #include <asm/system.h>
32 #include <asm/uaccess.h> 32 #include <asm/uaccess.h>
33 #include <asm/pgtable.h> 33 #include <asm/pgtable.h>
34 #include <asm/pgalloc.h> 34 #include <asm/pgalloc.h>
35 #include <asm/dma.h> 35 #include <asm/dma.h>
36 #include <asm/fixmap.h> 36 #include <asm/fixmap.h>
37 #include <asm/e820.h> 37 #include <asm/e820.h>
38 #include <asm/apic.h> 38 #include <asm/apic.h>
39 #include <asm/tlb.h> 39 #include <asm/tlb.h>
40 #include <asm/mmu_context.h> 40 #include <asm/mmu_context.h>
41 #include <asm/proto.h> 41 #include <asm/proto.h>
42 #include <asm/smp.h> 42 #include <asm/smp.h>
43 #include <asm/sections.h> 43 #include <asm/sections.h>
44 44
45 #ifndef Dprintk 45 #ifndef Dprintk
46 #define Dprintk(x...) 46 #define Dprintk(x...)
47 #endif 47 #endif
48 48
49 struct dma_mapping_ops* dma_ops; 49 struct dma_mapping_ops* dma_ops;
50 EXPORT_SYMBOL(dma_ops); 50 EXPORT_SYMBOL(dma_ops);
51 51
52 static unsigned long dma_reserve __initdata; 52 static unsigned long dma_reserve __initdata;
53 53
54 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 54 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
55 55
56 /* 56 /*
57 * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the 57 * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
58 * physical space so we can cache the place of the first one and move 58 * physical space so we can cache the place of the first one and move
59 * around without checking the pgd every time. 59 * around without checking the pgd every time.
60 */ 60 */
61 61
62 void show_mem(void) 62 void show_mem(void)
63 { 63 {
64 long i, total = 0, reserved = 0; 64 long i, total = 0, reserved = 0;
65 long shared = 0, cached = 0; 65 long shared = 0, cached = 0;
66 pg_data_t *pgdat; 66 pg_data_t *pgdat;
67 struct page *page; 67 struct page *page;
68 68
69 printk(KERN_INFO "Mem-info:\n"); 69 printk(KERN_INFO "Mem-info:\n");
70 show_free_areas(); 70 show_free_areas();
71 printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); 71 printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
72 72
73 for_each_online_pgdat(pgdat) { 73 for_each_online_pgdat(pgdat) {
74 for (i = 0; i < pgdat->node_spanned_pages; ++i) { 74 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
75 page = pfn_to_page(pgdat->node_start_pfn + i); 75 page = pfn_to_page(pgdat->node_start_pfn + i);
76 total++; 76 total++;
77 if (PageReserved(page)) 77 if (PageReserved(page))
78 reserved++; 78 reserved++;
79 else if (PageSwapCache(page)) 79 else if (PageSwapCache(page))
80 cached++; 80 cached++;
81 else if (page_count(page)) 81 else if (page_count(page))
82 shared += page_count(page) - 1; 82 shared += page_count(page) - 1;
83 } 83 }
84 } 84 }
85 printk(KERN_INFO "%lu pages of RAM\n", total); 85 printk(KERN_INFO "%lu pages of RAM\n", total);
86 printk(KERN_INFO "%lu reserved pages\n",reserved); 86 printk(KERN_INFO "%lu reserved pages\n",reserved);
87 printk(KERN_INFO "%lu pages shared\n",shared); 87 printk(KERN_INFO "%lu pages shared\n",shared);
88 printk(KERN_INFO "%lu pages swap cached\n",cached); 88 printk(KERN_INFO "%lu pages swap cached\n",cached);
89 } 89 }
90 90
91 int after_bootmem; 91 int after_bootmem;
92 92
93 static __init void *spp_getpage(void) 93 static __init void *spp_getpage(void)
94 { 94 {
95 void *ptr; 95 void *ptr;
96 if (after_bootmem) 96 if (after_bootmem)
97 ptr = (void *) get_zeroed_page(GFP_ATOMIC); 97 ptr = (void *) get_zeroed_page(GFP_ATOMIC);
98 else 98 else
99 ptr = alloc_bootmem_pages(PAGE_SIZE); 99 ptr = alloc_bootmem_pages(PAGE_SIZE);
100 if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) 100 if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
101 panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":""); 101 panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
102 102
103 Dprintk("spp_getpage %p\n", ptr); 103 Dprintk("spp_getpage %p\n", ptr);
104 return ptr; 104 return ptr;
105 } 105 }
106 106
107 static __init void set_pte_phys(unsigned long vaddr, 107 static __init void set_pte_phys(unsigned long vaddr,
108 unsigned long phys, pgprot_t prot) 108 unsigned long phys, pgprot_t prot)
109 { 109 {
110 pgd_t *pgd; 110 pgd_t *pgd;
111 pud_t *pud; 111 pud_t *pud;
112 pmd_t *pmd; 112 pmd_t *pmd;
113 pte_t *pte, new_pte; 113 pte_t *pte, new_pte;
114 114
115 Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys); 115 Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
116 116
117 pgd = pgd_offset_k(vaddr); 117 pgd = pgd_offset_k(vaddr);
118 if (pgd_none(*pgd)) { 118 if (pgd_none(*pgd)) {
119 printk("PGD FIXMAP MISSING, it should be setup in head.S!\n"); 119 printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
120 return; 120 return;
121 } 121 }
122 pud = pud_offset(pgd, vaddr); 122 pud = pud_offset(pgd, vaddr);
123 if (pud_none(*pud)) { 123 if (pud_none(*pud)) {
124 pmd = (pmd_t *) spp_getpage(); 124 pmd = (pmd_t *) spp_getpage();
125 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); 125 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
126 if (pmd != pmd_offset(pud, 0)) { 126 if (pmd != pmd_offset(pud, 0)) {
127 printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0)); 127 printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
128 return; 128 return;
129 } 129 }
130 } 130 }
131 pmd = pmd_offset(pud, vaddr); 131 pmd = pmd_offset(pud, vaddr);
132 if (pmd_none(*pmd)) { 132 if (pmd_none(*pmd)) {
133 pte = (pte_t *) spp_getpage(); 133 pte = (pte_t *) spp_getpage();
134 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER)); 134 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
135 if (pte != pte_offset_kernel(pmd, 0)) { 135 if (pte != pte_offset_kernel(pmd, 0)) {
136 printk("PAGETABLE BUG #02!\n"); 136 printk("PAGETABLE BUG #02!\n");
137 return; 137 return;
138 } 138 }
139 } 139 }
140 new_pte = pfn_pte(phys >> PAGE_SHIFT, prot); 140 new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
141 141
142 pte = pte_offset_kernel(pmd, vaddr); 142 pte = pte_offset_kernel(pmd, vaddr);
143 if (!pte_none(*pte) && 143 if (!pte_none(*pte) &&
144 pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask)) 144 pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
145 pte_ERROR(*pte); 145 pte_ERROR(*pte);
146 set_pte(pte, new_pte); 146 set_pte(pte, new_pte);
147 147
148 /* 148 /*
149 * It's enough to flush this one mapping. 149 * It's enough to flush this one mapping.
150 * (PGE mappings get flushed as well) 150 * (PGE mappings get flushed as well)
151 */ 151 */
152 __flush_tlb_one(vaddr); 152 __flush_tlb_one(vaddr);
153 } 153 }
154 154
155 /* NOTE: this is meant to be run only at boot */ 155 /* NOTE: this is meant to be run only at boot */
156 void __init 156 void __init
157 __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot) 157 __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
158 { 158 {
159 unsigned long address = __fix_to_virt(idx); 159 unsigned long address = __fix_to_virt(idx);
160 160
161 if (idx >= __end_of_fixed_addresses) { 161 if (idx >= __end_of_fixed_addresses) {
162 printk("Invalid __set_fixmap\n"); 162 printk("Invalid __set_fixmap\n");
163 return; 163 return;
164 } 164 }
165 set_pte_phys(address, phys, prot); 165 set_pte_phys(address, phys, prot);
166 } 166 }
167 167
168 unsigned long __initdata table_start, table_end; 168 unsigned long __initdata table_start, table_end;
169 169
170 extern pmd_t temp_boot_pmds[]; 170 extern pmd_t temp_boot_pmds[];
171 171
172 static struct temp_map { 172 static struct temp_map {
173 pmd_t *pmd; 173 pmd_t *pmd;
174 void *address; 174 void *address;
175 int allocated; 175 int allocated;
176 } temp_mappings[] __initdata = { 176 } temp_mappings[] __initdata = {
177 { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) }, 177 { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
178 { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) }, 178 { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
179 {} 179 {}
180 }; 180 };
181 181
182 static __meminit void *alloc_low_page(int *index, unsigned long *phys) 182 static __meminit void *alloc_low_page(int *index, unsigned long *phys)
183 { 183 {
184 struct temp_map *ti; 184 struct temp_map *ti;
185 int i; 185 int i;
186 unsigned long pfn = table_end++, paddr; 186 unsigned long pfn = table_end++, paddr;
187 void *adr; 187 void *adr;
188 188
189 if (after_bootmem) { 189 if (after_bootmem) {
190 adr = (void *)get_zeroed_page(GFP_ATOMIC); 190 adr = (void *)get_zeroed_page(GFP_ATOMIC);
191 *phys = __pa(adr); 191 *phys = __pa(adr);
192 return adr; 192 return adr;
193 } 193 }
194 194
195 if (pfn >= end_pfn) 195 if (pfn >= end_pfn)
196 panic("alloc_low_page: ran out of memory"); 196 panic("alloc_low_page: ran out of memory");
197 for (i = 0; temp_mappings[i].allocated; i++) { 197 for (i = 0; temp_mappings[i].allocated; i++) {
198 if (!temp_mappings[i].pmd) 198 if (!temp_mappings[i].pmd)
199 panic("alloc_low_page: ran out of temp mappings"); 199 panic("alloc_low_page: ran out of temp mappings");
200 } 200 }
201 ti = &temp_mappings[i]; 201 ti = &temp_mappings[i];
202 paddr = (pfn << PAGE_SHIFT) & PMD_MASK; 202 paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
203 set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE)); 203 set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
204 ti->allocated = 1; 204 ti->allocated = 1;
205 __flush_tlb(); 205 __flush_tlb();
206 adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); 206 adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
207 memset(adr, 0, PAGE_SIZE); 207 memset(adr, 0, PAGE_SIZE);
208 *index = i; 208 *index = i;
209 *phys = pfn * PAGE_SIZE; 209 *phys = pfn * PAGE_SIZE;
210 return adr; 210 return adr;
211 } 211 }
212 212
213 static __meminit void unmap_low_page(int i) 213 static __meminit void unmap_low_page(int i)
214 { 214 {
215 struct temp_map *ti; 215 struct temp_map *ti;
216 216
217 if (after_bootmem) 217 if (after_bootmem)
218 return; 218 return;
219 219
220 ti = &temp_mappings[i]; 220 ti = &temp_mappings[i];
221 set_pmd(ti->pmd, __pmd(0)); 221 set_pmd(ti->pmd, __pmd(0));
222 ti->allocated = 0; 222 ti->allocated = 0;
223 } 223 }
224 224
225 /* Must run before zap_low_mappings */ 225 /* Must run before zap_low_mappings */
226 __init void *early_ioremap(unsigned long addr, unsigned long size) 226 __init void *early_ioremap(unsigned long addr, unsigned long size)
227 { 227 {
228 unsigned long map = round_down(addr, LARGE_PAGE_SIZE); 228 unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
229 229
230 /* actually usually some more */ 230 /* actually usually some more */
231 if (size >= LARGE_PAGE_SIZE) { 231 if (size >= LARGE_PAGE_SIZE) {
232 printk("SMBIOS area too long %lu\n", size); 232 printk("SMBIOS area too long %lu\n", size);
233 return NULL; 233 return NULL;
234 } 234 }
235 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); 235 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
236 map += LARGE_PAGE_SIZE; 236 map += LARGE_PAGE_SIZE;
237 set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); 237 set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
238 __flush_tlb(); 238 __flush_tlb();
239 return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1)); 239 return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
240 } 240 }
241 241
242 /* To avoid virtual aliases later */ 242 /* To avoid virtual aliases later */
243 __init void early_iounmap(void *addr, unsigned long size) 243 __init void early_iounmap(void *addr, unsigned long size)
244 { 244 {
245 if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) 245 if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
246 printk("early_iounmap: bad address %p\n", addr); 246 printk("early_iounmap: bad address %p\n", addr);
247 set_pmd(temp_mappings[0].pmd, __pmd(0)); 247 set_pmd(temp_mappings[0].pmd, __pmd(0));
248 set_pmd(temp_mappings[1].pmd, __pmd(0)); 248 set_pmd(temp_mappings[1].pmd, __pmd(0));
249 __flush_tlb(); 249 __flush_tlb();
250 } 250 }
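
   early_ioremap() above rounds the requested physical address down to a
   LARGE_PAGE_SIZE boundary, maps that window with two large-page pmds, and
   returns the requested address's offset inside the window. A standalone
   sketch of just the address arithmetic (the 2MB large-page size and the
   sample address are assumptions):

       /* Illustrative arithmetic only: locating an address within a
        * 2MB-aligned mapping window. */
       #include <stdio.h>

       #define LARGE_PAGE_SIZE (2UL * 1024 * 1024)
       #define round_down(x, y) ((x) & ~((y) - 1))

       int main(void)
       {
               unsigned long addr = 0xF0100UL;    /* hypothetical physical address */
               unsigned long map  = round_down(addr, LARGE_PAGE_SIZE);
               unsigned long off  = addr & (LARGE_PAGE_SIZE - 1);

               printf("addr %#lx maps via window at %#lx, offset %#lx\n",
                      addr, map, off);
               return 0;
       }
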
251 251
252 static void __meminit 252 static void __meminit
253 phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) 253 phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
254 { 254 {
255 int i; 255 int i;
256 256
257 for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) { 257 for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
258 unsigned long entry; 258 unsigned long entry;
259 259
260 if (address >= end) { 260 if (address >= end) {
261 if (!after_bootmem) 261 if (!after_bootmem)
262 for (; i < PTRS_PER_PMD; i++, pmd++) 262 for (; i < PTRS_PER_PMD; i++, pmd++)
263 set_pmd(pmd, __pmd(0)); 263 set_pmd(pmd, __pmd(0));
264 break; 264 break;
265 } 265 }
266 entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address; 266 entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
267 entry &= __supported_pte_mask; 267 entry &= __supported_pte_mask;
268 set_pmd(pmd, __pmd(entry)); 268 set_pmd(pmd, __pmd(entry));
269 } 269 }
270 } 270 }
271 271
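phys_pmd_init() fills one 2MB entry per slot until it reaches 'end', and during early boot it also zeroes the remaining slots so a freshly allocated table carries no stale entries. A standalone model of that fill-then-clear pattern (flag bits are left out; only the address stepping is shown):

#include <stdio.h>

#define PTRS_PER_PMD  512
#define PMD_SIZE      (1UL << 21)               /* 2MB per entry */

int main(void)
{
        unsigned long table[PTRS_PER_PMD];
        unsigned long address = 0, end = 5 * PMD_SIZE;  /* map the first 10MB as an example */
        int i, mapped = 0;

        for (i = 0; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
                if (address >= end) {
                        for (; i < PTRS_PER_PMD; i++)
                                table[i] = 0;   /* clear unused slots, as in the !after_bootmem case */
                        break;
                }
                table[i] = address;             /* the real code ORs in _PAGE_PSE etc. here */
                mapped++;
        }
        printf("%d entries mapped, %d cleared\n", mapped, PTRS_PER_PMD - mapped);
        return 0;
}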
272 static void __meminit 272 static void __meminit
273 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) 273 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
274 { 274 {
275 pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address)); 275 pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
276 276
277 if (pmd_none(*pmd)) { 277 if (pmd_none(*pmd)) {
278 spin_lock(&init_mm.page_table_lock); 278 spin_lock(&init_mm.page_table_lock);
279 phys_pmd_init(pmd, address, end); 279 phys_pmd_init(pmd, address, end);
280 spin_unlock(&init_mm.page_table_lock); 280 spin_unlock(&init_mm.page_table_lock);
281 __flush_tlb_all(); 281 __flush_tlb_all();
282 } 282 }
283 } 283 }
284 284
285 static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) 285 static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
286 { 286 {
287 long i = pud_index(address); 287 long i = pud_index(address);
288 288
289 pud = pud + i; 289 pud = pud + i;
290 290
291 if (after_bootmem && pud_val(*pud)) { 291 if (after_bootmem && pud_val(*pud)) {
292 phys_pmd_update(pud, address, end); 292 phys_pmd_update(pud, address, end);
293 return; 293 return;
294 } 294 }
295 295
296 for (; i < PTRS_PER_PUD; pud++, i++) { 296 for (; i < PTRS_PER_PUD; pud++, i++) {
297 int map; 297 int map;
298 unsigned long paddr, pmd_phys; 298 unsigned long paddr, pmd_phys;
299 pmd_t *pmd; 299 pmd_t *pmd;
300 300
301 paddr = (address & PGDIR_MASK) + i*PUD_SIZE; 301 paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
302 if (paddr >= end) 302 if (paddr >= end)
303 break; 303 break;
304 304
305 if (!after_bootmem && !e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) { 305 if (!after_bootmem && !e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) {
306 set_pud(pud, __pud(0)); 306 set_pud(pud, __pud(0));
307 continue; 307 continue;
308 } 308 }
309 309
310 pmd = alloc_low_page(&map, &pmd_phys); 310 pmd = alloc_low_page(&map, &pmd_phys);
311 spin_lock(&init_mm.page_table_lock); 311 spin_lock(&init_mm.page_table_lock);
312 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); 312 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
313 phys_pmd_init(pmd, paddr, end); 313 phys_pmd_init(pmd, paddr, end);
314 spin_unlock(&init_mm.page_table_lock); 314 spin_unlock(&init_mm.page_table_lock);
315 unmap_low_page(map); 315 unmap_low_page(map);
316 } 316 }
317 __flush_tlb(); 317 __flush_tlb();
318 } 318 }
319 319
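Each pud slot here covers 1GB (PUD_SIZE), and the physical base for slot i is rebuilt from the start address rounded down to its 512GB pgd region. A standalone check of that index arithmetic with a made-up start address (x86-64 4KB-page shift values assumed):

#include <stdio.h>

#define PUD_SHIFT     30                        /* 1GB per pud entry */
#define PGDIR_SHIFT   39                        /* 512GB per pgd entry */
#define PUD_SIZE      (1UL << PUD_SHIFT)
#define PGDIR_MASK    (~((1UL << PGDIR_SHIFT) - 1))
#define PTRS_PER_PUD  512

static long pud_index(unsigned long address)
{
        return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
}

int main(void)
{
        unsigned long address = 3UL << 30;      /* example: start mapping at 3GB */
        long i = pud_index(address);
        unsigned long paddr = (address & PGDIR_MASK) + i * PUD_SIZE;

        printf("start %#lx -> pud slot %ld, region base %#lx\n", address, i, paddr);
        return 0;
}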
320 static void __init find_early_table_space(unsigned long end) 320 static void __init find_early_table_space(unsigned long end)
321 { 321 {
322 unsigned long puds, pmds, tables, start; 322 unsigned long puds, pmds, tables, start;
323 323
324 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; 324 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
325 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; 325 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
326 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + 326 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
327 round_up(pmds * sizeof(pmd_t), PAGE_SIZE); 327 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
328 328
329 /* RED-PEN putting page tables only on node 0 could 329 /* RED-PEN putting page tables only on node 0 could
330 cause a hotspot and fill up ZONE_DMA. The page tables 330 cause a hotspot and fill up ZONE_DMA. The page tables
331 need roughly 0.5KB per GB. */ 331 need roughly 0.5KB per GB. */
332 start = 0x8000; 332 start = 0x8000;
333 table_start = find_e820_area(start, end, tables); 333 table_start = find_e820_area(start, end, tables);
334 if (table_start == -1UL) 334 if (table_start == -1UL)
335 panic("Cannot find space for the kernel page tables"); 335 panic("Cannot find space for the kernel page tables");
336 336
337 table_start >>= PAGE_SHIFT; 337 table_start >>= PAGE_SHIFT;
338 table_end = table_start; 338 table_end = table_start;
339 339
340 early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", 340 early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
341 end, table_start << PAGE_SHIFT, 341 end, table_start << PAGE_SHIFT,
342 (table_start << PAGE_SHIFT) + tables); 342 (table_start << PAGE_SHIFT) + tables);
343 } 343 }
344 344
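The reservation is simply one pud page per 512GB plus one pmd page per 1GB, each product rounded up to a whole page. The same arithmetic can be run standalone for a hypothetical 4GB machine (sizeof(pud_t) and sizeof(pmd_t) taken as 8 bytes):

#include <stdio.h>

#define PAGE_SIZE   4096UL
#define PUD_SHIFT   30
#define PMD_SHIFT   21
#define PUD_SIZE    (1UL << PUD_SHIFT)
#define PMD_SIZE    (1UL << PMD_SHIFT)

static unsigned long round_up(unsigned long x, unsigned long align)
{
        return (x + align - 1) & ~(align - 1);
}

int main(void)
{
        unsigned long end  = 4UL << 30;         /* hypothetical 4GB of RAM */
        unsigned long puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        unsigned long tables = round_up(puds * 8, PAGE_SIZE) +   /* sizeof(pud_t) == 8 */
                               round_up(pmds * 8, PAGE_SIZE);    /* sizeof(pmd_t) == 8 */

        printf("%lu pud entries, %lu pmd entries, %lu bytes of tables (%lu KB)\n",
               puds, pmds, tables, tables >> 10);
        return 0;
}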
345 /* Setup the direct mapping of the physical memory at PAGE_OFFSET. 345 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
346 This runs before bootmem is initialized and gets pages directly from the 346 This runs before bootmem is initialized and gets pages directly from the
347 physical memory. To access them they are temporarily mapped. */ 347 physical memory. To access them they are temporarily mapped. */
348 void __meminit init_memory_mapping(unsigned long start, unsigned long end) 348 void __meminit init_memory_mapping(unsigned long start, unsigned long end)
349 { 349 {
350 unsigned long next; 350 unsigned long next;
351 351
352 Dprintk("init_memory_mapping\n"); 352 Dprintk("init_memory_mapping\n");
353 353
354 /* 354 /*
355 * Find space for the kernel direct mapping tables. 355 * Find space for the kernel direct mapping tables.
356 * Later we should allocate these tables in the local node of the memory 356 * Later we should allocate these tables in the local node of the memory
357 * mapped. Unfortunately this is done currently before the nodes are 357 * mapped. Unfortunately this is done currently before the nodes are
358 * discovered. 358 * discovered.
359 */ 359 */
360 if (!after_bootmem) 360 if (!after_bootmem)
361 find_early_table_space(end); 361 find_early_table_space(end);
362 362
363 start = (unsigned long)__va(start); 363 start = (unsigned long)__va(start);
364 end = (unsigned long)__va(end); 364 end = (unsigned long)__va(end);
365 365
366 for (; start < end; start = next) { 366 for (; start < end; start = next) {
367 int map; 367 int map;
368 unsigned long pud_phys; 368 unsigned long pud_phys;
369 pgd_t *pgd = pgd_offset_k(start); 369 pgd_t *pgd = pgd_offset_k(start);
370 pud_t *pud; 370 pud_t *pud;
371 371
372 if (after_bootmem) 372 if (after_bootmem)
373 pud = pud_offset(pgd, start & PGDIR_MASK); 373 pud = pud_offset(pgd, start & PGDIR_MASK);
374 else 374 else
375 pud = alloc_low_page(&map, &pud_phys); 375 pud = alloc_low_page(&map, &pud_phys);
376 376
377 next = start + PGDIR_SIZE; 377 next = start + PGDIR_SIZE;
378 if (next > end) 378 if (next > end)
379 next = end; 379 next = end;
380 phys_pud_init(pud, __pa(start), __pa(next)); 380 phys_pud_init(pud, __pa(start), __pa(next));
381 if (!after_bootmem) 381 if (!after_bootmem)
382 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); 382 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
383 unmap_low_page(map); 383 unmap_low_page(map);
384 } 384 }
385 385
386 if (!after_bootmem) 386 if (!after_bootmem)
387 asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features)); 387 asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
388 __flush_tlb_all(); 388 __flush_tlb_all();
389 } 389 }
390 390
391 void __cpuinit zap_low_mappings(int cpu) 391 void __cpuinit zap_low_mappings(int cpu)
392 { 392 {
393 if (cpu == 0) { 393 if (cpu == 0) {
394 pgd_t *pgd = pgd_offset_k(0UL); 394 pgd_t *pgd = pgd_offset_k(0UL);
395 pgd_clear(pgd); 395 pgd_clear(pgd);
396 } else { 396 } else {
397 /* 397 /*
398 * For AP's, zap the low identity mappings by changing the cr3 398 * For AP's, zap the low identity mappings by changing the cr3
399 * to init_level4_pgt and doing local flush tlb all 399 * to init_level4_pgt and doing local flush tlb all
400 */ 400 */
401 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); 401 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
402 } 402 }
403 __flush_tlb_all(); 403 __flush_tlb_all();
404 } 404 }
405 405
406 /* Compute zone sizes for the DMA and DMA32 zones in a node. */ 406 /* Compute zone sizes for the DMA and DMA32 zones in a node. */
407 __init void 407 __init void
408 size_zones(unsigned long *z, unsigned long *h, 408 size_zones(unsigned long *z, unsigned long *h,
409 unsigned long start_pfn, unsigned long end_pfn) 409 unsigned long start_pfn, unsigned long end_pfn)
410 { 410 {
411 int i; 411 int i;
412 unsigned long w; 412 unsigned long w;
413 413
414 for (i = 0; i < MAX_NR_ZONES; i++) 414 for (i = 0; i < MAX_NR_ZONES; i++)
415 z[i] = 0; 415 z[i] = 0;
416 416
417 if (start_pfn < MAX_DMA_PFN) 417 if (start_pfn < MAX_DMA_PFN)
418 z[ZONE_DMA] = MAX_DMA_PFN - start_pfn; 418 z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
419 if (start_pfn < MAX_DMA32_PFN) { 419 if (start_pfn < MAX_DMA32_PFN) {
420 unsigned long dma32_pfn = MAX_DMA32_PFN; 420 unsigned long dma32_pfn = MAX_DMA32_PFN;
421 if (dma32_pfn > end_pfn) 421 if (dma32_pfn > end_pfn)
422 dma32_pfn = end_pfn; 422 dma32_pfn = end_pfn;
423 z[ZONE_DMA32] = dma32_pfn - start_pfn; 423 z[ZONE_DMA32] = dma32_pfn - start_pfn;
424 } 424 }
425 z[ZONE_NORMAL] = end_pfn - start_pfn; 425 z[ZONE_NORMAL] = end_pfn - start_pfn;
426 426
427 /* Remove lower zones from higher ones. */ 427 /* Remove lower zones from higher ones. */
428 w = 0; 428 w = 0;
429 for (i = 0; i < MAX_NR_ZONES; i++) { 429 for (i = 0; i < MAX_NR_ZONES; i++) {
430 if (z[i]) 430 if (z[i])
431 z[i] -= w; 431 z[i] -= w;
432 w += z[i]; 432 w += z[i];
433 } 433 }
434 434
435 /* Compute holes */ 435 /* Compute holes */
436 w = start_pfn; 436 w = start_pfn;
437 for (i = 0; i < MAX_NR_ZONES; i++) { 437 for (i = 0; i < MAX_NR_ZONES; i++) {
438 unsigned long s = w; 438 unsigned long s = w;
439 w += z[i]; 439 w += z[i];
440 h[i] = e820_hole_size(s, w); 440 h[i] = e820_hole_size(s, w);
441 } 441 }
442 442
443 /* Add the space needed for mem_map to the holes too. */ 443 /* Add the space needed for mem_map to the holes too. */
444 for (i = 0; i < MAX_NR_ZONES; i++) 444 for (i = 0; i < MAX_NR_ZONES; i++)
445 h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE; 445 h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
446 446
447 /* The 16MB DMA zone has the kernel and other misc mappings. 447 /* The 16MB DMA zone has the kernel and other misc mappings.
448 Account them too */ 448 Account them too */
449 if (h[ZONE_DMA]) { 449 if (h[ZONE_DMA]) {
450 h[ZONE_DMA] += dma_reserve; 450 h[ZONE_DMA] += dma_reserve;
451 if (h[ZONE_DMA] >= z[ZONE_DMA]) { 451 if (h[ZONE_DMA] >= z[ZONE_DMA]) {
452 printk(KERN_WARNING 452 printk(KERN_WARNING
453 "Kernel too large and filling up ZONE_DMA?\n"); 453 "Kernel too large and filling up ZONE_DMA?\n");
454 h[ZONE_DMA] = z[ZONE_DMA]; 454 h[ZONE_DMA] = z[ZONE_DMA];
455 } 455 }
456 } 456 }
457 } 457 }
458 458
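size_zones() first records cumulative upper bounds and then subtracts the lower zones so that each z[i] becomes a span rather than an endpoint. Here is a standalone trace of that step for a hypothetical 6GB machine starting at pfn 0, with MAX_DMA_PFN and MAX_DMA32_PFN taken at their usual 16MB and 4GB marks and the e820 hole accounting left out:

#include <stdio.h>

enum { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, MAX_NR_ZONES };

#define MAX_DMA_PFN    (16UL << (20 - 12))      /* 16MB in 4KB pages */
#define MAX_DMA32_PFN  (4UL << (30 - 12))       /* 4GB in 4KB pages */

int main(void)
{
        unsigned long z[MAX_NR_ZONES] = { 0 };
        unsigned long start_pfn = 0, end_pfn = 6UL << (30 - 12);  /* 6GB of RAM */
        unsigned long w = 0;
        int i;

        if (start_pfn < MAX_DMA_PFN)
                z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
        if (start_pfn < MAX_DMA32_PFN)
                z[ZONE_DMA32] = (MAX_DMA32_PFN > end_pfn ? end_pfn : MAX_DMA32_PFN) - start_pfn;
        z[ZONE_NORMAL] = end_pfn - start_pfn;

        /* turn cumulative limits into per-zone spans, exactly as size_zones() does */
        for (i = 0; i < MAX_NR_ZONES; i++) {
                if (z[i])
                        z[i] -= w;
                w += z[i];
        }
        for (i = 0; i < MAX_NR_ZONES; i++)
                printf("zone %d: %lu pages (%lu MB)\n", i, z[i], z[i] >> (20 - 12));
        return 0;
}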
459 #ifndef CONFIG_NUMA 459 #ifndef CONFIG_NUMA
460 void __init paging_init(void) 460 void __init paging_init(void)
461 { 461 {
462 unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; 462 unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
463 463
464 memory_present(0, 0, end_pfn); 464 memory_present(0, 0, end_pfn);
465 sparse_init(); 465 sparse_init();
466 size_zones(zones, holes, 0, end_pfn); 466 size_zones(zones, holes, 0, end_pfn);
467 free_area_init_node(0, NODE_DATA(0), zones, 467 free_area_init_node(0, NODE_DATA(0), zones,
468 __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); 468 __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
469 } 469 }
470 #endif 470 #endif
471 471
472 /* Unmap a kernel mapping if it exists. This is useful to avoid prefetches 472 /* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
473 from the CPU leading to inconsistent cache lines. address and size 473 from the CPU leading to inconsistent cache lines. address and size
474 must be aligned to 2MB boundaries. 474 must be aligned to 2MB boundaries.
475 Does nothing when the mapping doesn't exist. */ 475 Does nothing when the mapping doesn't exist. */
476 void __init clear_kernel_mapping(unsigned long address, unsigned long size) 476 void __init clear_kernel_mapping(unsigned long address, unsigned long size)
477 { 477 {
478 unsigned long end = address + size; 478 unsigned long end = address + size;
479 479
480 BUG_ON(address & ~LARGE_PAGE_MASK); 480 BUG_ON(address & ~LARGE_PAGE_MASK);
481 BUG_ON(size & ~LARGE_PAGE_MASK); 481 BUG_ON(size & ~LARGE_PAGE_MASK);
482 482
483 for (; address < end; address += LARGE_PAGE_SIZE) { 483 for (; address < end; address += LARGE_PAGE_SIZE) {
484 pgd_t *pgd = pgd_offset_k(address); 484 pgd_t *pgd = pgd_offset_k(address);
485 pud_t *pud; 485 pud_t *pud;
486 pmd_t *pmd; 486 pmd_t *pmd;
487 if (pgd_none(*pgd)) 487 if (pgd_none(*pgd))
488 continue; 488 continue;
489 pud = pud_offset(pgd, address); 489 pud = pud_offset(pgd, address);
490 if (pud_none(*pud)) 490 if (pud_none(*pud))
491 continue; 491 continue;
492 pmd = pmd_offset(pud, address); 492 pmd = pmd_offset(pud, address);
493 if (!pmd || pmd_none(*pmd)) 493 if (!pmd || pmd_none(*pmd))
494 continue; 494 continue;
495 if (0 == (pmd_val(*pmd) & _PAGE_PSE)) { 495 if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
496 /* Could handle this, but it should not happen currently. */ 496 /* Could handle this, but it should not happen currently. */
497 printk(KERN_ERR 497 printk(KERN_ERR
498 "clear_kernel_mapping: mapping has been split. will leak memory\n"); 498 "clear_kernel_mapping: mapping has been split. will leak memory\n");
499 pmd_ERROR(*pmd); 499 pmd_ERROR(*pmd);
500 } 500 }
501 set_pmd(pmd, __pmd(0)); 501 set_pmd(pmd, __pmd(0));
502 } 502 }
503 __flush_tlb_all(); 503 __flush_tlb_all();
504 } 504 }
505 505
506 /* 506 /*
507 * Memory hotplug specific functions 507 * Memory hotplug specific functions
508 */ 508 */
509 void online_page(struct page *page) 509 void online_page(struct page *page)
510 { 510 {
511 ClearPageReserved(page); 511 ClearPageReserved(page);
512 init_page_count(page); 512 init_page_count(page);
513 __free_page(page); 513 __free_page(page);
514 totalram_pages++; 514 totalram_pages++;
515 num_physpages++; 515 num_physpages++;
516 } 516 }
517 517
518 #ifdef CONFIG_MEMORY_HOTPLUG 518 #ifdef CONFIG_MEMORY_HOTPLUG
519 /* 519 /*
520 * XXX: memory_add_physaddr_to_nid() is to find node id from physical address 520 * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
521 * via probe interface of sysfs. If acpi notifies hot-add event, then it 521 * via probe interface of sysfs. If acpi notifies hot-add event, then it
522 * can tell node id by searching dsdt. But, probe interface doesn't have 522 * can tell node id by searching dsdt. But, probe interface doesn't have
523 * node id. So, return 0 as node id at this time. 523 * node id. So, return 0 as node id at this time.
524 */ 524 */
525 #ifdef CONFIG_NUMA 525 #ifdef CONFIG_NUMA
526 int memory_add_physaddr_to_nid(u64 start) 526 int memory_add_physaddr_to_nid(u64 start)
527 { 527 {
528 return 0; 528 return 0;
529 } 529 }
530 #endif 530 #endif
531 531
532 /* 532 /*
533 * Memory is always added to the NORMAL zone. This means you will never get 533 * Memory is always added to the NORMAL zone. This means you will never get
534 * additional DMA/DMA32 memory. 534 * additional DMA/DMA32 memory.
535 */ 535 */
536 int arch_add_memory(int nid, u64 start, u64 size) 536 int arch_add_memory(int nid, u64 start, u64 size)
537 { 537 {
538 struct pglist_data *pgdat = NODE_DATA(nid); 538 struct pglist_data *pgdat = NODE_DATA(nid);
539 struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2; 539 struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
540 unsigned long start_pfn = start >> PAGE_SHIFT; 540 unsigned long start_pfn = start >> PAGE_SHIFT;
541 unsigned long nr_pages = size >> PAGE_SHIFT; 541 unsigned long nr_pages = size >> PAGE_SHIFT;
542 int ret; 542 int ret;
543 543
544 ret = __add_pages(zone, start_pfn, nr_pages); 544 ret = __add_pages(zone, start_pfn, nr_pages);
545 if (ret) 545 if (ret)
546 goto error; 546 goto error;
547 547
548 init_memory_mapping(start, (start + size -1)); 548 init_memory_mapping(start, (start + size -1));
549 549
550 return ret; 550 return ret;
551 error: 551 error:
552 printk("%s: Problem encountered in __add_pages!\n", __func__); 552 printk("%s: Problem encountered in __add_pages!\n", __func__);
553 return ret; 553 return ret;
554 } 554 }
555 EXPORT_SYMBOL_GPL(arch_add_memory); 555 EXPORT_SYMBOL_GPL(arch_add_memory);
556 556
557 int remove_memory(u64 start, u64 size) 557 int remove_memory(u64 start, u64 size)
558 { 558 {
559 return -EINVAL; 559 return -EINVAL;
560 } 560 }
561 EXPORT_SYMBOL_GPL(remove_memory); 561 EXPORT_SYMBOL_GPL(remove_memory);
562 562
563 #else /* CONFIG_MEMORY_HOTPLUG */ 563 #else /* CONFIG_MEMORY_HOTPLUG */
564 /* 564 /*
565 * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance, 565 * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
566 * just online the pages. 566 * just online the pages.
567 */ 567 */
568 int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages) 568 int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
569 { 569 {
570 int err = -EIO; 570 int err = -EIO;
571 unsigned long pfn; 571 unsigned long pfn;
572 unsigned long total = 0, mem = 0; 572 unsigned long total = 0, mem = 0;
573 for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { 573 for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
574 if (pfn_valid(pfn)) { 574 if (pfn_valid(pfn)) {
575 online_page(pfn_to_page(pfn)); 575 online_page(pfn_to_page(pfn));
576 err = 0; 576 err = 0;
577 mem++; 577 mem++;
578 } 578 }
579 total++; 579 total++;
580 } 580 }
581 if (!err) { 581 if (!err) {
582 z->spanned_pages += total; 582 z->spanned_pages += total;
583 z->present_pages += mem; 583 z->present_pages += mem;
584 z->zone_pgdat->node_spanned_pages += total; 584 z->zone_pgdat->node_spanned_pages += total;
585 z->zone_pgdat->node_present_pages += mem; 585 z->zone_pgdat->node_present_pages += mem;
586 } 586 }
587 return err; 587 return err;
588 } 588 }
589 #endif /* CONFIG_MEMORY_HOTPLUG */ 589 #endif /* CONFIG_MEMORY_HOTPLUG */
590 590
591 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, 591 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
592 kcore_vsyscall; 592 kcore_vsyscall;
593 593
594 void __init mem_init(void) 594 void __init mem_init(void)
595 { 595 {
596 long codesize, reservedpages, datasize, initsize; 596 long codesize, reservedpages, datasize, initsize;
597 597
598 pci_iommu_alloc(); 598 pci_iommu_alloc();
599 599
600 /* How many end-of-memory variables you have, grandma! */ 600 /* How many end-of-memory variables you have, grandma! */
601 max_low_pfn = end_pfn; 601 max_low_pfn = end_pfn;
602 max_pfn = end_pfn; 602 max_pfn = end_pfn;
603 num_physpages = end_pfn; 603 num_physpages = end_pfn;
604 high_memory = (void *) __va(end_pfn * PAGE_SIZE); 604 high_memory = (void *) __va(end_pfn * PAGE_SIZE);
605 605
606 /* clear the zero-page */ 606 /* clear the zero-page */
607 memset(empty_zero_page, 0, PAGE_SIZE); 607 memset(empty_zero_page, 0, PAGE_SIZE);
608 608
609 reservedpages = 0; 609 reservedpages = 0;
610 610
611 /* this will put all low memory onto the freelists */ 611 /* this will put all low memory onto the freelists */
612 #ifdef CONFIG_NUMA 612 #ifdef CONFIG_NUMA
613 totalram_pages = numa_free_all_bootmem(); 613 totalram_pages = numa_free_all_bootmem();
614 #else 614 #else
615 totalram_pages = free_all_bootmem(); 615 totalram_pages = free_all_bootmem();
616 #endif 616 #endif
617 reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn); 617 reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
618 618
619 after_bootmem = 1; 619 after_bootmem = 1;
620 620
621 codesize = (unsigned long) &_etext - (unsigned long) &_text; 621 codesize = (unsigned long) &_etext - (unsigned long) &_text;
622 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 622 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
623 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 623 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
624 624
625 /* Register memory areas for /proc/kcore */ 625 /* Register memory areas for /proc/kcore */
626 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); 626 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
627 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, 627 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
628 VMALLOC_END-VMALLOC_START); 628 VMALLOC_END-VMALLOC_START);
629 kclist_add(&kcore_kernel, &_stext, _end - _stext); 629 kclist_add(&kcore_kernel, &_stext, _end - _stext);
630 kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN); 630 kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
631 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, 631 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
632 VSYSCALL_END - VSYSCALL_START); 632 VSYSCALL_END - VSYSCALL_START);
633 633
634 printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", 634 printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
635 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 635 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
636 end_pfn << (PAGE_SHIFT-10), 636 end_pfn << (PAGE_SHIFT-10),
637 codesize >> 10, 637 codesize >> 10,
638 reservedpages << (PAGE_SHIFT-10), 638 reservedpages << (PAGE_SHIFT-10),
639 datasize >> 10, 639 datasize >> 10,
640 initsize >> 10); 640 initsize >> 10);
641 641
642 #ifdef CONFIG_SMP 642 #ifdef CONFIG_SMP
643 /* 643 /*
644 * Sync boot_level4_pgt mappings with the init_level4_pgt 644 * Sync boot_level4_pgt mappings with the init_level4_pgt
645 * except for the low identity mappings which are already zapped 645 * except for the low identity mappings which are already zapped
646 * in init_level4_pgt. This sync-up is essential for AP's bringup 646 * in init_level4_pgt. This sync-up is essential for AP's bringup
647 */ 647 */
648 memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); 648 memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
649 #endif 649 #endif
650 } 650 }
651 651
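The banner printed at the end of mem_init() mixes two unit conversions: byte quantities are shifted right by 10 to get kilobytes, while page counts are shifted left by PAGE_SHIFT-10 (a factor of 4 with 4KB pages). A standalone illustration with made-up numbers:

#include <stdio.h>

#define PAGE_SHIFT  12

int main(void)
{
        unsigned long free_pages = 1000000UL;           /* pretend nr_free_pages() returned this */
        unsigned long codesize   = 2 * 1024 * 1024;     /* pretend _etext - _text is 2MB */

        printf("%luk available, %ldk kernel code\n",
               free_pages << (PAGE_SHIFT - 10),         /* pages -> KB */
               (long)(codesize >> 10));                 /* bytes -> KB */
        return 0;
}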
652 void free_init_pages(char *what, unsigned long begin, unsigned long end) 652 void free_init_pages(char *what, unsigned long begin, unsigned long end)
653 { 653 {
654 unsigned long addr; 654 unsigned long addr;
655 655
656 if (begin >= end) 656 if (begin >= end)
657 return; 657 return;
658 658
659 printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); 659 printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
660 for (addr = begin; addr < end; addr += PAGE_SIZE) { 660 for (addr = begin; addr < end; addr += PAGE_SIZE) {
661 ClearPageReserved(virt_to_page(addr)); 661 ClearPageReserved(virt_to_page(addr));
662 init_page_count(virt_to_page(addr)); 662 init_page_count(virt_to_page(addr));
663 memset((void *)(addr & ~(PAGE_SIZE-1)), 663 memset((void *)(addr & ~(PAGE_SIZE-1)),
664 POISON_FREE_INITMEM, PAGE_SIZE); 664 POISON_FREE_INITMEM, PAGE_SIZE);
665 free_page(addr); 665 free_page(addr);
666 totalram_pages++; 666 totalram_pages++;
667 } 667 }
668 } 668 }
669 669
670 void free_initmem(void) 670 void free_initmem(void)
671 { 671 {
672 memset(__initdata_begin, POISON_FREE_INITDATA, 672 memset(__initdata_begin, POISON_FREE_INITDATA,
673 __initdata_end - __initdata_begin); 673 __initdata_end - __initdata_begin);
674 free_init_pages("unused kernel memory", 674 free_init_pages("unused kernel memory",
675 (unsigned long)(&__init_begin), 675 (unsigned long)(&__init_begin),
676 (unsigned long)(&__init_end)); 676 (unsigned long)(&__init_end));
677 } 677 }
678 678
679 #ifdef CONFIG_DEBUG_RODATA 679 #ifdef CONFIG_DEBUG_RODATA
680 680
681 extern char __start_rodata, __end_rodata;
682 void mark_rodata_ro(void) 681 void mark_rodata_ro(void)
683 { 682 {
684 unsigned long addr = (unsigned long)&__start_rodata; 683 unsigned long addr = (unsigned long)__start_rodata;
685 684
686 for (; addr < (unsigned long)&__end_rodata; addr += PAGE_SIZE) 685 for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
687 change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); 686 change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
688 687
689 printk ("Write protecting the kernel read-only data: %luk\n", 688 printk ("Write protecting the kernel read-only data: %luk\n",
690 (&__end_rodata - &__start_rodata) >> 10); 689 (__end_rodata - __start_rodata) >> 10);
691 690
692 /* 691 /*
693 * change_page_attr_addr() requires a global_flush_tlb() call after it. 692 * change_page_attr_addr() requires a global_flush_tlb() call after it.
694 * We do this after the printk so that if something went wrong in the 693 * We do this after the printk so that if something went wrong in the
695 * change, the printk gets out at least to give a better debug hint 694 * change, the printk gets out at least to give a better debug hint
696 * of who is the culprit. 695 * of who is the culprit.
697 */ 696 */
698 global_flush_tlb(); 697 global_flush_tlb();
699 } 698 }
700 #endif 699 #endif
701 700
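The hunk above drops the file-local 'extern char __start_rodata, __end_rodata;' in favour of the array-style declarations now provided generically, which is also why the '&' operators disappear: a plain char symbol has to be taken by address, whereas a char[] name already decays to the start of the section. A minimal sketch of the two spellings (the old form appears only in the comment, since the two declarations cannot coexist in one file):

/*
 * Old per-file style, as removed above:
 *
 *     extern char __start_rodata, __end_rodata;
 *     unsigned long size = &__end_rodata - &__start_rodata;
 *
 * New shared style: the array names decay to pointers, so no '&' is needed.
 */
#include <asm-generic/sections.h>

unsigned long rodata_size(void)
{
        return __end_rodata - __start_rodata;
}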
702 #ifdef CONFIG_BLK_DEV_INITRD 701 #ifdef CONFIG_BLK_DEV_INITRD
703 void free_initrd_mem(unsigned long start, unsigned long end) 702 void free_initrd_mem(unsigned long start, unsigned long end)
704 { 703 {
705 free_init_pages("initrd memory", start, end); 704 free_init_pages("initrd memory", start, end);
706 } 705 }
707 #endif 706 #endif
708 707
709 void __init reserve_bootmem_generic(unsigned long phys, unsigned len) 708 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
710 { 709 {
711 /* Should check here against the e820 map to avoid double free */ 710 /* Should check here against the e820 map to avoid double free */
712 #ifdef CONFIG_NUMA 711 #ifdef CONFIG_NUMA
713 int nid = phys_to_nid(phys); 712 int nid = phys_to_nid(phys);
714 reserve_bootmem_node(NODE_DATA(nid), phys, len); 713 reserve_bootmem_node(NODE_DATA(nid), phys, len);
715 #else 714 #else
716 reserve_bootmem(phys, len); 715 reserve_bootmem(phys, len);
717 #endif 716 #endif
718 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) 717 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
719 dma_reserve += len / PAGE_SIZE; 718 dma_reserve += len / PAGE_SIZE;
720 } 719 }
721 720
722 int kern_addr_valid(unsigned long addr) 721 int kern_addr_valid(unsigned long addr)
723 { 722 {
724 unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; 723 unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
725 pgd_t *pgd; 724 pgd_t *pgd;
726 pud_t *pud; 725 pud_t *pud;
727 pmd_t *pmd; 726 pmd_t *pmd;
728 pte_t *pte; 727 pte_t *pte;
729 728
730 if (above != 0 && above != -1UL) 729 if (above != 0 && above != -1UL)
731 return 0; 730 return 0;
732 731
733 pgd = pgd_offset_k(addr); 732 pgd = pgd_offset_k(addr);
734 if (pgd_none(*pgd)) 733 if (pgd_none(*pgd))
735 return 0; 734 return 0;
736 735
737 pud = pud_offset(pgd, addr); 736 pud = pud_offset(pgd, addr);
738 if (pud_none(*pud)) 737 if (pud_none(*pud))
739 return 0; 738 return 0;
740 739
741 pmd = pmd_offset(pud, addr); 740 pmd = pmd_offset(pud, addr);
742 if (pmd_none(*pmd)) 741 if (pmd_none(*pmd))
743 return 0; 742 return 0;
744 if (pmd_large(*pmd)) 743 if (pmd_large(*pmd))
745 return pfn_valid(pmd_pfn(*pmd)); 744 return pfn_valid(pmd_pfn(*pmd));
746 745
747 pte = pte_offset_kernel(pmd, addr); 746 pte = pte_offset_kernel(pmd, addr);
748 if (pte_none(*pte)) 747 if (pte_none(*pte))
749 return 0; 748 return 0;
750 return pfn_valid(pte_pfn(*pte)); 749 return pfn_valid(pte_pfn(*pte));
751 } 750 }
752 751
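The first test in kern_addr_valid() is the canonical-address check: shifting the sign-extended address right by __VIRTUAL_MASK_SHIFT must leave either all zero bits or all one bits. A standalone version, taking the shift as 47 for illustration (that exact value is an assumption and only moves the boundary):

#include <stdio.h>

#define __VIRTUAL_MASK_SHIFT  47        /* assumed: 48-bit, sign-extended virtual addresses */

static int addr_is_canonical(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        return above == 0 || above == -1UL;
}

int main(void)
{
        printf("%d %d %d\n",
               addr_is_canonical(0x00007fffffffe000UL),  /* user range: accepted */
               addr_is_canonical(0xffff880000000000UL),  /* kernel range: accepted */
               addr_is_canonical(0x0000800000000000UL)); /* non-canonical: rejected */
        return 0;
}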
753 #ifdef CONFIG_SYSCTL 752 #ifdef CONFIG_SYSCTL
754 #include <linux/sysctl.h> 753 #include <linux/sysctl.h>
755 754
756 extern int exception_trace, page_fault_trace; 755 extern int exception_trace, page_fault_trace;
757 756
758 static ctl_table debug_table2[] = { 757 static ctl_table debug_table2[] = {
759 { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, 758 { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
760 proc_dointvec }, 759 proc_dointvec },
761 { 0, } 760 { 0, }
762 }; 761 };
763 762
764 static ctl_table debug_root_table2[] = { 763 static ctl_table debug_root_table2[] = {
765 { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555, 764 { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
766 .child = debug_table2 }, 765 .child = debug_table2 },
767 { 0 }, 766 { 0 },
768 }; 767 };
769 768
770 static __init int x8664_sysctl_init(void) 769 static __init int x8664_sysctl_init(void)
771 { 770 {
772 register_sysctl_table(debug_root_table2, 1); 771 register_sysctl_table(debug_root_table2, 1);
773 return 0; 772 return 0;
774 } 773 }
775 __initcall(x8664_sysctl_init); 774 __initcall(x8664_sysctl_init);
776 #endif 775 #endif
777 776
778 /* A pseudo VMA to allow ptrace access for the vsyscall page. This only 777 /* A pseudo VMA to allow ptrace access for the vsyscall page. This only
779 covers the 64bit vsyscall page now. 32bit has a real VMA now and does 778 covers the 64bit vsyscall page now. 32bit has a real VMA now and does
780 not need special handling anymore. */ 779 not need special handling anymore. */
781 780
782 static struct vm_area_struct gate_vma = { 781 static struct vm_area_struct gate_vma = {
783 .vm_start = VSYSCALL_START, 782 .vm_start = VSYSCALL_START,
784 .vm_end = VSYSCALL_END, 783 .vm_end = VSYSCALL_END,
785 .vm_page_prot = PAGE_READONLY 784 .vm_page_prot = PAGE_READONLY
786 }; 785 };
787 786
788 struct vm_area_struct *get_gate_vma(struct task_struct *tsk) 787 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
789 { 788 {
790 #ifdef CONFIG_IA32_EMULATION 789 #ifdef CONFIG_IA32_EMULATION
791 if (test_tsk_thread_flag(tsk, TIF_IA32)) 790 if (test_tsk_thread_flag(tsk, TIF_IA32))
792 return NULL; 791 return NULL;
793 #endif 792 #endif
794 return &gate_vma; 793 return &gate_vma;
795 } 794 }
796 795
797 int in_gate_area(struct task_struct *task, unsigned long addr) 796 int in_gate_area(struct task_struct *task, unsigned long addr)
798 { 797 {
799 struct vm_area_struct *vma = get_gate_vma(task); 798 struct vm_area_struct *vma = get_gate_vma(task);
800 if (!vma) 799 if (!vma)
801 return 0; 800 return 0;
802 return (addr >= vma->vm_start) && (addr < vma->vm_end); 801 return (addr >= vma->vm_start) && (addr < vma->vm_end);
803 } 802 }
804 803
805 /* Use this when you have no reliable task/vma, typically from interrupt 804 /* Use this when you have no reliable task/vma, typically from interrupt
806 * context. It is less reliable than using the task's vma and may give 805 * context. It is less reliable than using the task's vma and may give
807 * false positives. 806 * false positives.
808 */ 807 */
809 int in_gate_area_no_task(unsigned long addr) 808 int in_gate_area_no_task(unsigned long addr)
810 { 809 {
811 return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); 810 return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
812 } 811 }
813 812
include/asm-generic/sections.h
1 #ifndef _ASM_GENERIC_SECTIONS_H_ 1 #ifndef _ASM_GENERIC_SECTIONS_H_
2 #define _ASM_GENERIC_SECTIONS_H_ 2 #define _ASM_GENERIC_SECTIONS_H_
3 3
4 /* References to section boundaries */ 4 /* References to section boundaries */
5 5
6 extern char _text[], _stext[], _etext[]; 6 extern char _text[], _stext[], _etext[];
7 extern char _data[], _sdata[], _edata[]; 7 extern char _data[], _sdata[], _edata[];
8 extern char __bss_start[], __bss_stop[]; 8 extern char __bss_start[], __bss_stop[];
9 extern char __init_begin[], __init_end[]; 9 extern char __init_begin[], __init_end[];
10 extern char _sinittext[], _einittext[]; 10 extern char _sinittext[], _einittext[];
11 extern char _sextratext[] __attribute__((weak)); 11 extern char _sextratext[] __attribute__((weak));
12 extern char _eextratext[] __attribute__((weak)); 12 extern char _eextratext[] __attribute__((weak));
13 extern char _end[]; 13 extern char _end[];
14 extern char __per_cpu_start[], __per_cpu_end[]; 14 extern char __per_cpu_start[], __per_cpu_end[];
15 extern char __kprobes_text_start[], __kprobes_text_end[]; 15 extern char __kprobes_text_start[], __kprobes_text_end[];
16 extern char __initdata_begin[], __initdata_end[]; 16 extern char __initdata_begin[], __initdata_end[];
17 extern char __start_rodata[], __end_rodata[];
17 18
18 #endif /* _ASM_GENERIC_SECTIONS_H_ */ 19 #endif /* _ASM_GENERIC_SECTIONS_H_ */
19 20
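With the two markers added, any file can reach the read-only data boundaries through the generic header instead of carrying its own extern declarations. A hedged sketch of such a consumer (the function name is made up; only the declarations come from the header above):

#include <linux/kernel.h>
#include <asm-generic/sections.h>

/* Report how much read-only data the image carries; illustrative only. */
void report_rodata(void)
{
        printk(KERN_INFO "rodata: %lu bytes at %p\n",
               (unsigned long)(__end_rodata - __start_rodata), __start_rodata);
}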