Commit 3a58a2a6c879b2e47daafd6e641661c50ac9da5a
Committed by: Ingo Molnar
1 parent: cfb0e53b05
Exists in: master and in 7 other branches
x86: introduce init_memory_mapping for 32bit #3
move kva related early back to initmem_init for numa32

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 3 changed files with 6 additions and 27 deletions (Inline Diff)
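What the change does, in brief (read from the new-side columns of the discontig_32.c diff below): remap_numa_kva() becomes static, the KVA remap area is still found and reserved inside initmem_init(), remap_numa_kva() is now called from initmem_init() right after the per-node remap allocators and pgdats are set up, and NODE_DATA() is zeroed for every online node instead of only node 0. The following is a minimal userspace sketch of the resulting initmem_init() call order; the stub functions and the two-node loop are illustrative stand-ins, not kernel code from this patch:

	#include <stdio.h>

	/* Illustrative stubs only; the real routines live in arch/x86/mm/discontig_32.c. */
	static void get_memcfg_numa(void)         { puts("get_memcfg_numa"); }
	static void reserve_kva_area(void)        { puts("find_e820_area + reserve_early(\"KVA PG\")"); }
	static void init_remap_allocator(int nid) { printf("init_remap_allocator(%d)\n", nid); }
	static void allocate_pgdat(int nid)       { printf("allocate_pgdat(%d)\n", nid); }
	static void remap_numa_kva(void)          { puts("remap_numa_kva (now static)"); }
	static void clear_node_data(int nid)      { printf("memset(NODE_DATA(%d), 0, ...)\n", nid); }
	static void setup_bootmem_allocator(void) { puts("setup_bootmem_allocator"); }

	int main(void)
	{
		int nid, nr_online_nodes = 2;           /* assume two online nodes for the demo */

		get_memcfg_numa();
		reserve_kva_area();                     /* KVA space found and reserved, as before */
		for (nid = 0; nid < nr_online_nodes; nid++) {
			init_remap_allocator(nid);
			allocate_pgdat(nid);
		}
		remap_numa_kva();                       /* call now made from initmem_init() */
		for (nid = 0; nid < nr_online_nodes; nid++)
			clear_node_data(nid);           /* now every online node, not just node 0 */
		setup_bootmem_allocator();
		return 0;
	}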
arch/x86/mm/discontig_32.c
1 | /* | 1 | /* |
2 | * Written by: Patricia Gaughen <gone@us.ibm.com>, IBM Corporation | 2 | * Written by: Patricia Gaughen <gone@us.ibm.com>, IBM Corporation |
3 | * August 2002: added remote node KVA remap - Martin J. Bligh | 3 | * August 2002: added remote node KVA remap - Martin J. Bligh |
4 | * | 4 | * |
5 | * Copyright (C) 2002, IBM Corp. | 5 | * Copyright (C) 2002, IBM Corp. |
6 | * | 6 | * |
7 | * All rights reserved. | 7 | * All rights reserved. |
8 | * | 8 | * |
9 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of the GNU General Public License as published by | 10 | * it under the terms of the GNU General Public License as published by |
11 | * the Free Software Foundation; either version 2 of the License, or | 11 | * the Free Software Foundation; either version 2 of the License, or |
12 | * (at your option) any later version. | 12 | * (at your option) any later version. |
13 | * | 13 | * |
14 | * This program is distributed in the hope that it will be useful, but | 14 | * This program is distributed in the hope that it will be useful, but |
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | 15 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | 16 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or |
17 | * NON INFRINGEMENT. See the GNU General Public License for more | 17 | * NON INFRINGEMENT. See the GNU General Public License for more |
18 | * details. | 18 | * details. |
19 | * | 19 | * |
20 | * You should have received a copy of the GNU General Public License | 20 | * You should have received a copy of the GNU General Public License |
21 | * along with this program; if not, write to the Free Software | 21 | * along with this program; if not, write to the Free Software |
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
26 | #include <linux/bootmem.h> | 26 | #include <linux/bootmem.h> |
27 | #include <linux/mmzone.h> | 27 | #include <linux/mmzone.h> |
28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
29 | #include <linux/initrd.h> | 29 | #include <linux/initrd.h> |
30 | #include <linux/nodemask.h> | 30 | #include <linux/nodemask.h> |
31 | #include <linux/module.h> | 31 | #include <linux/module.h> |
32 | #include <linux/kexec.h> | 32 | #include <linux/kexec.h> |
33 | #include <linux/pfn.h> | 33 | #include <linux/pfn.h> |
34 | #include <linux/swap.h> | 34 | #include <linux/swap.h> |
35 | #include <linux/acpi.h> | 35 | #include <linux/acpi.h> |
36 | 36 | ||
37 | #include <asm/e820.h> | 37 | #include <asm/e820.h> |
38 | #include <asm/setup.h> | 38 | #include <asm/setup.h> |
39 | #include <asm/mmzone.h> | 39 | #include <asm/mmzone.h> |
40 | #include <asm/bios_ebda.h> | 40 | #include <asm/bios_ebda.h> |
41 | #include <asm/proto.h> | 41 | #include <asm/proto.h> |
42 | 42 | ||
43 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 43 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
44 | EXPORT_SYMBOL(node_data); | 44 | EXPORT_SYMBOL(node_data); |
45 | static bootmem_data_t node0_bdata; | 45 | static bootmem_data_t node0_bdata; |
46 | 46 | ||
47 | /* | 47 | /* |
48 | * numa interface - we expect the numa architecture specific code to have | 48 | * numa interface - we expect the numa architecture specific code to have |
49 | * populated the following initialisation. | 49 | * populated the following initialisation. |
50 | * | 50 | * |
51 | * 1) node_online_map - the map of all nodes configured (online) in the system | 51 | * 1) node_online_map - the map of all nodes configured (online) in the system |
52 | * 2) node_start_pfn - the starting page frame number for a node | 52 | * 2) node_start_pfn - the starting page frame number for a node |
53 | * 3) node_end_pfn - the ending page fram number for a node | 53 | * 3) node_end_pfn - the ending page fram number for a node |
54 | */ | 54 | */ |
55 | unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; | 55 | unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; |
56 | unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; | 56 | unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; |
57 | 57 | ||
58 | 58 | ||
59 | #ifdef CONFIG_DISCONTIGMEM | 59 | #ifdef CONFIG_DISCONTIGMEM |
60 | /* | 60 | /* |
61 | * 4) physnode_map - the mapping between a pfn and owning node | 61 | * 4) physnode_map - the mapping between a pfn and owning node |
62 | * physnode_map keeps track of the physical memory layout of a generic | 62 | * physnode_map keeps track of the physical memory layout of a generic |
63 | * numa node on a 64Mb break (each element of the array will | 63 | * numa node on a 64Mb break (each element of the array will |
64 | * represent 64Mb of memory and will be marked by the node id. so, | 64 | * represent 64Mb of memory and will be marked by the node id. so, |
65 | * if the first gig is on node 0, and the second gig is on node 1 | 65 | * if the first gig is on node 0, and the second gig is on node 1 |
66 | * physnode_map will contain: | 66 | * physnode_map will contain: |
67 | * | 67 | * |
68 | * physnode_map[0-15] = 0; | 68 | * physnode_map[0-15] = 0; |
69 | * physnode_map[16-31] = 1; | 69 | * physnode_map[16-31] = 1; |
70 | * physnode_map[32- ] = -1; | 70 | * physnode_map[32- ] = -1; |
71 | */ | 71 | */ |
72 | s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; | 72 | s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; |
73 | EXPORT_SYMBOL(physnode_map); | 73 | EXPORT_SYMBOL(physnode_map); |
74 | 74 | ||
75 | void memory_present(int nid, unsigned long start, unsigned long end) | 75 | void memory_present(int nid, unsigned long start, unsigned long end) |
76 | { | 76 | { |
77 | unsigned long pfn; | 77 | unsigned long pfn; |
78 | 78 | ||
79 | printk(KERN_INFO "Node: %d, start_pfn: %lx, end_pfn: %lx\n", | 79 | printk(KERN_INFO "Node: %d, start_pfn: %lx, end_pfn: %lx\n", |
80 | nid, start, end); | 80 | nid, start, end); |
81 | printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); | 81 | printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); |
82 | printk(KERN_DEBUG " "); | 82 | printk(KERN_DEBUG " "); |
83 | for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { | 83 | for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { |
84 | physnode_map[pfn / PAGES_PER_ELEMENT] = nid; | 84 | physnode_map[pfn / PAGES_PER_ELEMENT] = nid; |
85 | printk(KERN_CONT "%lx ", pfn); | 85 | printk(KERN_CONT "%lx ", pfn); |
86 | } | 86 | } |
87 | printk(KERN_CONT "\n"); | 87 | printk(KERN_CONT "\n"); |
88 | } | 88 | } |
89 | 89 | ||
90 | unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, | 90 | unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, |
91 | unsigned long end_pfn) | 91 | unsigned long end_pfn) |
92 | { | 92 | { |
93 | unsigned long nr_pages = end_pfn - start_pfn; | 93 | unsigned long nr_pages = end_pfn - start_pfn; |
94 | 94 | ||
95 | if (!nr_pages) | 95 | if (!nr_pages) |
96 | return 0; | 96 | return 0; |
97 | 97 | ||
98 | return (nr_pages + 1) * sizeof(struct page); | 98 | return (nr_pages + 1) * sizeof(struct page); |
99 | } | 99 | } |
100 | #endif | 100 | #endif |
101 | 101 | ||
102 | extern unsigned long find_max_low_pfn(void); | 102 | extern unsigned long find_max_low_pfn(void); |
103 | extern unsigned long highend_pfn, highstart_pfn; | 103 | extern unsigned long highend_pfn, highstart_pfn; |
104 | 104 | ||
105 | #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) | 105 | #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) |
106 | 106 | ||
107 | unsigned long node_remap_size[MAX_NUMNODES]; | 107 | unsigned long node_remap_size[MAX_NUMNODES]; |
108 | static void *node_remap_start_vaddr[MAX_NUMNODES]; | 108 | static void *node_remap_start_vaddr[MAX_NUMNODES]; |
109 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 109 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
110 | 110 | ||
111 | static unsigned long kva_start_pfn; | 111 | static unsigned long kva_start_pfn; |
112 | static unsigned long kva_pages; | 112 | static unsigned long kva_pages; |
113 | /* | 113 | /* |
114 | * FLAT - support for basic PC memory model with discontig enabled, essentially | 114 | * FLAT - support for basic PC memory model with discontig enabled, essentially |
115 | * a single node with all available processors in it with a flat | 115 | * a single node with all available processors in it with a flat |
116 | * memory map. | 116 | * memory map. |
117 | */ | 117 | */ |
118 | int __init get_memcfg_numa_flat(void) | 118 | int __init get_memcfg_numa_flat(void) |
119 | { | 119 | { |
120 | printk(KERN_DEBUG "NUMA - single node, flat memory mode\n"); | 120 | printk(KERN_DEBUG "NUMA - single node, flat memory mode\n"); |
121 | 121 | ||
122 | node_start_pfn[0] = 0; | 122 | node_start_pfn[0] = 0; |
123 | node_end_pfn[0] = max_pfn; | 123 | node_end_pfn[0] = max_pfn; |
124 | e820_register_active_regions(0, 0, max_pfn); | 124 | e820_register_active_regions(0, 0, max_pfn); |
125 | memory_present(0, 0, max_pfn); | 125 | memory_present(0, 0, max_pfn); |
126 | node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); | 126 | node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); |
127 | 127 | ||
128 | /* Indicate there is one node available. */ | 128 | /* Indicate there is one node available. */ |
129 | nodes_clear(node_online_map); | 129 | nodes_clear(node_online_map); |
130 | node_set_online(0); | 130 | node_set_online(0); |
131 | return 1; | 131 | return 1; |
132 | } | 132 | } |
133 | 133 | ||
134 | /* | 134 | /* |
135 | * Find the highest page frame number we have available for the node | 135 | * Find the highest page frame number we have available for the node |
136 | */ | 136 | */ |
137 | static void __init propagate_e820_map_node(int nid) | 137 | static void __init propagate_e820_map_node(int nid) |
138 | { | 138 | { |
139 | if (node_end_pfn[nid] > max_pfn) | 139 | if (node_end_pfn[nid] > max_pfn) |
140 | node_end_pfn[nid] = max_pfn; | 140 | node_end_pfn[nid] = max_pfn; |
141 | /* | 141 | /* |
142 | * if a user has given mem=XXXX, then we need to make sure | 142 | * if a user has given mem=XXXX, then we need to make sure |
143 | * that the node _starts_ before that, too, not just ends | 143 | * that the node _starts_ before that, too, not just ends |
144 | */ | 144 | */ |
145 | if (node_start_pfn[nid] > max_pfn) | 145 | if (node_start_pfn[nid] > max_pfn) |
146 | node_start_pfn[nid] = max_pfn; | 146 | node_start_pfn[nid] = max_pfn; |
147 | BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]); | 147 | BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]); |
148 | } | 148 | } |
149 | 149 | ||
150 | /* | 150 | /* |
151 | * Allocate memory for the pg_data_t for this node via a crude pre-bootmem | 151 | * Allocate memory for the pg_data_t for this node via a crude pre-bootmem |
152 | * method. For node zero take this from the bottom of memory, for | 152 | * method. For node zero take this from the bottom of memory, for |
153 | * subsequent nodes place them at node_remap_start_vaddr which contains | 153 | * subsequent nodes place them at node_remap_start_vaddr which contains |
154 | * node local data in physically node local memory. See setup_memory() | 154 | * node local data in physically node local memory. See setup_memory() |
155 | * for details. | 155 | * for details. |
156 | */ | 156 | */ |
157 | static void __init allocate_pgdat(int nid) | 157 | static void __init allocate_pgdat(int nid) |
158 | { | 158 | { |
159 | if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid]) | 159 | if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid]) |
160 | NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; | 160 | NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; |
161 | else { | 161 | else { |
162 | unsigned long pgdat_phys; | 162 | unsigned long pgdat_phys; |
163 | pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT, | 163 | pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT, |
164 | (nid ? max_low_pfn:max_pfn_mapped)<<PAGE_SHIFT, | 164 | (nid ? max_low_pfn:max_pfn_mapped)<<PAGE_SHIFT, |
165 | sizeof(pg_data_t), | 165 | sizeof(pg_data_t), |
166 | PAGE_SIZE); | 166 | PAGE_SIZE); |
167 | NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); | 167 | NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); |
168 | reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), | 168 | reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), |
169 | "NODE_DATA"); | 169 | "NODE_DATA"); |
170 | } | 170 | } |
171 | printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", | 171 | printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", |
172 | nid, (unsigned long)NODE_DATA(nid)); | 172 | nid, (unsigned long)NODE_DATA(nid)); |
173 | } | 173 | } |
174 | 174 | ||
175 | /* | 175 | /* |
176 | * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel | 176 | * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel |
177 | * virtual address space (KVA) is reserved and portions of nodes are mapped | 177 | * virtual address space (KVA) is reserved and portions of nodes are mapped |
178 | * using it. This is to allow node-local memory to be allocated for | 178 | * using it. This is to allow node-local memory to be allocated for |
179 | * structures that would normally require ZONE_NORMAL. The memory is | 179 | * structures that would normally require ZONE_NORMAL. The memory is |
180 | * allocated with alloc_remap() and callers should be prepared to allocate | 180 | * allocated with alloc_remap() and callers should be prepared to allocate |
181 | * from the bootmem allocator instead. | 181 | * from the bootmem allocator instead. |
182 | */ | 182 | */ |
183 | static unsigned long node_remap_start_pfn[MAX_NUMNODES]; | 183 | static unsigned long node_remap_start_pfn[MAX_NUMNODES]; |
184 | static void *node_remap_end_vaddr[MAX_NUMNODES]; | 184 | static void *node_remap_end_vaddr[MAX_NUMNODES]; |
185 | static void *node_remap_alloc_vaddr[MAX_NUMNODES]; | 185 | static void *node_remap_alloc_vaddr[MAX_NUMNODES]; |
186 | static unsigned long node_remap_offset[MAX_NUMNODES]; | 186 | static unsigned long node_remap_offset[MAX_NUMNODES]; |
187 | 187 | ||
188 | void *alloc_remap(int nid, unsigned long size) | 188 | void *alloc_remap(int nid, unsigned long size) |
189 | { | 189 | { |
190 | void *allocation = node_remap_alloc_vaddr[nid]; | 190 | void *allocation = node_remap_alloc_vaddr[nid]; |
191 | 191 | ||
192 | size = ALIGN(size, L1_CACHE_BYTES); | 192 | size = ALIGN(size, L1_CACHE_BYTES); |
193 | 193 | ||
194 | if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) | 194 | if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) |
195 | return 0; | 195 | return 0; |
196 | 196 | ||
197 | node_remap_alloc_vaddr[nid] += size; | 197 | node_remap_alloc_vaddr[nid] += size; |
198 | memset(allocation, 0, size); | 198 | memset(allocation, 0, size); |
199 | 199 | ||
200 | return allocation; | 200 | return allocation; |
201 | } | 201 | } |
202 | 202 | ||
203 | void __init remap_numa_kva(void) | 203 | static void __init remap_numa_kva(void) |
204 | { | 204 | { |
205 | void *vaddr; | 205 | void *vaddr; |
206 | unsigned long pfn; | 206 | unsigned long pfn; |
207 | int node; | 207 | int node; |
208 | 208 | ||
209 | for_each_online_node(node) { | 209 | for_each_online_node(node) { |
210 | printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); | 210 | printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); |
211 | for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { | 211 | for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { |
212 | vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); | 212 | vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); |
213 | printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n", | 213 | printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n", |
214 | (unsigned long)vaddr, | 214 | (unsigned long)vaddr, |
215 | node_remap_start_pfn[node] + pfn); | 215 | node_remap_start_pfn[node] + pfn); |
216 | set_pmd_pfn((ulong) vaddr, | 216 | set_pmd_pfn((ulong) vaddr, |
217 | node_remap_start_pfn[node] + pfn, | 217 | node_remap_start_pfn[node] + pfn, |
218 | PAGE_KERNEL_LARGE); | 218 | PAGE_KERNEL_LARGE); |
219 | } | 219 | } |
220 | } | 220 | } |
221 | } | 221 | } |
222 | 222 | ||
223 | static unsigned long calculate_numa_remap_pages(void) | 223 | static unsigned long calculate_numa_remap_pages(void) |
224 | { | 224 | { |
225 | int nid; | 225 | int nid; |
226 | unsigned long size, reserve_pages = 0; | 226 | unsigned long size, reserve_pages = 0; |
227 | 227 | ||
228 | for_each_online_node(nid) { | 228 | for_each_online_node(nid) { |
229 | u64 node_kva_target; | 229 | u64 node_kva_target; |
230 | u64 node_kva_final; | 230 | u64 node_kva_final; |
231 | 231 | ||
232 | /* | 232 | /* |
233 | * The acpi/srat node info can show hot-add memroy zones | 233 | * The acpi/srat node info can show hot-add memroy zones |
234 | * where memory could be added but not currently present. | 234 | * where memory could be added but not currently present. |
235 | */ | 235 | */ |
236 | printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", | 236 | printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", |
237 | nid, node_start_pfn[nid], node_end_pfn[nid]); | 237 | nid, node_start_pfn[nid], node_end_pfn[nid]); |
238 | if (node_start_pfn[nid] > max_pfn) | 238 | if (node_start_pfn[nid] > max_pfn) |
239 | continue; | 239 | continue; |
240 | if (!node_end_pfn[nid]) | 240 | if (!node_end_pfn[nid]) |
241 | continue; | 241 | continue; |
242 | if (node_end_pfn[nid] > max_pfn) | 242 | if (node_end_pfn[nid] > max_pfn) |
243 | node_end_pfn[nid] = max_pfn; | 243 | node_end_pfn[nid] = max_pfn; |
244 | 244 | ||
245 | /* ensure the remap includes space for the pgdat. */ | 245 | /* ensure the remap includes space for the pgdat. */ |
246 | size = node_remap_size[nid] + sizeof(pg_data_t); | 246 | size = node_remap_size[nid] + sizeof(pg_data_t); |
247 | 247 | ||
248 | /* convert size to large (pmd size) pages, rounding up */ | 248 | /* convert size to large (pmd size) pages, rounding up */ |
249 | size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; | 249 | size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; |
250 | /* now the roundup is correct, convert to PAGE_SIZE pages */ | 250 | /* now the roundup is correct, convert to PAGE_SIZE pages */ |
251 | size = size * PTRS_PER_PTE; | 251 | size = size * PTRS_PER_PTE; |
252 | 252 | ||
253 | node_kva_target = round_down(node_end_pfn[nid] - size, | 253 | node_kva_target = round_down(node_end_pfn[nid] - size, |
254 | PTRS_PER_PTE); | 254 | PTRS_PER_PTE); |
255 | node_kva_target <<= PAGE_SHIFT; | 255 | node_kva_target <<= PAGE_SHIFT; |
256 | do { | 256 | do { |
257 | node_kva_final = find_e820_area(node_kva_target, | 257 | node_kva_final = find_e820_area(node_kva_target, |
258 | ((u64)node_end_pfn[nid])<<PAGE_SHIFT, | 258 | ((u64)node_end_pfn[nid])<<PAGE_SHIFT, |
259 | ((u64)size)<<PAGE_SHIFT, | 259 | ((u64)size)<<PAGE_SHIFT, |
260 | LARGE_PAGE_BYTES); | 260 | LARGE_PAGE_BYTES); |
261 | node_kva_target -= LARGE_PAGE_BYTES; | 261 | node_kva_target -= LARGE_PAGE_BYTES; |
262 | } while (node_kva_final == -1ULL && | 262 | } while (node_kva_final == -1ULL && |
263 | (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); | 263 | (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); |
264 | 264 | ||
265 | if (node_kva_final == -1ULL) | 265 | if (node_kva_final == -1ULL) |
266 | panic("Can not get kva ram\n"); | 266 | panic("Can not get kva ram\n"); |
267 | 267 | ||
268 | node_remap_size[nid] = size; | 268 | node_remap_size[nid] = size; |
269 | node_remap_offset[nid] = reserve_pages; | 269 | node_remap_offset[nid] = reserve_pages; |
270 | reserve_pages += size; | 270 | reserve_pages += size; |
271 | printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of" | 271 | printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of" |
272 | " node %d at %llx\n", | 272 | " node %d at %llx\n", |
273 | size, nid, node_kva_final>>PAGE_SHIFT); | 273 | size, nid, node_kva_final>>PAGE_SHIFT); |
274 | 274 | ||
275 | /* | 275 | /* |
276 | * prevent kva address below max_low_pfn want it on system | 276 | * prevent kva address below max_low_pfn want it on system |
277 | * with less memory later. | 277 | * with less memory later. |
278 | * layout will be: KVA address , KVA RAM | 278 | * layout will be: KVA address , KVA RAM |
279 | * | 279 | * |
280 | * we are supposed to only record the one less then max_low_pfn | 280 | * we are supposed to only record the one less then max_low_pfn |
281 | * but we could have some hole in high memory, and it will only | 281 | * but we could have some hole in high memory, and it will only |
282 | * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide | 282 | * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide |
283 | * to use it as free. | 283 | * to use it as free. |
284 | * So reserve_early here, hope we don't run out of that array | 284 | * So reserve_early here, hope we don't run out of that array |
285 | */ | 285 | */ |
286 | reserve_early(node_kva_final, | 286 | reserve_early(node_kva_final, |
287 | node_kva_final+(((u64)size)<<PAGE_SHIFT), | 287 | node_kva_final+(((u64)size)<<PAGE_SHIFT), |
288 | "KVA RAM"); | 288 | "KVA RAM"); |
289 | 289 | ||
290 | node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; | 290 | node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; |
291 | remove_active_range(nid, node_remap_start_pfn[nid], | 291 | remove_active_range(nid, node_remap_start_pfn[nid], |
292 | node_remap_start_pfn[nid] + size); | 292 | node_remap_start_pfn[nid] + size); |
293 | } | 293 | } |
294 | printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", | 294 | printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", |
295 | reserve_pages); | 295 | reserve_pages); |
296 | return reserve_pages; | 296 | return reserve_pages; |
297 | } | 297 | } |
298 | 298 | ||
299 | static void init_remap_allocator(int nid) | 299 | static void init_remap_allocator(int nid) |
300 | { | 300 | { |
301 | node_remap_start_vaddr[nid] = pfn_to_kaddr( | 301 | node_remap_start_vaddr[nid] = pfn_to_kaddr( |
302 | kva_start_pfn + node_remap_offset[nid]); | 302 | kva_start_pfn + node_remap_offset[nid]); |
303 | node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + | 303 | node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + |
304 | (node_remap_size[nid] * PAGE_SIZE); | 304 | (node_remap_size[nid] * PAGE_SIZE); |
305 | node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + | 305 | node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + |
306 | ALIGN(sizeof(pg_data_t), PAGE_SIZE); | 306 | ALIGN(sizeof(pg_data_t), PAGE_SIZE); |
307 | 307 | ||
308 | printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid, | 308 | printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid, |
309 | (ulong) node_remap_start_vaddr[nid], | 309 | (ulong) node_remap_start_vaddr[nid], |
310 | (ulong) node_remap_end_vaddr[nid]); | 310 | (ulong) node_remap_end_vaddr[nid]); |
311 | } | 311 | } |
312 | 312 | ||
313 | void __init initmem_init(unsigned long start_pfn, | 313 | void __init initmem_init(unsigned long start_pfn, |
314 | unsigned long end_pfn) | 314 | unsigned long end_pfn) |
315 | { | 315 | { |
316 | int nid; | 316 | int nid; |
317 | long kva_target_pfn; | 317 | long kva_target_pfn; |
318 | 318 | ||
319 | /* | 319 | /* |
320 | * When mapping a NUMA machine we allocate the node_mem_map arrays | 320 | * When mapping a NUMA machine we allocate the node_mem_map arrays |
321 | * from node local memory. They are then mapped directly into KVA | 321 | * from node local memory. They are then mapped directly into KVA |
322 | * between zone normal and vmalloc space. Calculate the size of | 322 | * between zone normal and vmalloc space. Calculate the size of |
323 | * this space and use it to adjust the boundary between ZONE_NORMAL | 323 | * this space and use it to adjust the boundary between ZONE_NORMAL |
324 | * and ZONE_HIGHMEM. | 324 | * and ZONE_HIGHMEM. |
325 | */ | 325 | */ |
326 | 326 | ||
327 | remove_all_active_ranges(); | 327 | remove_all_active_ranges(); |
328 | get_memcfg_numa(); | 328 | get_memcfg_numa(); |
329 | 329 | ||
330 | kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE); | 330 | kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE); |
331 | 331 | ||
332 | kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); | 332 | kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); |
333 | do { | 333 | do { |
334 | kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT, | 334 | kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT, |
335 | max_low_pfn<<PAGE_SHIFT, | 335 | max_low_pfn<<PAGE_SHIFT, |
336 | kva_pages<<PAGE_SHIFT, | 336 | kva_pages<<PAGE_SHIFT, |
337 | PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; | 337 | PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; |
338 | kva_target_pfn -= PTRS_PER_PTE; | 338 | kva_target_pfn -= PTRS_PER_PTE; |
339 | } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn); | 339 | } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn); |
340 | 340 | ||
341 | if (kva_start_pfn == -1UL) | 341 | if (kva_start_pfn == -1UL) |
342 | panic("Can not get kva space\n"); | 342 | panic("Can not get kva space\n"); |
343 | 343 | ||
344 | printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", | 344 | printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", |
345 | kva_start_pfn, max_low_pfn); | 345 | kva_start_pfn, max_low_pfn); |
346 | printk(KERN_INFO "max_pfn = %lx\n", max_pfn); | 346 | printk(KERN_INFO "max_pfn = %lx\n", max_pfn); |
347 | 347 | ||
348 | /* avoid clash with initrd */ | 348 | /* avoid clash with initrd */ |
349 | reserve_early(kva_start_pfn<<PAGE_SHIFT, | 349 | reserve_early(kva_start_pfn<<PAGE_SHIFT, |
350 | (kva_start_pfn + kva_pages)<<PAGE_SHIFT, | 350 | (kva_start_pfn + kva_pages)<<PAGE_SHIFT, |
351 | "KVA PG"); | 351 | "KVA PG"); |
352 | #ifdef CONFIG_HIGHMEM | 352 | #ifdef CONFIG_HIGHMEM |
353 | highstart_pfn = highend_pfn = max_pfn; | 353 | highstart_pfn = highend_pfn = max_pfn; |
354 | if (max_pfn > max_low_pfn) | 354 | if (max_pfn > max_low_pfn) |
355 | highstart_pfn = max_low_pfn; | 355 | highstart_pfn = max_low_pfn; |
356 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | 356 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", |
357 | pages_to_mb(highend_pfn - highstart_pfn)); | 357 | pages_to_mb(highend_pfn - highstart_pfn)); |
358 | num_physpages = highend_pfn; | 358 | num_physpages = highend_pfn; |
359 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | 359 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; |
360 | #else | 360 | #else |
361 | num_physpages = max_low_pfn; | 361 | num_physpages = max_low_pfn; |
362 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 362 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
363 | #endif | 363 | #endif |
364 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", | 364 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", |
365 | pages_to_mb(max_low_pfn)); | 365 | pages_to_mb(max_low_pfn)); |
366 | printk(KERN_DEBUG "max_low_pfn = %lx, highstart_pfn = %lx\n", | 366 | printk(KERN_DEBUG "max_low_pfn = %lx, highstart_pfn = %lx\n", |
367 | max_low_pfn, highstart_pfn); | 367 | max_low_pfn, highstart_pfn); |
368 | 368 | ||
369 | printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", | 369 | printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", |
370 | (ulong) pfn_to_kaddr(max_low_pfn)); | 370 | (ulong) pfn_to_kaddr(max_low_pfn)); |
371 | for_each_online_node(nid) { | 371 | for_each_online_node(nid) { |
372 | init_remap_allocator(nid); | 372 | init_remap_allocator(nid); |
373 | 373 | ||
374 | allocate_pgdat(nid); | 374 | allocate_pgdat(nid); |
375 | } | 375 | } |
| | 376 | remap_numa_kva(); |
| | 377 | |
376 | printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", | 378 | printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", |
377 | (ulong) pfn_to_kaddr(highstart_pfn)); | 379 | (ulong) pfn_to_kaddr(highstart_pfn)); |
378 | for_each_online_node(nid) | 380 | for_each_online_node(nid) |
379 | propagate_e820_map_node(nid); | 381 | propagate_e820_map_node(nid); |
380 | 382 | ||
381 | memset(NODE_DATA(0), 0, sizeof(struct pglist_data)); | 383 | for_each_online_node(nid) |
| | 384 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); |
| | 385 | |
382 | NODE_DATA(0)->bdata = &node0_bdata; | 386 | NODE_DATA(0)->bdata = &node0_bdata; |
383 | setup_bootmem_allocator(); | 387 | setup_bootmem_allocator(); |
384 | } | 388 | } |
385 | 389 | ||
386 | void __init zone_sizes_init(void) | 390 | void __init zone_sizes_init(void) |
387 | { | 391 | { |
388 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 392 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
389 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 393 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
390 | max_zone_pfns[ZONE_DMA] = | 394 | max_zone_pfns[ZONE_DMA] = |
391 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | 395 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; |
392 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | 396 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; |
393 | #ifdef CONFIG_HIGHMEM | 397 | #ifdef CONFIG_HIGHMEM |
394 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; | 398 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; |
395 | #endif | 399 | #endif |
396 | 400 | ||
397 | free_area_init_nodes(max_zone_pfns); | 401 | free_area_init_nodes(max_zone_pfns); |
398 | return; | 402 | return; |
399 | } | 403 | } |
400 | 404 | ||
401 | void __init set_highmem_pages_init(void) | 405 | void __init set_highmem_pages_init(void) |
402 | { | 406 | { |
403 | #ifdef CONFIG_HIGHMEM | 407 | #ifdef CONFIG_HIGHMEM |
404 | struct zone *zone; | 408 | struct zone *zone; |
405 | int nid; | 409 | int nid; |
406 | 410 | ||
407 | for_each_zone(zone) { | 411 | for_each_zone(zone) { |
408 | unsigned long zone_start_pfn, zone_end_pfn; | 412 | unsigned long zone_start_pfn, zone_end_pfn; |
409 | 413 | ||
410 | if (!is_highmem(zone)) | 414 | if (!is_highmem(zone)) |
411 | continue; | 415 | continue; |
412 | 416 | ||
413 | zone_start_pfn = zone->zone_start_pfn; | 417 | zone_start_pfn = zone->zone_start_pfn; |
414 | zone_end_pfn = zone_start_pfn + zone->spanned_pages; | 418 | zone_end_pfn = zone_start_pfn + zone->spanned_pages; |
415 | 419 | ||
416 | nid = zone_to_nid(zone); | 420 | nid = zone_to_nid(zone); |
417 | printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", | 421 | printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", |
418 | zone->name, nid, zone_start_pfn, zone_end_pfn); | 422 | zone->name, nid, zone_start_pfn, zone_end_pfn); |
419 | 423 | ||
420 | add_highpages_with_active_regions(nid, zone_start_pfn, | 424 | add_highpages_with_active_regions(nid, zone_start_pfn, |
421 | zone_end_pfn); | 425 | zone_end_pfn); |
422 | } | 426 | } |
423 | totalram_pages += totalhigh_pages; | 427 | totalram_pages += totalhigh_pages; |
424 | #endif | 428 | #endif |
425 | } | 429 | } |
426 | 430 | ||
427 | #ifdef CONFIG_MEMORY_HOTPLUG | 431 | #ifdef CONFIG_MEMORY_HOTPLUG |
428 | static int paddr_to_nid(u64 addr) | 432 | static int paddr_to_nid(u64 addr) |
429 | { | 433 | { |
430 | int nid; | 434 | int nid; |
431 | unsigned long pfn = PFN_DOWN(addr); | 435 | unsigned long pfn = PFN_DOWN(addr); |
432 | 436 | ||
433 | for_each_node(nid) | 437 | for_each_node(nid) |
434 | if (node_start_pfn[nid] <= pfn && | 438 | if (node_start_pfn[nid] <= pfn && |
435 | pfn < node_end_pfn[nid]) | 439 | pfn < node_end_pfn[nid]) |
436 | return nid; | 440 | return nid; |
437 | 441 | ||
438 | return -1; | 442 | return -1; |
439 | } | 443 | } |
440 | 444 | ||
441 | /* | 445 | /* |
442 | * This function is used to ask node id BEFORE memmap and mem_section's | 446 | * This function is used to ask node id BEFORE memmap and mem_section's |
443 | * initialization (pfn_to_nid() can't be used yet). | 447 | * initialization (pfn_to_nid() can't be used yet). |
444 | * If _PXM is not defined on ACPI's DSDT, node id must be found by this. | 448 | * If _PXM is not defined on ACPI's DSDT, node id must be found by this. |
445 | */ | 449 | */ |
446 | int memory_add_physaddr_to_nid(u64 addr) | 450 | int memory_add_physaddr_to_nid(u64 addr) |
447 | { | 451 | { |
448 | int nid = paddr_to_nid(addr); | 452 | int nid = paddr_to_nid(addr); |
449 | return (nid >= 0) ? nid : 0; | 453 | return (nid >= 0) ? nid : 0; |
450 | } | 454 | } |
451 | 455 | ||
452 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | 456 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); |
453 | #endif | 457 | #endif |
454 | 458 | ||
455 | #if defined(CONFIG_ACPI_NUMA) && !defined(CONFIG_HAVE_ARCH_PARSE_SRAT) | 459 | #if defined(CONFIG_ACPI_NUMA) && !defined(CONFIG_HAVE_ARCH_PARSE_SRAT) |
456 | /* | 460 | /* |
457 | * Dummy on 32-bit, for now: | 461 | * Dummy on 32-bit, for now: |
458 | */ | 462 | */ |
459 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | 463 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) |
460 | { | 464 | { |
461 | } | 465 | } |
462 | 466 | ||
463 | void __init | 467 | void __init |
464 | acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | 468 | acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) |
465 | { | 469 | { |
466 | } | 470 | } |
467 | 471 | ||
468 | void __init acpi_numa_arch_fixup(void) | 472 | void __init acpi_numa_arch_fixup(void) |
469 | { | 473 | { |
470 | } | 474 | } |
471 | #endif | 475 | #endif |
472 | 476 |
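A note on the sizing arithmetic left untouched above: calculate_numa_remap_pages() rounds each node's mem_map-plus-pgdat size up to whole PMD-sized large pages and then converts back to small pages. Below is a standalone sketch of just that arithmetic, assuming 4 KiB pages and 512 PTEs per page table (the PAE values; non-PAE i386 would use 1024) — constants chosen for illustration, not spelled out in this diff:

	#include <stdio.h>

	/* Assumed constants for illustration; the kernel takes PAGE_SIZE and
	 * PTRS_PER_PTE from the configured pagetable layout. */
	#define PAGE_SIZE        4096UL
	#define PTRS_PER_PTE      512UL                         /* PAE: 2 MiB large pages */
	#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)

	/* bytes -> whole large pages (rounded up) -> 4 KiB pages,
	 * mirroring the two rounding steps in calculate_numa_remap_pages(). */
	static unsigned long remap_size_pages(unsigned long bytes)
	{
		unsigned long large = (bytes + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
		return large * PTRS_PER_PTE;
	}

	int main(void)
	{
		/* e.g. a 3 MiB node_remap_size needs two 2 MiB large pages,
		 * i.e. 1024 small pages of KVA remap space for that node. */
		printf("%lu small pages\n", remap_size_pages(3UL << 20));
		return 0;
	}
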
arch/x86/mm/init_32.c
1 | /* | 1 | /* |
2 | * | 2 | * |
3 | * Copyright (C) 1995 Linus Torvalds | 3 | * Copyright (C) 1995 Linus Torvalds |
4 | * | 4 | * |
5 | * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 | 5 | * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/signal.h> | 9 | #include <linux/signal.h> |
10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/errno.h> | 12 | #include <linux/errno.h> |
13 | #include <linux/string.h> | 13 | #include <linux/string.h> |
14 | #include <linux/types.h> | 14 | #include <linux/types.h> |
15 | #include <linux/ptrace.h> | 15 | #include <linux/ptrace.h> |
16 | #include <linux/mman.h> | 16 | #include <linux/mman.h> |
17 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
18 | #include <linux/hugetlb.h> | 18 | #include <linux/hugetlb.h> |
19 | #include <linux/swap.h> | 19 | #include <linux/swap.h> |
20 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | #include <linux/highmem.h> | 22 | #include <linux/highmem.h> |
23 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
24 | #include <linux/pfn.h> | 24 | #include <linux/pfn.h> |
25 | #include <linux/poison.h> | 25 | #include <linux/poison.h> |
26 | #include <linux/bootmem.h> | 26 | #include <linux/bootmem.h> |
27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
28 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
29 | #include <linux/memory_hotplug.h> | 29 | #include <linux/memory_hotplug.h> |
30 | #include <linux/initrd.h> | 30 | #include <linux/initrd.h> |
31 | #include <linux/cpumask.h> | 31 | #include <linux/cpumask.h> |
32 | 32 | ||
33 | #include <asm/asm.h> | 33 | #include <asm/asm.h> |
34 | #include <asm/processor.h> | 34 | #include <asm/processor.h> |
35 | #include <asm/system.h> | 35 | #include <asm/system.h> |
36 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
37 | #include <asm/pgtable.h> | 37 | #include <asm/pgtable.h> |
38 | #include <asm/dma.h> | 38 | #include <asm/dma.h> |
39 | #include <asm/fixmap.h> | 39 | #include <asm/fixmap.h> |
40 | #include <asm/e820.h> | 40 | #include <asm/e820.h> |
41 | #include <asm/apic.h> | 41 | #include <asm/apic.h> |
42 | #include <asm/bugs.h> | 42 | #include <asm/bugs.h> |
43 | #include <asm/tlb.h> | 43 | #include <asm/tlb.h> |
44 | #include <asm/tlbflush.h> | 44 | #include <asm/tlbflush.h> |
45 | #include <asm/pgalloc.h> | 45 | #include <asm/pgalloc.h> |
46 | #include <asm/sections.h> | 46 | #include <asm/sections.h> |
47 | #include <asm/paravirt.h> | 47 | #include <asm/paravirt.h> |
48 | #include <asm/setup.h> | 48 | #include <asm/setup.h> |
49 | #include <asm/cacheflush.h> | 49 | #include <asm/cacheflush.h> |
50 | 50 | ||
51 | unsigned int __VMALLOC_RESERVE = 128 << 20; | 51 | unsigned int __VMALLOC_RESERVE = 128 << 20; |
52 | 52 | ||
53 | unsigned long max_pfn_mapped; | 53 | unsigned long max_pfn_mapped; |
54 | 54 | ||
55 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 55 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
56 | unsigned long highstart_pfn, highend_pfn; | 56 | unsigned long highstart_pfn, highend_pfn; |
57 | 57 | ||
58 | static noinline int do_test_wp_bit(void); | 58 | static noinline int do_test_wp_bit(void); |
59 | 59 | ||
60 | 60 | ||
61 | static unsigned long __initdata table_start; | 61 | static unsigned long __initdata table_start; |
62 | static unsigned long __meminitdata table_end; | 62 | static unsigned long __meminitdata table_end; |
63 | static unsigned long __meminitdata table_top; | 63 | static unsigned long __meminitdata table_top; |
64 | 64 | ||
65 | static int __initdata after_init_bootmem; | 65 | static int __initdata after_init_bootmem; |
66 | 66 | ||
67 | static __init void *alloc_low_page(unsigned long *phys) | 67 | static __init void *alloc_low_page(unsigned long *phys) |
68 | { | 68 | { |
69 | unsigned long pfn = table_end++; | 69 | unsigned long pfn = table_end++; |
70 | void *adr; | 70 | void *adr; |
71 | 71 | ||
72 | if (pfn >= table_top) | 72 | if (pfn >= table_top) |
73 | panic("alloc_low_page: ran out of memory"); | 73 | panic("alloc_low_page: ran out of memory"); |
74 | 74 | ||
75 | adr = __va(pfn * PAGE_SIZE); | 75 | adr = __va(pfn * PAGE_SIZE); |
76 | memset(adr, 0, PAGE_SIZE); | 76 | memset(adr, 0, PAGE_SIZE); |
77 | *phys = pfn * PAGE_SIZE; | 77 | *phys = pfn * PAGE_SIZE; |
78 | return adr; | 78 | return adr; |
79 | } | 79 | } |
80 | 80 | ||
81 | /* | 81 | /* |
82 | * Creates a middle page table and puts a pointer to it in the | 82 | * Creates a middle page table and puts a pointer to it in the |
83 | * given global directory entry. This only returns the gd entry | 83 | * given global directory entry. This only returns the gd entry |
84 | * in non-PAE compilation mode, since the middle layer is folded. | 84 | * in non-PAE compilation mode, since the middle layer is folded. |
85 | */ | 85 | */ |
86 | static pmd_t * __init one_md_table_init(pgd_t *pgd) | 86 | static pmd_t * __init one_md_table_init(pgd_t *pgd) |
87 | { | 87 | { |
88 | pud_t *pud; | 88 | pud_t *pud; |
89 | pmd_t *pmd_table; | 89 | pmd_t *pmd_table; |
90 | 90 | ||
91 | #ifdef CONFIG_X86_PAE | 91 | #ifdef CONFIG_X86_PAE |
92 | unsigned long phys; | 92 | unsigned long phys; |
93 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { | 93 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
94 | if (after_init_bootmem) | 94 | if (after_init_bootmem) |
95 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | 95 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
96 | else | 96 | else |
97 | pmd_table = (pmd_t *)alloc_low_page(&phys); | 97 | pmd_table = (pmd_t *)alloc_low_page(&phys); |
98 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); | 98 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); |
99 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 99 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
100 | pud = pud_offset(pgd, 0); | 100 | pud = pud_offset(pgd, 0); |
101 | BUG_ON(pmd_table != pmd_offset(pud, 0)); | 101 | BUG_ON(pmd_table != pmd_offset(pud, 0)); |
102 | } | 102 | } |
103 | #endif | 103 | #endif |
104 | pud = pud_offset(pgd, 0); | 104 | pud = pud_offset(pgd, 0); |
105 | pmd_table = pmd_offset(pud, 0); | 105 | pmd_table = pmd_offset(pud, 0); |
106 | 106 | ||
107 | return pmd_table; | 107 | return pmd_table; |
108 | } | 108 | } |
109 | 109 | ||
110 | /* | 110 | /* |
111 | * Create a page table and place a pointer to it in a middle page | 111 | * Create a page table and place a pointer to it in a middle page |
112 | * directory entry: | 112 | * directory entry: |
113 | */ | 113 | */ |
114 | static pte_t * __init one_page_table_init(pmd_t *pmd) | 114 | static pte_t * __init one_page_table_init(pmd_t *pmd) |
115 | { | 115 | { |
116 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { | 116 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { |
117 | pte_t *page_table = NULL; | 117 | pte_t *page_table = NULL; |
118 | 118 | ||
119 | if (after_init_bootmem) { | 119 | if (after_init_bootmem) { |
120 | #ifdef CONFIG_DEBUG_PAGEALLOC | 120 | #ifdef CONFIG_DEBUG_PAGEALLOC |
121 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | 121 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); |
122 | #endif | 122 | #endif |
123 | if (!page_table) | 123 | if (!page_table) |
124 | page_table = | 124 | page_table = |
125 | (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); | 125 | (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
126 | } else { | 126 | } else { |
127 | unsigned long phys; | 127 | unsigned long phys; |
128 | page_table = (pte_t *)alloc_low_page(&phys); | 128 | page_table = (pte_t *)alloc_low_page(&phys); |
129 | } | 129 | } |
130 | 130 | ||
131 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); | 131 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); |
132 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); | 132 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); |
133 | BUG_ON(page_table != pte_offset_kernel(pmd, 0)); | 133 | BUG_ON(page_table != pte_offset_kernel(pmd, 0)); |
134 | } | 134 | } |
135 | 135 | ||
136 | return pte_offset_kernel(pmd, 0); | 136 | return pte_offset_kernel(pmd, 0); |
137 | } | 137 | } |
138 | 138 | ||
139 | /* | 139 | /* |
140 | * This function initializes a certain range of kernel virtual memory | 140 | * This function initializes a certain range of kernel virtual memory |
141 | * with new bootmem page tables, everywhere page tables are missing in | 141 | * with new bootmem page tables, everywhere page tables are missing in |
142 | * the given range. | 142 | * the given range. |
143 | * | 143 | * |
144 | * NOTE: The pagetables are allocated contiguous on the physical space | 144 | * NOTE: The pagetables are allocated contiguous on the physical space |
145 | * so we can cache the place of the first one and move around without | 145 | * so we can cache the place of the first one and move around without |
146 | * checking the pgd every time. | 146 | * checking the pgd every time. |
147 | */ | 147 | */ |
148 | static void __init | 148 | static void __init |
149 | page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) | 149 | page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) |
150 | { | 150 | { |
151 | int pgd_idx, pmd_idx; | 151 | int pgd_idx, pmd_idx; |
152 | unsigned long vaddr; | 152 | unsigned long vaddr; |
153 | pgd_t *pgd; | 153 | pgd_t *pgd; |
154 | pmd_t *pmd; | 154 | pmd_t *pmd; |
155 | 155 | ||
156 | vaddr = start; | 156 | vaddr = start; |
157 | pgd_idx = pgd_index(vaddr); | 157 | pgd_idx = pgd_index(vaddr); |
158 | pmd_idx = pmd_index(vaddr); | 158 | pmd_idx = pmd_index(vaddr); |
159 | pgd = pgd_base + pgd_idx; | 159 | pgd = pgd_base + pgd_idx; |
160 | 160 | ||
161 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { | 161 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { |
162 | pmd = one_md_table_init(pgd); | 162 | pmd = one_md_table_init(pgd); |
163 | pmd = pmd + pmd_index(vaddr); | 163 | pmd = pmd + pmd_index(vaddr); |
164 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); | 164 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); |
165 | pmd++, pmd_idx++) { | 165 | pmd++, pmd_idx++) { |
166 | one_page_table_init(pmd); | 166 | one_page_table_init(pmd); |
167 | 167 | ||
168 | vaddr += PMD_SIZE; | 168 | vaddr += PMD_SIZE; |
169 | } | 169 | } |
170 | pmd_idx = 0; | 170 | pmd_idx = 0; |
171 | } | 171 | } |
172 | } | 172 | } |
173 | 173 | ||
174 | static inline int is_kernel_text(unsigned long addr) | 174 | static inline int is_kernel_text(unsigned long addr) |
175 | { | 175 | { |
176 | if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) | 176 | if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) |
177 | return 1; | 177 | return 1; |
178 | return 0; | 178 | return 0; |
179 | } | 179 | } |
180 | 180 | ||
181 | /* | 181 | /* |
182 | * This maps the physical memory to kernel virtual address space, a total | 182 | * This maps the physical memory to kernel virtual address space, a total |
183 | * of max_low_pfn pages, by creating page tables starting from address | 183 | * of max_low_pfn pages, by creating page tables starting from address |
184 | * PAGE_OFFSET: | 184 | * PAGE_OFFSET: |
185 | */ | 185 | */ |
186 | static void __init kernel_physical_mapping_init(pgd_t *pgd_base, | 186 | static void __init kernel_physical_mapping_init(pgd_t *pgd_base, |
187 | unsigned long start, | 187 | unsigned long start, |
188 | unsigned long end) | 188 | unsigned long end) |
189 | { | 189 | { |
190 | int pgd_idx, pmd_idx, pte_ofs; | 190 | int pgd_idx, pmd_idx, pte_ofs; |
191 | unsigned long pfn; | 191 | unsigned long pfn; |
192 | pgd_t *pgd; | 192 | pgd_t *pgd; |
193 | pmd_t *pmd; | 193 | pmd_t *pmd; |
194 | pte_t *pte; | 194 | pte_t *pte; |
195 | unsigned pages_2m = 0, pages_4k = 0; | 195 | unsigned pages_2m = 0, pages_4k = 0; |
196 | unsigned limit_pfn = end >> PAGE_SHIFT; | 196 | unsigned limit_pfn = end >> PAGE_SHIFT; |
197 | 197 | ||
198 | pgd_idx = pgd_index(PAGE_OFFSET); | 198 | pgd_idx = pgd_index(PAGE_OFFSET); |
199 | pgd = pgd_base + pgd_idx; | 199 | pgd = pgd_base + pgd_idx; |
200 | pfn = start >> PAGE_SHIFT; | 200 | pfn = start >> PAGE_SHIFT; |
201 | 201 | ||
202 | for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { | 202 | for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { |
203 | pmd = one_md_table_init(pgd); | 203 | pmd = one_md_table_init(pgd); |
204 | if (pfn >= limit_pfn) | 204 | if (pfn >= limit_pfn) |
205 | continue; | 205 | continue; |
206 | 206 | ||
207 | for (pmd_idx = 0; | 207 | for (pmd_idx = 0; |
208 | pmd_idx < PTRS_PER_PMD && pfn < limit_pfn; | 208 | pmd_idx < PTRS_PER_PMD && pfn < limit_pfn; |
209 | pmd++, pmd_idx++) { | 209 | pmd++, pmd_idx++) { |
210 | unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET; | 210 | unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET; |
211 | 211 | ||
212 | /* | 212 | /* |
213 | * Map with big pages if possible, otherwise | 213 | * Map with big pages if possible, otherwise |
214 | * create normal page tables: | 214 | * create normal page tables: |
215 | * | 215 | * |
216 | * Don't use a large page for the first 2/4MB of memory | 216 | * Don't use a large page for the first 2/4MB of memory |
217 | * because there are often fixed size MTRRs in there | 217 | * because there are often fixed size MTRRs in there |
218 | * and overlapping MTRRs into large pages can cause | 218 | * and overlapping MTRRs into large pages can cause |
219 | * slowdowns. | 219 | * slowdowns. |
220 | */ | 220 | */ |
221 | if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) { | 221 | if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) { |
222 | unsigned int addr2; | 222 | unsigned int addr2; |
223 | pgprot_t prot = PAGE_KERNEL_LARGE; | 223 | pgprot_t prot = PAGE_KERNEL_LARGE; |
224 | 224 | ||
225 | addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + | 225 | addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + |
226 | PAGE_OFFSET + PAGE_SIZE-1; | 226 | PAGE_OFFSET + PAGE_SIZE-1; |
227 | 227 | ||
228 | if (is_kernel_text(addr) || | 228 | if (is_kernel_text(addr) || |
229 | is_kernel_text(addr2)) | 229 | is_kernel_text(addr2)) |
230 | prot = PAGE_KERNEL_LARGE_EXEC; | 230 | prot = PAGE_KERNEL_LARGE_EXEC; |
231 | 231 | ||
232 | pages_2m++; | 232 | pages_2m++; |
233 | set_pmd(pmd, pfn_pmd(pfn, prot)); | 233 | set_pmd(pmd, pfn_pmd(pfn, prot)); |
234 | 234 | ||
235 | pfn += PTRS_PER_PTE; | 235 | pfn += PTRS_PER_PTE; |
236 | max_pfn_mapped = pfn; | 236 | max_pfn_mapped = pfn; |
237 | continue; | 237 | continue; |
238 | } | 238 | } |
239 | pte = one_page_table_init(pmd); | 239 | pte = one_page_table_init(pmd); |
240 | 240 | ||
241 | for (pte_ofs = 0; | 241 | for (pte_ofs = 0; |
242 | pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; | 242 | pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; |
243 | pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { | 243 | pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { |
244 | pgprot_t prot = PAGE_KERNEL; | 244 | pgprot_t prot = PAGE_KERNEL; |
245 | 245 | ||
246 | if (is_kernel_text(addr)) | 246 | if (is_kernel_text(addr)) |
247 | prot = PAGE_KERNEL_EXEC; | 247 | prot = PAGE_KERNEL_EXEC; |
248 | 248 | ||
249 | pages_4k++; | 249 | pages_4k++; |
250 | set_pte(pte, pfn_pte(pfn, prot)); | 250 | set_pte(pte, pfn_pte(pfn, prot)); |
251 | } | 251 | } |
252 | max_pfn_mapped = pfn; | 252 | max_pfn_mapped = pfn; |
253 | } | 253 | } |
254 | } | 254 | } |
255 | update_page_count(PG_LEVEL_2M, pages_2m); | 255 | update_page_count(PG_LEVEL_2M, pages_2m); |
256 | update_page_count(PG_LEVEL_4K, pages_4k); | 256 | update_page_count(PG_LEVEL_4K, pages_4k); |
257 | } | 257 | } |
258 | 258 | ||
259 | /* | 259 | /* |
260 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | 260 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address |
261 | * is valid. The argument is a physical page number. | 261 | * is valid. The argument is a physical page number. |
262 | * | 262 | * |
263 | * | 263 | * |
264 | * On x86, access has to be given to the first megabyte of ram because that area | 264 | * On x86, access has to be given to the first megabyte of ram because that area |
265 | * contains bios code and data regions used by X and dosemu and similar apps. | 265 | * contains bios code and data regions used by X and dosemu and similar apps. |
266 | * Access has to be given to non-kernel-ram areas as well, these contain the PCI | 266 | * Access has to be given to non-kernel-ram areas as well, these contain the PCI |
267 | * mmio resources as well as potential bios/acpi data regions. | 267 | * mmio resources as well as potential bios/acpi data regions. |
268 | */ | 268 | */ |
269 | int devmem_is_allowed(unsigned long pagenr) | 269 | int devmem_is_allowed(unsigned long pagenr) |
270 | { | 270 | { |
271 | if (pagenr <= 256) | 271 | if (pagenr <= 256) |
272 | return 1; | 272 | return 1; |
273 | if (!page_is_ram(pagenr)) | 273 | if (!page_is_ram(pagenr)) |
274 | return 1; | 274 | return 1; |
275 | return 0; | 275 | return 0; |
276 | } | 276 | } |
277 | 277 | ||
278 | #ifdef CONFIG_HIGHMEM | 278 | #ifdef CONFIG_HIGHMEM |
279 | pte_t *kmap_pte; | 279 | pte_t *kmap_pte; |
280 | pgprot_t kmap_prot; | 280 | pgprot_t kmap_prot; |
281 | 281 | ||
282 | static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr) | 282 | static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr) |
283 | { | 283 | { |
284 | return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), | 284 | return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), |
285 | vaddr), vaddr), vaddr); | 285 | vaddr), vaddr), vaddr); |
286 | } | 286 | } |
287 | 287 | ||
288 | static void __init kmap_init(void) | 288 | static void __init kmap_init(void) |
289 | { | 289 | { |
290 | unsigned long kmap_vstart; | 290 | unsigned long kmap_vstart; |
291 | 291 | ||
292 | /* | 292 | /* |
293 | * Cache the first kmap pte: | 293 | * Cache the first kmap pte: |
294 | */ | 294 | */ |
295 | kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); | 295 | kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); |
296 | kmap_pte = kmap_get_fixmap_pte(kmap_vstart); | 296 | kmap_pte = kmap_get_fixmap_pte(kmap_vstart); |
297 | 297 | ||
298 | kmap_prot = PAGE_KERNEL; | 298 | kmap_prot = PAGE_KERNEL; |
299 | } | 299 | } |
300 | 300 | ||
301 | static void __init permanent_kmaps_init(pgd_t *pgd_base) | 301 | static void __init permanent_kmaps_init(pgd_t *pgd_base) |
302 | { | 302 | { |
303 | unsigned long vaddr; | 303 | unsigned long vaddr; |
304 | pgd_t *pgd; | 304 | pgd_t *pgd; |
305 | pud_t *pud; | 305 | pud_t *pud; |
306 | pmd_t *pmd; | 306 | pmd_t *pmd; |
307 | pte_t *pte; | 307 | pte_t *pte; |
308 | 308 | ||
309 | vaddr = PKMAP_BASE; | 309 | vaddr = PKMAP_BASE; |
310 | page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); | 310 | page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); |
311 | 311 | ||
312 | pgd = swapper_pg_dir + pgd_index(vaddr); | 312 | pgd = swapper_pg_dir + pgd_index(vaddr); |
313 | pud = pud_offset(pgd, vaddr); | 313 | pud = pud_offset(pgd, vaddr); |
314 | pmd = pmd_offset(pud, vaddr); | 314 | pmd = pmd_offset(pud, vaddr); |
315 | pte = pte_offset_kernel(pmd, vaddr); | 315 | pte = pte_offset_kernel(pmd, vaddr); |
316 | pkmap_page_table = pte; | 316 | pkmap_page_table = pte; |
317 | } | 317 | } |
318 | 318 | ||
319 | static void __init add_one_highpage_init(struct page *page, int pfn) | 319 | static void __init add_one_highpage_init(struct page *page, int pfn) |
320 | { | 320 | { |
321 | ClearPageReserved(page); | 321 | ClearPageReserved(page); |
322 | init_page_count(page); | 322 | init_page_count(page); |
323 | __free_page(page); | 323 | __free_page(page); |
324 | totalhigh_pages++; | 324 | totalhigh_pages++; |
325 | } | 325 | } |
326 | 326 | ||
327 | struct add_highpages_data { | 327 | struct add_highpages_data { |
328 | unsigned long start_pfn; | 328 | unsigned long start_pfn; |
329 | unsigned long end_pfn; | 329 | unsigned long end_pfn; |
330 | }; | 330 | }; |
331 | 331 | ||
332 | static int __init add_highpages_work_fn(unsigned long start_pfn, | 332 | static int __init add_highpages_work_fn(unsigned long start_pfn, |
333 | unsigned long end_pfn, void *datax) | 333 | unsigned long end_pfn, void *datax) |
334 | { | 334 | { |
335 | int node_pfn; | 335 | int node_pfn; |
336 | struct page *page; | 336 | struct page *page; |
337 | unsigned long final_start_pfn, final_end_pfn; | 337 | unsigned long final_start_pfn, final_end_pfn; |
338 | struct add_highpages_data *data; | 338 | struct add_highpages_data *data; |
339 | 339 | ||
340 | data = (struct add_highpages_data *)datax; | 340 | data = (struct add_highpages_data *)datax; |
341 | 341 | ||
342 | final_start_pfn = max(start_pfn, data->start_pfn); | 342 | final_start_pfn = max(start_pfn, data->start_pfn); |
343 | final_end_pfn = min(end_pfn, data->end_pfn); | 343 | final_end_pfn = min(end_pfn, data->end_pfn); |
344 | if (final_start_pfn >= final_end_pfn) | 344 | if (final_start_pfn >= final_end_pfn) |
345 | return 0; | 345 | return 0; |
346 | 346 | ||
347 | for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; | 347 | for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; |
348 | node_pfn++) { | 348 | node_pfn++) { |
349 | if (!pfn_valid(node_pfn)) | 349 | if (!pfn_valid(node_pfn)) |
350 | continue; | 350 | continue; |
351 | page = pfn_to_page(node_pfn); | 351 | page = pfn_to_page(node_pfn); |
352 | add_one_highpage_init(page, node_pfn); | 352 | add_one_highpage_init(page, node_pfn); |
353 | } | 353 | } |
354 | 354 | ||
355 | return 0; | 355 | return 0; |
356 | 356 | ||
357 | } | 357 | } |
358 | 358 | ||
359 | void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, | 359 | void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, |
360 | unsigned long end_pfn) | 360 | unsigned long end_pfn) |
361 | { | 361 | { |
362 | struct add_highpages_data data; | 362 | struct add_highpages_data data; |
363 | 363 | ||
364 | data.start_pfn = start_pfn; | 364 | data.start_pfn = start_pfn; |
365 | data.end_pfn = end_pfn; | 365 | data.end_pfn = end_pfn; |
366 | 366 | ||
367 | work_with_active_regions(nid, add_highpages_work_fn, &data); | 367 | work_with_active_regions(nid, add_highpages_work_fn, &data); |
368 | } | 368 | } |
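The worker callback above clips each active memory region against the requested highmem window by taking the later of the two start pfns and the earlier of the two end pfns, and bails out when the intersection is empty. A minimal userspace sketch of just that clamping step (not the kernel helper itself; clamp_region, max_ul and min_ul are illustrative names):

/* Sketch: intersect one active region [r_start, r_end) with a window. */
#include <stdio.h>

static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }
static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }

/* Returns 1 and fills *out_start/*out_end when the region overlaps the window. */
static int clamp_region(unsigned long r_start, unsigned long r_end,
			unsigned long w_start, unsigned long w_end,
			unsigned long *out_start, unsigned long *out_end)
{
	*out_start = max_ul(r_start, w_start);
	*out_end   = min_ul(r_end, w_end);
	return *out_start < *out_end;
}

int main(void)
{
	unsigned long s, e;

	/* region 0x100..0x400 intersected with a highmem window 0x200..0x300 */
	if (clamp_region(0x100, 0x400, 0x200, 0x300, &s, &e))
		printf("freeing pfns %#lx - %#lx\n", s, e);
	return 0;
}

Every pfn in the clamped range is then validated with pfn_valid() before being handed to add_one_highpage_init().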
369 | 369 | ||
370 | #ifndef CONFIG_NUMA | 370 | #ifndef CONFIG_NUMA |
371 | static void __init set_highmem_pages_init(void) | 371 | static void __init set_highmem_pages_init(void) |
372 | { | 372 | { |
373 | add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); | 373 | add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); |
374 | 374 | ||
375 | totalram_pages += totalhigh_pages; | 375 | totalram_pages += totalhigh_pages; |
376 | } | 376 | } |
377 | #endif /* !CONFIG_NUMA */ | 377 | #endif /* !CONFIG_NUMA */ |
378 | 378 | ||
379 | #else | 379 | #else |
380 | # define kmap_init() do { } while (0) | 380 | # define kmap_init() do { } while (0) |
381 | # define permanent_kmaps_init(pgd_base) do { } while (0) | 381 | # define permanent_kmaps_init(pgd_base) do { } while (0) |
382 | # define set_highmem_pages_init() do { } while (0) | 382 | # define set_highmem_pages_init() do { } while (0) |
383 | #endif /* CONFIG_HIGHMEM */ | 383 | #endif /* CONFIG_HIGHMEM */ |
384 | 384 | ||
385 | pteval_t __PAGE_KERNEL = _PAGE_KERNEL; | 385 | pteval_t __PAGE_KERNEL = _PAGE_KERNEL; |
386 | EXPORT_SYMBOL(__PAGE_KERNEL); | 386 | EXPORT_SYMBOL(__PAGE_KERNEL); |
387 | 387 | ||
388 | pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; | 388 | pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; |
389 | 389 | ||
390 | void __init native_pagetable_setup_start(pgd_t *base) | 390 | void __init native_pagetable_setup_start(pgd_t *base) |
391 | { | 391 | { |
392 | unsigned long pfn, va; | 392 | unsigned long pfn, va; |
393 | pgd_t *pgd; | 393 | pgd_t *pgd; |
394 | pud_t *pud; | 394 | pud_t *pud; |
395 | pmd_t *pmd; | 395 | pmd_t *pmd; |
396 | pte_t *pte; | 396 | pte_t *pte; |
397 | 397 | ||
398 | /* | 398 | /* |
399 | * Remove any mappings which extend past the end of physical | 399 | * Remove any mappings which extend past the end of physical |
400 | * memory from the boot time page table: | 400 | * memory from the boot time page table: |
401 | */ | 401 | */ |
402 | for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) { | 402 | for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) { |
403 | va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); | 403 | va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); |
404 | pgd = base + pgd_index(va); | 404 | pgd = base + pgd_index(va); |
405 | if (!pgd_present(*pgd)) | 405 | if (!pgd_present(*pgd)) |
406 | break; | 406 | break; |
407 | 407 | ||
408 | pud = pud_offset(pgd, va); | 408 | pud = pud_offset(pgd, va); |
409 | pmd = pmd_offset(pud, va); | 409 | pmd = pmd_offset(pud, va); |
410 | if (!pmd_present(*pmd)) | 410 | if (!pmd_present(*pmd)) |
411 | break; | 411 | break; |
412 | 412 | ||
413 | pte = pte_offset_kernel(pmd, va); | 413 | pte = pte_offset_kernel(pmd, va); |
414 | if (!pte_present(*pte)) | 414 | if (!pte_present(*pte)) |
415 | break; | 415 | break; |
416 | 416 | ||
417 | pte_clear(NULL, va, pte); | 417 | pte_clear(NULL, va, pte); |
418 | } | 418 | } |
419 | paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); | 419 | paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); |
420 | } | 420 | } |
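The cleanup loop above scans every pfn from just past max_low_pfn up to the 4GB boundary (1 << (32 - PAGE_SHIFT) pfns), turns each into its lowmem virtual address with PAGE_OFFSET + (pfn << PAGE_SHIFT), and clears the pte when the whole pgd/pmd/pte chain is present. A small arithmetic-only sketch of that bound and address calculation, using assumed i386 defaults (PAGE_SHIFT 12, PAGE_OFFSET 0xC0000000), not values taken from this tree:

#include <stdio.h>

#define PAGE_SHIFT   12
#define PAGE_OFFSET  0xC0000000UL

int main(void)
{
	unsigned long max_low_pfn = 0x38000;		/* example: ~896MB of lowmem */
	unsigned long limit = 1UL << (32 - PAGE_SHIFT);	/* first pfn past 4GB */
	unsigned long pfn = max_low_pfn + 1;
	unsigned long va = PAGE_OFFSET + (pfn << PAGE_SHIFT);

	printf("pfns %#lx..%#lx would be scanned, first va %#lx\n",
	       max_low_pfn + 1, limit - 1, va);
	return 0;
}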
421 | 421 | ||
422 | void __init native_pagetable_setup_done(pgd_t *base) | 422 | void __init native_pagetable_setup_done(pgd_t *base) |
423 | { | 423 | { |
424 | } | 424 | } |
425 | 425 | ||
426 | /* | 426 | /* |
427 | * Build a proper pagetable for the kernel mappings. Up until this | 427 | * Build a proper pagetable for the kernel mappings. Up until this |
428 | * point, we've been running on some set of pagetables constructed by | 428 | * point, we've been running on some set of pagetables constructed by |
429 | * the boot process. | 429 | * the boot process. |
430 | * | 430 | * |
431 | * If we're booting on native hardware, this will be a pagetable | 431 | * If we're booting on native hardware, this will be a pagetable |
432 | * constructed in arch/x86/kernel/head_32.S. The root of the | 432 | * constructed in arch/x86/kernel/head_32.S. The root of the |
433 | * pagetable will be swapper_pg_dir. | 433 | * pagetable will be swapper_pg_dir. |
434 | * | 434 | * |
435 | * If we're booting paravirtualized under a hypervisor, then there are | 435 | * If we're booting paravirtualized under a hypervisor, then there are |
436 | * more options: we may already be running PAE, and the pagetable may | 436 | * more options: we may already be running PAE, and the pagetable may |
437 | * or may not be based in swapper_pg_dir. In any case, | 437 | * or may not be based in swapper_pg_dir. In any case, |
438 | * paravirt_pagetable_setup_start() will set up swapper_pg_dir | 438 | * paravirt_pagetable_setup_start() will set up swapper_pg_dir |
439 | * appropriately for the rest of the initialization to work. | 439 | * appropriately for the rest of the initialization to work. |
440 | * | 440 | * |
441 | * In general, pagetable_init() assumes that the pagetable may already | 441 | * In general, pagetable_init() assumes that the pagetable may already |
442 | * be partially populated, and so it avoids stomping on any existing | 442 | * be partially populated, and so it avoids stomping on any existing |
443 | * mappings. | 443 | * mappings. |
444 | */ | 444 | */ |
445 | static void __init pagetable_init(void) | 445 | static void __init pagetable_init(void) |
446 | { | 446 | { |
447 | pgd_t *pgd_base = swapper_pg_dir; | 447 | pgd_t *pgd_base = swapper_pg_dir; |
448 | unsigned long vaddr, end; | 448 | unsigned long vaddr, end; |
449 | 449 | ||
450 | paravirt_pagetable_setup_start(pgd_base); | 450 | paravirt_pagetable_setup_start(pgd_base); |
451 | 451 | ||
452 | remap_numa_kva(); | ||
453 | /* | 452 | /* |
454 | * Fixed mappings, only the page table structure has to be | 453 | * Fixed mappings, only the page table structure has to be |
455 | * created - mappings will be set by set_fixmap(): | 454 | * created - mappings will be set by set_fixmap(): |
456 | */ | 455 | */ |
457 | early_ioremap_clear(); | 456 | early_ioremap_clear(); |
458 | vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; | 457 | vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; |
459 | end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; | 458 | end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; |
460 | page_table_range_init(vaddr, end, pgd_base); | 459 | page_table_range_init(vaddr, end, pgd_base); |
461 | early_ioremap_reset(); | 460 | early_ioremap_reset(); |
462 | 461 | ||
463 | permanent_kmaps_init(pgd_base); | 462 | permanent_kmaps_init(pgd_base); |
464 | 463 | ||
465 | paravirt_pagetable_setup_done(pgd_base); | 464 | paravirt_pagetable_setup_done(pgd_base); |
466 | } | 465 | } |
467 | 466 | ||
468 | #ifdef CONFIG_ACPI_SLEEP | 467 | #ifdef CONFIG_ACPI_SLEEP |
469 | /* | 468 | /* |
470 | * ACPI suspend needs this for resume, because things like the intel-agp | 469 | * ACPI suspend needs this for resume, because things like the intel-agp |
471 | * driver might have split up a kernel 4MB mapping. | 470 | * driver might have split up a kernel 4MB mapping. |
472 | */ | 471 | */ |
473 | char swsusp_pg_dir[PAGE_SIZE] | 472 | char swsusp_pg_dir[PAGE_SIZE] |
474 | __attribute__ ((aligned(PAGE_SIZE))); | 473 | __attribute__ ((aligned(PAGE_SIZE))); |
475 | 474 | ||
476 | static inline void save_pg_dir(void) | 475 | static inline void save_pg_dir(void) |
477 | { | 476 | { |
478 | memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE); | 477 | memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE); |
479 | } | 478 | } |
480 | #else /* !CONFIG_ACPI_SLEEP */ | 479 | #else /* !CONFIG_ACPI_SLEEP */ |
481 | static inline void save_pg_dir(void) | 480 | static inline void save_pg_dir(void) |
482 | { | 481 | { |
483 | } | 482 | } |
484 | #endif /* !CONFIG_ACPI_SLEEP */ | 483 | #endif /* !CONFIG_ACPI_SLEEP */ |
485 | 484 | ||
486 | void zap_low_mappings(void) | 485 | void zap_low_mappings(void) |
487 | { | 486 | { |
488 | int i; | 487 | int i; |
489 | 488 | ||
490 | /* | 489 | /* |
491 | * Zap initial low-memory mappings. | 490 | * Zap initial low-memory mappings. |
492 | * | 491 | * |
493 | * Note that "pgd_clear()" doesn't do it for | 492 | * Note that "pgd_clear()" doesn't do it for |
494 | * us, because pgd_clear() is a no-op on i386. | 493 | * us, because pgd_clear() is a no-op on i386. |
495 | */ | 494 | */ |
496 | for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) { | 495 | for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) { |
497 | #ifdef CONFIG_X86_PAE | 496 | #ifdef CONFIG_X86_PAE |
498 | set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); | 497 | set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); |
499 | #else | 498 | #else |
500 | set_pgd(swapper_pg_dir+i, __pgd(0)); | 499 | set_pgd(swapper_pg_dir+i, __pgd(0)); |
501 | #endif | 500 | #endif |
502 | } | 501 | } |
503 | flush_tlb_all(); | 502 | flush_tlb_all(); |
504 | } | 503 | } |
505 | 504 | ||
506 | int nx_enabled; | 505 | int nx_enabled; |
507 | 506 | ||
508 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX; | 507 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX; |
509 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 508 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
510 | 509 | ||
511 | #ifdef CONFIG_X86_PAE | 510 | #ifdef CONFIG_X86_PAE |
512 | 511 | ||
513 | static int disable_nx __initdata; | 512 | static int disable_nx __initdata; |
514 | 513 | ||
515 | /* | 514 | /* |
516 | * noexec = on|off | 515 | * noexec = on|off |
517 | * | 516 | * |
518 | * Control non executable mappings. | 517 | * Control non executable mappings. |
519 | * | 518 | * |
520 | * on Enable | 519 | * on Enable |
521 | * off Disable | 520 | * off Disable |
522 | */ | 521 | */ |
523 | static int __init noexec_setup(char *str) | 522 | static int __init noexec_setup(char *str) |
524 | { | 523 | { |
525 | if (!str || !strcmp(str, "on")) { | 524 | if (!str || !strcmp(str, "on")) { |
526 | if (cpu_has_nx) { | 525 | if (cpu_has_nx) { |
527 | __supported_pte_mask |= _PAGE_NX; | 526 | __supported_pte_mask |= _PAGE_NX; |
528 | disable_nx = 0; | 527 | disable_nx = 0; |
529 | } | 528 | } |
530 | } else { | 529 | } else { |
531 | if (!strcmp(str, "off")) { | 530 | if (!strcmp(str, "off")) { |
532 | disable_nx = 1; | 531 | disable_nx = 1; |
533 | __supported_pte_mask &= ~_PAGE_NX; | 532 | __supported_pte_mask &= ~_PAGE_NX; |
534 | } else { | 533 | } else { |
535 | return -EINVAL; | 534 | return -EINVAL; |
536 | } | 535 | } |
537 | } | 536 | } |
538 | 537 | ||
539 | return 0; | 538 | return 0; |
540 | } | 539 | } |
541 | early_param("noexec", noexec_setup); | 540 | early_param("noexec", noexec_setup); |
542 | 541 | ||
543 | static void __init set_nx(void) | 542 | static void __init set_nx(void) |
544 | { | 543 | { |
545 | unsigned int v[4], l, h; | 544 | unsigned int v[4], l, h; |
546 | 545 | ||
547 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | 546 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { |
548 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | 547 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); |
549 | 548 | ||
550 | if ((v[3] & (1 << 20)) && !disable_nx) { | 549 | if ((v[3] & (1 << 20)) && !disable_nx) { |
551 | rdmsr(MSR_EFER, l, h); | 550 | rdmsr(MSR_EFER, l, h); |
552 | l |= EFER_NX; | 551 | l |= EFER_NX; |
553 | wrmsr(MSR_EFER, l, h); | 552 | wrmsr(MSR_EFER, l, h); |
554 | nx_enabled = 1; | 553 | nx_enabled = 1; |
555 | __supported_pte_mask |= _PAGE_NX; | 554 | __supported_pte_mask |= _PAGE_NX; |
556 | } | 555 | } |
557 | } | 556 | } |
558 | } | 557 | } |
559 | #endif | 558 | #endif |
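set_nx() above probes CPUID leaf 0x80000001 and tests bit 20 of EDX (the NX feature flag) before setting EFER.NX with wrmsr. The detection half can be reproduced from userspace with the compiler's <cpuid.h> helpers; the MSR write needs ring 0 and is omitted here. A sketch, assuming a gcc/clang toolchain on x86:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Same feature bit the kernel checks: leaf 0x80000001, EDX bit 20. */
	if (__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx) && (edx & (1u << 20)))
		printf("CPU advertises NX (Execute Disable)\n");
	else
		printf("no NX support reported\n");
	return 0;
}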
560 | 559 | ||
561 | /* user-defined highmem size */ | 560 | /* user-defined highmem size */ |
562 | static unsigned int highmem_pages = -1; | 561 | static unsigned int highmem_pages = -1; |
563 | 562 | ||
564 | /* | 563 | /* |
565 | * highmem=size forces highmem to be exactly 'size' bytes. | 564 | * highmem=size forces highmem to be exactly 'size' bytes. |
566 | * This works even on boxes that have no highmem otherwise. | 565 | * This works even on boxes that have no highmem otherwise. |
567 | * This also works to reduce highmem size on bigger boxes. | 566 | * This also works to reduce highmem size on bigger boxes. |
568 | */ | 567 | */ |
569 | static int __init parse_highmem(char *arg) | 568 | static int __init parse_highmem(char *arg) |
570 | { | 569 | { |
571 | if (!arg) | 570 | if (!arg) |
572 | return -EINVAL; | 571 | return -EINVAL; |
573 | 572 | ||
574 | highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; | 573 | highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; |
575 | return 0; | 574 | return 0; |
576 | } | 575 | } |
577 | early_param("highmem", parse_highmem); | 576 | early_param("highmem", parse_highmem); |
578 | 577 | ||
579 | /* | 578 | /* |
580 | * Determine low and high memory ranges: | 579 | * Determine low and high memory ranges: |
581 | */ | 580 | */ |
582 | void __init find_low_pfn_range(void) | 581 | void __init find_low_pfn_range(void) |
583 | { | 582 | { |
584 | /* it could update max_pfn */ | 583 | /* it could update max_pfn */ |
585 | 584 | ||
586 | /* max_low_pfn is 0, we already have early_res support */ | 585 | /* max_low_pfn is 0, we already have early_res support */ |
587 | 586 | ||
588 | max_low_pfn = max_pfn; | 587 | max_low_pfn = max_pfn; |
589 | if (max_low_pfn > MAXMEM_PFN) { | 588 | if (max_low_pfn > MAXMEM_PFN) { |
590 | if (highmem_pages == -1) | 589 | if (highmem_pages == -1) |
591 | highmem_pages = max_pfn - MAXMEM_PFN; | 590 | highmem_pages = max_pfn - MAXMEM_PFN; |
592 | if (highmem_pages + MAXMEM_PFN < max_pfn) | 591 | if (highmem_pages + MAXMEM_PFN < max_pfn) |
593 | max_pfn = MAXMEM_PFN + highmem_pages; | 592 | max_pfn = MAXMEM_PFN + highmem_pages; |
594 | if (highmem_pages + MAXMEM_PFN > max_pfn) { | 593 | if (highmem_pages + MAXMEM_PFN > max_pfn) { |
595 | printk(KERN_WARNING "only %luMB highmem pages " | 594 | printk(KERN_WARNING "only %luMB highmem pages " |
596 | "available, ignoring highmem size of %uMB.\n", | 595 | "available, ignoring highmem size of %uMB.\n", |
597 | pages_to_mb(max_pfn - MAXMEM_PFN), | 596 | pages_to_mb(max_pfn - MAXMEM_PFN), |
598 | pages_to_mb(highmem_pages)); | 597 | pages_to_mb(highmem_pages)); |
599 | highmem_pages = 0; | 598 | highmem_pages = 0; |
600 | } | 599 | } |
601 | max_low_pfn = MAXMEM_PFN; | 600 | max_low_pfn = MAXMEM_PFN; |
602 | #ifndef CONFIG_HIGHMEM | 601 | #ifndef CONFIG_HIGHMEM |
603 | /* Maximum memory usable is what is directly addressable */ | 602 | /* Maximum memory usable is what is directly addressable */ |
604 | printk(KERN_WARNING "Warning only %ldMB will be used.\n", | 603 | printk(KERN_WARNING "Warning only %ldMB will be used.\n", |
605 | MAXMEM>>20); | 604 | MAXMEM>>20); |
606 | if (max_pfn > MAX_NONPAE_PFN) | 605 | if (max_pfn > MAX_NONPAE_PFN) |
607 | printk(KERN_WARNING | 606 | printk(KERN_WARNING |
608 | "Use a HIGHMEM64G enabled kernel.\n"); | 607 | "Use a HIGHMEM64G enabled kernel.\n"); |
609 | else | 608 | else |
610 | printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); | 609 | printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); |
611 | max_pfn = MAXMEM_PFN; | 610 | max_pfn = MAXMEM_PFN; |
612 | #else /* !CONFIG_HIGHMEM */ | 611 | #else /* !CONFIG_HIGHMEM */ |
613 | #ifndef CONFIG_HIGHMEM64G | 612 | #ifndef CONFIG_HIGHMEM64G |
614 | if (max_pfn > MAX_NONPAE_PFN) { | 613 | if (max_pfn > MAX_NONPAE_PFN) { |
615 | max_pfn = MAX_NONPAE_PFN; | 614 | max_pfn = MAX_NONPAE_PFN; |
616 | printk(KERN_WARNING "Warning only 4GB will be used." | 615 | printk(KERN_WARNING "Warning only 4GB will be used." |
617 | "Use a HIGHMEM64G enabled kernel.\n"); | 616 | "Use a HIGHMEM64G enabled kernel.\n"); |
618 | } | 617 | } |
619 | #endif /* !CONFIG_HIGHMEM64G */ | 618 | #endif /* !CONFIG_HIGHMEM64G */ |
620 | #endif /* !CONFIG_HIGHMEM */ | 619 | #endif /* !CONFIG_HIGHMEM */ |
621 | } else { | 620 | } else { |
622 | if (highmem_pages == -1) | 621 | if (highmem_pages == -1) |
623 | highmem_pages = 0; | 622 | highmem_pages = 0; |
624 | #ifdef CONFIG_HIGHMEM | 623 | #ifdef CONFIG_HIGHMEM |
625 | if (highmem_pages >= max_pfn) { | 624 | if (highmem_pages >= max_pfn) { |
626 | printk(KERN_ERR "highmem size specified (%uMB) is " | 625 | printk(KERN_ERR "highmem size specified (%uMB) is " |
627 | "bigger than pages available (%luMB)!.\n", | 626 | "bigger than pages available (%luMB)!.\n", |
628 | pages_to_mb(highmem_pages), | 627 | pages_to_mb(highmem_pages), |
629 | pages_to_mb(max_pfn)); | 628 | pages_to_mb(max_pfn)); |
630 | highmem_pages = 0; | 629 | highmem_pages = 0; |
631 | } | 630 | } |
632 | if (highmem_pages) { | 631 | if (highmem_pages) { |
633 | if (max_low_pfn - highmem_pages < | 632 | if (max_low_pfn - highmem_pages < |
634 | 64*1024*1024/PAGE_SIZE){ | 633 | 64*1024*1024/PAGE_SIZE){ |
635 | printk(KERN_ERR "highmem size %uMB results in " | 634 | printk(KERN_ERR "highmem size %uMB results in " |
636 | "smaller than 64MB lowmem, ignoring it.\n" | 635 | "smaller than 64MB lowmem, ignoring it.\n" |
637 | , pages_to_mb(highmem_pages)); | 636 | , pages_to_mb(highmem_pages)); |
638 | highmem_pages = 0; | 637 | highmem_pages = 0; |
639 | } | 638 | } |
640 | max_low_pfn -= highmem_pages; | 639 | max_low_pfn -= highmem_pages; |
641 | } | 640 | } |
642 | #else | 641 | #else |
643 | if (highmem_pages) | 642 | if (highmem_pages) |
644 | printk(KERN_ERR "ignoring highmem size on non-highmem" | 643 | printk(KERN_ERR "ignoring highmem size on non-highmem" |
645 | " kernel!\n"); | 644 | " kernel!\n"); |
646 | #endif | 645 | #endif |
647 | } | 646 | } |
648 | } | 647 | } |
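The core of find_low_pfn_range() is the lowmem/highmem split: when max_pfn exceeds MAXMEM_PFN, lowmem is capped at MAXMEM_PFN and the remainder (or the user-requested highmem= amount) becomes highmem. A stripped-down sketch of that decision, assuming the usual i386 3G/1G split with about 896MB of lowmem and leaving out the HIGHMEM64G and error-path printks:

#include <stdio.h>

#define PAGE_SHIFT  12
#define MAXMEM_PFN  ((896UL << 20) >> PAGE_SHIFT)	/* assumed: 896MB of lowmem */

int main(void)
{
	unsigned long max_pfn = (2048UL << 20) >> PAGE_SHIFT;	/* example: 2GB box */
	unsigned long max_low_pfn = max_pfn;
	unsigned long highmem_pages;

	if (max_low_pfn > MAXMEM_PFN) {
		highmem_pages = max_pfn - MAXMEM_PFN;
		max_low_pfn = MAXMEM_PFN;
	} else {
		highmem_pages = 0;
	}

	printf("lowmem: %lu pages, highmem: %lu pages\n",
	       max_low_pfn, highmem_pages);
	return 0;
}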
649 | 648 | ||
650 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 649 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
651 | void __init initmem_init(unsigned long start_pfn, | 650 | void __init initmem_init(unsigned long start_pfn, |
652 | unsigned long end_pfn) | 651 | unsigned long end_pfn) |
653 | { | 652 | { |
654 | #ifdef CONFIG_HIGHMEM | 653 | #ifdef CONFIG_HIGHMEM |
655 | highstart_pfn = highend_pfn = max_pfn; | 654 | highstart_pfn = highend_pfn = max_pfn; |
656 | if (max_pfn > max_low_pfn) | 655 | if (max_pfn > max_low_pfn) |
657 | highstart_pfn = max_low_pfn; | 656 | highstart_pfn = max_low_pfn; |
658 | memory_present(0, 0, highend_pfn); | 657 | memory_present(0, 0, highend_pfn); |
659 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | 658 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", |
660 | pages_to_mb(highend_pfn - highstart_pfn)); | 659 | pages_to_mb(highend_pfn - highstart_pfn)); |
661 | num_physpages = highend_pfn; | 660 | num_physpages = highend_pfn; |
662 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | 661 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; |
663 | #else | 662 | #else |
664 | memory_present(0, 0, max_low_pfn); | 663 | memory_present(0, 0, max_low_pfn); |
665 | num_physpages = max_low_pfn; | 664 | num_physpages = max_low_pfn; |
666 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 665 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
667 | #endif | 666 | #endif |
668 | #ifdef CONFIG_FLATMEM | 667 | #ifdef CONFIG_FLATMEM |
669 | max_mapnr = num_physpages; | 668 | max_mapnr = num_physpages; |
670 | #endif | 669 | #endif |
671 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", | 670 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", |
672 | pages_to_mb(max_low_pfn)); | 671 | pages_to_mb(max_low_pfn)); |
673 | 672 | ||
674 | setup_bootmem_allocator(); | 673 | setup_bootmem_allocator(); |
675 | } | 674 | } |
676 | 675 | ||
677 | void __init zone_sizes_init(void) | 676 | void __init zone_sizes_init(void) |
678 | { | 677 | { |
679 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 678 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
680 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 679 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
681 | max_zone_pfns[ZONE_DMA] = | 680 | max_zone_pfns[ZONE_DMA] = |
682 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | 681 | virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; |
683 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | 682 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; |
684 | remove_all_active_ranges(); | 683 | remove_all_active_ranges(); |
685 | #ifdef CONFIG_HIGHMEM | 684 | #ifdef CONFIG_HIGHMEM |
686 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; | 685 | max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; |
687 | e820_register_active_regions(0, 0, highend_pfn); | 686 | e820_register_active_regions(0, 0, highend_pfn); |
688 | #else | 687 | #else |
689 | e820_register_active_regions(0, 0, max_low_pfn); | 688 | e820_register_active_regions(0, 0, max_low_pfn); |
690 | #endif | 689 | #endif |
691 | 690 | ||
692 | free_area_init_nodes(max_zone_pfns); | 691 | free_area_init_nodes(max_zone_pfns); |
693 | } | 692 | } |
694 | #endif /* !CONFIG_NEED_MULTIPLE_NODES */ | 693 | #endif /* !CONFIG_NEED_MULTIPLE_NODES */ |
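zone_sizes_init() above fills max_zone_pfns with the upper pfn of each zone: ZONE_DMA up to MAX_DMA_ADDRESS, ZONE_NORMAL up to max_low_pfn, and (with CONFIG_HIGHMEM) ZONE_HIGHMEM up to highend_pfn. An illustrative sketch of that table, using the usual i386 assumptions of a 16MB ISA DMA limit and the example pfns from above; the values are made up, not taken from this tree:

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned long max_low_pfn = (896UL << 20) >> PAGE_SHIFT;
	unsigned long highend_pfn = (2048UL << 20) >> PAGE_SHIFT;
	unsigned long max_zone_pfns[3] = {
		(16UL << 20) >> PAGE_SHIFT,	/* ZONE_DMA: first 16MB */
		max_low_pfn,			/* ZONE_NORMAL: rest of lowmem */
		highend_pfn,			/* ZONE_HIGHMEM: everything above */
	};
	const char *names[3] = { "DMA", "NORMAL", "HIGHMEM" };
	int i;

	for (i = 0; i < 3; i++)
		printf("ZONE_%-7s up to pfn %lu\n", names[i], max_zone_pfns[i]);
	return 0;
}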
695 | 694 | ||
696 | void __init setup_bootmem_allocator(void) | 695 | void __init setup_bootmem_allocator(void) |
697 | { | 696 | { |
698 | int i; | 697 | int i; |
699 | unsigned long bootmap_size, bootmap; | 698 | unsigned long bootmap_size, bootmap; |
700 | /* | 699 | /* |
701 | * Initialize the boot-time allocator (with low memory only): | 700 | * Initialize the boot-time allocator (with low memory only): |
702 | */ | 701 | */ |
703 | bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; | 702 | bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; |
704 | bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, | 703 | bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT, |
705 | max_pfn_mapped<<PAGE_SHIFT, bootmap_size, | 704 | max_pfn_mapped<<PAGE_SHIFT, bootmap_size, |
706 | PAGE_SIZE); | 705 | PAGE_SIZE); |
707 | if (bootmap == -1L) | 706 | if (bootmap == -1L) |
708 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | 707 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
709 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | 708 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); |
710 | 709 | ||
711 | /* don't touch min_low_pfn */ | 710 | /* don't touch min_low_pfn */ |
712 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, | 711 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, |
713 | min_low_pfn, max_low_pfn); | 712 | min_low_pfn, max_low_pfn); |
714 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 713 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
715 | max_pfn_mapped<<PAGE_SHIFT); | 714 | max_pfn_mapped<<PAGE_SHIFT); |
716 | printk(KERN_INFO " low ram: %08lx - %08lx\n", | 715 | printk(KERN_INFO " low ram: %08lx - %08lx\n", |
717 | min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT); | 716 | min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT); |
718 | printk(KERN_INFO " bootmap %08lx - %08lx\n", | 717 | printk(KERN_INFO " bootmap %08lx - %08lx\n", |
719 | bootmap, bootmap + bootmap_size); | 718 | bootmap, bootmap + bootmap_size); |
720 | for_each_online_node(i) | 719 | for_each_online_node(i) |
721 | free_bootmem_with_active_regions(i, max_low_pfn); | 720 | free_bootmem_with_active_regions(i, max_low_pfn); |
722 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); | 721 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); |
723 | 722 | ||
724 | after_init_bootmem = 1; | 723 | after_init_bootmem = 1; |
725 | } | 724 | } |
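The bootmap sizing above comes from the fact that the bootmem allocator tracks one bit per lowmem page, with the bitmap rounded up to whole pages. A back-of-the-envelope sketch of that calculation (bootmap_pages_needed is an illustrative name and the real bootmem_bootmap_pages() rounds slightly differently):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

static unsigned long bootmap_pages_needed(unsigned long pages)
{
	unsigned long bytes = (pages + 7) / 8;		/* one bit per page */

	return (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;	/* round up to pages */
}

int main(void)
{
	unsigned long max_low_pfn = (896UL << 20) >> PAGE_SHIFT;	/* 896MB lowmem */
	unsigned long bootmap_size = bootmap_pages_needed(max_low_pfn) << PAGE_SHIFT;

	printf("bootmap for %lu pages: %lu bytes\n", max_low_pfn, bootmap_size);
	return 0;
}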
726 | 725 | ||
727 | /* | ||
728 | * The node 0 pgdat is initialized before all of these because | ||
729 | * it's needed for bootmem. node>0 pgdats have their virtual | ||
730 | * space allocated before the pagetables are in place to access | ||
731 | * them, so they can't be cleared then. | ||
732 | * | ||
733 | * This should all compile down to nothing when NUMA is off. | ||
734 | */ | ||
735 | static void __init remapped_pgdat_init(void) | ||
736 | { | ||
737 | int nid; | ||
738 | |||
739 | for_each_online_node(nid) { | ||
740 | if (nid != 0) | ||
741 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | ||
742 | } | ||
743 | } | ||
744 | |||
745 | static void __init find_early_table_space(unsigned long end) | 726 | static void __init find_early_table_space(unsigned long end) |
746 | { | 727 | { |
747 | unsigned long puds, pmds, tables, start; | 728 | unsigned long puds, pmds, tables, start; |
748 | 729 | ||
749 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | 730 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; |
750 | tables = PAGE_ALIGN(puds * sizeof(pud_t)); | 731 | tables = PAGE_ALIGN(puds * sizeof(pud_t)); |
751 | 732 | ||
752 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | 733 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; |
753 | tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); | 734 | tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); |
754 | 735 | ||
755 | /* | 736 | /* |
756 | * RED-PEN putting page tables only on node 0 could | 737 | * RED-PEN putting page tables only on node 0 could |
757 | * cause a hotspot and fill up ZONE_DMA. The page tables | 738 | * cause a hotspot and fill up ZONE_DMA. The page tables |
758 | * need roughly 0.5KB per GB. | 739 | * need roughly 0.5KB per GB. |
759 | */ | 740 | */ |
760 | start = 0x7000; | 741 | start = 0x7000; |
761 | table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | 742 | table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, |
762 | tables, PAGE_SIZE); | 743 | tables, PAGE_SIZE); |
763 | if (table_start == -1UL) | 744 | if (table_start == -1UL) |
764 | panic("Cannot find space for the kernel page tables"); | 745 | panic("Cannot find space for the kernel page tables"); |
765 | 746 | ||
766 | table_start >>= PAGE_SHIFT; | 747 | table_start >>= PAGE_SHIFT; |
767 | table_end = table_start; | 748 | table_end = table_start; |
768 | table_top = table_start + (tables>>PAGE_SHIFT); | 749 | table_top = table_start + (tables>>PAGE_SHIFT); |
769 | 750 | ||
770 | printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", | 751 | printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", |
771 | end, table_start << PAGE_SHIFT, | 752 | end, table_start << PAGE_SHIFT, |
772 | (table_start << PAGE_SHIFT) + tables); | 753 | (table_start << PAGE_SHIFT) + tables); |
773 | } | 754 | } |
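find_early_table_space() estimates how much memory the kernel direct-mapping page tables will need: one pud entry per PUD_SIZE and one pmd entry per PMD_SIZE of the range, each table rounded up to a page. A sizing sketch with assumed PAE-style constants (2MB pmds, 1GB puds, 8-byte entries); the real values depend on the paging mode:

#include <stdio.h>

#define PAGE_SIZE  4096UL
#define PMD_SHIFT  21		/* assumed: 2MB per pmd entry */
#define PUD_SHIFT  30		/* assumed: 1GB per pud entry */

static unsigned long page_align(unsigned long x)
{
	return (x + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

int main(void)
{
	unsigned long end = 896UL << 20;				/* map 896MB */
	unsigned long puds = (end + (1UL << PUD_SHIFT) - 1) >> PUD_SHIFT;
	unsigned long pmds = (end + (1UL << PMD_SHIFT) - 1) >> PMD_SHIFT;
	unsigned long tables = page_align(puds * 8) + page_align(pmds * 8);

	printf("need %lu bytes of early page tables for %luMB\n",
	       tables, end >> 20);
	return 0;
}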
774 | 755 | ||
775 | unsigned long __init_refok init_memory_mapping(unsigned long start, | 756 | unsigned long __init_refok init_memory_mapping(unsigned long start, |
776 | unsigned long end) | 757 | unsigned long end) |
777 | { | 758 | { |
778 | pgd_t *pgd_base = swapper_pg_dir; | 759 | pgd_t *pgd_base = swapper_pg_dir; |
779 | 760 | ||
780 | /* | 761 | /* |
781 | * Find space for the kernel direct mapping tables. | 762 | * Find space for the kernel direct mapping tables. |
782 | */ | 763 | */ |
783 | if (!after_init_bootmem) | 764 | if (!after_init_bootmem) |
784 | find_early_table_space(end); | 765 | find_early_table_space(end); |
785 | 766 | ||
786 | #ifdef CONFIG_X86_PAE | 767 | #ifdef CONFIG_X86_PAE |
787 | set_nx(); | 768 | set_nx(); |
788 | if (nx_enabled) | 769 | if (nx_enabled) |
789 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | 770 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); |
790 | #endif | 771 | #endif |
791 | 772 | ||
792 | /* Enable PSE if available */ | 773 | /* Enable PSE if available */ |
793 | if (cpu_has_pse) | 774 | if (cpu_has_pse) |
794 | set_in_cr4(X86_CR4_PSE); | 775 | set_in_cr4(X86_CR4_PSE); |
795 | 776 | ||
796 | /* Enable PGE if available */ | 777 | /* Enable PGE if available */ |
797 | if (cpu_has_pge) { | 778 | if (cpu_has_pge) { |
798 | set_in_cr4(X86_CR4_PGE); | 779 | set_in_cr4(X86_CR4_PGE); |
799 | __PAGE_KERNEL |= _PAGE_GLOBAL; | 780 | __PAGE_KERNEL |= _PAGE_GLOBAL; |
800 | __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; | 781 | __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; |
801 | } | 782 | } |
802 | 783 | ||
803 | kernel_physical_mapping_init(pgd_base, start, end); | 784 | kernel_physical_mapping_init(pgd_base, start, end); |
804 | 785 | ||
805 | load_cr3(swapper_pg_dir); | 786 | load_cr3(swapper_pg_dir); |
806 | 787 | ||
807 | __flush_tlb_all(); | 788 | __flush_tlb_all(); |
808 | 789 | ||
809 | if (!after_init_bootmem) | 790 | if (!after_init_bootmem) |
810 | reserve_early(table_start << PAGE_SHIFT, | 791 | reserve_early(table_start << PAGE_SHIFT, |
811 | table_end << PAGE_SHIFT, "PGTABLE"); | 792 | table_end << PAGE_SHIFT, "PGTABLE"); |
812 | 793 | ||
813 | return end >> PAGE_SHIFT; | 794 | return end >> PAGE_SHIFT; |
814 | } | 795 | } |
815 | 796 | ||
816 | /* | 797 | /* |
817 | * paging_init() sets up the page tables - note that the first 8MB are | 798 | * paging_init() sets up the page tables - note that the first 8MB are |
818 | * already mapped by head.S. | 799 | * already mapped by head.S. |
819 | * | 800 | * |
820 | * This routine also unmaps the page at virtual kernel address 0, so | 801 | * This routine also unmaps the page at virtual kernel address 0, so |
821 | * that we can trap those pesky NULL-reference errors in the kernel. | 802 | * that we can trap those pesky NULL-reference errors in the kernel. |
822 | */ | 803 | */ |
823 | void __init paging_init(void) | 804 | void __init paging_init(void) |
824 | { | 805 | { |
825 | pagetable_init(); | 806 | pagetable_init(); |
826 | 807 | ||
827 | __flush_tlb_all(); | 808 | __flush_tlb_all(); |
828 | 809 | ||
829 | kmap_init(); | 810 | kmap_init(); |
830 | 811 | ||
831 | /* | 812 | /* |
832 | * NOTE: at this point the bootmem allocator is fully available. | 813 | * NOTE: at this point the bootmem allocator is fully available. |
833 | */ | 814 | */ |
834 | remapped_pgdat_init(); | ||
835 | sparse_init(); | 815 | sparse_init(); |
836 | zone_sizes_init(); | 816 | zone_sizes_init(); |
837 | 817 | ||
838 | paravirt_post_allocator_init(); | 818 | paravirt_post_allocator_init(); |
839 | } | 819 | } |
840 | 820 | ||
841 | /* | 821 | /* |
842 | * Test if the WP bit works in supervisor mode. It isn't supported on 386's | 822 | * Test if the WP bit works in supervisor mode. It isn't supported on 386's |
843 | * and also on some strange 486's. All 586+'s are OK. This used to involve | 823 | * and also on some strange 486's. All 586+'s are OK. This used to involve |
844 | * black magic jumps to work around some nasty CPU bugs, but fortunately the | 824 | * black magic jumps to work around some nasty CPU bugs, but fortunately the |
845 | * switch to using exceptions got rid of all that. | 825 | * switch to using exceptions got rid of all that. |
846 | */ | 826 | */ |
847 | static void __init test_wp_bit(void) | 827 | static void __init test_wp_bit(void) |
848 | { | 828 | { |
849 | printk(KERN_INFO | 829 | printk(KERN_INFO |
850 | "Checking if this processor honours the WP bit even in supervisor mode..."); | 830 | "Checking if this processor honours the WP bit even in supervisor mode..."); |
851 | 831 | ||
852 | /* Any page-aligned address will do, the test is non-destructive */ | 832 | /* Any page-aligned address will do, the test is non-destructive */ |
853 | __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); | 833 | __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); |
854 | boot_cpu_data.wp_works_ok = do_test_wp_bit(); | 834 | boot_cpu_data.wp_works_ok = do_test_wp_bit(); |
855 | clear_fixmap(FIX_WP_TEST); | 835 | clear_fixmap(FIX_WP_TEST); |
856 | 836 | ||
857 | if (!boot_cpu_data.wp_works_ok) { | 837 | if (!boot_cpu_data.wp_works_ok) { |
858 | printk(KERN_CONT "No.\n"); | 838 | printk(KERN_CONT "No.\n"); |
859 | #ifdef CONFIG_X86_WP_WORKS_OK | 839 | #ifdef CONFIG_X86_WP_WORKS_OK |
860 | panic( | 840 | panic( |
861 | "This kernel doesn't support CPU's with broken WP. Recompile it for a 386!"); | 841 | "This kernel doesn't support CPU's with broken WP. Recompile it for a 386!"); |
862 | #endif | 842 | #endif |
863 | } else { | 843 | } else { |
864 | printk(KERN_CONT "Ok.\n"); | 844 | printk(KERN_CONT "Ok.\n"); |
865 | } | 845 | } |
866 | } | 846 | } |
867 | 847 | ||
868 | static struct kcore_list kcore_mem, kcore_vmalloc; | 848 | static struct kcore_list kcore_mem, kcore_vmalloc; |
869 | 849 | ||
870 | void __init mem_init(void) | 850 | void __init mem_init(void) |
871 | { | 851 | { |
872 | int codesize, reservedpages, datasize, initsize; | 852 | int codesize, reservedpages, datasize, initsize; |
873 | int tmp; | 853 | int tmp; |
874 | 854 | ||
875 | #ifdef CONFIG_FLATMEM | 855 | #ifdef CONFIG_FLATMEM |
876 | BUG_ON(!mem_map); | 856 | BUG_ON(!mem_map); |
877 | #endif | 857 | #endif |
878 | /* this will put all low memory onto the freelists */ | 858 | /* this will put all low memory onto the freelists */ |
879 | totalram_pages += free_all_bootmem(); | 859 | totalram_pages += free_all_bootmem(); |
880 | 860 | ||
881 | reservedpages = 0; | 861 | reservedpages = 0; |
882 | for (tmp = 0; tmp < max_low_pfn; tmp++) | 862 | for (tmp = 0; tmp < max_low_pfn; tmp++) |
883 | /* | 863 | /* |
884 | * Only count reserved RAM pages: | 864 | * Only count reserved RAM pages: |
885 | */ | 865 | */ |
886 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) | 866 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) |
887 | reservedpages++; | 867 | reservedpages++; |
888 | 868 | ||
889 | set_highmem_pages_init(); | 869 | set_highmem_pages_init(); |
890 | 870 | ||
891 | codesize = (unsigned long) &_etext - (unsigned long) &_text; | 871 | codesize = (unsigned long) &_etext - (unsigned long) &_text; |
892 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; | 872 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; |
893 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; | 873 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; |
894 | 874 | ||
895 | kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); | 875 | kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); |
896 | kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, | 876 | kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, |
897 | VMALLOC_END-VMALLOC_START); | 877 | VMALLOC_END-VMALLOC_START); |
898 | 878 | ||
899 | printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " | 879 | printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " |
900 | "%dk reserved, %dk data, %dk init, %ldk highmem)\n", | 880 | "%dk reserved, %dk data, %dk init, %ldk highmem)\n", |
901 | (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), | 881 | (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), |
902 | num_physpages << (PAGE_SHIFT-10), | 882 | num_physpages << (PAGE_SHIFT-10), |
903 | codesize >> 10, | 883 | codesize >> 10, |
904 | reservedpages << (PAGE_SHIFT-10), | 884 | reservedpages << (PAGE_SHIFT-10), |
905 | datasize >> 10, | 885 | datasize >> 10, |
906 | initsize >> 10, | 886 | initsize >> 10, |
907 | (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) | 887 | (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) |
908 | ); | 888 | ); |
909 | 889 | ||
910 | printk(KERN_INFO "virtual kernel memory layout:\n" | 890 | printk(KERN_INFO "virtual kernel memory layout:\n" |
911 | " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" | 891 | " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" |
912 | #ifdef CONFIG_HIGHMEM | 892 | #ifdef CONFIG_HIGHMEM |
913 | " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" | 893 | " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" |
914 | #endif | 894 | #endif |
915 | " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" | 895 | " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" |
916 | " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" | 896 | " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" |
917 | " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" | 897 | " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" |
918 | " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" | 898 | " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" |
919 | " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", | 899 | " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", |
920 | FIXADDR_START, FIXADDR_TOP, | 900 | FIXADDR_START, FIXADDR_TOP, |
921 | (FIXADDR_TOP - FIXADDR_START) >> 10, | 901 | (FIXADDR_TOP - FIXADDR_START) >> 10, |
922 | 902 | ||
923 | #ifdef CONFIG_HIGHMEM | 903 | #ifdef CONFIG_HIGHMEM |
924 | PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, | 904 | PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, |
925 | (LAST_PKMAP*PAGE_SIZE) >> 10, | 905 | (LAST_PKMAP*PAGE_SIZE) >> 10, |
926 | #endif | 906 | #endif |
927 | 907 | ||
928 | VMALLOC_START, VMALLOC_END, | 908 | VMALLOC_START, VMALLOC_END, |
929 | (VMALLOC_END - VMALLOC_START) >> 20, | 909 | (VMALLOC_END - VMALLOC_START) >> 20, |
930 | 910 | ||
931 | (unsigned long)__va(0), (unsigned long)high_memory, | 911 | (unsigned long)__va(0), (unsigned long)high_memory, |
932 | ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, | 912 | ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, |
933 | 913 | ||
934 | (unsigned long)&__init_begin, (unsigned long)&__init_end, | 914 | (unsigned long)&__init_begin, (unsigned long)&__init_end, |
935 | ((unsigned long)&__init_end - | 915 | ((unsigned long)&__init_end - |
936 | (unsigned long)&__init_begin) >> 10, | 916 | (unsigned long)&__init_begin) >> 10, |
937 | 917 | ||
938 | (unsigned long)&_etext, (unsigned long)&_edata, | 918 | (unsigned long)&_etext, (unsigned long)&_edata, |
939 | ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, | 919 | ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, |
940 | 920 | ||
941 | (unsigned long)&_text, (unsigned long)&_etext, | 921 | (unsigned long)&_text, (unsigned long)&_etext, |
942 | ((unsigned long)&_etext - (unsigned long)&_text) >> 10); | 922 | ((unsigned long)&_etext - (unsigned long)&_text) >> 10); |
943 | 923 | ||
944 | #ifdef CONFIG_HIGHMEM | 924 | #ifdef CONFIG_HIGHMEM |
945 | BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); | 925 | BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); |
946 | BUG_ON(VMALLOC_END > PKMAP_BASE); | 926 | BUG_ON(VMALLOC_END > PKMAP_BASE); |
947 | #endif | 927 | #endif |
948 | BUG_ON(VMALLOC_START > VMALLOC_END); | 928 | BUG_ON(VMALLOC_START > VMALLOC_END); |
949 | BUG_ON((unsigned long)high_memory > VMALLOC_START); | 929 | BUG_ON((unsigned long)high_memory > VMALLOC_START); |
950 | 930 | ||
951 | if (boot_cpu_data.wp_works_ok < 0) | 931 | if (boot_cpu_data.wp_works_ok < 0) |
952 | test_wp_bit(); | 932 | test_wp_bit(); |
953 | 933 | ||
954 | cpa_init(); | 934 | cpa_init(); |
955 | save_pg_dir(); | 935 | save_pg_dir(); |
956 | zap_low_mappings(); | 936 | zap_low_mappings(); |
957 | } | 937 | } |
958 | 938 | ||
959 | #ifdef CONFIG_MEMORY_HOTPLUG | 939 | #ifdef CONFIG_MEMORY_HOTPLUG |
960 | int arch_add_memory(int nid, u64 start, u64 size) | 940 | int arch_add_memory(int nid, u64 start, u64 size) |
961 | { | 941 | { |
962 | struct pglist_data *pgdata = NODE_DATA(nid); | 942 | struct pglist_data *pgdata = NODE_DATA(nid); |
963 | struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; | 943 | struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; |
964 | unsigned long start_pfn = start >> PAGE_SHIFT; | 944 | unsigned long start_pfn = start >> PAGE_SHIFT; |
965 | unsigned long nr_pages = size >> PAGE_SHIFT; | 945 | unsigned long nr_pages = size >> PAGE_SHIFT; |
966 | 946 | ||
967 | return __add_pages(zone, start_pfn, nr_pages); | 947 | return __add_pages(zone, start_pfn, nr_pages); |
968 | } | 948 | } |
969 | #endif | 949 | #endif |
970 | 950 | ||
971 | /* | 951 | /* |
972 | * This function cannot be __init, since exceptions don't work in that | 952 | * This function cannot be __init, since exceptions don't work in that |
973 | * section. Put this after the callers, so that it cannot be inlined. | 953 | * section. Put this after the callers, so that it cannot be inlined. |
974 | */ | 954 | */ |
975 | static noinline int do_test_wp_bit(void) | 955 | static noinline int do_test_wp_bit(void) |
976 | { | 956 | { |
977 | char tmp_reg; | 957 | char tmp_reg; |
978 | int flag; | 958 | int flag; |
979 | 959 | ||
980 | __asm__ __volatile__( | 960 | __asm__ __volatile__( |
981 | " movb %0, %1 \n" | 961 | " movb %0, %1 \n" |
982 | "1: movb %1, %0 \n" | 962 | "1: movb %1, %0 \n" |
983 | " xorl %2, %2 \n" | 963 | " xorl %2, %2 \n" |
984 | "2: \n" | 964 | "2: \n" |
985 | _ASM_EXTABLE(1b,2b) | 965 | _ASM_EXTABLE(1b,2b) |
986 | :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)), | 966 | :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)), |
987 | "=q" (tmp_reg), | 967 | "=q" (tmp_reg), |
988 | "=r" (flag) | 968 | "=r" (flag) |
989 | :"2" (1) | 969 | :"2" (1) |
990 | :"memory"); | 970 | :"memory"); |
991 | 971 | ||
992 | return flag; | 972 | return flag; |
993 | } | 973 | } |
994 | 974 | ||
995 | #ifdef CONFIG_DEBUG_RODATA | 975 | #ifdef CONFIG_DEBUG_RODATA |
996 | const int rodata_test_data = 0xC3; | 976 | const int rodata_test_data = 0xC3; |
997 | EXPORT_SYMBOL_GPL(rodata_test_data); | 977 | EXPORT_SYMBOL_GPL(rodata_test_data); |
998 | 978 | ||
999 | void mark_rodata_ro(void) | 979 | void mark_rodata_ro(void) |
1000 | { | 980 | { |
1001 | unsigned long start = PFN_ALIGN(_text); | 981 | unsigned long start = PFN_ALIGN(_text); |
1002 | unsigned long size = PFN_ALIGN(_etext) - start; | 982 | unsigned long size = PFN_ALIGN(_etext) - start; |
1003 | 983 | ||
1004 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | 984 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); |
1005 | printk(KERN_INFO "Write protecting the kernel text: %luk\n", | 985 | printk(KERN_INFO "Write protecting the kernel text: %luk\n", |
1006 | size >> 10); | 986 | size >> 10); |
1007 | 987 | ||
1008 | #ifdef CONFIG_CPA_DEBUG | 988 | #ifdef CONFIG_CPA_DEBUG |
1009 | printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n", | 989 | printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n", |
1010 | start, start+size); | 990 | start, start+size); |
1011 | set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT); | 991 | set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT); |
1012 | 992 | ||
1013 | printk(KERN_INFO "Testing CPA: write protecting again\n"); | 993 | printk(KERN_INFO "Testing CPA: write protecting again\n"); |
1014 | set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); | 994 | set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); |
1015 | #endif | 995 | #endif |
1016 | start += size; | 996 | start += size; |
1017 | size = (unsigned long)__end_rodata - start; | 997 | size = (unsigned long)__end_rodata - start; |
1018 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | 998 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); |
1019 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | 999 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
1020 | size >> 10); | 1000 | size >> 10); |
1021 | rodata_test(); | 1001 | rodata_test(); |
1022 | 1002 | ||
1023 | #ifdef CONFIG_CPA_DEBUG | 1003 | #ifdef CONFIG_CPA_DEBUG |
1024 | printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size); | 1004 | printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size); |
1025 | set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); | 1005 | set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); |
1026 | 1006 | ||
1027 | printk(KERN_INFO "Testing CPA: write protecting again\n"); | 1007 | printk(KERN_INFO "Testing CPA: write protecting again\n"); |
1028 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | 1008 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); |
1029 | #endif | 1009 | #endif |
1030 | } | 1010 | } |
1031 | #endif | 1011 | #endif |
1032 | 1012 | ||
1033 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | 1013 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |
1034 | { | 1014 | { |
1035 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1015 | #ifdef CONFIG_DEBUG_PAGEALLOC |
1036 | /* | 1016 | /* |
1037 | * If debugging page accesses then do not free this memory but | 1017 | * If debugging page accesses then do not free this memory but |
1038 | * mark them not present - any buggy init-section access will | 1018 | * mark them not present - any buggy init-section access will |
1039 | * create a kernel page fault: | 1019 | * create a kernel page fault: |
1040 | */ | 1020 | */ |
1041 | printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", | 1021 | printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", |
1042 | begin, PAGE_ALIGN(end)); | 1022 | begin, PAGE_ALIGN(end)); |
1043 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); | 1023 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); |
1044 | #else | 1024 | #else |
1045 | unsigned long addr; | 1025 | unsigned long addr; |
1046 | 1026 | ||
1047 | /* | 1027 | /* |
1048 | * We just marked the kernel text read only above, now that | 1028 | * We just marked the kernel text read only above, now that |
1049 | * we are going to free part of that, we need to make that | 1029 | * we are going to free part of that, we need to make that |
1050 | * writeable first. | 1030 | * writeable first. |
1051 | */ | 1031 | */ |
1052 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); | 1032 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); |
1053 | 1033 | ||
1054 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | 1034 | for (addr = begin; addr < end; addr += PAGE_SIZE) { |
1055 | ClearPageReserved(virt_to_page(addr)); | 1035 | ClearPageReserved(virt_to_page(addr)); |
1056 | init_page_count(virt_to_page(addr)); | 1036 | init_page_count(virt_to_page(addr)); |
1057 | memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); | 1037 | memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); |
1058 | free_page(addr); | 1038 | free_page(addr); |
1059 | totalram_pages++; | 1039 | totalram_pages++; |
1060 | } | 1040 | } |
1061 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); | 1041 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); |
1062 | #endif | 1042 | #endif |
1063 | } | 1043 | } |
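The non-DEBUG_PAGEALLOC path above fills each init page with a poison pattern before handing it back to the page allocator, so that any late access to freed init code or data shows up as recognizable garbage. A trivial userspace analogue of that poison-then-free step (POISON_BYTE is an illustrative stand-in for POISON_FREE_INITMEM):

#include <stdlib.h>
#include <string.h>

#define POISON_BYTE 0xcc

int main(void)
{
	size_t len = 4096;
	unsigned char *initmem = malloc(len);

	if (!initmem)
		return 1;
	memset(initmem, POISON_BYTE, len);	/* poison before giving it back */
	free(initmem);
	return 0;
}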
1064 | 1044 | ||
1065 | void free_initmem(void) | 1045 | void free_initmem(void) |
1066 | { | 1046 | { |
1067 | free_init_pages("unused kernel memory", | 1047 | free_init_pages("unused kernel memory", |
1068 | (unsigned long)(&__init_begin), | 1048 | (unsigned long)(&__init_begin), |
1069 | (unsigned long)(&__init_end)); | 1049 | (unsigned long)(&__init_end)); |
1070 | } | 1050 | } |
1071 | 1051 | ||
1072 | #ifdef CONFIG_BLK_DEV_INITRD | 1052 | #ifdef CONFIG_BLK_DEV_INITRD |
1073 | void free_initrd_mem(unsigned long start, unsigned long end) | 1053 | void free_initrd_mem(unsigned long start, unsigned long end) |
1074 | { | 1054 | { |
1075 | free_init_pages("initrd memory", start, end); | 1055 | free_init_pages("initrd memory", start, end); |
1076 | } | 1056 | } |
1077 | #endif | 1057 | #endif |
1078 | 1058 | ||
1079 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | 1059 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, |
1080 | int flags) | 1060 | int flags) |
1081 | { | 1061 | { |
1082 | return reserve_bootmem(phys, len, flags); | 1062 | return reserve_bootmem(phys, len, flags); |
1083 | } | 1063 | } |
1084 | 1064 |
include/asm-x86/numa_32.h
1 | #ifndef _ASM_X86_32_NUMA_H | 1 | #ifndef _ASM_X86_32_NUMA_H |
2 | #define _ASM_X86_32_NUMA_H 1 | 2 | #define _ASM_X86_32_NUMA_H 1 |
3 | 3 | ||
4 | extern int pxm_to_nid(int pxm); | 4 | extern int pxm_to_nid(int pxm); |
5 | extern void numa_remove_cpu(int cpu); | 5 | extern void numa_remove_cpu(int cpu); |
6 | 6 | ||
7 | #ifdef CONFIG_NUMA | 7 | #ifdef CONFIG_NUMA |
8 | extern void __init remap_numa_kva(void); | ||
9 | extern void set_highmem_pages_init(void); | 8 | extern void set_highmem_pages_init(void); |
10 | #else | ||
11 | static inline void remap_numa_kva(void) | ||
12 | { | ||
13 | } | ||
14 | #endif | 9 | #endif |
15 | 10 | ||
16 | #endif /* _ASM_X86_32_NUMA_H */ | 11 | #endif /* _ASM_X86_32_NUMA_H */ |
17 | 12 |