Commit 9512938b885304f72c847379611d6018064af840
Committed by: Linus Torvalds
Parent: f1db7afd91
Exists in: master and in 38 other branches
cpumask: update setup_node_to_cpumask_map() comments
node_to_cpumask() has been replaced by cpumask_of_node(), and wholly removed since commit 29c337a0 ("cpumask: remove obsolete node_to_cpumask now everyone uses cpumask_of_node"). So update the comments for setup_node_to_cpumask_map().

Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
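For context, a minimal sketch (not part of this commit or of the file below) of how a caller uses the replacement API: cpumask_of_node() returns a pointer to the per-node cpumask, rather than the old by-value node_to_cpumask(). The helper print_cpus_on_node() is hypothetical and only illustrates the call pattern.

    #include <linux/cpumask.h>
    #include <linux/printk.h>
    #include <linux/topology.h>

    /* Hypothetical caller: walk the CPUs attached to @node.
     * cpumask_of_node() is only valid once setup_node_to_cpumask_map()
     * has populated node_to_cpumask_map[], as the updated comment notes.
     */
    static void print_cpus_on_node(int node)
    {
    	int cpu;

    	for_each_cpu(cpu, cpumask_of_node(node))
    		pr_info("cpu %d is on node %d\n", cpu, node);
    }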
Showing 2 changed files with 2 additions and 2 deletions
arch/powerpc/mm/numa.c
1 | /* | 1 | /* |
2 | * pSeries NUMA support | 2 | * pSeries NUMA support |
3 | * | 3 | * |
4 | * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM | 4 | * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU General Public License | 7 | * modify it under the terms of the GNU General Public License |
8 | * as published by the Free Software Foundation; either version | 8 | * as published by the Free Software Foundation; either version |
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | #include <linux/threads.h> | 11 | #include <linux/threads.h> |
12 | #include <linux/bootmem.h> | 12 | #include <linux/bootmem.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
15 | #include <linux/mmzone.h> | 15 | #include <linux/mmzone.h> |
16 | #include <linux/export.h> | 16 | #include <linux/export.h> |
17 | #include <linux/nodemask.h> | 17 | #include <linux/nodemask.h> |
18 | #include <linux/cpu.h> | 18 | #include <linux/cpu.h> |
19 | #include <linux/notifier.h> | 19 | #include <linux/notifier.h> |
20 | #include <linux/memblock.h> | 20 | #include <linux/memblock.h> |
21 | #include <linux/of.h> | 21 | #include <linux/of.h> |
22 | #include <linux/pfn.h> | 22 | #include <linux/pfn.h> |
23 | #include <linux/cpuset.h> | 23 | #include <linux/cpuset.h> |
24 | #include <linux/node.h> | 24 | #include <linux/node.h> |
25 | #include <asm/sparsemem.h> | 25 | #include <asm/sparsemem.h> |
26 | #include <asm/prom.h> | 26 | #include <asm/prom.h> |
27 | #include <asm/system.h> | 27 | #include <asm/system.h> |
28 | #include <asm/smp.h> | 28 | #include <asm/smp.h> |
29 | #include <asm/firmware.h> | 29 | #include <asm/firmware.h> |
30 | #include <asm/paca.h> | 30 | #include <asm/paca.h> |
31 | #include <asm/hvcall.h> | 31 | #include <asm/hvcall.h> |
32 | 32 | ||
33 | static int numa_enabled = 1; | 33 | static int numa_enabled = 1; |
34 | 34 | ||
35 | static char *cmdline __initdata; | 35 | static char *cmdline __initdata; |
36 | 36 | ||
37 | static int numa_debug; | 37 | static int numa_debug; |
38 | #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } | 38 | #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } |
39 | 39 | ||
40 | int numa_cpu_lookup_table[NR_CPUS]; | 40 | int numa_cpu_lookup_table[NR_CPUS]; |
41 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; | 41 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; |
42 | struct pglist_data *node_data[MAX_NUMNODES]; | 42 | struct pglist_data *node_data[MAX_NUMNODES]; |
43 | 43 | ||
44 | EXPORT_SYMBOL(numa_cpu_lookup_table); | 44 | EXPORT_SYMBOL(numa_cpu_lookup_table); |
45 | EXPORT_SYMBOL(node_to_cpumask_map); | 45 | EXPORT_SYMBOL(node_to_cpumask_map); |
46 | EXPORT_SYMBOL(node_data); | 46 | EXPORT_SYMBOL(node_data); |
47 | 47 | ||
48 | static int min_common_depth; | 48 | static int min_common_depth; |
49 | static int n_mem_addr_cells, n_mem_size_cells; | 49 | static int n_mem_addr_cells, n_mem_size_cells; |
50 | static int form1_affinity; | 50 | static int form1_affinity; |
51 | 51 | ||
52 | #define MAX_DISTANCE_REF_POINTS 4 | 52 | #define MAX_DISTANCE_REF_POINTS 4 |
53 | static int distance_ref_points_depth; | 53 | static int distance_ref_points_depth; |
54 | static const unsigned int *distance_ref_points; | 54 | static const unsigned int *distance_ref_points; |
55 | static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; | 55 | static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; |
56 | 56 | ||
57 | /* | 57 | /* |
58 | * Allocate node_to_cpumask_map based on number of available nodes | 58 | * Allocate node_to_cpumask_map based on number of available nodes |
59 | * Requires node_possible_map to be valid. | 59 | * Requires node_possible_map to be valid. |
60 | * | 60 | * |
61 | * Note: node_to_cpumask() is not valid until after this is done. | 61 | * Note: cpumask_of_node() is not valid until after this is done. |
62 | */ | 62 | */ |
63 | static void __init setup_node_to_cpumask_map(void) | 63 | static void __init setup_node_to_cpumask_map(void) |
64 | { | 64 | { |
65 | unsigned int node, num = 0; | 65 | unsigned int node, num = 0; |
66 | 66 | ||
67 | /* setup nr_node_ids if not done yet */ | 67 | /* setup nr_node_ids if not done yet */ |
68 | if (nr_node_ids == MAX_NUMNODES) { | 68 | if (nr_node_ids == MAX_NUMNODES) { |
69 | for_each_node_mask(node, node_possible_map) | 69 | for_each_node_mask(node, node_possible_map) |
70 | num = node; | 70 | num = node; |
71 | nr_node_ids = num + 1; | 71 | nr_node_ids = num + 1; |
72 | } | 72 | } |
73 | 73 | ||
74 | /* allocate the map */ | 74 | /* allocate the map */ |
75 | for (node = 0; node < nr_node_ids; node++) | 75 | for (node = 0; node < nr_node_ids; node++) |
76 | alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); | 76 | alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); |
77 | 77 | ||
78 | /* cpumask_of_node() will now work */ | 78 | /* cpumask_of_node() will now work */ |
79 | dbg("Node to cpumask map for %d nodes\n", nr_node_ids); | 79 | dbg("Node to cpumask map for %d nodes\n", nr_node_ids); |
80 | } | 80 | } |
81 | 81 | ||
82 | static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn, | 82 | static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn, |
83 | unsigned int *nid) | 83 | unsigned int *nid) |
84 | { | 84 | { |
85 | unsigned long long mem; | 85 | unsigned long long mem; |
86 | char *p = cmdline; | 86 | char *p = cmdline; |
87 | static unsigned int fake_nid; | 87 | static unsigned int fake_nid; |
88 | static unsigned long long curr_boundary; | 88 | static unsigned long long curr_boundary; |
89 | 89 | ||
90 | /* | 90 | /* |
91 | * Modify node id, iff we started creating NUMA nodes | 91 | * Modify node id, iff we started creating NUMA nodes |
92 | * We want to continue from where we left of the last time | 92 | * We want to continue from where we left of the last time |
93 | */ | 93 | */ |
94 | if (fake_nid) | 94 | if (fake_nid) |
95 | *nid = fake_nid; | 95 | *nid = fake_nid; |
96 | /* | 96 | /* |
97 | * In case there are no more arguments to parse, the | 97 | * In case there are no more arguments to parse, the |
98 | * node_id should be the same as the last fake node id | 98 | * node_id should be the same as the last fake node id |
99 | * (we've handled this above). | 99 | * (we've handled this above). |
100 | */ | 100 | */ |
101 | if (!p) | 101 | if (!p) |
102 | return 0; | 102 | return 0; |
103 | 103 | ||
104 | mem = memparse(p, &p); | 104 | mem = memparse(p, &p); |
105 | if (!mem) | 105 | if (!mem) |
106 | return 0; | 106 | return 0; |
107 | 107 | ||
108 | if (mem < curr_boundary) | 108 | if (mem < curr_boundary) |
109 | return 0; | 109 | return 0; |
110 | 110 | ||
111 | curr_boundary = mem; | 111 | curr_boundary = mem; |
112 | 112 | ||
113 | if ((end_pfn << PAGE_SHIFT) > mem) { | 113 | if ((end_pfn << PAGE_SHIFT) > mem) { |
114 | /* | 114 | /* |
115 | * Skip commas and spaces | 115 | * Skip commas and spaces |
116 | */ | 116 | */ |
117 | while (*p == ',' || *p == ' ' || *p == '\t') | 117 | while (*p == ',' || *p == ' ' || *p == '\t') |
118 | p++; | 118 | p++; |
119 | 119 | ||
120 | cmdline = p; | 120 | cmdline = p; |
121 | fake_nid++; | 121 | fake_nid++; |
122 | *nid = fake_nid; | 122 | *nid = fake_nid; |
123 | dbg("created new fake_node with id %d\n", fake_nid); | 123 | dbg("created new fake_node with id %d\n", fake_nid); |
124 | return 1; | 124 | return 1; |
125 | } | 125 | } |
126 | return 0; | 126 | return 0; |
127 | } | 127 | } |
128 | 128 | ||
129 | /* | 129 | /* |
130 | * get_node_active_region - Return active region containing pfn | 130 | * get_node_active_region - Return active region containing pfn |
131 | * Active range returned is empty if none found. | 131 | * Active range returned is empty if none found. |
132 | * @pfn: The page to return the region for | 132 | * @pfn: The page to return the region for |
133 | * @node_ar: Returned set to the active region containing @pfn | 133 | * @node_ar: Returned set to the active region containing @pfn |
134 | */ | 134 | */ |
135 | static void __init get_node_active_region(unsigned long pfn, | 135 | static void __init get_node_active_region(unsigned long pfn, |
136 | struct node_active_region *node_ar) | 136 | struct node_active_region *node_ar) |
137 | { | 137 | { |
138 | unsigned long start_pfn, end_pfn; | 138 | unsigned long start_pfn, end_pfn; |
139 | int i, nid; | 139 | int i, nid; |
140 | 140 | ||
141 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { | 141 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { |
142 | if (pfn >= start_pfn && pfn < end_pfn) { | 142 | if (pfn >= start_pfn && pfn < end_pfn) { |
143 | node_ar->nid = nid; | 143 | node_ar->nid = nid; |
144 | node_ar->start_pfn = start_pfn; | 144 | node_ar->start_pfn = start_pfn; |
145 | node_ar->end_pfn = end_pfn; | 145 | node_ar->end_pfn = end_pfn; |
146 | break; | 146 | break; |
147 | } | 147 | } |
148 | } | 148 | } |
149 | } | 149 | } |
150 | 150 | ||
151 | static void map_cpu_to_node(int cpu, int node) | 151 | static void map_cpu_to_node(int cpu, int node) |
152 | { | 152 | { |
153 | numa_cpu_lookup_table[cpu] = node; | 153 | numa_cpu_lookup_table[cpu] = node; |
154 | 154 | ||
155 | dbg("adding cpu %d to node %d\n", cpu, node); | 155 | dbg("adding cpu %d to node %d\n", cpu, node); |
156 | 156 | ||
157 | if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) | 157 | if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) |
158 | cpumask_set_cpu(cpu, node_to_cpumask_map[node]); | 158 | cpumask_set_cpu(cpu, node_to_cpumask_map[node]); |
159 | } | 159 | } |
160 | 160 | ||
161 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR) | 161 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR) |
162 | static void unmap_cpu_from_node(unsigned long cpu) | 162 | static void unmap_cpu_from_node(unsigned long cpu) |
163 | { | 163 | { |
164 | int node = numa_cpu_lookup_table[cpu]; | 164 | int node = numa_cpu_lookup_table[cpu]; |
165 | 165 | ||
166 | dbg("removing cpu %lu from node %d\n", cpu, node); | 166 | dbg("removing cpu %lu from node %d\n", cpu, node); |
167 | 167 | ||
168 | if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { | 168 | if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { |
169 | cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); | 169 | cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); |
170 | } else { | 170 | } else { |
171 | printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", | 171 | printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", |
172 | cpu, node); | 172 | cpu, node); |
173 | } | 173 | } |
174 | } | 174 | } |
175 | #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */ | 175 | #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */ |
176 | 176 | ||
177 | /* must hold reference to node during call */ | 177 | /* must hold reference to node during call */ |
178 | static const int *of_get_associativity(struct device_node *dev) | 178 | static const int *of_get_associativity(struct device_node *dev) |
179 | { | 179 | { |
180 | return of_get_property(dev, "ibm,associativity", NULL); | 180 | return of_get_property(dev, "ibm,associativity", NULL); |
181 | } | 181 | } |
182 | 182 | ||
183 | /* | 183 | /* |
184 | * Returns the property linux,drconf-usable-memory if | 184 | * Returns the property linux,drconf-usable-memory if |
185 | * it exists (the property exists only in kexec/kdump kernels, | 185 | * it exists (the property exists only in kexec/kdump kernels, |
186 | * added by kexec-tools) | 186 | * added by kexec-tools) |
187 | */ | 187 | */ |
188 | static const u32 *of_get_usable_memory(struct device_node *memory) | 188 | static const u32 *of_get_usable_memory(struct device_node *memory) |
189 | { | 189 | { |
190 | const u32 *prop; | 190 | const u32 *prop; |
191 | u32 len; | 191 | u32 len; |
192 | prop = of_get_property(memory, "linux,drconf-usable-memory", &len); | 192 | prop = of_get_property(memory, "linux,drconf-usable-memory", &len); |
193 | if (!prop || len < sizeof(unsigned int)) | 193 | if (!prop || len < sizeof(unsigned int)) |
194 | return 0; | 194 | return 0; |
195 | return prop; | 195 | return prop; |
196 | } | 196 | } |
197 | 197 | ||
198 | int __node_distance(int a, int b) | 198 | int __node_distance(int a, int b) |
199 | { | 199 | { |
200 | int i; | 200 | int i; |
201 | int distance = LOCAL_DISTANCE; | 201 | int distance = LOCAL_DISTANCE; |
202 | 202 | ||
203 | if (!form1_affinity) | 203 | if (!form1_affinity) |
204 | return distance; | 204 | return distance; |
205 | 205 | ||
206 | for (i = 0; i < distance_ref_points_depth; i++) { | 206 | for (i = 0; i < distance_ref_points_depth; i++) { |
207 | if (distance_lookup_table[a][i] == distance_lookup_table[b][i]) | 207 | if (distance_lookup_table[a][i] == distance_lookup_table[b][i]) |
208 | break; | 208 | break; |
209 | 209 | ||
210 | /* Double the distance for each NUMA level */ | 210 | /* Double the distance for each NUMA level */ |
211 | distance *= 2; | 211 | distance *= 2; |
212 | } | 212 | } |
213 | 213 | ||
214 | return distance; | 214 | return distance; |
215 | } | 215 | } |
216 | 216 | ||
217 | static void initialize_distance_lookup_table(int nid, | 217 | static void initialize_distance_lookup_table(int nid, |
218 | const unsigned int *associativity) | 218 | const unsigned int *associativity) |
219 | { | 219 | { |
220 | int i; | 220 | int i; |
221 | 221 | ||
222 | if (!form1_affinity) | 222 | if (!form1_affinity) |
223 | return; | 223 | return; |
224 | 224 | ||
225 | for (i = 0; i < distance_ref_points_depth; i++) { | 225 | for (i = 0; i < distance_ref_points_depth; i++) { |
226 | distance_lookup_table[nid][i] = | 226 | distance_lookup_table[nid][i] = |
227 | associativity[distance_ref_points[i]]; | 227 | associativity[distance_ref_points[i]]; |
228 | } | 228 | } |
229 | } | 229 | } |
230 | 230 | ||
231 | /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa | 231 | /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa |
232 | * info is found. | 232 | * info is found. |
233 | */ | 233 | */ |
234 | static int associativity_to_nid(const unsigned int *associativity) | 234 | static int associativity_to_nid(const unsigned int *associativity) |
235 | { | 235 | { |
236 | int nid = -1; | 236 | int nid = -1; |
237 | 237 | ||
238 | if (min_common_depth == -1) | 238 | if (min_common_depth == -1) |
239 | goto out; | 239 | goto out; |
240 | 240 | ||
241 | if (associativity[0] >= min_common_depth) | 241 | if (associativity[0] >= min_common_depth) |
242 | nid = associativity[min_common_depth]; | 242 | nid = associativity[min_common_depth]; |
243 | 243 | ||
244 | /* POWER4 LPAR uses 0xffff as invalid node */ | 244 | /* POWER4 LPAR uses 0xffff as invalid node */ |
245 | if (nid == 0xffff || nid >= MAX_NUMNODES) | 245 | if (nid == 0xffff || nid >= MAX_NUMNODES) |
246 | nid = -1; | 246 | nid = -1; |
247 | 247 | ||
248 | if (nid > 0 && associativity[0] >= distance_ref_points_depth) | 248 | if (nid > 0 && associativity[0] >= distance_ref_points_depth) |
249 | initialize_distance_lookup_table(nid, associativity); | 249 | initialize_distance_lookup_table(nid, associativity); |
250 | 250 | ||
251 | out: | 251 | out: |
252 | return nid; | 252 | return nid; |
253 | } | 253 | } |
254 | 254 | ||
255 | /* Returns the nid associated with the given device tree node, | 255 | /* Returns the nid associated with the given device tree node, |
256 | * or -1 if not found. | 256 | * or -1 if not found. |
257 | */ | 257 | */ |
258 | static int of_node_to_nid_single(struct device_node *device) | 258 | static int of_node_to_nid_single(struct device_node *device) |
259 | { | 259 | { |
260 | int nid = -1; | 260 | int nid = -1; |
261 | const unsigned int *tmp; | 261 | const unsigned int *tmp; |
262 | 262 | ||
263 | tmp = of_get_associativity(device); | 263 | tmp = of_get_associativity(device); |
264 | if (tmp) | 264 | if (tmp) |
265 | nid = associativity_to_nid(tmp); | 265 | nid = associativity_to_nid(tmp); |
266 | return nid; | 266 | return nid; |
267 | } | 267 | } |
268 | 268 | ||
269 | /* Walk the device tree upwards, looking for an associativity id */ | 269 | /* Walk the device tree upwards, looking for an associativity id */ |
270 | int of_node_to_nid(struct device_node *device) | 270 | int of_node_to_nid(struct device_node *device) |
271 | { | 271 | { |
272 | struct device_node *tmp; | 272 | struct device_node *tmp; |
273 | int nid = -1; | 273 | int nid = -1; |
274 | 274 | ||
275 | of_node_get(device); | 275 | of_node_get(device); |
276 | while (device) { | 276 | while (device) { |
277 | nid = of_node_to_nid_single(device); | 277 | nid = of_node_to_nid_single(device); |
278 | if (nid != -1) | 278 | if (nid != -1) |
279 | break; | 279 | break; |
280 | 280 | ||
281 | tmp = device; | 281 | tmp = device; |
282 | device = of_get_parent(tmp); | 282 | device = of_get_parent(tmp); |
283 | of_node_put(tmp); | 283 | of_node_put(tmp); |
284 | } | 284 | } |
285 | of_node_put(device); | 285 | of_node_put(device); |
286 | 286 | ||
287 | return nid; | 287 | return nid; |
288 | } | 288 | } |
289 | EXPORT_SYMBOL_GPL(of_node_to_nid); | 289 | EXPORT_SYMBOL_GPL(of_node_to_nid); |
290 | 290 | ||
291 | static int __init find_min_common_depth(void) | 291 | static int __init find_min_common_depth(void) |
292 | { | 292 | { |
293 | int depth; | 293 | int depth; |
294 | struct device_node *chosen; | 294 | struct device_node *chosen; |
295 | struct device_node *root; | 295 | struct device_node *root; |
296 | const char *vec5; | 296 | const char *vec5; |
297 | 297 | ||
298 | if (firmware_has_feature(FW_FEATURE_OPAL)) | 298 | if (firmware_has_feature(FW_FEATURE_OPAL)) |
299 | root = of_find_node_by_path("/ibm,opal"); | 299 | root = of_find_node_by_path("/ibm,opal"); |
300 | else | 300 | else |
301 | root = of_find_node_by_path("/rtas"); | 301 | root = of_find_node_by_path("/rtas"); |
302 | if (!root) | 302 | if (!root) |
303 | root = of_find_node_by_path("/"); | 303 | root = of_find_node_by_path("/"); |
304 | 304 | ||
305 | /* | 305 | /* |
306 | * This property is a set of 32-bit integers, each representing | 306 | * This property is a set of 32-bit integers, each representing |
307 | * an index into the ibm,associativity nodes. | 307 | * an index into the ibm,associativity nodes. |
308 | * | 308 | * |
309 | * With form 0 affinity the first integer is for an SMP configuration | 309 | * With form 0 affinity the first integer is for an SMP configuration |
310 | * (should be all 0's) and the second is for a normal NUMA | 310 | * (should be all 0's) and the second is for a normal NUMA |
311 | * configuration. We have only one level of NUMA. | 311 | * configuration. We have only one level of NUMA. |
312 | * | 312 | * |
313 | * With form 1 affinity the first integer is the most significant | 313 | * With form 1 affinity the first integer is the most significant |
314 | * NUMA boundary and the following are progressively less significant | 314 | * NUMA boundary and the following are progressively less significant |
315 | * boundaries. There can be more than one level of NUMA. | 315 | * boundaries. There can be more than one level of NUMA. |
316 | */ | 316 | */ |
317 | distance_ref_points = of_get_property(root, | 317 | distance_ref_points = of_get_property(root, |
318 | "ibm,associativity-reference-points", | 318 | "ibm,associativity-reference-points", |
319 | &distance_ref_points_depth); | 319 | &distance_ref_points_depth); |
320 | 320 | ||
321 | if (!distance_ref_points) { | 321 | if (!distance_ref_points) { |
322 | dbg("NUMA: ibm,associativity-reference-points not found.\n"); | 322 | dbg("NUMA: ibm,associativity-reference-points not found.\n"); |
323 | goto err; | 323 | goto err; |
324 | } | 324 | } |
325 | 325 | ||
326 | distance_ref_points_depth /= sizeof(int); | 326 | distance_ref_points_depth /= sizeof(int); |
327 | 327 | ||
328 | #define VEC5_AFFINITY_BYTE 5 | 328 | #define VEC5_AFFINITY_BYTE 5 |
329 | #define VEC5_AFFINITY 0x80 | 329 | #define VEC5_AFFINITY 0x80 |
330 | 330 | ||
331 | if (firmware_has_feature(FW_FEATURE_OPAL)) | 331 | if (firmware_has_feature(FW_FEATURE_OPAL)) |
332 | form1_affinity = 1; | 332 | form1_affinity = 1; |
333 | else { | 333 | else { |
334 | chosen = of_find_node_by_path("/chosen"); | 334 | chosen = of_find_node_by_path("/chosen"); |
335 | if (chosen) { | 335 | if (chosen) { |
336 | vec5 = of_get_property(chosen, | 336 | vec5 = of_get_property(chosen, |
337 | "ibm,architecture-vec-5", NULL); | 337 | "ibm,architecture-vec-5", NULL); |
338 | if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & | 338 | if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & |
339 | VEC5_AFFINITY)) { | 339 | VEC5_AFFINITY)) { |
340 | dbg("Using form 1 affinity\n"); | 340 | dbg("Using form 1 affinity\n"); |
341 | form1_affinity = 1; | 341 | form1_affinity = 1; |
342 | } | 342 | } |
343 | } | 343 | } |
344 | } | 344 | } |
345 | 345 | ||
346 | if (form1_affinity) { | 346 | if (form1_affinity) { |
347 | depth = distance_ref_points[0]; | 347 | depth = distance_ref_points[0]; |
348 | } else { | 348 | } else { |
349 | if (distance_ref_points_depth < 2) { | 349 | if (distance_ref_points_depth < 2) { |
350 | printk(KERN_WARNING "NUMA: " | 350 | printk(KERN_WARNING "NUMA: " |
351 | "short ibm,associativity-reference-points\n"); | 351 | "short ibm,associativity-reference-points\n"); |
352 | goto err; | 352 | goto err; |
353 | } | 353 | } |
354 | 354 | ||
355 | depth = distance_ref_points[1]; | 355 | depth = distance_ref_points[1]; |
356 | } | 356 | } |
357 | 357 | ||
358 | /* | 358 | /* |
359 | * Warn and cap if the hardware supports more than | 359 | * Warn and cap if the hardware supports more than |
360 | * MAX_DISTANCE_REF_POINTS domains. | 360 | * MAX_DISTANCE_REF_POINTS domains. |
361 | */ | 361 | */ |
362 | if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) { | 362 | if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) { |
363 | printk(KERN_WARNING "NUMA: distance array capped at " | 363 | printk(KERN_WARNING "NUMA: distance array capped at " |
364 | "%d entries\n", MAX_DISTANCE_REF_POINTS); | 364 | "%d entries\n", MAX_DISTANCE_REF_POINTS); |
365 | distance_ref_points_depth = MAX_DISTANCE_REF_POINTS; | 365 | distance_ref_points_depth = MAX_DISTANCE_REF_POINTS; |
366 | } | 366 | } |
367 | 367 | ||
368 | of_node_put(root); | 368 | of_node_put(root); |
369 | return depth; | 369 | return depth; |
370 | 370 | ||
371 | err: | 371 | err: |
372 | of_node_put(root); | 372 | of_node_put(root); |
373 | return -1; | 373 | return -1; |
374 | } | 374 | } |
375 | 375 | ||
376 | static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells) | 376 | static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells) |
377 | { | 377 | { |
378 | struct device_node *memory = NULL; | 378 | struct device_node *memory = NULL; |
379 | 379 | ||
380 | memory = of_find_node_by_type(memory, "memory"); | 380 | memory = of_find_node_by_type(memory, "memory"); |
381 | if (!memory) | 381 | if (!memory) |
382 | panic("numa.c: No memory nodes found!"); | 382 | panic("numa.c: No memory nodes found!"); |
383 | 383 | ||
384 | *n_addr_cells = of_n_addr_cells(memory); | 384 | *n_addr_cells = of_n_addr_cells(memory); |
385 | *n_size_cells = of_n_size_cells(memory); | 385 | *n_size_cells = of_n_size_cells(memory); |
386 | of_node_put(memory); | 386 | of_node_put(memory); |
387 | } | 387 | } |
388 | 388 | ||
389 | static unsigned long read_n_cells(int n, const unsigned int **buf) | 389 | static unsigned long read_n_cells(int n, const unsigned int **buf) |
390 | { | 390 | { |
391 | unsigned long result = 0; | 391 | unsigned long result = 0; |
392 | 392 | ||
393 | while (n--) { | 393 | while (n--) { |
394 | result = (result << 32) | **buf; | 394 | result = (result << 32) | **buf; |
395 | (*buf)++; | 395 | (*buf)++; |
396 | } | 396 | } |
397 | return result; | 397 | return result; |
398 | } | 398 | } |
399 | 399 | ||
400 | struct of_drconf_cell { | 400 | struct of_drconf_cell { |
401 | u64 base_addr; | 401 | u64 base_addr; |
402 | u32 drc_index; | 402 | u32 drc_index; |
403 | u32 reserved; | 403 | u32 reserved; |
404 | u32 aa_index; | 404 | u32 aa_index; |
405 | u32 flags; | 405 | u32 flags; |
406 | }; | 406 | }; |
407 | 407 | ||
408 | #define DRCONF_MEM_ASSIGNED 0x00000008 | 408 | #define DRCONF_MEM_ASSIGNED 0x00000008 |
409 | #define DRCONF_MEM_AI_INVALID 0x00000040 | 409 | #define DRCONF_MEM_AI_INVALID 0x00000040 |
410 | #define DRCONF_MEM_RESERVED 0x00000080 | 410 | #define DRCONF_MEM_RESERVED 0x00000080 |
411 | 411 | ||
412 | /* | 412 | /* |
413 | * Read the next memblock list entry from the ibm,dynamic-memory property | 413 | * Read the next memblock list entry from the ibm,dynamic-memory property |
414 | * and return the information in the provided of_drconf_cell structure. | 414 | * and return the information in the provided of_drconf_cell structure. |
415 | */ | 415 | */ |
416 | static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp) | 416 | static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp) |
417 | { | 417 | { |
418 | const u32 *cp; | 418 | const u32 *cp; |
419 | 419 | ||
420 | drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp); | 420 | drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp); |
421 | 421 | ||
422 | cp = *cellp; | 422 | cp = *cellp; |
423 | drmem->drc_index = cp[0]; | 423 | drmem->drc_index = cp[0]; |
424 | drmem->reserved = cp[1]; | 424 | drmem->reserved = cp[1]; |
425 | drmem->aa_index = cp[2]; | 425 | drmem->aa_index = cp[2]; |
426 | drmem->flags = cp[3]; | 426 | drmem->flags = cp[3]; |
427 | 427 | ||
428 | *cellp = cp + 4; | 428 | *cellp = cp + 4; |
429 | } | 429 | } |
430 | 430 | ||
431 | /* | 431 | /* |
432 | * Retrieve and validate the ibm,dynamic-memory property of the device tree. | 432 | * Retrieve and validate the ibm,dynamic-memory property of the device tree. |
433 | * | 433 | * |
434 | * The layout of the ibm,dynamic-memory property is a number N of memblock | 434 | * The layout of the ibm,dynamic-memory property is a number N of memblock |
435 | * list entries followed by N memblock list entries. Each memblock list entry | 435 | * list entries followed by N memblock list entries. Each memblock list entry |
436 | * contains information as laid out in the of_drconf_cell struct above. | 436 | * contains information as laid out in the of_drconf_cell struct above. |
437 | */ | 437 | */ |
438 | static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) | 438 | static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) |
439 | { | 439 | { |
440 | const u32 *prop; | 440 | const u32 *prop; |
441 | u32 len, entries; | 441 | u32 len, entries; |
442 | 442 | ||
443 | prop = of_get_property(memory, "ibm,dynamic-memory", &len); | 443 | prop = of_get_property(memory, "ibm,dynamic-memory", &len); |
444 | if (!prop || len < sizeof(unsigned int)) | 444 | if (!prop || len < sizeof(unsigned int)) |
445 | return 0; | 445 | return 0; |
446 | 446 | ||
447 | entries = *prop++; | 447 | entries = *prop++; |
448 | 448 | ||
449 | /* Now that we know the number of entries, revalidate the size | 449 | /* Now that we know the number of entries, revalidate the size |
450 | * of the property read in to ensure we have everything | 450 | * of the property read in to ensure we have everything |
451 | */ | 451 | */ |
452 | if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int)) | 452 | if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int)) |
453 | return 0; | 453 | return 0; |
454 | 454 | ||
455 | *dm = prop; | 455 | *dm = prop; |
456 | return entries; | 456 | return entries; |
457 | } | 457 | } |
458 | 458 | ||
459 | /* | 459 | /* |
460 | * Retrieve and validate the ibm,lmb-size property for drconf memory | 460 | * Retrieve and validate the ibm,lmb-size property for drconf memory |
461 | * from the device tree. | 461 | * from the device tree. |
462 | */ | 462 | */ |
463 | static u64 of_get_lmb_size(struct device_node *memory) | 463 | static u64 of_get_lmb_size(struct device_node *memory) |
464 | { | 464 | { |
465 | const u32 *prop; | 465 | const u32 *prop; |
466 | u32 len; | 466 | u32 len; |
467 | 467 | ||
468 | prop = of_get_property(memory, "ibm,lmb-size", &len); | 468 | prop = of_get_property(memory, "ibm,lmb-size", &len); |
469 | if (!prop || len < sizeof(unsigned int)) | 469 | if (!prop || len < sizeof(unsigned int)) |
470 | return 0; | 470 | return 0; |
471 | 471 | ||
472 | return read_n_cells(n_mem_size_cells, &prop); | 472 | return read_n_cells(n_mem_size_cells, &prop); |
473 | } | 473 | } |
474 | 474 | ||
475 | struct assoc_arrays { | 475 | struct assoc_arrays { |
476 | u32 n_arrays; | 476 | u32 n_arrays; |
477 | u32 array_sz; | 477 | u32 array_sz; |
478 | const u32 *arrays; | 478 | const u32 *arrays; |
479 | }; | 479 | }; |
480 | 480 | ||
481 | /* | 481 | /* |
482 | * Retrieve and validate the list of associativity arrays for drconf | 482 | * Retrieve and validate the list of associativity arrays for drconf |
483 | * memory from the ibm,associativity-lookup-arrays property of the | 483 | * memory from the ibm,associativity-lookup-arrays property of the |
484 | * device tree.. | 484 | * device tree.. |
485 | * | 485 | * |
486 | * The layout of the ibm,associativity-lookup-arrays property is a number N | 486 | * The layout of the ibm,associativity-lookup-arrays property is a number N |
487 | * indicating the number of associativity arrays, followed by a number M | 487 | * indicating the number of associativity arrays, followed by a number M |
488 | * indicating the size of each associativity array, followed by a list | 488 | * indicating the size of each associativity array, followed by a list |
489 | * of N associativity arrays. | 489 | * of N associativity arrays. |
490 | */ | 490 | */ |
491 | static int of_get_assoc_arrays(struct device_node *memory, | 491 | static int of_get_assoc_arrays(struct device_node *memory, |
492 | struct assoc_arrays *aa) | 492 | struct assoc_arrays *aa) |
493 | { | 493 | { |
494 | const u32 *prop; | 494 | const u32 *prop; |
495 | u32 len; | 495 | u32 len; |
496 | 496 | ||
497 | prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len); | 497 | prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len); |
498 | if (!prop || len < 2 * sizeof(unsigned int)) | 498 | if (!prop || len < 2 * sizeof(unsigned int)) |
499 | return -1; | 499 | return -1; |
500 | 500 | ||
501 | aa->n_arrays = *prop++; | 501 | aa->n_arrays = *prop++; |
502 | aa->array_sz = *prop++; | 502 | aa->array_sz = *prop++; |
503 | 503 | ||
504 | /* Now that we know the number of arrays and size of each array, | 504 | /* Now that we know the number of arrays and size of each array, |
505 | * revalidate the size of the property read in. | 505 | * revalidate the size of the property read in. |
506 | */ | 506 | */ |
507 | if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int)) | 507 | if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int)) |
508 | return -1; | 508 | return -1; |
509 | 509 | ||
510 | aa->arrays = prop; | 510 | aa->arrays = prop; |
511 | return 0; | 511 | return 0; |
512 | } | 512 | } |
513 | 513 | ||
514 | /* | 514 | /* |
515 | * This is like of_node_to_nid_single() for memory represented in the | 515 | * This is like of_node_to_nid_single() for memory represented in the |
516 | * ibm,dynamic-reconfiguration-memory node. | 516 | * ibm,dynamic-reconfiguration-memory node. |
517 | */ | 517 | */ |
518 | static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, | 518 | static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, |
519 | struct assoc_arrays *aa) | 519 | struct assoc_arrays *aa) |
520 | { | 520 | { |
521 | int default_nid = 0; | 521 | int default_nid = 0; |
522 | int nid = default_nid; | 522 | int nid = default_nid; |
523 | int index; | 523 | int index; |
524 | 524 | ||
525 | if (min_common_depth > 0 && min_common_depth <= aa->array_sz && | 525 | if (min_common_depth > 0 && min_common_depth <= aa->array_sz && |
526 | !(drmem->flags & DRCONF_MEM_AI_INVALID) && | 526 | !(drmem->flags & DRCONF_MEM_AI_INVALID) && |
527 | drmem->aa_index < aa->n_arrays) { | 527 | drmem->aa_index < aa->n_arrays) { |
528 | index = drmem->aa_index * aa->array_sz + min_common_depth - 1; | 528 | index = drmem->aa_index * aa->array_sz + min_common_depth - 1; |
529 | nid = aa->arrays[index]; | 529 | nid = aa->arrays[index]; |
530 | 530 | ||
531 | if (nid == 0xffff || nid >= MAX_NUMNODES) | 531 | if (nid == 0xffff || nid >= MAX_NUMNODES) |
532 | nid = default_nid; | 532 | nid = default_nid; |
533 | } | 533 | } |
534 | 534 | ||
535 | return nid; | 535 | return nid; |
536 | } | 536 | } |
537 | 537 | ||
538 | /* | 538 | /* |
539 | * Figure out to which domain a cpu belongs and stick it there. | 539 | * Figure out to which domain a cpu belongs and stick it there. |
540 | * Return the id of the domain used. | 540 | * Return the id of the domain used. |
541 | */ | 541 | */ |
542 | static int __cpuinit numa_setup_cpu(unsigned long lcpu) | 542 | static int __cpuinit numa_setup_cpu(unsigned long lcpu) |
543 | { | 543 | { |
544 | int nid = 0; | 544 | int nid = 0; |
545 | struct device_node *cpu = of_get_cpu_node(lcpu, NULL); | 545 | struct device_node *cpu = of_get_cpu_node(lcpu, NULL); |
546 | 546 | ||
547 | if (!cpu) { | 547 | if (!cpu) { |
548 | WARN_ON(1); | 548 | WARN_ON(1); |
549 | goto out; | 549 | goto out; |
550 | } | 550 | } |
551 | 551 | ||
552 | nid = of_node_to_nid_single(cpu); | 552 | nid = of_node_to_nid_single(cpu); |
553 | 553 | ||
554 | if (nid < 0 || !node_online(nid)) | 554 | if (nid < 0 || !node_online(nid)) |
555 | nid = first_online_node; | 555 | nid = first_online_node; |
556 | out: | 556 | out: |
557 | map_cpu_to_node(lcpu, nid); | 557 | map_cpu_to_node(lcpu, nid); |
558 | 558 | ||
559 | of_node_put(cpu); | 559 | of_node_put(cpu); |
560 | 560 | ||
561 | return nid; | 561 | return nid; |
562 | } | 562 | } |
563 | 563 | ||
564 | static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, | 564 | static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, |
565 | unsigned long action, | 565 | unsigned long action, |
566 | void *hcpu) | 566 | void *hcpu) |
567 | { | 567 | { |
568 | unsigned long lcpu = (unsigned long)hcpu; | 568 | unsigned long lcpu = (unsigned long)hcpu; |
569 | int ret = NOTIFY_DONE; | 569 | int ret = NOTIFY_DONE; |
570 | 570 | ||
571 | switch (action) { | 571 | switch (action) { |
572 | case CPU_UP_PREPARE: | 572 | case CPU_UP_PREPARE: |
573 | case CPU_UP_PREPARE_FROZEN: | 573 | case CPU_UP_PREPARE_FROZEN: |
574 | numa_setup_cpu(lcpu); | 574 | numa_setup_cpu(lcpu); |
575 | ret = NOTIFY_OK; | 575 | ret = NOTIFY_OK; |
576 | break; | 576 | break; |
577 | #ifdef CONFIG_HOTPLUG_CPU | 577 | #ifdef CONFIG_HOTPLUG_CPU |
578 | case CPU_DEAD: | 578 | case CPU_DEAD: |
579 | case CPU_DEAD_FROZEN: | 579 | case CPU_DEAD_FROZEN: |
580 | case CPU_UP_CANCELED: | 580 | case CPU_UP_CANCELED: |
581 | case CPU_UP_CANCELED_FROZEN: | 581 | case CPU_UP_CANCELED_FROZEN: |
582 | unmap_cpu_from_node(lcpu); | 582 | unmap_cpu_from_node(lcpu); |
583 | break; | 583 | break; |
584 | ret = NOTIFY_OK; | 584 | ret = NOTIFY_OK; |
585 | #endif | 585 | #endif |
586 | } | 586 | } |
587 | return ret; | 587 | return ret; |
588 | } | 588 | } |
589 | 589 | ||
590 | /* | 590 | /* |
591 | * Check and possibly modify a memory region to enforce the memory limit. | 591 | * Check and possibly modify a memory region to enforce the memory limit. |
592 | * | 592 | * |
593 | * Returns the size the region should have to enforce the memory limit. | 593 | * Returns the size the region should have to enforce the memory limit. |
594 | * This will either be the original value of size, a truncated value, | 594 | * This will either be the original value of size, a truncated value, |
595 | * or zero. If the returned value of size is 0 the region should be | 595 | * or zero. If the returned value of size is 0 the region should be |
596 | * discarded as it lies wholly above the memory limit. | 596 | * discarded as it lies wholly above the memory limit. |
597 | */ | 597 | */ |
598 | static unsigned long __init numa_enforce_memory_limit(unsigned long start, | 598 | static unsigned long __init numa_enforce_memory_limit(unsigned long start, |
599 | unsigned long size) | 599 | unsigned long size) |
600 | { | 600 | { |
601 | /* | 601 | /* |
602 | * We use memblock_end_of_DRAM() in here instead of memory_limit because | 602 | * We use memblock_end_of_DRAM() in here instead of memory_limit because |
603 | * we've already adjusted it for the limit and it takes care of | 603 | * we've already adjusted it for the limit and it takes care of |
604 | * having memory holes below the limit. Also, in the case of | 604 | * having memory holes below the limit. Also, in the case of |
605 | * iommu_is_off, memory_limit is not set but is implicitly enforced. | 605 | * iommu_is_off, memory_limit is not set but is implicitly enforced. |
606 | */ | 606 | */ |
607 | 607 | ||
608 | if (start + size <= memblock_end_of_DRAM()) | 608 | if (start + size <= memblock_end_of_DRAM()) |
609 | return size; | 609 | return size; |
610 | 610 | ||
611 | if (start >= memblock_end_of_DRAM()) | 611 | if (start >= memblock_end_of_DRAM()) |
612 | return 0; | 612 | return 0; |
613 | 613 | ||
614 | return memblock_end_of_DRAM() - start; | 614 | return memblock_end_of_DRAM() - start; |
615 | } | 615 | } |
616 | 616 | ||
617 | /* | 617 | /* |
618 | * Reads the counter for a given entry in | 618 | * Reads the counter for a given entry in |
619 | * linux,drconf-usable-memory property | 619 | * linux,drconf-usable-memory property |
620 | */ | 620 | */ |
621 | static inline int __init read_usm_ranges(const u32 **usm) | 621 | static inline int __init read_usm_ranges(const u32 **usm) |
622 | { | 622 | { |
623 | /* | 623 | /* |
624 | * For each lmb in ibm,dynamic-memory a corresponding | 624 | * For each lmb in ibm,dynamic-memory a corresponding |
625 | * entry in linux,drconf-usable-memory property contains | 625 | * entry in linux,drconf-usable-memory property contains |
626 | * a counter followed by that many (base, size) duple. | 626 | * a counter followed by that many (base, size) duple. |
627 | * read the counter from linux,drconf-usable-memory | 627 | * read the counter from linux,drconf-usable-memory |
628 | */ | 628 | */ |
629 | return read_n_cells(n_mem_size_cells, usm); | 629 | return read_n_cells(n_mem_size_cells, usm); |
630 | } | 630 | } |
631 | 631 | ||
632 | /* | 632 | /* |
633 | * Extract NUMA information from the ibm,dynamic-reconfiguration-memory | 633 | * Extract NUMA information from the ibm,dynamic-reconfiguration-memory |
634 | * node. This assumes n_mem_{addr,size}_cells have been set. | 634 | * node. This assumes n_mem_{addr,size}_cells have been set. |
635 | */ | 635 | */ |
636 | static void __init parse_drconf_memory(struct device_node *memory) | 636 | static void __init parse_drconf_memory(struct device_node *memory) |
637 | { | 637 | { |
638 | const u32 *dm, *usm; | 638 | const u32 *dm, *usm; |
639 | unsigned int n, rc, ranges, is_kexec_kdump = 0; | 639 | unsigned int n, rc, ranges, is_kexec_kdump = 0; |
640 | unsigned long lmb_size, base, size, sz; | 640 | unsigned long lmb_size, base, size, sz; |
641 | int nid; | 641 | int nid; |
642 | struct assoc_arrays aa; | 642 | struct assoc_arrays aa; |
643 | 643 | ||
644 | n = of_get_drconf_memory(memory, &dm); | 644 | n = of_get_drconf_memory(memory, &dm); |
645 | if (!n) | 645 | if (!n) |
646 | return; | 646 | return; |
647 | 647 | ||
648 | lmb_size = of_get_lmb_size(memory); | 648 | lmb_size = of_get_lmb_size(memory); |
649 | if (!lmb_size) | 649 | if (!lmb_size) |
650 | return; | 650 | return; |
651 | 651 | ||
652 | rc = of_get_assoc_arrays(memory, &aa); | 652 | rc = of_get_assoc_arrays(memory, &aa); |
653 | if (rc) | 653 | if (rc) |
654 | return; | 654 | return; |
655 | 655 | ||
656 | /* check if this is a kexec/kdump kernel */ | 656 | /* check if this is a kexec/kdump kernel */ |
657 | usm = of_get_usable_memory(memory); | 657 | usm = of_get_usable_memory(memory); |
658 | if (usm != NULL) | 658 | if (usm != NULL) |
659 | is_kexec_kdump = 1; | 659 | is_kexec_kdump = 1; |
660 | 660 | ||
661 | for (; n != 0; --n) { | 661 | for (; n != 0; --n) { |
662 | struct of_drconf_cell drmem; | 662 | struct of_drconf_cell drmem; |
663 | 663 | ||
664 | read_drconf_cell(&drmem, &dm); | 664 | read_drconf_cell(&drmem, &dm); |
665 | 665 | ||
666 | /* skip this block if the reserved bit is set in flags (0x80) | 666 | /* skip this block if the reserved bit is set in flags (0x80) |
667 | or if the block is not assigned to this partition (0x8) */ | 667 | or if the block is not assigned to this partition (0x8) */ |
668 | if ((drmem.flags & DRCONF_MEM_RESERVED) | 668 | if ((drmem.flags & DRCONF_MEM_RESERVED) |
669 | || !(drmem.flags & DRCONF_MEM_ASSIGNED)) | 669 | || !(drmem.flags & DRCONF_MEM_ASSIGNED)) |
670 | continue; | 670 | continue; |
671 | 671 | ||
672 | base = drmem.base_addr; | 672 | base = drmem.base_addr; |
673 | size = lmb_size; | 673 | size = lmb_size; |
674 | ranges = 1; | 674 | ranges = 1; |
675 | 675 | ||
676 | if (is_kexec_kdump) { | 676 | if (is_kexec_kdump) { |
677 | ranges = read_usm_ranges(&usm); | 677 | ranges = read_usm_ranges(&usm); |
678 | if (!ranges) /* there are no (base, size) duple */ | 678 | if (!ranges) /* there are no (base, size) duple */ |
679 | continue; | 679 | continue; |
680 | } | 680 | } |
681 | do { | 681 | do { |
682 | if (is_kexec_kdump) { | 682 | if (is_kexec_kdump) { |
683 | base = read_n_cells(n_mem_addr_cells, &usm); | 683 | base = read_n_cells(n_mem_addr_cells, &usm); |
684 | size = read_n_cells(n_mem_size_cells, &usm); | 684 | size = read_n_cells(n_mem_size_cells, &usm); |
685 | } | 685 | } |
686 | nid = of_drconf_to_nid_single(&drmem, &aa); | 686 | nid = of_drconf_to_nid_single(&drmem, &aa); |
687 | fake_numa_create_new_node( | 687 | fake_numa_create_new_node( |
688 | ((base + size) >> PAGE_SHIFT), | 688 | ((base + size) >> PAGE_SHIFT), |
689 | &nid); | 689 | &nid); |
690 | node_set_online(nid); | 690 | node_set_online(nid); |
691 | sz = numa_enforce_memory_limit(base, size); | 691 | sz = numa_enforce_memory_limit(base, size); |
692 | if (sz) | 692 | if (sz) |
693 | memblock_set_node(base, sz, nid); | 693 | memblock_set_node(base, sz, nid); |
694 | } while (--ranges); | 694 | } while (--ranges); |
695 | } | 695 | } |
696 | } | 696 | } |
697 | 697 | ||
698 | static int __init parse_numa_properties(void) | 698 | static int __init parse_numa_properties(void) |
699 | { | 699 | { |
700 | struct device_node *memory; | 700 | struct device_node *memory; |
701 | int default_nid = 0; | 701 | int default_nid = 0; |
702 | unsigned long i; | 702 | unsigned long i; |
703 | 703 | ||
704 | if (numa_enabled == 0) { | 704 | if (numa_enabled == 0) { |
705 | printk(KERN_WARNING "NUMA disabled by user\n"); | 705 | printk(KERN_WARNING "NUMA disabled by user\n"); |
706 | return -1; | 706 | return -1; |
707 | } | 707 | } |
708 | 708 | ||
709 | min_common_depth = find_min_common_depth(); | 709 | min_common_depth = find_min_common_depth(); |
710 | 710 | ||
711 | if (min_common_depth < 0) | 711 | if (min_common_depth < 0) |
712 | return min_common_depth; | 712 | return min_common_depth; |
713 | 713 | ||
714 | dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); | 714 | dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); |
715 | 715 | ||
716 | /* | 716 | /* |
717 | * Even though we connect cpus to numa domains later in SMP | 717 | * Even though we connect cpus to numa domains later in SMP |
718 | * init, we need to know the node ids now. This is because | 718 | * init, we need to know the node ids now. This is because |
719 | * each node to be onlined must have NODE_DATA etc backing it. | 719 | * each node to be onlined must have NODE_DATA etc backing it. |
720 | */ | 720 | */ |
721 | for_each_present_cpu(i) { | 721 | for_each_present_cpu(i) { |
722 | struct device_node *cpu; | 722 | struct device_node *cpu; |
723 | int nid; | 723 | int nid; |
724 | 724 | ||
725 | cpu = of_get_cpu_node(i, NULL); | 725 | cpu = of_get_cpu_node(i, NULL); |
726 | BUG_ON(!cpu); | 726 | BUG_ON(!cpu); |
727 | nid = of_node_to_nid_single(cpu); | 727 | nid = of_node_to_nid_single(cpu); |
728 | of_node_put(cpu); | 728 | of_node_put(cpu); |
729 | 729 | ||
730 | /* | 730 | /* |
731 | * Don't fall back to default_nid yet -- we will plug | 731 | * Don't fall back to default_nid yet -- we will plug |
732 | * cpus into nodes once the memory scan has discovered | 732 | * cpus into nodes once the memory scan has discovered |
733 | * the topology. | 733 | * the topology. |
734 | */ | 734 | */ |
735 | if (nid < 0) | 735 | if (nid < 0) |
736 | continue; | 736 | continue; |
737 | node_set_online(nid); | 737 | node_set_online(nid); |
738 | } | 738 | } |
739 | 739 | ||
740 | get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); | 740 | get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); |
741 | 741 | ||
742 | for_each_node_by_type(memory, "memory") { | 742 | for_each_node_by_type(memory, "memory") { |
743 | unsigned long start; | 743 | unsigned long start; |
744 | unsigned long size; | 744 | unsigned long size; |
745 | int nid; | 745 | int nid; |
746 | int ranges; | 746 | int ranges; |
747 | const unsigned int *memcell_buf; | 747 | const unsigned int *memcell_buf; |
748 | unsigned int len; | 748 | unsigned int len; |
749 | 749 | ||
750 | memcell_buf = of_get_property(memory, | 750 | memcell_buf = of_get_property(memory, |
751 | "linux,usable-memory", &len); | 751 | "linux,usable-memory", &len); |
752 | if (!memcell_buf || len <= 0) | 752 | if (!memcell_buf || len <= 0) |
753 | memcell_buf = of_get_property(memory, "reg", &len); | 753 | memcell_buf = of_get_property(memory, "reg", &len); |
754 | if (!memcell_buf || len <= 0) | 754 | if (!memcell_buf || len <= 0) |
755 | continue; | 755 | continue; |
756 | 756 | ||
757 | /* ranges in cell */ | 757 | /* ranges in cell */ |
758 | ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); | 758 | ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); |
759 | new_range: | 759 | new_range: |
760 | /* these are order-sensitive, and modify the buffer pointer */ | 760 | /* these are order-sensitive, and modify the buffer pointer */ |
761 | start = read_n_cells(n_mem_addr_cells, &memcell_buf); | 761 | start = read_n_cells(n_mem_addr_cells, &memcell_buf); |
762 | size = read_n_cells(n_mem_size_cells, &memcell_buf); | 762 | size = read_n_cells(n_mem_size_cells, &memcell_buf); |
763 | 763 | ||
764 | /* | 764 | /* |
765 | * Assumption: either all memory nodes or none will | 765 | * Assumption: either all memory nodes or none will |
766 | * have associativity properties. If none, then | 766 | * have associativity properties. If none, then |
767 | * everything goes to default_nid. | 767 | * everything goes to default_nid. |
768 | */ | 768 | */ |
769 | nid = of_node_to_nid_single(memory); | 769 | nid = of_node_to_nid_single(memory); |
770 | if (nid < 0) | 770 | if (nid < 0) |
771 | nid = default_nid; | 771 | nid = default_nid; |
772 | 772 | ||
773 | fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid); | 773 | fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid); |
774 | node_set_online(nid); | 774 | node_set_online(nid); |
775 | 775 | ||
776 | if (!(size = numa_enforce_memory_limit(start, size))) { | 776 | if (!(size = numa_enforce_memory_limit(start, size))) { |
777 | if (--ranges) | 777 | if (--ranges) |
778 | goto new_range; | 778 | goto new_range; |
779 | else | 779 | else |
780 | continue; | 780 | continue; |
781 | } | 781 | } |
782 | 782 | ||
783 | memblock_set_node(start, size, nid); | 783 | memblock_set_node(start, size, nid); |
784 | 784 | ||
785 | if (--ranges) | 785 | if (--ranges) |
786 | goto new_range; | 786 | goto new_range; |
787 | } | 787 | } |
788 | 788 | ||
789 | /* | 789 | /* |
790 | * Now do the same thing for each MEMBLOCK listed in the | 790 | * Now do the same thing for each MEMBLOCK listed in the |
791 | * ibm,dynamic-memory property in the | 791 | * ibm,dynamic-memory property in the |
792 | * ibm,dynamic-reconfiguration-memory node. | 792 | * ibm,dynamic-reconfiguration-memory node. |
793 | */ | 793 | */ |
794 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); | 794 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); |
795 | if (memory) | 795 | if (memory) |
796 | parse_drconf_memory(memory); | 796 | parse_drconf_memory(memory); |
797 | 797 | ||
798 | return 0; | 798 | return 0; |
799 | } | 799 | } |
800 | 800 | ||
801 | static void __init setup_nonnuma(void) | 801 | static void __init setup_nonnuma(void) |
802 | { | 802 | { |
803 | unsigned long top_of_ram = memblock_end_of_DRAM(); | 803 | unsigned long top_of_ram = memblock_end_of_DRAM(); |
804 | unsigned long total_ram = memblock_phys_mem_size(); | 804 | unsigned long total_ram = memblock_phys_mem_size(); |
805 | unsigned long start_pfn, end_pfn; | 805 | unsigned long start_pfn, end_pfn; |
806 | unsigned int nid = 0; | 806 | unsigned int nid = 0; |
807 | struct memblock_region *reg; | 807 | struct memblock_region *reg; |
808 | 808 | ||
809 | printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", | 809 | printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", |
810 | top_of_ram, total_ram); | 810 | top_of_ram, total_ram); |
811 | printk(KERN_DEBUG "Memory hole size: %ldMB\n", | 811 | printk(KERN_DEBUG "Memory hole size: %ldMB\n", |
812 | (top_of_ram - total_ram) >> 20); | 812 | (top_of_ram - total_ram) >> 20); |
813 | 813 | ||
814 | for_each_memblock(memory, reg) { | 814 | for_each_memblock(memory, reg) { |
815 | start_pfn = memblock_region_memory_base_pfn(reg); | 815 | start_pfn = memblock_region_memory_base_pfn(reg); |
816 | end_pfn = memblock_region_memory_end_pfn(reg); | 816 | end_pfn = memblock_region_memory_end_pfn(reg); |
817 | 817 | ||
818 | fake_numa_create_new_node(end_pfn, &nid); | 818 | fake_numa_create_new_node(end_pfn, &nid); |
819 | memblock_set_node(PFN_PHYS(start_pfn), | 819 | memblock_set_node(PFN_PHYS(start_pfn), |
820 | PFN_PHYS(end_pfn - start_pfn), nid); | 820 | PFN_PHYS(end_pfn - start_pfn), nid); |
821 | node_set_online(nid); | 821 | node_set_online(nid); |
822 | } | 822 | } |
823 | } | 823 | } |
824 | 824 | ||
825 | void __init dump_numa_cpu_topology(void) | 825 | void __init dump_numa_cpu_topology(void) |
826 | { | 826 | { |
827 | unsigned int node; | 827 | unsigned int node; |
828 | unsigned int cpu, count; | 828 | unsigned int cpu, count; |
829 | 829 | ||
830 | if (min_common_depth == -1 || !numa_enabled) | 830 | if (min_common_depth == -1 || !numa_enabled) |
831 | return; | 831 | return; |
832 | 832 | ||
833 | for_each_online_node(node) { | 833 | for_each_online_node(node) { |
834 | printk(KERN_DEBUG "Node %d CPUs:", node); | 834 | printk(KERN_DEBUG "Node %d CPUs:", node); |
835 | 835 | ||
836 | count = 0; | 836 | count = 0; |
837 | /* | 837 | /* |
838 | * If we used a CPU iterator here we would miss printing | 838 | * If we used a CPU iterator here we would miss printing |
839 | * the holes in the cpumap. | 839 | * the holes in the cpumap. |
840 | */ | 840 | */ |
841 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) { | 841 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) { |
842 | if (cpumask_test_cpu(cpu, | 842 | if (cpumask_test_cpu(cpu, |
843 | node_to_cpumask_map[node])) { | 843 | node_to_cpumask_map[node])) { |
844 | if (count == 0) | 844 | if (count == 0) |
845 | printk(" %u", cpu); | 845 | printk(" %u", cpu); |
846 | ++count; | 846 | ++count; |
847 | } else { | 847 | } else { |
848 | if (count > 1) | 848 | if (count > 1) |
849 | printk("-%u", cpu - 1); | 849 | printk("-%u", cpu - 1); |
850 | count = 0; | 850 | count = 0; |
851 | } | 851 | } |
852 | } | 852 | } |
853 | 853 | ||
854 | if (count > 1) | 854 | if (count > 1) |
855 | printk("-%u", nr_cpu_ids - 1); | 855 | printk("-%u", nr_cpu_ids - 1); |
856 | printk("\n"); | 856 | printk("\n"); |
857 | } | 857 | } |
858 | } | 858 | } |
859 | 859 | ||
860 | static void __init dump_numa_memory_topology(void) | 860 | static void __init dump_numa_memory_topology(void) |
861 | { | 861 | { |
862 | unsigned int node; | 862 | unsigned int node; |
863 | unsigned int count; | 863 | unsigned int count; |
864 | 864 | ||
865 | if (min_common_depth == -1 || !numa_enabled) | 865 | if (min_common_depth == -1 || !numa_enabled) |
866 | return; | 866 | return; |
867 | 867 | ||
868 | for_each_online_node(node) { | 868 | for_each_online_node(node) { |
869 | unsigned long i; | 869 | unsigned long i; |
870 | 870 | ||
871 | printk(KERN_DEBUG "Node %d Memory:", node); | 871 | printk(KERN_DEBUG "Node %d Memory:", node); |
872 | 872 | ||
873 | count = 0; | 873 | count = 0; |
874 | 874 | ||
875 | for (i = 0; i < memblock_end_of_DRAM(); | 875 | for (i = 0; i < memblock_end_of_DRAM(); |
876 | i += (1 << SECTION_SIZE_BITS)) { | 876 | i += (1 << SECTION_SIZE_BITS)) { |
877 | if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { | 877 | if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { |
878 | if (count == 0) | 878 | if (count == 0) |
879 | printk(" 0x%lx", i); | 879 | printk(" 0x%lx", i); |
880 | ++count; | 880 | ++count; |
881 | } else { | 881 | } else { |
882 | if (count > 0) | 882 | if (count > 0) |
883 | printk("-0x%lx", i); | 883 | printk("-0x%lx", i); |
884 | count = 0; | 884 | count = 0; |
885 | } | 885 | } |
886 | } | 886 | } |
887 | 887 | ||
888 | if (count > 0) | 888 | if (count > 0) |
889 | printk("-0x%lx", i); | 889 | printk("-0x%lx", i); |
890 | printk("\n"); | 890 | printk("\n"); |
891 | } | 891 | } |
892 | } | 892 | } |
893 | 893 | ||
894 | /* | 894 | /* |
895 | * Allocate some memory, satisfying the memblock or bootmem allocator where | 895 | * Allocate some memory, satisfying the memblock or bootmem allocator where |
896 | * required. nid is the preferred node and end is the physical address of | 896 | * required. nid is the preferred node and end is the physical address of |
897 | * the highest address in the node. | 897 | * the highest address in the node. |
898 | * | 898 | * |
899 | * Returns the virtual address of the memory. | 899 | * Returns the virtual address of the memory. |
900 | */ | 900 | */ |
901 | static void __init *careful_zallocation(int nid, unsigned long size, | 901 | static void __init *careful_zallocation(int nid, unsigned long size, |
902 | unsigned long align, | 902 | unsigned long align, |
903 | unsigned long end_pfn) | 903 | unsigned long end_pfn) |
904 | { | 904 | { |
905 | void *ret; | 905 | void *ret; |
906 | int new_nid; | 906 | int new_nid; |
907 | unsigned long ret_paddr; | 907 | unsigned long ret_paddr; |
908 | 908 | ||
909 | ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT); | 909 | ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT); |
910 | 910 | ||
911 | /* retry over all memory */ | 911 | /* retry over all memory */ |
912 | if (!ret_paddr) | 912 | if (!ret_paddr) |
913 | ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM()); | 913 | ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM()); |
914 | 914 | ||
915 | if (!ret_paddr) | 915 | if (!ret_paddr) |
916 | panic("numa.c: cannot allocate %lu bytes for node %d", | 916 | panic("numa.c: cannot allocate %lu bytes for node %d", |
917 | size, nid); | 917 | size, nid); |
918 | 918 | ||
919 | ret = __va(ret_paddr); | 919 | ret = __va(ret_paddr); |
920 | 920 | ||
921 | /* | 921 | /* |
922 | * We initialize the nodes in numeric order: 0, 1, 2... | 922 | * We initialize the nodes in numeric order: 0, 1, 2... |
923 | * and hand over control from the MEMBLOCK allocator to the | 923 | * and hand over control from the MEMBLOCK allocator to the |
924 | * bootmem allocator. If this function is called for | 924 | * bootmem allocator. If this function is called for |
925 | * node 5, then we know that all nodes <5 are using the | 925 | * node 5, then we know that all nodes <5 are using the |
926 | * bootmem allocator instead of the MEMBLOCK allocator. | 926 | * bootmem allocator instead of the MEMBLOCK allocator. |
927 | * | 927 | * |
928 | * So, check the nid from which this allocation came | 928 | * So, check the nid from which this allocation came |
929 | * and double check to see if we need to use bootmem | 929 | * and double check to see if we need to use bootmem |
930 | * instead of the MEMBLOCK. We don't free the MEMBLOCK memory | 930 | * instead of the MEMBLOCK. We don't free the MEMBLOCK memory |
931 | * since it would be useless. | 931 | * since it would be useless. |
932 | */ | 932 | */ |
933 | new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT); | 933 | new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT); |
934 | if (new_nid < nid) { | 934 | if (new_nid < nid) { |
935 | ret = __alloc_bootmem_node(NODE_DATA(new_nid), | 935 | ret = __alloc_bootmem_node(NODE_DATA(new_nid), |
936 | size, align, 0); | 936 | size, align, 0); |
937 | 937 | ||
938 | dbg("alloc_bootmem %p %lx\n", ret, size); | 938 | dbg("alloc_bootmem %p %lx\n", ret, size); |
939 | } | 939 | } |
940 | 940 | ||
941 | memset(ret, 0, size); | 941 | memset(ret, 0, size); |
942 | return ret; | 942 | return ret; |
943 | } | 943 | } |
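
careful_zallocation() prefers memory below the node's end, retries over all of DRAM, and zeroes the result; it additionally re-allocates from bootmem when the block lands on an already-initialized (lower-numbered) node. Below is a hedged userspace sketch of just the fallback-and-zero pattern; node_local_alloc() and any_alloc() are hypothetical stand-ins for __memblock_alloc_base(), and the MEMBLOCK-to-bootmem handoff check is deliberately omitted.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Pretend node 3 has no free memory below its limit, to exercise the fallback. */
static void *node_local_alloc(size_t size, int nid)
{
	return (nid == 3) ? NULL : malloc(size);
}

static void *any_alloc(size_t size)
{
	return malloc(size);
}

static void *careful_zalloc_model(int nid, size_t size)
{
	void *p = node_local_alloc(size, nid);	/* preferred: node-local */

	if (!p)					/* retry over all memory */
		p = any_alloc(size);
	if (!p) {				/* the kernel would panic() here */
		fprintf(stderr, "cannot allocate %zu bytes for node %d\n", size, nid);
		exit(1);
	}
	memset(p, 0, size);			/* the "z" in zallocation */
	return p;
}

int main(void)
{
	void *a = careful_zalloc_model(0, 64);
	void *b = careful_zalloc_model(3, 64);	/* takes the fallback path */

	free(a);
	free(b);
	return 0;
}
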
944 | 944 | ||
945 | static struct notifier_block __cpuinitdata ppc64_numa_nb = { | 945 | static struct notifier_block __cpuinitdata ppc64_numa_nb = { |
946 | .notifier_call = cpu_numa_callback, | 946 | .notifier_call = cpu_numa_callback, |
947 | .priority = 1 /* Must run before sched domains notifier. */ | 947 | .priority = 1 /* Must run before sched domains notifier. */ |
948 | }; | 948 | }; |
949 | 949 | ||
950 | static void __init mark_reserved_regions_for_nid(int nid) | 950 | static void __init mark_reserved_regions_for_nid(int nid) |
951 | { | 951 | { |
952 | struct pglist_data *node = NODE_DATA(nid); | 952 | struct pglist_data *node = NODE_DATA(nid); |
953 | struct memblock_region *reg; | 953 | struct memblock_region *reg; |
954 | 954 | ||
955 | for_each_memblock(reserved, reg) { | 955 | for_each_memblock(reserved, reg) { |
956 | unsigned long physbase = reg->base; | 956 | unsigned long physbase = reg->base; |
957 | unsigned long size = reg->size; | 957 | unsigned long size = reg->size; |
958 | unsigned long start_pfn = physbase >> PAGE_SHIFT; | 958 | unsigned long start_pfn = physbase >> PAGE_SHIFT; |
959 | unsigned long end_pfn = PFN_UP(physbase + size); | 959 | unsigned long end_pfn = PFN_UP(physbase + size); |
960 | struct node_active_region node_ar; | 960 | struct node_active_region node_ar; |
961 | unsigned long node_end_pfn = node->node_start_pfn + | 961 | unsigned long node_end_pfn = node->node_start_pfn + |
962 | node->node_spanned_pages; | 962 | node->node_spanned_pages; |
963 | 963 | ||
964 | /* | 964 | /* |
965 | * Check to make sure that this memblock.reserved area is | 965 | * Check to make sure that this memblock.reserved area is |
966 | * within the bounds of the node that we care about. | 966 | * within the bounds of the node that we care about. |
967 | * Checking the nid of the start and end points is not | 967 | * Checking the nid of the start and end points is not |
968 | * sufficient because the reserved area could span the | 968 | * sufficient because the reserved area could span the |
969 | * entire node. | 969 | * entire node. |
970 | */ | 970 | */ |
971 | if (end_pfn <= node->node_start_pfn || | 971 | if (end_pfn <= node->node_start_pfn || |
972 | start_pfn >= node_end_pfn) | 972 | start_pfn >= node_end_pfn) |
973 | continue; | 973 | continue; |
974 | 974 | ||
975 | get_node_active_region(start_pfn, &node_ar); | 975 | get_node_active_region(start_pfn, &node_ar); |
976 | while (start_pfn < end_pfn && | 976 | while (start_pfn < end_pfn && |
977 | node_ar.start_pfn < node_ar.end_pfn) { | 977 | node_ar.start_pfn < node_ar.end_pfn) { |
978 | unsigned long reserve_size = size; | 978 | unsigned long reserve_size = size; |
979 | /* | 979 | /* |
980 | * if reserved region extends past active region | 980 | * if reserved region extends past active region |
981 | * then trim size to active region | 981 | * then trim size to active region |
982 | */ | 982 | */ |
983 | if (end_pfn > node_ar.end_pfn) | 983 | if (end_pfn > node_ar.end_pfn) |
984 | reserve_size = (node_ar.end_pfn << PAGE_SHIFT) | 984 | reserve_size = (node_ar.end_pfn << PAGE_SHIFT) |
985 | - physbase; | 985 | - physbase; |
986 | /* | 986 | /* |
987 | * Only worry about *this* node, others may not | 987 | * Only worry about *this* node, others may not |
988 | * yet have valid NODE_DATA(). | 988 | * yet have valid NODE_DATA(). |
989 | */ | 989 | */ |
990 | if (node_ar.nid == nid) { | 990 | if (node_ar.nid == nid) { |
991 | dbg("reserve_bootmem %lx %lx nid=%d\n", | 991 | dbg("reserve_bootmem %lx %lx nid=%d\n", |
992 | physbase, reserve_size, node_ar.nid); | 992 | physbase, reserve_size, node_ar.nid); |
993 | reserve_bootmem_node(NODE_DATA(node_ar.nid), | 993 | reserve_bootmem_node(NODE_DATA(node_ar.nid), |
994 | physbase, reserve_size, | 994 | physbase, reserve_size, |
995 | BOOTMEM_DEFAULT); | 995 | BOOTMEM_DEFAULT); |
996 | } | 996 | } |
997 | /* | 997 | /* |
998 | * if reserved region is contained in the active region | 998 | * if reserved region is contained in the active region |
999 | * then done. | 999 | * then done. |
1000 | */ | 1000 | */ |
1001 | if (end_pfn <= node_ar.end_pfn) | 1001 | if (end_pfn <= node_ar.end_pfn) |
1002 | break; | 1002 | break; |
1003 | 1003 | ||
1004 | /* | 1004 | /* |
1005 | * reserved region extends past the active region | 1005 | * reserved region extends past the active region |
1006 | * get next active region that contains this | 1006 | * get next active region that contains this |
1007 | * reserved region | 1007 | * reserved region |
1008 | */ | 1008 | */ |
1009 | start_pfn = node_ar.end_pfn; | 1009 | start_pfn = node_ar.end_pfn; |
1010 | physbase = start_pfn << PAGE_SHIFT; | 1010 | physbase = start_pfn << PAGE_SHIFT; |
1011 | size = size - reserve_size; | 1011 | size = size - reserve_size; |
1012 | get_node_active_region(start_pfn, &node_ar); | 1012 | get_node_active_region(start_pfn, &node_ar); |
1013 | } | 1013 | } |
1014 | } | 1014 | } |
1015 | } | 1015 | } |
1016 | 1016 | ||
1017 | 1017 | ||
1018 | void __init do_init_bootmem(void) | 1018 | void __init do_init_bootmem(void) |
1019 | { | 1019 | { |
1020 | int nid; | 1020 | int nid; |
1021 | 1021 | ||
1022 | min_low_pfn = 0; | 1022 | min_low_pfn = 0; |
1023 | max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; | 1023 | max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; |
1024 | max_pfn = max_low_pfn; | 1024 | max_pfn = max_low_pfn; |
1025 | 1025 | ||
1026 | if (parse_numa_properties()) | 1026 | if (parse_numa_properties()) |
1027 | setup_nonnuma(); | 1027 | setup_nonnuma(); |
1028 | else | 1028 | else |
1029 | dump_numa_memory_topology(); | 1029 | dump_numa_memory_topology(); |
1030 | 1030 | ||
1031 | for_each_online_node(nid) { | 1031 | for_each_online_node(nid) { |
1032 | unsigned long start_pfn, end_pfn; | 1032 | unsigned long start_pfn, end_pfn; |
1033 | void *bootmem_vaddr; | 1033 | void *bootmem_vaddr; |
1034 | unsigned long bootmap_pages; | 1034 | unsigned long bootmap_pages; |
1035 | 1035 | ||
1036 | get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); | 1036 | get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); |
1037 | 1037 | ||
1038 | /* | 1038 | /* |
1039 | * Allocate the node structure node local if possible | 1039 | * Allocate the node structure node local if possible |
1040 | * | 1040 | * |
1041 | * Be careful moving this around, as it relies on all | 1041 | * Be careful moving this around, as it relies on all |
1042 | * previous nodes' bootmem to be initialized and have | 1042 | * previous nodes' bootmem to be initialized and have |
1043 | * all reserved areas marked. | 1043 | * all reserved areas marked. |
1044 | */ | 1044 | */ |
1045 | NODE_DATA(nid) = careful_zallocation(nid, | 1045 | NODE_DATA(nid) = careful_zallocation(nid, |
1046 | sizeof(struct pglist_data), | 1046 | sizeof(struct pglist_data), |
1047 | SMP_CACHE_BYTES, end_pfn); | 1047 | SMP_CACHE_BYTES, end_pfn); |
1048 | 1048 | ||
1049 | dbg("node %d\n", nid); | 1049 | dbg("node %d\n", nid); |
1050 | dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); | 1050 | dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); |
1051 | 1051 | ||
1052 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; | 1052 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; |
1053 | NODE_DATA(nid)->node_start_pfn = start_pfn; | 1053 | NODE_DATA(nid)->node_start_pfn = start_pfn; |
1054 | NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; | 1054 | NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; |
1055 | 1055 | ||
1056 | if (NODE_DATA(nid)->node_spanned_pages == 0) | 1056 | if (NODE_DATA(nid)->node_spanned_pages == 0) |
1057 | continue; | 1057 | continue; |
1058 | 1058 | ||
1059 | dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT); | 1059 | dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT); |
1060 | dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); | 1060 | dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); |
1061 | 1061 | ||
1062 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); | 1062 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); |
1063 | bootmem_vaddr = careful_zallocation(nid, | 1063 | bootmem_vaddr = careful_zallocation(nid, |
1064 | bootmap_pages << PAGE_SHIFT, | 1064 | bootmap_pages << PAGE_SHIFT, |
1065 | PAGE_SIZE, end_pfn); | 1065 | PAGE_SIZE, end_pfn); |
1066 | 1066 | ||
1067 | dbg("bootmap_vaddr = %p\n", bootmem_vaddr); | 1067 | dbg("bootmap_vaddr = %p\n", bootmem_vaddr); |
1068 | 1068 | ||
1069 | init_bootmem_node(NODE_DATA(nid), | 1069 | init_bootmem_node(NODE_DATA(nid), |
1070 | __pa(bootmem_vaddr) >> PAGE_SHIFT, | 1070 | __pa(bootmem_vaddr) >> PAGE_SHIFT, |
1071 | start_pfn, end_pfn); | 1071 | start_pfn, end_pfn); |
1072 | 1072 | ||
1073 | free_bootmem_with_active_regions(nid, end_pfn); | 1073 | free_bootmem_with_active_regions(nid, end_pfn); |
1074 | /* | 1074 | /* |
1075 | * Be very careful about moving this around. Future | 1075 | * Be very careful about moving this around. Future |
1076 | * calls to careful_zallocation() depend on this getting | 1076 | * calls to careful_zallocation() depend on this getting |
1077 | * done correctly. | 1077 | * done correctly. |
1078 | */ | 1078 | */ |
1079 | mark_reserved_regions_for_nid(nid); | 1079 | mark_reserved_regions_for_nid(nid); |
1080 | sparse_memory_present_with_active_regions(nid); | 1080 | sparse_memory_present_with_active_regions(nid); |
1081 | } | 1081 | } |
1082 | 1082 | ||
1083 | init_bootmem_done = 1; | 1083 | init_bootmem_done = 1; |
1084 | 1084 | ||
1085 | /* | 1085 | /* |
1086 | * Now bootmem is initialised we can create the node to cpumask | 1086 | * Now bootmem is initialised we can create the node to cpumask |
1087 | * lookup tables and setup the cpu callback to populate them. | 1087 | * lookup tables and setup the cpu callback to populate them. |
1088 | */ | 1088 | */ |
1089 | setup_node_to_cpumask_map(); | 1089 | setup_node_to_cpumask_map(); |
1090 | 1090 | ||
1091 | register_cpu_notifier(&ppc64_numa_nb); | 1091 | register_cpu_notifier(&ppc64_numa_nb); |
1092 | cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, | 1092 | cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, |
1093 | (void *)(unsigned long)boot_cpuid); | 1093 | (void *)(unsigned long)boot_cpuid); |
1094 | } | 1094 | } |
1095 | 1095 | ||
1096 | void __init paging_init(void) | 1096 | void __init paging_init(void) |
1097 | { | 1097 | { |
1098 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 1098 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
1099 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 1099 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
1100 | max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT; | 1100 | max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT; |
1101 | free_area_init_nodes(max_zone_pfns); | 1101 | free_area_init_nodes(max_zone_pfns); |
1102 | } | 1102 | } |
1103 | 1103 | ||
1104 | static int __init early_numa(char *p) | 1104 | static int __init early_numa(char *p) |
1105 | { | 1105 | { |
1106 | if (!p) | 1106 | if (!p) |
1107 | return 0; | 1107 | return 0; |
1108 | 1108 | ||
1109 | if (strstr(p, "off")) | 1109 | if (strstr(p, "off")) |
1110 | numa_enabled = 0; | 1110 | numa_enabled = 0; |
1111 | 1111 | ||
1112 | if (strstr(p, "debug")) | 1112 | if (strstr(p, "debug")) |
1113 | numa_debug = 1; | 1113 | numa_debug = 1; |
1114 | 1114 | ||
1115 | p = strstr(p, "fake="); | 1115 | p = strstr(p, "fake="); |
1116 | if (p) | 1116 | if (p) |
1117 | cmdline = p + strlen("fake="); | 1117 | cmdline = p + strlen("fake="); |
1118 | 1118 | ||
1119 | return 0; | 1119 | return 0; |
1120 | } | 1120 | } |
1121 | early_param("numa", early_numa); | 1121 | early_param("numa", early_numa); |
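
early_numa() matches substrings rather than tokens, so "numa=off", "numa=debug" and "numa=fake=..." can be combined in one option string. A minimal userspace model of the same parsing, with invented variable names, is:

#include <stdio.h>
#include <string.h>

static int numa_enabled = 1;
static int numa_debug;
static const char *fake_cmdline;

/* Model of the "numa=" early parameter parser: substring matching, not tokens. */
static int parse_numa_opt(const char *p)
{
	if (!p)
		return 0;
	if (strstr(p, "off"))
		numa_enabled = 0;
	if (strstr(p, "debug"))
		numa_debug = 1;
	p = strstr(p, "fake=");
	if (p)
		fake_cmdline = p + strlen("fake=");
	return 0;
}

int main(void)
{
	parse_numa_opt("debug,fake=4");
	printf("enabled=%d debug=%d fake=%s\n",
	       numa_enabled, numa_debug, fake_cmdline ? fake_cmdline : "(none)");
	return 0;
}
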
1122 | 1122 | ||
1123 | #ifdef CONFIG_MEMORY_HOTPLUG | 1123 | #ifdef CONFIG_MEMORY_HOTPLUG |
1124 | /* | 1124 | /* |
1125 | * Find the node associated with a hot added memory section for | 1125 | * Find the node associated with a hot added memory section for |
1126 | * memory represented in the device tree by the property | 1126 | * memory represented in the device tree by the property |
1127 | * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory. | 1127 | * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory. |
1128 | */ | 1128 | */ |
1129 | static int hot_add_drconf_scn_to_nid(struct device_node *memory, | 1129 | static int hot_add_drconf_scn_to_nid(struct device_node *memory, |
1130 | unsigned long scn_addr) | 1130 | unsigned long scn_addr) |
1131 | { | 1131 | { |
1132 | const u32 *dm; | 1132 | const u32 *dm; |
1133 | unsigned int drconf_cell_cnt, rc; | 1133 | unsigned int drconf_cell_cnt, rc; |
1134 | unsigned long lmb_size; | 1134 | unsigned long lmb_size; |
1135 | struct assoc_arrays aa; | 1135 | struct assoc_arrays aa; |
1136 | int nid = -1; | 1136 | int nid = -1; |
1137 | 1137 | ||
1138 | drconf_cell_cnt = of_get_drconf_memory(memory, &dm); | 1138 | drconf_cell_cnt = of_get_drconf_memory(memory, &dm); |
1139 | if (!drconf_cell_cnt) | 1139 | if (!drconf_cell_cnt) |
1140 | return -1; | 1140 | return -1; |
1141 | 1141 | ||
1142 | lmb_size = of_get_lmb_size(memory); | 1142 | lmb_size = of_get_lmb_size(memory); |
1143 | if (!lmb_size) | 1143 | if (!lmb_size) |
1144 | return -1; | 1144 | return -1; |
1145 | 1145 | ||
1146 | rc = of_get_assoc_arrays(memory, &aa); | 1146 | rc = of_get_assoc_arrays(memory, &aa); |
1147 | if (rc) | 1147 | if (rc) |
1148 | return -1; | 1148 | return -1; |
1149 | 1149 | ||
1150 | for (; drconf_cell_cnt != 0; --drconf_cell_cnt) { | 1150 | for (; drconf_cell_cnt != 0; --drconf_cell_cnt) { |
1151 | struct of_drconf_cell drmem; | 1151 | struct of_drconf_cell drmem; |
1152 | 1152 | ||
1153 | read_drconf_cell(&drmem, &dm); | 1153 | read_drconf_cell(&drmem, &dm); |
1154 | 1154 | ||
1155 | /* skip this block if it is reserved or not assigned to | 1155 | /* skip this block if it is reserved or not assigned to |
1156 | * this partition */ | 1156 | * this partition */ |
1157 | if ((drmem.flags & DRCONF_MEM_RESERVED) | 1157 | if ((drmem.flags & DRCONF_MEM_RESERVED) |
1158 | || !(drmem.flags & DRCONF_MEM_ASSIGNED)) | 1158 | || !(drmem.flags & DRCONF_MEM_ASSIGNED)) |
1159 | continue; | 1159 | continue; |
1160 | 1160 | ||
1161 | if ((scn_addr < drmem.base_addr) | 1161 | if ((scn_addr < drmem.base_addr) |
1162 | || (scn_addr >= (drmem.base_addr + lmb_size))) | 1162 | || (scn_addr >= (drmem.base_addr + lmb_size))) |
1163 | continue; | 1163 | continue; |
1164 | 1164 | ||
1165 | nid = of_drconf_to_nid_single(&drmem, &aa); | 1165 | nid = of_drconf_to_nid_single(&drmem, &aa); |
1166 | break; | 1166 | break; |
1167 | } | 1167 | } |
1168 | 1168 | ||
1169 | return nid; | 1169 | return nid; |
1170 | } | 1170 | } |
1171 | 1171 | ||
1172 | /* | 1172 | /* |
1173 | * Find the node associated with a hot added memory section for memory | 1173 | * Find the node associated with a hot added memory section for memory |
1174 | * represented in the device tree as a node (i.e. memory@XXXX) for | 1174 | * represented in the device tree as a node (i.e. memory@XXXX) for |
1175 | * each memblock. | 1175 | * each memblock. |
1176 | */ | 1176 | */ |
1177 | int hot_add_node_scn_to_nid(unsigned long scn_addr) | 1177 | int hot_add_node_scn_to_nid(unsigned long scn_addr) |
1178 | { | 1178 | { |
1179 | struct device_node *memory; | 1179 | struct device_node *memory; |
1180 | int nid = -1; | 1180 | int nid = -1; |
1181 | 1181 | ||
1182 | for_each_node_by_type(memory, "memory") { | 1182 | for_each_node_by_type(memory, "memory") { |
1183 | unsigned long start, size; | 1183 | unsigned long start, size; |
1184 | int ranges; | 1184 | int ranges; |
1185 | const unsigned int *memcell_buf; | 1185 | const unsigned int *memcell_buf; |
1186 | unsigned int len; | 1186 | unsigned int len; |
1187 | 1187 | ||
1188 | memcell_buf = of_get_property(memory, "reg", &len); | 1188 | memcell_buf = of_get_property(memory, "reg", &len); |
1189 | if (!memcell_buf || len <= 0) | 1189 | if (!memcell_buf || len <= 0) |
1190 | continue; | 1190 | continue; |
1191 | 1191 | ||
1192 | /* ranges in cell */ | 1192 | /* ranges in cell */ |
1193 | ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); | 1193 | ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); |
1194 | 1194 | ||
1195 | while (ranges--) { | 1195 | while (ranges--) { |
1196 | start = read_n_cells(n_mem_addr_cells, &memcell_buf); | 1196 | start = read_n_cells(n_mem_addr_cells, &memcell_buf); |
1197 | size = read_n_cells(n_mem_size_cells, &memcell_buf); | 1197 | size = read_n_cells(n_mem_size_cells, &memcell_buf); |
1198 | 1198 | ||
1199 | if ((scn_addr < start) || (scn_addr >= (start + size))) | 1199 | if ((scn_addr < start) || (scn_addr >= (start + size))) |
1200 | continue; | 1200 | continue; |
1201 | 1201 | ||
1202 | nid = of_node_to_nid_single(memory); | 1202 | nid = of_node_to_nid_single(memory); |
1203 | break; | 1203 | break; |
1204 | } | 1204 | } |
1205 | 1205 | ||
1206 | if (nid >= 0) | 1206 | if (nid >= 0) |
1207 | break; | 1207 | break; |
1208 | } | 1208 | } |
1209 | 1209 | ||
1210 | of_node_put(memory); | 1210 | of_node_put(memory); |
1211 | 1211 | ||
1212 | return nid; | 1212 | return nid; |
1213 | } | 1213 | } |
1214 | 1214 | ||
1215 | /* | 1215 | /* |
1216 | * Find the node associated with a hot added memory section. Section | 1216 | * Find the node associated with a hot added memory section. Section |
1217 | * corresponds to a SPARSEMEM section, not a MEMBLOCK. It is assumed that | 1217 | * corresponds to a SPARSEMEM section, not a MEMBLOCK. It is assumed that |
1218 | * sections are fully contained within a single MEMBLOCK. | 1218 | * sections are fully contained within a single MEMBLOCK. |
1219 | */ | 1219 | */ |
1220 | int hot_add_scn_to_nid(unsigned long scn_addr) | 1220 | int hot_add_scn_to_nid(unsigned long scn_addr) |
1221 | { | 1221 | { |
1222 | struct device_node *memory = NULL; | 1222 | struct device_node *memory = NULL; |
1223 | int nid, found = 0; | 1223 | int nid, found = 0; |
1224 | 1224 | ||
1225 | if (!numa_enabled || (min_common_depth < 0)) | 1225 | if (!numa_enabled || (min_common_depth < 0)) |
1226 | return first_online_node; | 1226 | return first_online_node; |
1227 | 1227 | ||
1228 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); | 1228 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); |
1229 | if (memory) { | 1229 | if (memory) { |
1230 | nid = hot_add_drconf_scn_to_nid(memory, scn_addr); | 1230 | nid = hot_add_drconf_scn_to_nid(memory, scn_addr); |
1231 | of_node_put(memory); | 1231 | of_node_put(memory); |
1232 | } else { | 1232 | } else { |
1233 | nid = hot_add_node_scn_to_nid(scn_addr); | 1233 | nid = hot_add_node_scn_to_nid(scn_addr); |
1234 | } | 1234 | } |
1235 | 1235 | ||
1236 | if (nid < 0 || !node_online(nid)) | 1236 | if (nid < 0 || !node_online(nid)) |
1237 | nid = first_online_node; | 1237 | nid = first_online_node; |
1238 | 1238 | ||
1239 | if (NODE_DATA(nid)->node_spanned_pages) | 1239 | if (NODE_DATA(nid)->node_spanned_pages) |
1240 | return nid; | 1240 | return nid; |
1241 | 1241 | ||
1242 | for_each_online_node(nid) { | 1242 | for_each_online_node(nid) { |
1243 | if (NODE_DATA(nid)->node_spanned_pages) { | 1243 | if (NODE_DATA(nid)->node_spanned_pages) { |
1244 | found = 1; | 1244 | found = 1; |
1245 | break; | 1245 | break; |
1246 | } | 1246 | } |
1247 | } | 1247 | } |
1248 | 1248 | ||
1249 | BUG_ON(!found); | 1249 | BUG_ON(!found); |
1250 | return nid; | 1250 | return nid; |
1251 | } | 1251 | } |
1252 | 1252 | ||
1253 | static u64 hot_add_drconf_memory_max(void) | 1253 | static u64 hot_add_drconf_memory_max(void) |
1254 | { | 1254 | { |
1255 | struct device_node *memory = NULL; | 1255 | struct device_node *memory = NULL; |
1256 | unsigned int drconf_cell_cnt = 0; | 1256 | unsigned int drconf_cell_cnt = 0; |
1257 | u64 lmb_size = 0; | 1257 | u64 lmb_size = 0; |
1258 | const u32 *dm = 0; | 1258 | const u32 *dm = 0; |
1259 | 1259 | ||
1260 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); | 1260 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); |
1261 | if (memory) { | 1261 | if (memory) { |
1262 | drconf_cell_cnt = of_get_drconf_memory(memory, &dm); | 1262 | drconf_cell_cnt = of_get_drconf_memory(memory, &dm); |
1263 | lmb_size = of_get_lmb_size(memory); | 1263 | lmb_size = of_get_lmb_size(memory); |
1264 | of_node_put(memory); | 1264 | of_node_put(memory); |
1265 | } | 1265 | } |
1266 | return lmb_size * drconf_cell_cnt; | 1266 | return lmb_size * drconf_cell_cnt; |
1267 | } | 1267 | } |
1268 | 1268 | ||
1269 | /* | 1269 | /* |
1270 | * memory_hotplug_max - return max address of memory that may be added | 1270 | * memory_hotplug_max - return max address of memory that may be added |
1271 | * | 1271 | * |
1272 | * This is currently only used on systems that support drconfig memory | 1272 | * This is currently only used on systems that support drconfig memory |
1273 | * hotplug. | 1273 | * hotplug. |
1274 | */ | 1274 | */ |
1275 | u64 memory_hotplug_max(void) | 1275 | u64 memory_hotplug_max(void) |
1276 | { | 1276 | { |
1277 | return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM()); | 1277 | return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM()); |
1278 | } | 1278 | } |
1279 | #endif /* CONFIG_MEMORY_HOTPLUG */ | 1279 | #endif /* CONFIG_MEMORY_HOTPLUG */ |
1280 | 1280 | ||
1281 | /* Virtual Processor Home Node (VPHN) support */ | 1281 | /* Virtual Processor Home Node (VPHN) support */ |
1282 | #ifdef CONFIG_PPC_SPLPAR | 1282 | #ifdef CONFIG_PPC_SPLPAR |
1283 | static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS]; | 1283 | static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS]; |
1284 | static cpumask_t cpu_associativity_changes_mask; | 1284 | static cpumask_t cpu_associativity_changes_mask; |
1285 | static int vphn_enabled; | 1285 | static int vphn_enabled; |
1286 | static void set_topology_timer(void); | 1286 | static void set_topology_timer(void); |
1287 | 1287 | ||
1288 | /* | 1288 | /* |
1289 | * Store the current values of the associativity change counters reported | 1289 | * Store the current values of the associativity change counters reported |
1290 | * by the hypervisor. | 1290 | * by the hypervisor. |
1291 | */ | 1291 | */ |
1292 | static void setup_cpu_associativity_change_counters(void) | 1292 | static void setup_cpu_associativity_change_counters(void) |
1293 | { | 1293 | { |
1294 | int cpu; | 1294 | int cpu; |
1295 | 1295 | ||
1296 | /* The VPHN feature supports a maximum of 8 reference points */ | 1296 | /* The VPHN feature supports a maximum of 8 reference points */ |
1297 | BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8); | 1297 | BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8); |
1298 | 1298 | ||
1299 | for_each_possible_cpu(cpu) { | 1299 | for_each_possible_cpu(cpu) { |
1300 | int i; | 1300 | int i; |
1301 | u8 *counts = vphn_cpu_change_counts[cpu]; | 1301 | u8 *counts = vphn_cpu_change_counts[cpu]; |
1302 | volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; | 1302 | volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; |
1303 | 1303 | ||
1304 | for (i = 0; i < distance_ref_points_depth; i++) | 1304 | for (i = 0; i < distance_ref_points_depth; i++) |
1305 | counts[i] = hypervisor_counts[i]; | 1305 | counts[i] = hypervisor_counts[i]; |
1306 | } | 1306 | } |
1307 | } | 1307 | } |
1308 | 1308 | ||
1309 | /* | 1309 | /* |
1310 | * The hypervisor maintains a set of 8 associativity change counters in | 1310 | * The hypervisor maintains a set of 8 associativity change counters in |
1311 | * the VPA of each cpu that correspond to the associativity levels in the | 1311 | * the VPA of each cpu that correspond to the associativity levels in the |
1312 | * ibm,associativity-reference-points property. When an associativity | 1312 | * ibm,associativity-reference-points property. When an associativity |
1313 | * level changes, the corresponding counter is incremented. | 1313 | * level changes, the corresponding counter is incremented. |
1314 | * | 1314 | * |
1315 | * Set a bit in cpu_associativity_changes_mask for each cpu whose home | 1315 | * Set a bit in cpu_associativity_changes_mask for each cpu whose home |
1316 | * node associativity levels have changed. | 1316 | * node associativity levels have changed. |
1317 | * | 1317 | * |
1318 | * Returns the number of cpus with unhandled associativity changes. | 1318 | * Returns the number of cpus with unhandled associativity changes. |
1319 | */ | 1319 | */ |
1320 | static int update_cpu_associativity_changes_mask(void) | 1320 | static int update_cpu_associativity_changes_mask(void) |
1321 | { | 1321 | { |
1322 | int cpu, nr_cpus = 0; | 1322 | int cpu, nr_cpus = 0; |
1323 | cpumask_t *changes = &cpu_associativity_changes_mask; | 1323 | cpumask_t *changes = &cpu_associativity_changes_mask; |
1324 | 1324 | ||
1325 | cpumask_clear(changes); | 1325 | cpumask_clear(changes); |
1326 | 1326 | ||
1327 | for_each_possible_cpu(cpu) { | 1327 | for_each_possible_cpu(cpu) { |
1328 | int i, changed = 0; | 1328 | int i, changed = 0; |
1329 | u8 *counts = vphn_cpu_change_counts[cpu]; | 1329 | u8 *counts = vphn_cpu_change_counts[cpu]; |
1330 | volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; | 1330 | volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; |
1331 | 1331 | ||
1332 | for (i = 0; i < distance_ref_points_depth; i++) { | 1332 | for (i = 0; i < distance_ref_points_depth; i++) { |
1333 | if (hypervisor_counts[i] != counts[i]) { | 1333 | if (hypervisor_counts[i] != counts[i]) { |
1334 | counts[i] = hypervisor_counts[i]; | 1334 | counts[i] = hypervisor_counts[i]; |
1335 | changed = 1; | 1335 | changed = 1; |
1336 | } | 1336 | } |
1337 | } | 1337 | } |
1338 | if (changed) { | 1338 | if (changed) { |
1339 | cpumask_set_cpu(cpu, changes); | 1339 | cpumask_set_cpu(cpu, changes); |
1340 | nr_cpus++; | 1340 | nr_cpus++; |
1341 | } | 1341 | } |
1342 | } | 1342 | } |
1343 | 1343 | ||
1344 | return nr_cpus; | 1344 | return nr_cpus; |
1345 | } | 1345 | } |
1346 | 1346 | ||
1347 | /* | 1347 | /* |
1348 | * 6 64-bit registers unpacked into 12 32-bit associativity values. To form | 1348 | * 6 64-bit registers unpacked into 12 32-bit associativity values. To form |
1349 | * the complete property we have to add the length in the first cell. | 1349 | * the complete property we have to add the length in the first cell. |
1350 | */ | 1350 | */ |
1351 | #define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32) + 1) | 1351 | #define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32) + 1) |
1352 | 1352 | ||
1353 | /* | 1353 | /* |
1354 | * Convert the associativity domain numbers returned from the hypervisor | 1354 | * Convert the associativity domain numbers returned from the hypervisor |
1355 | * to the sequence they would appear in the ibm,associativity property. | 1355 | * to the sequence they would appear in the ibm,associativity property. |
1356 | */ | 1356 | */ |
1357 | static int vphn_unpack_associativity(const long *packed, unsigned int *unpacked) | 1357 | static int vphn_unpack_associativity(const long *packed, unsigned int *unpacked) |
1358 | { | 1358 | { |
1359 | int i, nr_assoc_doms = 0; | 1359 | int i, nr_assoc_doms = 0; |
1360 | const u16 *field = (const u16*) packed; | 1360 | const u16 *field = (const u16*) packed; |
1361 | 1361 | ||
1362 | #define VPHN_FIELD_UNUSED (0xffff) | 1362 | #define VPHN_FIELD_UNUSED (0xffff) |
1363 | #define VPHN_FIELD_MSB (0x8000) | 1363 | #define VPHN_FIELD_MSB (0x8000) |
1364 | #define VPHN_FIELD_MASK (~VPHN_FIELD_MSB) | 1364 | #define VPHN_FIELD_MASK (~VPHN_FIELD_MSB) |
1365 | 1365 | ||
1366 | for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) { | 1366 | for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) { |
1367 | if (*field == VPHN_FIELD_UNUSED) { | 1367 | if (*field == VPHN_FIELD_UNUSED) { |
1368 | /* All significant fields processed, and remaining | 1368 | /* All significant fields processed, and remaining |
1369 | * fields contain the reserved value of all 1's. | 1369 | * fields contain the reserved value of all 1's. |
1370 | * Just store them. | 1370 | * Just store them. |
1371 | */ | 1371 | */ |
1372 | unpacked[i] = *((u32*)field); | 1372 | unpacked[i] = *((u32*)field); |
1373 | field += 2; | 1373 | field += 2; |
1374 | } else if (*field & VPHN_FIELD_MSB) { | 1374 | } else if (*field & VPHN_FIELD_MSB) { |
1375 | /* Data is in the lower 15 bits of this field */ | 1375 | /* Data is in the lower 15 bits of this field */ |
1376 | unpacked[i] = *field & VPHN_FIELD_MASK; | 1376 | unpacked[i] = *field & VPHN_FIELD_MASK; |
1377 | field++; | 1377 | field++; |
1378 | nr_assoc_doms++; | 1378 | nr_assoc_doms++; |
1379 | } else { | 1379 | } else { |
1380 | /* Data is in the lower 15 bits of this field | 1380 | /* Data is in the lower 15 bits of this field |
1381 | * concatenated with the next 16 bit field | 1381 | * concatenated with the next 16 bit field |
1382 | */ | 1382 | */ |
1383 | unpacked[i] = *((u32*)field); | 1383 | unpacked[i] = *((u32*)field); |
1384 | field += 2; | 1384 | field += 2; |
1385 | nr_assoc_doms++; | 1385 | nr_assoc_doms++; |
1386 | } | 1386 | } |
1387 | } | 1387 | } |
1388 | 1388 | ||
1389 | /* The first cell contains the length of the property */ | 1389 | /* The first cell contains the length of the property */ |
1390 | unpacked[0] = nr_assoc_doms; | 1390 | unpacked[0] = nr_assoc_doms; |
1391 | 1391 | ||
1392 | return nr_assoc_doms; | 1392 | return nr_assoc_doms; |
1393 | } | 1393 | } |
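
vphn_unpack_associativity() decodes 16-bit fields packed by the hypervisor: 0xffff marks unused trailing fields, a set MSB means a 15-bit domain number in a single field, and a clear MSB means a 32-bit number spanning two fields. The sketch below is a hedged userspace model of those rules; it composes the 32-bit values explicitly instead of the u32 pointer cast the kernel can use on big-endian ppc64, and the sample packed[] buffer is invented.

#include <stdio.h>
#include <stdint.h>

#define VPHN_FIELD_UNUSED 0xffff
#define VPHN_FIELD_MSB    0x8000
#define VPHN_FIELD_MASK   (~VPHN_FIELD_MSB & 0xffff)
#define ASSOC_BUFSIZE     13	/* 12 values plus the length cell, as in the kernel */

/* Returns the number of associativity domains found; unpacked[0] holds the length. */
static int unpack_associativity(const uint16_t *field, uint32_t *unpacked)
{
	int i, nr_assoc_doms = 0;

	for (i = 1; i < ASSOC_BUFSIZE; i++) {
		if (*field == VPHN_FIELD_UNUSED) {
			/* remaining fields are all 1's; store them as-is */
			unpacked[i] = ((uint32_t)field[0] << 16) | field[1];
			field += 2;
		} else if (*field & VPHN_FIELD_MSB) {
			/* 15-bit domain number in one field */
			unpacked[i] = *field & VPHN_FIELD_MASK;
			field++;
			nr_assoc_doms++;
		} else {
			/* 32-bit domain number across two fields */
			unpacked[i] = ((uint32_t)field[0] << 16) | field[1];
			field += 2;
			nr_assoc_doms++;
		}
	}
	unpacked[0] = nr_assoc_doms;
	return nr_assoc_doms;
}

int main(void)
{
	/* two 15-bit domains, one 32-bit domain, rest unused (24 fields = 6 u64) */
	const uint16_t packed[] = {
		0x8002, 0x8005, 0x0001, 0x2345,
		0xffff, 0xffff, 0xffff, 0xffff,
		0xffff, 0xffff, 0xffff, 0xffff,
		0xffff, 0xffff, 0xffff, 0xffff,
		0xffff, 0xffff, 0xffff, 0xffff,
		0xffff, 0xffff, 0xffff, 0xffff,
	};
	uint32_t out[ASSOC_BUFSIZE];
	int i, n = unpack_associativity(packed, out);

	printf("%d domains:", n);
	for (i = 1; i <= n; i++)
		printf(" 0x%x", out[i]);
	printf("\n");	/* 3 domains: 0x2 0x5 0x12345 */
	return 0;
}
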
1394 | 1394 | ||
1395 | /* | 1395 | /* |
1396 | * Retrieve the new associativity information for a virtual processor's | 1396 | * Retrieve the new associativity information for a virtual processor's |
1397 | * home node. | 1397 | * home node. |
1398 | */ | 1398 | */ |
1399 | static long hcall_vphn(unsigned long cpu, unsigned int *associativity) | 1399 | static long hcall_vphn(unsigned long cpu, unsigned int *associativity) |
1400 | { | 1400 | { |
1401 | long rc; | 1401 | long rc; |
1402 | long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; | 1402 | long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; |
1403 | u64 flags = 1; | 1403 | u64 flags = 1; |
1404 | int hwcpu = get_hard_smp_processor_id(cpu); | 1404 | int hwcpu = get_hard_smp_processor_id(cpu); |
1405 | 1405 | ||
1406 | rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); | 1406 | rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); |
1407 | vphn_unpack_associativity(retbuf, associativity); | 1407 | vphn_unpack_associativity(retbuf, associativity); |
1408 | 1408 | ||
1409 | return rc; | 1409 | return rc; |
1410 | } | 1410 | } |
1411 | 1411 | ||
1412 | static long vphn_get_associativity(unsigned long cpu, | 1412 | static long vphn_get_associativity(unsigned long cpu, |
1413 | unsigned int *associativity) | 1413 | unsigned int *associativity) |
1414 | { | 1414 | { |
1415 | long rc; | 1415 | long rc; |
1416 | 1416 | ||
1417 | rc = hcall_vphn(cpu, associativity); | 1417 | rc = hcall_vphn(cpu, associativity); |
1418 | 1418 | ||
1419 | switch (rc) { | 1419 | switch (rc) { |
1420 | case H_FUNCTION: | 1420 | case H_FUNCTION: |
1421 | printk(KERN_INFO | 1421 | printk(KERN_INFO |
1422 | "VPHN is not supported. Disabling polling...\n"); | 1422 | "VPHN is not supported. Disabling polling...\n"); |
1423 | stop_topology_update(); | 1423 | stop_topology_update(); |
1424 | break; | 1424 | break; |
1425 | case H_HARDWARE: | 1425 | case H_HARDWARE: |
1426 | printk(KERN_ERR | 1426 | printk(KERN_ERR |
1427 | "hcall_vphn() experienced a hardware fault " | 1427 | "hcall_vphn() experienced a hardware fault " |
1428 | "preventing VPHN. Disabling polling...\n"); | 1428 | "preventing VPHN. Disabling polling...\n"); |
1429 | stop_topology_update(); | 1429 | stop_topology_update(); |
1430 | } | 1430 | } |
1431 | 1431 | ||
1432 | return rc; | 1432 | return rc; |
1433 | } | 1433 | } |
1434 | 1434 | ||
1435 | /* | 1435 | /* |
1436 | * Update the node maps and sysfs entries for each cpu whose home node | 1436 | * Update the node maps and sysfs entries for each cpu whose home node |
1437 | * has changed. | 1437 | * has changed. |
1438 | */ | 1438 | */ |
1439 | int arch_update_cpu_topology(void) | 1439 | int arch_update_cpu_topology(void) |
1440 | { | 1440 | { |
1441 | int cpu, nid, old_nid; | 1441 | int cpu, nid, old_nid; |
1442 | unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; | 1442 | unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; |
1443 | struct device *dev; | 1443 | struct device *dev; |
1444 | 1444 | ||
1445 | for_each_cpu(cpu,&cpu_associativity_changes_mask) { | 1445 | for_each_cpu(cpu,&cpu_associativity_changes_mask) { |
1446 | vphn_get_associativity(cpu, associativity); | 1446 | vphn_get_associativity(cpu, associativity); |
1447 | nid = associativity_to_nid(associativity); | 1447 | nid = associativity_to_nid(associativity); |
1448 | 1448 | ||
1449 | if (nid < 0 || !node_online(nid)) | 1449 | if (nid < 0 || !node_online(nid)) |
1450 | nid = first_online_node; | 1450 | nid = first_online_node; |
1451 | 1451 | ||
1452 | old_nid = numa_cpu_lookup_table[cpu]; | 1452 | old_nid = numa_cpu_lookup_table[cpu]; |
1453 | 1453 | ||
1454 | /* Disable hotplug while we update the cpu | 1454 | /* Disable hotplug while we update the cpu |
1455 | * masks and sysfs. | 1455 | * masks and sysfs. |
1456 | */ | 1456 | */ |
1457 | get_online_cpus(); | 1457 | get_online_cpus(); |
1458 | unregister_cpu_under_node(cpu, old_nid); | 1458 | unregister_cpu_under_node(cpu, old_nid); |
1459 | unmap_cpu_from_node(cpu); | 1459 | unmap_cpu_from_node(cpu); |
1460 | map_cpu_to_node(cpu, nid); | 1460 | map_cpu_to_node(cpu, nid); |
1461 | register_cpu_under_node(cpu, nid); | 1461 | register_cpu_under_node(cpu, nid); |
1462 | put_online_cpus(); | 1462 | put_online_cpus(); |
1463 | 1463 | ||
1464 | dev = get_cpu_device(cpu); | 1464 | dev = get_cpu_device(cpu); |
1465 | if (dev) | 1465 | if (dev) |
1466 | kobject_uevent(&dev->kobj, KOBJ_CHANGE); | 1466 | kobject_uevent(&dev->kobj, KOBJ_CHANGE); |
1467 | } | 1467 | } |
1468 | 1468 | ||
1469 | return 1; | 1469 | return 1; |
1470 | } | 1470 | } |
1471 | 1471 | ||
1472 | static void topology_work_fn(struct work_struct *work) | 1472 | static void topology_work_fn(struct work_struct *work) |
1473 | { | 1473 | { |
1474 | rebuild_sched_domains(); | 1474 | rebuild_sched_domains(); |
1475 | } | 1475 | } |
1476 | static DECLARE_WORK(topology_work, topology_work_fn); | 1476 | static DECLARE_WORK(topology_work, topology_work_fn); |
1477 | 1477 | ||
1478 | void topology_schedule_update(void) | 1478 | void topology_schedule_update(void) |
1479 | { | 1479 | { |
1480 | schedule_work(&topology_work); | 1480 | schedule_work(&topology_work); |
1481 | } | 1481 | } |
1482 | 1482 | ||
1483 | static void topology_timer_fn(unsigned long ignored) | 1483 | static void topology_timer_fn(unsigned long ignored) |
1484 | { | 1484 | { |
1485 | if (!vphn_enabled) | 1485 | if (!vphn_enabled) |
1486 | return; | 1486 | return; |
1487 | if (update_cpu_associativity_changes_mask() > 0) | 1487 | if (update_cpu_associativity_changes_mask() > 0) |
1488 | topology_schedule_update(); | 1488 | topology_schedule_update(); |
1489 | set_topology_timer(); | 1489 | set_topology_timer(); |
1490 | } | 1490 | } |
1491 | static struct timer_list topology_timer = | 1491 | static struct timer_list topology_timer = |
1492 | TIMER_INITIALIZER(topology_timer_fn, 0, 0); | 1492 | TIMER_INITIALIZER(topology_timer_fn, 0, 0); |
1493 | 1493 | ||
1494 | static void set_topology_timer(void) | 1494 | static void set_topology_timer(void) |
1495 | { | 1495 | { |
1496 | topology_timer.data = 0; | 1496 | topology_timer.data = 0; |
1497 | topology_timer.expires = jiffies + 60 * HZ; | 1497 | topology_timer.expires = jiffies + 60 * HZ; |
1498 | add_timer(&topology_timer); | 1498 | add_timer(&topology_timer); |
1499 | } | 1499 | } |
1500 | 1500 | ||
1501 | /* | 1501 | /* |
1502 | * Start polling for VPHN associativity changes. | 1502 | * Start polling for VPHN associativity changes. |
1503 | */ | 1503 | */ |
1504 | int start_topology_update(void) | 1504 | int start_topology_update(void) |
1505 | { | 1505 | { |
1506 | int rc = 0; | 1506 | int rc = 0; |
1507 | 1507 | ||
1508 | /* Disabled until races with load balancing are fixed */ | 1508 | /* Disabled until races with load balancing are fixed */ |
1509 | if (0 && firmware_has_feature(FW_FEATURE_VPHN) && | 1509 | if (0 && firmware_has_feature(FW_FEATURE_VPHN) && |
1510 | get_lppaca()->shared_proc) { | 1510 | get_lppaca()->shared_proc) { |
1511 | vphn_enabled = 1; | 1511 | vphn_enabled = 1; |
1512 | setup_cpu_associativity_change_counters(); | 1512 | setup_cpu_associativity_change_counters(); |
1513 | init_timer_deferrable(&topology_timer); | 1513 | init_timer_deferrable(&topology_timer); |
1514 | set_topology_timer(); | 1514 | set_topology_timer(); |
1515 | rc = 1; | 1515 | rc = 1; |
1516 | } | 1516 | } |
1517 | 1517 | ||
1518 | return rc; | 1518 | return rc; |
1519 | } | 1519 | } |
1520 | __initcall(start_topology_update); | 1520 | __initcall(start_topology_update); |
1521 | 1521 | ||
1522 | /* | 1522 | /* |
1523 | * Disable polling for VPHN associativity changes. | 1523 | * Disable polling for VPHN associativity changes. |
1524 | */ | 1524 | */ |
1525 | int stop_topology_update(void) | 1525 | int stop_topology_update(void) |
1526 | { | 1526 | { |
1527 | vphn_enabled = 0; | 1527 | vphn_enabled = 0; |
1528 | return del_timer_sync(&topology_timer); | 1528 | return del_timer_sync(&topology_timer); |
1529 | } | 1529 | } |
1530 | #endif /* CONFIG_PPC_SPLPAR */ | 1530 | #endif /* CONFIG_PPC_SPLPAR */ |
1531 | 1531 |
arch/x86/mm/numa.c
1 | /* Common code for 32 and 64-bit NUMA */ | 1 | /* Common code for 32 and 64-bit NUMA */ |
2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
3 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
4 | #include <linux/string.h> | 4 | #include <linux/string.h> |
5 | #include <linux/init.h> | 5 | #include <linux/init.h> |
6 | #include <linux/bootmem.h> | 6 | #include <linux/bootmem.h> |
7 | #include <linux/memblock.h> | 7 | #include <linux/memblock.h> |
8 | #include <linux/mmzone.h> | 8 | #include <linux/mmzone.h> |
9 | #include <linux/ctype.h> | 9 | #include <linux/ctype.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/nodemask.h> | 11 | #include <linux/nodemask.h> |
12 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
13 | #include <linux/topology.h> | 13 | #include <linux/topology.h> |
14 | 14 | ||
15 | #include <asm/e820.h> | 15 | #include <asm/e820.h> |
16 | #include <asm/proto.h> | 16 | #include <asm/proto.h> |
17 | #include <asm/dma.h> | 17 | #include <asm/dma.h> |
18 | #include <asm/acpi.h> | 18 | #include <asm/acpi.h> |
19 | #include <asm/amd_nb.h> | 19 | #include <asm/amd_nb.h> |
20 | 20 | ||
21 | #include "numa_internal.h" | 21 | #include "numa_internal.h" |
22 | 22 | ||
23 | int __initdata numa_off; | 23 | int __initdata numa_off; |
24 | nodemask_t numa_nodes_parsed __initdata; | 24 | nodemask_t numa_nodes_parsed __initdata; |
25 | 25 | ||
26 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 26 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
27 | EXPORT_SYMBOL(node_data); | 27 | EXPORT_SYMBOL(node_data); |
28 | 28 | ||
29 | static struct numa_meminfo numa_meminfo | 29 | static struct numa_meminfo numa_meminfo |
30 | #ifndef CONFIG_MEMORY_HOTPLUG | 30 | #ifndef CONFIG_MEMORY_HOTPLUG |
31 | __initdata | 31 | __initdata |
32 | #endif | 32 | #endif |
33 | ; | 33 | ; |
34 | 34 | ||
35 | static int numa_distance_cnt; | 35 | static int numa_distance_cnt; |
36 | static u8 *numa_distance; | 36 | static u8 *numa_distance; |
37 | 37 | ||
38 | static __init int numa_setup(char *opt) | 38 | static __init int numa_setup(char *opt) |
39 | { | 39 | { |
40 | if (!opt) | 40 | if (!opt) |
41 | return -EINVAL; | 41 | return -EINVAL; |
42 | if (!strncmp(opt, "off", 3)) | 42 | if (!strncmp(opt, "off", 3)) |
43 | numa_off = 1; | 43 | numa_off = 1; |
44 | #ifdef CONFIG_NUMA_EMU | 44 | #ifdef CONFIG_NUMA_EMU |
45 | if (!strncmp(opt, "fake=", 5)) | 45 | if (!strncmp(opt, "fake=", 5)) |
46 | numa_emu_cmdline(opt + 5); | 46 | numa_emu_cmdline(opt + 5); |
47 | #endif | 47 | #endif |
48 | #ifdef CONFIG_ACPI_NUMA | 48 | #ifdef CONFIG_ACPI_NUMA |
49 | if (!strncmp(opt, "noacpi", 6)) | 49 | if (!strncmp(opt, "noacpi", 6)) |
50 | acpi_numa = -1; | 50 | acpi_numa = -1; |
51 | #endif | 51 | #endif |
52 | return 0; | 52 | return 0; |
53 | } | 53 | } |
54 | early_param("numa", numa_setup); | 54 | early_param("numa", numa_setup); |
55 | 55 | ||
56 | /* | 56 | /* |
57 | * apicid, cpu, node mappings | 57 | * apicid, cpu, node mappings |
58 | */ | 58 | */ |
59 | s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | 59 | s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { |
60 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | 60 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE |
61 | }; | 61 | }; |
62 | 62 | ||
63 | int __cpuinit numa_cpu_node(int cpu) | 63 | int __cpuinit numa_cpu_node(int cpu) |
64 | { | 64 | { |
65 | int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); | 65 | int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); |
66 | 66 | ||
67 | if (apicid != BAD_APICID) | 67 | if (apicid != BAD_APICID) |
68 | return __apicid_to_node[apicid]; | 68 | return __apicid_to_node[apicid]; |
69 | return NUMA_NO_NODE; | 69 | return NUMA_NO_NODE; |
70 | } | 70 | } |
71 | 71 | ||
72 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; | 72 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; |
73 | EXPORT_SYMBOL(node_to_cpumask_map); | 73 | EXPORT_SYMBOL(node_to_cpumask_map); |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * Map cpu index to node index | 76 | * Map cpu index to node index |
77 | */ | 77 | */ |
78 | DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); | 78 | DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); |
79 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); | 79 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); |
80 | 80 | ||
81 | void __cpuinit numa_set_node(int cpu, int node) | 81 | void __cpuinit numa_set_node(int cpu, int node) |
82 | { | 82 | { |
83 | int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); | 83 | int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); |
84 | 84 | ||
85 | /* early setting, no percpu area yet */ | 85 | /* early setting, no percpu area yet */ |
86 | if (cpu_to_node_map) { | 86 | if (cpu_to_node_map) { |
87 | cpu_to_node_map[cpu] = node; | 87 | cpu_to_node_map[cpu] = node; |
88 | return; | 88 | return; |
89 | } | 89 | } |
90 | 90 | ||
91 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 91 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
92 | if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { | 92 | if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { |
93 | printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); | 93 | printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); |
94 | dump_stack(); | 94 | dump_stack(); |
95 | return; | 95 | return; |
96 | } | 96 | } |
97 | #endif | 97 | #endif |
98 | per_cpu(x86_cpu_to_node_map, cpu) = node; | 98 | per_cpu(x86_cpu_to_node_map, cpu) = node; |
99 | 99 | ||
100 | if (node != NUMA_NO_NODE) | 100 | if (node != NUMA_NO_NODE) |
101 | set_cpu_numa_node(cpu, node); | 101 | set_cpu_numa_node(cpu, node); |
102 | } | 102 | } |
103 | 103 | ||
104 | void __cpuinit numa_clear_node(int cpu) | 104 | void __cpuinit numa_clear_node(int cpu) |
105 | { | 105 | { |
106 | numa_set_node(cpu, NUMA_NO_NODE); | 106 | numa_set_node(cpu, NUMA_NO_NODE); |
107 | } | 107 | } |
108 | 108 | ||
109 | /* | 109 | /* |
110 | * Allocate node_to_cpumask_map based on number of available nodes | 110 | * Allocate node_to_cpumask_map based on number of available nodes |
111 | * Requires node_possible_map to be valid. | 111 | * Requires node_possible_map to be valid. |
112 | * | 112 | * |
113 | * Note: node_to_cpumask() is not valid until after this is done. | 113 | * Note: cpumask_of_node() is not valid until after this is done. |
114 | * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) | 114 | * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) |
115 | */ | 115 | */ |
116 | void __init setup_node_to_cpumask_map(void) | 116 | void __init setup_node_to_cpumask_map(void) |
117 | { | 117 | { |
118 | unsigned int node, num = 0; | 118 | unsigned int node, num = 0; |
119 | 119 | ||
120 | /* setup nr_node_ids if not done yet */ | 120 | /* setup nr_node_ids if not done yet */ |
121 | if (nr_node_ids == MAX_NUMNODES) { | 121 | if (nr_node_ids == MAX_NUMNODES) { |
122 | for_each_node_mask(node, node_possible_map) | 122 | for_each_node_mask(node, node_possible_map) |
123 | num = node; | 123 | num = node; |
124 | nr_node_ids = num + 1; | 124 | nr_node_ids = num + 1; |
125 | } | 125 | } |
126 | 126 | ||
127 | /* allocate the map */ | 127 | /* allocate the map */ |
128 | for (node = 0; node < nr_node_ids; node++) | 128 | for (node = 0; node < nr_node_ids; node++) |
129 | alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); | 129 | alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); |
130 | 130 | ||
131 | /* cpumask_of_node() will now work */ | 131 | /* cpumask_of_node() will now work */ |
132 | pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); | 132 | pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); |
133 | } | 133 | } |
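
setup_node_to_cpumask_map() first derives nr_node_ids as the highest bit set in node_possible_map plus one, then allocates one cpumask per node so that cpumask_of_node() becomes valid. A hedged userspace sketch of just the nr_node_ids derivation, using a plain 64-bit mask in place of nodemask_t:

#include <stdio.h>

#define MAX_NUMNODES 64

/* Highest possible node number plus one; mirrors the loop in the kernel function. */
static unsigned int nr_node_ids_from(unsigned long long node_possible_map)
{
	unsigned int node, num = 0;

	for (node = 0; node < MAX_NUMNODES; node++)
		if (node_possible_map & (1ULL << node))
			num = node;
	return num + 1;
}

int main(void)
{
	/* nodes 0, 1 and 4 possible -> nr_node_ids == 5 */
	printf("nr_node_ids = %u\n", nr_node_ids_from(0x13ULL));
	return 0;
}
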
134 | 134 | ||
135 | static int __init numa_add_memblk_to(int nid, u64 start, u64 end, | 135 | static int __init numa_add_memblk_to(int nid, u64 start, u64 end, |
136 | struct numa_meminfo *mi) | 136 | struct numa_meminfo *mi) |
137 | { | 137 | { |
138 | /* ignore zero length blks */ | 138 | /* ignore zero length blks */ |
139 | if (start == end) | 139 | if (start == end) |
140 | return 0; | 140 | return 0; |
141 | 141 | ||
142 | /* whine about and ignore invalid blks */ | 142 | /* whine about and ignore invalid blks */ |
143 | if (start > end || nid < 0 || nid >= MAX_NUMNODES) { | 143 | if (start > end || nid < 0 || nid >= MAX_NUMNODES) { |
144 | pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", | 144 | pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", |
145 | nid, start, end); | 145 | nid, start, end); |
146 | return 0; | 146 | return 0; |
147 | } | 147 | } |
148 | 148 | ||
149 | if (mi->nr_blks >= NR_NODE_MEMBLKS) { | 149 | if (mi->nr_blks >= NR_NODE_MEMBLKS) { |
150 | pr_err("NUMA: too many memblk ranges\n"); | 150 | pr_err("NUMA: too many memblk ranges\n"); |
151 | return -EINVAL; | 151 | return -EINVAL; |
152 | } | 152 | } |
153 | 153 | ||
154 | mi->blk[mi->nr_blks].start = start; | 154 | mi->blk[mi->nr_blks].start = start; |
155 | mi->blk[mi->nr_blks].end = end; | 155 | mi->blk[mi->nr_blks].end = end; |
156 | mi->blk[mi->nr_blks].nid = nid; | 156 | mi->blk[mi->nr_blks].nid = nid; |
157 | mi->nr_blks++; | 157 | mi->nr_blks++; |
158 | return 0; | 158 | return 0; |
159 | } | 159 | } |
160 | 160 | ||
161 | /** | 161 | /** |
162 | * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo | 162 | * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo |
163 | * @idx: Index of memblk to remove | 163 | * @idx: Index of memblk to remove |
164 | * @mi: numa_meminfo to remove memblk from | 164 | * @mi: numa_meminfo to remove memblk from |
165 | * | 165 | * |
166 | * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and | 166 | * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and |
167 | * decrementing @mi->nr_blks. | 167 | * decrementing @mi->nr_blks. |
168 | */ | 168 | */ |
169 | void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) | 169 | void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) |
170 | { | 170 | { |
171 | mi->nr_blks--; | 171 | mi->nr_blks--; |
172 | memmove(&mi->blk[idx], &mi->blk[idx + 1], | 172 | memmove(&mi->blk[idx], &mi->blk[idx + 1], |
173 | (mi->nr_blks - idx) * sizeof(mi->blk[0])); | 173 | (mi->nr_blks - idx) * sizeof(mi->blk[0])); |
174 | } | 174 | } |
175 | 175 | ||
176 | /** | 176 | /** |
177 | * numa_add_memblk - Add one numa_memblk to numa_meminfo | 177 | * numa_add_memblk - Add one numa_memblk to numa_meminfo |
178 | * @nid: NUMA node ID of the new memblk | 178 | * @nid: NUMA node ID of the new memblk |
179 | * @start: Start address of the new memblk | 179 | * @start: Start address of the new memblk |
180 | * @end: End address of the new memblk | 180 | * @end: End address of the new memblk |
181 | * | 181 | * |
182 | * Add a new memblk to the default numa_meminfo. | 182 | * Add a new memblk to the default numa_meminfo. |
183 | * | 183 | * |
184 | * RETURNS: | 184 | * RETURNS: |
185 | * 0 on success, -errno on failure. | 185 | * 0 on success, -errno on failure. |
186 | */ | 186 | */ |
187 | int __init numa_add_memblk(int nid, u64 start, u64 end) | 187 | int __init numa_add_memblk(int nid, u64 start, u64 end) |
188 | { | 188 | { |
189 | return numa_add_memblk_to(nid, start, end, &numa_meminfo); | 189 | return numa_add_memblk_to(nid, start, end, &numa_meminfo); |
190 | } | 190 | } |
191 | 191 | ||
192 | /* Initialize NODE_DATA for a node on the local memory */ | 192 | /* Initialize NODE_DATA for a node on the local memory */ |
193 | static void __init setup_node_data(int nid, u64 start, u64 end) | 193 | static void __init setup_node_data(int nid, u64 start, u64 end) |
194 | { | 194 | { |
195 | const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | 195 | const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
196 | bool remapped = false; | 196 | bool remapped = false; |
197 | u64 nd_pa; | 197 | u64 nd_pa; |
198 | void *nd; | 198 | void *nd; |
199 | int tnid; | 199 | int tnid; |
200 | 200 | ||
201 | /* | 201 | /* |
202 | * Don't confuse VM with a node that doesn't have the | 202 | * Don't confuse VM with a node that doesn't have the |
203 | * minimum amount of memory: | 203 | * minimum amount of memory: |
204 | */ | 204 | */ |
205 | if (end && (end - start) < NODE_MIN_SIZE) | 205 | if (end && (end - start) < NODE_MIN_SIZE) |
206 | return; | 206 | return; |
207 | 207 | ||
208 | /* initialize remap allocator before aligning to ZONE_ALIGN */ | 208 | /* initialize remap allocator before aligning to ZONE_ALIGN */ |
209 | init_alloc_remap(nid, start, end); | 209 | init_alloc_remap(nid, start, end); |
210 | 210 | ||
211 | start = roundup(start, ZONE_ALIGN); | 211 | start = roundup(start, ZONE_ALIGN); |
212 | 212 | ||
213 | printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n", | 213 | printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n", |
214 | nid, start, end); | 214 | nid, start, end); |
215 | 215 | ||
216 | /* | 216 | /* |
217 | * Allocate node data. Try remap allocator first, node-local | 217 | * Allocate node data. Try remap allocator first, node-local |
218 | * memory and then any node. Never allocate in DMA zone. | 218 | * memory and then any node. Never allocate in DMA zone. |
219 | */ | 219 | */ |
220 | nd = alloc_remap(nid, nd_size); | 220 | nd = alloc_remap(nid, nd_size); |
221 | if (nd) { | 221 | if (nd) { |
222 | nd_pa = __pa(nd); | 222 | nd_pa = __pa(nd); |
223 | remapped = true; | 223 | remapped = true; |
224 | } else { | 224 | } else { |
225 | nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); | 225 | nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); |
226 | if (!nd_pa) { | 226 | if (!nd_pa) { |
227 | pr_err("Cannot find %zu bytes in node %d\n", | 227 | pr_err("Cannot find %zu bytes in node %d\n", |
228 | nd_size, nid); | 228 | nd_size, nid); |
229 | return; | 229 | return; |
230 | } | 230 | } |
231 | nd = __va(nd_pa); | 231 | nd = __va(nd_pa); |
232 | } | 232 | } |
233 | 233 | ||
234 | /* report and initialize */ | 234 | /* report and initialize */ |
235 | printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n", | 235 | printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n", |
236 | nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); | 236 | nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); |
237 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); | 237 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); |
238 | if (!remapped && tnid != nid) | 238 | if (!remapped && tnid != nid) |
239 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); | 239 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); |
240 | 240 | ||
241 | node_data[nid] = nd; | 241 | node_data[nid] = nd; |
242 | memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); | 242 | memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); |
243 | NODE_DATA(nid)->node_id = nid; | 243 | NODE_DATA(nid)->node_id = nid; |
244 | NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT; | 244 | NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT; |
245 | NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT; | 245 | NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT; |
246 | 246 | ||
247 | node_set_online(nid); | 247 | node_set_online(nid); |
248 | } | 248 | } |
249 | 249 | ||
250 | /** | 250 | /** |
251 | * numa_cleanup_meminfo - Cleanup a numa_meminfo | 251 | * numa_cleanup_meminfo - Cleanup a numa_meminfo |
252 | * @mi: numa_meminfo to clean up | 252 | * @mi: numa_meminfo to clean up |
253 | * | 253 | * |
254 | * Sanitize @mi by merging and removing unnecessary memblks. Also check for | 254 | * Sanitize @mi by merging and removing unnecessary memblks. Also check for |
255 | * conflicts and clear unused memblks. | 255 | * conflicts and clear unused memblks. |
256 | * | 256 | * |
257 | * RETURNS: | 257 | * RETURNS: |
258 | * 0 on success, -errno on failure. | 258 | * 0 on success, -errno on failure. |
259 | */ | 259 | */ |
260 | int __init numa_cleanup_meminfo(struct numa_meminfo *mi) | 260 | int __init numa_cleanup_meminfo(struct numa_meminfo *mi) |
261 | { | 261 | { |
262 | const u64 low = 0; | 262 | const u64 low = 0; |
263 | const u64 high = PFN_PHYS(max_pfn); | 263 | const u64 high = PFN_PHYS(max_pfn); |
264 | int i, j, k; | 264 | int i, j, k; |
265 | 265 | ||
266 | /* first, trim all entries */ | 266 | /* first, trim all entries */ |
267 | for (i = 0; i < mi->nr_blks; i++) { | 267 | for (i = 0; i < mi->nr_blks; i++) { |
268 | struct numa_memblk *bi = &mi->blk[i]; | 268 | struct numa_memblk *bi = &mi->blk[i]; |
269 | 269 | ||
270 | /* make sure all blocks are inside the limits */ | 270 | /* make sure all blocks are inside the limits */ |
271 | bi->start = max(bi->start, low); | 271 | bi->start = max(bi->start, low); |
272 | bi->end = min(bi->end, high); | 272 | bi->end = min(bi->end, high); |
273 | 273 | ||
274 | /* and there's no empty block */ | 274 | /* and there's no empty block */ |
275 | if (bi->start >= bi->end) | 275 | if (bi->start >= bi->end) |
276 | numa_remove_memblk_from(i--, mi); | 276 | numa_remove_memblk_from(i--, mi); |
277 | } | 277 | } |
278 | 278 | ||
279 | /* merge neighboring / overlapping entries */ | 279 | /* merge neighboring / overlapping entries */ |
280 | for (i = 0; i < mi->nr_blks; i++) { | 280 | for (i = 0; i < mi->nr_blks; i++) { |
281 | struct numa_memblk *bi = &mi->blk[i]; | 281 | struct numa_memblk *bi = &mi->blk[i]; |
282 | 282 | ||
283 | for (j = i + 1; j < mi->nr_blks; j++) { | 283 | for (j = i + 1; j < mi->nr_blks; j++) { |
284 | struct numa_memblk *bj = &mi->blk[j]; | 284 | struct numa_memblk *bj = &mi->blk[j]; |
285 | u64 start, end; | 285 | u64 start, end; |
286 | 286 | ||
287 | /* | 287 | /* |
288 | * See whether there are overlapping blocks. Whine | 288 | * See whether there are overlapping blocks. Whine |
289 | * about but allow overlaps of the same nid. They | 289 | * about but allow overlaps of the same nid. They |
290 | * will be merged below. | 290 | * will be merged below. |
291 | */ | 291 | */ |
292 | if (bi->end > bj->start && bi->start < bj->end) { | 292 | if (bi->end > bj->start && bi->start < bj->end) { |
293 | if (bi->nid != bj->nid) { | 293 | if (bi->nid != bj->nid) { |
294 | pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", | 294 | pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", |
295 | bi->nid, bi->start, bi->end, | 295 | bi->nid, bi->start, bi->end, |
296 | bj->nid, bj->start, bj->end); | 296 | bj->nid, bj->start, bj->end); |
297 | return -EINVAL; | 297 | return -EINVAL; |
298 | } | 298 | } |
299 | pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", | 299 | pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", |
300 | bi->nid, bi->start, bi->end, | 300 | bi->nid, bi->start, bi->end, |
301 | bj->start, bj->end); | 301 | bj->start, bj->end); |
302 | } | 302 | } |
303 | 303 | ||
304 | /* | 304 | /* |
305 | * Join together blocks on the same node, holes | 305 | * Join together blocks on the same node, holes |
306 | * between which don't overlap with memory on other | 306 | * between which don't overlap with memory on other |
307 | * nodes. | 307 | * nodes. |
308 | */ | 308 | */ |
309 | if (bi->nid != bj->nid) | 309 | if (bi->nid != bj->nid) |
310 | continue; | 310 | continue; |
311 | start = min(bi->start, bj->start); | 311 | start = min(bi->start, bj->start); |
312 | end = max(bi->end, bj->end); | 312 | end = max(bi->end, bj->end); |
313 | for (k = 0; k < mi->nr_blks; k++) { | 313 | for (k = 0; k < mi->nr_blks; k++) { |
314 | struct numa_memblk *bk = &mi->blk[k]; | 314 | struct numa_memblk *bk = &mi->blk[k]; |
315 | 315 | ||
316 | if (bi->nid == bk->nid) | 316 | if (bi->nid == bk->nid) |
317 | continue; | 317 | continue; |
318 | if (start < bk->end && end > bk->start) | 318 | if (start < bk->end && end > bk->start) |
319 | break; | 319 | break; |
320 | } | 320 | } |
321 | if (k < mi->nr_blks) | 321 | if (k < mi->nr_blks) |
322 | continue; | 322 | continue; |
323 | printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n", | 323 | printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n", |
324 | bi->nid, bi->start, bi->end, bj->start, bj->end, | 324 | bi->nid, bi->start, bi->end, bj->start, bj->end, |
325 | start, end); | 325 | start, end); |
326 | bi->start = start; | 326 | bi->start = start; |
327 | bi->end = end; | 327 | bi->end = end; |
328 | numa_remove_memblk_from(j--, mi); | 328 | numa_remove_memblk_from(j--, mi); |
329 | } | 329 | } |
330 | } | 330 | } |
331 | 331 | ||
332 | /* clear unused ones */ | 332 | /* clear unused ones */ |
333 | for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) { | 333 | for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) { |
334 | mi->blk[i].start = mi->blk[i].end = 0; | 334 | mi->blk[i].start = mi->blk[i].end = 0; |
335 | mi->blk[i].nid = NUMA_NO_NODE; | 335 | mi->blk[i].nid = NUMA_NO_NODE; |
336 | } | 336 | } |
337 | 337 | ||
338 | return 0; | 338 | return 0; |
339 | } | 339 | } |
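
The merge rule above is easier to see outside the kernel structures. Below is a minimal userspace sketch (the struct and helper names are illustrative, not the kernel's): two blocks with the same nid may only be joined when the combined span does not overlap memory that belongs to a different nid.

#include <stdio.h>
#include <stdint.h>

struct blk { uint64_t start, end; int nid; };

/* Would merging a and b (same nid) swallow memory that belongs to
 * one of the other nodes in all[]?  Mirrors the k-loop above. */
static int can_merge(struct blk a, struct blk b, const struct blk *all, int n)
{
	uint64_t start = a.start < b.start ? a.start : b.start;
	uint64_t end   = a.end   > b.end   ? a.end   : b.end;
	int k;

	for (k = 0; k < n; k++) {
		if (all[k].nid == a.nid)
			continue;
		if (start < all[k].end && end > all[k].start)
			return 0;	/* the hole holds another node's memory */
	}
	return 1;
}

int main(void)
{
	struct blk all[] = {
		{ 0x00000000, 0x10000000, 0 },
		{ 0x20000000, 0x30000000, 0 },
		{ 0x10000000, 0x20000000, 1 },	/* fills the hole between them */
	};

	printf("merge allowed: %d\n", can_merge(all[0], all[1], all, 3));
	return 0;
}

Running it prints 0: node 1's block sits in the hole between the two node 0 blocks, so they stay separate, which corresponds to the "k < mi->nr_blks" continue case above.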
340 | 340 | ||
341 | /* | 341 | /* |
342 | * Set nodes, which have memory in @mi, in *@nodemask. | 342 | * Set nodes, which have memory in @mi, in *@nodemask. |
343 | */ | 343 | */ |
344 | static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, | 344 | static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, |
345 | const struct numa_meminfo *mi) | 345 | const struct numa_meminfo *mi) |
346 | { | 346 | { |
347 | int i; | 347 | int i; |
348 | 348 | ||
349 | for (i = 0; i < ARRAY_SIZE(mi->blk); i++) | 349 | for (i = 0; i < ARRAY_SIZE(mi->blk); i++) |
350 | if (mi->blk[i].start != mi->blk[i].end && | 350 | if (mi->blk[i].start != mi->blk[i].end && |
351 | mi->blk[i].nid != NUMA_NO_NODE) | 351 | mi->blk[i].nid != NUMA_NO_NODE) |
352 | node_set(mi->blk[i].nid, *nodemask); | 352 | node_set(mi->blk[i].nid, *nodemask); |
353 | } | 353 | } |
354 | 354 | ||
355 | /** | 355 | /** |
356 | * numa_reset_distance - Reset NUMA distance table | 356 | * numa_reset_distance - Reset NUMA distance table |
357 | * | 357 | * |
358 | * The current table is freed. The next numa_set_distance() call will | 358 | * The current table is freed. The next numa_set_distance() call will |
359 | * create a new one. | 359 | * create a new one. |
360 | */ | 360 | */ |
361 | void __init numa_reset_distance(void) | 361 | void __init numa_reset_distance(void) |
362 | { | 362 | { |
363 | size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); | 363 | size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); |
364 | 364 | ||
365 | /* numa_distance could be 1LU marking allocation failure, test cnt */ | 365 | /* numa_distance could be 1LU marking allocation failure, test cnt */ |
366 | if (numa_distance_cnt) | 366 | if (numa_distance_cnt) |
367 | memblock_free(__pa(numa_distance), size); | 367 | memblock_free(__pa(numa_distance), size); |
368 | numa_distance_cnt = 0; | 368 | numa_distance_cnt = 0; |
369 | numa_distance = NULL; /* enable table creation */ | 369 | numa_distance = NULL; /* enable table creation */ |
370 | } | 370 | } |
371 | 371 | ||
372 | static int __init numa_alloc_distance(void) | 372 | static int __init numa_alloc_distance(void) |
373 | { | 373 | { |
374 | nodemask_t nodes_parsed; | 374 | nodemask_t nodes_parsed; |
375 | size_t size; | 375 | size_t size; |
376 | int i, j, cnt = 0; | 376 | int i, j, cnt = 0; |
377 | u64 phys; | 377 | u64 phys; |
378 | 378 | ||
379 | /* size the new table and allocate it */ | 379 | /* size the new table and allocate it */ |
380 | nodes_parsed = numa_nodes_parsed; | 380 | nodes_parsed = numa_nodes_parsed; |
381 | numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo); | 381 | numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo); |
382 | 382 | ||
383 | for_each_node_mask(i, nodes_parsed) | 383 | for_each_node_mask(i, nodes_parsed) |
384 | cnt = i; | 384 | cnt = i; |
385 | cnt++; | 385 | cnt++; |
386 | size = cnt * cnt * sizeof(numa_distance[0]); | 386 | size = cnt * cnt * sizeof(numa_distance[0]); |
387 | 387 | ||
388 | phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), | 388 | phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), |
389 | size, PAGE_SIZE); | 389 | size, PAGE_SIZE); |
390 | if (!phys) { | 390 | if (!phys) { |
391 | pr_warning("NUMA: Warning: can't allocate distance table!\n"); | 391 | pr_warning("NUMA: Warning: can't allocate distance table!\n"); |
392 | /* don't retry until explicitly reset */ | 392 | /* don't retry until explicitly reset */ |
393 | numa_distance = (void *)1LU; | 393 | numa_distance = (void *)1LU; |
394 | return -ENOMEM; | 394 | return -ENOMEM; |
395 | } | 395 | } |
396 | memblock_reserve(phys, size); | 396 | memblock_reserve(phys, size); |
397 | 397 | ||
398 | numa_distance = __va(phys); | 398 | numa_distance = __va(phys); |
399 | numa_distance_cnt = cnt; | 399 | numa_distance_cnt = cnt; |
400 | 400 | ||
401 | /* fill with the default distances */ | 401 | /* fill with the default distances */ |
402 | for (i = 0; i < cnt; i++) | 402 | for (i = 0; i < cnt; i++) |
403 | for (j = 0; j < cnt; j++) | 403 | for (j = 0; j < cnt; j++) |
404 | numa_distance[i * cnt + j] = i == j ? | 404 | numa_distance[i * cnt + j] = i == j ? |
405 | LOCAL_DISTANCE : REMOTE_DISTANCE; | 405 | LOCAL_DISTANCE : REMOTE_DISTANCE; |
406 | printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); | 406 | printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); |
407 | 407 | ||
408 | return 0; | 408 | return 0; |
409 | } | 409 | } |
410 | 410 | ||
411 | /** | 411 | /** |
412 | * numa_set_distance - Set the distance from one NUMA node to another | 412 | * numa_set_distance - Set the distance from one NUMA node to another |
413 | * @from: the 'from' node to set distance | 413 | * @from: the 'from' node to set distance |
414 | * @to: the 'to' node to set distance | 414 | * @to: the 'to' node to set distance |
415 | * @distance: NUMA distance | 415 | * @distance: NUMA distance |
416 | * | 416 | * |
417 | * Set the distance from node @from to @to to @distance. If distance table | 417 | * Set the distance from node @from to @to to @distance. If distance table |
418 | * doesn't exist, one which is large enough to accommodate all the currently | 418 | * doesn't exist, one which is large enough to accommodate all the currently |
419 | * known nodes will be created. | 419 | * known nodes will be created. |
420 | * | 420 | * |
421 | * If such table cannot be allocated, a warning is printed and further | 421 | * If such table cannot be allocated, a warning is printed and further |
422 | * calls are ignored until the distance table is reset with | 422 | * calls are ignored until the distance table is reset with |
423 | * numa_reset_distance(). | 423 | * numa_reset_distance(). |
424 | * | 424 | * |
425 | * If @from or @to is higher than the highest known node or lower than zero | 425 | * If @from or @to is higher than the highest known node or lower than zero |
426 | * at the time of table creation or @distance doesn't make sense, the call | 426 | * at the time of table creation or @distance doesn't make sense, the call |
427 | * is ignored. | 427 | * is ignored. |
428 | * This is to allow simplification of specific NUMA config implementations. | 428 | * This is to allow simplification of specific NUMA config implementations. |
429 | */ | 429 | */ |
430 | void __init numa_set_distance(int from, int to, int distance) | 430 | void __init numa_set_distance(int from, int to, int distance) |
431 | { | 431 | { |
432 | if (!numa_distance && numa_alloc_distance() < 0) | 432 | if (!numa_distance && numa_alloc_distance() < 0) |
433 | return; | 433 | return; |
434 | 434 | ||
435 | if (from >= numa_distance_cnt || to >= numa_distance_cnt || | 435 | if (from >= numa_distance_cnt || to >= numa_distance_cnt || |
436 | from < 0 || to < 0) { | 436 | from < 0 || to < 0) { |
437 | pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n", | 437 | pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n", |
438 | from, to, distance); | 438 | from, to, distance); |
439 | return; | 439 | return; |
440 | } | 440 | } |
441 | 441 | ||
442 | if ((u8)distance != distance || | 442 | if ((u8)distance != distance || |
443 | (from == to && distance != LOCAL_DISTANCE)) { | 443 | (from == to && distance != LOCAL_DISTANCE)) { |
444 | pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", | 444 | pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", |
445 | from, to, distance); | 445 | from, to, distance); |
446 | return; | 446 | return; |
447 | } | 447 | } |
448 | 448 | ||
449 | numa_distance[from * numa_distance_cnt + to] = distance; | 449 | numa_distance[from * numa_distance_cnt + to] = distance; |
450 | } | 450 | } |
451 | 451 | ||
452 | int __node_distance(int from, int to) | 452 | int __node_distance(int from, int to) |
453 | { | 453 | { |
454 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) | 454 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) |
455 | return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; | 455 | return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; |
456 | return numa_distance[from * numa_distance_cnt + to]; | 456 | return numa_distance[from * numa_distance_cnt + to]; |
457 | } | 457 | } |
458 | EXPORT_SYMBOL(__node_distance); | 458 | EXPORT_SYMBOL(__node_distance); |
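
As the kernel-doc above describes, the table is created lazily on the first numa_set_distance() call and read back through __node_distance(). A minimal sketch of how an early-boot parser might populate it follows; fill_numa_distances() and slit_entry() are hypothetical stand-ins for however the firmware matrix is actually read, not functions from this file.

/* Sketch only: feed pairwise distances into the lazily-created table.
 * slit_entry() is a hypothetical placeholder for the firmware data. */
static u8 __init slit_entry(int i, int j)
{
	return i == j ? LOCAL_DISTANCE : 2 * REMOTE_DISTANCE;
}

static void __init fill_numa_distances(int nr_nodes)
{
	int i, j;

	for (i = 0; i < nr_nodes; i++)
		for (j = 0; j < nr_nodes; j++)
			numa_set_distance(i, j, slit_entry(i, j));
}

Because out-of-range node ids and nonsensical distances are dropped with a one-time warning (see the checks in numa_set_distance() above), such a caller does not need to pre-validate the firmware values.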
459 | 459 | ||
460 | /* | 460 | /* |
461 | * Sanity check to catch more bad NUMA configurations (they are amazingly | 461 | * Sanity check to catch more bad NUMA configurations (they are amazingly |
462 | * common). Make sure the nodes cover all memory. | 462 | * common). Make sure the nodes cover all memory. |
463 | */ | 463 | */ |
464 | static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) | 464 | static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) |
465 | { | 465 | { |
466 | u64 numaram, e820ram; | 466 | u64 numaram, e820ram; |
467 | int i; | 467 | int i; |
468 | 468 | ||
469 | numaram = 0; | 469 | numaram = 0; |
470 | for (i = 0; i < mi->nr_blks; i++) { | 470 | for (i = 0; i < mi->nr_blks; i++) { |
471 | u64 s = mi->blk[i].start >> PAGE_SHIFT; | 471 | u64 s = mi->blk[i].start >> PAGE_SHIFT; |
472 | u64 e = mi->blk[i].end >> PAGE_SHIFT; | 472 | u64 e = mi->blk[i].end >> PAGE_SHIFT; |
473 | numaram += e - s; | 473 | numaram += e - s; |
474 | numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); | 474 | numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); |
475 | if ((s64)numaram < 0) | 475 | if ((s64)numaram < 0) |
476 | numaram = 0; | 476 | numaram = 0; |
477 | } | 477 | } |
478 | 478 | ||
479 | e820ram = max_pfn - absent_pages_in_range(0, max_pfn); | 479 | e820ram = max_pfn - absent_pages_in_range(0, max_pfn); |
480 | 480 | ||
481 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ | 481 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ |
482 | if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { | 482 | if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { |
483 | printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", | 483 | printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", |
484 | (numaram << PAGE_SHIFT) >> 20, | 484 | (numaram << PAGE_SHIFT) >> 20, |
485 | (e820ram << PAGE_SHIFT) >> 20); | 485 | (e820ram << PAGE_SHIFT) >> 20); |
486 | return false; | 486 | return false; |
487 | } | 487 | } |
488 | return true; | 488 | return true; |
489 | } | 489 | } |
490 | 490 | ||
491 | static int __init numa_register_memblks(struct numa_meminfo *mi) | 491 | static int __init numa_register_memblks(struct numa_meminfo *mi) |
492 | { | 492 | { |
493 | unsigned long uninitialized_var(pfn_align); | 493 | unsigned long uninitialized_var(pfn_align); |
494 | int i, nid; | 494 | int i, nid; |
495 | 495 | ||
496 | /* Account for nodes with cpus and no memory */ | 496 | /* Account for nodes with cpus and no memory */ |
497 | node_possible_map = numa_nodes_parsed; | 497 | node_possible_map = numa_nodes_parsed; |
498 | numa_nodemask_from_meminfo(&node_possible_map, mi); | 498 | numa_nodemask_from_meminfo(&node_possible_map, mi); |
499 | if (WARN_ON(nodes_empty(node_possible_map))) | 499 | if (WARN_ON(nodes_empty(node_possible_map))) |
500 | return -EINVAL; | 500 | return -EINVAL; |
501 | 501 | ||
502 | for (i = 0; i < mi->nr_blks; i++) { | 502 | for (i = 0; i < mi->nr_blks; i++) { |
503 | struct numa_memblk *mb = &mi->blk[i]; | 503 | struct numa_memblk *mb = &mi->blk[i]; |
504 | memblock_set_node(mb->start, mb->end - mb->start, mb->nid); | 504 | memblock_set_node(mb->start, mb->end - mb->start, mb->nid); |
505 | } | 505 | } |
506 | 506 | ||
507 | /* | 507 | /* |
508 | * If the sections array is going to be used for pfn -> nid mapping, check | 508 | * If the sections array is going to be used for pfn -> nid mapping, check |
509 | * whether its granularity is fine enough. | 509 | * whether its granularity is fine enough. |
510 | */ | 510 | */ |
511 | #ifdef NODE_NOT_IN_PAGE_FLAGS | 511 | #ifdef NODE_NOT_IN_PAGE_FLAGS |
512 | pfn_align = node_map_pfn_alignment(); | 512 | pfn_align = node_map_pfn_alignment(); |
513 | if (pfn_align && pfn_align < PAGES_PER_SECTION) { | 513 | if (pfn_align && pfn_align < PAGES_PER_SECTION) { |
514 | printk(KERN_WARNING "Node alignment %LuMB < min %LuMB, rejecting NUMA config\n", | 514 | printk(KERN_WARNING "Node alignment %LuMB < min %LuMB, rejecting NUMA config\n", |
515 | PFN_PHYS(pfn_align) >> 20, | 515 | PFN_PHYS(pfn_align) >> 20, |
516 | PFN_PHYS(PAGES_PER_SECTION) >> 20); | 516 | PFN_PHYS(PAGES_PER_SECTION) >> 20); |
517 | return -EINVAL; | 517 | return -EINVAL; |
518 | } | 518 | } |
519 | #endif | 519 | #endif |
520 | if (!numa_meminfo_cover_memory(mi)) | 520 | if (!numa_meminfo_cover_memory(mi)) |
521 | return -EINVAL; | 521 | return -EINVAL; |
522 | 522 | ||
523 | /* Finally register nodes. */ | 523 | /* Finally register nodes. */ |
524 | for_each_node_mask(nid, node_possible_map) { | 524 | for_each_node_mask(nid, node_possible_map) { |
525 | u64 start = PFN_PHYS(max_pfn); | 525 | u64 start = PFN_PHYS(max_pfn); |
526 | u64 end = 0; | 526 | u64 end = 0; |
527 | 527 | ||
528 | for (i = 0; i < mi->nr_blks; i++) { | 528 | for (i = 0; i < mi->nr_blks; i++) { |
529 | if (nid != mi->blk[i].nid) | 529 | if (nid != mi->blk[i].nid) |
530 | continue; | 530 | continue; |
531 | start = min(mi->blk[i].start, start); | 531 | start = min(mi->blk[i].start, start); |
532 | end = max(mi->blk[i].end, end); | 532 | end = max(mi->blk[i].end, end); |
533 | } | 533 | } |
534 | 534 | ||
535 | if (start < end) | 535 | if (start < end) |
536 | setup_node_data(nid, start, end); | 536 | setup_node_data(nid, start, end); |
537 | } | 537 | } |
538 | 538 | ||
539 | /* Dump memblock with node info and return. */ | 539 | /* Dump memblock with node info and return. */ |
540 | memblock_dump_all(); | 540 | memblock_dump_all(); |
541 | return 0; | 541 | return 0; |
542 | } | 542 | } |
543 | 543 | ||
544 | /* | 544 | /* |
545 | * There are unfortunately some poorly designed mainboards around that | 545 | * There are unfortunately some poorly designed mainboards around that |
546 | * only connect memory to a single CPU. This breaks the 1:1 cpu->node | 546 | * only connect memory to a single CPU. This breaks the 1:1 cpu->node |
547 | * mapping. To avoid this fill in the mapping for all possible CPUs, | 547 | * mapping. To avoid this fill in the mapping for all possible CPUs, |
548 | * as the number of CPUs is not known yet. We round robin the existing | 548 | * as the number of CPUs is not known yet. We round robin the existing |
549 | * nodes. | 549 | * nodes. |
550 | */ | 550 | */ |
551 | static void __init numa_init_array(void) | 551 | static void __init numa_init_array(void) |
552 | { | 552 | { |
553 | int rr, i; | 553 | int rr, i; |
554 | 554 | ||
555 | rr = first_node(node_online_map); | 555 | rr = first_node(node_online_map); |
556 | for (i = 0; i < nr_cpu_ids; i++) { | 556 | for (i = 0; i < nr_cpu_ids; i++) { |
557 | if (early_cpu_to_node(i) != NUMA_NO_NODE) | 557 | if (early_cpu_to_node(i) != NUMA_NO_NODE) |
558 | continue; | 558 | continue; |
559 | numa_set_node(i, rr); | 559 | numa_set_node(i, rr); |
560 | rr = next_node(rr, node_online_map); | 560 | rr = next_node(rr, node_online_map); |
561 | if (rr == MAX_NUMNODES) | 561 | if (rr == MAX_NUMNODES) |
562 | rr = first_node(node_online_map); | 562 | rr = first_node(node_online_map); |
563 | } | 563 | } |
564 | } | 564 | } |
565 | 565 | ||
566 | static int __init numa_init(int (*init_func)(void)) | 566 | static int __init numa_init(int (*init_func)(void)) |
567 | { | 567 | { |
568 | int i; | 568 | int i; |
569 | int ret; | 569 | int ret; |
570 | 570 | ||
571 | for (i = 0; i < MAX_LOCAL_APIC; i++) | 571 | for (i = 0; i < MAX_LOCAL_APIC; i++) |
572 | set_apicid_to_node(i, NUMA_NO_NODE); | 572 | set_apicid_to_node(i, NUMA_NO_NODE); |
573 | 573 | ||
574 | nodes_clear(numa_nodes_parsed); | 574 | nodes_clear(numa_nodes_parsed); |
575 | nodes_clear(node_possible_map); | 575 | nodes_clear(node_possible_map); |
576 | nodes_clear(node_online_map); | 576 | nodes_clear(node_online_map); |
577 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); | 577 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); |
578 | WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES)); | 578 | WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES)); |
579 | numa_reset_distance(); | 579 | numa_reset_distance(); |
580 | 580 | ||
581 | ret = init_func(); | 581 | ret = init_func(); |
582 | if (ret < 0) | 582 | if (ret < 0) |
583 | return ret; | 583 | return ret; |
584 | ret = numa_cleanup_meminfo(&numa_meminfo); | 584 | ret = numa_cleanup_meminfo(&numa_meminfo); |
585 | if (ret < 0) | 585 | if (ret < 0) |
586 | return ret; | 586 | return ret; |
587 | 587 | ||
588 | numa_emulation(&numa_meminfo, numa_distance_cnt); | 588 | numa_emulation(&numa_meminfo, numa_distance_cnt); |
589 | 589 | ||
590 | ret = numa_register_memblks(&numa_meminfo); | 590 | ret = numa_register_memblks(&numa_meminfo); |
591 | if (ret < 0) | 591 | if (ret < 0) |
592 | return ret; | 592 | return ret; |
593 | 593 | ||
594 | for (i = 0; i < nr_cpu_ids; i++) { | 594 | for (i = 0; i < nr_cpu_ids; i++) { |
595 | int nid = early_cpu_to_node(i); | 595 | int nid = early_cpu_to_node(i); |
596 | 596 | ||
597 | if (nid == NUMA_NO_NODE) | 597 | if (nid == NUMA_NO_NODE) |
598 | continue; | 598 | continue; |
599 | if (!node_online(nid)) | 599 | if (!node_online(nid)) |
600 | numa_clear_node(i); | 600 | numa_clear_node(i); |
601 | } | 601 | } |
602 | numa_init_array(); | 602 | numa_init_array(); |
603 | return 0; | 603 | return 0; |
604 | } | 604 | } |
605 | 605 | ||
606 | /** | 606 | /** |
607 | * dummy_numa_init - Fallback dummy NUMA init | 607 | * dummy_numa_init - Fallback dummy NUMA init |
608 | * | 608 | * |
609 | * Used if there's no underlying NUMA architecture, NUMA initialization | 609 | * Used if there's no underlying NUMA architecture, NUMA initialization |
610 | * fails, or NUMA is disabled on the command line. | 610 | * fails, or NUMA is disabled on the command line. |
611 | * | 611 | * |
612 | * Must online at least one node and add memory blocks that cover all | 612 | * Must online at least one node and add memory blocks that cover all |
613 | * allowed memory. This function must not fail. | 613 | * allowed memory. This function must not fail. |
614 | */ | 614 | */ |
615 | static int __init dummy_numa_init(void) | 615 | static int __init dummy_numa_init(void) |
616 | { | 616 | { |
617 | printk(KERN_INFO "%s\n", | 617 | printk(KERN_INFO "%s\n", |
618 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); | 618 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); |
619 | printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n", | 619 | printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n", |
620 | 0LLU, PFN_PHYS(max_pfn)); | 620 | 0LLU, PFN_PHYS(max_pfn)); |
621 | 621 | ||
622 | node_set(0, numa_nodes_parsed); | 622 | node_set(0, numa_nodes_parsed); |
623 | numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); | 623 | numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); |
624 | 624 | ||
625 | return 0; | 625 | return 0; |
626 | } | 626 | } |
627 | 627 | ||
628 | /** | 628 | /** |
629 | * x86_numa_init - Initialize NUMA | 629 | * x86_numa_init - Initialize NUMA |
630 | * | 630 | * |
631 | * Try each configured NUMA initialization method until one succeeds. The | 631 | * Try each configured NUMA initialization method until one succeeds. The |
632 | * last fallback is a dummy single-node config encompassing all memory and | 632 | * last fallback is a dummy single-node config encompassing all memory and |
633 | * never fails. | 633 | * never fails. |
634 | */ | 634 | */ |
635 | void __init x86_numa_init(void) | 635 | void __init x86_numa_init(void) |
636 | { | 636 | { |
637 | if (!numa_off) { | 637 | if (!numa_off) { |
638 | #ifdef CONFIG_X86_NUMAQ | 638 | #ifdef CONFIG_X86_NUMAQ |
639 | if (!numa_init(numaq_numa_init)) | 639 | if (!numa_init(numaq_numa_init)) |
640 | return; | 640 | return; |
641 | #endif | 641 | #endif |
642 | #ifdef CONFIG_ACPI_NUMA | 642 | #ifdef CONFIG_ACPI_NUMA |
643 | if (!numa_init(x86_acpi_numa_init)) | 643 | if (!numa_init(x86_acpi_numa_init)) |
644 | return; | 644 | return; |
645 | #endif | 645 | #endif |
646 | #ifdef CONFIG_AMD_NUMA | 646 | #ifdef CONFIG_AMD_NUMA |
647 | if (!numa_init(amd_numa_init)) | 647 | if (!numa_init(amd_numa_init)) |
648 | return; | 648 | return; |
649 | #endif | 649 | #endif |
650 | } | 650 | } |
651 | 651 | ||
652 | numa_init(dummy_numa_init); | 652 | numa_init(dummy_numa_init); |
653 | } | 653 | } |
654 | 654 | ||
655 | static __init int find_near_online_node(int node) | 655 | static __init int find_near_online_node(int node) |
656 | { | 656 | { |
657 | int n, val; | 657 | int n, val; |
658 | int min_val = INT_MAX; | 658 | int min_val = INT_MAX; |
659 | int best_node = -1; | 659 | int best_node = -1; |
660 | 660 | ||
661 | for_each_online_node(n) { | 661 | for_each_online_node(n) { |
662 | val = node_distance(node, n); | 662 | val = node_distance(node, n); |
663 | 663 | ||
664 | if (val < min_val) { | 664 | if (val < min_val) { |
665 | min_val = val; | 665 | min_val = val; |
666 | best_node = n; | 666 | best_node = n; |
667 | } | 667 | } |
668 | } | 668 | } |
669 | 669 | ||
670 | return best_node; | 670 | return best_node; |
671 | } | 671 | } |
672 | 672 | ||
673 | /* | 673 | /* |
674 | * Setup early cpu_to_node. | 674 | * Setup early cpu_to_node. |
675 | * | 675 | * |
676 | * Populate cpu_to_node[] only if the x86_cpu_to_apicid[] | 676 | * Populate cpu_to_node[] only if the x86_cpu_to_apicid[] |
677 | * and apicid_to_node[] tables have valid entries for a CPU. | 677 | * and apicid_to_node[] tables have valid entries for a CPU. |
678 | * This means we skip cpu_to_node[] initialisation for NUMA | 678 | * This means we skip cpu_to_node[] initialisation for NUMA |
679 | * emulation and faking node case (when running a kernel compiled | 679 | * emulation and faking node case (when running a kernel compiled |
680 | * for NUMA on a non NUMA box), which is OK as cpu_to_node[] | 680 | * for NUMA on a non NUMA box), which is OK as cpu_to_node[] |
681 | * is already initialized in a round robin manner at numa_init_array, | 681 | * is already initialized in a round robin manner at numa_init_array, |
682 | * prior to this call, and this initialization is good enough | 682 | * prior to this call, and this initialization is good enough |
683 | * for the fake NUMA cases. | 683 | * for the fake NUMA cases. |
684 | * | 684 | * |
685 | * Called before the per_cpu areas are setup. | 685 | * Called before the per_cpu areas are setup. |
686 | */ | 686 | */ |
687 | void __init init_cpu_to_node(void) | 687 | void __init init_cpu_to_node(void) |
688 | { | 688 | { |
689 | int cpu; | 689 | int cpu; |
690 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); | 690 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); |
691 | 691 | ||
692 | BUG_ON(cpu_to_apicid == NULL); | 692 | BUG_ON(cpu_to_apicid == NULL); |
693 | 693 | ||
694 | for_each_possible_cpu(cpu) { | 694 | for_each_possible_cpu(cpu) { |
695 | int node = numa_cpu_node(cpu); | 695 | int node = numa_cpu_node(cpu); |
696 | 696 | ||
697 | if (node == NUMA_NO_NODE) | 697 | if (node == NUMA_NO_NODE) |
698 | continue; | 698 | continue; |
699 | if (!node_online(node)) | 699 | if (!node_online(node)) |
700 | node = find_near_online_node(node); | 700 | node = find_near_online_node(node); |
701 | numa_set_node(cpu, node); | 701 | numa_set_node(cpu, node); |
702 | } | 702 | } |
703 | } | 703 | } |
704 | 704 | ||
705 | #ifndef CONFIG_DEBUG_PER_CPU_MAPS | 705 | #ifndef CONFIG_DEBUG_PER_CPU_MAPS |
706 | 706 | ||
707 | # ifndef CONFIG_NUMA_EMU | 707 | # ifndef CONFIG_NUMA_EMU |
708 | void __cpuinit numa_add_cpu(int cpu) | 708 | void __cpuinit numa_add_cpu(int cpu) |
709 | { | 709 | { |
710 | cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | 710 | cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); |
711 | } | 711 | } |
712 | 712 | ||
713 | void __cpuinit numa_remove_cpu(int cpu) | 713 | void __cpuinit numa_remove_cpu(int cpu) |
714 | { | 714 | { |
715 | cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | 715 | cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); |
716 | } | 716 | } |
717 | # endif /* !CONFIG_NUMA_EMU */ | 717 | # endif /* !CONFIG_NUMA_EMU */ |
718 | 718 | ||
719 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ | 719 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ |
720 | 720 | ||
721 | int __cpu_to_node(int cpu) | 721 | int __cpu_to_node(int cpu) |
722 | { | 722 | { |
723 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { | 723 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { |
724 | printk(KERN_WARNING | 724 | printk(KERN_WARNING |
725 | "cpu_to_node(%d): usage too early!\n", cpu); | 725 | "cpu_to_node(%d): usage too early!\n", cpu); |
726 | dump_stack(); | 726 | dump_stack(); |
727 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | 727 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; |
728 | } | 728 | } |
729 | return per_cpu(x86_cpu_to_node_map, cpu); | 729 | return per_cpu(x86_cpu_to_node_map, cpu); |
730 | } | 730 | } |
731 | EXPORT_SYMBOL(__cpu_to_node); | 731 | EXPORT_SYMBOL(__cpu_to_node); |
732 | 732 | ||
733 | /* | 733 | /* |
734 | * Same function as cpu_to_node() but used if called before the | 734 | * Same function as cpu_to_node() but used if called before the |
735 | * per_cpu areas are setup. | 735 | * per_cpu areas are setup. |
736 | */ | 736 | */ |
737 | int early_cpu_to_node(int cpu) | 737 | int early_cpu_to_node(int cpu) |
738 | { | 738 | { |
739 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) | 739 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) |
740 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | 740 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; |
741 | 741 | ||
742 | if (!cpu_possible(cpu)) { | 742 | if (!cpu_possible(cpu)) { |
743 | printk(KERN_WARNING | 743 | printk(KERN_WARNING |
744 | "early_cpu_to_node(%d): no per_cpu area!\n", cpu); | 744 | "early_cpu_to_node(%d): no per_cpu area!\n", cpu); |
745 | dump_stack(); | 745 | dump_stack(); |
746 | return NUMA_NO_NODE; | 746 | return NUMA_NO_NODE; |
747 | } | 747 | } |
748 | return per_cpu(x86_cpu_to_node_map, cpu); | 748 | return per_cpu(x86_cpu_to_node_map, cpu); |
749 | } | 749 | } |
750 | 750 | ||
751 | void debug_cpumask_set_cpu(int cpu, int node, bool enable) | 751 | void debug_cpumask_set_cpu(int cpu, int node, bool enable) |
752 | { | 752 | { |
753 | struct cpumask *mask; | 753 | struct cpumask *mask; |
754 | char buf[64]; | 754 | char buf[64]; |
755 | 755 | ||
756 | if (node == NUMA_NO_NODE) { | 756 | if (node == NUMA_NO_NODE) { |
757 | /* early_cpu_to_node() already emits a warning and trace */ | 757 | /* early_cpu_to_node() already emits a warning and trace */ |
758 | return; | 758 | return; |
759 | } | 759 | } |
760 | mask = node_to_cpumask_map[node]; | 760 | mask = node_to_cpumask_map[node]; |
761 | if (!mask) { | 761 | if (!mask) { |
762 | pr_err("node_to_cpumask_map[%i] NULL\n", node); | 762 | pr_err("node_to_cpumask_map[%i] NULL\n", node); |
763 | dump_stack(); | 763 | dump_stack(); |
764 | return; | 764 | return; |
765 | } | 765 | } |
766 | 766 | ||
767 | if (enable) | 767 | if (enable) |
768 | cpumask_set_cpu(cpu, mask); | 768 | cpumask_set_cpu(cpu, mask); |
769 | else | 769 | else |
770 | cpumask_clear_cpu(cpu, mask); | 770 | cpumask_clear_cpu(cpu, mask); |
771 | 771 | ||
772 | cpulist_scnprintf(buf, sizeof(buf), mask); | 772 | cpulist_scnprintf(buf, sizeof(buf), mask); |
773 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | 773 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", |
774 | enable ? "numa_add_cpu" : "numa_remove_cpu", | 774 | enable ? "numa_add_cpu" : "numa_remove_cpu", |
775 | cpu, node, buf); | 775 | cpu, node, buf); |
776 | return; | 776 | return; |
777 | } | 777 | } |
778 | 778 | ||
779 | # ifndef CONFIG_NUMA_EMU | 779 | # ifndef CONFIG_NUMA_EMU |
780 | static void __cpuinit numa_set_cpumask(int cpu, bool enable) | 780 | static void __cpuinit numa_set_cpumask(int cpu, bool enable) |
781 | { | 781 | { |
782 | debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable); | 782 | debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable); |
783 | } | 783 | } |
784 | 784 | ||
785 | void __cpuinit numa_add_cpu(int cpu) | 785 | void __cpuinit numa_add_cpu(int cpu) |
786 | { | 786 | { |
787 | numa_set_cpumask(cpu, true); | 787 | numa_set_cpumask(cpu, true); |
788 | } | 788 | } |
789 | 789 | ||
790 | void __cpuinit numa_remove_cpu(int cpu) | 790 | void __cpuinit numa_remove_cpu(int cpu) |
791 | { | 791 | { |
792 | numa_set_cpumask(cpu, false); | 792 | numa_set_cpumask(cpu, false); |
793 | } | 793 | } |
794 | # endif /* !CONFIG_NUMA_EMU */ | 794 | # endif /* !CONFIG_NUMA_EMU */ |
795 | 795 | ||
796 | /* | 796 | /* |
797 | * Returns a pointer to the bitmask of CPUs on Node 'node'. | 797 | * Returns a pointer to the bitmask of CPUs on Node 'node'. |
798 | */ | 798 | */ |
799 | const struct cpumask *cpumask_of_node(int node) | 799 | const struct cpumask *cpumask_of_node(int node) |
800 | { | 800 | { |
801 | if (node >= nr_node_ids) { | 801 | if (node >= nr_node_ids) { |
802 | printk(KERN_WARNING | 802 | printk(KERN_WARNING |
803 | "cpumask_of_node(%d): node >= nr_node_ids(%d)\n", | 803 | "cpumask_of_node(%d): node >= nr_node_ids(%d)\n", |
804 | node, nr_node_ids); | 804 | node, nr_node_ids); |
805 | dump_stack(); | 805 | dump_stack(); |
806 | return cpu_none_mask; | 806 | return cpu_none_mask; |
807 | } | 807 | } |
808 | if (node_to_cpumask_map[node] == NULL) { | 808 | if (node_to_cpumask_map[node] == NULL) { |
809 | printk(KERN_WARNING | 809 | printk(KERN_WARNING |
810 | "cpumask_of_node(%d): no node_to_cpumask_map!\n", | 810 | "cpumask_of_node(%d): no node_to_cpumask_map!\n", |
811 | node); | 811 | node); |
812 | dump_stack(); | 812 | dump_stack(); |
813 | return cpu_online_mask; | 813 | return cpu_online_mask; |
814 | } | 814 | } |
815 | return node_to_cpumask_map[node]; | 815 | return node_to_cpumask_map[node]; |
816 | } | 816 | } |
817 | EXPORT_SYMBOL(cpumask_of_node); | 817 | EXPORT_SYMBOL(cpumask_of_node); |
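
Callers treat the returned mask as read-only; updates go through numa_add_cpu()/numa_remove_cpu() above. A minimal usage sketch (the helper name is illustrative, not from this file):

/* Illustrative only: walk the CPUs currently recorded for 'node'. */
static void print_node_cpus(int node)
{
	int cpu;

	for_each_cpu(cpu, cpumask_of_node(node))
		pr_info("node %d: cpu %d\n", node, cpu);
}

If the per-node mask was never allocated, this debug variant falls back to cpu_online_mask (see above) rather than handing the caller a NULL pointer.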
818 | 818 | ||
819 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ | 819 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ |
820 | 820 | ||
821 | #ifdef CONFIG_MEMORY_HOTPLUG | 821 | #ifdef CONFIG_MEMORY_HOTPLUG |
822 | int memory_add_physaddr_to_nid(u64 start) | 822 | int memory_add_physaddr_to_nid(u64 start) |
823 | { | 823 | { |
824 | struct numa_meminfo *mi = &numa_meminfo; | 824 | struct numa_meminfo *mi = &numa_meminfo; |
825 | int nid = mi->blk[0].nid; | 825 | int nid = mi->blk[0].nid; |
826 | int i; | 826 | int i; |
827 | 827 | ||
828 | for (i = 0; i < mi->nr_blks; i++) | 828 | for (i = 0; i < mi->nr_blks; i++) |
829 | if (mi->blk[i].start <= start && mi->blk[i].end > start) | 829 | if (mi->blk[i].start <= start && mi->blk[i].end > start) |
830 | nid = mi->blk[i].nid; | 830 | nid = mi->blk[i].nid; |
831 | return nid; | 831 | return nid; |
832 | } | 832 | } |
833 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | 833 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); |
834 | #endif | 834 | #endif |
835 | 835 |