Commit 6ec6e0d9f2fd7cb6ca6bc3bfab5ae7b5cdd8c36f
Committed by
Ingo Molnar
1 parent
8705a49c35
Exists in
master
and in
20 other branches
srat, x86: add support for nodes spanning other nodes
For example, if the physical address layout on a two-node system with 8 GB of memory is something like: node 0: 0-2GB, 4-6GB; node 1: 2-4GB, 6-8GB. Current kernels fail to boot/detect this NUMA topology. ACPI SRAT tables can expose such a topology, which needs to be supported. Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Showing 5 changed files with 44 additions and 18 deletions Side-by-side Diff
arch/x86/Kconfig
... | ... | @@ -903,6 +903,15 @@ |
903 | 903 | help |
904 | 904 | Enable ACPI SRAT based node topology detection. |
905 | 905 | |
906 | +# Some NUMA nodes have memory ranges that span | |
907 | +# other nodes. Even though a pfn is valid and | |
908 | +# between a node's start and end pfns, it may not | |
909 | +# reside on that node. See memmap_init_zone() | |
910 | +# for details. | |
911 | +config NODES_SPAN_OTHER_NODES | |
912 | + def_bool y | |
913 | + depends on X86_64_ACPI_NUMA | |
914 | + | |
906 | 915 | config NUMA_EMU |
907 | 916 | bool "NUMA emulation" |
908 | 917 | depends on X86_64 && NUMA |
arch/x86/mm/k8topology_64.c
... | ... | @@ -164,7 +164,7 @@ |
164 | 164 | if (!found) |
165 | 165 | return -1; |
166 | 166 | |
167 | - memnode_shift = compute_hash_shift(nodes, 8); | |
167 | + memnode_shift = compute_hash_shift(nodes, 8, NULL); | |
168 | 168 | if (memnode_shift < 0) { |
169 | 169 | printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); |
170 | 170 | return -1; |
arch/x86/mm/numa_64.c
... | ... | @@ -60,7 +60,7 @@ |
60 | 60 | * -1 if node overlap or lost ram (shift too big) |
61 | 61 | */ |
62 | 62 | static int __init populate_memnodemap(const struct bootnode *nodes, |
63 | - int numnodes, int shift) | |
63 | + int numnodes, int shift, int *nodeids) | |
64 | 64 | { |
65 | 65 | unsigned long addr, end; |
66 | 66 | int i, res = -1; |
... | ... | @@ -76,7 +76,12 @@ |
76 | 76 | do { |
77 | 77 | if (memnodemap[addr >> shift] != NUMA_NO_NODE) |
78 | 78 | return -1; |
79 | - memnodemap[addr >> shift] = i; | |
79 | + | |
80 | + if (!nodeids) | |
81 | + memnodemap[addr >> shift] = i; | |
82 | + else | |
83 | + memnodemap[addr >> shift] = nodeids[i]; | |
84 | + | |
80 | 85 | addr += (1UL << shift); |
81 | 86 | } while (addr < end); |
82 | 87 | res = 1; |
... | ... | @@ -139,7 +144,8 @@ |
139 | 144 | return i; |
140 | 145 | } |
141 | 146 | |
142 | -int __init compute_hash_shift(struct bootnode *nodes, int numnodes) | |
147 | +int __init compute_hash_shift(struct bootnode *nodes, int numnodes, | |
148 | + int *nodeids) | |
143 | 149 | { |
144 | 150 | int shift; |
145 | 151 | |
... | ... | @@ -149,7 +155,7 @@ |
149 | 155 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", |
150 | 156 | shift); |
151 | 157 | |
152 | - if (populate_memnodemap(nodes, numnodes, shift) != 1) { | |
158 | + if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) { | |
153 | 159 | printk(KERN_INFO "Your memory is not aligned you need to " |
154 | 160 | "rebuild your kernel with a bigger NODEMAPSIZE " |
155 | 161 | "shift=%d\n", shift); |
... | ... | @@ -462,7 +468,7 @@ |
462 | 468 | } |
463 | 469 | } |
464 | 470 | out: |
465 | - memnode_shift = compute_hash_shift(nodes, num_nodes); | |
471 | + memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); | |
466 | 472 | if (memnode_shift < 0) { |
467 | 473 | memnode_shift = 0; |
468 | 474 | printk(KERN_ERR "No NUMA hash function found. NUMA emulation " |
arch/x86/mm/srat_64.c
... | ... | @@ -32,6 +32,10 @@ |
32 | 32 | static int found_add_area __initdata; |
33 | 33 | int hotadd_percent __initdata = 0; |
34 | 34 | |
35 | +static int num_node_memblks __initdata; | |
36 | +static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; | |
37 | +static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; | |
38 | + | |
35 | 39 | /* Too small nodes confuse the VM badly. Usually they result |
36 | 40 | from BIOS bugs. */ |
37 | 41 | #define NODE_MIN_SIZE (4*1024*1024) |
38 | 42 | |
39 | 43 | |
40 | 44 | |
... | ... | @@ -41,17 +45,17 @@ |
41 | 45 | return acpi_map_pxm_to_node(pxm); |
42 | 46 | } |
43 | 47 | |
44 | -static __init int conflicting_nodes(unsigned long start, unsigned long end) | |
48 | +static __init int conflicting_memblks(unsigned long start, unsigned long end) | |
45 | 49 | { |
46 | 50 | int i; |
47 | - for_each_node_mask(i, nodes_parsed) { | |
48 | - struct bootnode *nd = &nodes[i]; | |
51 | + for (i = 0; i < num_node_memblks; i++) { | |
52 | + struct bootnode *nd = &node_memblk_range[i]; | |
49 | 53 | if (nd->start == nd->end) |
50 | 54 | continue; |
51 | 55 | if (nd->end > start && nd->start < end) |
52 | - return i; | |
56 | + return memblk_nodeid[i]; | |
53 | 57 | if (nd->end == end && nd->start == start) |
54 | - return i; | |
58 | + return memblk_nodeid[i]; | |
55 | 59 | } |
56 | 60 | return -1; |
57 | 61 | } |
... | ... | @@ -258,7 +262,7 @@ |
258 | 262 | bad_srat(); |
259 | 263 | return; |
260 | 264 | } |
261 | - i = conflicting_nodes(start, end); | |
265 | + i = conflicting_memblks(start, end); | |
262 | 266 | if (i == node) { |
263 | 267 | printk(KERN_WARNING |
264 | 268 | "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", |
... | ... | @@ -283,10 +287,10 @@ |
283 | 287 | nd->end = end; |
284 | 288 | } |
285 | 289 | |
286 | - printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, | |
287 | - nd->start, nd->end); | |
288 | - e820_register_active_regions(node, nd->start >> PAGE_SHIFT, | |
289 | - nd->end >> PAGE_SHIFT); | |
290 | + printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, | |
291 | + start, end); | |
292 | + e820_register_active_regions(node, start >> PAGE_SHIFT, | |
293 | + end >> PAGE_SHIFT); | |
290 | 294 | push_node_boundaries(node, nd->start >> PAGE_SHIFT, |
291 | 295 | nd->end >> PAGE_SHIFT); |
292 | 296 | |
... | ... | @@ -298,6 +302,11 @@ |
298 | 302 | if ((nd->start | nd->end) == 0) |
299 | 303 | node_clear(node, nodes_parsed); |
300 | 304 | } |
305 | + | |
306 | + node_memblk_range[num_node_memblks].start = start; | |
307 | + node_memblk_range[num_node_memblks].end = end; | |
308 | + memblk_nodeid[num_node_memblks] = node; | |
309 | + num_node_memblks++; | |
301 | 310 | } |
302 | 311 | |
303 | 312 | /* Sanity check to catch more bad SRATs (they are amazingly common). |
... | ... | @@ -368,7 +377,8 @@ |
368 | 377 | return -1; |
369 | 378 | } |
370 | 379 | |
371 | - memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES); | |
380 | + memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | |
381 | + memblk_nodeid); | |
372 | 382 | if (memnode_shift < 0) { |
373 | 383 | printk(KERN_ERR |
374 | 384 | "SRAT: No NUMA node hash function found. Contact maintainer\n"); |
include/asm-x86/numa_64.h