Commit c1b1a5f1f1b2612b69b67381b223bce9f8ec4da5
1 parent
0c49a573ea
Exists in
master
and in
4 other branches
[SPARC64]: NUMA device infrastructure.
Record and propagate NUMA information for devices.

Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 16 changed files with 91 additions and 23 deletions Side-by-side Diff
- arch/sparc64/kernel/ebus.c
- arch/sparc64/kernel/iommu.c
- arch/sparc64/kernel/isa.c
- arch/sparc64/kernel/of_device.c
- arch/sparc64/kernel/pci.c
- arch/sparc64/kernel/pci_fire.c
- arch/sparc64/kernel/pci_impl.h
- arch/sparc64/kernel/pci_msi.c
- arch/sparc64/kernel/pci_psycho.c
- arch/sparc64/kernel/pci_sabre.c
- arch/sparc64/kernel/pci_schizo.c
- arch/sparc64/kernel/pci_sun4v.c
- arch/sparc64/kernel/sbus.c
- include/asm-sparc/device.h
- include/asm-sparc/prom.h
- include/asm-sparc64/iommu.h
arch/sparc64/kernel/ebus.c
... | ... | @@ -396,6 +396,7 @@ |
396 | 396 | sd->op = &dev->ofdev; |
397 | 397 | sd->iommu = dev->bus->ofdev.dev.parent->archdata.iommu; |
398 | 398 | sd->stc = dev->bus->ofdev.dev.parent->archdata.stc; |
399 | + sd->numa_node = dev->bus->ofdev.dev.parent->archdata.numa_node; | |
399 | 400 | |
400 | 401 | dev->ofdev.node = dp; |
401 | 402 | dev->ofdev.dev.parent = &dev->bus->ofdev.dev; |
arch/sparc64/kernel/iommu.c
... | ... | @@ -173,9 +173,11 @@ |
173 | 173 | } |
174 | 174 | |
175 | 175 | int iommu_table_init(struct iommu *iommu, int tsbsize, |
176 | - u32 dma_offset, u32 dma_addr_mask) | |
176 | + u32 dma_offset, u32 dma_addr_mask, | |
177 | + int numa_node) | |
177 | 178 | { |
178 | - unsigned long i, tsbbase, order, sz, num_tsb_entries; | |
179 | + unsigned long i, order, sz, num_tsb_entries; | |
180 | + struct page *page; | |
179 | 181 | |
180 | 182 | num_tsb_entries = tsbsize / sizeof(iopte_t); |
181 | 183 | |
182 | 184 | |
... | ... | @@ -188,11 +190,12 @@ |
188 | 190 | /* Allocate and initialize the free area map. */ |
189 | 191 | sz = num_tsb_entries / 8; |
190 | 192 | sz = (sz + 7UL) & ~7UL; |
191 | - iommu->arena.map = kzalloc(sz, GFP_KERNEL); | |
193 | + iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node); | |
192 | 194 | if (!iommu->arena.map) { |
193 | 195 | printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n"); |
194 | 196 | return -ENOMEM; |
195 | 197 | } |
198 | + memset(iommu->arena.map, 0, sz); | |
196 | 199 | iommu->arena.limit = num_tsb_entries; |
197 | 200 | |
198 | 201 | if (tlb_type != hypervisor) |
199 | 202 | |
200 | 203 | |
201 | 204 | |
... | ... | @@ -201,21 +204,23 @@ |
201 | 204 | /* Allocate and initialize the dummy page which we |
202 | 205 | * set inactive IO PTEs to point to. |
203 | 206 | */ |
204 | - iommu->dummy_page = get_zeroed_page(GFP_KERNEL); | |
205 | - if (!iommu->dummy_page) { | |
207 | + page = alloc_pages_node(numa_node, GFP_KERNEL, 0); | |
208 | + if (!page) { | |
206 | 209 | printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n"); |
207 | 210 | goto out_free_map; |
208 | 211 | } |
212 | + iommu->dummy_page = (unsigned long) page_address(page); | |
213 | + memset((void *)iommu->dummy_page, 0, PAGE_SIZE); | |
209 | 214 | iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); |
210 | 215 | |
211 | 216 | /* Now allocate and setup the IOMMU page table itself. */ |
212 | 217 | order = get_order(tsbsize); |
213 | - tsbbase = __get_free_pages(GFP_KERNEL, order); | |
214 | - if (!tsbbase) { | |
218 | + page = alloc_pages_node(numa_node, GFP_KERNEL, order); | |
219 | + if (!page) { | |
215 | 220 | printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n"); |
216 | 221 | goto out_free_dummy_page; |
217 | 222 | } |
218 | - iommu->page_table = (iopte_t *)tsbbase; | |
223 | + iommu->page_table = (iopte_t *)page_address(page); | |
219 | 224 | |
220 | 225 | for (i = 0; i < num_tsb_entries; i++) |
221 | 226 | iopte_make_dummy(iommu, &iommu->page_table[i]); |
222 | 227 | |
223 | 228 | |
224 | 229 | |
225 | 230 | |
226 | 231 | |
... | ... | @@ -276,20 +281,24 @@ |
276 | 281 | static void *dma_4u_alloc_coherent(struct device *dev, size_t size, |
277 | 282 | dma_addr_t *dma_addrp, gfp_t gfp) |
278 | 283 | { |
284 | + unsigned long flags, order, first_page; | |
279 | 285 | struct iommu *iommu; |
286 | + struct page *page; | |
287 | + int npages, nid; | |
280 | 288 | iopte_t *iopte; |
281 | - unsigned long flags, order, first_page; | |
282 | 289 | void *ret; |
283 | - int npages; | |
284 | 290 | |
285 | 291 | size = IO_PAGE_ALIGN(size); |
286 | 292 | order = get_order(size); |
287 | 293 | if (order >= 10) |
288 | 294 | return NULL; |
289 | 295 | |
290 | - first_page = __get_free_pages(gfp, order); | |
291 | - if (first_page == 0UL) | |
296 | + nid = dev->archdata.numa_node; | |
297 | + page = alloc_pages_node(nid, gfp, order); | |
298 | + if (unlikely(!page)) | |
292 | 299 | return NULL; |
300 | + | |
301 | + first_page = (unsigned long) page_address(page); | |
293 | 302 | memset((char *)first_page, 0, PAGE_SIZE << order); |
294 | 303 | |
295 | 304 | iommu = dev->archdata.iommu; |
arch/sparc64/kernel/isa.c
... | ... | @@ -92,6 +92,7 @@ |
92 | 92 | sd->op = &isa_dev->ofdev; |
93 | 93 | sd->iommu = isa_br->ofdev.dev.parent->archdata.iommu; |
94 | 94 | sd->stc = isa_br->ofdev.dev.parent->archdata.stc; |
95 | + sd->numa_node = isa_br->ofdev.dev.parent->archdata.numa_node; | |
95 | 96 | |
96 | 97 | isa_dev->ofdev.node = dp; |
97 | 98 | isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev; |
arch/sparc64/kernel/of_device.c
... | ... | @@ -6,6 +6,7 @@ |
6 | 6 | #include <linux/mod_devicetable.h> |
7 | 7 | #include <linux/slab.h> |
8 | 8 | #include <linux/errno.h> |
9 | +#include <linux/irq.h> | |
9 | 10 | #include <linux/of_device.h> |
10 | 11 | #include <linux/of_platform.h> |
11 | 12 | |
... | ... | @@ -660,6 +661,7 @@ |
660 | 661 | struct device_node *dp = op->node; |
661 | 662 | struct device_node *pp, *ip; |
662 | 663 | unsigned int orig_irq = irq; |
664 | + int nid; | |
663 | 665 | |
664 | 666 | if (irq == 0xffffffff) |
665 | 667 | return irq; |
... | ... | @@ -672,7 +674,7 @@ |
672 | 674 | printk("%s: direct translate %x --> %x\n", |
673 | 675 | dp->full_name, orig_irq, irq); |
674 | 676 | |
675 | - return irq; | |
677 | + goto out; | |
676 | 678 | } |
677 | 679 | |
678 | 680 | /* Something more complicated. Walk up to the root, applying |
... | ... | @@ -743,6 +745,14 @@ |
743 | 745 | if (of_irq_verbose) |
744 | 746 | printk("%s: Apply IRQ trans [%s] %x --> %x\n", |
745 | 747 | op->node->full_name, ip->full_name, orig_irq, irq); |
748 | + | |
749 | +out: | |
750 | + nid = of_node_to_nid(dp); | |
751 | + if (nid != -1) { | |
752 | + cpumask_t numa_mask = node_to_cpumask(nid); | |
753 | + | |
754 | + irq_set_affinity(irq, numa_mask); | |
755 | + } | |
746 | 756 | |
747 | 757 | return irq; |
748 | 758 | } |
arch/sparc64/kernel/pci.c
... | ... | @@ -369,10 +369,12 @@ |
369 | 369 | sd->host_controller = pbm; |
370 | 370 | sd->prom_node = node; |
371 | 371 | sd->op = of_find_device_by_node(node); |
372 | + sd->numa_node = pbm->numa_node; | |
372 | 373 | |
373 | 374 | sd = &sd->op->dev.archdata; |
374 | 375 | sd->iommu = pbm->iommu; |
375 | 376 | sd->stc = &pbm->stc; |
377 | + sd->numa_node = pbm->numa_node; | |
376 | 378 | |
377 | 379 | type = of_get_property(node, "device_type", NULL); |
378 | 380 | if (type == NULL) |
... | ... | @@ -1158,6 +1160,16 @@ |
1158 | 1160 | |
1159 | 1161 | return 0; |
1160 | 1162 | } |
1163 | + | |
1164 | +#ifdef CONFIG_NUMA | |
1165 | +int pcibus_to_node(struct pci_bus *pbus) | |
1166 | +{ | |
1167 | + struct pci_pbm_info *pbm = pbus->sysdata; | |
1168 | + | |
1169 | + return pbm->numa_node; | |
1170 | +} | |
1171 | +EXPORT_SYMBOL(pcibus_to_node); | |
1172 | +#endif | |
1161 | 1173 | |
1162 | 1174 | /* Return the domain number for this pci bus */ |
1163 | 1175 |
arch/sparc64/kernel/pci_fire.c
... | ... | @@ -71,7 +71,8 @@ |
71 | 71 | */ |
72 | 72 | fire_write(iommu->iommu_flushinv, ~(u64)0); |
73 | 73 | |
74 | - err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask); | |
74 | + err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask, | |
75 | + pbm->numa_node); | |
75 | 76 | if (err) |
76 | 77 | return err; |
77 | 78 | |
... | ... | @@ -448,6 +449,8 @@ |
448 | 449 | |
449 | 450 | pbm->next = pci_pbm_root; |
450 | 451 | pci_pbm_root = pbm; |
452 | + | |
453 | + pbm->numa_node = -1; | |
451 | 454 | |
452 | 455 | pbm->scan_bus = pci_fire_scan_bus; |
453 | 456 | pbm->pci_ops = &sun4u_pci_ops; |
arch/sparc64/kernel/pci_impl.h
arch/sparc64/kernel/pci_msi.c
... | ... | @@ -279,11 +279,17 @@ |
279 | 279 | unsigned long devino) |
280 | 280 | { |
281 | 281 | int irq = ops->msiq_build_irq(pbm, msiqid, devino); |
282 | - int err; | |
282 | + int err, nid; | |
283 | 283 | |
284 | 284 | if (irq < 0) |
285 | 285 | return irq; |
286 | 286 | |
287 | + nid = pbm->numa_node; | |
288 | + if (nid != -1) { | |
289 | + cpumask_t numa_mask = node_to_cpumask(nid); | |
290 | + | |
291 | + irq_set_affinity(irq, numa_mask); | |
292 | + } | |
287 | 293 | err = request_irq(irq, sparc64_msiq_interrupt, 0, |
288 | 294 | "MSIQ", |
289 | 295 | &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]); |
arch/sparc64/kernel/pci_psycho.c
... | ... | @@ -848,7 +848,8 @@ |
848 | 848 | /* Leave diag mode enabled for full-flushing done |
849 | 849 | * in pci_iommu.c |
850 | 850 | */ |
851 | - err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff); | |
851 | + err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff, | |
852 | + pbm->numa_node); | |
852 | 853 | if (err) |
853 | 854 | return err; |
854 | 855 | |
... | ... | @@ -978,6 +979,8 @@ |
978 | 979 | |
979 | 980 | pbm->next = pci_pbm_root; |
980 | 981 | pci_pbm_root = pbm; |
982 | + | |
983 | + pbm->numa_node = -1; | |
981 | 984 | |
982 | 985 | pbm->scan_bus = psycho_scan_bus; |
983 | 986 | pbm->pci_ops = &sun4u_pci_ops; |
arch/sparc64/kernel/pci_sabre.c
... | ... | @@ -704,7 +704,7 @@ |
704 | 704 | * in pci_iommu.c |
705 | 705 | */ |
706 | 706 | err = iommu_table_init(iommu, tsbsize * 1024 * 8, |
707 | - dvma_offset, dma_mask); | |
707 | + dvma_offset, dma_mask, pbm->numa_node); | |
708 | 708 | if (err) |
709 | 709 | return err; |
710 | 710 | |
... | ... | @@ -736,6 +736,8 @@ |
736 | 736 | { |
737 | 737 | pbm->name = dp->full_name; |
738 | 738 | printk("%s: SABRE PCI Bus Module\n", pbm->name); |
739 | + | |
740 | + pbm->numa_node = -1; | |
739 | 741 | |
740 | 742 | pbm->scan_bus = sabre_scan_bus; |
741 | 743 | pbm->pci_ops = &sun4u_pci_ops; |
arch/sparc64/kernel/pci_schizo.c
... | ... | @@ -1220,7 +1220,8 @@ |
1220 | 1220 | /* Leave diag mode enabled for full-flushing done |
1221 | 1221 | * in pci_iommu.c |
1222 | 1222 | */ |
1223 | - err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask); | |
1223 | + err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask, | |
1224 | + pbm->numa_node); | |
1224 | 1225 | if (err) |
1225 | 1226 | return err; |
1226 | 1227 | |
... | ... | @@ -1378,6 +1379,8 @@ |
1378 | 1379 | |
1379 | 1380 | pbm->next = pci_pbm_root; |
1380 | 1381 | pci_pbm_root = pbm; |
1382 | + | |
1383 | + pbm->numa_node = -1; | |
1381 | 1384 | |
1382 | 1385 | pbm->scan_bus = schizo_scan_bus; |
1383 | 1386 | pbm->pci_ops = &sun4u_pci_ops; |
arch/sparc64/kernel/pci_sun4v.c
... | ... | @@ -127,10 +127,12 @@ |
127 | 127 | static void *dma_4v_alloc_coherent(struct device *dev, size_t size, |
128 | 128 | dma_addr_t *dma_addrp, gfp_t gfp) |
129 | 129 | { |
130 | - struct iommu *iommu; | |
131 | 130 | unsigned long flags, order, first_page, npages, n; |
131 | + struct iommu *iommu; | |
132 | + struct page *page; | |
132 | 133 | void *ret; |
133 | 134 | long entry; |
135 | + int nid; | |
134 | 136 | |
135 | 137 | size = IO_PAGE_ALIGN(size); |
136 | 138 | order = get_order(size); |
137 | 139 | |
... | ... | @@ -139,10 +141,12 @@ |
139 | 141 | |
140 | 142 | npages = size >> IO_PAGE_SHIFT; |
141 | 143 | |
142 | - first_page = __get_free_pages(gfp, order); | |
143 | - if (unlikely(first_page == 0UL)) | |
144 | + nid = dev->archdata.numa_node; | |
145 | + page = alloc_pages_node(nid, gfp, order); | |
146 | + if (unlikely(!page)) | |
144 | 147 | return NULL; |
145 | 148 | |
149 | + first_page = (unsigned long) page_address(page); | |
146 | 150 | memset((char *)first_page, 0, PAGE_SIZE << order); |
147 | 151 | |
148 | 152 | iommu = dev->archdata.iommu; |
... | ... | @@ -899,6 +903,8 @@ |
899 | 903 | pbm->next = pci_pbm_root; |
900 | 904 | pci_pbm_root = pbm; |
901 | 905 | |
906 | + pbm->numa_node = of_node_to_nid(dp); | |
907 | + | |
902 | 908 | pbm->scan_bus = pci_sun4v_scan_bus; |
903 | 909 | pbm->pci_ops = &sun4v_pci_ops; |
904 | 910 | pbm->config_space_reg_bits = 12; |
... | ... | @@ -913,6 +919,7 @@ |
913 | 919 | pbm->name = dp->full_name; |
914 | 920 | |
915 | 921 | printk("%s: SUN4V PCI Bus Module\n", pbm->name); |
922 | + printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node); | |
916 | 923 | |
917 | 924 | pci_determine_mem_io_space(pbm); |
918 | 925 |
arch/sparc64/kernel/sbus.c
... | ... | @@ -544,6 +544,7 @@ |
544 | 544 | |
545 | 545 | sbus->ofdev.dev.archdata.iommu = iommu; |
546 | 546 | sbus->ofdev.dev.archdata.stc = strbuf; |
547 | + sbus->ofdev.dev.archdata.numa_node = -1; | |
547 | 548 | |
548 | 549 | reg_base = regs + SYSIO_IOMMUREG_BASE; |
549 | 550 | iommu->iommu_control = reg_base + IOMMU_CONTROL; |
... | ... | @@ -575,7 +576,7 @@ |
575 | 576 | sbus->portid, regs); |
576 | 577 | |
577 | 578 | /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */ |
578 | - if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff)) | |
579 | + if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff, -1)) | |
579 | 580 | goto fatal_memory_error; |
580 | 581 | |
581 | 582 | control = upa_readq(iommu->iommu_control); |
include/asm-sparc/device.h
include/asm-sparc/prom.h
... | ... | @@ -77,6 +77,11 @@ |
77 | 77 | const char *name, |
78 | 78 | int def); |
79 | 79 | extern int of_find_in_proplist(const char *list, const char *match, int len); |
80 | +#ifdef CONFIG_NUMA | |
81 | +extern int of_node_to_nid(struct device_node *dp); | |
82 | +#else | |
83 | +#define of_node_to_nid(dp) (-1) | |
84 | +#endif | |
80 | 85 | |
81 | 86 | extern void prom_build_devicetree(void); |
82 | 87 |
include/asm-sparc64/iommu.h