Commit c1b1a5f1f1b2612b69b67381b223bce9f8ec4da5

Authored by David S. Miller
1 parent 0c49a573ea

[SPARC64]: NUMA device infrastructure.

Record and propagate NUMA information for devices.

Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 16 changed files with 91 additions and 23 deletions (side-by-side diff).

arch/sparc64/kernel/ebus.c
... ... @@ -396,6 +396,7 @@
396 396 sd->op = &dev->ofdev;
397 397 sd->iommu = dev->bus->ofdev.dev.parent->archdata.iommu;
398 398 sd->stc = dev->bus->ofdev.dev.parent->archdata.stc;
  399 + sd->numa_node = dev->bus->ofdev.dev.parent->archdata.numa_node;
399 400  
400 401 dev->ofdev.node = dp;
401 402 dev->ofdev.dev.parent = &dev->bus->ofdev.dev;
arch/sparc64/kernel/iommu.c
... ... @@ -173,9 +173,11 @@
173 173 }
174 174  
175 175 int iommu_table_init(struct iommu *iommu, int tsbsize,
176   - u32 dma_offset, u32 dma_addr_mask)
  176 + u32 dma_offset, u32 dma_addr_mask,
  177 + int numa_node)
177 178 {
178   - unsigned long i, tsbbase, order, sz, num_tsb_entries;
  179 + unsigned long i, order, sz, num_tsb_entries;
  180 + struct page *page;
179 181  
180 182 num_tsb_entries = tsbsize / sizeof(iopte_t);
181 183  
182 184  
... ... @@ -188,11 +190,12 @@
188 190 /* Allocate and initialize the free area map. */
189 191 sz = num_tsb_entries / 8;
190 192 sz = (sz + 7UL) & ~7UL;
191   - iommu->arena.map = kzalloc(sz, GFP_KERNEL);
  193 + iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
192 194 if (!iommu->arena.map) {
193 195 printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
194 196 return -ENOMEM;
195 197 }
  198 + memset(iommu->arena.map, 0, sz);
196 199 iommu->arena.limit = num_tsb_entries;
197 200  
198 201 if (tlb_type != hypervisor)
199 202  
200 203  
201 204  
... ... @@ -201,21 +204,23 @@
201 204 /* Allocate and initialize the dummy page which we
202 205 * set inactive IO PTEs to point to.
203 206 */
204   - iommu->dummy_page = get_zeroed_page(GFP_KERNEL);
205   - if (!iommu->dummy_page) {
  207 + page = alloc_pages_node(numa_node, GFP_KERNEL, 0);
  208 + if (!page) {
206 209 printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n");
207 210 goto out_free_map;
208 211 }
  212 + iommu->dummy_page = (unsigned long) page_address(page);
  213 + memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
209 214 iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
210 215  
211 216 /* Now allocate and setup the IOMMU page table itself. */
212 217 order = get_order(tsbsize);
213   - tsbbase = __get_free_pages(GFP_KERNEL, order);
214   - if (!tsbbase) {
  218 + page = alloc_pages_node(numa_node, GFP_KERNEL, order);
  219 + if (!page) {
215 220 printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n");
216 221 goto out_free_dummy_page;
217 222 }
218   - iommu->page_table = (iopte_t *)tsbbase;
  223 + iommu->page_table = (iopte_t *)page_address(page);
219 224  
220 225 for (i = 0; i < num_tsb_entries; i++)
221 226 iopte_make_dummy(iommu, &iommu->page_table[i]);
222 227  
223 228  
224 229  
225 230  
226 231  
... ... @@ -276,20 +281,24 @@
276 281 static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
277 282 dma_addr_t *dma_addrp, gfp_t gfp)
278 283 {
  284 + unsigned long flags, order, first_page;
279 285 struct iommu *iommu;
  286 + struct page *page;
  287 + int npages, nid;
280 288 iopte_t *iopte;
281   - unsigned long flags, order, first_page;
282 289 void *ret;
283   - int npages;
284 290  
285 291 size = IO_PAGE_ALIGN(size);
286 292 order = get_order(size);
287 293 if (order >= 10)
288 294 return NULL;
289 295  
290   - first_page = __get_free_pages(gfp, order);
291   - if (first_page == 0UL)
  296 + nid = dev->archdata.numa_node;
  297 + page = alloc_pages_node(nid, gfp, order);
  298 + if (unlikely(!page))
292 299 return NULL;
  300 +
  301 + first_page = (unsigned long) page_address(page);
293 302 memset((char *)first_page, 0, PAGE_SIZE << order);
294 303  
295 304 iommu = dev->archdata.iommu;
arch/sparc64/kernel/isa.c
... ... @@ -92,6 +92,7 @@
92 92 sd->op = &isa_dev->ofdev;
93 93 sd->iommu = isa_br->ofdev.dev.parent->archdata.iommu;
94 94 sd->stc = isa_br->ofdev.dev.parent->archdata.stc;
  95 + sd->numa_node = isa_br->ofdev.dev.parent->archdata.numa_node;
95 96  
96 97 isa_dev->ofdev.node = dp;
97 98 isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev;
arch/sparc64/kernel/of_device.c
... ... @@ -6,6 +6,7 @@
6 6 #include <linux/mod_devicetable.h>
7 7 #include <linux/slab.h>
8 8 #include <linux/errno.h>
  9 +#include <linux/irq.h>
9 10 #include <linux/of_device.h>
10 11 #include <linux/of_platform.h>
11 12  
... ... @@ -660,6 +661,7 @@
660 661 struct device_node *dp = op->node;
661 662 struct device_node *pp, *ip;
662 663 unsigned int orig_irq = irq;
  664 + int nid;
663 665  
664 666 if (irq == 0xffffffff)
665 667 return irq;
... ... @@ -672,7 +674,7 @@
672 674 printk("%s: direct translate %x --> %x\n",
673 675 dp->full_name, orig_irq, irq);
674 676  
675   - return irq;
  677 + goto out;
676 678 }
677 679  
678 680 /* Something more complicated. Walk up to the root, applying
... ... @@ -743,6 +745,14 @@
743 745 if (of_irq_verbose)
744 746 printk("%s: Apply IRQ trans [%s] %x --> %x\n",
745 747 op->node->full_name, ip->full_name, orig_irq, irq);
  748 +
  749 +out:
  750 + nid = of_node_to_nid(dp);
  751 + if (nid != -1) {
  752 + cpumask_t numa_mask = node_to_cpumask(nid);
  753 +
  754 + irq_set_affinity(irq, numa_mask);
  755 + }
746 756  
747 757 return irq;
748 758 }
arch/sparc64/kernel/pci.c
... ... @@ -369,10 +369,12 @@
369 369 sd->host_controller = pbm;
370 370 sd->prom_node = node;
371 371 sd->op = of_find_device_by_node(node);
  372 + sd->numa_node = pbm->numa_node;
372 373  
373 374 sd = &sd->op->dev.archdata;
374 375 sd->iommu = pbm->iommu;
375 376 sd->stc = &pbm->stc;
  377 + sd->numa_node = pbm->numa_node;
376 378  
377 379 type = of_get_property(node, "device_type", NULL);
378 380 if (type == NULL)
... ... @@ -1158,6 +1160,16 @@
1158 1160  
1159 1161 return 0;
1160 1162 }
  1163 +
  1164 +#ifdef CONFIG_NUMA
  1165 +int pcibus_to_node(struct pci_bus *pbus)
  1166 +{
  1167 + struct pci_pbm_info *pbm = pbus->sysdata;
  1168 +
  1169 + return pbm->numa_node;
  1170 +}
  1171 +EXPORT_SYMBOL(pcibus_to_node);
  1172 +#endif
1161 1173  
1162 1174 /* Return the domain number for this pci bus */
1163 1175  
arch/sparc64/kernel/pci_fire.c
... ... @@ -71,7 +71,8 @@
71 71 */
72 72 fire_write(iommu->iommu_flushinv, ~(u64)0);
73 73  
74   - err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask);
  74 + err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
  75 + pbm->numa_node);
75 76 if (err)
76 77 return err;
77 78  
... ... @@ -448,6 +449,8 @@
448 449  
449 450 pbm->next = pci_pbm_root;
450 451 pci_pbm_root = pbm;
  452 +
  453 + pbm->numa_node = -1;
451 454  
452 455 pbm->scan_bus = pci_fire_scan_bus;
453 456 pbm->pci_ops = &sun4u_pci_ops;
arch/sparc64/kernel/pci_impl.h
... ... @@ -148,6 +148,8 @@
148 148 struct pci_bus *pci_bus;
149 149 void (*scan_bus)(struct pci_pbm_info *);
150 150 struct pci_ops *pci_ops;
  151 +
  152 + int numa_node;
151 153 };
152 154  
153 155 struct pci_controller_info {
arch/sparc64/kernel/pci_msi.c
... ... @@ -279,11 +279,17 @@
279 279 unsigned long devino)
280 280 {
281 281 int irq = ops->msiq_build_irq(pbm, msiqid, devino);
282   - int err;
  282 + int err, nid;
283 283  
284 284 if (irq < 0)
285 285 return irq;
286 286  
  287 + nid = pbm->numa_node;
  288 + if (nid != -1) {
  289 + cpumask_t numa_mask = node_to_cpumask(nid);
  290 +
  291 + irq_set_affinity(irq, numa_mask);
  292 + }
287 293 err = request_irq(irq, sparc64_msiq_interrupt, 0,
288 294 "MSIQ",
289 295 &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]);
arch/sparc64/kernel/pci_psycho.c
... ... @@ -848,7 +848,8 @@
848 848 /* Leave diag mode enabled for full-flushing done
849 849 * in pci_iommu.c
850 850 */
851   - err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff);
  851 + err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff,
  852 + pbm->numa_node);
852 853 if (err)
853 854 return err;
854 855  
... ... @@ -978,6 +979,8 @@
978 979  
979 980 pbm->next = pci_pbm_root;
980 981 pci_pbm_root = pbm;
  982 +
  983 + pbm->numa_node = -1;
981 984  
982 985 pbm->scan_bus = psycho_scan_bus;
983 986 pbm->pci_ops = &sun4u_pci_ops;
arch/sparc64/kernel/pci_sabre.c
... ... @@ -704,7 +704,7 @@
704 704 * in pci_iommu.c
705 705 */
706 706 err = iommu_table_init(iommu, tsbsize * 1024 * 8,
707   - dvma_offset, dma_mask);
  707 + dvma_offset, dma_mask, pbm->numa_node);
708 708 if (err)
709 709 return err;
710 710  
... ... @@ -736,6 +736,8 @@
736 736 {
737 737 pbm->name = dp->full_name;
738 738 printk("%s: SABRE PCI Bus Module\n", pbm->name);
  739 +
  740 + pbm->numa_node = -1;
739 741  
740 742 pbm->scan_bus = sabre_scan_bus;
741 743 pbm->pci_ops = &sun4u_pci_ops;
arch/sparc64/kernel/pci_schizo.c
... ... @@ -1220,7 +1220,8 @@
1220 1220 /* Leave diag mode enabled for full-flushing done
1221 1221 * in pci_iommu.c
1222 1222 */
1223   - err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask);
  1223 + err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
  1224 + pbm->numa_node);
1224 1225 if (err)
1225 1226 return err;
1226 1227  
... ... @@ -1378,6 +1379,8 @@
1378 1379  
1379 1380 pbm->next = pci_pbm_root;
1380 1381 pci_pbm_root = pbm;
  1382 +
  1383 + pbm->numa_node = -1;
1381 1384  
1382 1385 pbm->scan_bus = schizo_scan_bus;
1383 1386 pbm->pci_ops = &sun4u_pci_ops;
arch/sparc64/kernel/pci_sun4v.c
... ... @@ -127,10 +127,12 @@
127 127 static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
128 128 dma_addr_t *dma_addrp, gfp_t gfp)
129 129 {
130   - struct iommu *iommu;
131 130 unsigned long flags, order, first_page, npages, n;
  131 + struct iommu *iommu;
  132 + struct page *page;
132 133 void *ret;
133 134 long entry;
  135 + int nid;
134 136  
135 137 size = IO_PAGE_ALIGN(size);
136 138 order = get_order(size);
137 139  
... ... @@ -139,10 +141,12 @@
139 141  
140 142 npages = size >> IO_PAGE_SHIFT;
141 143  
142   - first_page = __get_free_pages(gfp, order);
143   - if (unlikely(first_page == 0UL))
  144 + nid = dev->archdata.numa_node;
  145 + page = alloc_pages_node(nid, gfp, order);
  146 + if (unlikely(!page))
144 147 return NULL;
145 148  
  149 + first_page = (unsigned long) page_address(page);
146 150 memset((char *)first_page, 0, PAGE_SIZE << order);
147 151  
148 152 iommu = dev->archdata.iommu;
... ... @@ -899,6 +903,8 @@
899 903 pbm->next = pci_pbm_root;
900 904 pci_pbm_root = pbm;
901 905  
  906 + pbm->numa_node = of_node_to_nid(dp);
  907 +
902 908 pbm->scan_bus = pci_sun4v_scan_bus;
903 909 pbm->pci_ops = &sun4v_pci_ops;
904 910 pbm->config_space_reg_bits = 12;
... ... @@ -913,6 +919,7 @@
913 919 pbm->name = dp->full_name;
914 920  
915 921 printk("%s: SUN4V PCI Bus Module\n", pbm->name);
  922 + printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);
916 923  
917 924 pci_determine_mem_io_space(pbm);
918 925  
arch/sparc64/kernel/sbus.c
... ... @@ -544,6 +544,7 @@
544 544  
545 545 sbus->ofdev.dev.archdata.iommu = iommu;
546 546 sbus->ofdev.dev.archdata.stc = strbuf;
  547 + sbus->ofdev.dev.archdata.numa_node = -1;
547 548  
548 549 reg_base = regs + SYSIO_IOMMUREG_BASE;
549 550 iommu->iommu_control = reg_base + IOMMU_CONTROL;
... ... @@ -575,7 +576,7 @@
575 576 sbus->portid, regs);
576 577  
577 578 /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */
578   - if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff))
  579 + if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff, -1))
579 580 goto fatal_memory_error;
580 581  
581 582 control = upa_readq(iommu->iommu_control);
include/asm-sparc/device.h
... ... @@ -16,6 +16,8 @@
16 16  
17 17 struct device_node *prom_node;
18 18 struct of_device *op;
  19 +
  20 + int numa_node;
19 21 };
20 22  
21 23 #endif /* _ASM_SPARC_DEVICE_H */
include/asm-sparc/prom.h
... ... @@ -77,6 +77,11 @@
77 77 const char *name,
78 78 int def);
79 79 extern int of_find_in_proplist(const char *list, const char *match, int len);
  80 +#ifdef CONFIG_NUMA
  81 +extern int of_node_to_nid(struct device_node *dp);
  82 +#else
  83 +#define of_node_to_nid(dp) (-1)
  84 +#endif
80 85  
81 86 extern void prom_build_devicetree(void);
82 87  
include/asm-sparc64/iommu.h
... ... @@ -56,7 +56,8 @@
56 56 };
57 57  
58 58 extern int iommu_table_init(struct iommu *iommu, int tsbsize,
59   - u32 dma_offset, u32 dma_addr_mask);
  59 + u32 dma_offset, u32 dma_addr_mask,
  60 + int numa_node);
60 61  
61 62 #endif /* !(_SPARC64_IOMMU_H) */