Commit 3f28c5af3964c11e61e9a58df77cae5ebdb8209e

Authored by Wei Yang
Committed by Benjamin Herrenschmidt
1 parent cc146d1db0

powerpc/powernv: Reduce multi-hit of iommu_add_device()

During an EEH hotplug event, iommu_add_device() is invoked three times, and
two of those invocations trigger a warning or an error.

The three call paths that invoke iommu_add_device() are:

    pci_device_add
       ...
       set_iommu_table_base_and_group   <- 1st time, fails
    device_add
       ...
       tce_iommu_bus_notifier           <- 2nd time, succeeds
    pcibios_add_pci_devices
       ...
       pcibios_setup_bus_devices        <- 3rd time, re-attaches

The first call fails because dev->kobj->sd has not been initialized yet; it is
only initialized later, in device_add(). The third call triggers the warning
because it re-attaches the device to the iommu_group it already joined on the
second call.
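
For context, the guard logic in iommu_add_device() (arch/powerpc/kernel/iommu.c
in kernels of this era) behaves roughly like the sketch below; this is a
simplified illustration, not the verbatim source, and details vary between
versions. The error and warning quoted next come from these two paths:

    /* Simplified sketch of iommu_add_device(), 3.14 era (not verbatim). */
    int iommu_add_device(struct device *dev)
    {
            struct iommu_table *tbl;
            int ret;

            /* The 3rd call trips this WARN_ON: the device was already
             * attached to its group by the bus notifier (2nd call). */
            if (WARN_ON(dev->iommu_group))
                    return -EBUSY;

            tbl = get_iommu_table_base(dev);
            if (!tbl || !tbl->it_group)
                    return 0;

            /* The 1st call fails here: iommu_group_add_device() creates
             * sysfs links under the device, and dev->kobj.sd is only set
             * up later in device_add(), hence the -EFAULT (-14). */
            ret = iommu_group_add_device(tbl->it_group, dev);
            if (ret < 0)
                    pr_err("iommu_tce: %s has not been added, ret=%d\n",
                           dev_name(dev), ret);
            return ret;
    }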

After applying this patch, the error

    iommu_tce: 0003:05:00.0 has not been added, ret=-14

and the warning

    [  204.123609] ------------[ cut here ]------------
    [  204.123645] WARNING: at arch/powerpc/kernel/iommu.c:1125
    [  204.123680] Modules linked in: xt_CHECKSUM nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE ip6t_REJECT bnep bluetooth 6lowpan_iphc rfkill xt_conntrack ebtable_nat ebtable_broute bridge stp llc mlx4_ib ib_sa ib_mad ib_core ib_addr ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw bnx2x tg3 mlx4_core nfsd ptp mdio ses libcrc32c nfs_acl enclosure be2net pps_core shpchp lockd kvm uinput sunrpc binfmt_misc lpfc scsi_transport_fc ipr scsi_tgt
    [  204.124356] CPU: 18 PID: 650 Comm: eehd Not tainted 3.14.0-rc5yw+ #102
    [  204.124400] task: c0000027ed485670 ti: c0000027ed50c000 task.ti: c0000027ed50c000
    [  204.124453] NIP: c00000000003cf80 LR: c00000000006c648 CTR: c00000000006c5c0
    [  204.124506] REGS: c0000027ed50f440 TRAP: 0700   Not tainted  (3.14.0-rc5yw+)
    [  204.124558] MSR: 9000000000029032 <SF,HV,EE,ME,IR,DR,RI>  CR: 88008084  XER: 20000000
    [  204.124682] CFAR: c00000000006c644 SOFTE: 1
    GPR00: c00000000006c648 c0000027ed50f6c0 c000000001398380 c0000027ec260300
    GPR04: c0000027ea92c000 c00000000006ad00 c0000000016e41b0 0000000000000110
    GPR08: c0000000012cd4c0 0000000000000001 c0000027ec2602ff 0000000000000062
    GPR12: 0000000028008084 c00000000fdca200 c0000000000d1d90 c0000027ec281a80
    GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
    GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000001
    GPR24: 000000005342697b 0000000000002906 c000001fe6ac9800 c000001fe6ac9800
    GPR28: 0000000000000000 c0000000016e3a80 c0000027ea92c090 c0000027ea92c000
    [  204.125353] NIP [c00000000003cf80] .iommu_add_device+0x30/0x1f0
    [  204.125399] LR [c00000000006c648] .pnv_pci_ioda_dma_dev_setup+0x88/0xb0
    [  204.125443] Call Trace:
    [  204.125464] [c0000027ed50f6c0] [c0000027ed50f750] 0xc0000027ed50f750 (unreliable)
    [  204.125526] [c0000027ed50f750] [c00000000006c648] .pnv_pci_ioda_dma_dev_setup+0x88/0xb0
    [  204.125588] [c0000027ed50f7d0] [c000000000069cc8] .pnv_pci_dma_dev_setup+0x78/0x340
    [  204.125650] [c0000027ed50f870] [c000000000044408] .pcibios_setup_device+0x88/0x2f0
    [  204.125712] [c0000027ed50f940] [c000000000046040] .pcibios_setup_bus_devices+0x60/0xd0
    [  204.125774] [c0000027ed50f9c0] [c000000000043acc] .pcibios_add_pci_devices+0xdc/0x1c0
    [  204.125837] [c0000027ed50fa50] [c00000000086f970] .eeh_reset_device+0x36c/0x4f0
    [  204.125939] [c0000027ed50fb20] [c00000000003a2d8] .eeh_handle_normal_event+0x448/0x480
    [  204.126068] [c0000027ed50fbc0] [c00000000003a35c] .eeh_handle_event+0x4c/0x340
    [  204.126192] [c0000027ed50fc80] [c00000000003a74c] .eeh_event_handler+0xfc/0x1b0
    [  204.126319] [c0000027ed50fd30] [c0000000000d1ea0] .kthread+0x110/0x130
    [  204.126430] [c0000027ed50fe30] [c00000000000a460] .ret_from_kernel_thread+0x5c/0x7c
    [  204.126556] Instruction dump:
    [  204.126610] 7c0802a6 fba1ffe8 fbc1fff0 fbe1fff8 f8010010 f821ff71 7c7e1b78 60000000
    [  204.126787] 60000000 e87e0298 3143ffff 7d2a1910 <0b090000> 2fa90000 40de00c8 ebfe0218
    [  204.126966] ---[ end trace 6e7aefd80add2973 ]---

no longer appear.

This patch removes the iommu_add_device() call from
pnv_pci_ioda_dma_dev_setup() by switching from
set_iommu_table_base_and_group() to set_iommu_table_base(), which reverts part
of the change in commit d905c5df ("PPC: POWERNV: move iommu_add_device
earlier").

Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
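
For reference, the reason a one-line substitution is enough to drop the extra
iommu_add_device() call: set_iommu_table_base_and_group() is a thin wrapper
around set_iommu_table_base(). Roughly (a sketch of the asm/iommu.h helpers of
this era, not verbatim):

    static inline void set_iommu_table_base(struct device *dev, void *base)
    {
            dev->archdata.dma_data.iommu_table_base = base;
    }

    static inline void set_iommu_table_base_and_group(struct device *dev,
                                                      void *base)
    {
            set_iommu_table_base(dev, base);
            iommu_add_device(dev);  /* the duplicate attach this patch avoids */
    }

With the plain helper, the EEH re-add path only restores the device's TCE
table; the device joins its iommu_group exactly once, via
tce_iommu_bus_notifier() during device_add().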

Showing 1 changed file with 1 addition and 1 deletion

arch/powerpc/platforms/powernv/pci-ioda.c
1 /* 1 /*
2 * Support PCI/PCIe on PowerNV platforms 2 * Support PCI/PCIe on PowerNV platforms
3 * 3 *
4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. 4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12 #undef DEBUG 12 #undef DEBUG
13 13
14 #include <linux/kernel.h> 14 #include <linux/kernel.h>
15 #include <linux/pci.h> 15 #include <linux/pci.h>
16 #include <linux/debugfs.h> 16 #include <linux/debugfs.h>
17 #include <linux/delay.h> 17 #include <linux/delay.h>
18 #include <linux/string.h> 18 #include <linux/string.h>
19 #include <linux/init.h> 19 #include <linux/init.h>
20 #include <linux/bootmem.h> 20 #include <linux/bootmem.h>
21 #include <linux/irq.h> 21 #include <linux/irq.h>
22 #include <linux/io.h> 22 #include <linux/io.h>
23 #include <linux/msi.h> 23 #include <linux/msi.h>
24 #include <linux/memblock.h> 24 #include <linux/memblock.h>
25 25
26 #include <asm/sections.h> 26 #include <asm/sections.h>
27 #include <asm/io.h> 27 #include <asm/io.h>
28 #include <asm/prom.h> 28 #include <asm/prom.h>
29 #include <asm/pci-bridge.h> 29 #include <asm/pci-bridge.h>
30 #include <asm/machdep.h> 30 #include <asm/machdep.h>
31 #include <asm/msi_bitmap.h> 31 #include <asm/msi_bitmap.h>
32 #include <asm/ppc-pci.h> 32 #include <asm/ppc-pci.h>
33 #include <asm/opal.h> 33 #include <asm/opal.h>
34 #include <asm/iommu.h> 34 #include <asm/iommu.h>
35 #include <asm/tce.h> 35 #include <asm/tce.h>
36 #include <asm/xics.h> 36 #include <asm/xics.h>
37 #include <asm/debug.h> 37 #include <asm/debug.h>
38 38
39 #include "powernv.h" 39 #include "powernv.h"
40 #include "pci.h" 40 #include "pci.h"
41 41
42 #define define_pe_printk_level(func, kern_level) \ 42 #define define_pe_printk_level(func, kern_level) \
43 static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ 43 static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \
44 { \ 44 { \
45 struct va_format vaf; \ 45 struct va_format vaf; \
46 va_list args; \ 46 va_list args; \
47 char pfix[32]; \ 47 char pfix[32]; \
48 int r; \ 48 int r; \
49 \ 49 \
50 va_start(args, fmt); \ 50 va_start(args, fmt); \
51 \ 51 \
52 vaf.fmt = fmt; \ 52 vaf.fmt = fmt; \
53 vaf.va = &args; \ 53 vaf.va = &args; \
54 \ 54 \
55 if (pe->pdev) \ 55 if (pe->pdev) \
56 strlcpy(pfix, dev_name(&pe->pdev->dev), \ 56 strlcpy(pfix, dev_name(&pe->pdev->dev), \
57 sizeof(pfix)); \ 57 sizeof(pfix)); \
58 else \ 58 else \
59 sprintf(pfix, "%04x:%02x ", \ 59 sprintf(pfix, "%04x:%02x ", \
60 pci_domain_nr(pe->pbus), \ 60 pci_domain_nr(pe->pbus), \
61 pe->pbus->number); \ 61 pe->pbus->number); \
62 r = printk(kern_level "pci %s: [PE# %.3d] %pV", \ 62 r = printk(kern_level "pci %s: [PE# %.3d] %pV", \
63 pfix, pe->pe_number, &vaf); \ 63 pfix, pe->pe_number, &vaf); \
64 \ 64 \
65 va_end(args); \ 65 va_end(args); \
66 \ 66 \
67 return r; \ 67 return r; \
68 } \ 68 } \
69 69
70 define_pe_printk_level(pe_err, KERN_ERR); 70 define_pe_printk_level(pe_err, KERN_ERR);
71 define_pe_printk_level(pe_warn, KERN_WARNING); 71 define_pe_printk_level(pe_warn, KERN_WARNING);
72 define_pe_printk_level(pe_info, KERN_INFO); 72 define_pe_printk_level(pe_info, KERN_INFO);
73 73
74 /* 74 /*
75 * stdcix is only supposed to be used in hypervisor real mode as per 75 * stdcix is only supposed to be used in hypervisor real mode as per
76 * the architecture spec 76 * the architecture spec
77 */ 77 */
78 static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) 78 static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
79 { 79 {
80 __asm__ __volatile__("stdcix %0,0,%1" 80 __asm__ __volatile__("stdcix %0,0,%1"
81 : : "r" (val), "r" (paddr) : "memory"); 81 : : "r" (val), "r" (paddr) : "memory");
82 } 82 }
83 83
84 static int pnv_ioda_alloc_pe(struct pnv_phb *phb) 84 static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
85 { 85 {
86 unsigned long pe; 86 unsigned long pe;
87 87
88 do { 88 do {
89 pe = find_next_zero_bit(phb->ioda.pe_alloc, 89 pe = find_next_zero_bit(phb->ioda.pe_alloc,
90 phb->ioda.total_pe, 0); 90 phb->ioda.total_pe, 0);
91 if (pe >= phb->ioda.total_pe) 91 if (pe >= phb->ioda.total_pe)
92 return IODA_INVALID_PE; 92 return IODA_INVALID_PE;
93 } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); 93 } while(test_and_set_bit(pe, phb->ioda.pe_alloc));
94 94
95 phb->ioda.pe_array[pe].phb = phb; 95 phb->ioda.pe_array[pe].phb = phb;
96 phb->ioda.pe_array[pe].pe_number = pe; 96 phb->ioda.pe_array[pe].pe_number = pe;
97 return pe; 97 return pe;
98 } 98 }
99 99
100 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) 100 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
101 { 101 {
102 WARN_ON(phb->ioda.pe_array[pe].pdev); 102 WARN_ON(phb->ioda.pe_array[pe].pdev);
103 103
104 memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe)); 104 memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
105 clear_bit(pe, phb->ioda.pe_alloc); 105 clear_bit(pe, phb->ioda.pe_alloc);
106 } 106 }
107 107
108 /* Currently those 2 are only used when MSIs are enabled, this will change 108 /* Currently those 2 are only used when MSIs are enabled, this will change
109 * but in the meantime, we need to protect them to avoid warnings 109 * but in the meantime, we need to protect them to avoid warnings
110 */ 110 */
111 #ifdef CONFIG_PCI_MSI 111 #ifdef CONFIG_PCI_MSI
112 static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) 112 static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
113 { 113 {
114 struct pci_controller *hose = pci_bus_to_host(dev->bus); 114 struct pci_controller *hose = pci_bus_to_host(dev->bus);
115 struct pnv_phb *phb = hose->private_data; 115 struct pnv_phb *phb = hose->private_data;
116 struct pci_dn *pdn = pci_get_pdn(dev); 116 struct pci_dn *pdn = pci_get_pdn(dev);
117 117
118 if (!pdn) 118 if (!pdn)
119 return NULL; 119 return NULL;
120 if (pdn->pe_number == IODA_INVALID_PE) 120 if (pdn->pe_number == IODA_INVALID_PE)
121 return NULL; 121 return NULL;
122 return &phb->ioda.pe_array[pdn->pe_number]; 122 return &phb->ioda.pe_array[pdn->pe_number];
123 } 123 }
124 #endif /* CONFIG_PCI_MSI */ 124 #endif /* CONFIG_PCI_MSI */
125 125
126 static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) 126 static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
127 { 127 {
128 struct pci_dev *parent; 128 struct pci_dev *parent;
129 uint8_t bcomp, dcomp, fcomp; 129 uint8_t bcomp, dcomp, fcomp;
130 long rc, rid_end, rid; 130 long rc, rid_end, rid;
131 131
132 /* Bus validation ? */ 132 /* Bus validation ? */
133 if (pe->pbus) { 133 if (pe->pbus) {
134 int count; 134 int count;
135 135
136 dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; 136 dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
137 fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; 137 fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
138 parent = pe->pbus->self; 138 parent = pe->pbus->self;
139 if (pe->flags & PNV_IODA_PE_BUS_ALL) 139 if (pe->flags & PNV_IODA_PE_BUS_ALL)
140 count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; 140 count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
141 else 141 else
142 count = 1; 142 count = 1;
143 143
144 switch(count) { 144 switch(count) {
145 case 1: bcomp = OpalPciBusAll; break; 145 case 1: bcomp = OpalPciBusAll; break;
146 case 2: bcomp = OpalPciBus7Bits; break; 146 case 2: bcomp = OpalPciBus7Bits; break;
147 case 4: bcomp = OpalPciBus6Bits; break; 147 case 4: bcomp = OpalPciBus6Bits; break;
148 case 8: bcomp = OpalPciBus5Bits; break; 148 case 8: bcomp = OpalPciBus5Bits; break;
149 case 16: bcomp = OpalPciBus4Bits; break; 149 case 16: bcomp = OpalPciBus4Bits; break;
150 case 32: bcomp = OpalPciBus3Bits; break; 150 case 32: bcomp = OpalPciBus3Bits; break;
151 default: 151 default:
152 pr_err("%s: Number of subordinate busses %d" 152 pr_err("%s: Number of subordinate busses %d"
153 " unsupported\n", 153 " unsupported\n",
154 pci_name(pe->pbus->self), count); 154 pci_name(pe->pbus->self), count);
155 /* Do an exact match only */ 155 /* Do an exact match only */
156 bcomp = OpalPciBusAll; 156 bcomp = OpalPciBusAll;
157 } 157 }
158 rid_end = pe->rid + (count << 8); 158 rid_end = pe->rid + (count << 8);
159 } else { 159 } else {
160 parent = pe->pdev->bus->self; 160 parent = pe->pdev->bus->self;
161 bcomp = OpalPciBusAll; 161 bcomp = OpalPciBusAll;
162 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; 162 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
163 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; 163 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
164 rid_end = pe->rid + 1; 164 rid_end = pe->rid + 1;
165 } 165 }
166 166
167 /* 167 /*
168 * Associate PE in PELT. We need add the PE into the 168 * Associate PE in PELT. We need add the PE into the
169 * corresponding PELT-V as well. Otherwise, the error 169 * corresponding PELT-V as well. Otherwise, the error
170 * originated from the PE might contribute to other 170 * originated from the PE might contribute to other
171 * PEs. 171 * PEs.
172 */ 172 */
173 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, 173 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
174 bcomp, dcomp, fcomp, OPAL_MAP_PE); 174 bcomp, dcomp, fcomp, OPAL_MAP_PE);
175 if (rc) { 175 if (rc) {
176 pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); 176 pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
177 return -ENXIO; 177 return -ENXIO;
178 } 178 }
179 179
180 rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, 180 rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
181 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); 181 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
182 if (rc) 182 if (rc)
183 pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc); 183 pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc);
184 opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, 184 opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
185 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 185 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
186 186
187 /* Add to all parents PELT-V */ 187 /* Add to all parents PELT-V */
188 while (parent) { 188 while (parent) {
189 struct pci_dn *pdn = pci_get_pdn(parent); 189 struct pci_dn *pdn = pci_get_pdn(parent);
190 if (pdn && pdn->pe_number != IODA_INVALID_PE) { 190 if (pdn && pdn->pe_number != IODA_INVALID_PE) {
191 rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, 191 rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
192 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); 192 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
193 /* XXX What to do in case of error ? */ 193 /* XXX What to do in case of error ? */
194 } 194 }
195 parent = parent->bus->self; 195 parent = parent->bus->self;
196 } 196 }
197 /* Setup reverse map */ 197 /* Setup reverse map */
198 for (rid = pe->rid; rid < rid_end; rid++) 198 for (rid = pe->rid; rid < rid_end; rid++)
199 phb->ioda.pe_rmap[rid] = pe->pe_number; 199 phb->ioda.pe_rmap[rid] = pe->pe_number;
200 200
201 /* Setup one MVTs on IODA1 */ 201 /* Setup one MVTs on IODA1 */
202 if (phb->type == PNV_PHB_IODA1) { 202 if (phb->type == PNV_PHB_IODA1) {
203 pe->mve_number = pe->pe_number; 203 pe->mve_number = pe->pe_number;
204 rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, 204 rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
205 pe->pe_number); 205 pe->pe_number);
206 if (rc) { 206 if (rc) {
207 pe_err(pe, "OPAL error %ld setting up MVE %d\n", 207 pe_err(pe, "OPAL error %ld setting up MVE %d\n",
208 rc, pe->mve_number); 208 rc, pe->mve_number);
209 pe->mve_number = -1; 209 pe->mve_number = -1;
210 } else { 210 } else {
211 rc = opal_pci_set_mve_enable(phb->opal_id, 211 rc = opal_pci_set_mve_enable(phb->opal_id,
212 pe->mve_number, OPAL_ENABLE_MVE); 212 pe->mve_number, OPAL_ENABLE_MVE);
213 if (rc) { 213 if (rc) {
214 pe_err(pe, "OPAL error %ld enabling MVE %d\n", 214 pe_err(pe, "OPAL error %ld enabling MVE %d\n",
215 rc, pe->mve_number); 215 rc, pe->mve_number);
216 pe->mve_number = -1; 216 pe->mve_number = -1;
217 } 217 }
218 } 218 }
219 } else if (phb->type == PNV_PHB_IODA2) 219 } else if (phb->type == PNV_PHB_IODA2)
220 pe->mve_number = 0; 220 pe->mve_number = 0;
221 221
222 return 0; 222 return 0;
223 } 223 }
224 224
225 static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, 225 static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
226 struct pnv_ioda_pe *pe) 226 struct pnv_ioda_pe *pe)
227 { 227 {
228 struct pnv_ioda_pe *lpe; 228 struct pnv_ioda_pe *lpe;
229 229
230 list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) { 230 list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
231 if (lpe->dma_weight < pe->dma_weight) { 231 if (lpe->dma_weight < pe->dma_weight) {
232 list_add_tail(&pe->dma_link, &lpe->dma_link); 232 list_add_tail(&pe->dma_link, &lpe->dma_link);
233 return; 233 return;
234 } 234 }
235 } 235 }
236 list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list); 236 list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
237 } 237 }
238 238
239 static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) 239 static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
240 { 240 {
241 /* This is quite simplistic. The "base" weight of a device 241 /* This is quite simplistic. The "base" weight of a device
242 * is 10. 0 means no DMA is to be accounted for it. 242 * is 10. 0 means no DMA is to be accounted for it.
243 */ 243 */
244 244
245 /* If it's a bridge, no DMA */ 245 /* If it's a bridge, no DMA */
246 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) 246 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
247 return 0; 247 return 0;
248 248
249 /* Reduce the weight of slow USB controllers */ 249 /* Reduce the weight of slow USB controllers */
250 if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || 250 if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
251 dev->class == PCI_CLASS_SERIAL_USB_OHCI || 251 dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
252 dev->class == PCI_CLASS_SERIAL_USB_EHCI) 252 dev->class == PCI_CLASS_SERIAL_USB_EHCI)
253 return 3; 253 return 3;
254 254
255 /* Increase the weight of RAID (includes Obsidian) */ 255 /* Increase the weight of RAID (includes Obsidian) */
256 if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) 256 if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
257 return 15; 257 return 15;
258 258
259 /* Default */ 259 /* Default */
260 return 10; 260 return 10;
261 } 261 }
262 262
263 #if 0 263 #if 0
264 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) 264 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
265 { 265 {
266 struct pci_controller *hose = pci_bus_to_host(dev->bus); 266 struct pci_controller *hose = pci_bus_to_host(dev->bus);
267 struct pnv_phb *phb = hose->private_data; 267 struct pnv_phb *phb = hose->private_data;
268 struct pci_dn *pdn = pci_get_pdn(dev); 268 struct pci_dn *pdn = pci_get_pdn(dev);
269 struct pnv_ioda_pe *pe; 269 struct pnv_ioda_pe *pe;
270 int pe_num; 270 int pe_num;
271 271
272 if (!pdn) { 272 if (!pdn) {
273 pr_err("%s: Device tree node not associated properly\n", 273 pr_err("%s: Device tree node not associated properly\n",
274 pci_name(dev)); 274 pci_name(dev));
275 return NULL; 275 return NULL;
276 } 276 }
277 if (pdn->pe_number != IODA_INVALID_PE) 277 if (pdn->pe_number != IODA_INVALID_PE)
278 return NULL; 278 return NULL;
279 279
280 /* PE#0 has been pre-set */ 280 /* PE#0 has been pre-set */
281 if (dev->bus->number == 0) 281 if (dev->bus->number == 0)
282 pe_num = 0; 282 pe_num = 0;
283 else 283 else
284 pe_num = pnv_ioda_alloc_pe(phb); 284 pe_num = pnv_ioda_alloc_pe(phb);
285 if (pe_num == IODA_INVALID_PE) { 285 if (pe_num == IODA_INVALID_PE) {
286 pr_warning("%s: Not enough PE# available, disabling device\n", 286 pr_warning("%s: Not enough PE# available, disabling device\n",
287 pci_name(dev)); 287 pci_name(dev));
288 return NULL; 288 return NULL;
289 } 289 }
290 290
291 /* NOTE: We get only one ref to the pci_dev for the pdn, not for the 291 /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
292 * pointer in the PE data structure, both should be destroyed at the 292 * pointer in the PE data structure, both should be destroyed at the
293 * same time. However, this needs to be looked at more closely again 293 * same time. However, this needs to be looked at more closely again
294 * once we actually start removing things (Hotplug, SR-IOV, ...) 294 * once we actually start removing things (Hotplug, SR-IOV, ...)
295 * 295 *
296 * At some point we want to remove the PDN completely anyways 296 * At some point we want to remove the PDN completely anyways
297 */ 297 */
298 pe = &phb->ioda.pe_array[pe_num]; 298 pe = &phb->ioda.pe_array[pe_num];
299 pci_dev_get(dev); 299 pci_dev_get(dev);
300 pdn->pcidev = dev; 300 pdn->pcidev = dev;
301 pdn->pe_number = pe_num; 301 pdn->pe_number = pe_num;
302 pe->pdev = dev; 302 pe->pdev = dev;
303 pe->pbus = NULL; 303 pe->pbus = NULL;
304 pe->tce32_seg = -1; 304 pe->tce32_seg = -1;
305 pe->mve_number = -1; 305 pe->mve_number = -1;
306 pe->rid = dev->bus->number << 8 | pdn->devfn; 306 pe->rid = dev->bus->number << 8 | pdn->devfn;
307 307
308 pe_info(pe, "Associated device to PE\n"); 308 pe_info(pe, "Associated device to PE\n");
309 309
310 if (pnv_ioda_configure_pe(phb, pe)) { 310 if (pnv_ioda_configure_pe(phb, pe)) {
311 /* XXX What do we do here ? */ 311 /* XXX What do we do here ? */
312 if (pe_num) 312 if (pe_num)
313 pnv_ioda_free_pe(phb, pe_num); 313 pnv_ioda_free_pe(phb, pe_num);
314 pdn->pe_number = IODA_INVALID_PE; 314 pdn->pe_number = IODA_INVALID_PE;
315 pe->pdev = NULL; 315 pe->pdev = NULL;
316 pci_dev_put(dev); 316 pci_dev_put(dev);
317 return NULL; 317 return NULL;
318 } 318 }
319 319
320 /* Assign a DMA weight to the device */ 320 /* Assign a DMA weight to the device */
321 pe->dma_weight = pnv_ioda_dma_weight(dev); 321 pe->dma_weight = pnv_ioda_dma_weight(dev);
322 if (pe->dma_weight != 0) { 322 if (pe->dma_weight != 0) {
323 phb->ioda.dma_weight += pe->dma_weight; 323 phb->ioda.dma_weight += pe->dma_weight;
324 phb->ioda.dma_pe_count++; 324 phb->ioda.dma_pe_count++;
325 } 325 }
326 326
327 /* Link the PE */ 327 /* Link the PE */
328 pnv_ioda_link_pe_by_weight(phb, pe); 328 pnv_ioda_link_pe_by_weight(phb, pe);
329 329
330 return pe; 330 return pe;
331 } 331 }
332 #endif /* Useful for SRIOV case */ 332 #endif /* Useful for SRIOV case */
333 333
334 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) 334 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
335 { 335 {
336 struct pci_dev *dev; 336 struct pci_dev *dev;
337 337
338 list_for_each_entry(dev, &bus->devices, bus_list) { 338 list_for_each_entry(dev, &bus->devices, bus_list) {
339 struct pci_dn *pdn = pci_get_pdn(dev); 339 struct pci_dn *pdn = pci_get_pdn(dev);
340 340
341 if (pdn == NULL) { 341 if (pdn == NULL) {
342 pr_warn("%s: No device node associated with device !\n", 342 pr_warn("%s: No device node associated with device !\n",
343 pci_name(dev)); 343 pci_name(dev));
344 continue; 344 continue;
345 } 345 }
346 pci_dev_get(dev); 346 pci_dev_get(dev);
347 pdn->pcidev = dev; 347 pdn->pcidev = dev;
348 pdn->pe_number = pe->pe_number; 348 pdn->pe_number = pe->pe_number;
349 pe->dma_weight += pnv_ioda_dma_weight(dev); 349 pe->dma_weight += pnv_ioda_dma_weight(dev);
350 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) 350 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
351 pnv_ioda_setup_same_PE(dev->subordinate, pe); 351 pnv_ioda_setup_same_PE(dev->subordinate, pe);
352 } 352 }
353 } 353 }
354 354
355 /* 355 /*
356 * There're 2 types of PCI bus sensitive PEs: One that is compromised of 356 * There're 2 types of PCI bus sensitive PEs: One that is compromised of
357 * single PCI bus. Another one that contains the primary PCI bus and its 357 * single PCI bus. Another one that contains the primary PCI bus and its
358 * subordinate PCI devices and buses. The second type of PE is normally 358 * subordinate PCI devices and buses. The second type of PE is normally
359 * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports. 359 * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports.
360 */ 360 */
361 static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) 361 static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
362 { 362 {
363 struct pci_controller *hose = pci_bus_to_host(bus); 363 struct pci_controller *hose = pci_bus_to_host(bus);
364 struct pnv_phb *phb = hose->private_data; 364 struct pnv_phb *phb = hose->private_data;
365 struct pnv_ioda_pe *pe; 365 struct pnv_ioda_pe *pe;
366 int pe_num; 366 int pe_num;
367 367
368 pe_num = pnv_ioda_alloc_pe(phb); 368 pe_num = pnv_ioda_alloc_pe(phb);
369 if (pe_num == IODA_INVALID_PE) { 369 if (pe_num == IODA_INVALID_PE) {
370 pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", 370 pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
371 __func__, pci_domain_nr(bus), bus->number); 371 __func__, pci_domain_nr(bus), bus->number);
372 return; 372 return;
373 } 373 }
374 374
375 pe = &phb->ioda.pe_array[pe_num]; 375 pe = &phb->ioda.pe_array[pe_num];
376 pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); 376 pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
377 pe->pbus = bus; 377 pe->pbus = bus;
378 pe->pdev = NULL; 378 pe->pdev = NULL;
379 pe->tce32_seg = -1; 379 pe->tce32_seg = -1;
380 pe->mve_number = -1; 380 pe->mve_number = -1;
381 pe->rid = bus->busn_res.start << 8; 381 pe->rid = bus->busn_res.start << 8;
382 pe->dma_weight = 0; 382 pe->dma_weight = 0;
383 383
384 if (all) 384 if (all)
385 pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", 385 pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
386 bus->busn_res.start, bus->busn_res.end, pe_num); 386 bus->busn_res.start, bus->busn_res.end, pe_num);
387 else 387 else
388 pe_info(pe, "Secondary bus %d associated with PE#%d\n", 388 pe_info(pe, "Secondary bus %d associated with PE#%d\n",
389 bus->busn_res.start, pe_num); 389 bus->busn_res.start, pe_num);
390 390
391 if (pnv_ioda_configure_pe(phb, pe)) { 391 if (pnv_ioda_configure_pe(phb, pe)) {
392 /* XXX What do we do here ? */ 392 /* XXX What do we do here ? */
393 if (pe_num) 393 if (pe_num)
394 pnv_ioda_free_pe(phb, pe_num); 394 pnv_ioda_free_pe(phb, pe_num);
395 pe->pbus = NULL; 395 pe->pbus = NULL;
396 return; 396 return;
397 } 397 }
398 398
399 /* Associate it with all child devices */ 399 /* Associate it with all child devices */
400 pnv_ioda_setup_same_PE(bus, pe); 400 pnv_ioda_setup_same_PE(bus, pe);
401 401
402 /* Put PE to the list */ 402 /* Put PE to the list */
403 list_add_tail(&pe->list, &phb->ioda.pe_list); 403 list_add_tail(&pe->list, &phb->ioda.pe_list);
404 404
405 /* Account for one DMA PE if at least one DMA capable device exist 405 /* Account for one DMA PE if at least one DMA capable device exist
406 * below the bridge 406 * below the bridge
407 */ 407 */
408 if (pe->dma_weight != 0) { 408 if (pe->dma_weight != 0) {
409 phb->ioda.dma_weight += pe->dma_weight; 409 phb->ioda.dma_weight += pe->dma_weight;
410 phb->ioda.dma_pe_count++; 410 phb->ioda.dma_pe_count++;
411 } 411 }
412 412
413 /* Link the PE */ 413 /* Link the PE */
414 pnv_ioda_link_pe_by_weight(phb, pe); 414 pnv_ioda_link_pe_by_weight(phb, pe);
415 } 415 }
416 416
417 static void pnv_ioda_setup_PEs(struct pci_bus *bus) 417 static void pnv_ioda_setup_PEs(struct pci_bus *bus)
418 { 418 {
419 struct pci_dev *dev; 419 struct pci_dev *dev;
420 420
421 pnv_ioda_setup_bus_PE(bus, 0); 421 pnv_ioda_setup_bus_PE(bus, 0);
422 422
423 list_for_each_entry(dev, &bus->devices, bus_list) { 423 list_for_each_entry(dev, &bus->devices, bus_list) {
424 if (dev->subordinate) { 424 if (dev->subordinate) {
425 if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) 425 if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
426 pnv_ioda_setup_bus_PE(dev->subordinate, 1); 426 pnv_ioda_setup_bus_PE(dev->subordinate, 1);
427 else 427 else
428 pnv_ioda_setup_PEs(dev->subordinate); 428 pnv_ioda_setup_PEs(dev->subordinate);
429 } 429 }
430 } 430 }
431 } 431 }
432 432
433 /* 433 /*
434 * Configure PEs so that the downstream PCI buses and devices 434 * Configure PEs so that the downstream PCI buses and devices
435 * could have their associated PE#. Unfortunately, we didn't 435 * could have their associated PE#. Unfortunately, we didn't
436 * figure out the way to identify the PLX bridge yet. So we 436 * figure out the way to identify the PLX bridge yet. So we
437 * simply put the PCI bus and the subordinate behind the root 437 * simply put the PCI bus and the subordinate behind the root
438 * port to PE# here. The game rule here is expected to be changed 438 * port to PE# here. The game rule here is expected to be changed
439 * as soon as we can detected PLX bridge correctly. 439 * as soon as we can detected PLX bridge correctly.
440 */ 440 */
441 static void pnv_pci_ioda_setup_PEs(void) 441 static void pnv_pci_ioda_setup_PEs(void)
442 { 442 {
443 struct pci_controller *hose, *tmp; 443 struct pci_controller *hose, *tmp;
444 444
445 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 445 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
446 pnv_ioda_setup_PEs(hose->bus); 446 pnv_ioda_setup_PEs(hose->bus);
447 } 447 }
448 } 448 }
449 449
450 static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) 450 static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
451 { 451 {
452 struct pci_dn *pdn = pci_get_pdn(pdev); 452 struct pci_dn *pdn = pci_get_pdn(pdev);
453 struct pnv_ioda_pe *pe; 453 struct pnv_ioda_pe *pe;
454 454
455 /* 455 /*
456 * The function can be called while the PE# 456 * The function can be called while the PE#
457 * hasn't been assigned. Do nothing for the 457 * hasn't been assigned. Do nothing for the
458 * case. 458 * case.
459 */ 459 */
460 if (!pdn || pdn->pe_number == IODA_INVALID_PE) 460 if (!pdn || pdn->pe_number == IODA_INVALID_PE)
461 return; 461 return;
462 462
463 pe = &phb->ioda.pe_array[pdn->pe_number]; 463 pe = &phb->ioda.pe_array[pdn->pe_number];
464 WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); 464 WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
465 set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table); 465 set_iommu_table_base(&pdev->dev, &pe->tce32_table);
466 } 466 }
467 467
468 static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, 468 static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
469 struct pci_dev *pdev, u64 dma_mask) 469 struct pci_dev *pdev, u64 dma_mask)
470 { 470 {
471 struct pci_dn *pdn = pci_get_pdn(pdev); 471 struct pci_dn *pdn = pci_get_pdn(pdev);
472 struct pnv_ioda_pe *pe; 472 struct pnv_ioda_pe *pe;
473 uint64_t top; 473 uint64_t top;
474 bool bypass = false; 474 bool bypass = false;
475 475
476 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) 476 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
477 return -ENODEV;; 477 return -ENODEV;;
478 478
479 pe = &phb->ioda.pe_array[pdn->pe_number]; 479 pe = &phb->ioda.pe_array[pdn->pe_number];
480 if (pe->tce_bypass_enabled) { 480 if (pe->tce_bypass_enabled) {
481 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1; 481 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
482 bypass = (dma_mask >= top); 482 bypass = (dma_mask >= top);
483 } 483 }
484 484
485 if (bypass) { 485 if (bypass) {
486 dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n"); 486 dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
487 set_dma_ops(&pdev->dev, &dma_direct_ops); 487 set_dma_ops(&pdev->dev, &dma_direct_ops);
488 set_dma_offset(&pdev->dev, pe->tce_bypass_base); 488 set_dma_offset(&pdev->dev, pe->tce_bypass_base);
489 } else { 489 } else {
490 dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); 490 dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
491 set_dma_ops(&pdev->dev, &dma_iommu_ops); 491 set_dma_ops(&pdev->dev, &dma_iommu_ops);
492 set_iommu_table_base(&pdev->dev, &pe->tce32_table); 492 set_iommu_table_base(&pdev->dev, &pe->tce32_table);
493 } 493 }
494 return 0; 494 return 0;
495 } 495 }
496 496
497 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) 497 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
498 { 498 {
499 struct pci_dev *dev; 499 struct pci_dev *dev;
500 500
501 list_for_each_entry(dev, &bus->devices, bus_list) { 501 list_for_each_entry(dev, &bus->devices, bus_list) {
502 set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table); 502 set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table);
503 if (dev->subordinate) 503 if (dev->subordinate)
504 pnv_ioda_setup_bus_dma(pe, dev->subordinate); 504 pnv_ioda_setup_bus_dma(pe, dev->subordinate);
505 } 505 }
506 } 506 }
507 507
508 static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, 508 static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
509 struct iommu_table *tbl, 509 struct iommu_table *tbl,
510 __be64 *startp, __be64 *endp, bool rm) 510 __be64 *startp, __be64 *endp, bool rm)
511 { 511 {
512 __be64 __iomem *invalidate = rm ? 512 __be64 __iomem *invalidate = rm ?
513 (__be64 __iomem *)pe->tce_inval_reg_phys : 513 (__be64 __iomem *)pe->tce_inval_reg_phys :
514 (__be64 __iomem *)tbl->it_index; 514 (__be64 __iomem *)tbl->it_index;
515 unsigned long start, end, inc; 515 unsigned long start, end, inc;
516 516
517 start = __pa(startp); 517 start = __pa(startp);
518 end = __pa(endp); 518 end = __pa(endp);
519 519
520 /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ 520 /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
521 if (tbl->it_busno) { 521 if (tbl->it_busno) {
522 start <<= 12; 522 start <<= 12;
523 end <<= 12; 523 end <<= 12;
524 inc = 128 << 12; 524 inc = 128 << 12;
525 start |= tbl->it_busno; 525 start |= tbl->it_busno;
526 end |= tbl->it_busno; 526 end |= tbl->it_busno;
527 } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) { 527 } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
528 /* p7ioc-style invalidation, 2 TCEs per write */ 528 /* p7ioc-style invalidation, 2 TCEs per write */
529 start |= (1ull << 63); 529 start |= (1ull << 63);
530 end |= (1ull << 63); 530 end |= (1ull << 63);
531 inc = 16; 531 inc = 16;
532 } else { 532 } else {
533 /* Default (older HW) */ 533 /* Default (older HW) */
534 inc = 128; 534 inc = 128;
535 } 535 }
536 536
537 end |= inc - 1; /* round up end to be different than start */ 537 end |= inc - 1; /* round up end to be different than start */
538 538
539 mb(); /* Ensure above stores are visible */ 539 mb(); /* Ensure above stores are visible */
540 while (start <= end) { 540 while (start <= end) {
541 if (rm) 541 if (rm)
542 __raw_rm_writeq(cpu_to_be64(start), invalidate); 542 __raw_rm_writeq(cpu_to_be64(start), invalidate);
543 else 543 else
544 __raw_writeq(cpu_to_be64(start), invalidate); 544 __raw_writeq(cpu_to_be64(start), invalidate);
545 start += inc; 545 start += inc;
546 } 546 }
547 547
548 /* 548 /*
549 * The iommu layer will do another mb() for us on build() 549 * The iommu layer will do another mb() for us on build()
550 * and we don't care on free() 550 * and we don't care on free()
551 */ 551 */
552 } 552 }
553 553
554 static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, 554 static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
555 struct iommu_table *tbl, 555 struct iommu_table *tbl,
556 __be64 *startp, __be64 *endp, bool rm) 556 __be64 *startp, __be64 *endp, bool rm)
557 { 557 {
558 unsigned long start, end, inc; 558 unsigned long start, end, inc;
559 __be64 __iomem *invalidate = rm ? 559 __be64 __iomem *invalidate = rm ?
560 (__be64 __iomem *)pe->tce_inval_reg_phys : 560 (__be64 __iomem *)pe->tce_inval_reg_phys :
561 (__be64 __iomem *)tbl->it_index; 561 (__be64 __iomem *)tbl->it_index;
562 562
563 /* We'll invalidate DMA address in PE scope */ 563 /* We'll invalidate DMA address in PE scope */
564 start = 0x2ul << 60; 564 start = 0x2ul << 60;
565 start |= (pe->pe_number & 0xFF); 565 start |= (pe->pe_number & 0xFF);
566 end = start; 566 end = start;
567 567
568 /* Figure out the start, end and step */ 568 /* Figure out the start, end and step */
569 inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); 569 inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
570 start |= (inc << 12); 570 start |= (inc << 12);
571 inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); 571 inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
572 end |= (inc << 12); 572 end |= (inc << 12);
573 inc = (0x1ul << 12); 573 inc = (0x1ul << 12);
574 mb(); 574 mb();
575 575
576 while (start <= end) { 576 while (start <= end) {
577 if (rm) 577 if (rm)
578 __raw_rm_writeq(cpu_to_be64(start), invalidate); 578 __raw_rm_writeq(cpu_to_be64(start), invalidate);
579 else 579 else
580 __raw_writeq(cpu_to_be64(start), invalidate); 580 __raw_writeq(cpu_to_be64(start), invalidate);
581 start += inc; 581 start += inc;
582 } 582 }
583 } 583 }
584 584
585 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, 585 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
586 __be64 *startp, __be64 *endp, bool rm) 586 __be64 *startp, __be64 *endp, bool rm)
587 { 587 {
588 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, 588 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
589 tce32_table); 589 tce32_table);
590 struct pnv_phb *phb = pe->phb; 590 struct pnv_phb *phb = pe->phb;
591 591
592 if (phb->type == PNV_PHB_IODA1) 592 if (phb->type == PNV_PHB_IODA1)
593 pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm); 593 pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
594 else 594 else
595 pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); 595 pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
596 } 596 }
597 597
598 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, 598 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
599 struct pnv_ioda_pe *pe, unsigned int base, 599 struct pnv_ioda_pe *pe, unsigned int base,
600 unsigned int segs) 600 unsigned int segs)
601 { 601 {
602 602
603 struct page *tce_mem = NULL; 603 struct page *tce_mem = NULL;
604 const __be64 *swinvp; 604 const __be64 *swinvp;
605 struct iommu_table *tbl; 605 struct iommu_table *tbl;
606 unsigned int i; 606 unsigned int i;
607 int64_t rc; 607 int64_t rc;
608 void *addr; 608 void *addr;
609 609
610 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ 610 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
611 #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) 611 #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
612 612
613 /* XXX FIXME: Handle 64-bit only DMA devices */ 613 /* XXX FIXME: Handle 64-bit only DMA devices */
614 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ 614 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
615 /* XXX FIXME: Allocate multi-level tables on PHB3 */ 615 /* XXX FIXME: Allocate multi-level tables on PHB3 */
616 616
617 /* We shouldn't already have a 32-bit DMA associated */ 617 /* We shouldn't already have a 32-bit DMA associated */
618 if (WARN_ON(pe->tce32_seg >= 0)) 618 if (WARN_ON(pe->tce32_seg >= 0))
619 return; 619 return;
620 620
621 /* Grab a 32-bit TCE table */ 621 /* Grab a 32-bit TCE table */
622 pe->tce32_seg = base; 622 pe->tce32_seg = base;
623 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", 623 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
624 (base << 28), ((base + segs) << 28) - 1); 624 (base << 28), ((base + segs) << 28) - 1);
625 625
626 /* XXX Currently, we allocate one big contiguous table for the 626 /* XXX Currently, we allocate one big contiguous table for the
627 * TCEs. We only really need one chunk per 256M of TCE space 627 * TCEs. We only really need one chunk per 256M of TCE space
628 * (ie per segment) but that's an optimization for later, it 628 * (ie per segment) but that's an optimization for later, it
629 * requires some added smarts with our get/put_tce implementation 629 * requires some added smarts with our get/put_tce implementation
630 */ 630 */
631 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 631 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
632 get_order(TCE32_TABLE_SIZE * segs)); 632 get_order(TCE32_TABLE_SIZE * segs));
633 if (!tce_mem) { 633 if (!tce_mem) {
634 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n"); 634 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
635 goto fail; 635 goto fail;
636 } 636 }
637 addr = page_address(tce_mem); 637 addr = page_address(tce_mem);
638 memset(addr, 0, TCE32_TABLE_SIZE * segs); 638 memset(addr, 0, TCE32_TABLE_SIZE * segs);
639 639
640 /* Configure HW */ 640 /* Configure HW */
641 for (i = 0; i < segs; i++) { 641 for (i = 0; i < segs; i++) {
642 rc = opal_pci_map_pe_dma_window(phb->opal_id, 642 rc = opal_pci_map_pe_dma_window(phb->opal_id,
643 pe->pe_number, 643 pe->pe_number,
644 base + i, 1, 644 base + i, 1,
645 __pa(addr) + TCE32_TABLE_SIZE * i, 645 __pa(addr) + TCE32_TABLE_SIZE * i,
646 TCE32_TABLE_SIZE, 0x1000); 646 TCE32_TABLE_SIZE, 0x1000);
647 if (rc) { 647 if (rc) {
648 pe_err(pe, " Failed to configure 32-bit TCE table," 648 pe_err(pe, " Failed to configure 32-bit TCE table,"
649 " err %ld\n", rc); 649 " err %ld\n", rc);
650 goto fail; 650 goto fail;
651 } 651 }
652 } 652 }
653 653
654 /* Setup linux iommu table */ 654 /* Setup linux iommu table */
655 tbl = &pe->tce32_table; 655 tbl = &pe->tce32_table;
656 pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, 656 pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
657 base << 28); 657 base << 28);
658 658
659 /* OPAL variant of P7IOC SW invalidated TCEs */ 659 /* OPAL variant of P7IOC SW invalidated TCEs */
660 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 660 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
661 if (swinvp) { 661 if (swinvp) {
662 /* We need a couple more fields -- an address and a data 662 /* We need a couple more fields -- an address and a data
663 * to or. Since the bus is only printed out on table free 663 * to or. Since the bus is only printed out on table free
664 * errors, and on the first pass the data will be a relative 664 * errors, and on the first pass the data will be a relative
665 * bus number, print that out instead. 665 * bus number, print that out instead.
666 */ 666 */
667 tbl->it_busno = 0; 667 tbl->it_busno = 0;
668 pe->tce_inval_reg_phys = be64_to_cpup(swinvp); 668 pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
669 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 669 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
670 8); 670 8);
671 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE | 671 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
672 TCE_PCI_SWINV_PAIR; 672 TCE_PCI_SWINV_PAIR;
673 } 673 }
674 iommu_init_table(tbl, phb->hose->node); 674 iommu_init_table(tbl, phb->hose->node);
675 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); 675 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
676 676
677 if (pe->pdev) 677 if (pe->pdev)
678 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 678 set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
679 else 679 else
680 pnv_ioda_setup_bus_dma(pe, pe->pbus); 680 pnv_ioda_setup_bus_dma(pe, pe->pbus);
681 681
682 return; 682 return;
683 fail: 683 fail:
684 /* XXX Failure: Try to fallback to 64-bit only ? */ 684 /* XXX Failure: Try to fallback to 64-bit only ? */
685 if (pe->tce32_seg >= 0) 685 if (pe->tce32_seg >= 0)
686 pe->tce32_seg = -1; 686 pe->tce32_seg = -1;
687 if (tce_mem) 687 if (tce_mem)
688 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); 688 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
689 } 689 }
690 690
691 static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) 691 static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
692 { 692 {
693 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, 693 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
694 tce32_table); 694 tce32_table);
695 uint16_t window_id = (pe->pe_number << 1 ) + 1; 695 uint16_t window_id = (pe->pe_number << 1 ) + 1;
696 int64_t rc; 696 int64_t rc;
697 697
698 pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis"); 698 pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
699 if (enable) { 699 if (enable) {
700 phys_addr_t top = memblock_end_of_DRAM(); 700 phys_addr_t top = memblock_end_of_DRAM();
701 701
702 top = roundup_pow_of_two(top); 702 top = roundup_pow_of_two(top);
703 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 703 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
704 pe->pe_number, 704 pe->pe_number,
705 window_id, 705 window_id,
706 pe->tce_bypass_base, 706 pe->tce_bypass_base,
707 top); 707 top);
708 } else { 708 } else {
709 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 709 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
710 pe->pe_number, 710 pe->pe_number,
711 window_id, 711 window_id,
712 pe->tce_bypass_base, 712 pe->tce_bypass_base,
713 0); 713 0);
714 714
715 /* 715 /*
716 * We might want to reset the DMA ops of all devices on 716 * We might want to reset the DMA ops of all devices on
717 * this PE. However in theory, that shouldn't be necessary 717 * this PE. However in theory, that shouldn't be necessary
718 * as this is used for VFIO/KVM pass-through and the device 718 * as this is used for VFIO/KVM pass-through and the device
719 * hasn't yet been returned to its kernel driver 719 * hasn't yet been returned to its kernel driver
720 */ 720 */
721 } 721 }
722 if (rc) 722 if (rc)
723 pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); 723 pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
724 else 724 else
725 pe->tce_bypass_enabled = enable; 725 pe->tce_bypass_enabled = enable;
726 } 726 }
727 727
728 static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, 728 static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
729 struct pnv_ioda_pe *pe) 729 struct pnv_ioda_pe *pe)
730 { 730 {
731 /* TVE #1 is selected by PCI address bit 59 */ 731 /* TVE #1 is selected by PCI address bit 59 */
732 pe->tce_bypass_base = 1ull << 59; 732 pe->tce_bypass_base = 1ull << 59;
733 733
734 /* Install set_bypass callback for VFIO */ 734 /* Install set_bypass callback for VFIO */
735 pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass; 735 pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
736 736
737 /* Enable bypass by default */ 737 /* Enable bypass by default */
738 pnv_pci_ioda2_set_bypass(&pe->tce32_table, true); 738 pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
739 } 739 }
740 740
741 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, 741 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
742 struct pnv_ioda_pe *pe) 742 struct pnv_ioda_pe *pe)
743 { 743 {
744 struct page *tce_mem = NULL; 744 struct page *tce_mem = NULL;
745 void *addr; 745 void *addr;
746 const __be64 *swinvp; 746 const __be64 *swinvp;
747 struct iommu_table *tbl; 747 struct iommu_table *tbl;
748 unsigned int tce_table_size, end; 748 unsigned int tce_table_size, end;
749 int64_t rc; 749 int64_t rc;
750 750
751 /* We shouldn't already have a 32-bit DMA associated */ 751 /* We shouldn't already have a 32-bit DMA associated */
752 if (WARN_ON(pe->tce32_seg >= 0)) 752 if (WARN_ON(pe->tce32_seg >= 0))
753 return; 753 return;
754 754
755 /* The PE will reserve all possible 32-bits space */ 755 /* The PE will reserve all possible 32-bits space */
756 pe->tce32_seg = 0; 756 pe->tce32_seg = 0;
757 end = (1 << ilog2(phb->ioda.m32_pci_base)); 757 end = (1 << ilog2(phb->ioda.m32_pci_base));
758 tce_table_size = (end / 0x1000) * 8; 758 tce_table_size = (end / 0x1000) * 8;
759 pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", 759 pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
760 end); 760 end);
761 761
762 /* Allocate TCE table */ 762 /* Allocate TCE table */
763 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 763 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
764 get_order(tce_table_size)); 764 get_order(tce_table_size));
765 if (!tce_mem) { 765 if (!tce_mem) {
766 pe_err(pe, "Failed to allocate a 32-bit TCE memory\n"); 766 pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
767 goto fail; 767 goto fail;
768 } 768 }
769 addr = page_address(tce_mem); 769 addr = page_address(tce_mem);
770 memset(addr, 0, tce_table_size); 770 memset(addr, 0, tce_table_size);
771 771
772 /* 772 /*
773 * Map TCE table through TVT. The TVE index is the PE number 773 * Map TCE table through TVT. The TVE index is the PE number
774 * shifted by 1 bit for 32-bits DMA space. 774 * shifted by 1 bit for 32-bits DMA space.
775 */ 775 */
776 rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, 776 rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
777 pe->pe_number << 1, 1, __pa(addr), 777 pe->pe_number << 1, 1, __pa(addr),
778 tce_table_size, 0x1000); 778 tce_table_size, 0x1000);
779 if (rc) { 779 if (rc) {
780 pe_err(pe, "Failed to configure 32-bit TCE table," 780 pe_err(pe, "Failed to configure 32-bit TCE table,"
781 " err %ld\n", rc); 781 " err %ld\n", rc);
782 goto fail; 782 goto fail;
783 } 783 }
784 784
785 /* Setup linux iommu table */ 785 /* Setup linux iommu table */
786 tbl = &pe->tce32_table; 786 tbl = &pe->tce32_table;
787 pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0); 787 pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);
788 788
789 /* OPAL variant of PHB3 invalidated TCEs */ 789 /* OPAL variant of PHB3 invalidated TCEs */
790 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 790 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
791 if (swinvp) { 791 if (swinvp) {
792 /* We need a couple more fields -- an address and a data 792 /* We need a couple more fields -- an address and a data
793 * to or. Since the bus is only printed out on table free 793 * to or. Since the bus is only printed out on table free
794 * errors, and on the first pass the data will be a relative 794 * errors, and on the first pass the data will be a relative
795 * bus number, print that out instead. 795 * bus number, print that out instead.
796 */ 796 */
797 tbl->it_busno = 0; 797 tbl->it_busno = 0;
798 pe->tce_inval_reg_phys = be64_to_cpup(swinvp); 798 pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
799 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 799 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
800 8); 800 8);
801 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE; 801 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
802 } 802 }
803 iommu_init_table(tbl, phb->hose->node); 803 iommu_init_table(tbl, phb->hose->node);
804 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); 804 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
805 805
806 if (pe->pdev) 806 if (pe->pdev)
807 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 807 set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
808 else 808 else
809 pnv_ioda_setup_bus_dma(pe, pe->pbus); 809 pnv_ioda_setup_bus_dma(pe, pe->pbus);
810 810
811 /* Also create a bypass window */ 811 /* Also create a bypass window */
812 pnv_pci_ioda2_setup_bypass_pe(phb, pe); 812 pnv_pci_ioda2_setup_bypass_pe(phb, pe);
813 return; 813 return;
814 fail: 814 fail:
815 if (pe->tce32_seg >= 0) 815 if (pe->tce32_seg >= 0)
816 pe->tce32_seg = -1; 816 pe->tce32_seg = -1;
817 if (tce_mem) 817 if (tce_mem)
818 __free_pages(tce_mem, get_order(tce_table_size)); 818 __free_pages(tce_mem, get_order(tce_table_size));
819 } 819 }
820 820
821 static void pnv_ioda_setup_dma(struct pnv_phb *phb) 821 static void pnv_ioda_setup_dma(struct pnv_phb *phb)
822 { 822 {
823 struct pci_controller *hose = phb->hose; 823 struct pci_controller *hose = phb->hose;
824 unsigned int residual, remaining, segs, tw, base; 824 unsigned int residual, remaining, segs, tw, base;
825 struct pnv_ioda_pe *pe; 825 struct pnv_ioda_pe *pe;
826 826
827 /* If we have more PE# than segments available, hand out one 827 /* If we have more PE# than segments available, hand out one
828 * per PE until we run out and let the rest fail. If not, 828 * per PE until we run out and let the rest fail. If not,
829 * then we assign at least one segment per PE, plus more based 829 * then we assign at least one segment per PE, plus more based
830 * on the amount of devices under that PE 830 * on the amount of devices under that PE
831 */ 831 */
832 if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) 832 if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
833 residual = 0; 833 residual = 0;
834 else 834 else
835 residual = phb->ioda.tce32_count - 835 residual = phb->ioda.tce32_count -
836 phb->ioda.dma_pe_count; 836 phb->ioda.dma_pe_count;
837 837
838 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", 838 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
839 hose->global_number, phb->ioda.tce32_count); 839 hose->global_number, phb->ioda.tce32_count);
840 pr_info("PCI: %d PE# for a total weight of %d\n", 840 pr_info("PCI: %d PE# for a total weight of %d\n",
841 phb->ioda.dma_pe_count, phb->ioda.dma_weight); 841 phb->ioda.dma_pe_count, phb->ioda.dma_weight);
842 842
843 /* Walk our PE list and configure their DMA segments, hand them 843 /* Walk our PE list and configure their DMA segments, hand them
844 * out one base segment plus any residual segments based on 844 * out one base segment plus any residual segments based on
845 * weight 845 * weight
846 */ 846 */
847 remaining = phb->ioda.tce32_count; 847 remaining = phb->ioda.tce32_count;
848 tw = phb->ioda.dma_weight; 848 tw = phb->ioda.dma_weight;
849 base = 0; 849 base = 0;
850 list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { 850 list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
851 if (!pe->dma_weight) 851 if (!pe->dma_weight)
852 continue; 852 continue;
853 if (!remaining) { 853 if (!remaining) {
854 pe_warn(pe, "No DMA32 resources available\n"); 854 pe_warn(pe, "No DMA32 resources available\n");
855 continue; 855 continue;
856 } 856 }
857 segs = 1; 857 segs = 1;
858 if (residual) { 858 if (residual) {
859 segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; 859 segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
860 if (segs > remaining) 860 if (segs > remaining)
861 segs = remaining; 861 segs = remaining;
862 } 862 }
863 863
864 /* 864 /*
865 * For IODA2 compliant PHB3, we needn't care about the weight. 865 * For IODA2 compliant PHB3, we needn't care about the weight.
866 * The all available 32-bits DMA space will be assigned to 866 * The all available 32-bits DMA space will be assigned to
867 * the specific PE. 867 * the specific PE.
868 */ 868 */
869 if (phb->type == PNV_PHB_IODA1) { 869 if (phb->type == PNV_PHB_IODA1) {
870 pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", 870 pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
871 pe->dma_weight, segs); 871 pe->dma_weight, segs);
872 pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); 872 pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
873 } else { 873 } else {
874 pe_info(pe, "Assign DMA32 space\n"); 874 pe_info(pe, "Assign DMA32 space\n");
875 segs = 0; 875 segs = 0;
876 pnv_pci_ioda2_setup_dma_pe(phb, pe); 876 pnv_pci_ioda2_setup_dma_pe(phb, pe);
877 } 877 }
878 878
879 remaining -= segs; 879 remaining -= segs;
880 base += segs; 880 base += segs;
881 } 881 }
882 } 882 }
883 883
884 #ifdef CONFIG_PCI_MSI 884 #ifdef CONFIG_PCI_MSI
static void pnv_ioda2_msi_eoi(struct irq_data *d)
{
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	struct irq_chip *chip = irq_data_get_irq_chip(d);
	struct pnv_phb *phb = container_of(chip, struct pnv_phb,
					   ioda.irq_chip);
	int64_t rc;

	rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
	WARN_ON_ONCE(rc);

	icp_native_eoi(d);
}

static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
				  unsigned int hwirq, unsigned int virq,
				  unsigned int is_64, struct msi_msg *msg)
{
	struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
	struct pci_dn *pdn = pci_get_pdn(dev);
	struct irq_data *idata;
	struct irq_chip *ichip;
	unsigned int xive_num = hwirq - phb->msi_base;
	__be32 data;
	int rc;

	/* No PE assigned ? bail out ... no MSI for you ! */
	if (pe == NULL)
		return -ENXIO;

	/* Check if we have an MVE */
	if (pe->mve_number < 0)
		return -ENXIO;

	/* Force 32-bit MSI on some broken devices */
	if (pdn && pdn->force_32bit_msi)
		is_64 = 0;

	/* Assign XIVE to PE */
	rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
	if (rc) {
		pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
			pci_name(dev), rc, xive_num);
		return -EIO;
	}

	if (is_64) {
		__be64 addr64;

		rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr64, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = be64_to_cpu(addr64) >> 32;
		msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful;
	} else {
		__be32 addr32;

		rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
				     &addr32, &data);
		if (rc) {
			pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
				pci_name(dev), rc);
			return -EIO;
		}
		msg->address_hi = 0;
		msg->address_lo = be32_to_cpu(addr32);
	}
	msg->data = be32_to_cpu(data);

	/*
	 * Change the IRQ chip for the MSI interrupts on PHB3: the
	 * corresponding IRQ chip is populated the first time through
	 * and reused afterwards.
	 */
	if (phb->type == PNV_PHB_IODA2) {
		if (!phb->ioda.irq_chip_init) {
			idata = irq_get_irq_data(virq);
			ichip = irq_data_get_irq_chip(idata);
			phb->ioda.irq_chip_init = 1;
			phb->ioda.irq_chip = *ichip;
			phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
		}

		irq_set_chip(virq, &phb->ioda.irq_chip);
	}

	pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
		 " address=%x_%08x data=%x PE# %d\n",
		 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
		 msg->address_hi, msg->address_lo, data, pe->pe_number);

	return 0;
}

static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
	unsigned int count;
	const __be32 *prop = of_get_property(phb->hose->dn,
					     "ibm,opal-msi-ranges", NULL);
	if (!prop) {
		/* BML Fallback */
		prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
	}
	if (!prop)
		return;

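	/* Only the first pair of cells is consumed here:
	 * prop[0] = MSI base, prop[1] = number of MSIs.
	 */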
	phb->msi_base = be32_to_cpup(prop);
	count = be32_to_cpup(prop + 1);
	if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
		pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
		       phb->hose->global_number);
		return;
	}

	phb->msi_setup = pnv_pci_ioda_msi_setup;
	phb->msi32_support = 1;
	pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
		count, phb->msi_base);
}
#else
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */

/*
 * This function is supposed to be called on a per-PE basis, from top
 * to bottom, so the I/O or MMIO segment assigned to a parent PE can
 * be overridden by its child PEs if necessary.
 */
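/*
 * Example with illustrative numbers: with io_segsize = 64K, a bus
 * window spanning I/O space 0x20000..0x3ffff maps segments #2 and #3
 * to this PE, both in io_segmap[] and in the PHB via
 * opal_pci_map_pe_mmio_window().
 */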
static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
				  struct pnv_ioda_pe *pe)
{
	struct pnv_phb *phb = hose->private_data;
	struct pci_bus_region region;
	struct resource *res;
	int i, index;
	int rc;

	/*
	 * NOTE: We only care about PCI-bus-based PEs for now. PEs based
	 * on PCI devices, for example SR-IOV VFs, will be figured out
	 * later.
	 */
	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));

	pci_bus_for_each_resource(pe->pbus, res, i) {
		if (!res || !res->flags ||
		    res->start > res->end)
			continue;

		if (res->flags & IORESOURCE_IO) {
			region.start = res->start - phb->ioda.io_pci_base;
			region.end = res->end - phb->ioda.io_pci_base;
			index = region.start / phb->ioda.io_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.io_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping IO "
					       "segment #%d to PE#%d\n",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.io_segsize;
				index++;
			}
		} else if (res->flags & IORESOURCE_MEM) {
			/* WARNING: Assumes M32 is mem region 0 in PHB. We need to
			 * harden that algorithm when we start supporting M64
			 */
			region.start = res->start -
				       hose->mem_offset[0] -
				       phb->ioda.m32_pci_base;
			region.end = res->end -
				     hose->mem_offset[0] -
				     phb->ioda.m32_pci_base;
			index = region.start / phb->ioda.m32_segsize;

			while (index < phb->ioda.total_pe &&
			       region.start <= region.end) {
				phb->ioda.m32_segmap[index] = pe->pe_number;
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
				if (rc != OPAL_SUCCESS) {
					pr_err("%s: OPAL error %d when mapping M32 "
					       "segment#%d to PE#%d",
					       __func__, rc, index, pe->pe_number);
					break;
				}

				region.start += phb->ioda.m32_segsize;
				index++;
			}
		}
	}
}

static void pnv_pci_ioda_setup_seg(void)
{
	struct pci_controller *tmp, *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;
		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
			pnv_ioda_setup_pe_seg(hose, pe);
		}
	}
}

static void pnv_pci_ioda_setup_DMA(void)
{
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pnv_ioda_setup_dma(hose->private_data);

		/* Mark the PHB initialization done */
		phb = hose->private_data;
		phb->initialized = 1;
	}
}

static void pnv_pci_ioda_create_dbgfs(void)
{
#ifdef CONFIG_DEBUG_FS
	struct pci_controller *hose, *tmp;
	struct pnv_phb *phb;
	char name[16];

	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		phb = hose->private_data;

		sprintf(name, "PCI%04x", hose->global_number);
		phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
		if (!phb->dbgfs)
			pr_warning("%s: Error on creating debugfs on PHB#%x\n",
				   __func__, hose->global_number);
	}
#endif /* CONFIG_DEBUG_FS */
}

static void pnv_pci_ioda_fixup(void)
{
	pnv_pci_ioda_setup_PEs();
	pnv_pci_ioda_setup_seg();
	pnv_pci_ioda_setup_DMA();

	pnv_pci_ioda_create_dbgfs();

#ifdef CONFIG_EEH
	eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
	eeh_addr_cache_build();
	eeh_init();
#endif
}

/*
 * Returns the alignment for I/O or memory windows for P2P
 * bridges. That actually depends on how PEs are segmented.
 * For now, we return the I/O or M32 segment size for PE-sensitive
 * P2P bridges. Otherwise, the default values (4KiB for I/O,
 * 1MiB for memory) will be returned.
 *
 * The current PCI bus might be put into one PE, which was
 * created against the parent PCI bridge. In that case, we
 * needn't enlarge the alignment, which saves some resources.
 */
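/*
 * Example with illustrative numbers: a PHB with total_pe = 256 and a
 * 2GB M32 window has m32_segsize = 8MB, so the MMIO windows of
 * PE-sensitive bridges get aligned to 8MB and every subordinate bus
 * starts on a fresh M32 segment. When two or more conventional PCI
 * bridges sit upstream, an alignment of 1 (i.e. no constraint) is
 * returned instead.
 */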
static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
						unsigned long type)
{
	struct pci_dev *bridge;
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	int num_pci_bridges = 0;

	bridge = bus->self;
	while (bridge) {
		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
			num_pci_bridges++;
			if (num_pci_bridges >= 2)
				return 1;
		}

		bridge = bridge->bus->self;
	}

	/* We need to support the prefetchable memory window later */
	if (type & IORESOURCE_MEM)
		return phb->ioda.m32_segsize;

	return phb->ioda.io_segsize;
}

/* Prevent enabling devices for which we couldn't properly
 * assign a PE
 */
static int pnv_pci_enable_device_hook(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn;

	/* This function may be called before the PEs have been
	 * created, for example during resource reassignment in the
	 * PCI probe period. Just skip the check if the PEs aren't
	 * ready yet.
	 */
	if (!phb->initialized)
		return 0;

	pdn = pci_get_pdn(dev);
	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return -EINVAL;

	return 0;
}

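/*
 * Reverse map from an RID (bus/devfn) to a PE number. E.g. for bus
 * 0x05, devfn 0x08, the lookup index is (0x05 << 8) | 0x08 = 0x0508.
 */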
static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
			       u32 devfn)
{
	return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}

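/* Reset the IODA tables at shutdown so a kexec'd kernel starts clean */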
static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
{
	opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
		       OPAL_ASSERT_RESET);
}

void __init pnv_pci_init_ioda_phb(struct device_node *np,
				  u64 hub_id, int ioda_type)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	unsigned long size, m32map_off, pemap_off, iomap_off = 0;
	const __be64 *prop64;
	const __be32 *prop32;
	int len;
	u64 phb_id;
	void *aux;
	long rc;

	pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);

	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
	if (!prop64) {
		pr_err(" Missing \"ibm,opal-phbid\" property !\n");
		return;
	}
	phb_id = be64_to_cpup(prop64);
	pr_debug(" PHB-ID : 0x%016llx\n", phb_id);

	phb = alloc_bootmem(sizeof(struct pnv_phb));
	if (!phb) {
		pr_err(" Out of memory !\n");
		return;
	}

	/* Allocate PCI controller */
	memset(phb, 0, sizeof(struct pnv_phb));
	phb->hose = hose = pcibios_alloc_controller(np);
	if (!phb->hose) {
		pr_err(" Can't allocate PCI controller for %s\n",
		       np->full_name);
		free_bootmem((unsigned long)phb, sizeof(struct pnv_phb));
		return;
	}

	spin_lock_init(&phb->lock);
	prop32 = of_get_property(np, "bus-range", &len);
	if (prop32 && len == 8) {
		hose->first_busno = be32_to_cpu(prop32[0]);
		hose->last_busno = be32_to_cpu(prop32[1]);
	} else {
		pr_warn(" Broken <bus-range> on %s\n", np->full_name);
		hose->first_busno = 0;
		hose->last_busno = 0xff;
	}
	hose->private_data = phb;
	phb->hub_id = hub_id;
	phb->opal_id = phb_id;
	phb->type = ioda_type;

	/* Detect specific models for error handling */
	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
		phb->model = PNV_PHB_MODEL_P7IOC;
	else if (of_device_is_compatible(np, "ibm,power8-pciex"))
		phb->model = PNV_PHB_MODEL_PHB3;
	else
		phb->model = PNV_PHB_MODEL_UNKNOWN;

	/* Parse 32-bit and IO ranges (if any) */
	pci_process_bridge_OF_ranges(hose, np, !hose->global_number);

	/* Get registers */
	phb->regs = of_iomap(np, 0);
	if (phb->regs == NULL)
		pr_err(" Failed to map registers !\n");

	/* Initialize more IODA stuff */
	phb->ioda.total_pe = 1;
	prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
	if (prop32)
		phb->ioda.total_pe = be32_to_cpup(prop32);
	prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
	if (prop32)
		phb->ioda.reserved_pe = be32_to_cpup(prop32);
	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
	/* FW has already taken the top 64K of M32 space (MSI space)
	 * off; add it back to get the full window size.
	 */
	phb->ioda.m32_size += 0x10000;

	phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
	phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
	phb->ioda.io_size = hose->pci_io_size;
	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

	/* Allocate aux data & arrays. We don't have IO ports on PHB3 */
	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
	m32map_off = size;
	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
	if (phb->type == PNV_PHB_IODA1) {
		iomap_off = size;
		size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
	}
	pemap_off = size;
	size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
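	/*
	 * The single bootmem block computed above is carved up as:
	 * [pe_alloc bitmap][m32_segmap][io_segmap (IODA1 only)][pe_array]
	 */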
	aux = alloc_bootmem(size);
	memset(aux, 0, size);
	phb->ioda.pe_alloc = aux;
	phb->ioda.m32_segmap = aux + m32map_off;
	if (phb->type == PNV_PHB_IODA1)
		phb->ioda.io_segmap = aux + iomap_off;
	phb->ioda.pe_array = aux + pemap_off;
	set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);

	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
	INIT_LIST_HEAD(&phb->ioda.pe_list);

	/* Calculate how many 32-bit TCE segments we have */
	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
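	/* The DMA32 space below m32_pci_base is carved into 256MB
	 * (1 << 28) segments; e.g. a base at 2GB yields 8 segments
	 * (illustrative numbers).
	 */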

	/* Clear unusable m64 */
	hose->mem_resources[1].flags = 0;
	hose->mem_resources[1].start = 0;
	hose->mem_resources[1].end = 0;
	hose->mem_resources[2].flags = 0;
	hose->mem_resources[2].start = 0;
	hose->mem_resources[2].end = 0;

#if 0 /* We should really do that ... */
	rc = opal_pci_set_phb_mem_window(opal->phb_id,
					 window_type,
					 window_num,
					 starting_real_address,
					 starting_pci_address,
					 segment_size);
#endif

	pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]"
		" IO: 0x%x [segment=0x%x]\n",
		phb->ioda.total_pe,
		phb->ioda.reserved_pe,
		phb->ioda.m32_size, phb->ioda.m32_segsize,
		phb->ioda.io_size, phb->ioda.io_segsize);

	phb->hose->ops = &pnv_pci_ops;
#ifdef CONFIG_EEH
	phb->eeh_ops = &ioda_eeh_ops;
#endif

	/* Setup RID -> PE mapping function */
	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

	/* Setup TCEs */
	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
	phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;

	/* Setup shutdown function for kexec */
	phb->shutdown = pnv_pci_ioda_shutdown;

	/* Setup MSI support */
	pnv_pci_init_ioda_msis(phb);

	/*
	 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
	 * to let the PCI core do resource assignment. The expectation
	 * is that the PCI core will apply the correct I/O and MMIO
	 * alignment to the P2P bridge BARs so that each PCI bus
	 * (excluding the child P2P bridges) can form an individual PE.
	 */
	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
	pci_add_flags(PCI_REASSIGN_ALL_RSRC);

	/* Reset IODA tables to a clean state */
	rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
	if (rc)
		pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
}

void __init pnv_pci_init_ioda2_phb(struct device_node *np)
{
	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
}

void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
	struct device_node *phbn;
	const __be64 *prop64;
	u64 hub_id;

	pr_info("Probing IODA IO-Hub %s\n", np->full_name);

	prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
	if (!prop64) {
		pr_err(" Missing \"ibm,opal-hubid\" property !\n");
		return;
	}
	hub_id = be64_to_cpup(prop64);
	pr_devel(" HUB-ID : 0x%016llx\n", hub_id);

	/* Count child PHBs */
	for_each_child_of_node(np, phbn) {
		/* Look for IODA1 PHBs */
		if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
			pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
	}
}