Commit 361f2a2a1536a1d7ff6f52bf0e4848c1441e17ab

Authored by Gavin Shan
Committed by Benjamin Herrenschmidt
1 parent d92a208d08

powerpc/powernv: Reset PHB in kdump kernel

In the kdump scenario, the first kernel doesn't shut down PCI devices, and
the kdump kernel cleans the PHB IODA tables at early probe time.
That means the kdump kernel can't handle PCI transactions still in flight
from the first kernel; instead, lots of EEH errors and frozen PEs
will be detected.

In order to avoid those EEH errors, the PHB is reset to drop all
PCI transactions left pending by the first kernel.
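
The mechanism is visible in the diff below: ioda_eeh_phb_reset() loses its
static qualifier so the PowerNV PCI setup code can call it, and its delays
become usable during early boot. As a rough sketch of how the kdump-time
reset is triggered (the helper name and exact placement are illustrative
assumptions, not the literal hunk; a declaration of ioda_eeh_phb_reset() is
assumed to be in scope):

#include <linux/crash_dump.h>	/* for is_kdump_kernel() */

/*
 * Illustrative sketch only: when booting a kdump kernel, walk every PHB
 * and issue a complete reset so that PCI transactions left in flight by
 * the crashed kernel are dropped instead of surfacing as EEH errors and
 * frozen PEs.
 */
static void pnv_pci_reset_phbs_for_kdump(void)
{
	struct pci_controller *hose;

	if (!is_kdump_kernel())
		return;

	list_for_each_entry(hose, &hose_list, list_node) {
		ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
		ioda_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
	}
}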

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Showing 3 changed files with 24 additions and 4 deletions

arch/powerpc/platforms/powernv/eeh-ioda.c
1 /* 1 /*
2 * The file intends to implement the functions needed by EEH, which is 2 * The file intends to implement the functions needed by EEH, which is
3 * built on IODA compliant chip. Actually, lots of functions related 3 * built on IODA compliant chip. Actually, lots of functions related
4 * to EEH would be built based on the OPAL APIs. 4 * to EEH would be built based on the OPAL APIs.
5 * 5 *
6 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. 6 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by 9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or 10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version. 11 * (at your option) any later version.
12 */ 12 */
13 13
14 #include <linux/bootmem.h> 14 #include <linux/bootmem.h>
15 #include <linux/debugfs.h> 15 #include <linux/debugfs.h>
16 #include <linux/delay.h> 16 #include <linux/delay.h>
17 #include <linux/io.h> 17 #include <linux/io.h>
18 #include <linux/irq.h> 18 #include <linux/irq.h>
19 #include <linux/kernel.h> 19 #include <linux/kernel.h>
20 #include <linux/msi.h> 20 #include <linux/msi.h>
21 #include <linux/notifier.h> 21 #include <linux/notifier.h>
22 #include <linux/pci.h> 22 #include <linux/pci.h>
23 #include <linux/string.h> 23 #include <linux/string.h>
24 24
25 #include <asm/eeh.h> 25 #include <asm/eeh.h>
26 #include <asm/eeh_event.h> 26 #include <asm/eeh_event.h>
27 #include <asm/io.h> 27 #include <asm/io.h>
28 #include <asm/iommu.h> 28 #include <asm/iommu.h>
29 #include <asm/msi_bitmap.h> 29 #include <asm/msi_bitmap.h>
30 #include <asm/opal.h> 30 #include <asm/opal.h>
31 #include <asm/pci-bridge.h> 31 #include <asm/pci-bridge.h>
32 #include <asm/ppc-pci.h> 32 #include <asm/ppc-pci.h>
33 #include <asm/tce.h> 33 #include <asm/tce.h>
34 34
35 #include "powernv.h" 35 #include "powernv.h"
36 #include "pci.h" 36 #include "pci.h"
37 37
38 static int ioda_eeh_nb_init = 0; 38 static int ioda_eeh_nb_init = 0;
39 39
40 static int ioda_eeh_event(struct notifier_block *nb, 40 static int ioda_eeh_event(struct notifier_block *nb,
41 unsigned long events, void *change) 41 unsigned long events, void *change)
42 { 42 {
43 uint64_t changed_evts = (uint64_t)change; 43 uint64_t changed_evts = (uint64_t)change;
44 44
45 /* 45 /*
46 * We simply send special EEH event if EEH has 46 * We simply send special EEH event if EEH has
47 * been enabled, or clear pending events in 47 * been enabled, or clear pending events in
48 * case that we enable EEH soon 48 * case that we enable EEH soon
49 */ 49 */
50 if (!(changed_evts & OPAL_EVENT_PCI_ERROR) || 50 if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
51 !(events & OPAL_EVENT_PCI_ERROR)) 51 !(events & OPAL_EVENT_PCI_ERROR))
52 return 0; 52 return 0;
53 53
54 if (eeh_enabled()) 54 if (eeh_enabled())
55 eeh_send_failure_event(NULL); 55 eeh_send_failure_event(NULL);
56 else 56 else
57 opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); 57 opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
58 58
59 return 0; 59 return 0;
60 } 60 }
61 61
62 static struct notifier_block ioda_eeh_nb = { 62 static struct notifier_block ioda_eeh_nb = {
63 .notifier_call = ioda_eeh_event, 63 .notifier_call = ioda_eeh_event,
64 .next = NULL, 64 .next = NULL,
65 .priority = 0 65 .priority = 0
66 }; 66 };
67 67
68 #ifdef CONFIG_DEBUG_FS 68 #ifdef CONFIG_DEBUG_FS
69 static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val) 69 static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val)
70 { 70 {
71 struct pci_controller *hose = data; 71 struct pci_controller *hose = data;
72 struct pnv_phb *phb = hose->private_data; 72 struct pnv_phb *phb = hose->private_data;
73 73
74 out_be64(phb->regs + offset, val); 74 out_be64(phb->regs + offset, val);
75 return 0; 75 return 0;
76 } 76 }
77 77
78 static int ioda_eeh_dbgfs_get(void *data, int offset, u64 *val) 78 static int ioda_eeh_dbgfs_get(void *data, int offset, u64 *val)
79 { 79 {
80 struct pci_controller *hose = data; 80 struct pci_controller *hose = data;
81 struct pnv_phb *phb = hose->private_data; 81 struct pnv_phb *phb = hose->private_data;
82 82
83 *val = in_be64(phb->regs + offset); 83 *val = in_be64(phb->regs + offset);
84 return 0; 84 return 0;
85 } 85 }
86 86
87 static int ioda_eeh_outb_dbgfs_set(void *data, u64 val) 87 static int ioda_eeh_outb_dbgfs_set(void *data, u64 val)
88 { 88 {
89 return ioda_eeh_dbgfs_set(data, 0xD10, val); 89 return ioda_eeh_dbgfs_set(data, 0xD10, val);
90 } 90 }
91 91
92 static int ioda_eeh_outb_dbgfs_get(void *data, u64 *val) 92 static int ioda_eeh_outb_dbgfs_get(void *data, u64 *val)
93 { 93 {
94 return ioda_eeh_dbgfs_get(data, 0xD10, val); 94 return ioda_eeh_dbgfs_get(data, 0xD10, val);
95 } 95 }
96 96
97 static int ioda_eeh_inbA_dbgfs_set(void *data, u64 val) 97 static int ioda_eeh_inbA_dbgfs_set(void *data, u64 val)
98 { 98 {
99 return ioda_eeh_dbgfs_set(data, 0xD90, val); 99 return ioda_eeh_dbgfs_set(data, 0xD90, val);
100 } 100 }
101 101
102 static int ioda_eeh_inbA_dbgfs_get(void *data, u64 *val) 102 static int ioda_eeh_inbA_dbgfs_get(void *data, u64 *val)
103 { 103 {
104 return ioda_eeh_dbgfs_get(data, 0xD90, val); 104 return ioda_eeh_dbgfs_get(data, 0xD90, val);
105 } 105 }
106 106
107 static int ioda_eeh_inbB_dbgfs_set(void *data, u64 val) 107 static int ioda_eeh_inbB_dbgfs_set(void *data, u64 val)
108 { 108 {
109 return ioda_eeh_dbgfs_set(data, 0xE10, val); 109 return ioda_eeh_dbgfs_set(data, 0xE10, val);
110 } 110 }
111 111
112 static int ioda_eeh_inbB_dbgfs_get(void *data, u64 *val) 112 static int ioda_eeh_inbB_dbgfs_get(void *data, u64 *val)
113 { 113 {
114 return ioda_eeh_dbgfs_get(data, 0xE10, val); 114 return ioda_eeh_dbgfs_get(data, 0xE10, val);
115 } 115 }
116 116
117 DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_outb_dbgfs_ops, ioda_eeh_outb_dbgfs_get, 117 DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_outb_dbgfs_ops, ioda_eeh_outb_dbgfs_get,
118 ioda_eeh_outb_dbgfs_set, "0x%llx\n"); 118 ioda_eeh_outb_dbgfs_set, "0x%llx\n");
119 DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbA_dbgfs_ops, ioda_eeh_inbA_dbgfs_get, 119 DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbA_dbgfs_ops, ioda_eeh_inbA_dbgfs_get,
120 ioda_eeh_inbA_dbgfs_set, "0x%llx\n"); 120 ioda_eeh_inbA_dbgfs_set, "0x%llx\n");
121 DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get, 121 DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
122 ioda_eeh_inbB_dbgfs_set, "0x%llx\n"); 122 ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
123 #endif /* CONFIG_DEBUG_FS */ 123 #endif /* CONFIG_DEBUG_FS */
124 124
125 125
126 /** 126 /**
127 * ioda_eeh_post_init - Chip dependent post initialization 127 * ioda_eeh_post_init - Chip dependent post initialization
128 * @hose: PCI controller 128 * @hose: PCI controller
129 * 129 *
130 * The function will be called after eeh PEs and devices 130 * The function will be called after eeh PEs and devices
131 * have been built. That means the EEH is ready to supply 131 * have been built. That means the EEH is ready to supply
132 * service with I/O cache. 132 * service with I/O cache.
133 */ 133 */
134 static int ioda_eeh_post_init(struct pci_controller *hose) 134 static int ioda_eeh_post_init(struct pci_controller *hose)
135 { 135 {
136 struct pnv_phb *phb = hose->private_data; 136 struct pnv_phb *phb = hose->private_data;
137 int ret; 137 int ret;
138 138
139 /* Register OPAL event notifier */ 139 /* Register OPAL event notifier */
140 if (!ioda_eeh_nb_init) { 140 if (!ioda_eeh_nb_init) {
141 ret = opal_notifier_register(&ioda_eeh_nb); 141 ret = opal_notifier_register(&ioda_eeh_nb);
142 if (ret) { 142 if (ret) {
143 pr_err("%s: Can't register OPAL event notifier (%d)\n", 143 pr_err("%s: Can't register OPAL event notifier (%d)\n",
144 __func__, ret); 144 __func__, ret);
145 return ret; 145 return ret;
146 } 146 }
147 147
148 ioda_eeh_nb_init = 1; 148 ioda_eeh_nb_init = 1;
149 } 149 }
150 150
151 #ifdef CONFIG_DEBUG_FS 151 #ifdef CONFIG_DEBUG_FS
152 if (!phb->has_dbgfs && phb->dbgfs) { 152 if (!phb->has_dbgfs && phb->dbgfs) {
153 phb->has_dbgfs = 1; 153 phb->has_dbgfs = 1;
154 154
155 debugfs_create_file("err_injct_outbound", 0600, 155 debugfs_create_file("err_injct_outbound", 0600,
156 phb->dbgfs, hose, 156 phb->dbgfs, hose,
157 &ioda_eeh_outb_dbgfs_ops); 157 &ioda_eeh_outb_dbgfs_ops);
158 debugfs_create_file("err_injct_inboundA", 0600, 158 debugfs_create_file("err_injct_inboundA", 0600,
159 phb->dbgfs, hose, 159 phb->dbgfs, hose,
160 &ioda_eeh_inbA_dbgfs_ops); 160 &ioda_eeh_inbA_dbgfs_ops);
161 debugfs_create_file("err_injct_inboundB", 0600, 161 debugfs_create_file("err_injct_inboundB", 0600,
162 phb->dbgfs, hose, 162 phb->dbgfs, hose,
163 &ioda_eeh_inbB_dbgfs_ops); 163 &ioda_eeh_inbB_dbgfs_ops);
164 } 164 }
165 #endif 165 #endif
166 166
167 /* If EEH is enabled, we're going to rely on that. 167 /* If EEH is enabled, we're going to rely on that.
168 * Otherwise, we restore to conventional mechanism 168 * Otherwise, we restore to conventional mechanism
169 * to clear frozen PE during PCI config access. 169 * to clear frozen PE during PCI config access.
170 */ 170 */
171 if (eeh_enabled()) 171 if (eeh_enabled())
172 phb->flags |= PNV_PHB_FLAG_EEH; 172 phb->flags |= PNV_PHB_FLAG_EEH;
173 else 173 else
174 phb->flags &= ~PNV_PHB_FLAG_EEH; 174 phb->flags &= ~PNV_PHB_FLAG_EEH;
175 175
176 return 0; 176 return 0;
177 } 177 }
178 178
179 /** 179 /**
180 * ioda_eeh_set_option - Set EEH operation or I/O setting 180 * ioda_eeh_set_option - Set EEH operation or I/O setting
181 * @pe: EEH PE 181 * @pe: EEH PE
182 * @option: options 182 * @option: options
183 * 183 *
184 * Enable or disable EEH option for the indicated PE. The 184 * Enable or disable EEH option for the indicated PE. The
185 * function also can be used to enable I/O or DMA for the 185 * function also can be used to enable I/O or DMA for the
186 * PE. 186 * PE.
187 */ 187 */
188 static int ioda_eeh_set_option(struct eeh_pe *pe, int option) 188 static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
189 { 189 {
190 s64 ret; 190 s64 ret;
191 u32 pe_no; 191 u32 pe_no;
192 struct pci_controller *hose = pe->phb; 192 struct pci_controller *hose = pe->phb;
193 struct pnv_phb *phb = hose->private_data; 193 struct pnv_phb *phb = hose->private_data;
194 194
195 /* Check on PE number */ 195 /* Check on PE number */
196 if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { 196 if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
197 pr_err("%s: PE address %x out of range [0, %x] " 197 pr_err("%s: PE address %x out of range [0, %x] "
198 "on PHB#%x\n", 198 "on PHB#%x\n",
199 __func__, pe->addr, phb->ioda.total_pe, 199 __func__, pe->addr, phb->ioda.total_pe,
200 hose->global_number); 200 hose->global_number);
201 return -EINVAL; 201 return -EINVAL;
202 } 202 }
203 203
204 pe_no = pe->addr; 204 pe_no = pe->addr;
205 switch (option) { 205 switch (option) {
206 case EEH_OPT_DISABLE: 206 case EEH_OPT_DISABLE:
207 ret = -EEXIST; 207 ret = -EEXIST;
208 break; 208 break;
209 case EEH_OPT_ENABLE: 209 case EEH_OPT_ENABLE:
210 ret = 0; 210 ret = 0;
211 break; 211 break;
212 case EEH_OPT_THAW_MMIO: 212 case EEH_OPT_THAW_MMIO:
213 ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, 213 ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
214 OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO); 214 OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO);
215 if (ret) { 215 if (ret) {
216 pr_warning("%s: Failed to enable MMIO for " 216 pr_warning("%s: Failed to enable MMIO for "
217 "PHB#%x-PE#%x, err=%lld\n", 217 "PHB#%x-PE#%x, err=%lld\n",
218 __func__, hose->global_number, pe_no, ret); 218 __func__, hose->global_number, pe_no, ret);
219 return -EIO; 219 return -EIO;
220 } 220 }
221 221
222 break; 222 break;
223 case EEH_OPT_THAW_DMA: 223 case EEH_OPT_THAW_DMA:
224 ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, 224 ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
225 OPAL_EEH_ACTION_CLEAR_FREEZE_DMA); 225 OPAL_EEH_ACTION_CLEAR_FREEZE_DMA);
226 if (ret) { 226 if (ret) {
227 pr_warning("%s: Failed to enable DMA for " 227 pr_warning("%s: Failed to enable DMA for "
228 "PHB#%x-PE#%x, err=%lld\n", 228 "PHB#%x-PE#%x, err=%lld\n",
229 __func__, hose->global_number, pe_no, ret); 229 __func__, hose->global_number, pe_no, ret);
230 return -EIO; 230 return -EIO;
231 } 231 }
232 232
233 break; 233 break;
234 default: 234 default:
235 pr_warning("%s: Invalid option %d\n", __func__, option); 235 pr_warning("%s: Invalid option %d\n", __func__, option);
236 return -EINVAL; 236 return -EINVAL;
237 } 237 }
238 238
239 return ret; 239 return ret;
240 } 240 }
241 241
242 static void ioda_eeh_phb_diag(struct pci_controller *hose) 242 static void ioda_eeh_phb_diag(struct pci_controller *hose)
243 { 243 {
244 struct pnv_phb *phb = hose->private_data; 244 struct pnv_phb *phb = hose->private_data;
245 long rc; 245 long rc;
246 246
247 rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, 247 rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
248 PNV_PCI_DIAG_BUF_SIZE); 248 PNV_PCI_DIAG_BUF_SIZE);
249 if (rc != OPAL_SUCCESS) { 249 if (rc != OPAL_SUCCESS) {
250 pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", 250 pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
251 __func__, hose->global_number, rc); 251 __func__, hose->global_number, rc);
252 return; 252 return;
253 } 253 }
254 254
255 pnv_pci_dump_phb_diag_data(hose, phb->diag.blob); 255 pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
256 } 256 }
257 257
258 /** 258 /**
259 * ioda_eeh_get_state - Retrieve the state of PE 259 * ioda_eeh_get_state - Retrieve the state of PE
260 * @pe: EEH PE 260 * @pe: EEH PE
261 * 261 *
262 * The PE's state should be retrieved from the PEEV, PEST 262 * The PE's state should be retrieved from the PEEV, PEST
263 * IODA tables. Since the OPAL has exported the function 263 * IODA tables. Since the OPAL has exported the function
264 * to do it, it'd better to use that. 264 * to do it, it'd better to use that.
265 */ 265 */
266 static int ioda_eeh_get_state(struct eeh_pe *pe) 266 static int ioda_eeh_get_state(struct eeh_pe *pe)
267 { 267 {
268 s64 ret = 0; 268 s64 ret = 0;
269 u8 fstate; 269 u8 fstate;
270 u16 pcierr; 270 u16 pcierr;
271 u32 pe_no; 271 u32 pe_no;
272 int result; 272 int result;
273 struct pci_controller *hose = pe->phb; 273 struct pci_controller *hose = pe->phb;
274 struct pnv_phb *phb = hose->private_data; 274 struct pnv_phb *phb = hose->private_data;
275 275
276 /* 276 /*
277 * Sanity check on PE address. The PHB PE address should 277 * Sanity check on PE address. The PHB PE address should
278 * be zero. 278 * be zero.
279 */ 279 */
280 if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { 280 if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
281 pr_err("%s: PE address %x out of range [0, %x] " 281 pr_err("%s: PE address %x out of range [0, %x] "
282 "on PHB#%x\n", 282 "on PHB#%x\n",
283 __func__, pe->addr, phb->ioda.total_pe, 283 __func__, pe->addr, phb->ioda.total_pe,
284 hose->global_number); 284 hose->global_number);
285 return EEH_STATE_NOT_SUPPORT; 285 return EEH_STATE_NOT_SUPPORT;
286 } 286 }
287 287
288 /* 288 /*
289 * If we're in middle of PE reset, return normal 289 * If we're in middle of PE reset, return normal
290 * state to keep EEH core going. For PHB reset, we 290 * state to keep EEH core going. For PHB reset, we
291 * still expect to have fenced PHB cleared with 291 * still expect to have fenced PHB cleared with
292 * PHB reset. 292 * PHB reset.
293 */ 293 */
294 if (!(pe->type & EEH_PE_PHB) && 294 if (!(pe->type & EEH_PE_PHB) &&
295 (pe->state & EEH_PE_RESET)) { 295 (pe->state & EEH_PE_RESET)) {
296 result = (EEH_STATE_MMIO_ACTIVE | 296 result = (EEH_STATE_MMIO_ACTIVE |
297 EEH_STATE_DMA_ACTIVE | 297 EEH_STATE_DMA_ACTIVE |
298 EEH_STATE_MMIO_ENABLED | 298 EEH_STATE_MMIO_ENABLED |
299 EEH_STATE_DMA_ENABLED); 299 EEH_STATE_DMA_ENABLED);
300 return result; 300 return result;
301 } 301 }
302 302
303 /* Retrieve PE status through OPAL */ 303 /* Retrieve PE status through OPAL */
304 pe_no = pe->addr; 304 pe_no = pe->addr;
305 ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, 305 ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
306 &fstate, &pcierr, NULL); 306 &fstate, &pcierr, NULL);
307 if (ret) { 307 if (ret) {
308 pr_err("%s: Failed to get EEH status on " 308 pr_err("%s: Failed to get EEH status on "
309 "PHB#%x-PE#%x\n, err=%lld\n", 309 "PHB#%x-PE#%x\n, err=%lld\n",
310 __func__, hose->global_number, pe_no, ret); 310 __func__, hose->global_number, pe_no, ret);
311 return EEH_STATE_NOT_SUPPORT; 311 return EEH_STATE_NOT_SUPPORT;
312 } 312 }
313 313
314 /* Check PHB status */ 314 /* Check PHB status */
315 if (pe->type & EEH_PE_PHB) { 315 if (pe->type & EEH_PE_PHB) {
316 result = 0; 316 result = 0;
317 result &= ~EEH_STATE_RESET_ACTIVE; 317 result &= ~EEH_STATE_RESET_ACTIVE;
318 318
319 if (pcierr != OPAL_EEH_PHB_ERROR) { 319 if (pcierr != OPAL_EEH_PHB_ERROR) {
320 result |= EEH_STATE_MMIO_ACTIVE; 320 result |= EEH_STATE_MMIO_ACTIVE;
321 result |= EEH_STATE_DMA_ACTIVE; 321 result |= EEH_STATE_DMA_ACTIVE;
322 result |= EEH_STATE_MMIO_ENABLED; 322 result |= EEH_STATE_MMIO_ENABLED;
323 result |= EEH_STATE_DMA_ENABLED; 323 result |= EEH_STATE_DMA_ENABLED;
324 } else if (!(pe->state & EEH_PE_ISOLATED)) { 324 } else if (!(pe->state & EEH_PE_ISOLATED)) {
325 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 325 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
326 ioda_eeh_phb_diag(hose); 326 ioda_eeh_phb_diag(hose);
327 } 327 }
328 328
329 return result; 329 return result;
330 } 330 }
331 331
332 /* Parse result out */ 332 /* Parse result out */
333 result = 0; 333 result = 0;
334 switch (fstate) { 334 switch (fstate) {
335 case OPAL_EEH_STOPPED_NOT_FROZEN: 335 case OPAL_EEH_STOPPED_NOT_FROZEN:
336 result &= ~EEH_STATE_RESET_ACTIVE; 336 result &= ~EEH_STATE_RESET_ACTIVE;
337 result |= EEH_STATE_MMIO_ACTIVE; 337 result |= EEH_STATE_MMIO_ACTIVE;
338 result |= EEH_STATE_DMA_ACTIVE; 338 result |= EEH_STATE_DMA_ACTIVE;
339 result |= EEH_STATE_MMIO_ENABLED; 339 result |= EEH_STATE_MMIO_ENABLED;
340 result |= EEH_STATE_DMA_ENABLED; 340 result |= EEH_STATE_DMA_ENABLED;
341 break; 341 break;
342 case OPAL_EEH_STOPPED_MMIO_FREEZE: 342 case OPAL_EEH_STOPPED_MMIO_FREEZE:
343 result &= ~EEH_STATE_RESET_ACTIVE; 343 result &= ~EEH_STATE_RESET_ACTIVE;
344 result |= EEH_STATE_DMA_ACTIVE; 344 result |= EEH_STATE_DMA_ACTIVE;
345 result |= EEH_STATE_DMA_ENABLED; 345 result |= EEH_STATE_DMA_ENABLED;
346 break; 346 break;
347 case OPAL_EEH_STOPPED_DMA_FREEZE: 347 case OPAL_EEH_STOPPED_DMA_FREEZE:
348 result &= ~EEH_STATE_RESET_ACTIVE; 348 result &= ~EEH_STATE_RESET_ACTIVE;
349 result |= EEH_STATE_MMIO_ACTIVE; 349 result |= EEH_STATE_MMIO_ACTIVE;
350 result |= EEH_STATE_MMIO_ENABLED; 350 result |= EEH_STATE_MMIO_ENABLED;
351 break; 351 break;
352 case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: 352 case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
353 result &= ~EEH_STATE_RESET_ACTIVE; 353 result &= ~EEH_STATE_RESET_ACTIVE;
354 break; 354 break;
355 case OPAL_EEH_STOPPED_RESET: 355 case OPAL_EEH_STOPPED_RESET:
356 result |= EEH_STATE_RESET_ACTIVE; 356 result |= EEH_STATE_RESET_ACTIVE;
357 break; 357 break;
358 case OPAL_EEH_STOPPED_TEMP_UNAVAIL: 358 case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
359 result |= EEH_STATE_UNAVAILABLE; 359 result |= EEH_STATE_UNAVAILABLE;
360 break; 360 break;
361 case OPAL_EEH_STOPPED_PERM_UNAVAIL: 361 case OPAL_EEH_STOPPED_PERM_UNAVAIL:
362 result |= EEH_STATE_NOT_SUPPORT; 362 result |= EEH_STATE_NOT_SUPPORT;
363 break; 363 break;
364 default: 364 default:
365 pr_warning("%s: Unexpected EEH status 0x%x " 365 pr_warning("%s: Unexpected EEH status 0x%x "
366 "on PHB#%x-PE#%x\n", 366 "on PHB#%x-PE#%x\n",
367 __func__, fstate, hose->global_number, pe_no); 367 __func__, fstate, hose->global_number, pe_no);
368 } 368 }
369 369
370 /* Dump PHB diag-data for frozen PE */ 370 /* Dump PHB diag-data for frozen PE */
371 if (result != EEH_STATE_NOT_SUPPORT && 371 if (result != EEH_STATE_NOT_SUPPORT &&
372 (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) != 372 (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) !=
373 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) && 373 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) &&
374 !(pe->state & EEH_PE_ISOLATED)) { 374 !(pe->state & EEH_PE_ISOLATED)) {
375 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 375 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
376 ioda_eeh_phb_diag(hose); 376 ioda_eeh_phb_diag(hose);
377 } 377 }
378 378
379 return result; 379 return result;
380 } 380 }
381 381
382 static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) 382 static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
383 { 383 {
384 s64 rc = OPAL_HARDWARE; 384 s64 rc = OPAL_HARDWARE;
385 385
386 while (1) { 386 while (1) {
387 rc = opal_pci_poll(phb->opal_id); 387 rc = opal_pci_poll(phb->opal_id);
388 if (rc <= 0) 388 if (rc <= 0)
389 break; 389 break;
390 390
391 msleep(rc); 391 if (system_state < SYSTEM_RUNNING)
392 udelay(1000 * rc);
393 else
394 msleep(rc);
392 } 395 }
393 396
394 return rc; 397 return rc;
395 } 398 }
396 399
397 static int ioda_eeh_phb_reset(struct pci_controller *hose, int option) 400 int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
398 { 401 {
399 struct pnv_phb *phb = hose->private_data; 402 struct pnv_phb *phb = hose->private_data;
400 s64 rc = OPAL_HARDWARE; 403 s64 rc = OPAL_HARDWARE;
401 404
402 pr_debug("%s: Reset PHB#%x, option=%d\n", 405 pr_debug("%s: Reset PHB#%x, option=%d\n",
403 __func__, hose->global_number, option); 406 __func__, hose->global_number, option);
404 407
405 /* Issue PHB complete reset request */ 408 /* Issue PHB complete reset request */
406 if (option == EEH_RESET_FUNDAMENTAL || 409 if (option == EEH_RESET_FUNDAMENTAL ||
407 option == EEH_RESET_HOT) 410 option == EEH_RESET_HOT)
408 rc = opal_pci_reset(phb->opal_id, 411 rc = opal_pci_reset(phb->opal_id,
409 OPAL_PHB_COMPLETE, 412 OPAL_PHB_COMPLETE,
410 OPAL_ASSERT_RESET); 413 OPAL_ASSERT_RESET);
411 else if (option == EEH_RESET_DEACTIVATE) 414 else if (option == EEH_RESET_DEACTIVATE)
412 rc = opal_pci_reset(phb->opal_id, 415 rc = opal_pci_reset(phb->opal_id,
413 OPAL_PHB_COMPLETE, 416 OPAL_PHB_COMPLETE,
414 OPAL_DEASSERT_RESET); 417 OPAL_DEASSERT_RESET);
415 if (rc < 0) 418 if (rc < 0)
416 goto out; 419 goto out;
417 420
418 /* 421 /*
419 * Poll state of the PHB until the request is done 422 * Poll state of the PHB until the request is done
420 * successfully. The PHB reset is usually PHB complete 423 * successfully. The PHB reset is usually PHB complete
421 * reset followed by hot reset on root bus. So we also 424 * reset followed by hot reset on root bus. So we also
422 * need the PCI bus settlement delay. 425 * need the PCI bus settlement delay.
423 */ 426 */
424 rc = ioda_eeh_phb_poll(phb); 427 rc = ioda_eeh_phb_poll(phb);
425 if (option == EEH_RESET_DEACTIVATE) 428 if (option == EEH_RESET_DEACTIVATE) {
426 msleep(EEH_PE_RST_SETTLE_TIME); 429 if (system_state < SYSTEM_RUNNING)
430 udelay(1000 * EEH_PE_RST_SETTLE_TIME);
431 else
432 msleep(EEH_PE_RST_SETTLE_TIME);
433 }
427 out: 434 out:
428 if (rc != OPAL_SUCCESS) 435 if (rc != OPAL_SUCCESS)
429 return -EIO; 436 return -EIO;
430 437
431 return 0; 438 return 0;
432 } 439 }
433 440
434 static int ioda_eeh_root_reset(struct pci_controller *hose, int option) 441 static int ioda_eeh_root_reset(struct pci_controller *hose, int option)
435 { 442 {
436 struct pnv_phb *phb = hose->private_data; 443 struct pnv_phb *phb = hose->private_data;
437 s64 rc = OPAL_SUCCESS; 444 s64 rc = OPAL_SUCCESS;
438 445
439 pr_debug("%s: Reset PHB#%x, option=%d\n", 446 pr_debug("%s: Reset PHB#%x, option=%d\n",
440 __func__, hose->global_number, option); 447 __func__, hose->global_number, option);
441 448
442 /* 449 /*
443 * During the reset deassert time, we needn't care 450 * During the reset deassert time, we needn't care
444 * the reset scope because the firmware does nothing 451 * the reset scope because the firmware does nothing
445 * for fundamental or hot reset during deassert phase. 452 * for fundamental or hot reset during deassert phase.
446 */ 453 */
447 if (option == EEH_RESET_FUNDAMENTAL) 454 if (option == EEH_RESET_FUNDAMENTAL)
448 rc = opal_pci_reset(phb->opal_id, 455 rc = opal_pci_reset(phb->opal_id,
449 OPAL_PCI_FUNDAMENTAL_RESET, 456 OPAL_PCI_FUNDAMENTAL_RESET,
450 OPAL_ASSERT_RESET); 457 OPAL_ASSERT_RESET);
451 else if (option == EEH_RESET_HOT) 458 else if (option == EEH_RESET_HOT)
452 rc = opal_pci_reset(phb->opal_id, 459 rc = opal_pci_reset(phb->opal_id,
453 OPAL_PCI_HOT_RESET, 460 OPAL_PCI_HOT_RESET,
454 OPAL_ASSERT_RESET); 461 OPAL_ASSERT_RESET);
455 else if (option == EEH_RESET_DEACTIVATE) 462 else if (option == EEH_RESET_DEACTIVATE)
456 rc = opal_pci_reset(phb->opal_id, 463 rc = opal_pci_reset(phb->opal_id,
457 OPAL_PCI_HOT_RESET, 464 OPAL_PCI_HOT_RESET,
458 OPAL_DEASSERT_RESET); 465 OPAL_DEASSERT_RESET);
459 if (rc < 0) 466 if (rc < 0)
460 goto out; 467 goto out;
461 468
462 /* Poll state of the PHB until the request is done */ 469 /* Poll state of the PHB until the request is done */
463 rc = ioda_eeh_phb_poll(phb); 470 rc = ioda_eeh_phb_poll(phb);
464 if (option == EEH_RESET_DEACTIVATE) 471 if (option == EEH_RESET_DEACTIVATE)
465 msleep(EEH_PE_RST_SETTLE_TIME); 472 msleep(EEH_PE_RST_SETTLE_TIME);
466 out: 473 out:
467 if (rc != OPAL_SUCCESS) 474 if (rc != OPAL_SUCCESS)
468 return -EIO; 475 return -EIO;
469 476
470 return 0; 477 return 0;
471 } 478 }
472 479
473 static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option) 480 static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option)
474 481
475 { 482 {
476 struct device_node *dn = pci_device_to_OF_node(dev); 483 struct device_node *dn = pci_device_to_OF_node(dev);
477 struct eeh_dev *edev = of_node_to_eeh_dev(dn); 484 struct eeh_dev *edev = of_node_to_eeh_dev(dn);
478 int aer = edev ? edev->aer_cap : 0; 485 int aer = edev ? edev->aer_cap : 0;
479 u32 ctrl; 486 u32 ctrl;
480 487
481 pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n", 488 pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
482 __func__, pci_domain_nr(dev->bus), 489 __func__, pci_domain_nr(dev->bus),
483 dev->bus->number, option); 490 dev->bus->number, option);
484 491
485 switch (option) { 492 switch (option) {
486 case EEH_RESET_FUNDAMENTAL: 493 case EEH_RESET_FUNDAMENTAL:
487 case EEH_RESET_HOT: 494 case EEH_RESET_HOT:
488 /* Don't report linkDown event */ 495 /* Don't report linkDown event */
489 if (aer) { 496 if (aer) {
490 eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, 497 eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
491 4, &ctrl); 498 4, &ctrl);
492 ctrl |= PCI_ERR_UNC_SURPDN; 499 ctrl |= PCI_ERR_UNC_SURPDN;
493 eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, 500 eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
494 4, ctrl); 501 4, ctrl);
495 } 502 }
496 503
497 eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); 504 eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
498 ctrl |= PCI_BRIDGE_CTL_BUS_RESET; 505 ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
499 eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); 506 eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
500 msleep(EEH_PE_RST_HOLD_TIME); 507 msleep(EEH_PE_RST_HOLD_TIME);
501 508
502 break; 509 break;
503 case EEH_RESET_DEACTIVATE: 510 case EEH_RESET_DEACTIVATE:
504 eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); 511 eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
505 ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; 512 ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
506 eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); 513 eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
507 msleep(EEH_PE_RST_SETTLE_TIME); 514 msleep(EEH_PE_RST_SETTLE_TIME);
508 515
509 /* Continue reporting linkDown event */ 516 /* Continue reporting linkDown event */
510 if (aer) { 517 if (aer) {
511 eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, 518 eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
512 4, &ctrl); 519 4, &ctrl);
513 ctrl &= ~PCI_ERR_UNC_SURPDN; 520 ctrl &= ~PCI_ERR_UNC_SURPDN;
514 eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, 521 eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
515 4, ctrl); 522 4, ctrl);
516 } 523 }
517 524
518 break; 525 break;
519 } 526 }
520 527
521 return 0; 528 return 0;
522 } 529 }
523 530
524 void pnv_pci_reset_secondary_bus(struct pci_dev *dev) 531 void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
525 { 532 {
526 struct pci_controller *hose; 533 struct pci_controller *hose;
527 534
528 if (pci_is_root_bus(dev->bus)) { 535 if (pci_is_root_bus(dev->bus)) {
529 hose = pci_bus_to_host(dev->bus); 536 hose = pci_bus_to_host(dev->bus);
530 ioda_eeh_root_reset(hose, EEH_RESET_HOT); 537 ioda_eeh_root_reset(hose, EEH_RESET_HOT);
531 ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); 538 ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
532 } else { 539 } else {
533 ioda_eeh_bridge_reset(dev, EEH_RESET_HOT); 540 ioda_eeh_bridge_reset(dev, EEH_RESET_HOT);
534 ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); 541 ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
535 } 542 }
536 } 543 }
537 544
538 /** 545 /**
539 * ioda_eeh_reset - Reset the indicated PE 546 * ioda_eeh_reset - Reset the indicated PE
540 * @pe: EEH PE 547 * @pe: EEH PE
541 * @option: reset option 548 * @option: reset option
542 * 549 *
543 * Do reset on the indicated PE. For PCI bus sensitive PE, 550 * Do reset on the indicated PE. For PCI bus sensitive PE,
544 * we need to reset the parent p2p bridge. The PHB has to 551 * we need to reset the parent p2p bridge. The PHB has to
545 * be reinitialized if the p2p bridge is root bridge. For 552 * be reinitialized if the p2p bridge is root bridge. For
546 * PCI device sensitive PE, we will try to reset the device 553 * PCI device sensitive PE, we will try to reset the device
547 * through FLR. For now, we don't have OPAL APIs to do HARD 554 * through FLR. For now, we don't have OPAL APIs to do HARD
548 * reset yet, so all reset would be SOFT (HOT) reset. 555 * reset yet, so all reset would be SOFT (HOT) reset.
549 */ 556 */
550 static int ioda_eeh_reset(struct eeh_pe *pe, int option) 557 static int ioda_eeh_reset(struct eeh_pe *pe, int option)
551 { 558 {
552 struct pci_controller *hose = pe->phb; 559 struct pci_controller *hose = pe->phb;
553 struct pci_bus *bus; 560 struct pci_bus *bus;
554 int ret; 561 int ret;
555 562
556 /* 563 /*
557 * For PHB reset, we always have complete reset. For those PEs whose 564 * For PHB reset, we always have complete reset. For those PEs whose
558 * primary bus derived from root complex (root bus) or root port 565 * primary bus derived from root complex (root bus) or root port
559 * (usually bus#1), we apply hot or fundamental reset on the root port. 566 * (usually bus#1), we apply hot or fundamental reset on the root port.
560 * For other PEs, we always have hot reset on the PE primary bus. 567 * For other PEs, we always have hot reset on the PE primary bus.
561 * 568 *
562 * Here, we have different design to pHyp, which always clear the 569 * Here, we have different design to pHyp, which always clear the
563 * frozen state during PE reset. However, the good idea here from 570 * frozen state during PE reset. However, the good idea here from
564 * benh is to keep frozen state before we get PE reset done completely 571 * benh is to keep frozen state before we get PE reset done completely
565 * (until BAR restore). With the frozen state, HW drops illegal IO 572 * (until BAR restore). With the frozen state, HW drops illegal IO
566 * or MMIO access, which can incur recrusive frozen PE during PE 573 * or MMIO access, which can incur recrusive frozen PE during PE
567 * reset. The side effect is that EEH core has to clear the frozen 574 * reset. The side effect is that EEH core has to clear the frozen
568 * state explicitly after BAR restore. 575 * state explicitly after BAR restore.
569 */ 576 */
570 if (pe->type & EEH_PE_PHB) { 577 if (pe->type & EEH_PE_PHB) {
571 ret = ioda_eeh_phb_reset(hose, option); 578 ret = ioda_eeh_phb_reset(hose, option);
572 } else { 579 } else {
573 bus = eeh_pe_bus_get(pe); 580 bus = eeh_pe_bus_get(pe);
574 if (pci_is_root_bus(bus) || 581 if (pci_is_root_bus(bus) ||
575 pci_is_root_bus(bus->parent)) 582 pci_is_root_bus(bus->parent))
576 ret = ioda_eeh_root_reset(hose, option); 583 ret = ioda_eeh_root_reset(hose, option);
577 else 584 else
578 ret = ioda_eeh_bridge_reset(bus->self, option); 585 ret = ioda_eeh_bridge_reset(bus->self, option);
579 } 586 }
580 587
581 return ret; 588 return ret;
582 } 589 }
583 590
584 /** 591 /**
585 * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE 592 * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
586 * @pe: EEH PE 593 * @pe: EEH PE
587 * 594 *
588 * For particular PE, it might have included PCI bridges. In order 595 * For particular PE, it might have included PCI bridges. In order
589 * to make the PE work properly, those PCI bridges should be configured 596 * to make the PE work properly, those PCI bridges should be configured
590 * correctly. However, we need do nothing on P7IOC since the reset 597 * correctly. However, we need do nothing on P7IOC since the reset
591 * function will do everything that should be covered by the function. 598 * function will do everything that should be covered by the function.
592 */ 599 */
593 static int ioda_eeh_configure_bridge(struct eeh_pe *pe) 600 static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
594 { 601 {
595 return 0; 602 return 0;
596 } 603 }
597 604
598 static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) 605 static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
599 { 606 {
600 /* GEM */ 607 /* GEM */
601 pr_info(" GEM XFIR: %016llx\n", data->gemXfir); 608 pr_info(" GEM XFIR: %016llx\n", data->gemXfir);
602 pr_info(" GEM RFIR: %016llx\n", data->gemRfir); 609 pr_info(" GEM RFIR: %016llx\n", data->gemRfir);
603 pr_info(" GEM RIRQFIR: %016llx\n", data->gemRirqfir); 610 pr_info(" GEM RIRQFIR: %016llx\n", data->gemRirqfir);
604 pr_info(" GEM Mask: %016llx\n", data->gemMask); 611 pr_info(" GEM Mask: %016llx\n", data->gemMask);
605 pr_info(" GEM RWOF: %016llx\n", data->gemRwof); 612 pr_info(" GEM RWOF: %016llx\n", data->gemRwof);
606 613
607 /* LEM */ 614 /* LEM */
608 pr_info(" LEM FIR: %016llx\n", data->lemFir); 615 pr_info(" LEM FIR: %016llx\n", data->lemFir);
609 pr_info(" LEM Error Mask: %016llx\n", data->lemErrMask); 616 pr_info(" LEM Error Mask: %016llx\n", data->lemErrMask);
610 pr_info(" LEM Action 0: %016llx\n", data->lemAction0); 617 pr_info(" LEM Action 0: %016llx\n", data->lemAction0);
611 pr_info(" LEM Action 1: %016llx\n", data->lemAction1); 618 pr_info(" LEM Action 1: %016llx\n", data->lemAction1);
612 pr_info(" LEM WOF: %016llx\n", data->lemWof); 619 pr_info(" LEM WOF: %016llx\n", data->lemWof);
613 } 620 }
614 621
615 static void ioda_eeh_hub_diag(struct pci_controller *hose) 622 static void ioda_eeh_hub_diag(struct pci_controller *hose)
616 { 623 {
617 struct pnv_phb *phb = hose->private_data; 624 struct pnv_phb *phb = hose->private_data;
618 struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag; 625 struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
619 long rc; 626 long rc;
620 627
621 rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); 628 rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
622 if (rc != OPAL_SUCCESS) { 629 if (rc != OPAL_SUCCESS) {
623 pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n", 630 pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n",
624 __func__, phb->hub_id, rc); 631 __func__, phb->hub_id, rc);
625 return; 632 return;
626 } 633 }
627 634
628 switch (data->type) { 635 switch (data->type) {
629 case OPAL_P7IOC_DIAG_TYPE_RGC: 636 case OPAL_P7IOC_DIAG_TYPE_RGC:
630 pr_info("P7IOC diag-data for RGC\n\n"); 637 pr_info("P7IOC diag-data for RGC\n\n");
631 ioda_eeh_hub_diag_common(data); 638 ioda_eeh_hub_diag_common(data);
632 pr_info(" RGC Status: %016llx\n", data->rgc.rgcStatus); 639 pr_info(" RGC Status: %016llx\n", data->rgc.rgcStatus);
633 pr_info(" RGC LDCP: %016llx\n", data->rgc.rgcLdcp); 640 pr_info(" RGC LDCP: %016llx\n", data->rgc.rgcLdcp);
634 break; 641 break;
635 case OPAL_P7IOC_DIAG_TYPE_BI: 642 case OPAL_P7IOC_DIAG_TYPE_BI:
636 pr_info("P7IOC diag-data for BI %s\n\n", 643 pr_info("P7IOC diag-data for BI %s\n\n",
637 data->bi.biDownbound ? "Downbound" : "Upbound"); 644 data->bi.biDownbound ? "Downbound" : "Upbound");
638 ioda_eeh_hub_diag_common(data); 645 ioda_eeh_hub_diag_common(data);
639 pr_info(" BI LDCP 0: %016llx\n", data->bi.biLdcp0); 646 pr_info(" BI LDCP 0: %016llx\n", data->bi.biLdcp0);
640 pr_info(" BI LDCP 1: %016llx\n", data->bi.biLdcp1); 647 pr_info(" BI LDCP 1: %016llx\n", data->bi.biLdcp1);
641 pr_info(" BI LDCP 2: %016llx\n", data->bi.biLdcp2); 648 pr_info(" BI LDCP 2: %016llx\n", data->bi.biLdcp2);
642 pr_info(" BI Fence Status: %016llx\n", data->bi.biFenceStatus); 649 pr_info(" BI Fence Status: %016llx\n", data->bi.biFenceStatus);
643 break; 650 break;
644 case OPAL_P7IOC_DIAG_TYPE_CI: 651 case OPAL_P7IOC_DIAG_TYPE_CI:
645 pr_info("P7IOC diag-data for CI Port %d\\nn", 652 pr_info("P7IOC diag-data for CI Port %d\\nn",
646 data->ci.ciPort); 653 data->ci.ciPort);
647 ioda_eeh_hub_diag_common(data); 654 ioda_eeh_hub_diag_common(data);
648 pr_info(" CI Port Status: %016llx\n", data->ci.ciPortStatus); 655 pr_info(" CI Port Status: %016llx\n", data->ci.ciPortStatus);
649 pr_info(" CI Port LDCP: %016llx\n", data->ci.ciPortLdcp); 656 pr_info(" CI Port LDCP: %016llx\n", data->ci.ciPortLdcp);
650 break; 657 break;
651 case OPAL_P7IOC_DIAG_TYPE_MISC: 658 case OPAL_P7IOC_DIAG_TYPE_MISC:
652 pr_info("P7IOC diag-data for MISC\n\n"); 659 pr_info("P7IOC diag-data for MISC\n\n");
653 ioda_eeh_hub_diag_common(data); 660 ioda_eeh_hub_diag_common(data);
654 break; 661 break;
655 case OPAL_P7IOC_DIAG_TYPE_I2C: 662 case OPAL_P7IOC_DIAG_TYPE_I2C:
656 pr_info("P7IOC diag-data for I2C\n\n"); 663 pr_info("P7IOC diag-data for I2C\n\n");
657 ioda_eeh_hub_diag_common(data); 664 ioda_eeh_hub_diag_common(data);
658 break; 665 break;
659 default: 666 default:
660 pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n", 667 pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n",
661 __func__, phb->hub_id, data->type); 668 __func__, phb->hub_id, data->type);
662 } 669 }
663 } 670 }
664 671
665 static int ioda_eeh_get_pe(struct pci_controller *hose, 672 static int ioda_eeh_get_pe(struct pci_controller *hose,
666 u16 pe_no, struct eeh_pe **pe) 673 u16 pe_no, struct eeh_pe **pe)
667 { 674 {
668 struct eeh_pe *phb_pe, *dev_pe; 675 struct eeh_pe *phb_pe, *dev_pe;
669 struct eeh_dev dev; 676 struct eeh_dev dev;
670 677
671 /* Find the PHB PE */ 678 /* Find the PHB PE */
672 phb_pe = eeh_phb_pe_get(hose); 679 phb_pe = eeh_phb_pe_get(hose);
673 if (!phb_pe) 680 if (!phb_pe)
674 return -EEXIST; 681 return -EEXIST;
675 682
676 /* Find the PE according to PE# */ 683 /* Find the PE according to PE# */
677 memset(&dev, 0, sizeof(struct eeh_dev)); 684 memset(&dev, 0, sizeof(struct eeh_dev));
678 dev.phb = hose; 685 dev.phb = hose;
679 dev.pe_config_addr = pe_no; 686 dev.pe_config_addr = pe_no;
680 dev_pe = eeh_pe_get(&dev); 687 dev_pe = eeh_pe_get(&dev);
681 if (!dev_pe) return -EEXIST; 688 if (!dev_pe) return -EEXIST;
682 689
683 *pe = dev_pe; 690 *pe = dev_pe;
684 return 0; 691 return 0;
685 } 692 }
686 693
687 /** 694 /**
688 * ioda_eeh_next_error - Retrieve next error for EEH core to handle 695 * ioda_eeh_next_error - Retrieve next error for EEH core to handle
689 * @pe: The affected PE 696 * @pe: The affected PE
690 * 697 *
691 * The function is expected to be called by EEH core while it gets 698 * The function is expected to be called by EEH core while it gets
692 * special EEH event (without binding PE). The function calls to 699 * special EEH event (without binding PE). The function calls to
693 * OPAL APIs for next error to handle. The informational error is 700 * OPAL APIs for next error to handle. The informational error is
694 * handled internally by platform. However, the dead IOC, dead PHB, 701 * handled internally by platform. However, the dead IOC, dead PHB,
695 * fenced PHB and frozen PE should be handled by EEH core eventually. 702 * fenced PHB and frozen PE should be handled by EEH core eventually.
696 */ 703 */
697 static int ioda_eeh_next_error(struct eeh_pe **pe) 704 static int ioda_eeh_next_error(struct eeh_pe **pe)
698 { 705 {
699 struct pci_controller *hose; 706 struct pci_controller *hose;
700 struct pnv_phb *phb; 707 struct pnv_phb *phb;
701 struct eeh_pe *phb_pe; 708 struct eeh_pe *phb_pe;
702 u64 frozen_pe_no; 709 u64 frozen_pe_no;
703 u16 err_type, severity; 710 u16 err_type, severity;
704 long rc; 711 long rc;
705 int ret = EEH_NEXT_ERR_NONE; 712 int ret = EEH_NEXT_ERR_NONE;
706 713
707 /* 714 /*
708 * While running here, it's safe to purge the event queue. 715 * While running here, it's safe to purge the event queue.
709 * And we should keep the cached OPAL notifier event sychronized 716 * And we should keep the cached OPAL notifier event sychronized
710 * between the kernel and firmware. 717 * between the kernel and firmware.
711 */ 718 */
712 eeh_remove_event(NULL); 719 eeh_remove_event(NULL);
713 opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); 720 opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
714 721
715 list_for_each_entry(hose, &hose_list, list_node) { 722 list_for_each_entry(hose, &hose_list, list_node) {
716 /* 723 /*
717 * If the subordinate PCI buses of the PHB has been 724 * If the subordinate PCI buses of the PHB has been
718 * removed or is exactly under error recovery, we 725 * removed or is exactly under error recovery, we
719 * needn't take care of it any more. 726 * needn't take care of it any more.
720 */ 727 */
721 phb = hose->private_data; 728 phb = hose->private_data;
722 phb_pe = eeh_phb_pe_get(hose); 729 phb_pe = eeh_phb_pe_get(hose);
723 if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED)) 730 if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
724 continue; 731 continue;
725 732
726 rc = opal_pci_next_error(phb->opal_id, 733 rc = opal_pci_next_error(phb->opal_id,
727 &frozen_pe_no, &err_type, &severity); 734 &frozen_pe_no, &err_type, &severity);
728 735
729 /* If OPAL API returns error, we needn't proceed */ 736 /* If OPAL API returns error, we needn't proceed */
730 if (rc != OPAL_SUCCESS) { 737 if (rc != OPAL_SUCCESS) {
731 pr_devel("%s: Invalid return value on " 738 pr_devel("%s: Invalid return value on "
732 "PHB#%x (0x%lx) from opal_pci_next_error", 739 "PHB#%x (0x%lx) from opal_pci_next_error",
733 __func__, hose->global_number, rc); 740 __func__, hose->global_number, rc);
734 continue; 741 continue;
735 } 742 }
736 743
737 /* If the PHB doesn't have error, stop processing */ 744 /* If the PHB doesn't have error, stop processing */
738 if (err_type == OPAL_EEH_NO_ERROR || 745 if (err_type == OPAL_EEH_NO_ERROR ||
739 severity == OPAL_EEH_SEV_NO_ERROR) { 746 severity == OPAL_EEH_SEV_NO_ERROR) {
740 pr_devel("%s: No error found on PHB#%x\n", 747 pr_devel("%s: No error found on PHB#%x\n",
741 __func__, hose->global_number); 748 __func__, hose->global_number);
742 continue; 749 continue;
743 } 750 }
744 751
745 /* 752 /*
746 * Processing the error. We're expecting the error with 753 * Processing the error. We're expecting the error with
747 * highest priority reported upon multiple errors on the 754 * highest priority reported upon multiple errors on the
748 * specific PHB. 755 * specific PHB.
749 */ 756 */
750 pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", 757 pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
751 __func__, err_type, severity, 758 __func__, err_type, severity,
752 frozen_pe_no, hose->global_number); 759 frozen_pe_no, hose->global_number);
753 switch (err_type) { 760 switch (err_type) {
754 case OPAL_EEH_IOC_ERROR: 761 case OPAL_EEH_IOC_ERROR:
755 if (severity == OPAL_EEH_SEV_IOC_DEAD) { 762 if (severity == OPAL_EEH_SEV_IOC_DEAD) {
756 pr_err("EEH: dead IOC detected\n"); 763 pr_err("EEH: dead IOC detected\n");
757 ret = EEH_NEXT_ERR_DEAD_IOC; 764 ret = EEH_NEXT_ERR_DEAD_IOC;
758 } else if (severity == OPAL_EEH_SEV_INF) { 765 } else if (severity == OPAL_EEH_SEV_INF) {
759 pr_info("EEH: IOC informative error " 766 pr_info("EEH: IOC informative error "
760 "detected\n"); 767 "detected\n");
761 ioda_eeh_hub_diag(hose); 768 ioda_eeh_hub_diag(hose);
762 ret = EEH_NEXT_ERR_NONE; 769 ret = EEH_NEXT_ERR_NONE;
763 } 770 }
764 771
765 break; 772 break;
766 case OPAL_EEH_PHB_ERROR: 773 case OPAL_EEH_PHB_ERROR:
767 if (severity == OPAL_EEH_SEV_PHB_DEAD) { 774 if (severity == OPAL_EEH_SEV_PHB_DEAD) {
768 *pe = phb_pe; 775 *pe = phb_pe;
769 pr_err("EEH: dead PHB#%x detected\n", 776 pr_err("EEH: dead PHB#%x detected\n",
770 hose->global_number); 777 hose->global_number);
771 ret = EEH_NEXT_ERR_DEAD_PHB; 778 ret = EEH_NEXT_ERR_DEAD_PHB;
772 } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { 779 } else if (severity == OPAL_EEH_SEV_PHB_FENCED) {
773 *pe = phb_pe; 780 *pe = phb_pe;
774 pr_err("EEH: fenced PHB#%x detected\n", 781 pr_err("EEH: fenced PHB#%x detected\n",
775 hose->global_number); 782 hose->global_number);
776 ret = EEH_NEXT_ERR_FENCED_PHB; 783 ret = EEH_NEXT_ERR_FENCED_PHB;
777 } else if (severity == OPAL_EEH_SEV_INF) { 784 } else if (severity == OPAL_EEH_SEV_INF) {
778 pr_info("EEH: PHB#%x informative error " 785 pr_info("EEH: PHB#%x informative error "
779 "detected\n", 786 "detected\n",
780 hose->global_number); 787 hose->global_number);
781 ioda_eeh_phb_diag(hose); 788 ioda_eeh_phb_diag(hose);
782 ret = EEH_NEXT_ERR_NONE; 789 ret = EEH_NEXT_ERR_NONE;
783 } 790 }
784 791
785 break; 792 break;
786 case OPAL_EEH_PE_ERROR: 793 case OPAL_EEH_PE_ERROR:
787 /* 794 /*
788 * If we can't find the corresponding PE, the 795 * If we can't find the corresponding PE, the
789 * PEEV / PEST would be messy. So we force an 796 * PEEV / PEST would be messy. So we force an
790 * fenced PHB so that it can be recovered. 797 * fenced PHB so that it can be recovered.
791 * 798 *
792 * If the PE has been marked as isolated, that 799 * If the PE has been marked as isolated, that
793 * should have been removed permanently or in 800 * should have been removed permanently or in
794 * progress with recovery. We needn't report 801 * progress with recovery. We needn't report
795 * it again. 802 * it again.
796 */ 803 */
797 if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) { 804 if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) {
798 *pe = phb_pe; 805 *pe = phb_pe;
799 pr_err("EEH: Escalated fenced PHB#%x " 806 pr_err("EEH: Escalated fenced PHB#%x "
800 "detected for PE#%llx\n", 807 "detected for PE#%llx\n",
801 hose->global_number, 808 hose->global_number,
802 frozen_pe_no); 809 frozen_pe_no);
803 ret = EEH_NEXT_ERR_FENCED_PHB; 810 ret = EEH_NEXT_ERR_FENCED_PHB;
804 } else if ((*pe)->state & EEH_PE_ISOLATED) { 811 } else if ((*pe)->state & EEH_PE_ISOLATED) {
805 ret = EEH_NEXT_ERR_NONE; 812 ret = EEH_NEXT_ERR_NONE;
806 } else { 813 } else {
807 pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", 814 pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
808 (*pe)->addr, (*pe)->phb->global_number); 815 (*pe)->addr, (*pe)->phb->global_number);
809 ret = EEH_NEXT_ERR_FROZEN_PE; 816 ret = EEH_NEXT_ERR_FROZEN_PE;
810 } 817 }
811 818
812 break; 819 break;
813 default: 820 default:
814 pr_warn("%s: Unexpected error type %d\n", 821 pr_warn("%s: Unexpected error type %d\n",
815 __func__, err_type); 822 __func__, err_type);
816 } 823 }
817 824
818 /* 825 /*
819 * EEH core will try recover from fenced PHB or 826 * EEH core will try recover from fenced PHB or
820 * frozen PE. In the time for frozen PE, EEH core 827 * frozen PE. In the time for frozen PE, EEH core
821 * enable IO path for that before collecting logs, 828 * enable IO path for that before collecting logs,
822 * but it ruins the site. So we have to dump the 829 * but it ruins the site. So we have to dump the
823 * log in advance here. 830 * log in advance here.
824 */ 831 */
825 if ((ret == EEH_NEXT_ERR_FROZEN_PE || 832 if ((ret == EEH_NEXT_ERR_FROZEN_PE ||
826 ret == EEH_NEXT_ERR_FENCED_PHB) && 833 ret == EEH_NEXT_ERR_FENCED_PHB) &&
827 !((*pe)->state & EEH_PE_ISOLATED)) { 834 !((*pe)->state & EEH_PE_ISOLATED)) {
828 eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); 835 eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
829 ioda_eeh_phb_diag(hose); 836 ioda_eeh_phb_diag(hose);
830 } 837 }
831 838
832 /* 839 /*
833 * If we have no errors on the specific PHB or only 840 * If we have no errors on the specific PHB or only
834 * informative error there, we continue poking it. 841 * informative error there, we continue poking it.
835 * Otherwise, we need actions to be taken by upper 842 * Otherwise, we need actions to be taken by upper
836 * layer. 843 * layer.
837 */ 844 */
838 if (ret > EEH_NEXT_ERR_INF) 845 if (ret > EEH_NEXT_ERR_INF)
839 break; 846 break;
840 } 847 }
841 848
842 return ret; 849 return ret;
843 } 850 }
844 851
845 struct pnv_eeh_ops ioda_eeh_ops = { 852 struct pnv_eeh_ops ioda_eeh_ops = {
846 .post_init = ioda_eeh_post_init, 853 .post_init = ioda_eeh_post_init,
847 .set_option = ioda_eeh_set_option, 854 .set_option = ioda_eeh_set_option,
848 .get_state = ioda_eeh_get_state, 855 .get_state = ioda_eeh_get_state,
849 .reset = ioda_eeh_reset, 856 .reset = ioda_eeh_reset,
850 .configure_bridge = ioda_eeh_configure_bridge, 857 .configure_bridge = ioda_eeh_configure_bridge,
851 .next_error = ioda_eeh_next_error 858 .next_error = ioda_eeh_next_error
852 }; 859 };
853 860
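
Two things stand out in the eeh-ioda.c hunks above: ioda_eeh_phb_reset() is
exported for use outside this file, and both the poll loop and the settle
delay fall back to udelay() while system_state < SYSTEM_RUNNING, because the
kdump-time reset runs before the kernel can sleep in msleep(). A minimal
sketch of that delay pattern, assuming a hypothetical helper name (the commit
itself open-codes the check at each call site):

#include <linux/delay.h>

/*
 * Sleep for "ms" milliseconds once the scheduler is usable; otherwise
 * busy-wait, since early in boot (e.g. the kdump PHB reset) msleep()
 * is not safe.
 */
static void ioda_eeh_msleep(unsigned long ms)
{
	if (system_state < SYSTEM_RUNNING)
		udelay(1000 * ms);
	else
		msleep(ms);
}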
arch/powerpc/platforms/powernv/pci-ioda.c
1 /* 1 /*
2 * Support PCI/PCIe on PowerNV platforms 2 * Support PCI/PCIe on PowerNV platforms
3 * 3 *
4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. 4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12 #undef DEBUG 12 #undef DEBUG
13 13
14 #include <linux/kernel.h> 14 #include <linux/kernel.h>
15 #include <linux/pci.h> 15 #include <linux/pci.h>
16 #include <linux/crash_dump.h>
16 #include <linux/debugfs.h> 17 #include <linux/debugfs.h>
17 #include <linux/delay.h> 18 #include <linux/delay.h>
18 #include <linux/string.h> 19 #include <linux/string.h>
19 #include <linux/init.h> 20 #include <linux/init.h>
20 #include <linux/bootmem.h> 21 #include <linux/bootmem.h>
21 #include <linux/irq.h> 22 #include <linux/irq.h>
22 #include <linux/io.h> 23 #include <linux/io.h>
23 #include <linux/msi.h> 24 #include <linux/msi.h>
24 #include <linux/memblock.h> 25 #include <linux/memblock.h>
25 26
26 #include <asm/sections.h> 27 #include <asm/sections.h>
27 #include <asm/io.h> 28 #include <asm/io.h>
28 #include <asm/prom.h> 29 #include <asm/prom.h>
29 #include <asm/pci-bridge.h> 30 #include <asm/pci-bridge.h>
30 #include <asm/machdep.h> 31 #include <asm/machdep.h>
31 #include <asm/msi_bitmap.h> 32 #include <asm/msi_bitmap.h>
32 #include <asm/ppc-pci.h> 33 #include <asm/ppc-pci.h>
33 #include <asm/opal.h> 34 #include <asm/opal.h>
34 #include <asm/iommu.h> 35 #include <asm/iommu.h>
35 #include <asm/tce.h> 36 #include <asm/tce.h>
36 #include <asm/xics.h> 37 #include <asm/xics.h>
37 #include <asm/debug.h> 38 #include <asm/debug.h>
38 39
39 #include "powernv.h" 40 #include "powernv.h"
40 #include "pci.h" 41 #include "pci.h"
41 42
42 #define define_pe_printk_level(func, kern_level) \ 43 #define define_pe_printk_level(func, kern_level) \
43 static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ 44 static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \
44 { \ 45 { \
45 struct va_format vaf; \ 46 struct va_format vaf; \
46 va_list args; \ 47 va_list args; \
47 char pfix[32]; \ 48 char pfix[32]; \
48 int r; \ 49 int r; \
49 \ 50 \
50 va_start(args, fmt); \ 51 va_start(args, fmt); \
51 \ 52 \
52 vaf.fmt = fmt; \ 53 vaf.fmt = fmt; \
53 vaf.va = &args; \ 54 vaf.va = &args; \
54 \ 55 \
55 if (pe->pdev) \ 56 if (pe->pdev) \
56 strlcpy(pfix, dev_name(&pe->pdev->dev), \ 57 strlcpy(pfix, dev_name(&pe->pdev->dev), \
57 sizeof(pfix)); \ 58 sizeof(pfix)); \
58 else \ 59 else \
59 sprintf(pfix, "%04x:%02x ", \ 60 sprintf(pfix, "%04x:%02x ", \
60 pci_domain_nr(pe->pbus), \ 61 pci_domain_nr(pe->pbus), \
61 pe->pbus->number); \ 62 pe->pbus->number); \
62 r = printk(kern_level "pci %s: [PE# %.3d] %pV", \ 63 r = printk(kern_level "pci %s: [PE# %.3d] %pV", \
63 pfix, pe->pe_number, &vaf); \ 64 pfix, pe->pe_number, &vaf); \
64 \ 65 \
65 va_end(args); \ 66 va_end(args); \
66 \ 67 \
67 return r; \ 68 return r; \
68 } \ 69 } \
69 70
70 define_pe_printk_level(pe_err, KERN_ERR); 71 define_pe_printk_level(pe_err, KERN_ERR);
71 define_pe_printk_level(pe_warn, KERN_WARNING); 72 define_pe_printk_level(pe_warn, KERN_WARNING);
72 define_pe_printk_level(pe_info, KERN_INFO); 73 define_pe_printk_level(pe_info, KERN_INFO);
73 74
74 /* 75 /*
75 * stdcix is only supposed to be used in hypervisor real mode as per 76 * stdcix is only supposed to be used in hypervisor real mode as per
76 * the architecture spec 77 * the architecture spec
77 */ 78 */
78 static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) 79 static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
79 { 80 {
80 __asm__ __volatile__("stdcix %0,0,%1" 81 __asm__ __volatile__("stdcix %0,0,%1"
81 : : "r" (val), "r" (paddr) : "memory"); 82 : : "r" (val), "r" (paddr) : "memory");
82 } 83 }
83 84
84 static int pnv_ioda_alloc_pe(struct pnv_phb *phb) 85 static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
85 { 86 {
86 unsigned long pe; 87 unsigned long pe;
87 88
88 do { 89 do {
89 pe = find_next_zero_bit(phb->ioda.pe_alloc, 90 pe = find_next_zero_bit(phb->ioda.pe_alloc,
90 phb->ioda.total_pe, 0); 91 phb->ioda.total_pe, 0);
91 if (pe >= phb->ioda.total_pe) 92 if (pe >= phb->ioda.total_pe)
92 return IODA_INVALID_PE; 93 return IODA_INVALID_PE;
93 } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); 94 } while(test_and_set_bit(pe, phb->ioda.pe_alloc));
94 95
95 phb->ioda.pe_array[pe].phb = phb; 96 phb->ioda.pe_array[pe].phb = phb;
96 phb->ioda.pe_array[pe].pe_number = pe; 97 phb->ioda.pe_array[pe].pe_number = pe;
97 return pe; 98 return pe;
98 } 99 }
99 100
100 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) 101 static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
101 { 102 {
102 WARN_ON(phb->ioda.pe_array[pe].pdev); 103 WARN_ON(phb->ioda.pe_array[pe].pdev);
103 104
104 memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe)); 105 memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
105 clear_bit(pe, phb->ioda.pe_alloc); 106 clear_bit(pe, phb->ioda.pe_alloc);
106 } 107 }
107 108
108 /* Currently those 2 are only used when MSIs are enabled, this will change 109 /* Currently those 2 are only used when MSIs are enabled, this will change
109 * but in the meantime, we need to protect them to avoid warnings 110 * but in the meantime, we need to protect them to avoid warnings
110 */ 111 */
111 #ifdef CONFIG_PCI_MSI 112 #ifdef CONFIG_PCI_MSI
112 static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) 113 static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
113 { 114 {
114 struct pci_controller *hose = pci_bus_to_host(dev->bus); 115 struct pci_controller *hose = pci_bus_to_host(dev->bus);
115 struct pnv_phb *phb = hose->private_data; 116 struct pnv_phb *phb = hose->private_data;
116 struct pci_dn *pdn = pci_get_pdn(dev); 117 struct pci_dn *pdn = pci_get_pdn(dev);
117 118
118 if (!pdn) 119 if (!pdn)
119 return NULL; 120 return NULL;
120 if (pdn->pe_number == IODA_INVALID_PE) 121 if (pdn->pe_number == IODA_INVALID_PE)
121 return NULL; 122 return NULL;
122 return &phb->ioda.pe_array[pdn->pe_number]; 123 return &phb->ioda.pe_array[pdn->pe_number];
123 } 124 }
124 #endif /* CONFIG_PCI_MSI */ 125 #endif /* CONFIG_PCI_MSI */
125 126
126 static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) 127 static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
127 { 128 {
128 struct pci_dev *parent; 129 struct pci_dev *parent;
129 uint8_t bcomp, dcomp, fcomp; 130 uint8_t bcomp, dcomp, fcomp;
130 long rc, rid_end, rid; 131 long rc, rid_end, rid;
131 132
132 /* Bus validation ? */ 133 /* Bus validation ? */
133 if (pe->pbus) { 134 if (pe->pbus) {
134 int count; 135 int count;
135 136
136 dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; 137 dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
137 fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; 138 fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
138 parent = pe->pbus->self; 139 parent = pe->pbus->self;
139 if (pe->flags & PNV_IODA_PE_BUS_ALL) 140 if (pe->flags & PNV_IODA_PE_BUS_ALL)
140 count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; 141 count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
141 else 142 else
142 count = 1; 143 count = 1;
143 144
144 switch(count) { 145 switch(count) {
145 case 1: bcomp = OpalPciBusAll; break; 146 case 1: bcomp = OpalPciBusAll; break;
146 case 2: bcomp = OpalPciBus7Bits; break; 147 case 2: bcomp = OpalPciBus7Bits; break;
147 case 4: bcomp = OpalPciBus6Bits; break; 148 case 4: bcomp = OpalPciBus6Bits; break;
148 case 8: bcomp = OpalPciBus5Bits; break; 149 case 8: bcomp = OpalPciBus5Bits; break;
149 case 16: bcomp = OpalPciBus4Bits; break; 150 case 16: bcomp = OpalPciBus4Bits; break;
150 case 32: bcomp = OpalPciBus3Bits; break; 151 case 32: bcomp = OpalPciBus3Bits; break;
151 default: 152 default:
152 pr_err("%s: Number of subordinate busses %d" 153 pr_err("%s: Number of subordinate busses %d"
153 " unsupported\n", 154 " unsupported\n",
154 pci_name(pe->pbus->self), count); 155 pci_name(pe->pbus->self), count);
155 /* Do an exact match only */ 156 /* Do an exact match only */
156 bcomp = OpalPciBusAll; 157 bcomp = OpalPciBusAll;
157 } 158 }
158 rid_end = pe->rid + (count << 8); 159 rid_end = pe->rid + (count << 8);
159 } else { 160 } else {
160 parent = pe->pdev->bus->self; 161 parent = pe->pdev->bus->self;
161 bcomp = OpalPciBusAll; 162 bcomp = OpalPciBusAll;
162 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; 163 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
163 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; 164 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
164 rid_end = pe->rid + 1; 165 rid_end = pe->rid + 1;
165 } 166 }
166 167
167 /* 168 /*
168 * Associate PE in PELT. We need to add the PE into the 169 * Associate PE in PELT. We need to add the PE into the
169 * corresponding PELT-V as well. Otherwise, the error 170 * corresponding PELT-V as well. Otherwise, the error
170 * originated from the PE might contribute to other 171 * originated from the PE might contribute to other
171 * PEs. 172 * PEs.
172 */ 173 */
173 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, 174 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
174 bcomp, dcomp, fcomp, OPAL_MAP_PE); 175 bcomp, dcomp, fcomp, OPAL_MAP_PE);
175 if (rc) { 176 if (rc) {
176 pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); 177 pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
177 return -ENXIO; 178 return -ENXIO;
178 } 179 }
179 180
180 rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, 181 rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
181 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); 182 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
182 if (rc) 183 if (rc)
183 pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc); 184 pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc);
184 opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, 185 opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
185 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 186 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
186 187
187 /* Add to all parents PELT-V */ 188 /* Add to all parents PELT-V */
188 while (parent) { 189 while (parent) {
189 struct pci_dn *pdn = pci_get_pdn(parent); 190 struct pci_dn *pdn = pci_get_pdn(parent);
190 if (pdn && pdn->pe_number != IODA_INVALID_PE) { 191 if (pdn && pdn->pe_number != IODA_INVALID_PE) {
191 rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, 192 rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
192 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); 193 pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
193 /* XXX What to do in case of error ? */ 194 /* XXX What to do in case of error ? */
194 } 195 }
195 parent = parent->bus->self; 196 parent = parent->bus->self;
196 } 197 }
197 /* Setup reverse map */ 198 /* Setup reverse map */
198 for (rid = pe->rid; rid < rid_end; rid++) 199 for (rid = pe->rid; rid < rid_end; rid++)
199 phb->ioda.pe_rmap[rid] = pe->pe_number; 200 phb->ioda.pe_rmap[rid] = pe->pe_number;
200 201
201 /* Setup one MVTs on IODA1 */ 202 /* Setup one MVTs on IODA1 */
202 if (phb->type == PNV_PHB_IODA1) { 203 if (phb->type == PNV_PHB_IODA1) {
203 pe->mve_number = pe->pe_number; 204 pe->mve_number = pe->pe_number;
204 rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, 205 rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
205 pe->pe_number); 206 pe->pe_number);
206 if (rc) { 207 if (rc) {
207 pe_err(pe, "OPAL error %ld setting up MVE %d\n", 208 pe_err(pe, "OPAL error %ld setting up MVE %d\n",
208 rc, pe->mve_number); 209 rc, pe->mve_number);
209 pe->mve_number = -1; 210 pe->mve_number = -1;
210 } else { 211 } else {
211 rc = opal_pci_set_mve_enable(phb->opal_id, 212 rc = opal_pci_set_mve_enable(phb->opal_id,
212 pe->mve_number, OPAL_ENABLE_MVE); 213 pe->mve_number, OPAL_ENABLE_MVE);
213 if (rc) { 214 if (rc) {
214 pe_err(pe, "OPAL error %ld enabling MVE %d\n", 215 pe_err(pe, "OPAL error %ld enabling MVE %d\n",
215 rc, pe->mve_number); 216 rc, pe->mve_number);
216 pe->mve_number = -1; 217 pe->mve_number = -1;
217 } 218 }
218 } 219 }
219 } else if (phb->type == PNV_PHB_IODA2) 220 } else if (phb->type == PNV_PHB_IODA2)
220 pe->mve_number = 0; 221 pe->mve_number = 0;
221 222
222 return 0; 223 return 0;
223 } 224 }
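pnv_ioda_configure_pe() turns the number of buses a PE covers into one of the OPAL bus-compare granularities and derives the RID range the PE owns (256 routing IDs per bus). A standalone sketch of just that arithmetic; the enum values here are placeholders, since the real OpalPciBusAll/OpalPciBusXBits encoding lives in the OPAL headers:

#include <stdio.h>

/* Placeholder names standing in for OpalPciBusAll / OpalPciBusXBits. */
enum { BUS_ALL, BUS_7BITS, BUS_6BITS, BUS_5BITS, BUS_4BITS, BUS_3BITS };

static int bus_compare(int count)
{
        switch (count) {
        case 1:  return BUS_ALL;        /* exact bus number match */
        case 2:  return BUS_7BITS;      /* ignore the lowest bus-number bit */
        case 4:  return BUS_6BITS;
        case 8:  return BUS_5BITS;
        case 16: return BUS_4BITS;
        case 32: return BUS_3BITS;
        default: return BUS_ALL;        /* unsupported span: fall back to exact match */
        }
}

int main(void)
{
        int bus = 0x04, count = 8;              /* PE covering buses 0x04..0x0b */
        unsigned int rid = bus << 8;            /* first RID owned by the PE */
        unsigned int rid_end = rid + (count << 8);

        printf("bcomp=%d, RIDs %04x..%04x map to this PE\n",
               bus_compare(count), rid, rid_end - 1);
        return 0;
}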
224 225
225 static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, 226 static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
226 struct pnv_ioda_pe *pe) 227 struct pnv_ioda_pe *pe)
227 { 228 {
228 struct pnv_ioda_pe *lpe; 229 struct pnv_ioda_pe *lpe;
229 230
230 list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) { 231 list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
231 if (lpe->dma_weight < pe->dma_weight) { 232 if (lpe->dma_weight < pe->dma_weight) {
232 list_add_tail(&pe->dma_link, &lpe->dma_link); 233 list_add_tail(&pe->dma_link, &lpe->dma_link);
233 return; 234 return;
234 } 235 }
235 } 236 }
236 list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list); 237 list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
237 } 238 }
238 239
239 static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) 240 static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
240 { 241 {
241 /* This is quite simplistic. The "base" weight of a device 242 /* This is quite simplistic. The "base" weight of a device
242 * is 10. 0 means no DMA is to be accounted for it. 243 * is 10. 0 means no DMA is to be accounted for it.
243 */ 244 */
244 245
245 /* If it's a bridge, no DMA */ 246 /* If it's a bridge, no DMA */
246 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) 247 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
247 return 0; 248 return 0;
248 249
249 /* Reduce the weight of slow USB controllers */ 250 /* Reduce the weight of slow USB controllers */
250 if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || 251 if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
251 dev->class == PCI_CLASS_SERIAL_USB_OHCI || 252 dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
252 dev->class == PCI_CLASS_SERIAL_USB_EHCI) 253 dev->class == PCI_CLASS_SERIAL_USB_EHCI)
253 return 3; 254 return 3;
254 255
255 /* Increase the weight of RAID (includes Obsidian) */ 256 /* Increase the weight of RAID (includes Obsidian) */
256 if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) 257 if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
257 return 15; 258 return 15;
258 259
259 /* Default */ 260 /* Default */
260 return 10; 261 return 10;
261 } 262 }
262 263
263 #if 0 264 #if 0
264 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) 265 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
265 { 266 {
266 struct pci_controller *hose = pci_bus_to_host(dev->bus); 267 struct pci_controller *hose = pci_bus_to_host(dev->bus);
267 struct pnv_phb *phb = hose->private_data; 268 struct pnv_phb *phb = hose->private_data;
268 struct pci_dn *pdn = pci_get_pdn(dev); 269 struct pci_dn *pdn = pci_get_pdn(dev);
269 struct pnv_ioda_pe *pe; 270 struct pnv_ioda_pe *pe;
270 int pe_num; 271 int pe_num;
271 272
272 if (!pdn) { 273 if (!pdn) {
273 pr_err("%s: Device tree node not associated properly\n", 274 pr_err("%s: Device tree node not associated properly\n",
274 pci_name(dev)); 275 pci_name(dev));
275 return NULL; 276 return NULL;
276 } 277 }
277 if (pdn->pe_number != IODA_INVALID_PE) 278 if (pdn->pe_number != IODA_INVALID_PE)
278 return NULL; 279 return NULL;
279 280
280 /* PE#0 has been pre-set */ 281 /* PE#0 has been pre-set */
281 if (dev->bus->number == 0) 282 if (dev->bus->number == 0)
282 pe_num = 0; 283 pe_num = 0;
283 else 284 else
284 pe_num = pnv_ioda_alloc_pe(phb); 285 pe_num = pnv_ioda_alloc_pe(phb);
285 if (pe_num == IODA_INVALID_PE) { 286 if (pe_num == IODA_INVALID_PE) {
286 pr_warning("%s: Not enough PE# available, disabling device\n", 287 pr_warning("%s: Not enough PE# available, disabling device\n",
287 pci_name(dev)); 288 pci_name(dev));
288 return NULL; 289 return NULL;
289 } 290 }
290 291
291 /* NOTE: We get only one ref to the pci_dev for the pdn, not for the 292 /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
292 * pointer in the PE data structure, both should be destroyed at the 293 * pointer in the PE data structure, both should be destroyed at the
293 * same time. However, this needs to be looked at more closely again 294 * same time. However, this needs to be looked at more closely again
294 * once we actually start removing things (Hotplug, SR-IOV, ...) 295 * once we actually start removing things (Hotplug, SR-IOV, ...)
295 * 296 *
296 * At some point we want to remove the PDN completely anyways 297 * At some point we want to remove the PDN completely anyways
297 */ 298 */
298 pe = &phb->ioda.pe_array[pe_num]; 299 pe = &phb->ioda.pe_array[pe_num];
299 pci_dev_get(dev); 300 pci_dev_get(dev);
300 pdn->pcidev = dev; 301 pdn->pcidev = dev;
301 pdn->pe_number = pe_num; 302 pdn->pe_number = pe_num;
302 pe->pdev = dev; 303 pe->pdev = dev;
303 pe->pbus = NULL; 304 pe->pbus = NULL;
304 pe->tce32_seg = -1; 305 pe->tce32_seg = -1;
305 pe->mve_number = -1; 306 pe->mve_number = -1;
306 pe->rid = dev->bus->number << 8 | pdn->devfn; 307 pe->rid = dev->bus->number << 8 | pdn->devfn;
307 308
308 pe_info(pe, "Associated device to PE\n"); 309 pe_info(pe, "Associated device to PE\n");
309 310
310 if (pnv_ioda_configure_pe(phb, pe)) { 311 if (pnv_ioda_configure_pe(phb, pe)) {
311 /* XXX What do we do here ? */ 312 /* XXX What do we do here ? */
312 if (pe_num) 313 if (pe_num)
313 pnv_ioda_free_pe(phb, pe_num); 314 pnv_ioda_free_pe(phb, pe_num);
314 pdn->pe_number = IODA_INVALID_PE; 315 pdn->pe_number = IODA_INVALID_PE;
315 pe->pdev = NULL; 316 pe->pdev = NULL;
316 pci_dev_put(dev); 317 pci_dev_put(dev);
317 return NULL; 318 return NULL;
318 } 319 }
319 320
320 /* Assign a DMA weight to the device */ 321 /* Assign a DMA weight to the device */
321 pe->dma_weight = pnv_ioda_dma_weight(dev); 322 pe->dma_weight = pnv_ioda_dma_weight(dev);
322 if (pe->dma_weight != 0) { 323 if (pe->dma_weight != 0) {
323 phb->ioda.dma_weight += pe->dma_weight; 324 phb->ioda.dma_weight += pe->dma_weight;
324 phb->ioda.dma_pe_count++; 325 phb->ioda.dma_pe_count++;
325 } 326 }
326 327
327 /* Link the PE */ 328 /* Link the PE */
328 pnv_ioda_link_pe_by_weight(phb, pe); 329 pnv_ioda_link_pe_by_weight(phb, pe);
329 330
330 return pe; 331 return pe;
331 } 332 }
332 #endif /* Useful for SRIOV case */ 333 #endif /* Useful for SRIOV case */
333 334
334 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) 335 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
335 { 336 {
336 struct pci_dev *dev; 337 struct pci_dev *dev;
337 338
338 list_for_each_entry(dev, &bus->devices, bus_list) { 339 list_for_each_entry(dev, &bus->devices, bus_list) {
339 struct pci_dn *pdn = pci_get_pdn(dev); 340 struct pci_dn *pdn = pci_get_pdn(dev);
340 341
341 if (pdn == NULL) { 342 if (pdn == NULL) {
342 pr_warn("%s: No device node associated with device !\n", 343 pr_warn("%s: No device node associated with device !\n",
343 pci_name(dev)); 344 pci_name(dev));
344 continue; 345 continue;
345 } 346 }
346 pdn->pcidev = dev; 347 pdn->pcidev = dev;
347 pdn->pe_number = pe->pe_number; 348 pdn->pe_number = pe->pe_number;
348 pe->dma_weight += pnv_ioda_dma_weight(dev); 349 pe->dma_weight += pnv_ioda_dma_weight(dev);
349 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) 350 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
350 pnv_ioda_setup_same_PE(dev->subordinate, pe); 351 pnv_ioda_setup_same_PE(dev->subordinate, pe);
351 } 352 }
352 } 353 }
353 354
354 /* 355 /*
355 * There are 2 types of PCI bus sensitive PEs: one that is comprised of a 356 * There are 2 types of PCI bus sensitive PEs: one that is comprised of a
356 * single PCI bus. Another one that contains the primary PCI bus and its 357 * single PCI bus. Another one that contains the primary PCI bus and its
357 * subordinate PCI devices and buses. The second type of PE is normally 358 * subordinate PCI devices and buses. The second type of PE is normally
358 * created by a PCIe-to-PCI bridge or a PLX switch downstream port. 359 * created by a PCIe-to-PCI bridge or a PLX switch downstream port.
359 */ 360 */
360 static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) 361 static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
361 { 362 {
362 struct pci_controller *hose = pci_bus_to_host(bus); 363 struct pci_controller *hose = pci_bus_to_host(bus);
363 struct pnv_phb *phb = hose->private_data; 364 struct pnv_phb *phb = hose->private_data;
364 struct pnv_ioda_pe *pe; 365 struct pnv_ioda_pe *pe;
365 int pe_num; 366 int pe_num;
366 367
367 pe_num = pnv_ioda_alloc_pe(phb); 368 pe_num = pnv_ioda_alloc_pe(phb);
368 if (pe_num == IODA_INVALID_PE) { 369 if (pe_num == IODA_INVALID_PE) {
369 pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", 370 pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
370 __func__, pci_domain_nr(bus), bus->number); 371 __func__, pci_domain_nr(bus), bus->number);
371 return; 372 return;
372 } 373 }
373 374
374 pe = &phb->ioda.pe_array[pe_num]; 375 pe = &phb->ioda.pe_array[pe_num];
375 pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); 376 pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
376 pe->pbus = bus; 377 pe->pbus = bus;
377 pe->pdev = NULL; 378 pe->pdev = NULL;
378 pe->tce32_seg = -1; 379 pe->tce32_seg = -1;
379 pe->mve_number = -1; 380 pe->mve_number = -1;
380 pe->rid = bus->busn_res.start << 8; 381 pe->rid = bus->busn_res.start << 8;
381 pe->dma_weight = 0; 382 pe->dma_weight = 0;
382 383
383 if (all) 384 if (all)
384 pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", 385 pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
385 bus->busn_res.start, bus->busn_res.end, pe_num); 386 bus->busn_res.start, bus->busn_res.end, pe_num);
386 else 387 else
387 pe_info(pe, "Secondary bus %d associated with PE#%d\n", 388 pe_info(pe, "Secondary bus %d associated with PE#%d\n",
388 bus->busn_res.start, pe_num); 389 bus->busn_res.start, pe_num);
389 390
390 if (pnv_ioda_configure_pe(phb, pe)) { 391 if (pnv_ioda_configure_pe(phb, pe)) {
391 /* XXX What do we do here ? */ 392 /* XXX What do we do here ? */
392 if (pe_num) 393 if (pe_num)
393 pnv_ioda_free_pe(phb, pe_num); 394 pnv_ioda_free_pe(phb, pe_num);
394 pe->pbus = NULL; 395 pe->pbus = NULL;
395 return; 396 return;
396 } 397 }
397 398
398 /* Associate it with all child devices */ 399 /* Associate it with all child devices */
399 pnv_ioda_setup_same_PE(bus, pe); 400 pnv_ioda_setup_same_PE(bus, pe);
400 401
401 /* Put PE to the list */ 402 /* Put PE to the list */
402 list_add_tail(&pe->list, &phb->ioda.pe_list); 403 list_add_tail(&pe->list, &phb->ioda.pe_list);
403 404
404 /* Account for one DMA PE if at least one DMA capable device exists 405 /* Account for one DMA PE if at least one DMA capable device exists
405 * below the bridge 406 * below the bridge
406 */ 407 */
407 if (pe->dma_weight != 0) { 408 if (pe->dma_weight != 0) {
408 phb->ioda.dma_weight += pe->dma_weight; 409 phb->ioda.dma_weight += pe->dma_weight;
409 phb->ioda.dma_pe_count++; 410 phb->ioda.dma_pe_count++;
410 } 411 }
411 412
412 /* Link the PE */ 413 /* Link the PE */
413 pnv_ioda_link_pe_by_weight(phb, pe); 414 pnv_ioda_link_pe_by_weight(phb, pe);
414 } 415 }
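A bus-based PE's RID is just the primary bus number shifted into the upper byte of the routing ID, with the device/function bits left at zero; the flags only record whether the PE spans that one bus or the whole subordinate range. A small sketch decoding such a RID back into bus/device/function under the usual bus << 8 | devfn convention (the values are made up):

#include <stdio.h>

int main(void)
{
        unsigned int bus = 0x20, devfn = 0x00;          /* devfn stays 0 for a bus-based PE */
        unsigned int rid = (bus << 8) | devfn;          /* mirrors pe->rid = busn_res.start << 8 */

        printf("rid=0x%04x -> bus 0x%02x, device 0x%02x, function %u\n",
               rid, (rid >> 8) & 0xff, (rid >> 3) & 0x1f, rid & 0x7);
        return 0;
}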
415 416
416 static void pnv_ioda_setup_PEs(struct pci_bus *bus) 417 static void pnv_ioda_setup_PEs(struct pci_bus *bus)
417 { 418 {
418 struct pci_dev *dev; 419 struct pci_dev *dev;
419 420
420 pnv_ioda_setup_bus_PE(bus, 0); 421 pnv_ioda_setup_bus_PE(bus, 0);
421 422
422 list_for_each_entry(dev, &bus->devices, bus_list) { 423 list_for_each_entry(dev, &bus->devices, bus_list) {
423 if (dev->subordinate) { 424 if (dev->subordinate) {
424 if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) 425 if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
425 pnv_ioda_setup_bus_PE(dev->subordinate, 1); 426 pnv_ioda_setup_bus_PE(dev->subordinate, 1);
426 else 427 else
427 pnv_ioda_setup_PEs(dev->subordinate); 428 pnv_ioda_setup_PEs(dev->subordinate);
428 } 429 }
429 } 430 }
430 } 431 }
431 432
432 /* 433 /*
433 * Configure PEs so that the downstream PCI buses and devices 434 * Configure PEs so that the downstream PCI buses and devices
434 * could have their associated PE#. Unfortunately, we didn't 435 * could have their associated PE#. Unfortunately, we didn't
435 * figure out the way to identify the PLX bridge yet. So we 436 * figure out the way to identify the PLX bridge yet. So we
436 * simply put the PCI bus and the subordinate behind the root 437 * simply put the PCI bus and the subordinate behind the root
437 * port to PE# here. The game rule here is expected to be changed 438 * port to PE# here. The game rule here is expected to be changed
438 * as soon as we can detect the PLX bridge correctly. 439 * as soon as we can detect the PLX bridge correctly.
439 */ 440 */
440 static void pnv_pci_ioda_setup_PEs(void) 441 static void pnv_pci_ioda_setup_PEs(void)
441 { 442 {
442 struct pci_controller *hose, *tmp; 443 struct pci_controller *hose, *tmp;
443 444
444 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 445 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
445 pnv_ioda_setup_PEs(hose->bus); 446 pnv_ioda_setup_PEs(hose->bus);
446 } 447 }
447 } 448 }
448 449
449 static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) 450 static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
450 { 451 {
451 struct pci_dn *pdn = pci_get_pdn(pdev); 452 struct pci_dn *pdn = pci_get_pdn(pdev);
452 struct pnv_ioda_pe *pe; 453 struct pnv_ioda_pe *pe;
453 454
454 /* 455 /*
455 * The function can be called while the PE# 456 * The function can be called while the PE#
456 * hasn't been assigned. Do nothing for the 457 * hasn't been assigned. Do nothing for the
457 * case. 458 * case.
458 */ 459 */
459 if (!pdn || pdn->pe_number == IODA_INVALID_PE) 460 if (!pdn || pdn->pe_number == IODA_INVALID_PE)
460 return; 461 return;
461 462
462 pe = &phb->ioda.pe_array[pdn->pe_number]; 463 pe = &phb->ioda.pe_array[pdn->pe_number];
463 WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); 464 WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
464 set_iommu_table_base(&pdev->dev, &pe->tce32_table); 465 set_iommu_table_base(&pdev->dev, &pe->tce32_table);
465 } 466 }
466 467
467 static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, 468 static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
468 struct pci_dev *pdev, u64 dma_mask) 469 struct pci_dev *pdev, u64 dma_mask)
469 { 470 {
470 struct pci_dn *pdn = pci_get_pdn(pdev); 471 struct pci_dn *pdn = pci_get_pdn(pdev);
471 struct pnv_ioda_pe *pe; 472 struct pnv_ioda_pe *pe;
472 uint64_t top; 473 uint64_t top;
473 bool bypass = false; 474 bool bypass = false;
474 475
475 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) 476 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
476 return -ENODEV; 477 return -ENODEV;
477 478
478 pe = &phb->ioda.pe_array[pdn->pe_number]; 479 pe = &phb->ioda.pe_array[pdn->pe_number];
479 if (pe->tce_bypass_enabled) { 480 if (pe->tce_bypass_enabled) {
480 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1; 481 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
481 bypass = (dma_mask >= top); 482 bypass = (dma_mask >= top);
482 } 483 }
483 484
484 if (bypass) { 485 if (bypass) {
485 dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n"); 486 dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
486 set_dma_ops(&pdev->dev, &dma_direct_ops); 487 set_dma_ops(&pdev->dev, &dma_direct_ops);
487 set_dma_offset(&pdev->dev, pe->tce_bypass_base); 488 set_dma_offset(&pdev->dev, pe->tce_bypass_base);
488 } else { 489 } else {
489 dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); 490 dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
490 set_dma_ops(&pdev->dev, &dma_iommu_ops); 491 set_dma_ops(&pdev->dev, &dma_iommu_ops);
491 set_iommu_table_base(&pdev->dev, &pe->tce32_table); 492 set_iommu_table_base(&pdev->dev, &pe->tce32_table);
492 } 493 }
493 return 0; 494 return 0;
494 } 495 }
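The bypass decision above is a single comparison: a device may use the direct (untranslated) window only if its DMA mask reaches the bypass base plus the last byte of RAM; otherwise it stays behind the 32-bit TCE table. A sketch of that check, assuming 64 GiB of RAM purely for illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t tce_bypass_base = 1ull << 59;          /* TVE#1 is selected by PCI address bit 59 */
        uint64_t end_of_dram     = 64ull << 30;         /* assumed RAM size for the example */
        uint64_t top = tce_bypass_base + end_of_dram - 1;
        uint64_t masks[] = { 0xffffffffull, ~0ull };    /* a 32-bit and a 64-bit capable device */

        for (int i = 0; i < 2; i++)
                printf("dma_mask=%016llx -> %s\n",
                       (unsigned long long)masks[i],
                       masks[i] >= top ? "64-bit direct bypass" : "32-bit DMA via iommu");
        return 0;
}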
495 496
496 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) 497 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
497 { 498 {
498 struct pci_dev *dev; 499 struct pci_dev *dev;
499 500
500 list_for_each_entry(dev, &bus->devices, bus_list) { 501 list_for_each_entry(dev, &bus->devices, bus_list) {
501 set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table); 502 set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table);
502 if (dev->subordinate) 503 if (dev->subordinate)
503 pnv_ioda_setup_bus_dma(pe, dev->subordinate); 504 pnv_ioda_setup_bus_dma(pe, dev->subordinate);
504 } 505 }
505 } 506 }
506 507
507 static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, 508 static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
508 struct iommu_table *tbl, 509 struct iommu_table *tbl,
509 __be64 *startp, __be64 *endp, bool rm) 510 __be64 *startp, __be64 *endp, bool rm)
510 { 511 {
511 __be64 __iomem *invalidate = rm ? 512 __be64 __iomem *invalidate = rm ?
512 (__be64 __iomem *)pe->tce_inval_reg_phys : 513 (__be64 __iomem *)pe->tce_inval_reg_phys :
513 (__be64 __iomem *)tbl->it_index; 514 (__be64 __iomem *)tbl->it_index;
514 unsigned long start, end, inc; 515 unsigned long start, end, inc;
515 516
516 start = __pa(startp); 517 start = __pa(startp);
517 end = __pa(endp); 518 end = __pa(endp);
518 519
519 /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ 520 /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
520 if (tbl->it_busno) { 521 if (tbl->it_busno) {
521 start <<= 12; 522 start <<= 12;
522 end <<= 12; 523 end <<= 12;
523 inc = 128 << 12; 524 inc = 128 << 12;
524 start |= tbl->it_busno; 525 start |= tbl->it_busno;
525 end |= tbl->it_busno; 526 end |= tbl->it_busno;
526 } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) { 527 } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
527 /* p7ioc-style invalidation, 2 TCEs per write */ 528 /* p7ioc-style invalidation, 2 TCEs per write */
528 start |= (1ull << 63); 529 start |= (1ull << 63);
529 end |= (1ull << 63); 530 end |= (1ull << 63);
530 inc = 16; 531 inc = 16;
531 } else { 532 } else {
532 /* Default (older HW) */ 533 /* Default (older HW) */
533 inc = 128; 534 inc = 128;
534 } 535 }
535 536
536 end |= inc - 1; /* round up end to be different than start */ 537 end |= inc - 1; /* round up end to be different than start */
537 538
538 mb(); /* Ensure above stores are visible */ 539 mb(); /* Ensure above stores are visible */
539 while (start <= end) { 540 while (start <= end) {
540 if (rm) 541 if (rm)
541 __raw_rm_writeq(cpu_to_be64(start), invalidate); 542 __raw_rm_writeq(cpu_to_be64(start), invalidate);
542 else 543 else
543 __raw_writeq(cpu_to_be64(start), invalidate); 544 __raw_writeq(cpu_to_be64(start), invalidate);
544 start += inc; 545 start += inc;
545 } 546 }
546 547
547 /* 548 /*
548 * The iommu layer will do another mb() for us on build() 549 * The iommu layer will do another mb() for us on build()
549 * and we don't care on free() 550 * and we don't care on free()
550 */ 551 */
551 } 552 }
552 553
553 static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, 554 static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
554 struct iommu_table *tbl, 555 struct iommu_table *tbl,
555 __be64 *startp, __be64 *endp, bool rm) 556 __be64 *startp, __be64 *endp, bool rm)
556 { 557 {
557 unsigned long start, end, inc; 558 unsigned long start, end, inc;
558 __be64 __iomem *invalidate = rm ? 559 __be64 __iomem *invalidate = rm ?
559 (__be64 __iomem *)pe->tce_inval_reg_phys : 560 (__be64 __iomem *)pe->tce_inval_reg_phys :
560 (__be64 __iomem *)tbl->it_index; 561 (__be64 __iomem *)tbl->it_index;
561 562
562 /* We'll invalidate DMA address in PE scope */ 563 /* We'll invalidate DMA address in PE scope */
563 start = 0x2ul << 60; 564 start = 0x2ul << 60;
564 start |= (pe->pe_number & 0xFF); 565 start |= (pe->pe_number & 0xFF);
565 end = start; 566 end = start;
566 567
567 /* Figure out the start, end and step */ 568 /* Figure out the start, end and step */
568 inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); 569 inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
569 start |= (inc << 12); 570 start |= (inc << 12);
570 inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); 571 inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
571 end |= (inc << 12); 572 end |= (inc << 12);
572 inc = (0x1ul << 12); 573 inc = (0x1ul << 12);
573 mb(); 574 mb();
574 575
575 while (start <= end) { 576 while (start <= end) {
576 if (rm) 577 if (rm)
577 __raw_rm_writeq(cpu_to_be64(start), invalidate); 578 __raw_rm_writeq(cpu_to_be64(start), invalidate);
578 else 579 else
579 __raw_writeq(cpu_to_be64(start), invalidate); 580 __raw_writeq(cpu_to_be64(start), invalidate);
580 start += inc; 581 start += inc;
581 } 582 }
582 } 583 }
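For IODA2 the value written to the TCE "kill" register packs an opcode in the top bits, the PE number in the low byte and the TCE index shifted up by 12 bits; the loop then steps one index at a time across the affected range. A worked sketch of that packing with made-up PE and index values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t pe_number = 5;
        uint64_t first_index = 0x100, last_index = 0x102;       /* TCE entries to invalidate */

        uint64_t start = (0x2ull << 60) | (pe_number & 0xff) | (first_index << 12);
        uint64_t end   = (0x2ull << 60) | (pe_number & 0xff) | (last_index  << 12);
        uint64_t inc   = 0x1ull << 12;                          /* one TCE index per write */

        for (uint64_t v = start; v <= end; v += inc)
                printf("write 0x%016llx to the invalidate register\n",
                       (unsigned long long)v);
        return 0;
}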
583 584
584 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, 585 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
585 __be64 *startp, __be64 *endp, bool rm) 586 __be64 *startp, __be64 *endp, bool rm)
586 { 587 {
587 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, 588 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
588 tce32_table); 589 tce32_table);
589 struct pnv_phb *phb = pe->phb; 590 struct pnv_phb *phb = pe->phb;
590 591
591 if (phb->type == PNV_PHB_IODA1) 592 if (phb->type == PNV_PHB_IODA1)
592 pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm); 593 pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
593 else 594 else
594 pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); 595 pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
595 } 596 }
596 597
597 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, 598 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
598 struct pnv_ioda_pe *pe, unsigned int base, 599 struct pnv_ioda_pe *pe, unsigned int base,
599 unsigned int segs) 600 unsigned int segs)
600 { 601 {
601 602
602 struct page *tce_mem = NULL; 603 struct page *tce_mem = NULL;
603 const __be64 *swinvp; 604 const __be64 *swinvp;
604 struct iommu_table *tbl; 605 struct iommu_table *tbl;
605 unsigned int i; 606 unsigned int i;
606 int64_t rc; 607 int64_t rc;
607 void *addr; 608 void *addr;
608 609
609 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ 610 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
610 #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) 611 #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
611 612
612 /* XXX FIXME: Handle 64-bit only DMA devices */ 613 /* XXX FIXME: Handle 64-bit only DMA devices */
613 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ 614 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
614 /* XXX FIXME: Allocate multi-level tables on PHB3 */ 615 /* XXX FIXME: Allocate multi-level tables on PHB3 */
615 616
616 /* We shouldn't already have a 32-bit DMA associated */ 617 /* We shouldn't already have a 32-bit DMA associated */
617 if (WARN_ON(pe->tce32_seg >= 0)) 618 if (WARN_ON(pe->tce32_seg >= 0))
618 return; 619 return;
619 620
620 /* Grab a 32-bit TCE table */ 621 /* Grab a 32-bit TCE table */
621 pe->tce32_seg = base; 622 pe->tce32_seg = base;
622 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", 623 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
623 (base << 28), ((base + segs) << 28) - 1); 624 (base << 28), ((base + segs) << 28) - 1);
624 625
625 /* XXX Currently, we allocate one big contiguous table for the 626 /* XXX Currently, we allocate one big contiguous table for the
626 * TCEs. We only really need one chunk per 256M of TCE space 627 * TCEs. We only really need one chunk per 256M of TCE space
627 * (ie per segment) but that's an optimization for later, it 628 * (ie per segment) but that's an optimization for later, it
628 * requires some added smarts with our get/put_tce implementation 629 * requires some added smarts with our get/put_tce implementation
629 */ 630 */
630 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 631 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
631 get_order(TCE32_TABLE_SIZE * segs)); 632 get_order(TCE32_TABLE_SIZE * segs));
632 if (!tce_mem) { 633 if (!tce_mem) {
633 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n"); 634 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
634 goto fail; 635 goto fail;
635 } 636 }
636 addr = page_address(tce_mem); 637 addr = page_address(tce_mem);
637 memset(addr, 0, TCE32_TABLE_SIZE * segs); 638 memset(addr, 0, TCE32_TABLE_SIZE * segs);
638 639
639 /* Configure HW */ 640 /* Configure HW */
640 for (i = 0; i < segs; i++) { 641 for (i = 0; i < segs; i++) {
641 rc = opal_pci_map_pe_dma_window(phb->opal_id, 642 rc = opal_pci_map_pe_dma_window(phb->opal_id,
642 pe->pe_number, 643 pe->pe_number,
643 base + i, 1, 644 base + i, 1,
644 __pa(addr) + TCE32_TABLE_SIZE * i, 645 __pa(addr) + TCE32_TABLE_SIZE * i,
645 TCE32_TABLE_SIZE, 0x1000); 646 TCE32_TABLE_SIZE, 0x1000);
646 if (rc) { 647 if (rc) {
647 pe_err(pe, " Failed to configure 32-bit TCE table," 648 pe_err(pe, " Failed to configure 32-bit TCE table,"
648 " err %ld\n", rc); 649 " err %ld\n", rc);
649 goto fail; 650 goto fail;
650 } 651 }
651 } 652 }
652 653
653 /* Setup linux iommu table */ 654 /* Setup linux iommu table */
654 tbl = &pe->tce32_table; 655 tbl = &pe->tce32_table;
655 pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, 656 pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
656 base << 28); 657 base << 28);
657 658
658 /* OPAL variant of P7IOC SW invalidated TCEs */ 659 /* OPAL variant of P7IOC SW invalidated TCEs */
659 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 660 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
660 if (swinvp) { 661 if (swinvp) {
661 /* We need a couple more fields -- an address and a data 662 /* We need a couple more fields -- an address and a data
662 * to or. Since the bus is only printed out on table free 663 * to or. Since the bus is only printed out on table free
663 * errors, and on the first pass the data will be a relative 664 * errors, and on the first pass the data will be a relative
664 * bus number, print that out instead. 665 * bus number, print that out instead.
665 */ 666 */
666 tbl->it_busno = 0; 667 tbl->it_busno = 0;
667 pe->tce_inval_reg_phys = be64_to_cpup(swinvp); 668 pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
668 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 669 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
669 8); 670 8);
670 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE | 671 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
671 TCE_PCI_SWINV_PAIR; 672 TCE_PCI_SWINV_PAIR;
672 } 673 }
673 iommu_init_table(tbl, phb->hose->node); 674 iommu_init_table(tbl, phb->hose->node);
674 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); 675 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
675 676
676 if (pe->pdev) 677 if (pe->pdev)
677 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 678 set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
678 else 679 else
679 pnv_ioda_setup_bus_dma(pe, pe->pbus); 680 pnv_ioda_setup_bus_dma(pe, pe->pbus);
680 681
681 return; 682 return;
682 fail: 683 fail:
683 /* XXX Failure: Try to fallback to 64-bit only ? */ 684 /* XXX Failure: Try to fallback to 64-bit only ? */
684 if (pe->tce32_seg >= 0) 685 if (pe->tce32_seg >= 0)
685 pe->tce32_seg = -1; 686 pe->tce32_seg = -1;
686 if (tce_mem) 687 if (tce_mem)
687 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); 688 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
688 } 689 }
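The TCE32_TABLE_SIZE constant used above follows directly from the window geometry: a 256 MB DMA segment mapped with 4 KB pages needs 65536 TCEs of 8 bytes each, i.e. 512 KB of table per segment. A quick arithmetic check:

#include <stdio.h>

int main(void)
{
        unsigned long window = 0x10000000;      /* 256 MB per 32-bit DMA segment */
        unsigned long page   = 0x1000;          /* 4 KB TCE page size */
        unsigned long entry  = 8;               /* bytes per TCE */
        unsigned long size   = window / page * entry;

        printf("TCE32_TABLE_SIZE = %lu bytes (%lu KB) for %lu entries\n",
               size, size >> 10, window / page);
        return 0;
}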
689 690
690 static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) 691 static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
691 { 692 {
692 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, 693 struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
693 tce32_table); 694 tce32_table);
694 uint16_t window_id = (pe->pe_number << 1 ) + 1; 695 uint16_t window_id = (pe->pe_number << 1 ) + 1;
695 int64_t rc; 696 int64_t rc;
696 697
697 pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis"); 698 pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
698 if (enable) { 699 if (enable) {
699 phys_addr_t top = memblock_end_of_DRAM(); 700 phys_addr_t top = memblock_end_of_DRAM();
700 701
701 top = roundup_pow_of_two(top); 702 top = roundup_pow_of_two(top);
702 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 703 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
703 pe->pe_number, 704 pe->pe_number,
704 window_id, 705 window_id,
705 pe->tce_bypass_base, 706 pe->tce_bypass_base,
706 top); 707 top);
707 } else { 708 } else {
708 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 709 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
709 pe->pe_number, 710 pe->pe_number,
710 window_id, 711 window_id,
711 pe->tce_bypass_base, 712 pe->tce_bypass_base,
712 0); 713 0);
713 714
714 /* 715 /*
715 * We might want to reset the DMA ops of all devices on 716 * We might want to reset the DMA ops of all devices on
716 * this PE. However in theory, that shouldn't be necessary 717 * this PE. However in theory, that shouldn't be necessary
717 * as this is used for VFIO/KVM pass-through and the device 718 * as this is used for VFIO/KVM pass-through and the device
718 * hasn't yet been returned to its kernel driver 719 * hasn't yet been returned to its kernel driver
719 */ 720 */
720 } 721 }
721 if (rc) 722 if (rc)
722 pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); 723 pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
723 else 724 else
724 pe->tce_bypass_enabled = enable; 725 pe->tce_bypass_enabled = enable;
725 } 726 }
726 727
727 static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, 728 static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
728 struct pnv_ioda_pe *pe) 729 struct pnv_ioda_pe *pe)
729 { 730 {
730 /* TVE #1 is selected by PCI address bit 59 */ 731 /* TVE #1 is selected by PCI address bit 59 */
731 pe->tce_bypass_base = 1ull << 59; 732 pe->tce_bypass_base = 1ull << 59;
732 733
733 /* Install set_bypass callback for VFIO */ 734 /* Install set_bypass callback for VFIO */
734 pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass; 735 pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
735 736
736 /* Enable bypass by default */ 737 /* Enable bypass by default */
737 pnv_pci_ioda2_set_bypass(&pe->tce32_table, true); 738 pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
738 } 739 }
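The bypass window sits at PCI address bit 59, so DMA addresses with that bit set select TVE#1 and skip translation entirely; its size must cover all of RAM, rounded up to a power of two. A sketch of that sizing under an assumed 48 GiB of memory:

#include <stdio.h>
#include <stdint.h>

static uint64_t round_up_pow2(uint64_t x)
{
        uint64_t p = 1;

        while (p < x)
                p <<= 1;
        return p;
}

int main(void)
{
        uint64_t bypass_base = 1ull << 59;      /* PCI address bit 59 selects TVE#1 */
        uint64_t dram_end    = 48ull << 30;     /* assumed memblock_end_of_DRAM() for the example */
        uint64_t window      = round_up_pow2(dram_end);

        printf("bypass window: base 0x%016llx, size %llu GiB\n",
               (unsigned long long)bypass_base,
               (unsigned long long)(window >> 30));
        return 0;
}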
739 740
740 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, 741 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
741 struct pnv_ioda_pe *pe) 742 struct pnv_ioda_pe *pe)
742 { 743 {
743 struct page *tce_mem = NULL; 744 struct page *tce_mem = NULL;
744 void *addr; 745 void *addr;
745 const __be64 *swinvp; 746 const __be64 *swinvp;
746 struct iommu_table *tbl; 747 struct iommu_table *tbl;
747 unsigned int tce_table_size, end; 748 unsigned int tce_table_size, end;
748 int64_t rc; 749 int64_t rc;
749 750
750 /* We shouldn't already have a 32-bit DMA associated */ 751 /* We shouldn't already have a 32-bit DMA associated */
751 if (WARN_ON(pe->tce32_seg >= 0)) 752 if (WARN_ON(pe->tce32_seg >= 0))
752 return; 753 return;
753 754
754 /* The PE will reserve all possible 32-bits space */ 755 /* The PE will reserve all possible 32-bits space */
755 pe->tce32_seg = 0; 756 pe->tce32_seg = 0;
756 end = (1 << ilog2(phb->ioda.m32_pci_base)); 757 end = (1 << ilog2(phb->ioda.m32_pci_base));
757 tce_table_size = (end / 0x1000) * 8; 758 tce_table_size = (end / 0x1000) * 8;
758 pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", 759 pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
759 end); 760 end);
760 761
761 /* Allocate TCE table */ 762 /* Allocate TCE table */
762 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, 763 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
763 get_order(tce_table_size)); 764 get_order(tce_table_size));
764 if (!tce_mem) { 765 if (!tce_mem) {
765 pe_err(pe, "Failed to allocate a 32-bit TCE memory\n"); 766 pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
766 goto fail; 767 goto fail;
767 } 768 }
768 addr = page_address(tce_mem); 769 addr = page_address(tce_mem);
769 memset(addr, 0, tce_table_size); 770 memset(addr, 0, tce_table_size);
770 771
771 /* 772 /*
772 * Map TCE table through TVT. The TVE index is the PE number 773 * Map TCE table through TVT. The TVE index is the PE number
773 * shifted by 1 bit for 32-bits DMA space. 774 * shifted by 1 bit for 32-bits DMA space.
774 */ 775 */
775 rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, 776 rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
776 pe->pe_number << 1, 1, __pa(addr), 777 pe->pe_number << 1, 1, __pa(addr),
777 tce_table_size, 0x1000); 778 tce_table_size, 0x1000);
778 if (rc) { 779 if (rc) {
779 pe_err(pe, "Failed to configure 32-bit TCE table," 780 pe_err(pe, "Failed to configure 32-bit TCE table,"
780 " err %ld\n", rc); 781 " err %ld\n", rc);
781 goto fail; 782 goto fail;
782 } 783 }
783 784
784 /* Setup linux iommu table */ 785 /* Setup linux iommu table */
785 tbl = &pe->tce32_table; 786 tbl = &pe->tce32_table;
786 pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0); 787 pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);
787 788
788 /* OPAL variant of PHB3 invalidated TCEs */ 789 /* OPAL variant of PHB3 invalidated TCEs */
789 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); 790 swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
790 if (swinvp) { 791 if (swinvp) {
791 /* We need a couple more fields -- an address and a data 792 /* We need a couple more fields -- an address and a data
792 * to or. Since the bus is only printed out on table free 793 * to or. Since the bus is only printed out on table free
793 * errors, and on the first pass the data will be a relative 794 * errors, and on the first pass the data will be a relative
794 * bus number, print that out instead. 795 * bus number, print that out instead.
795 */ 796 */
796 tbl->it_busno = 0; 797 tbl->it_busno = 0;
797 pe->tce_inval_reg_phys = be64_to_cpup(swinvp); 798 pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
798 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, 799 tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
799 8); 800 8);
800 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE; 801 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
801 } 802 }
802 iommu_init_table(tbl, phb->hose->node); 803 iommu_init_table(tbl, phb->hose->node);
803 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); 804 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
804 805
805 if (pe->pdev) 806 if (pe->pdev)
806 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 807 set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
807 else 808 else
808 pnv_ioda_setup_bus_dma(pe, pe->pbus); 809 pnv_ioda_setup_bus_dma(pe, pe->pbus);
809 810
810 /* Also create a bypass window */ 811 /* Also create a bypass window */
811 pnv_pci_ioda2_setup_bypass_pe(phb, pe); 812 pnv_pci_ioda2_setup_bypass_pe(phb, pe);
812 return; 813 return;
813 fail: 814 fail:
814 if (pe->tce32_seg >= 0) 815 if (pe->tce32_seg >= 0)
815 pe->tce32_seg = -1; 816 pe->tce32_seg = -1;
816 if (tce_mem) 817 if (tce_mem)
817 __free_pages(tce_mem, get_order(tce_table_size)); 818 __free_pages(tce_mem, get_order(tce_table_size));
818 } 819 }
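On IODA2 each PE ends up with two TVEs: index pe_number << 1 carries the TCE-mapped 32-bit window sized to the M32 space, and index (pe_number << 1) + 1 carries the bypass window created at the end of the function above. A sketch of how the table size and TVE indexes fall out, assuming a 2 GB M32 space:

#include <stdio.h>

int main(void)
{
        unsigned int pe_number = 5;
        unsigned long m32_pci_base = 0x80000000;                /* assumed 2 GB 32-bit space */
        unsigned long tce_table_size = m32_pci_base / 0x1000 * 8;      /* 4 KB pages, 8-byte TCEs */

        printf("32-bit window: TVE #%u, TCE table %lu KB\n",
               pe_number << 1, tce_table_size >> 10);
        printf("bypass window: TVE #%u\n", (pe_number << 1) + 1);
        return 0;
}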
819 820
820 static void pnv_ioda_setup_dma(struct pnv_phb *phb) 821 static void pnv_ioda_setup_dma(struct pnv_phb *phb)
821 { 822 {
822 struct pci_controller *hose = phb->hose; 823 struct pci_controller *hose = phb->hose;
823 unsigned int residual, remaining, segs, tw, base; 824 unsigned int residual, remaining, segs, tw, base;
824 struct pnv_ioda_pe *pe; 825 struct pnv_ioda_pe *pe;
825 826
826 /* If we have more PE# than segments available, hand out one 827 /* If we have more PE# than segments available, hand out one
827 * per PE until we run out and let the rest fail. If not, 828 * per PE until we run out and let the rest fail. If not,
828 * then we assign at least one segment per PE, plus more based 829 * then we assign at least one segment per PE, plus more based
829 * on the amount of devices under that PE 830 * on the amount of devices under that PE
830 */ 831 */
831 if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) 832 if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
832 residual = 0; 833 residual = 0;
833 else 834 else
834 residual = phb->ioda.tce32_count - 835 residual = phb->ioda.tce32_count -
835 phb->ioda.dma_pe_count; 836 phb->ioda.dma_pe_count;
836 837
837 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", 838 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
838 hose->global_number, phb->ioda.tce32_count); 839 hose->global_number, phb->ioda.tce32_count);
839 pr_info("PCI: %d PE# for a total weight of %d\n", 840 pr_info("PCI: %d PE# for a total weight of %d\n",
840 phb->ioda.dma_pe_count, phb->ioda.dma_weight); 841 phb->ioda.dma_pe_count, phb->ioda.dma_weight);
841 842
842 /* Walk our PE list and configure their DMA segments, hand them 843 /* Walk our PE list and configure their DMA segments, hand them
843 * out one base segment plus any residual segments based on 844 * out one base segment plus any residual segments based on
844 * weight 845 * weight
845 */ 846 */
846 remaining = phb->ioda.tce32_count; 847 remaining = phb->ioda.tce32_count;
847 tw = phb->ioda.dma_weight; 848 tw = phb->ioda.dma_weight;
848 base = 0; 849 base = 0;
849 list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { 850 list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
850 if (!pe->dma_weight) 851 if (!pe->dma_weight)
851 continue; 852 continue;
852 if (!remaining) { 853 if (!remaining) {
853 pe_warn(pe, "No DMA32 resources available\n"); 854 pe_warn(pe, "No DMA32 resources available\n");
854 continue; 855 continue;
855 } 856 }
856 segs = 1; 857 segs = 1;
857 if (residual) { 858 if (residual) {
858 segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; 859 segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
859 if (segs > remaining) 860 if (segs > remaining)
860 segs = remaining; 861 segs = remaining;
861 } 862 }
862 863
863 /* 864 /*
864 * For IODA2 compliant PHB3, we needn't care about the weight. 865 * For IODA2 compliant PHB3, we needn't care about the weight.
865 * All of the available 32-bit DMA space will be assigned to 866 * All of the available 32-bit DMA space will be assigned to
866 * the specific PE. 867 * the specific PE.
867 */ 868 */
868 if (phb->type == PNV_PHB_IODA1) { 869 if (phb->type == PNV_PHB_IODA1) {
869 pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", 870 pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
870 pe->dma_weight, segs); 871 pe->dma_weight, segs);
871 pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); 872 pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
872 } else { 873 } else {
873 pe_info(pe, "Assign DMA32 space\n"); 874 pe_info(pe, "Assign DMA32 space\n");
874 segs = 0; 875 segs = 0;
875 pnv_pci_ioda2_setup_dma_pe(phb, pe); 876 pnv_pci_ioda2_setup_dma_pe(phb, pe);
876 } 877 }
877 878
878 remaining -= segs; 879 remaining -= segs;
879 base += segs; 880 base += segs;
880 } 881 }
881 } 882 }
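The segment hand-out above gives every DMA-capable PE one base segment and splits the residual segments in proportion to weight, rounding to nearest with (weight * residual + tw/2) / tw and clamping to whatever is still free. A standalone sketch of that split for a hypothetical PHB with 16 segments and three PEs:

#include <stdio.h>

int main(void)
{
        unsigned int tce32_count = 16;                  /* segments offered by the PHB */
        unsigned int weights[]   = { 10, 15, 3 };       /* three DMA-capable PEs */
        unsigned int dma_pe_count = 3, tw = 0, remaining = tce32_count;

        for (int i = 0; i < 3; i++)
                tw += weights[i];

        unsigned int residual = tce32_count > dma_pe_count ?
                                tce32_count - dma_pe_count : 0;

        for (int i = 0; i < 3 && remaining; i++) {
                unsigned int segs = 1;

                if (residual)
                        segs += (weights[i] * residual + tw / 2) / tw;
                if (segs > remaining)
                        segs = remaining;
                printf("PE %d: weight %u -> %u DMA32 segment(s)\n", i, weights[i], segs);
                remaining -= segs;
        }
        return 0;
}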
882 883
883 #ifdef CONFIG_PCI_MSI 884 #ifdef CONFIG_PCI_MSI
884 static void pnv_ioda2_msi_eoi(struct irq_data *d) 885 static void pnv_ioda2_msi_eoi(struct irq_data *d)
885 { 886 {
886 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); 887 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
887 struct irq_chip *chip = irq_data_get_irq_chip(d); 888 struct irq_chip *chip = irq_data_get_irq_chip(d);
888 struct pnv_phb *phb = container_of(chip, struct pnv_phb, 889 struct pnv_phb *phb = container_of(chip, struct pnv_phb,
889 ioda.irq_chip); 890 ioda.irq_chip);
890 int64_t rc; 891 int64_t rc;
891 892
892 rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); 893 rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
893 WARN_ON_ONCE(rc); 894 WARN_ON_ONCE(rc);
894 895
895 icp_native_eoi(d); 896 icp_native_eoi(d);
896 } 897 }
897 898
898 static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, 899 static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
899 unsigned int hwirq, unsigned int virq, 900 unsigned int hwirq, unsigned int virq,
900 unsigned int is_64, struct msi_msg *msg) 901 unsigned int is_64, struct msi_msg *msg)
901 { 902 {
902 struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); 903 struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
903 struct pci_dn *pdn = pci_get_pdn(dev); 904 struct pci_dn *pdn = pci_get_pdn(dev);
904 struct irq_data *idata; 905 struct irq_data *idata;
905 struct irq_chip *ichip; 906 struct irq_chip *ichip;
906 unsigned int xive_num = hwirq - phb->msi_base; 907 unsigned int xive_num = hwirq - phb->msi_base;
907 __be32 data; 908 __be32 data;
908 int rc; 909 int rc;
909 910
910 /* No PE assigned ? bail out ... no MSI for you ! */ 911 /* No PE assigned ? bail out ... no MSI for you ! */
911 if (pe == NULL) 912 if (pe == NULL)
912 return -ENXIO; 913 return -ENXIO;
913 914
914 /* Check if we have an MVE */ 915 /* Check if we have an MVE */
915 if (pe->mve_number < 0) 916 if (pe->mve_number < 0)
916 return -ENXIO; 917 return -ENXIO;
917 918
918 /* Force 32-bit MSI on some broken devices */ 919 /* Force 32-bit MSI on some broken devices */
919 if (pdn && pdn->force_32bit_msi) 920 if (pdn && pdn->force_32bit_msi)
920 is_64 = 0; 921 is_64 = 0;
921 922
922 /* Assign XIVE to PE */ 923 /* Assign XIVE to PE */
923 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); 924 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
924 if (rc) { 925 if (rc) {
925 pr_warn("%s: OPAL error %d setting XIVE %d PE\n", 926 pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
926 pci_name(dev), rc, xive_num); 927 pci_name(dev), rc, xive_num);
927 return -EIO; 928 return -EIO;
928 } 929 }
929 930
930 if (is_64) { 931 if (is_64) {
931 __be64 addr64; 932 __be64 addr64;
932 933
933 rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1, 934 rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
934 &addr64, &data); 935 &addr64, &data);
935 if (rc) { 936 if (rc) {
936 pr_warn("%s: OPAL error %d getting 64-bit MSI data\n", 937 pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
937 pci_name(dev), rc); 938 pci_name(dev), rc);
938 return -EIO; 939 return -EIO;
939 } 940 }
940 msg->address_hi = be64_to_cpu(addr64) >> 32; 941 msg->address_hi = be64_to_cpu(addr64) >> 32;
941 msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful; 942 msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful;
942 } else { 943 } else {
943 __be32 addr32; 944 __be32 addr32;
944 945
945 rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1, 946 rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
946 &addr32, &data); 947 &addr32, &data);
947 if (rc) { 948 if (rc) {
948 pr_warn("%s: OPAL error %d getting 32-bit MSI data\n", 949 pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
949 pci_name(dev), rc); 950 pci_name(dev), rc);
950 return -EIO; 951 return -EIO;
951 } 952 }
952 msg->address_hi = 0; 953 msg->address_hi = 0;
953 msg->address_lo = be32_to_cpu(addr32); 954 msg->address_lo = be32_to_cpu(addr32);
954 } 955 }
955 msg->data = be32_to_cpu(data); 956 msg->data = be32_to_cpu(data);
956 957
957 /* 958 /*
958 * Change the IRQ chip for the MSI interrupts on PHB3. 959 * Change the IRQ chip for the MSI interrupts on PHB3.
959 * The corresponding IRQ chip should be populated for 960 * The corresponding IRQ chip should be populated for
960 * the first time. 961 * the first time.
961 */ 962 */
962 if (phb->type == PNV_PHB_IODA2) { 963 if (phb->type == PNV_PHB_IODA2) {
963 if (!phb->ioda.irq_chip_init) { 964 if (!phb->ioda.irq_chip_init) {
964 idata = irq_get_irq_data(virq); 965 idata = irq_get_irq_data(virq);
965 ichip = irq_data_get_irq_chip(idata); 966 ichip = irq_data_get_irq_chip(idata);
966 phb->ioda.irq_chip_init = 1; 967 phb->ioda.irq_chip_init = 1;
967 phb->ioda.irq_chip = *ichip; 968 phb->ioda.irq_chip = *ichip;
968 phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; 969 phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
969 } 970 }
970 971
971 irq_set_chip(virq, &phb->ioda.irq_chip); 972 irq_set_chip(virq, &phb->ioda.irq_chip);
972 } 973 }
973 974
974 pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," 975 pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
975 " address=%x_%08x data=%x PE# %d\n", 976 " address=%x_%08x data=%x PE# %d\n",
976 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num, 977 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
977 msg->address_hi, msg->address_lo, data, pe->pe_number); 978 msg->address_hi, msg->address_lo, data, pe->pe_number);
978 979
979 return 0; 980 return 0;
980 } 981 }
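The MSI path converts the allocated hardware IRQ into an XIVE number relative to the PHB's MSI base, asks OPAL for the doorbell address, and splits that address into the high and low halves of the MSI message. A sketch of the message assembly with placeholder values; in the real path both the address and the data word come back from opal_get_msi_64()/opal_get_msi_32():

#include <stdio.h>
#include <stdint.h>

struct msi_msg { uint32_t address_hi, address_lo, data; };

int main(void)
{
        unsigned int msi_base = 0x800, hwirq = 0x812;
        unsigned int xive_num = hwirq - msi_base;       /* XIVE assigned to the PE */

        uint64_t addr64 = 0x3f0000000012340ull;         /* placeholder doorbell address */
        uint32_t data   = xive_num;                     /* placeholder payload */

        struct msi_msg msg = {
                .address_hi = (uint32_t)(addr64 >> 32),
                .address_lo = (uint32_t)(addr64 & 0xffffffffu),
                .data       = data,
        };

        printf("xive #%u: address=%x_%08x data=%x\n",
               xive_num, msg.address_hi, msg.address_lo, msg.data);
        return 0;
}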
981 982
982 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) 983 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
983 { 984 {
984 unsigned int count; 985 unsigned int count;
985 const __be32 *prop = of_get_property(phb->hose->dn, 986 const __be32 *prop = of_get_property(phb->hose->dn,
986 "ibm,opal-msi-ranges", NULL); 987 "ibm,opal-msi-ranges", NULL);
987 if (!prop) { 988 if (!prop) {
988 /* BML Fallback */ 989 /* BML Fallback */
989 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL); 990 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
990 } 991 }
991 if (!prop) 992 if (!prop)
992 return; 993 return;
993 994
994 phb->msi_base = be32_to_cpup(prop); 995 phb->msi_base = be32_to_cpup(prop);
995 count = be32_to_cpup(prop + 1); 996 count = be32_to_cpup(prop + 1);
996 if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) { 997 if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
997 pr_err("PCI %d: Failed to allocate MSI bitmap !\n", 998 pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
998 phb->hose->global_number); 999 phb->hose->global_number);
999 return; 1000 return;
1000 } 1001 }
1001 1002
1002 phb->msi_setup = pnv_pci_ioda_msi_setup; 1003 phb->msi_setup = pnv_pci_ioda_msi_setup;
1003 phb->msi32_support = 1; 1004 phb->msi32_support = 1;
1004 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", 1005 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
1005 count, phb->msi_base); 1006 count, phb->msi_base);
1006 } 1007 }
1007 #else 1008 #else
1008 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } 1009 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
1009 #endif /* CONFIG_PCI_MSI */ 1010 #endif /* CONFIG_PCI_MSI */
1010 1011
1011 /* 1012 /*
1012 * This function is supposed to be called on basis of PE from top 1013 * This function is supposed to be called on basis of PE from top
1013 * to bottom style. So the I/O or MMIO segment assigned to the 1014 * to bottom style. So the I/O or MMIO segment assigned to the
1014 * parent PE could be overridden by its child PEs if necessary. 1015 * parent PE could be overridden by its child PEs if necessary.
1015 */ 1016 */
1016 static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, 1017 static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
1017 struct pnv_ioda_pe *pe) 1018 struct pnv_ioda_pe *pe)
1018 { 1019 {
1019 struct pnv_phb *phb = hose->private_data; 1020 struct pnv_phb *phb = hose->private_data;
1020 struct pci_bus_region region; 1021 struct pci_bus_region region;
1021 struct resource *res; 1022 struct resource *res;
1022 int i, index; 1023 int i, index;
1023 int rc; 1024 int rc;
1024 1025
1025 /* 1026 /*
1026 * NOTE: We only care about PCI bus based PEs for now. PCI 1027 * NOTE: We only care about PCI bus based PEs for now. PCI
1027 * device based PEs, for example SRIOV sensitive VFs, should 1028 * device based PEs, for example SRIOV sensitive VFs, should
1028 * be figured out later. 1029 * be figured out later.
1029 */ 1030 */
1030 BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); 1031 BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
1031 1032
1032 pci_bus_for_each_resource(pe->pbus, res, i) { 1033 pci_bus_for_each_resource(pe->pbus, res, i) {
1033 if (!res || !res->flags || 1034 if (!res || !res->flags ||
1034 res->start > res->end) 1035 res->start > res->end)
1035 continue; 1036 continue;
1036 1037
1037 if (res->flags & IORESOURCE_IO) { 1038 if (res->flags & IORESOURCE_IO) {
1038 region.start = res->start - phb->ioda.io_pci_base; 1039 region.start = res->start - phb->ioda.io_pci_base;
1039 region.end = res->end - phb->ioda.io_pci_base; 1040 region.end = res->end - phb->ioda.io_pci_base;
1040 index = region.start / phb->ioda.io_segsize; 1041 index = region.start / phb->ioda.io_segsize;
1041 1042
1042 while (index < phb->ioda.total_pe && 1043 while (index < phb->ioda.total_pe &&
1043 region.start <= region.end) { 1044 region.start <= region.end) {
1044 phb->ioda.io_segmap[index] = pe->pe_number; 1045 phb->ioda.io_segmap[index] = pe->pe_number;
1045 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 1046 rc = opal_pci_map_pe_mmio_window(phb->opal_id,
1046 pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); 1047 pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
1047 if (rc != OPAL_SUCCESS) { 1048 if (rc != OPAL_SUCCESS) {
1048 pr_err("%s: OPAL error %d when mapping IO " 1049 pr_err("%s: OPAL error %d when mapping IO "
1049 "segment #%d to PE#%d\n", 1050 "segment #%d to PE#%d\n",
1050 __func__, rc, index, pe->pe_number); 1051 __func__, rc, index, pe->pe_number);
1051 break; 1052 break;
1052 } 1053 }
1053 1054
1054 region.start += phb->ioda.io_segsize; 1055 region.start += phb->ioda.io_segsize;
1055 index++; 1056 index++;
1056 } 1057 }
1057 } else if (res->flags & IORESOURCE_MEM) { 1058 } else if (res->flags & IORESOURCE_MEM) {
1058 /* WARNING: Assumes M32 is mem region 0 in PHB. We need to 1059 /* WARNING: Assumes M32 is mem region 0 in PHB. We need to
1059 * harden that algorithm when we start supporting M64 1060 * harden that algorithm when we start supporting M64
1060 */ 1061 */
1061 region.start = res->start - 1062 region.start = res->start -
1062 hose->mem_offset[0] - 1063 hose->mem_offset[0] -
1063 phb->ioda.m32_pci_base; 1064 phb->ioda.m32_pci_base;
1064 region.end = res->end - 1065 region.end = res->end -
1065 hose->mem_offset[0] - 1066 hose->mem_offset[0] -
1066 phb->ioda.m32_pci_base; 1067 phb->ioda.m32_pci_base;
1067 index = region.start / phb->ioda.m32_segsize; 1068 index = region.start / phb->ioda.m32_segsize;
1068 1069
1069 while (index < phb->ioda.total_pe && 1070 while (index < phb->ioda.total_pe &&
1070 region.start <= region.end) { 1071 region.start <= region.end) {
1071 phb->ioda.m32_segmap[index] = pe->pe_number; 1072 phb->ioda.m32_segmap[index] = pe->pe_number;
1072 rc = opal_pci_map_pe_mmio_window(phb->opal_id, 1073 rc = opal_pci_map_pe_mmio_window(phb->opal_id,
1073 pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); 1074 pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
1074 if (rc != OPAL_SUCCESS) { 1075 if (rc != OPAL_SUCCESS) {
1075 pr_err("%s: OPAL error %d when mapping M32 " 1076 pr_err("%s: OPAL error %d when mapping M32 "
1076 "segment#%d to PE#%d", 1077 "segment#%d to PE#%d",
1077 __func__, rc, index, pe->pe_number); 1078 __func__, rc, index, pe->pe_number);
1078 break; 1079 break;
1079 } 1080 }
1080 1081
1081 region.start += phb->ioda.m32_segsize; 1082 region.start += phb->ioda.m32_segsize;
1082 index++; 1083 index++;
1083 } 1084 }
1084 } 1085 }
1085 } 1086 }
1086 } 1087 }
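
A minimal standalone sketch of the offset-to-segment arithmetic performed by the loops above (the segment size and the window are assumed values, not taken from this patch): the PHB-relative start is divided by the segment size to find the first index, and one segment is claimed per step until the window is covered.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t io_segsize = 0x10000;           /* assumed 64K I/O segments */
	uint64_t start = 0x18000, end = 0x2ffff; /* assumed PHB-relative window */
	uint64_t index = start / io_segsize;     /* first segment touched */

	/* same walk as the kernel loop, minus the total_pe bound */
	while (start <= end) {
		printf("claim I/O segment #%llu for this PE\n",
		       (unsigned long long)index);
		start += io_segsize;
		index++;
	}
	return 0;
}

With these values the window spans segments #1 and #2, which is what the I/O branch above would hand to opal_pci_map_pe_mmio_window().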
1087 1088
1088 static void pnv_pci_ioda_setup_seg(void) 1089 static void pnv_pci_ioda_setup_seg(void)
1089 { 1090 {
1090 struct pci_controller *tmp, *hose; 1091 struct pci_controller *tmp, *hose;
1091 struct pnv_phb *phb; 1092 struct pnv_phb *phb;
1092 struct pnv_ioda_pe *pe; 1093 struct pnv_ioda_pe *pe;
1093 1094
1094 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1095 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
1095 phb = hose->private_data; 1096 phb = hose->private_data;
1096 list_for_each_entry(pe, &phb->ioda.pe_list, list) { 1097 list_for_each_entry(pe, &phb->ioda.pe_list, list) {
1097 pnv_ioda_setup_pe_seg(hose, pe); 1098 pnv_ioda_setup_pe_seg(hose, pe);
1098 } 1099 }
1099 } 1100 }
1100 } 1101 }
1101 1102
1102 static void pnv_pci_ioda_setup_DMA(void) 1103 static void pnv_pci_ioda_setup_DMA(void)
1103 { 1104 {
1104 struct pci_controller *hose, *tmp; 1105 struct pci_controller *hose, *tmp;
1105 struct pnv_phb *phb; 1106 struct pnv_phb *phb;
1106 1107
1107 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1108 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
1108 pnv_ioda_setup_dma(hose->private_data); 1109 pnv_ioda_setup_dma(hose->private_data);
1109 1110
1110 /* Mark the PHB initialization done */ 1111 /* Mark the PHB initialization done */
1111 phb = hose->private_data; 1112 phb = hose->private_data;
1112 phb->initialized = 1; 1113 phb->initialized = 1;
1113 } 1114 }
1114 } 1115 }
1115 1116
1116 static void pnv_pci_ioda_create_dbgfs(void) 1117 static void pnv_pci_ioda_create_dbgfs(void)
1117 { 1118 {
1118 #ifdef CONFIG_DEBUG_FS 1119 #ifdef CONFIG_DEBUG_FS
1119 struct pci_controller *hose, *tmp; 1120 struct pci_controller *hose, *tmp;
1120 struct pnv_phb *phb; 1121 struct pnv_phb *phb;
1121 char name[16]; 1122 char name[16];
1122 1123
1123 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1124 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
1124 phb = hose->private_data; 1125 phb = hose->private_data;
1125 1126
1126 sprintf(name, "PCI%04x", hose->global_number); 1127 sprintf(name, "PCI%04x", hose->global_number);
1127 phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); 1128 phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
1128 if (!phb->dbgfs) 1129 if (!phb->dbgfs)
1129 pr_warning("%s: Error on creating debugfs on PHB#%x\n", 1130 pr_warning("%s: Error on creating debugfs on PHB#%x\n",
1130 __func__, hose->global_number); 1131 __func__, hose->global_number);
1131 } 1132 }
1132 #endif /* CONFIG_DEBUG_FS */ 1133 #endif /* CONFIG_DEBUG_FS */
1133 } 1134 }
1134 1135
1135 static void pnv_pci_ioda_fixup(void) 1136 static void pnv_pci_ioda_fixup(void)
1136 { 1137 {
1137 pnv_pci_ioda_setup_PEs(); 1138 pnv_pci_ioda_setup_PEs();
1138 pnv_pci_ioda_setup_seg(); 1139 pnv_pci_ioda_setup_seg();
1139 pnv_pci_ioda_setup_DMA(); 1140 pnv_pci_ioda_setup_DMA();
1140 1141
1141 pnv_pci_ioda_create_dbgfs(); 1142 pnv_pci_ioda_create_dbgfs();
1142 1143
1143 #ifdef CONFIG_EEH 1144 #ifdef CONFIG_EEH
1144 eeh_probe_mode_set(EEH_PROBE_MODE_DEV); 1145 eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
1145 eeh_addr_cache_build(); 1146 eeh_addr_cache_build();
1146 eeh_init(); 1147 eeh_init();
1147 #endif 1148 #endif
1148 } 1149 }
1149 1150
1150 /* 1151 /*
1151 * Returns the alignment for I/O or memory windows for P2P 1152 * Returns the alignment for I/O or memory windows for P2P
1152 * bridges. That actually depends on how PEs are segmented. 1153 * bridges. That actually depends on how PEs are segmented.
1153 * For now, we return I/O or M32 segment size for PE sensitive 1154 * For now, we return I/O or M32 segment size for PE sensitive
1154 * P2P bridges. Otherwise, the default values (4KiB for I/O, 1155 * P2P bridges. Otherwise, the default values (4KiB for I/O,
1155 * 1MiB for memory) will be returned. 1156 * 1MiB for memory) will be returned.
1156 * 1157 *
1157 * The current PCI bus might be put into one PE, which was 1158 * The current PCI bus might be put into one PE, which was
1158 * created against the parent PCI bridge. In that case, we 1159 * created against the parent PCI bridge. In that case, we
1159 * needn't enlarge the alignment so that we can save some 1160 * needn't enlarge the alignment so that we can save some
1160 * resources. 1161 * resources.
1161 */ 1162 */
1162 static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, 1163 static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
1163 unsigned long type) 1164 unsigned long type)
1164 { 1165 {
1165 struct pci_dev *bridge; 1166 struct pci_dev *bridge;
1166 struct pci_controller *hose = pci_bus_to_host(bus); 1167 struct pci_controller *hose = pci_bus_to_host(bus);
1167 struct pnv_phb *phb = hose->private_data; 1168 struct pnv_phb *phb = hose->private_data;
1168 int num_pci_bridges = 0; 1169 int num_pci_bridges = 0;
1169 1170
1170 bridge = bus->self; 1171 bridge = bus->self;
1171 while (bridge) { 1172 while (bridge) {
1172 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) { 1173 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
1173 num_pci_bridges++; 1174 num_pci_bridges++;
1174 if (num_pci_bridges >= 2) 1175 if (num_pci_bridges >= 2)
1175 return 1; 1176 return 1;
1176 } 1177 }
1177 1178
1178 bridge = bridge->bus->self; 1179 bridge = bridge->bus->self;
1179 } 1180 }
1180 1181
1181 /* We need to support prefetchable memory windows later */ 1182 /* We need to support prefetchable memory windows later */
1182 if (type & IORESOURCE_MEM) 1183 if (type & IORESOURCE_MEM)
1183 return phb->ioda.m32_segsize; 1184 return phb->ioda.m32_segsize;
1184 1185
1185 return phb->ioda.io_segsize; 1186 return phb->ioda.io_segsize;
1186 } 1187 }
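
A hedged sketch of that policy, with the bridge walk collapsed into a simple counter and the segment sizes invented for illustration: behind two or more conventional PCI bridges the bus cannot be isolated into its own PE anyway, so no extra alignment is requested; otherwise the per-PE segment size for the requested window type is returned.

#include <stdio.h>

#define M32_SEGSIZE 0x800000UL   /* assumed 8M M32 segments  */
#define IO_SEGSIZE  0x10000UL    /* assumed 64K I/O segments */

static unsigned long window_alignment(int pci_bridges_upstream, int want_mem)
{
	if (pci_bridges_upstream >= 2)
		return 1;
	return want_mem ? M32_SEGSIZE : IO_SEGSIZE;
}

int main(void)
{
	printf("0x%lx 0x%lx 0x%lx\n",
	       window_alignment(0, 1),  /* PCIe only, memory: M32 segment */
	       window_alignment(0, 0),  /* PCIe only, I/O: I/O segment    */
	       window_alignment(2, 1)); /* conventional bridges: 1        */
	return 0;
}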
1187 1188
1188 /* Prevent enabling devices for which we couldn't properly 1189 /* Prevent enabling devices for which we couldn't properly
1189 * assign a PE 1190 * assign a PE
1190 */ 1191 */
1191 static int pnv_pci_enable_device_hook(struct pci_dev *dev) 1192 static int pnv_pci_enable_device_hook(struct pci_dev *dev)
1192 { 1193 {
1193 struct pci_controller *hose = pci_bus_to_host(dev->bus); 1194 struct pci_controller *hose = pci_bus_to_host(dev->bus);
1194 struct pnv_phb *phb = hose->private_data; 1195 struct pnv_phb *phb = hose->private_data;
1195 struct pci_dn *pdn; 1196 struct pci_dn *pdn;
1196 1197
1197 /* The function may be called before the PEs have 1198 /* The function may be called before the PEs have
1198 * been created, for example during resource 1199 * been created, for example during resource
1199 * reassignment at PCI probe time. Just skip the 1200 * reassignment at PCI probe time. Just skip the
1200 * check if the PEs aren't ready. 1201 * check if the PEs aren't ready.
1201 */ 1202 */
1202 if (!phb->initialized) 1203 if (!phb->initialized)
1203 return 0; 1204 return 0;
1204 1205
1205 pdn = pci_get_pdn(dev); 1206 pdn = pci_get_pdn(dev);
1206 if (!pdn || pdn->pe_number == IODA_INVALID_PE) 1207 if (!pdn || pdn->pe_number == IODA_INVALID_PE)
1207 return -EINVAL; 1208 return -EINVAL;
1208 1209
1209 return 0; 1210 return 0;
1210 } 1211 }
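
A standalone sketch of that gate (the helper name and the simplified signature are illustrative only): before the PEs exist everything is waved through, and afterwards only devices that actually received a PE may be enabled.

#include <stdio.h>

#define IODA_INVALID_PE (-1)

static int enable_allowed(int phb_initialized, int pe_number)
{
	if (!phb_initialized)
		return 1;                    /* too early to judge, let it pass */
	return pe_number != IODA_INVALID_PE;
}

int main(void)
{
	printf("%d %d %d\n",
	       enable_allowed(0, IODA_INVALID_PE),  /* 1: PEs not built yet */
	       enable_allowed(1, IODA_INVALID_PE),  /* 0: refused           */
	       enable_allowed(1, 4));               /* 1: device owns PE#4  */
	return 0;
}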
1211 1212
1212 static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, 1213 static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
1213 u32 devfn) 1214 u32 devfn)
1214 { 1215 {
1215 return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; 1216 return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
1216 } 1217 }
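
A small sketch of the reverse-map index computed above (the bus and devfn values are invented): the 16-bit RID is the bus number in the high byte and devfn in the low byte, which is why pe_rmap holds 0x10000 entries.

#include <stdio.h>

int main(void)
{
	unsigned int bus = 0x01;
	unsigned int devfn = (0x02 << 3) | 0x0;  /* device 2, function 0 */
	unsigned int rid = (bus << 8) | devfn;

	printf("pe_rmap index for 01:02.0 = 0x%04x\n", rid);  /* 0x0110 */
	return 0;
}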
1217 1218
1218 static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) 1219 static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
1219 { 1220 {
1220 opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET, 1221 opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
1221 OPAL_ASSERT_RESET); 1222 OPAL_ASSERT_RESET);
1222 } 1223 }
1223 1224
1224 void __init pnv_pci_init_ioda_phb(struct device_node *np, 1225 void __init pnv_pci_init_ioda_phb(struct device_node *np,
1225 u64 hub_id, int ioda_type) 1226 u64 hub_id, int ioda_type)
1226 { 1227 {
1227 struct pci_controller *hose; 1228 struct pci_controller *hose;
1228 struct pnv_phb *phb; 1229 struct pnv_phb *phb;
1229 unsigned long size, m32map_off, pemap_off, iomap_off = 0; 1230 unsigned long size, m32map_off, pemap_off, iomap_off = 0;
1230 const __be64 *prop64; 1231 const __be64 *prop64;
1231 const __be32 *prop32; 1232 const __be32 *prop32;
1232 int len; 1233 int len;
1233 u64 phb_id; 1234 u64 phb_id;
1234 void *aux; 1235 void *aux;
1235 long rc; 1236 long rc;
1236 1237
1237 pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name); 1238 pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);
1238 1239
1239 prop64 = of_get_property(np, "ibm,opal-phbid", NULL); 1240 prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
1240 if (!prop64) { 1241 if (!prop64) {
1241 pr_err(" Missing \"ibm,opal-phbid\" property !\n"); 1242 pr_err(" Missing \"ibm,opal-phbid\" property !\n");
1242 return; 1243 return;
1243 } 1244 }
1244 phb_id = be64_to_cpup(prop64); 1245 phb_id = be64_to_cpup(prop64);
1245 pr_debug(" PHB-ID : 0x%016llx\n", phb_id); 1246 pr_debug(" PHB-ID : 0x%016llx\n", phb_id);
1246 1247
1247 phb = alloc_bootmem(sizeof(struct pnv_phb)); 1248 phb = alloc_bootmem(sizeof(struct pnv_phb));
1248 if (!phb) { 1249 if (!phb) {
1249 pr_err(" Out of memory !\n"); 1250 pr_err(" Out of memory !\n");
1250 return; 1251 return;
1251 } 1252 }
1252 1253
1253 /* Allocate PCI controller */ 1254 /* Allocate PCI controller */
1254 memset(phb, 0, sizeof(struct pnv_phb)); 1255 memset(phb, 0, sizeof(struct pnv_phb));
1255 phb->hose = hose = pcibios_alloc_controller(np); 1256 phb->hose = hose = pcibios_alloc_controller(np);
1256 if (!phb->hose) { 1257 if (!phb->hose) {
1257 pr_err(" Can't allocate PCI controller for %s\n", 1258 pr_err(" Can't allocate PCI controller for %s\n",
1258 np->full_name); 1259 np->full_name);
1259 free_bootmem((unsigned long)phb, sizeof(struct pnv_phb)); 1260 free_bootmem((unsigned long)phb, sizeof(struct pnv_phb));
1260 return; 1261 return;
1261 } 1262 }
1262 1263
1263 spin_lock_init(&phb->lock); 1264 spin_lock_init(&phb->lock);
1264 prop32 = of_get_property(np, "bus-range", &len); 1265 prop32 = of_get_property(np, "bus-range", &len);
1265 if (prop32 && len == 8) { 1266 if (prop32 && len == 8) {
1266 hose->first_busno = be32_to_cpu(prop32[0]); 1267 hose->first_busno = be32_to_cpu(prop32[0]);
1267 hose->last_busno = be32_to_cpu(prop32[1]); 1268 hose->last_busno = be32_to_cpu(prop32[1]);
1268 } else { 1269 } else {
1269 pr_warn(" Broken <bus-range> on %s\n", np->full_name); 1270 pr_warn(" Broken <bus-range> on %s\n", np->full_name);
1270 hose->first_busno = 0; 1271 hose->first_busno = 0;
1271 hose->last_busno = 0xff; 1272 hose->last_busno = 0xff;
1272 } 1273 }
1273 hose->private_data = phb; 1274 hose->private_data = phb;
1274 phb->hub_id = hub_id; 1275 phb->hub_id = hub_id;
1275 phb->opal_id = phb_id; 1276 phb->opal_id = phb_id;
1276 phb->type = ioda_type; 1277 phb->type = ioda_type;
1277 1278
1278 /* Detect specific models for error handling */ 1279 /* Detect specific models for error handling */
1279 if (of_device_is_compatible(np, "ibm,p7ioc-pciex")) 1280 if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
1280 phb->model = PNV_PHB_MODEL_P7IOC; 1281 phb->model = PNV_PHB_MODEL_P7IOC;
1281 else if (of_device_is_compatible(np, "ibm,power8-pciex")) 1282 else if (of_device_is_compatible(np, "ibm,power8-pciex"))
1282 phb->model = PNV_PHB_MODEL_PHB3; 1283 phb->model = PNV_PHB_MODEL_PHB3;
1283 else 1284 else
1284 phb->model = PNV_PHB_MODEL_UNKNOWN; 1285 phb->model = PNV_PHB_MODEL_UNKNOWN;
1285 1286
1286 /* Parse 32-bit and IO ranges (if any) */ 1287 /* Parse 32-bit and IO ranges (if any) */
1287 pci_process_bridge_OF_ranges(hose, np, !hose->global_number); 1288 pci_process_bridge_OF_ranges(hose, np, !hose->global_number);
1288 1289
1289 /* Get registers */ 1290 /* Get registers */
1290 phb->regs = of_iomap(np, 0); 1291 phb->regs = of_iomap(np, 0);
1291 if (phb->regs == NULL) 1292 if (phb->regs == NULL)
1292 pr_err(" Failed to map registers !\n"); 1293 pr_err(" Failed to map registers !\n");
1293 1294
1294 /* Initialize more IODA stuff */ 1295 /* Initialize more IODA stuff */
1295 phb->ioda.total_pe = 1; 1296 phb->ioda.total_pe = 1;
1296 prop32 = of_get_property(np, "ibm,opal-num-pes", NULL); 1297 prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
1297 if (prop32) 1298 if (prop32)
1298 phb->ioda.total_pe = be32_to_cpup(prop32); 1299 phb->ioda.total_pe = be32_to_cpup(prop32);
1299 prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL); 1300 prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
1300 if (prop32) 1301 if (prop32)
1301 phb->ioda.reserved_pe = be32_to_cpup(prop32); 1302 phb->ioda.reserved_pe = be32_to_cpup(prop32);
1302 phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); 1303 phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
1303 /* FW has already carved off the top 64k of M32 space (MSI space) */ 1304 /* FW has already carved off the top 64k of M32 space (MSI space) */
1304 phb->ioda.m32_size += 0x10000; 1305 phb->ioda.m32_size += 0x10000;
1305 1306
1306 phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe; 1307 phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
1307 phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0]; 1308 phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
1308 phb->ioda.io_size = hose->pci_io_size; 1309 phb->ioda.io_size = hose->pci_io_size;
1309 phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe; 1310 phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
1310 phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ 1311 phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
1311 1312
1312 /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ 1313 /* Allocate aux data & arrays. We don't have IO ports on PHB3 */
1313 size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); 1314 size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
1314 m32map_off = size; 1315 m32map_off = size;
1315 size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]); 1316 size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
1316 if (phb->type == PNV_PHB_IODA1) { 1317 if (phb->type == PNV_PHB_IODA1) {
1317 iomap_off = size; 1318 iomap_off = size;
1318 size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]); 1319 size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
1319 } 1320 }
1320 pemap_off = size; 1321 pemap_off = size;
1321 size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); 1322 size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
1322 aux = alloc_bootmem(size); 1323 aux = alloc_bootmem(size);
1323 memset(aux, 0, size); 1324 memset(aux, 0, size);
1324 phb->ioda.pe_alloc = aux; 1325 phb->ioda.pe_alloc = aux;
1325 phb->ioda.m32_segmap = aux + m32map_off; 1326 phb->ioda.m32_segmap = aux + m32map_off;
1326 if (phb->type == PNV_PHB_IODA1) 1327 if (phb->type == PNV_PHB_IODA1)
1327 phb->ioda.io_segmap = aux + iomap_off; 1328 phb->ioda.io_segmap = aux + iomap_off;
1328 phb->ioda.pe_array = aux + pemap_off; 1329 phb->ioda.pe_array = aux + pemap_off;
1329 set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc); 1330 set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);
1330 1331
1331 INIT_LIST_HEAD(&phb->ioda.pe_dma_list); 1332 INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
1332 INIT_LIST_HEAD(&phb->ioda.pe_list); 1333 INIT_LIST_HEAD(&phb->ioda.pe_list);
1333 1334
1334 /* Calculate how many 32-bit TCE segments we have */ 1335 /* Calculate how many 32-bit TCE segments we have */
1335 phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28; 1336 phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
1336 1337
1337 /* Clear unusable m64 */ 1338 /* Clear unusable m64 */
1338 hose->mem_resources[1].flags = 0; 1339 hose->mem_resources[1].flags = 0;
1339 hose->mem_resources[1].start = 0; 1340 hose->mem_resources[1].start = 0;
1340 hose->mem_resources[1].end = 0; 1341 hose->mem_resources[1].end = 0;
1341 hose->mem_resources[2].flags = 0; 1342 hose->mem_resources[2].flags = 0;
1342 hose->mem_resources[2].start = 0; 1343 hose->mem_resources[2].start = 0;
1343 hose->mem_resources[2].end = 0; 1344 hose->mem_resources[2].end = 0;
1344 1345
1345 #if 0 /* We should really do that ... */ 1346 #if 0 /* We should really do that ... */
1346 rc = opal_pci_set_phb_mem_window(opal->phb_id, 1347 rc = opal_pci_set_phb_mem_window(opal->phb_id,
1347 window_type, 1348 window_type,
1348 window_num, 1349 window_num,
1349 starting_real_address, 1350 starting_real_address,
1350 starting_pci_address, 1351 starting_pci_address,
1351 segment_size); 1352 segment_size);
1352 #endif 1353 #endif
1353 1354
1354 pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]" 1355 pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]"
1355 " IO: 0x%x [segment=0x%x]\n", 1356 " IO: 0x%x [segment=0x%x]\n",
1356 phb->ioda.total_pe, 1357 phb->ioda.total_pe,
1357 phb->ioda.reserved_pe, 1358 phb->ioda.reserved_pe,
1358 phb->ioda.m32_size, phb->ioda.m32_segsize, 1359 phb->ioda.m32_size, phb->ioda.m32_segsize,
1359 phb->ioda.io_size, phb->ioda.io_segsize); 1360 phb->ioda.io_size, phb->ioda.io_segsize);
1360 1361
1361 phb->hose->ops = &pnv_pci_ops; 1362 phb->hose->ops = &pnv_pci_ops;
1362 #ifdef CONFIG_EEH 1363 #ifdef CONFIG_EEH
1363 phb->eeh_ops = &ioda_eeh_ops; 1364 phb->eeh_ops = &ioda_eeh_ops;
1364 #endif 1365 #endif
1365 1366
1366 /* Setup RID -> PE mapping function */ 1367 /* Setup RID -> PE mapping function */
1367 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; 1368 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
1368 1369
1369 /* Setup TCEs */ 1370 /* Setup TCEs */
1370 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; 1371 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
1371 phb->dma_set_mask = pnv_pci_ioda_dma_set_mask; 1372 phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
1372 1373
1373 /* Setup shutdown function for kexec */ 1374 /* Setup shutdown function for kexec */
1374 phb->shutdown = pnv_pci_ioda_shutdown; 1375 phb->shutdown = pnv_pci_ioda_shutdown;
1375 1376
1376 /* Setup MSI support */ 1377 /* Setup MSI support */
1377 pnv_pci_init_ioda_msis(phb); 1378 pnv_pci_init_ioda_msis(phb);
1378 1379
1379 /* 1380 /*
1380 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here 1381 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
1381 * to let the PCI core do resource assignment. The PCI core 1382 * to let the PCI core do resource assignment. The PCI core
1382 * is expected to apply correct I/O and MMIO alignment 1383 * is expected to apply correct I/O and MMIO alignment
1383 * to the P2P bridge BARs so that each PCI bus (excluding 1384 * to the P2P bridge BARs so that each PCI bus (excluding
1384 * the child P2P bridges) can form an individual PE. 1385 * the child P2P bridges) can form an individual PE.
1385 */ 1386 */
1386 ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; 1387 ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
1387 ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; 1388 ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
1388 ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; 1389 ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
1389 ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus; 1390 ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus;
1390 pci_add_flags(PCI_REASSIGN_ALL_RSRC); 1391 pci_add_flags(PCI_REASSIGN_ALL_RSRC);
1391 1392
1392 /* Reset IODA tables to a clean state */ 1393 /* Reset IODA tables to a clean state */
1393 rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); 1394 rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
1394 if (rc) 1395 if (rc)
1395 pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); 1396 pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
1397
1398 /* If we're running in a kdump kernel, the previous kernel never
1399 * shut down PCI devices correctly. The IODA table has already
1400 * been cleaned out, so we have to issue a PHB reset to stop all PCI
1401 * transactions from the previous kernel.
1402 */
1403 if (is_kdump_kernel()) {
1404 pr_info(" Issue PHB reset ...\n");
1405 ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
1406 ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET);
1407 }
1396 } 1408 }
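
The reset hunk above only fires when is_kdump_kernel() reports a capture boot. As a hedged, userspace-level illustration rather than part of the patch: a kdump kernel exposes the crashed kernel's memory through /proc/vmcore, so the presence of that file is a simple way to observe that the branch would have been taken on a given boot.

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	if (access("/proc/vmcore", R_OK) == 0)
		printf("kdump (capture) kernel: the PHB reset path applies\n");
	else
		printf("regular boot: no extra PHB reset needed\n");
	return 0;
}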
1397 1409
1398 void __init pnv_pci_init_ioda2_phb(struct device_node *np) 1410 void __init pnv_pci_init_ioda2_phb(struct device_node *np)
1399 { 1411 {
1400 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); 1412 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
1401 } 1413 }
1402 1414
1403 void __init pnv_pci_init_ioda_hub(struct device_node *np) 1415 void __init pnv_pci_init_ioda_hub(struct device_node *np)
1404 { 1416 {
1405 struct device_node *phbn; 1417 struct device_node *phbn;
1406 const __be64 *prop64; 1418 const __be64 *prop64;
1407 u64 hub_id; 1419 u64 hub_id;
1408 1420
1409 pr_info("Probing IODA IO-Hub %s\n", np->full_name); 1421 pr_info("Probing IODA IO-Hub %s\n", np->full_name);
1410 1422
1411 prop64 = of_get_property(np, "ibm,opal-hubid", NULL); 1423 prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
1412 if (!prop64) { 1424 if (!prop64) {
1413 pr_err(" Missing \"ibm,opal-hubid\" property !\n"); 1425 pr_err(" Missing \"ibm,opal-hubid\" property !\n");
1414 return; 1426 return;
1415 } 1427 }
1416 hub_id = be64_to_cpup(prop64); 1428 hub_id = be64_to_cpup(prop64);
1417 pr_devel(" HUB-ID : 0x%016llx\n", hub_id); 1429 pr_devel(" HUB-ID : 0x%016llx\n", hub_id);
1418 1430
1419 /* Count child PHBs */ 1431 /* Count child PHBs */
1420 for_each_child_of_node(np, phbn) { 1432 for_each_child_of_node(np, phbn) {
1421 /* Look for IODA1 PHBs */ 1433 /* Look for IODA1 PHBs */
1422 if (of_device_is_compatible(phbn, "ibm,ioda-phb")) 1434 if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
1423 pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1); 1435 pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
1424 } 1436 }
1425 } 1437 }
1426 1438
arch/powerpc/platforms/powernv/pci.h
1 #ifndef __POWERNV_PCI_H 1 #ifndef __POWERNV_PCI_H
2 #define __POWERNV_PCI_H 2 #define __POWERNV_PCI_H
3 3
4 struct pci_dn; 4 struct pci_dn;
5 5
6 enum pnv_phb_type { 6 enum pnv_phb_type {
7 PNV_PHB_P5IOC2 = 0, 7 PNV_PHB_P5IOC2 = 0,
8 PNV_PHB_IODA1 = 1, 8 PNV_PHB_IODA1 = 1,
9 PNV_PHB_IODA2 = 2, 9 PNV_PHB_IODA2 = 2,
10 }; 10 };
11 11
12 /* Precise PHB model for error management */ 12 /* Precise PHB model for error management */
13 enum pnv_phb_model { 13 enum pnv_phb_model {
14 PNV_PHB_MODEL_UNKNOWN, 14 PNV_PHB_MODEL_UNKNOWN,
15 PNV_PHB_MODEL_P5IOC2, 15 PNV_PHB_MODEL_P5IOC2,
16 PNV_PHB_MODEL_P7IOC, 16 PNV_PHB_MODEL_P7IOC,
17 PNV_PHB_MODEL_PHB3, 17 PNV_PHB_MODEL_PHB3,
18 }; 18 };
19 19
20 #define PNV_PCI_DIAG_BUF_SIZE 8192 20 #define PNV_PCI_DIAG_BUF_SIZE 8192
21 #define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */ 21 #define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */
22 #define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */ 22 #define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */
23 #define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */ 23 #define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */
24 24
25 /* Data associated with a PE, including IOMMU tracking etc.. */ 25 /* Data associated with a PE, including IOMMU tracking etc.. */
26 struct pnv_phb; 26 struct pnv_phb;
27 struct pnv_ioda_pe { 27 struct pnv_ioda_pe {
28 unsigned long flags; 28 unsigned long flags;
29 struct pnv_phb *phb; 29 struct pnv_phb *phb;
30 30
31 /* A PE can be associated with a single device or an 31 /* A PE can be associated with a single device or an
32 * entire bus (& children). In the former case, pdev 32 * entire bus (& children). In the former case, pdev
33 * is populated, in the latter case, pbus is. 33 * is populated, in the latter case, pbus is.
34 */ 34 */
35 struct pci_dev *pdev; 35 struct pci_dev *pdev;
36 struct pci_bus *pbus; 36 struct pci_bus *pbus;
37 37
38 /* Effective RID (device RID for a device PE and base bus 38 /* Effective RID (device RID for a device PE and base bus
39 * RID with devfn 0 for a bus PE) 39 * RID with devfn 0 for a bus PE)
40 */ 40 */
41 unsigned int rid; 41 unsigned int rid;
42 42
43 /* PE number */ 43 /* PE number */
44 unsigned int pe_number; 44 unsigned int pe_number;
45 45
46 /* "Weight" assigned to the PE for the sake of DMA resource 46 /* "Weight" assigned to the PE for the sake of DMA resource
47 * allocations 47 * allocations
48 */ 48 */
49 unsigned int dma_weight; 49 unsigned int dma_weight;
50 50
51 /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */ 51 /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
52 int tce32_seg; 52 int tce32_seg;
53 int tce32_segcount; 53 int tce32_segcount;
54 struct iommu_table tce32_table; 54 struct iommu_table tce32_table;
55 phys_addr_t tce_inval_reg_phys; 55 phys_addr_t tce_inval_reg_phys;
56 56
57 /* 64-bit TCE bypass region */ 57 /* 64-bit TCE bypass region */
58 bool tce_bypass_enabled; 58 bool tce_bypass_enabled;
59 uint64_t tce_bypass_base; 59 uint64_t tce_bypass_base;
60 60
61 /* MSIs. MVE index is identical for 32 and 64 bit MSI 61 /* MSIs. MVE index is identical for 32 and 64 bit MSI
62 * and -1 if not supported. (It's actually identical to the 62 * and -1 if not supported. (It's actually identical to the
63 * PE number) 63 * PE number)
64 */ 64 */
65 int mve_number; 65 int mve_number;
66 66
67 /* Link in list of PE#s */ 67 /* Link in list of PE#s */
68 struct list_head dma_link; 68 struct list_head dma_link;
69 struct list_head list; 69 struct list_head list;
70 }; 70 };
71 71
72 /* IOC dependent EEH operations */ 72 /* IOC dependent EEH operations */
73 #ifdef CONFIG_EEH 73 #ifdef CONFIG_EEH
74 struct pnv_eeh_ops { 74 struct pnv_eeh_ops {
75 int (*post_init)(struct pci_controller *hose); 75 int (*post_init)(struct pci_controller *hose);
76 int (*set_option)(struct eeh_pe *pe, int option); 76 int (*set_option)(struct eeh_pe *pe, int option);
77 int (*get_state)(struct eeh_pe *pe); 77 int (*get_state)(struct eeh_pe *pe);
78 int (*reset)(struct eeh_pe *pe, int option); 78 int (*reset)(struct eeh_pe *pe, int option);
79 int (*get_log)(struct eeh_pe *pe, int severity, 79 int (*get_log)(struct eeh_pe *pe, int severity,
80 char *drv_log, unsigned long len); 80 char *drv_log, unsigned long len);
81 int (*configure_bridge)(struct eeh_pe *pe); 81 int (*configure_bridge)(struct eeh_pe *pe);
82 int (*next_error)(struct eeh_pe **pe); 82 int (*next_error)(struct eeh_pe **pe);
83 }; 83 };
84 #endif /* CONFIG_EEH */ 84 #endif /* CONFIG_EEH */
85 85
86 #define PNV_PHB_FLAG_EEH (1 << 0) 86 #define PNV_PHB_FLAG_EEH (1 << 0)
87 87
88 struct pnv_phb { 88 struct pnv_phb {
89 struct pci_controller *hose; 89 struct pci_controller *hose;
90 enum pnv_phb_type type; 90 enum pnv_phb_type type;
91 enum pnv_phb_model model; 91 enum pnv_phb_model model;
92 u64 hub_id; 92 u64 hub_id;
93 u64 opal_id; 93 u64 opal_id;
94 int flags; 94 int flags;
95 void __iomem *regs; 95 void __iomem *regs;
96 int initialized; 96 int initialized;
97 spinlock_t lock; 97 spinlock_t lock;
98 98
99 #ifdef CONFIG_EEH 99 #ifdef CONFIG_EEH
100 struct pnv_eeh_ops *eeh_ops; 100 struct pnv_eeh_ops *eeh_ops;
101 #endif 101 #endif
102 102
103 #ifdef CONFIG_DEBUG_FS 103 #ifdef CONFIG_DEBUG_FS
104 int has_dbgfs; 104 int has_dbgfs;
105 struct dentry *dbgfs; 105 struct dentry *dbgfs;
106 #endif 106 #endif
107 107
108 #ifdef CONFIG_PCI_MSI 108 #ifdef CONFIG_PCI_MSI
109 unsigned int msi_base; 109 unsigned int msi_base;
110 unsigned int msi32_support; 110 unsigned int msi32_support;
111 struct msi_bitmap msi_bmp; 111 struct msi_bitmap msi_bmp;
112 #endif 112 #endif
113 int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev, 113 int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev,
114 unsigned int hwirq, unsigned int virq, 114 unsigned int hwirq, unsigned int virq,
115 unsigned int is_64, struct msi_msg *msg); 115 unsigned int is_64, struct msi_msg *msg);
116 void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); 116 void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
117 int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev, 117 int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev,
118 u64 dma_mask); 118 u64 dma_mask);
119 void (*fixup_phb)(struct pci_controller *hose); 119 void (*fixup_phb)(struct pci_controller *hose);
120 u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); 120 u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
121 void (*shutdown)(struct pnv_phb *phb); 121 void (*shutdown)(struct pnv_phb *phb);
122 122
123 union { 123 union {
124 struct { 124 struct {
125 struct iommu_table iommu_table; 125 struct iommu_table iommu_table;
126 } p5ioc2; 126 } p5ioc2;
127 127
128 struct { 128 struct {
129 /* Global bridge info */ 129 /* Global bridge info */
130 unsigned int total_pe; 130 unsigned int total_pe;
131 unsigned int reserved_pe; 131 unsigned int reserved_pe;
132 unsigned int m32_size; 132 unsigned int m32_size;
133 unsigned int m32_segsize; 133 unsigned int m32_segsize;
134 unsigned int m32_pci_base; 134 unsigned int m32_pci_base;
135 unsigned int io_size; 135 unsigned int io_size;
136 unsigned int io_segsize; 136 unsigned int io_segsize;
137 unsigned int io_pci_base; 137 unsigned int io_pci_base;
138 138
139 /* PE allocation bitmap */ 139 /* PE allocation bitmap */
140 unsigned long *pe_alloc; 140 unsigned long *pe_alloc;
141 141
142 /* M32 & IO segment maps */ 142 /* M32 & IO segment maps */
143 unsigned int *m32_segmap; 143 unsigned int *m32_segmap;
144 unsigned int *io_segmap; 144 unsigned int *io_segmap;
145 struct pnv_ioda_pe *pe_array; 145 struct pnv_ioda_pe *pe_array;
146 146
147 /* IRQ chip */ 147 /* IRQ chip */
148 int irq_chip_init; 148 int irq_chip_init;
149 struct irq_chip irq_chip; 149 struct irq_chip irq_chip;
150 150
151 /* Sorted list of used PE's based 151 /* Sorted list of used PE's based
152 * on the sequence of creation 152 * on the sequence of creation
153 */ 153 */
154 struct list_head pe_list; 154 struct list_head pe_list;
155 155
156 /* Reverse map of PEs, will have to extend if 156 /* Reverse map of PEs, will have to extend if
157 * we are to support more than 256 PEs, indexed 157 * we are to support more than 256 PEs, indexed
158 * by { bus, devfn } 158 * by { bus, devfn }
159 */ 159 */
160 unsigned char pe_rmap[0x10000]; 160 unsigned char pe_rmap[0x10000];
161 161
162 /* 32-bit TCE tables allocation */ 162 /* 32-bit TCE tables allocation */
163 unsigned long tce32_count; 163 unsigned long tce32_count;
164 164
165 /* Total "weight" for the sake of DMA resources 165 /* Total "weight" for the sake of DMA resources
166 * allocation 166 * allocation
167 */ 167 */
168 unsigned int dma_weight; 168 unsigned int dma_weight;
169 unsigned int dma_pe_count; 169 unsigned int dma_pe_count;
170 170
171 /* Sorted list of used PE's, sorted at 171 /* Sorted list of used PE's, sorted at
172 * boot for resource allocation purposes 172 * boot for resource allocation purposes
173 */ 173 */
174 struct list_head pe_dma_list; 174 struct list_head pe_dma_list;
175 } ioda; 175 } ioda;
176 }; 176 };
177 177
178 /* PHB and hub status structure */ 178 /* PHB and hub status structure */
179 union { 179 union {
180 unsigned char blob[PNV_PCI_DIAG_BUF_SIZE]; 180 unsigned char blob[PNV_PCI_DIAG_BUF_SIZE];
181 struct OpalIoP7IOCPhbErrorData p7ioc; 181 struct OpalIoP7IOCPhbErrorData p7ioc;
182 struct OpalIoPhb3ErrorData phb3; 182 struct OpalIoPhb3ErrorData phb3;
183 struct OpalIoP7IOCErrorData hub_diag; 183 struct OpalIoP7IOCErrorData hub_diag;
184 } diag; 184 } diag;
185 185
186 }; 186 };
187 187
188 extern struct pci_ops pnv_pci_ops; 188 extern struct pci_ops pnv_pci_ops;
189 #ifdef CONFIG_EEH 189 #ifdef CONFIG_EEH
190 extern struct pnv_eeh_ops ioda_eeh_ops; 190 extern struct pnv_eeh_ops ioda_eeh_ops;
191 #endif 191 #endif
192 192
193 void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, 193 void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
194 unsigned char *log_buff); 194 unsigned char *log_buff);
195 int pnv_pci_cfg_read(struct device_node *dn, 195 int pnv_pci_cfg_read(struct device_node *dn,
196 int where, int size, u32 *val); 196 int where, int size, u32 *val);
197 int pnv_pci_cfg_write(struct device_node *dn, 197 int pnv_pci_cfg_write(struct device_node *dn,
198 int where, int size, u32 val); 198 int where, int size, u32 val);
199 extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, 199 extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
200 void *tce_mem, u64 tce_size, 200 void *tce_mem, u64 tce_size,
201 u64 dma_offset); 201 u64 dma_offset);
202 extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); 202 extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
203 extern void pnv_pci_init_ioda_hub(struct device_node *np); 203 extern void pnv_pci_init_ioda_hub(struct device_node *np);
204 extern void pnv_pci_init_ioda2_phb(struct device_node *np); 204 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
205 extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, 205 extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
206 __be64 *startp, __be64 *endp, bool rm); 206 __be64 *startp, __be64 *endp, bool rm);
207 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); 207 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
208 extern int ioda_eeh_phb_reset(struct pci_controller *hose, int option);
208 209
209 #endif /* __POWERNV_PCI_H */ 210 #endif /* __POWERNV_PCI_H */
210 211