Commit 7f1b358a236ee9c19657a619ac6f2dcabcaa0924
Committed by: Dan Williams
Parent: 16a37acaaf
I/OAT: I/OAT version 3.0 support
This patch adds support for the Intel I/OAT DMA engine ver.3 (aka the CB3 device) to the ioatdma and dca modules. The main features of I/OAT ver.3 are:

* 8 single-channel DMA devices (8 channels total)
* 8 DCA providers, each can accept 2 requesters
* 8-bit TAG values and 32-bit extended APIC IDs

Signed-off-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
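For context, this commit also exports a new two-argument tag lookup, dca3_get_tag(), alongside the existing dca_get_tag() (see the include/linux/dca.h hunk below). A minimal sketch of how a requester driver might drive the reworked API — the my_* names are illustrative, not part of this commit:

#include <linux/types.h>
#include <linux/device.h>
#include <linux/dca.h>
#include <linux/smp.h>

/* Bind the device to whichever registered provider claims it via
 * its ->dev_managed() callback; -ENODEV means no provider owns us. */
static int my_setup_dca(struct device *dev)
{
	return dca_add_requester(dev);
}

/* Per-interrupt tag lookup with the new v3 entry point, which takes
 * the requester device as well as the cpu. */
static u8 my_tag_for_current_cpu(struct device *dev)
{
	int cpu = get_cpu();	/* stay on this cpu during the lookup */
	u8 tag = dca3_get_tag(dev, cpu);

	put_cpu();
	return tag;		/* programmed into the device's DCA context */
}

static void my_teardown_dca(struct device *dev)
{
	dca_remove_requester(dev);
}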
Showing 10 changed files with 481 additions and 49 deletions
drivers/dca/dca-core.c
... | ... | @@ -28,39 +28,69 @@ |
28 | 28 | #include <linux/device.h> |
29 | 29 | #include <linux/dca.h> |
30 | 30 | |
31 | +#define DCA_VERSION "1.4" | |
32 | + | |
33 | +MODULE_VERSION(DCA_VERSION); | |
31 | 34 | MODULE_LICENSE("GPL"); |
35 | +MODULE_AUTHOR("Intel Corporation"); | |
32 | 36 | |
33 | -/* For now we're assuming a single, global, DCA provider for the system. */ | |
34 | - | |
35 | 37 | static DEFINE_SPINLOCK(dca_lock); |
36 | 38 | |
37 | -static struct dca_provider *global_dca = NULL; | |
39 | +static LIST_HEAD(dca_providers); | |
38 | 40 | |
41 | +static struct dca_provider *dca_find_provider_by_dev(struct device *dev) | |
42 | +{ | |
43 | + struct dca_provider *dca, *ret = NULL; | |
44 | + | |
45 | + list_for_each_entry(dca, &dca_providers, node) { | |
46 | + if ((!dev) || (dca->ops->dev_managed(dca, dev))) { | |
47 | + ret = dca; | |
48 | + break; | |
49 | + } | |
50 | + } | |
51 | + | |
52 | + return ret; | |
53 | +} | |
54 | + | |
39 | 55 | /** |
40 | 56 | * dca_add_requester - add a dca client to the list |
41 | 57 | * @dev - the device that wants dca service |
42 | 58 | */ |
43 | 59 | int dca_add_requester(struct device *dev) |
44 | 60 | { |
45 | - int err, slot; | |
61 | + struct dca_provider *dca; | |
62 | + int err, slot = -ENODEV; | |
46 | 63 | |
47 | - if (!global_dca) | |
48 | - return -ENODEV; | |
64 | + if (!dev) | |
65 | + return -EFAULT; | |
49 | 66 | |
50 | 67 | spin_lock(&dca_lock); |
51 | - slot = global_dca->ops->add_requester(global_dca, dev); | |
52 | - spin_unlock(&dca_lock); | |
53 | - if (slot < 0) | |
68 | + | |
69 | + /* check if the requester has not been added already */ | |
70 | + dca = dca_find_provider_by_dev(dev); | |
71 | + if (dca) { | |
72 | + spin_unlock(&dca_lock); | |
73 | + return -EEXIST; | |
74 | + } | |
75 | + | |
76 | + list_for_each_entry(dca, &dca_providers, node) { | |
77 | + slot = dca->ops->add_requester(dca, dev); | |
78 | + if (slot >= 0) | |
79 | + break; | |
80 | + } | |
81 | + if (slot < 0) { | |
82 | + spin_unlock(&dca_lock); | |
54 | 83 | return slot; |
84 | + } | |
55 | 85 | |
56 | - err = dca_sysfs_add_req(global_dca, dev, slot); | |
86 | + err = dca_sysfs_add_req(dca, dev, slot); | |
57 | 87 | if (err) { |
58 | - spin_lock(&dca_lock); | |
59 | - global_dca->ops->remove_requester(global_dca, dev); | |
88 | + dca->ops->remove_requester(dca, dev); | |
60 | 89 | spin_unlock(&dca_lock); |
61 | 90 | return err; |
62 | 91 | } |
63 | 92 | |
93 | + spin_unlock(&dca_lock); | |
64 | 94 | return 0; |
65 | 95 | } |
66 | 96 | EXPORT_SYMBOL_GPL(dca_add_requester); |
67 | 97 | |
68 | 98 | |
69 | 99 | |
70 | 100 | |
71 | 101 | |
72 | 102 | |
73 | 103 | |
74 | 104 | |
75 | 105 | |
76 | 106 | |
... | ... | @@ -71,31 +101,79 @@ |
71 | 101 | */ |
72 | 102 | int dca_remove_requester(struct device *dev) |
73 | 103 | { |
104 | + struct dca_provider *dca; | |
74 | 105 | int slot; |
75 | - if (!global_dca) | |
76 | - return -ENODEV; | |
77 | 106 | |
107 | + if (!dev) | |
108 | + return -EFAULT; | |
109 | + | |
78 | 110 | spin_lock(&dca_lock); |
79 | - slot = global_dca->ops->remove_requester(global_dca, dev); | |
80 | - spin_unlock(&dca_lock); | |
81 | - if (slot < 0) | |
111 | + dca = dca_find_provider_by_dev(dev); | |
112 | + if (!dca) { | |
113 | + spin_unlock(&dca_lock); | |
114 | + return -ENODEV; | |
115 | + } | |
116 | + slot = dca->ops->remove_requester(dca, dev); | |
117 | + if (slot < 0) { | |
118 | + spin_unlock(&dca_lock); | |
82 | 119 | return slot; |
120 | + } | |
83 | 121 | |
84 | - dca_sysfs_remove_req(global_dca, slot); | |
122 | + dca_sysfs_remove_req(dca, slot); | |
123 | + | |
124 | + spin_unlock(&dca_lock); | |
85 | 125 | return 0; |
86 | 126 | } |
87 | 127 | EXPORT_SYMBOL_GPL(dca_remove_requester); |
88 | 128 | |
89 | 129 | /** |
90 | - * dca_get_tag - return the dca tag for the given cpu | |
130 | + * dca_common_get_tag - return the dca tag (serves both new and old api) | |
131 | + * @dev - the device that wants dca service | |
91 | 132 | * @cpu - the cpuid as returned by get_cpu() |
92 | 133 | */ |
93 | -u8 dca_get_tag(int cpu) | |
134 | +u8 dca_common_get_tag(struct device *dev, int cpu) | |
94 | 135 | { |
95 | - if (!global_dca) | |
136 | + struct dca_provider *dca; | |
137 | + u8 tag; | |
138 | + | |
139 | + spin_lock(&dca_lock); | |
140 | + | |
141 | + dca = dca_find_provider_by_dev(dev); | |
142 | + if (!dca) { | |
143 | + spin_unlock(&dca_lock); | |
96 | 144 | return -ENODEV; |
97 | - return global_dca->ops->get_tag(global_dca, cpu); | |
145 | + } | |
146 | + tag = dca->ops->get_tag(dca, dev, cpu); | |
147 | + | |
148 | + spin_unlock(&dca_lock); | |
149 | + return tag; | |
98 | 150 | } |
151 | + | |
152 | +/** | |
153 | + * dca3_get_tag - return the dca tag to the requester device | |
154 | + * for the given cpu (new api) | |
155 | + * @dev - the device that wants dca service | |
156 | + * @cpu - the cpuid as returned by get_cpu() | |
157 | + */ | |
158 | +u8 dca3_get_tag(struct device *dev, int cpu) | |
159 | +{ | |
160 | + if (!dev) | |
161 | + return -EFAULT; | |
162 | + | |
163 | + return dca_common_get_tag(dev, cpu); | |
164 | +} | |
165 | +EXPORT_SYMBOL_GPL(dca3_get_tag); | |
166 | + | |
167 | +/** | |
168 | + * dca_get_tag - return the dca tag for the given cpu (old api) | |
169 | + * @cpu - the cpuid as returned by get_cpu() | |
170 | + */ | |
171 | +u8 dca_get_tag(int cpu) | |
172 | +{ | |
173 | + struct device *dev = NULL; | |
174 | + | |
175 | + return dca_common_get_tag(dev, cpu); | |
176 | +} | |
99 | 177 | EXPORT_SYMBOL_GPL(dca_get_tag); |
100 | 178 | |
101 | 179 | /** |
102 | 180 | |
... | ... | @@ -140,12 +218,10 @@ |
140 | 218 | { |
141 | 219 | int err; |
142 | 220 | |
143 | - if (global_dca) | |
144 | - return -EEXIST; | |
145 | 221 | err = dca_sysfs_add_provider(dca, dev); |
146 | 222 | if (err) |
147 | 223 | return err; |
148 | - global_dca = dca; | |
224 | + list_add(&dca->node, &dca_providers); | |
149 | 225 | blocking_notifier_call_chain(&dca_provider_chain, |
150 | 226 | DCA_PROVIDER_ADD, NULL); |
151 | 227 | return 0; |
152 | 228 | |
... | ... | @@ -158,11 +234,9 @@ |
158 | 234 | */ |
159 | 235 | void unregister_dca_provider(struct dca_provider *dca) |
160 | 236 | { |
161 | - if (!global_dca) | |
162 | - return; | |
163 | 237 | blocking_notifier_call_chain(&dca_provider_chain, |
164 | 238 | DCA_PROVIDER_REMOVE, NULL); |
165 | - global_dca = NULL; | |
239 | + list_del(&dca->node); | |
166 | 240 | dca_sysfs_remove_provider(dca); |
167 | 241 | } |
168 | 242 | EXPORT_SYMBOL_GPL(unregister_dca_provider); |
... | ... | @@ -187,6 +261,7 @@ |
187 | 261 | |
188 | 262 | static int __init dca_init(void) |
189 | 263 | { |
264 | + printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION); | |
190 | 265 | return dca_sysfs_init(); |
191 | 266 | } |
192 | 267 |
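The dca-core.c changes above drop the single global provider in favor of a provider list, routed by the new ->dev_managed() callback. A skeleton of the contract a provider must now implement — stub bodies only, and the my_* names are illustrative:

#include <linux/dca.h>

static int my_add_requester(struct dca_provider *dca, struct device *dev)
{
	return 0;	/* slot index on success, negative errno on failure */
}

static int my_remove_requester(struct dca_provider *dca, struct device *dev)
{
	return 0;	/* slot index that was freed */
}

/* ->get_tag() now receives the requester device too, so a provider
 * can specialize the tag per requester as well as per cpu. */
static u8 my_get_tag(struct dca_provider *dca, struct device *dev, int cpu)
{
	return 0;
}

/* New mandatory op: nonzero iff this provider services @dev; the
 * core calls it from dca_find_provider_by_dev(). */
static int my_dev_managed(struct dca_provider *dca, struct device *dev)
{
	return 0;
}

static struct dca_ops my_dca_ops = {
	.add_requester		= my_add_requester,
	.remove_requester	= my_remove_requester,
	.get_tag		= my_get_tag,
	.dev_managed		= my_dev_managed,
};

register_dca_provider() then puts the provider on the dca_providers list, replacing the old global_dca check.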
drivers/dca/dca-sysfs.c
... | ... | @@ -13,9 +13,10 @@ |
13 | 13 | int dca_sysfs_add_req(struct dca_provider *dca, struct device *dev, int slot) |
14 | 14 | { |
15 | 15 | struct device *cd; |
16 | + static int req_count; | |
16 | 17 | |
17 | 18 | cd = device_create(dca_class, dca->cd, MKDEV(0, slot + 1), |
18 | - "requester%d", slot); | |
19 | + "requester%d", req_count++); | |
19 | 20 | if (IS_ERR(cd)) |
20 | 21 | return PTR_ERR(cd); |
21 | 22 | return 0; |
drivers/dma/ioat.c
... | ... | @@ -47,6 +47,16 @@ |
47 | 47 | |
48 | 48 | /* I/OAT v2 platforms */ |
49 | 49 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, |
50 | + | |
51 | + /* I/OAT v3 platforms */ | |
52 | + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, | |
53 | + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, | |
54 | + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, | |
55 | + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, | |
56 | + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, | |
57 | + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, | |
58 | + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, | |
59 | + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, | |
50 | 60 | { 0, } |
51 | 61 | }; |
52 | 62 | |
... | ... | @@ -82,6 +92,11 @@ |
82 | 92 | device->dma = ioat_dma_probe(pdev, iobase); |
83 | 93 | if (device->dma && ioat_dca_enabled) |
84 | 94 | device->dca = ioat2_dca_init(pdev, iobase); |
95 | + break; | |
96 | + case IOAT_VER_3_0: | |
97 | + device->dma = ioat_dma_probe(pdev, iobase); | |
98 | + if (device->dma && ioat_dca_enabled) | |
99 | + device->dca = ioat3_dca_init(pdev, iobase); | |
85 | 100 | break; |
86 | 101 | default: |
87 | 102 | err = -ENODEV; |
drivers/dma/ioat_dca.c
... | ... | @@ -37,12 +37,18 @@ |
37 | 37 | #include "ioatdma_registers.h" |
38 | 38 | |
39 | 39 | /* |
40 | - * Bit 16 of a tag map entry is the "valid" bit, if it is set then bits 0:15 | |
40 | + * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 | |
41 | 41 | * contain the bit number of the APIC ID to map into the DCA tag. If the valid |
42 | 42 | * bit is not set, then the value must be 0 or 1 and defines the bit in the tag. |
43 | 43 | */ |
44 | 44 | #define DCA_TAG_MAP_VALID 0x80 |
45 | 45 | |
46 | +#define DCA3_TAG_MAP_BIT_TO_INV 0x80 | |
47 | +#define DCA3_TAG_MAP_BIT_TO_SEL 0x40 | |
48 | +#define DCA3_TAG_MAP_LITERAL_VAL 0x1 | |
49 | + | |
50 | +#define DCA_TAG_MAP_MASK 0xDF | |
51 | + | |
46 | 52 | /* |
47 | 53 | * "Legacy" DCA systems do not implement the DCA register set in the |
48 | 54 | * I/OAT device. Software needs direct support for their tag mappings. |
... | ... | @@ -95,6 +101,7 @@ |
95 | 101 | }; |
96 | 102 | |
97 | 103 | #define IOAT_DCA_MAX_REQ 6 |
104 | +#define IOAT3_DCA_MAX_REQ 2 | |
98 | 105 | |
99 | 106 | struct ioat_dca_priv { |
100 | 107 | void __iomem *iobase; |
... | ... | @@ -171,7 +178,9 @@ |
171 | 178 | return -ENODEV; |
172 | 179 | } |
173 | 180 | |
174 | -static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu) | |
181 | +static u8 ioat_dca_get_tag(struct dca_provider *dca, | |
182 | + struct device *dev, | |
183 | + int cpu) | |
175 | 184 | { |
176 | 185 | struct ioat_dca_priv *ioatdca = dca_priv(dca); |
177 | 186 | int i, apic_id, bit, value; |
178 | 187 | |
... | ... | @@ -193,10 +202,26 @@ |
193 | 202 | return tag; |
194 | 203 | } |
195 | 204 | |
205 | +static int ioat_dca_dev_managed(struct dca_provider *dca, | |
206 | + struct device *dev) | |
207 | +{ | |
208 | + struct ioat_dca_priv *ioatdca = dca_priv(dca); | |
209 | + struct pci_dev *pdev; | |
210 | + int i; | |
211 | + | |
212 | + pdev = to_pci_dev(dev); | |
213 | + for (i = 0; i < ioatdca->max_requesters; i++) { | |
214 | + if (ioatdca->req_slots[i].pdev == pdev) | |
215 | + return 1; | |
216 | + } | |
217 | + return 0; | |
218 | +} | |
219 | + | |
196 | 220 | static struct dca_ops ioat_dca_ops = { |
197 | 221 | .add_requester = ioat_dca_add_requester, |
198 | 222 | .remove_requester = ioat_dca_remove_requester, |
199 | 223 | .get_tag = ioat_dca_get_tag, |
224 | + .dev_managed = ioat_dca_dev_managed, | |
200 | 225 | }; |
201 | 226 | |
202 | 227 | |
... | ... | @@ -207,6 +232,8 @@ |
207 | 232 | u8 *tag_map = NULL; |
208 | 233 | int i; |
209 | 234 | int err; |
235 | + u8 version; | |
236 | + u8 max_requesters; | |
210 | 237 | |
211 | 238 | if (!system_has_dca_enabled(pdev)) |
212 | 239 | return NULL; |
213 | 240 | |
214 | 241 | |
... | ... | @@ -237,15 +264,20 @@ |
237 | 264 | if (tag_map == NULL) |
238 | 265 | return NULL; |
239 | 266 | |
267 | + version = readb(iobase + IOAT_VER_OFFSET); | |
268 | + if (version == IOAT_VER_3_0) | |
269 | + max_requesters = IOAT3_DCA_MAX_REQ; | |
270 | + else | |
271 | + max_requesters = IOAT_DCA_MAX_REQ; | |
272 | + | |
240 | 273 | dca = alloc_dca_provider(&ioat_dca_ops, |
241 | 274 | sizeof(*ioatdca) + |
242 | - (sizeof(struct ioat_dca_slot) * IOAT_DCA_MAX_REQ)); | |
275 | + (sizeof(struct ioat_dca_slot) * max_requesters)); | |
243 | 276 | if (!dca) |
244 | 277 | return NULL; |
245 | 278 | |
246 | 279 | ioatdca = dca_priv(dca); |
247 | - ioatdca->max_requesters = IOAT_DCA_MAX_REQ; | |
248 | - | |
280 | + ioatdca->max_requesters = max_requesters; | |
249 | 281 | ioatdca->dca_base = iobase + 0x54; |
250 | 282 | |
251 | 283 | /* copy over the APIC ID to DCA tag mapping */ |
252 | 284 | |
... | ... | @@ -323,11 +355,13 @@ |
323 | 355 | return -ENODEV; |
324 | 356 | } |
325 | 357 | |
326 | -static u8 ioat2_dca_get_tag(struct dca_provider *dca, int cpu) | |
358 | +static u8 ioat2_dca_get_tag(struct dca_provider *dca, | |
359 | + struct device *dev, | |
360 | + int cpu) | |
327 | 361 | { |
328 | 362 | u8 tag; |
329 | 363 | |
330 | - tag = ioat_dca_get_tag(dca, cpu); | |
364 | + tag = ioat_dca_get_tag(dca, dev, cpu); | |
331 | 365 | tag = (~tag) & 0x1F; |
332 | 366 | return tag; |
333 | 367 | } |
... | ... | @@ -336,6 +370,7 @@ |
336 | 370 | .add_requester = ioat2_dca_add_requester, |
337 | 371 | .remove_requester = ioat2_dca_remove_requester, |
338 | 372 | .get_tag = ioat2_dca_get_tag, |
373 | + .dev_managed = ioat_dca_dev_managed, | |
339 | 374 | }; |
340 | 375 | |
341 | 376 | static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) |
... | ... | @@ -415,6 +450,201 @@ |
415 | 450 | ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID; |
416 | 451 | else |
417 | 452 | ioatdca->tag_map[i] = 0; |
453 | + } | |
454 | + | |
455 | + err = register_dca_provider(dca, &pdev->dev); | |
456 | + if (err) { | |
457 | + free_dca_provider(dca); | |
458 | + return NULL; | |
459 | + } | |
460 | + | |
461 | + return dca; | |
462 | +} | |
463 | + | |
464 | +static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) | |
465 | +{ | |
466 | + struct ioat_dca_priv *ioatdca = dca_priv(dca); | |
467 | + struct pci_dev *pdev; | |
468 | + int i; | |
469 | + u16 id; | |
470 | + u16 global_req_table; | |
471 | + | |
472 | + /* This implementation only supports PCI-Express */ | |
473 | + if (dev->bus != &pci_bus_type) | |
474 | + return -ENODEV; | |
475 | + pdev = to_pci_dev(dev); | |
476 | + id = dcaid_from_pcidev(pdev); | |
477 | + | |
478 | + if (ioatdca->requester_count == ioatdca->max_requesters) | |
479 | + return -ENODEV; | |
480 | + | |
481 | + for (i = 0; i < ioatdca->max_requesters; i++) { | |
482 | + if (ioatdca->req_slots[i].pdev == NULL) { | |
483 | + /* found an empty slot */ | |
484 | + ioatdca->requester_count++; | |
485 | + ioatdca->req_slots[i].pdev = pdev; | |
486 | + ioatdca->req_slots[i].rid = id; | |
487 | + global_req_table = | |
488 | + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); | |
489 | + writel(id | IOAT_DCA_GREQID_VALID, | |
490 | + ioatdca->iobase + global_req_table + (i * 4)); | |
491 | + return i; | |
492 | + } | |
493 | + } | |
494 | + /* Error, ioatdma->requester_count is out of whack */ | |
495 | + return -EFAULT; | |
496 | +} | |
497 | + | |
498 | +static int ioat3_dca_remove_requester(struct dca_provider *dca, | |
499 | + struct device *dev) | |
500 | +{ | |
501 | + struct ioat_dca_priv *ioatdca = dca_priv(dca); | |
502 | + struct pci_dev *pdev; | |
503 | + int i; | |
504 | + u16 global_req_table; | |
505 | + | |
506 | + /* This implementation only supports PCI-Express */ | |
507 | + if (dev->bus != &pci_bus_type) | |
508 | + return -ENODEV; | |
509 | + pdev = to_pci_dev(dev); | |
510 | + | |
511 | + for (i = 0; i < ioatdca->max_requesters; i++) { | |
512 | + if (ioatdca->req_slots[i].pdev == pdev) { | |
513 | + global_req_table = | |
514 | + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); | |
515 | + writel(0, ioatdca->iobase + global_req_table + (i * 4)); | |
516 | + ioatdca->req_slots[i].pdev = NULL; | |
517 | + ioatdca->req_slots[i].rid = 0; | |
518 | + ioatdca->requester_count--; | |
519 | + return i; | |
520 | + } | |
521 | + } | |
522 | + return -ENODEV; | |
523 | +} | |
524 | + | |
525 | +static u8 ioat3_dca_get_tag(struct dca_provider *dca, | |
526 | + struct device *dev, | |
527 | + int cpu) | |
528 | +{ | |
529 | + u8 tag; | |
530 | + | |
531 | + struct ioat_dca_priv *ioatdca = dca_priv(dca); | |
532 | + int i, apic_id, bit, value; | |
533 | + u8 entry; | |
534 | + | |
535 | + tag = 0; | |
536 | + apic_id = cpu_physical_id(cpu); | |
537 | + | |
538 | + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { | |
539 | + entry = ioatdca->tag_map[i]; | |
540 | + if (entry & DCA3_TAG_MAP_BIT_TO_SEL) { | |
541 | + bit = entry & | |
542 | + ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV); | |
543 | + value = (apic_id & (1 << bit)) ? 1 : 0; | |
544 | + } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) { | |
545 | + bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV; | |
546 | + value = (apic_id & (1 << bit)) ? 0 : 1; | |
547 | + } else { | |
548 | + value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0; | |
549 | + } | |
550 | + tag |= (value << i); | |
551 | + } | |
552 | + | |
553 | + return tag; | |
554 | +} | |
555 | + | |
556 | +static struct dca_ops ioat3_dca_ops = { | |
557 | + .add_requester = ioat3_dca_add_requester, | |
558 | + .remove_requester = ioat3_dca_remove_requester, | |
559 | + .get_tag = ioat3_dca_get_tag, | |
560 | + .dev_managed = ioat_dca_dev_managed, | |
561 | +}; | |
562 | + | |
563 | +static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) | |
564 | +{ | |
565 | + int slots = 0; | |
566 | + u32 req; | |
567 | + u16 global_req_table; | |
568 | + | |
569 | + global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET); | |
570 | + if (global_req_table == 0) | |
571 | + return 0; | |
572 | + | |
573 | + do { | |
574 | + req = readl(iobase + global_req_table + (slots * sizeof(u32))); | |
575 | + slots++; | |
576 | + } while ((req & IOAT_DCA_GREQID_LASTID) == 0); | |
577 | + | |
578 | + return slots; | |
579 | +} | |
580 | + | |
581 | +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) | |
582 | +{ | |
583 | + struct dca_provider *dca; | |
584 | + struct ioat_dca_priv *ioatdca; | |
585 | + int slots; | |
586 | + int i; | |
587 | + int err; | |
588 | + u16 dca_offset; | |
589 | + u16 csi_fsb_control; | |
590 | + u16 pcie_control; | |
591 | + u8 bit; | |
592 | + | |
593 | + union { | |
594 | + u64 full; | |
595 | + struct { | |
596 | + u32 low; | |
597 | + u32 high; | |
598 | + }; | |
599 | + } tag_map; | |
600 | + | |
601 | + if (!system_has_dca_enabled(pdev)) | |
602 | + return NULL; | |
603 | + | |
604 | + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); | |
605 | + if (dca_offset == 0) | |
606 | + return NULL; | |
607 | + | |
608 | + slots = ioat3_dca_count_dca_slots(iobase, dca_offset); | |
609 | + if (slots == 0) | |
610 | + return NULL; | |
611 | + | |
612 | + dca = alloc_dca_provider(&ioat3_dca_ops, | |
613 | + sizeof(*ioatdca) | |
614 | + + (sizeof(struct ioat_dca_slot) * slots)); | |
615 | + if (!dca) | |
616 | + return NULL; | |
617 | + | |
618 | + ioatdca = dca_priv(dca); | |
619 | + ioatdca->iobase = iobase; | |
620 | + ioatdca->dca_base = iobase + dca_offset; | |
621 | + ioatdca->max_requesters = slots; | |
622 | + | |
623 | + /* some bios might not know to turn these on */ | |
624 | + csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); | |
625 | + if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) { | |
626 | + csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH; | |
627 | + writew(csi_fsb_control, | |
628 | + ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); | |
629 | + } | |
630 | + pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); | |
631 | + if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) { | |
632 | + pcie_control |= IOAT3_PCI_CONTROL_MEMWR; | |
633 | + writew(pcie_control, | |
634 | + ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); | |
635 | + } | |
636 | + | |
637 | + | |
638 | + /* TODO version, compatibility and configuration checks */ | |
639 | + | |
640 | + /* copy out the APIC to DCA tag map */ | |
641 | + tag_map.low = | |
642 | + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW); | |
643 | + tag_map.high = | |
644 | + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH); | |
645 | + for (i = 0; i < 8; i++) { | |
646 | + bit = tag_map.full >> (8 * i); | |
647 | + ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK; | |
418 | 648 | } |
419 | 649 | |
420 | 650 | err = register_dca_provider(dca, &pdev->dev); |
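To make the v3 tag-map encoding concrete: each 8-bit map entry copied out above either selects an APIC ID bit (DCA3_TAG_MAP_BIT_TO_SEL), selects and inverts one (DCA3_TAG_MAP_BIT_TO_INV), or supplies a literal tag bit. A standalone restatement of the per-entry decode in ioat3_dca_get_tag(), for illustration only:

#include <stdint.h>

#define DCA3_TAG_MAP_BIT_TO_INV		0x80
#define DCA3_TAG_MAP_BIT_TO_SEL		0x40
#define DCA3_TAG_MAP_LITERAL_VAL	0x01

/* Decode one tag-map entry against an APIC ID, mirroring the
 * per-entry logic of ioat3_dca_get_tag() above. */
static uint8_t ioat3_tag_bit(uint8_t entry, uint32_t apic_id)
{
	int bit;

	if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
		/* tag bit mirrors the selected APIC ID bit */
		bit = entry & ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
		return (apic_id >> bit) & 1;
	}
	if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
		/* tag bit is the inverted APIC ID bit */
		bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
		return !((apic_id >> bit) & 1);
	}
	/* neither flag set: the entry is a literal 0 or 1 */
	return entry & DCA3_TAG_MAP_LITERAL_VAL;
}

/* e.g. entry 0x42 selects APIC ID bit 2, so apic_id 0x04 yields 1;
 * entry 0x81 inverts bit 1, so apic_id 0x02 yields 0. */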
drivers/dma/ioat_dma.c
... | ... | @@ -53,6 +53,12 @@ |
53 | 53 | static void ioat_dma_chan_reset_part2(struct work_struct *work); |
54 | 54 | static void ioat_dma_chan_watchdog(struct work_struct *work); |
55 | 55 | |
56 | +/* | |
57 | + * workaround for IOAT ver.3.0 null descriptor issue | |
58 | + * (channel returns error when size is 0) | |
59 | + */ | |
60 | +#define NULL_DESC_BUFFER_SIZE 1 | |
61 | + | |
56 | 62 | /* internal functions */ |
57 | 63 | static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); |
58 | 64 | static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); |
... | ... | @@ -129,6 +135,38 @@ |
129 | 135 | int i; |
130 | 136 | struct ioat_dma_chan *ioat_chan; |
131 | 137 | |
138 | + /* | |
139 | + * IOAT ver.3 workarounds | |
140 | + */ | |
141 | + if (device->version == IOAT_VER_3_0) { | |
142 | + u32 chan_err_mask; | |
143 | + u16 dev_id; | |
144 | + u32 dmauncerrsts; | |
145 | + | |
146 | + /* | |
147 | + * Write CHANERRMSK_INT with 3E07h to mask out the errors | |
148 | + * that can cause stability issues for IOAT ver.3 | |
149 | + */ | |
150 | + chan_err_mask = 0x3E07; | |
151 | + pci_write_config_dword(device->pdev, | |
152 | + IOAT_PCI_CHANERRMASK_INT_OFFSET, | |
153 | + chan_err_mask); | |
154 | + | |
155 | + /* | |
156 | + * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit | |
157 | + * (workaround for spurious config parity error after restart) | |
158 | + */ | |
159 | + pci_read_config_word(device->pdev, | |
160 | + IOAT_PCI_DEVICE_ID_OFFSET, | |
161 | + &dev_id); | |
162 | + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { | |
163 | + dmauncerrsts = 0x10; | |
164 | + pci_write_config_dword(device->pdev, | |
165 | + IOAT_PCI_DMAUNCERRSTS_OFFSET, | |
166 | + dmauncerrsts); | |
167 | + } | |
168 | + } | |
169 | + | |
132 | 170 | device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); |
133 | 171 | xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); |
134 | 172 | xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); |
... | ... | @@ -473,6 +511,13 @@ |
473 | 511 | prev = new; |
474 | 512 | } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); |
475 | 513 | |
514 | + if (!new) { | |
515 | + dev_err(&ioat_chan->device->pdev->dev, | |
516 | + "tx submit failed\n"); | |
517 | + spin_unlock_bh(&ioat_chan->desc_lock); | |
518 | + return -ENOMEM; | |
519 | + } | |
520 | + | |
476 | 521 | hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; |
477 | 522 | if (new->async_tx.callback) { |
478 | 523 | hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; |
... | ... | @@ -558,7 +603,14 @@ |
558 | 603 | desc_count++; |
559 | 604 | } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); |
560 | 605 | |
561 | - hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; | |
606 | + if (!new) { | |
607 | + dev_err(&ioat_chan->device->pdev->dev, | |
608 | + "tx submit failed\n"); | |
609 | + spin_unlock_bh(&ioat_chan->desc_lock); | |
610 | + return -ENOMEM; | |
611 | + } | |
612 | + | |
613 | + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS; | |
562 | 614 | if (new->async_tx.callback) { |
563 | 615 | hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; |
564 | 616 | if (first != new) { |
... | ... | @@ -629,6 +681,7 @@ |
629 | 681 | desc_sw->async_tx.tx_submit = ioat1_tx_submit; |
630 | 682 | break; |
631 | 683 | case IOAT_VER_2_0: |
684 | + case IOAT_VER_3_0: | |
632 | 685 | desc_sw->async_tx.tx_submit = ioat2_tx_submit; |
633 | 686 | break; |
634 | 687 | } |
... | ... | @@ -779,6 +832,7 @@ |
779 | 832 | } |
780 | 833 | break; |
781 | 834 | case IOAT_VER_2_0: |
835 | + case IOAT_VER_3_0: | |
782 | 836 | list_for_each_entry_safe(desc, _desc, |
783 | 837 | ioat_chan->free_desc.next, node) { |
784 | 838 | list_del(&desc->node); |
... | ... | @@ -868,7 +922,8 @@ |
868 | 922 | |
869 | 923 | /* set up the noop descriptor */ |
870 | 924 | noop_desc = to_ioat_desc(ioat_chan->used_desc.next); |
871 | - noop_desc->hw->size = 0; | |
925 | + /* set size to non-zero value (channel returns error when size is 0) */ | |
926 | + noop_desc->hw->size = NULL_DESC_BUFFER_SIZE; | |
872 | 927 | noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; |
873 | 928 | noop_desc->hw->src_addr = 0; |
874 | 929 | noop_desc->hw->dst_addr = 0; |
... | ... | @@ -918,6 +973,7 @@ |
918 | 973 | return ioat1_dma_get_next_descriptor(ioat_chan); |
919 | 974 | break; |
920 | 975 | case IOAT_VER_2_0: |
976 | + case IOAT_VER_3_0: | |
921 | 977 | return ioat2_dma_get_next_descriptor(ioat_chan); |
922 | 978 | break; |
923 | 979 | } |
... | ... | @@ -1061,10 +1117,12 @@ |
1061 | 1117 | * perhaps we're stuck so hard that the watchdog can't go off? |
1062 | 1118 | * try to catch it after 2 seconds |
1063 | 1119 | */ |
1064 | - if (time_after(jiffies, | |
1065 | - ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) { | |
1066 | - ioat_dma_chan_watchdog(&(ioat_chan->device->work.work)); | |
1067 | - ioat_chan->last_completion_time = jiffies; | |
1120 | + if (ioat_chan->device->version != IOAT_VER_3_0) { | |
1121 | + if (time_after(jiffies, | |
1122 | + ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) { | |
1123 | + ioat_dma_chan_watchdog(&(ioat_chan->device->work.work)); | |
1124 | + ioat_chan->last_completion_time = jiffies; | |
1125 | + } | |
1068 | 1126 | } |
1069 | 1127 | return; |
1070 | 1128 | } |
... | ... | @@ -1120,6 +1178,7 @@ |
1120 | 1178 | } |
1121 | 1179 | break; |
1122 | 1180 | case IOAT_VER_2_0: |
1181 | + case IOAT_VER_3_0: | |
1123 | 1182 | /* has some other thread has already cleaned up? */ |
1124 | 1183 | if (ioat_chan->used_desc.prev == NULL) |
1125 | 1184 | break; |
1126 | 1185 | |
... | ... | @@ -1223,10 +1282,19 @@ |
1223 | 1282 | spin_lock_bh(&ioat_chan->desc_lock); |
1224 | 1283 | |
1225 | 1284 | desc = ioat_dma_get_next_descriptor(ioat_chan); |
1285 | + | |
1286 | + if (!desc) { | |
1287 | + dev_err(&ioat_chan->device->pdev->dev, | |
1288 | + "Unable to start null desc - get next desc failed\n"); | |
1289 | + spin_unlock_bh(&ioat_chan->desc_lock); | |
1290 | + return; | |
1291 | + } | |
1292 | + | |
1226 | 1293 | desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL |
1227 | 1294 | | IOAT_DMA_DESCRIPTOR_CTL_INT_GN |
1228 | 1295 | | IOAT_DMA_DESCRIPTOR_CTL_CP_STS; |
1229 | - desc->hw->size = 0; | |
1296 | + /* set size to non-zero value (channel returns error when size is 0) */ | |
1297 | + desc->hw->size = NULL_DESC_BUFFER_SIZE; | |
1230 | 1298 | desc->hw->src_addr = 0; |
1231 | 1299 | desc->hw->dst_addr = 0; |
1232 | 1300 | async_tx_ack(&desc->async_tx); |
... | ... | @@ -1244,6 +1312,7 @@ |
1244 | 1312 | + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); |
1245 | 1313 | break; |
1246 | 1314 | case IOAT_VER_2_0: |
1315 | + case IOAT_VER_3_0: | |
1247 | 1316 | writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, |
1248 | 1317 | ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); |
1249 | 1318 | writel(((u64) desc->async_tx.phys) >> 32, |
... | ... | @@ -1562,6 +1631,7 @@ |
1562 | 1631 | ioat1_dma_memcpy_issue_pending; |
1563 | 1632 | break; |
1564 | 1633 | case IOAT_VER_2_0: |
1634 | + case IOAT_VER_3_0: | |
1565 | 1635 | device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; |
1566 | 1636 | device->common.device_issue_pending = |
1567 | 1637 | ioat2_dma_memcpy_issue_pending; |
... | ... | @@ -1585,9 +1655,11 @@ |
1585 | 1655 | |
1586 | 1656 | dma_async_device_register(&device->common); |
1587 | 1657 | |
1588 | - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); | |
1589 | - schedule_delayed_work(&device->work, | |
1590 | - WATCHDOG_DELAY); | |
1658 | + if (device->version != IOAT_VER_3_0) { | |
1659 | + INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); | |
1660 | + schedule_delayed_work(&device->work, | |
1661 | + WATCHDOG_DELAY); | |
1662 | + } | |
1591 | 1663 | |
1592 | 1664 | return device; |
1593 | 1665 | |
... | ... | @@ -1621,7 +1693,9 @@ |
1621 | 1693 | pci_release_regions(device->pdev); |
1622 | 1694 | pci_disable_device(device->pdev); |
1623 | 1695 | |
1624 | - cancel_delayed_work(&device->work); | |
1696 | + if (device->version != IOAT_VER_3_0) { | |
1697 | + cancel_delayed_work(&device->work); | |
1698 | + } | |
1625 | 1699 | |
1626 | 1700 | list_for_each_entry_safe(chan, _chan, |
1627 | 1701 | &device->common.channels, device_node) { |
drivers/dma/ioatdma.h
... | ... | @@ -29,7 +29,7 @@ |
29 | 29 | #include <linux/pci_ids.h> |
30 | 30 | #include <net/tcp.h> |
31 | 31 | |
32 | -#define IOAT_DMA_VERSION "2.18" | |
32 | +#define IOAT_DMA_VERSION "3.30" | |
33 | 33 | |
34 | 34 | enum ioat_interrupt { |
35 | 35 | none = 0, |
... | ... | @@ -135,6 +135,7 @@ |
135 | 135 | #ifdef CONFIG_NET_DMA |
136 | 136 | switch (dev->version) { |
137 | 137 | case IOAT_VER_1_2: |
138 | + case IOAT_VER_3_0: | |
138 | 139 | sysctl_tcp_dma_copybreak = 4096; |
139 | 140 | break; |
140 | 141 | case IOAT_VER_2_0: |
141 | 142 | |
... | ... | @@ -150,11 +151,13 @@ |
150 | 151 | void ioat_dma_remove(struct ioatdma_device *device); |
151 | 152 | struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); |
152 | 153 | struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); |
154 | +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); | |
153 | 155 | #else |
154 | 156 | #define ioat_dma_probe(pdev, iobase) NULL |
155 | 157 | #define ioat_dma_remove(device) do { } while (0) |
156 | 158 | #define ioat_dca_init(pdev, iobase) NULL |
157 | 159 | #define ioat2_dca_init(pdev, iobase) NULL |
160 | +#define ioat3_dca_init(pdev, iobase) NULL | |
158 | 161 | #endif |
159 | 162 | |
160 | 163 | #endif /* IOATDMA_H */ |
drivers/dma/ioatdma_hw.h
drivers/dma/ioatdma_registers.h
... | ... | @@ -25,6 +25,10 @@ |
25 | 25 | #define IOAT_PCI_DMACTRL_DMA_EN 0x00000001 |
26 | 26 | #define IOAT_PCI_DMACTRL_MSI_EN 0x00000002 |
27 | 27 | |
28 | +#define IOAT_PCI_DEVICE_ID_OFFSET 0x02 | |
29 | +#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148 | |
30 | +#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 | |
31 | + | |
28 | 32 | /* MMIO Device Registers */ |
29 | 33 | #define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ |
30 | 34 | |
31 | 35 | |
... | ... | @@ -149,7 +153,23 @@ |
149 | 153 | #define IOAT_DCA_GREQID_VALID 0x20000000 |
150 | 154 | #define IOAT_DCA_GREQID_LASTID 0x80000000 |
151 | 155 | |
156 | +#define IOAT3_CSI_CAPABILITY_OFFSET 0x08 | |
157 | +#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1 | |
152 | 158 | |
159 | +#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A | |
160 | +#define IOAT3_PCI_CAPABILITY_MEMWR 0x1 | |
161 | + | |
162 | +#define IOAT3_CSI_CONTROL_OFFSET 0x0C | |
163 | +#define IOAT3_CSI_CONTROL_PREFETCH 0x1 | |
164 | + | |
165 | +#define IOAT3_PCI_CONTROL_OFFSET 0x0E | |
166 | +#define IOAT3_PCI_CONTROL_MEMWR 0x1 | |
167 | + | |
168 | +#define IOAT3_APICID_TAG_MAP_OFFSET 0x10 | |
169 | +#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10 | |
170 | +#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14 | |
171 | + | |
172 | +#define IOAT3_DCA_GREQID_OFFSET 0x02 | |
153 | 173 | |
154 | 174 | #define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ |
155 | 175 | #define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */ |
include/linux/dca.h
... | ... | @@ -10,6 +10,7 @@ |
10 | 10 | #define DCA_PROVIDER_REMOVE 0x0002 |
11 | 11 | |
12 | 12 | struct dca_provider { |
13 | + struct list_head node; | |
13 | 14 | struct dca_ops *ops; |
14 | 15 | struct device *cd; |
15 | 16 | int id; |
... | ... | @@ -18,7 +19,9 @@ |
18 | 19 | struct dca_ops { |
19 | 20 | int (*add_requester) (struct dca_provider *, struct device *); |
20 | 21 | int (*remove_requester) (struct dca_provider *, struct device *); |
21 | - u8 (*get_tag) (struct dca_provider *, int cpu); | |
22 | + u8 (*get_tag) (struct dca_provider *, struct device *, | |
23 | + int cpu); | |
24 | + int (*dev_managed) (struct dca_provider *, struct device *); | |
22 | 25 | }; |
23 | 26 | |
24 | 27 | struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size); |
25 | 28 | |
... | ... | @@ -32,9 +35,11 @@ |
32 | 35 | } |
33 | 36 | |
34 | 37 | /* Requester API */ |
38 | +#define DCA_GET_TAG_TWO_ARGS | |
35 | 39 | int dca_add_requester(struct device *dev); |
36 | 40 | int dca_remove_requester(struct device *dev); |
37 | 41 | u8 dca_get_tag(int cpu); |
42 | +u8 dca3_get_tag(struct device *dev, int cpu); | |
38 | 43 | |
39 | 44 | /* internal stuff */ |
40 | 45 | int __init dca_sysfs_init(void); |
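The DCA_GET_TAG_TWO_ARGS define added above gives requester drivers a compile-time probe for the new signature. A hedged sketch of how a consumer might select between the two entry points (my_lookup_tag is illustrative, not from this commit):

#include <linux/dca.h>

static u8 my_lookup_tag(struct device *dev, int cpu)
{
#ifdef DCA_GET_TAG_TWO_ARGS
	return dca3_get_tag(dev, cpu);	/* v3-aware kernels */
#else
	return dca_get_tag(cpu);	/* older single-provider API */
#endif
}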
include/linux/pci_ids.h
... | ... | @@ -2363,6 +2363,14 @@ |
2363 | 2363 | #define PCI_DEVICE_ID_INTEL_ICH9_7 0x2916 |
2364 | 2364 | #define PCI_DEVICE_ID_INTEL_ICH9_8 0x2918 |
2365 | 2365 | #define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340 |
2366 | +#define PCI_DEVICE_ID_INTEL_IOAT_TBG4 0x3429 | |
2367 | +#define PCI_DEVICE_ID_INTEL_IOAT_TBG5 0x342a | |
2368 | +#define PCI_DEVICE_ID_INTEL_IOAT_TBG6 0x342b | |
2369 | +#define PCI_DEVICE_ID_INTEL_IOAT_TBG7 0x342c | |
2370 | +#define PCI_DEVICE_ID_INTEL_IOAT_TBG0 0x3430 | |
2371 | +#define PCI_DEVICE_ID_INTEL_IOAT_TBG1 0x3431 | |
2372 | +#define PCI_DEVICE_ID_INTEL_IOAT_TBG2 0x3432 | |
2373 | +#define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433 | |
2366 | 2374 | #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575 |
2367 | 2375 | #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577 |
2368 | 2376 | #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580 |