Commit 7f1b358a236ee9c19657a619ac6f2dcabcaa0924

Authored by Maciej Sosnowski
Committed by Dan Williams
1 parent 16a37acaaf

I/OAT: I/OAT version 3.0 support

This patch adds support for the Intel I/OAT DMA engine version 3
(aka the CB3 device) to the ioatdma and dca modules.
The main features of I/OAT version 3 are:
 * 8 single-channel DMA devices (8 channels in total)
 * 8 DCA providers, each accepting up to 2 requesters
 * 8-bit TAG values and 32-bit extended APIC IDs

Signed-off-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
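
A minimal requester-side sketch of the reworked DCA API (for example from a NIC driver): register the device with a provider once, then look up a per-device tag for each completion CPU. Every example_* name below is hypothetical and only illustrates the calls this patch exports.

#include <linux/dca.h>
#include <linux/pci.h>

/* Hypothetical requester-side usage of the per-device DCA API.
 * dca3_get_tag() takes the requester's struct device so the core can
 * find the provider that manages it; the old dca_get_tag(cpu) remains
 * available for pre-3.0 platforms.
 */
static int example_enable_dca(struct pci_dev *pdev)
{
	/* Register this device with whichever provider accepts it. */
	return dca_add_requester(&pdev->dev);
}

static u8 example_tag_for_cpu(struct pci_dev *pdev, int cpu)
{
	/* Per-device tag lookup (I/OAT v3 style, 8-bit tags). */
	return dca3_get_tag(&pdev->dev, cpu);
}

static void example_disable_dca(struct pci_dev *pdev)
{
	dca_remove_requester(&pdev->dev);
}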

Showing 10 changed files with 481 additions and 49 deletions

drivers/dca/dca-core.c
... ... @@ -28,39 +28,69 @@
28 28 #include <linux/device.h>
29 29 #include <linux/dca.h>
30 30  
  31 +#define DCA_VERSION "1.4"
  32 +
  33 +MODULE_VERSION(DCA_VERSION);
31 34 MODULE_LICENSE("GPL");
  35 +MODULE_AUTHOR("Intel Corporation");
32 36  
33   -/* For now we're assuming a single, global, DCA provider for the system. */
34   -
35 37 static DEFINE_SPINLOCK(dca_lock);
36 38  
37   -static struct dca_provider *global_dca = NULL;
  39 +static LIST_HEAD(dca_providers);
38 40  
  41 +static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
  42 +{
  43 + struct dca_provider *dca, *ret = NULL;
  44 +
  45 + list_for_each_entry(dca, &dca_providers, node) {
  46 + if ((!dev) || (dca->ops->dev_managed(dca, dev))) {
  47 + ret = dca;
  48 + break;
  49 + }
  50 + }
  51 +
  52 + return ret;
  53 +}
  54 +
39 55 /**
40 56 * dca_add_requester - add a dca client to the list
41 57 * @dev - the device that wants dca service
42 58 */
43 59 int dca_add_requester(struct device *dev)
44 60 {
45   - int err, slot;
  61 + struct dca_provider *dca;
  62 + int err, slot = -ENODEV;
46 63  
47   - if (!global_dca)
48   - return -ENODEV;
  64 + if (!dev)
  65 + return -EFAULT;
49 66  
50 67 spin_lock(&dca_lock);
51   - slot = global_dca->ops->add_requester(global_dca, dev);
52   - spin_unlock(&dca_lock);
53   - if (slot < 0)
  68 +
  69 + /* check if the requester has not been added already */
  70 + dca = dca_find_provider_by_dev(dev);
  71 + if (dca) {
  72 + spin_unlock(&dca_lock);
  73 + return -EEXIST;
  74 + }
  75 +
  76 + list_for_each_entry(dca, &dca_providers, node) {
  77 + slot = dca->ops->add_requester(dca, dev);
  78 + if (slot >= 0)
  79 + break;
  80 + }
  81 + if (slot < 0) {
  82 + spin_unlock(&dca_lock);
54 83 return slot;
  84 + }
55 85  
56   - err = dca_sysfs_add_req(global_dca, dev, slot);
  86 + err = dca_sysfs_add_req(dca, dev, slot);
57 87 if (err) {
58   - spin_lock(&dca_lock);
59   - global_dca->ops->remove_requester(global_dca, dev);
  88 + dca->ops->remove_requester(dca, dev);
60 89 spin_unlock(&dca_lock);
61 90 return err;
62 91 }
63 92  
  93 + spin_unlock(&dca_lock);
64 94 return 0;
65 95 }
66 96 EXPORT_SYMBOL_GPL(dca_add_requester);
67 97  
... ... @@ -71,31 +101,79 @@
71 101 */
72 102 int dca_remove_requester(struct device *dev)
73 103 {
  104 + struct dca_provider *dca;
74 105 int slot;
75   - if (!global_dca)
76   - return -ENODEV;
77 106  
  107 + if (!dev)
  108 + return -EFAULT;
  109 +
78 110 spin_lock(&dca_lock);
79   - slot = global_dca->ops->remove_requester(global_dca, dev);
80   - spin_unlock(&dca_lock);
81   - if (slot < 0)
  111 + dca = dca_find_provider_by_dev(dev);
  112 + if (!dca) {
  113 + spin_unlock(&dca_lock);
  114 + return -ENODEV;
  115 + }
  116 + slot = dca->ops->remove_requester(dca, dev);
  117 + if (slot < 0) {
  118 + spin_unlock(&dca_lock);
82 119 return slot;
  120 + }
83 121  
84   - dca_sysfs_remove_req(global_dca, slot);
  122 + dca_sysfs_remove_req(dca, slot);
  123 +
  124 + spin_unlock(&dca_lock);
85 125 return 0;
86 126 }
87 127 EXPORT_SYMBOL_GPL(dca_remove_requester);
88 128  
89 129 /**
90   - * dca_get_tag - return the dca tag for the given cpu
  130 + * dca_common_get_tag - return the dca tag (serves both new and old api)
  131 + * @dev - the device that wants dca service
91 132 * @cpu - the cpuid as returned by get_cpu()
92 133 */
93   -u8 dca_get_tag(int cpu)
  134 +u8 dca_common_get_tag(struct device *dev, int cpu)
94 135 {
95   - if (!global_dca)
  136 + struct dca_provider *dca;
  137 + u8 tag;
  138 +
  139 + spin_lock(&dca_lock);
  140 +
  141 + dca = dca_find_provider_by_dev(dev);
  142 + if (!dca) {
  143 + spin_unlock(&dca_lock);
96 144 return -ENODEV;
97   - return global_dca->ops->get_tag(global_dca, cpu);
  145 + }
  146 + tag = dca->ops->get_tag(dca, dev, cpu);
  147 +
  148 + spin_unlock(&dca_lock);
  149 + return tag;
98 150 }
  151 +
  152 +/**
  153 + * dca3_get_tag - return the dca tag to the requester device
  154 + * for the given cpu (new api)
  155 + * @dev - the device that wants dca service
  156 + * @cpu - the cpuid as returned by get_cpu()
  157 + */
  158 +u8 dca3_get_tag(struct device *dev, int cpu)
  159 +{
  160 + if (!dev)
  161 + return -EFAULT;
  162 +
  163 + return dca_common_get_tag(dev, cpu);
  164 +}
  165 +EXPORT_SYMBOL_GPL(dca3_get_tag);
  166 +
  167 +/**
  168 + * dca_get_tag - return the dca tag for the given cpu (old api)
  169 + * @cpu - the cpuid as returned by get_cpu()
  170 + */
  171 +u8 dca_get_tag(int cpu)
  172 +{
  173 + struct device *dev = NULL;
  174 +
  175 + return dca_common_get_tag(dev, cpu);
  176 +}
99 177 EXPORT_SYMBOL_GPL(dca_get_tag);
100 178  
101 179 /**
102 180  
... ... @@ -140,12 +218,10 @@
140 218 {
141 219 int err;
142 220  
143   - if (global_dca)
144   - return -EEXIST;
145 221 err = dca_sysfs_add_provider(dca, dev);
146 222 if (err)
147 223 return err;
148   - global_dca = dca;
  224 + list_add(&dca->node, &dca_providers);
149 225 blocking_notifier_call_chain(&dca_provider_chain,
150 226 DCA_PROVIDER_ADD, NULL);
151 227 return 0;
152 228  
... ... @@ -158,11 +234,9 @@
158 234 */
159 235 void unregister_dca_provider(struct dca_provider *dca)
160 236 {
161   - if (!global_dca)
162   - return;
163 237 blocking_notifier_call_chain(&dca_provider_chain,
164 238 DCA_PROVIDER_REMOVE, NULL);
165   - global_dca = NULL;
  239 + list_del(&dca->node);
166 240 dca_sysfs_remove_provider(dca);
167 241 }
168 242 EXPORT_SYMBOL_GPL(unregister_dca_provider);
... ... @@ -187,6 +261,7 @@
187 261  
188 262 static int __init dca_init(void)
189 263 {
  264 + printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION);
190 265 return dca_sysfs_init();
191 266 }
192 267  
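With the single global_dca pointer gone, each I/OAT v3 engine registers its own provider, and the core selects one per requester through the new dev_managed() callback. A minimal provider-side sketch, assuming placeholder example_* callbacks that are not part of this patch:

#include <linux/dca.h>
#include <linux/device.h>

static int example_add_req(struct dca_provider *dca, struct device *dev)
{
	return 0;	/* would program a requester slot and return its index */
}

static int example_remove_req(struct dca_provider *dca, struct device *dev)
{
	return 0;	/* would release the slot programmed above */
}

static u8 example_get_tag(struct dca_provider *dca, struct device *dev, int cpu)
{
	return 0;	/* would translate the cpu's APIC ID into a DCA tag */
}

static int example_dev_managed(struct dca_provider *dca, struct device *dev)
{
	return 0;	/* return 1 if this provider already serves @dev */
}

static struct dca_ops example_dca_ops = {
	.add_requester		= example_add_req,
	.remove_requester	= example_remove_req,
	.get_tag		= example_get_tag,
	.dev_managed		= example_dev_managed,
};

static struct dca_provider *example_register_provider(struct device *dev)
{
	struct dca_provider *dca;
	int err;

	dca = alloc_dca_provider(&example_dca_ops, 0);
	if (!dca)
		return NULL;

	err = register_dca_provider(dca, dev);	/* adds it to dca_providers */
	if (err) {
		free_dca_provider(dca);
		return NULL;
	}
	return dca;
}
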
drivers/dca/dca-sysfs.c
... ... @@ -13,9 +13,10 @@
13 13 int dca_sysfs_add_req(struct dca_provider *dca, struct device *dev, int slot)
14 14 {
15 15 struct device *cd;
  16 + static int req_count;
16 17  
17 18 cd = device_create(dca_class, dca->cd, MKDEV(0, slot + 1),
18   - "requester%d", slot);
  19 + "requester%d", req_count++);
19 20 if (IS_ERR(cd))
20 21 return PTR_ERR(cd);
21 22 return 0;
drivers/dma/ioat.c
... ... @@ -47,6 +47,16 @@
47 47  
48 48 /* I/OAT v2 platforms */
49 49 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
  50 +
  51 + /* I/OAT v3 platforms */
  52 + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
  53 + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
  54 + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
  55 + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
  56 + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
  57 + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
  58 + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
  59 + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
50 60 { 0, }
51 61 };
52 62  
... ... @@ -82,6 +92,11 @@
82 92 device->dma = ioat_dma_probe(pdev, iobase);
83 93 if (device->dma && ioat_dca_enabled)
84 94 device->dca = ioat2_dca_init(pdev, iobase);
  95 + break;
  96 + case IOAT_VER_3_0:
  97 + device->dma = ioat_dma_probe(pdev, iobase);
  98 + if (device->dma && ioat_dca_enabled)
  99 + device->dca = ioat3_dca_init(pdev, iobase);
85 100 break;
86 101 default:
87 102 err = -ENODEV;
drivers/dma/ioat_dca.c
... ... @@ -37,12 +37,18 @@
37 37 #include "ioatdma_registers.h"
38 38  
39 39 /*
40   - * Bit 16 of a tag map entry is the "valid" bit, if it is set then bits 0:15
  40 + * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
41 41 * contain the bit number of the APIC ID to map into the DCA tag. If the valid
42 42 * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
43 43 */
44 44 #define DCA_TAG_MAP_VALID 0x80
45 45  
  46 +#define DCA3_TAG_MAP_BIT_TO_INV 0x80
  47 +#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
  48 +#define DCA3_TAG_MAP_LITERAL_VAL 0x1
  49 +
  50 +#define DCA_TAG_MAP_MASK 0xDF
  51 +
46 52 /*
47 53 * "Legacy" DCA systems do not implement the DCA register set in the
48 54 * I/OAT device. Software needs direct support for their tag mappings.
... ... @@ -95,6 +101,7 @@
95 101 };
96 102  
97 103 #define IOAT_DCA_MAX_REQ 6
  104 +#define IOAT3_DCA_MAX_REQ 2
98 105  
99 106 struct ioat_dca_priv {
100 107 void __iomem *iobase;
... ... @@ -171,7 +178,9 @@
171 178 return -ENODEV;
172 179 }
173 180  
174   -static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu)
  181 +static u8 ioat_dca_get_tag(struct dca_provider *dca,
  182 + struct device *dev,
  183 + int cpu)
175 184 {
176 185 struct ioat_dca_priv *ioatdca = dca_priv(dca);
177 186 int i, apic_id, bit, value;
178 187  
... ... @@ -193,10 +202,26 @@
193 202 return tag;
194 203 }
195 204  
  205 +static int ioat_dca_dev_managed(struct dca_provider *dca,
  206 + struct device *dev)
  207 +{
  208 + struct ioat_dca_priv *ioatdca = dca_priv(dca);
  209 + struct pci_dev *pdev;
  210 + int i;
  211 +
  212 + pdev = to_pci_dev(dev);
  213 + for (i = 0; i < ioatdca->max_requesters; i++) {
  214 + if (ioatdca->req_slots[i].pdev == pdev)
  215 + return 1;
  216 + }
  217 + return 0;
  218 +}
  219 +
196 220 static struct dca_ops ioat_dca_ops = {
197 221 .add_requester = ioat_dca_add_requester,
198 222 .remove_requester = ioat_dca_remove_requester,
199 223 .get_tag = ioat_dca_get_tag,
  224 + .dev_managed = ioat_dca_dev_managed,
200 225 };
201 226  
202 227  
... ... @@ -207,6 +232,8 @@
207 232 u8 *tag_map = NULL;
208 233 int i;
209 234 int err;
  235 + u8 version;
  236 + u8 max_requesters;
210 237  
211 238 if (!system_has_dca_enabled(pdev))
212 239 return NULL;
213 240  
214 241  
... ... @@ -237,15 +264,20 @@
237 264 if (tag_map == NULL)
238 265 return NULL;
239 266  
  267 + version = readb(iobase + IOAT_VER_OFFSET);
  268 + if (version == IOAT_VER_3_0)
  269 + max_requesters = IOAT3_DCA_MAX_REQ;
  270 + else
  271 + max_requesters = IOAT_DCA_MAX_REQ;
  272 +
240 273 dca = alloc_dca_provider(&ioat_dca_ops,
241 274 sizeof(*ioatdca) +
242   - (sizeof(struct ioat_dca_slot) * IOAT_DCA_MAX_REQ));
  275 + (sizeof(struct ioat_dca_slot) * max_requesters));
243 276 if (!dca)
244 277 return NULL;
245 278  
246 279 ioatdca = dca_priv(dca);
247   - ioatdca->max_requesters = IOAT_DCA_MAX_REQ;
248   -
  280 + ioatdca->max_requesters = max_requesters;
249 281 ioatdca->dca_base = iobase + 0x54;
250 282  
251 283 /* copy over the APIC ID to DCA tag mapping */
252 284  
... ... @@ -323,11 +355,13 @@
323 355 return -ENODEV;
324 356 }
325 357  
326   -static u8 ioat2_dca_get_tag(struct dca_provider *dca, int cpu)
  358 +static u8 ioat2_dca_get_tag(struct dca_provider *dca,
  359 + struct device *dev,
  360 + int cpu)
327 361 {
328 362 u8 tag;
329 363  
330   - tag = ioat_dca_get_tag(dca, cpu);
  364 + tag = ioat_dca_get_tag(dca, dev, cpu);
331 365 tag = (~tag) & 0x1F;
332 366 return tag;
333 367 }
... ... @@ -336,6 +370,7 @@
336 370 .add_requester = ioat2_dca_add_requester,
337 371 .remove_requester = ioat2_dca_remove_requester,
338 372 .get_tag = ioat2_dca_get_tag,
  373 + .dev_managed = ioat_dca_dev_managed,
339 374 };
340 375  
341 376 static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
... ... @@ -415,6 +450,201 @@
415 450 ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
416 451 else
417 452 ioatdca->tag_map[i] = 0;
  453 + }
  454 +
  455 + err = register_dca_provider(dca, &pdev->dev);
  456 + if (err) {
  457 + free_dca_provider(dca);
  458 + return NULL;
  459 + }
  460 +
  461 + return dca;
  462 +}
  463 +
  464 +static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
  465 +{
  466 + struct ioat_dca_priv *ioatdca = dca_priv(dca);
  467 + struct pci_dev *pdev;
  468 + int i;
  469 + u16 id;
  470 + u16 global_req_table;
  471 +
  472 + /* This implementation only supports PCI-Express */
  473 + if (dev->bus != &pci_bus_type)
  474 + return -ENODEV;
  475 + pdev = to_pci_dev(dev);
  476 + id = dcaid_from_pcidev(pdev);
  477 +
  478 + if (ioatdca->requester_count == ioatdca->max_requesters)
  479 + return -ENODEV;
  480 +
  481 + for (i = 0; i < ioatdca->max_requesters; i++) {
  482 + if (ioatdca->req_slots[i].pdev == NULL) {
  483 + /* found an empty slot */
  484 + ioatdca->requester_count++;
  485 + ioatdca->req_slots[i].pdev = pdev;
  486 + ioatdca->req_slots[i].rid = id;
  487 + global_req_table =
  488 + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
  489 + writel(id | IOAT_DCA_GREQID_VALID,
  490 + ioatdca->iobase + global_req_table + (i * 4));
  491 + return i;
  492 + }
  493 + }
  494 + /* Error, ioatdma->requester_count is out of whack */
  495 + return -EFAULT;
  496 +}
  497 +
  498 +static int ioat3_dca_remove_requester(struct dca_provider *dca,
  499 + struct device *dev)
  500 +{
  501 + struct ioat_dca_priv *ioatdca = dca_priv(dca);
  502 + struct pci_dev *pdev;
  503 + int i;
  504 + u16 global_req_table;
  505 +
  506 + /* This implementation only supports PCI-Express */
  507 + if (dev->bus != &pci_bus_type)
  508 + return -ENODEV;
  509 + pdev = to_pci_dev(dev);
  510 +
  511 + for (i = 0; i < ioatdca->max_requesters; i++) {
  512 + if (ioatdca->req_slots[i].pdev == pdev) {
  513 + global_req_table =
  514 + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
  515 + writel(0, ioatdca->iobase + global_req_table + (i * 4));
  516 + ioatdca->req_slots[i].pdev = NULL;
  517 + ioatdca->req_slots[i].rid = 0;
  518 + ioatdca->requester_count--;
  519 + return i;
  520 + }
  521 + }
  522 + return -ENODEV;
  523 +}
  524 +
  525 +static u8 ioat3_dca_get_tag(struct dca_provider *dca,
  526 + struct device *dev,
  527 + int cpu)
  528 +{
  529 + u8 tag;
  530 +
  531 + struct ioat_dca_priv *ioatdca = dca_priv(dca);
  532 + int i, apic_id, bit, value;
  533 + u8 entry;
  534 +
  535 + tag = 0;
  536 + apic_id = cpu_physical_id(cpu);
  537 +
  538 + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
  539 + entry = ioatdca->tag_map[i];
  540 + if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
  541 + bit = entry &
  542 + ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
  543 + value = (apic_id & (1 << bit)) ? 1 : 0;
  544 + } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
  545 + bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
  546 + value = (apic_id & (1 << bit)) ? 0 : 1;
  547 + } else {
  548 + value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
  549 + }
  550 + tag |= (value << i);
  551 + }
  552 +
  553 + return tag;
  554 +}
  555 +
  556 +static struct dca_ops ioat3_dca_ops = {
  557 + .add_requester = ioat3_dca_add_requester,
  558 + .remove_requester = ioat3_dca_remove_requester,
  559 + .get_tag = ioat3_dca_get_tag,
  560 + .dev_managed = ioat_dca_dev_managed,
  561 +};
  562 +
  563 +static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
  564 +{
  565 + int slots = 0;
  566 + u32 req;
  567 + u16 global_req_table;
  568 +
  569 + global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
  570 + if (global_req_table == 0)
  571 + return 0;
  572 +
  573 + do {
  574 + req = readl(iobase + global_req_table + (slots * sizeof(u32)));
  575 + slots++;
  576 + } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
  577 +
  578 + return slots;
  579 +}
  580 +
  581 +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
  582 +{
  583 + struct dca_provider *dca;
  584 + struct ioat_dca_priv *ioatdca;
  585 + int slots;
  586 + int i;
  587 + int err;
  588 + u16 dca_offset;
  589 + u16 csi_fsb_control;
  590 + u16 pcie_control;
  591 + u8 bit;
  592 +
  593 + union {
  594 + u64 full;
  595 + struct {
  596 + u32 low;
  597 + u32 high;
  598 + };
  599 + } tag_map;
  600 +
  601 + if (!system_has_dca_enabled(pdev))
  602 + return NULL;
  603 +
  604 + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
  605 + if (dca_offset == 0)
  606 + return NULL;
  607 +
  608 + slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
  609 + if (slots == 0)
  610 + return NULL;
  611 +
  612 + dca = alloc_dca_provider(&ioat3_dca_ops,
  613 + sizeof(*ioatdca)
  614 + + (sizeof(struct ioat_dca_slot) * slots));
  615 + if (!dca)
  616 + return NULL;
  617 +
  618 + ioatdca = dca_priv(dca);
  619 + ioatdca->iobase = iobase;
  620 + ioatdca->dca_base = iobase + dca_offset;
  621 + ioatdca->max_requesters = slots;
  622 +
  623 + /* some bios might not know to turn these on */
  624 + csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
  625 + if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
  626 + csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
  627 + writew(csi_fsb_control,
  628 + ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
  629 + }
  630 + pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
  631 + if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
  632 + pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
  633 + writew(pcie_control,
  634 + ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
  635 + }
  636 +
  637 +
  638 + /* TODO version, compatibility and configuration checks */
  639 +
  640 + /* copy out the APIC to DCA tag map */
  641 + tag_map.low =
  642 + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
  643 + tag_map.high =
  644 + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
  645 + for (i = 0; i < 8; i++) {
  646 + bit = tag_map.full >> (8 * i);
  647 + ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
418 648 }
419 649  
420 650 err = register_dca_provider(dca, &pdev->dev);
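
To make the v3 tag-map encoding above concrete, here is a stand-alone, user-space rendition of the same decode performed by ioat3_dca_get_tag(): each of the eight map entries either selects an APIC ID bit (optionally inverted) or contributes a literal 0/1 to the corresponding tag bit. The tag map and APIC ID used in main() are made up for illustration.

#include <stdint.h>
#include <stdio.h>

#define DCA3_TAG_MAP_BIT_TO_INV		0x80
#define DCA3_TAG_MAP_BIT_TO_SEL		0x40
#define DCA3_TAG_MAP_LITERAL_VAL	0x01
#define IOAT_TAG_MAP_LEN		8

static uint8_t decode_tag(const uint8_t map[IOAT_TAG_MAP_LEN], int apic_id)
{
	uint8_t tag = 0;
	int i, bit, value;

	for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
		uint8_t entry = map[i];

		if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
			/* select the given APIC ID bit */
			bit = entry & ~(DCA3_TAG_MAP_BIT_TO_SEL |
					DCA3_TAG_MAP_BIT_TO_INV);
			value = (apic_id & (1 << bit)) ? 1 : 0;
		} else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
			/* select and invert the given APIC ID bit */
			bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
			value = (apic_id & (1 << bit)) ? 0 : 1;
		} else {
			/* literal 0 or 1 */
			value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
		}
		tag |= value << i;
	}
	return tag;
}

int main(void)
{
	/* Made-up map: bit 0 literal 1, bits 1-3 select APIC ID bits 0-2. */
	const uint8_t map[IOAT_TAG_MAP_LEN] = {
		0x01, 0x40, 0x41, 0x42, 0x00, 0x00, 0x00, 0x00
	};

	printf("tag for APIC ID 5 = 0x%02x\n", decode_tag(map, 5));
	return 0;
}
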
drivers/dma/ioat_dma.c
... ... @@ -53,6 +53,12 @@
53 53 static void ioat_dma_chan_reset_part2(struct work_struct *work);
54 54 static void ioat_dma_chan_watchdog(struct work_struct *work);
55 55  
  56 +/*
  57 + * workaround for IOAT ver.3.0 null descriptor issue
  58 + * (channel returns error when size is 0)
  59 + */
  60 +#define NULL_DESC_BUFFER_SIZE 1
  61 +
56 62 /* internal functions */
57 63 static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
58 64 static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
... ... @@ -129,6 +135,38 @@
129 135 int i;
130 136 struct ioat_dma_chan *ioat_chan;
131 137  
  138 + /*
  139 + * IOAT ver.3 workarounds
  140 + */
  141 + if (device->version == IOAT_VER_3_0) {
  142 + u32 chan_err_mask;
  143 + u16 dev_id;
  144 + u32 dmauncerrsts;
  145 +
  146 + /*
  147 + * Write CHANERRMSK_INT with 3E07h to mask out the errors
  148 + * that can cause stability issues for IOAT ver.3
  149 + */
  150 + chan_err_mask = 0x3E07;
  151 + pci_write_config_dword(device->pdev,
  152 + IOAT_PCI_CHANERRMASK_INT_OFFSET,
  153 + chan_err_mask);
  154 +
  155 + /*
  156 + * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
  157 + * (workaround for spurious config parity error after restart)
  158 + */
  159 + pci_read_config_word(device->pdev,
  160 + IOAT_PCI_DEVICE_ID_OFFSET,
  161 + &dev_id);
  162 + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
  163 + dmauncerrsts = 0x10;
  164 + pci_write_config_dword(device->pdev,
  165 + IOAT_PCI_DMAUNCERRSTS_OFFSET,
  166 + dmauncerrsts);
  167 + }
  168 + }
  169 +
132 170 device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
133 171 xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
134 172 xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
... ... @@ -473,6 +511,13 @@
473 511 prev = new;
474 512 } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
475 513  
  514 + if (!new) {
  515 + dev_err(&ioat_chan->device->pdev->dev,
  516 + "tx submit failed\n");
  517 + spin_unlock_bh(&ioat_chan->desc_lock);
  518 + return -ENOMEM;
  519 + }
  520 +
476 521 hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
477 522 if (new->async_tx.callback) {
478 523 hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
... ... @@ -558,7 +603,14 @@
558 603 desc_count++;
559 604 } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
560 605  
561   - hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
  606 + if (!new) {
  607 + dev_err(&ioat_chan->device->pdev->dev,
  608 + "tx submit failed\n");
  609 + spin_unlock_bh(&ioat_chan->desc_lock);
  610 + return -ENOMEM;
  611 + }
  612 +
  613 + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
562 614 if (new->async_tx.callback) {
563 615 hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
564 616 if (first != new) {
... ... @@ -629,6 +681,7 @@
629 681 desc_sw->async_tx.tx_submit = ioat1_tx_submit;
630 682 break;
631 683 case IOAT_VER_2_0:
  684 + case IOAT_VER_3_0:
632 685 desc_sw->async_tx.tx_submit = ioat2_tx_submit;
633 686 break;
634 687 }
... ... @@ -779,6 +832,7 @@
779 832 }
780 833 break;
781 834 case IOAT_VER_2_0:
  835 + case IOAT_VER_3_0:
782 836 list_for_each_entry_safe(desc, _desc,
783 837 ioat_chan->free_desc.next, node) {
784 838 list_del(&desc->node);
... ... @@ -868,7 +922,8 @@
868 922  
869 923 /* set up the noop descriptor */
870 924 noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
871   - noop_desc->hw->size = 0;
  925 + /* set size to non-zero value (channel returns error when size is 0) */
  926 + noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
872 927 noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
873 928 noop_desc->hw->src_addr = 0;
874 929 noop_desc->hw->dst_addr = 0;
... ... @@ -918,6 +973,7 @@
918 973 return ioat1_dma_get_next_descriptor(ioat_chan);
919 974 break;
920 975 case IOAT_VER_2_0:
  976 + case IOAT_VER_3_0:
921 977 return ioat2_dma_get_next_descriptor(ioat_chan);
922 978 break;
923 979 }
... ... @@ -1061,10 +1117,12 @@
1061 1117 * perhaps we're stuck so hard that the watchdog can't go off?
1062 1118 * try to catch it after 2 seconds
1063 1119 */
1064   - if (time_after(jiffies,
1065   - ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
1066   - ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
1067   - ioat_chan->last_completion_time = jiffies;
  1120 + if (ioat_chan->device->version != IOAT_VER_3_0) {
  1121 + if (time_after(jiffies,
  1122 + ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
  1123 + ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
  1124 + ioat_chan->last_completion_time = jiffies;
  1125 + }
1068 1126 }
1069 1127 return;
1070 1128 }
... ... @@ -1120,6 +1178,7 @@
1120 1178 }
1121 1179 break;
1122 1180 case IOAT_VER_2_0:
  1181 + case IOAT_VER_3_0:
1123 1182 /* has some other thread has already cleaned up? */
1124 1183 if (ioat_chan->used_desc.prev == NULL)
1125 1184 break;
1126 1185  
... ... @@ -1223,10 +1282,19 @@
1223 1282 spin_lock_bh(&ioat_chan->desc_lock);
1224 1283  
1225 1284 desc = ioat_dma_get_next_descriptor(ioat_chan);
  1285 +
  1286 + if (!desc) {
  1287 + dev_err(&ioat_chan->device->pdev->dev,
  1288 + "Unable to start null desc - get next desc failed\n");
  1289 + spin_unlock_bh(&ioat_chan->desc_lock);
  1290 + return;
  1291 + }
  1292 +
1226 1293 desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
1227 1294 | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
1228 1295 | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
1229   - desc->hw->size = 0;
  1296 + /* set size to non-zero value (channel returns error when size is 0) */
  1297 + desc->hw->size = NULL_DESC_BUFFER_SIZE;
1230 1298 desc->hw->src_addr = 0;
1231 1299 desc->hw->dst_addr = 0;
1232 1300 async_tx_ack(&desc->async_tx);
... ... @@ -1244,6 +1312,7 @@
1244 1312 + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
1245 1313 break;
1246 1314 case IOAT_VER_2_0:
  1315 + case IOAT_VER_3_0:
1247 1316 writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
1248 1317 ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
1249 1318 writel(((u64) desc->async_tx.phys) >> 32,
... ... @@ -1562,6 +1631,7 @@
1562 1631 ioat1_dma_memcpy_issue_pending;
1563 1632 break;
1564 1633 case IOAT_VER_2_0:
  1634 + case IOAT_VER_3_0:
1565 1635 device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
1566 1636 device->common.device_issue_pending =
1567 1637 ioat2_dma_memcpy_issue_pending;
... ... @@ -1585,9 +1655,11 @@
1585 1655  
1586 1656 dma_async_device_register(&device->common);
1587 1657  
1588   - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
1589   - schedule_delayed_work(&device->work,
1590   - WATCHDOG_DELAY);
  1658 + if (device->version != IOAT_VER_3_0) {
  1659 + INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
  1660 + schedule_delayed_work(&device->work,
  1661 + WATCHDOG_DELAY);
  1662 + }
1591 1663  
1592 1664 return device;
1593 1665  
... ... @@ -1621,7 +1693,9 @@
1621 1693 pci_release_regions(device->pdev);
1622 1694 pci_disable_device(device->pdev);
1623 1695  
1624   - cancel_delayed_work(&device->work);
  1696 + if (device->version != IOAT_VER_3_0) {
  1697 + cancel_delayed_work(&device->work);
  1698 + }
1625 1699  
1626 1700 list_for_each_entry_safe(chan, _chan,
1627 1701 &device->common.channels, device_node) {
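
For context on what the tx_submit error paths and null-descriptor workaround above serve, a hedged sketch of a dmaengine client driving an ioatdma channel, assuming the dmaengine client helpers of this kernel generation (dma_async_memcpy_buf_to_buf() and friends); the function name and the busy-wait are illustrative only.

#include <linux/dmaengine.h>

static void example_async_copy(struct dma_chan *chan, void *dst, void *src,
			       size_t len)
{
	dma_cookie_t cookie;

	cookie = dma_async_memcpy_buf_to_buf(chan, dst, src, len);
	if (cookie < 0)
		return;				/* caller would fall back to memcpy() */

	dma_async_memcpy_issue_pending(chan);	/* kick the channel */

	/* Busy-wait purely for illustration; real clients poll later. */
	while (dma_async_memcpy_complete(chan, cookie, NULL, NULL) ==
	       DMA_IN_PROGRESS)
		cpu_relax();
}
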
drivers/dma/ioatdma.h
... ... @@ -29,7 +29,7 @@
29 29 #include <linux/pci_ids.h>
30 30 #include <net/tcp.h>
31 31  
32   -#define IOAT_DMA_VERSION "2.18"
  32 +#define IOAT_DMA_VERSION "3.30"
33 33  
34 34 enum ioat_interrupt {
35 35 none = 0,
... ... @@ -135,6 +135,7 @@
135 135 #ifdef CONFIG_NET_DMA
136 136 switch (dev->version) {
137 137 case IOAT_VER_1_2:
  138 + case IOAT_VER_3_0:
138 139 sysctl_tcp_dma_copybreak = 4096;
139 140 break;
140 141 case IOAT_VER_2_0:
141 142  
... ... @@ -150,11 +151,13 @@
150 151 void ioat_dma_remove(struct ioatdma_device *device);
151 152 struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
152 153 struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
  154 +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
153 155 #else
154 156 #define ioat_dma_probe(pdev, iobase) NULL
155 157 #define ioat_dma_remove(device) do { } while (0)
156 158 #define ioat_dca_init(pdev, iobase) NULL
157 159 #define ioat2_dca_init(pdev, iobase) NULL
  160 +#define ioat3_dca_init(pdev, iobase) NULL
158 161 #endif
159 162  
160 163 #endif /* IOATDMA_H */
drivers/dma/ioatdma_hw.h
... ... @@ -35,6 +35,7 @@
35 35 #define IOAT_PCI_SID 0x8086
36 36 #define IOAT_VER_1_2 0x12 /* Version 1.2 */
37 37 #define IOAT_VER_2_0 0x20 /* Version 2.0 */
  38 +#define IOAT_VER_3_0 0x30 /* Version 3.0 */
38 39  
39 40 struct ioat_dma_descriptor {
40 41 uint32_t size;
drivers/dma/ioatdma_registers.h
... ... @@ -25,6 +25,10 @@
25 25 #define IOAT_PCI_DMACTRL_DMA_EN 0x00000001
26 26 #define IOAT_PCI_DMACTRL_MSI_EN 0x00000002
27 27  
  28 +#define IOAT_PCI_DEVICE_ID_OFFSET 0x02
  29 +#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148
  30 +#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184
  31 +
28 32 /* MMIO Device Registers */
29 33 #define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */
30 34  
31 35  
... ... @@ -149,7 +153,23 @@
149 153 #define IOAT_DCA_GREQID_VALID 0x20000000
150 154 #define IOAT_DCA_GREQID_LASTID 0x80000000
151 155  
  156 +#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
  157 +#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1
152 158  
  159 +#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
  160 +#define IOAT3_PCI_CAPABILITY_MEMWR 0x1
  161 +
  162 +#define IOAT3_CSI_CONTROL_OFFSET 0x0C
  163 +#define IOAT3_CSI_CONTROL_PREFETCH 0x1
  164 +
  165 +#define IOAT3_PCI_CONTROL_OFFSET 0x0E
  166 +#define IOAT3_PCI_CONTROL_MEMWR 0x1
  167 +
  168 +#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
  169 +#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10
  170 +#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
  171 +
  172 +#define IOAT3_DCA_GREQID_OFFSET 0x02
153 173  
154 174 #define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */
155 175 #define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */
include/linux/dca.h
... ... @@ -10,6 +10,7 @@
10 10 #define DCA_PROVIDER_REMOVE 0x0002
11 11  
12 12 struct dca_provider {
  13 + struct list_head node;
13 14 struct dca_ops *ops;
14 15 struct device *cd;
15 16 int id;
... ... @@ -18,7 +19,9 @@
18 19 struct dca_ops {
19 20 int (*add_requester) (struct dca_provider *, struct device *);
20 21 int (*remove_requester) (struct dca_provider *, struct device *);
21   - u8 (*get_tag) (struct dca_provider *, int cpu);
  22 + u8 (*get_tag) (struct dca_provider *, struct device *,
  23 + int cpu);
  24 + int (*dev_managed) (struct dca_provider *, struct device *);
22 25 };
23 26  
24 27 struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
25 28  
... ... @@ -32,9 +35,11 @@
32 35 }
33 36  
34 37 /* Requester API */
  38 +#define DCA_GET_TAG_TWO_ARGS
35 39 int dca_add_requester(struct device *dev);
36 40 int dca_remove_requester(struct device *dev);
37 41 u8 dca_get_tag(int cpu);
  42 +u8 dca3_get_tag(struct device *dev, int cpu);
38 43  
39 44 /* internal stuff */
40 45 int __init dca_sysfs_init(void);
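
The DCA_GET_TAG_TWO_ARGS define above lets a consumer pick the right get_tag variant at compile time. A hedged sketch of how a requester driver might use it (the surrounding helper is hypothetical):

#include <linux/dca.h>
#include <linux/pci.h>

static u8 example_dca_tag(struct pci_dev *pdev, int cpu)
{
#ifdef DCA_GET_TAG_TWO_ARGS
	/* per-device API added by this patch */
	return dca3_get_tag(&pdev->dev, cpu);
#else
	/* older cpu-only API */
	return dca_get_tag(cpu);
#endif
}
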
include/linux/pci_ids.h
... ... @@ -2363,6 +2363,14 @@
2363 2363 #define PCI_DEVICE_ID_INTEL_ICH9_7 0x2916
2364 2364 #define PCI_DEVICE_ID_INTEL_ICH9_8 0x2918
2365 2365 #define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340
  2366 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG4 0x3429
  2367 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG5 0x342a
  2368 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG6 0x342b
  2369 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG7 0x342c
  2370 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG0 0x3430
  2371 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG1 0x3431
  2372 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG2 0x3432
  2373 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433
2366 2374 #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575
2367 2375 #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577
2368 2376 #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580