Commit 93a23a7271dfb811b3adb72779054c3a24433112

Authored by Yu Zhao
Committed by David Woodhouse
1 parent 9dd2fe8906

VT-d: support the device IOTLB

Enable the device IOTLB (i.e. ATS) for both the bare metal and KVM
environments.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
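
Background for the diff below: an ATS-capable PCIe endpoint keeps its own cache of IOMMU translations (a "device IOTLB"), which takes pressure off the IOMMU's IOTLB but means the endpoint's cache must be invalidated in step with the IOMMU's. The patch gates the feature on three conditions before enabling ATS on a device. The condensed sketch below is purely illustrative; the real check is iommu_support_dev_iotlb() in the first hunk, which additionally requires queued invalidation and returns the device_domain_info to the caller:

/*
 * Illustrative sketch only, not part of the patch: a device may use its
 * IOTLB iff the IOMMU advertises Device-IOTLB support, the endpoint
 * exposes the PCIe ATS capability, and an ACPI ATSR unit covers it.
 */
static bool device_iotlb_usable(struct intel_iommu *iommu, struct pci_dev *dev)
{
	return ecap_dev_iotlb_support(iommu->ecap) &&
	       pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS) &&
	       dmar_find_matched_atsr_unit(dev);
}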

Showing 3 changed files with 102 additions and 9 deletions

drivers/pci/intel-iommu.c
@@ -252,6 +252,7 @@
 	u8 bus;			/* PCI bus number */
 	u8 devfn;		/* PCI devfn number */
 	struct pci_dev *dev;	/* it's NULL for PCIE-to-PCI bridge */
+	struct intel_iommu *iommu; /* IOMMU used by this device */
 	struct dmar_domain *domain; /* pointer to domain */
 };
 
@@ -945,6 +946,77 @@
 		(unsigned long long)DMA_TLB_IAIG(val));
 }
 
+static struct device_domain_info *iommu_support_dev_iotlb(
+	struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
+{
+	int found = 0;
+	unsigned long flags;
+	struct device_domain_info *info;
+	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
+
+	if (!ecap_dev_iotlb_support(iommu->ecap))
+		return NULL;
+
+	if (!iommu->qi)
+		return NULL;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	list_for_each_entry(info, &domain->devices, link)
+		if (info->bus == bus && info->devfn == devfn) {
+			found = 1;
+			break;
+		}
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+
+	if (!found || !info->dev)
+		return NULL;
+
+	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
+		return NULL;
+
+	if (!dmar_find_matched_atsr_unit(info->dev))
+		return NULL;
+
+	info->iommu = iommu;
+
+	return info;
+}
+
+static void iommu_enable_dev_iotlb(struct device_domain_info *info)
+{
+	if (!info)
+		return;
+
+	pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
+}
+
+static void iommu_disable_dev_iotlb(struct device_domain_info *info)
+{
+	if (!info->dev || !pci_ats_enabled(info->dev))
+		return;
+
+	pci_disable_ats(info->dev);
+}
+
+static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
+				  u64 addr, unsigned mask)
+{
+	u16 sid, qdep;
+	unsigned long flags;
+	struct device_domain_info *info;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	list_for_each_entry(info, &domain->devices, link) {
+		if (!info->dev || !pci_ats_enabled(info->dev))
+			continue;
+
+		sid = info->bus << 8 | info->devfn;
+		qdep = pci_ats_queue_depth(info->dev);
+		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
+	}
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+}
+
 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
 				  u64 addr, unsigned int pages)
 {
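
In iommu_flush_dev_iotlb() above, each ATS-enabled device is addressed by its 16-bit PCIe requester ID (the "source-id"): bus number in the high byte, devfn in the low byte; qdep is the invalidate queue depth read from the device's ATS capability. A standalone sketch of the encoding, using a made-up device address 0000:00:1f.2:

#include <stdio.h>

int main(void)
{
	unsigned int bus = 0x00, dev = 0x1f, fn = 0x2; /* hypothetical 0000:00:1f.2 */
	unsigned int devfn = dev << 3 | fn;            /* same layout as PCI_DEVFN() */
	unsigned int sid = bus << 8 | devfn;           /* requester ID used in the
	                                                  invalidation descriptor */

	printf("devfn=0x%02x sid=0x%04x\n", devfn, sid); /* devfn=0xfa sid=0x00fa */
	return 0;
}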
@@ -965,6 +1037,8 @@
 	else
 		iommu->flush.flush_iotlb(iommu, did, addr, mask,
 						DMA_TLB_PSI_FLUSH);
+	if (did)
+		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
 }
 
 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
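
The if (did) guard deserves a note: with caching mode (the KVM case), hardware tags non-present-to-present flushes with the reserved domain ID 0, which has no struct dmar_domain behind it, so the guard presumably exists to avoid indexing iommu->domains[] with it. Annotated restatement of the added lines (the rationale is an assumption, not stated in the commit):

	/* did == 0 may be the reserved caching-mode domain with no
	 * dmar_domain attached; only flush device IOTLBs for real
	 * domains (assumed rationale) */
	if (did)
		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);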
@@ -1305,6 +1379,7 @@
 	unsigned long ndomains;
 	int id;
 	int agaw;
+	struct device_domain_info *info = NULL;
 
 	pr_debug("Set context mapping for %02x:%02x.%d\n",
 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1372,15 +1447,21 @@
 
 	context_set_domain_id(context, id);
 
+	if (translation != CONTEXT_TT_PASS_THROUGH) {
+		info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
+		translation = info ? CONTEXT_TT_DEV_IOTLB :
+				     CONTEXT_TT_MULTI_LEVEL;
+	}
 	/*
 	 * In pass through mode, AW must be programmed to indicate the largest
 	 * AGAW value supported by hardware. And ASR is ignored by hardware.
 	 */
-	if (likely(translation == CONTEXT_TT_MULTI_LEVEL)) {
-		context_set_address_width(context, iommu->agaw);
-		context_set_address_root(context, virt_to_phys(pgd));
-	} else
+	if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
 		context_set_address_width(context, iommu->msagaw);
+	else {
+		context_set_address_root(context, virt_to_phys(pgd));
+		context_set_address_width(context, iommu->agaw);
+	}
 
 	context_set_translation_type(context, translation);
 	context_set_fault_enable(context);
@@ -1402,6 +1483,7 @@
 	} else {
 		iommu_flush_write_buffer(iommu);
 	}
+	iommu_enable_dev_iotlb(info);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	spin_lock_irqsave(&domain->iommu_lock, flags);
@@ -1552,6 +1634,7 @@
 			info->dev->dev.archdata.iommu = NULL;
 		spin_unlock_irqrestore(&device_domain_lock, flags);
 
+		iommu_disable_dev_iotlb(info);
 		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
 		iommu_detach_dev(iommu, info->bus, info->devfn);
 		free_devinfo_mem(info);
@@ -2259,10 +2342,16 @@
 			continue;
 
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
-					 DMA_TLB_GLOBAL_FLUSH, 0);
+					 DMA_TLB_GLOBAL_FLUSH);
 		for (j = 0; j < deferred_flush[i].next; j++) {
-			__free_iova(&deferred_flush[i].domain[j]->iovad,
-				    deferred_flush[i].iova[j]);
+			unsigned long mask;
+			struct iova *iova = deferred_flush[i].iova[j];
+
+			mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT;
+			mask = ilog2(mask >> VTD_PAGE_SHIFT);
+			iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
+					iova->pfn_lo << PAGE_SHIFT, mask);
+			__free_iova(&deferred_flush[i].domain[j]->iovad, iova);
 		}
 		deferred_flush[i].next = 0;
 	}
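
The mask computation above converts the freed IOVA range into the address-mask format used by device-IOTLB invalidation descriptors: the size in bytes is scaled to VT-d pages, and ilog2() of that page count gives the mask, so the descriptor covers 1 << mask pages. A standalone worked example (the pfn values and 4 KiB shifts are illustrative):

#include <stdio.h>

/* floor(log2(v)), matching the kernel's ilog2() for the power-of-two
 * region sizes the IOVA allocator hands out */
static unsigned int ilog2_u(unsigned long v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	const unsigned int page_shift = 12;             /* PAGE_SHIFT on x86 */
	const unsigned int vtd_page_shift = 12;         /* VTD_PAGE_SHIFT */
	unsigned long pfn_lo = 0x1000, pfn_hi = 0x1003; /* hypothetical 4-page IOVA */
	unsigned long bytes = (pfn_hi - pfn_lo + 1) << page_shift;
	unsigned int mask = ilog2_u(bytes >> vtd_page_shift);

	/* mask = 2: the descriptor invalidates 1 << 2 = 4 pages starting
	 * at iova->pfn_lo << PAGE_SHIFT */
	printf("addr=0x%lx mask=%u covers %lu pages\n",
	       pfn_lo << page_shift, mask, 1UL << mask);
	return 0;
}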
@@ -2943,6 +3032,7 @@
 			info->dev->dev.archdata.iommu = NULL;
 		spin_unlock_irqrestore(&device_domain_lock, flags);
 
+		iommu_disable_dev_iotlb(info);
 		iommu_detach_dev(iommu, info->bus, info->devfn);
 		iommu_detach_dependent_devices(iommu, pdev);
 		free_devinfo_mem(info);
@@ -2993,6 +3083,7 @@
 
 		spin_unlock_irqrestore(&device_domain_lock, flags1);
 
+		iommu_disable_dev_iotlb(info);
 		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
 		iommu_detach_dev(iommu, info->bus, info->devfn);
 		iommu_detach_dependent_devices(iommu, info->dev);
@@ -3197,11 +3288,11 @@
 		return -EFAULT;
 	}
 
-	ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
+	ret = vm_domain_add_dev_info(dmar_domain, pdev);
 	if (ret)
 		return ret;
 
-	ret = vm_domain_add_dev_info(dmar_domain, pdev);
+	ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
 	return ret;
 }
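
The two calls in this hunk swap places because domain_context_mapping() now reaches iommu_support_dev_iotlb(), which searches domain->devices for the device, and vm_domain_add_dev_info() is what puts it on that list. Restatement of the hunk with explanatory comments:

	/* list the device in domain->devices first ... */
	ret = vm_domain_add_dev_info(dmar_domain, pdev);
	if (ret)
		return ret;

	/* ... so the context-mapping path can find it when probing for
	 * device-IOTLB eligibility */
	ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
	return ret;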
include/linux/dma_remapping.h
@@ -14,6 +14,7 @@
 #define DMA_PTE_SNP (1 << 11)
 
 #define CONTEXT_TT_MULTI_LEVEL	0
+#define CONTEXT_TT_DEV_IOTLB	1
 #define CONTEXT_TT_PASS_THROUGH 2
 
 struct intel_iommu;
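
These constants program the translation-type field of a VT-d context entry. The standalone enum below is an illustrative summary only, with the names changed so it is not mistaken for the kernel defines; the meanings are paraphrased from the VT-d specification, not from the patch:

/* What each context-entry translation type permits */
enum context_tt_summary {
	TT_MULTI_LEVEL  = 0, /* remap untranslated requests only */
	TT_DEV_IOTLB    = 1, /* also answer ATS translation requests and
	                        accept pre-translated requests */
	TT_PASS_THROUGH = 2, /* untranslated requests bypass remapping */
};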
include/linux/intel-iommu.h
@@ -124,6 +124,7 @@
 #define ecap_pass_through(e)	((e >> 6) & 0x1)
 #define ecap_eim_support(e)	((e >> 4) & 0x1)
 #define ecap_ir_support(e)	((e >> 3) & 0x1)
+#define ecap_dev_iotlb_support(e)	(((e) >> 2) & 0x1)
 #define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
 #define ecap_sc_support(e)	((e >> 7) & 0x1) /* Snooping Control */
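
The new macro tests bit 2 of the IOMMU's extended capability register (and, unlike its neighbors, parenthesizes its argument). A trivial standalone check with a made-up register value:

#include <stdio.h>

#define ecap_dev_iotlb_support(e) (((e) >> 2) & 0x1)

int main(void)
{
	unsigned long long ecap = 0x6; /* hypothetical ECAP: bits 1 and 2 set */

	printf("device IOTLB support: %llu\n", ecap_dev_iotlb_support(ecap)); /* 1 */
	return 0;
}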