Commit b0a11f44aba631fd4b898b620c93cc2096f3f15c

Authored by Joerg Roedel

Merge branches 'iommu/api' and 'iommu/amd' into for-linus

Showing 4 changed files

arch/x86/Kconfig
... ... @@ -586,6 +586,16 @@
586 586 your BIOS for an option to enable it or if you have an IVRS ACPI
587 587 table.
588 588  
  589 +config AMD_IOMMU_STATS
  590 + bool "Export AMD IOMMU statistics to debugfs"
  591 + depends on AMD_IOMMU
  592 + select DEBUG_FS
  593 + help
  594 + This option enables code in the AMD IOMMU driver to collect various
  595 + statistics about what is happening in the driver and export that
  596 + information to userspace via debugfs.
  597 + If unsure, say N.
  598 +
589 599 # need this always selected by IOMMU for the VIA workaround
590 600 config SWIOTLB
591 601 def_bool y if X86_64
arch/x86/include/asm/amd_iommu_types.h
... ... @@ -190,16 +190,23 @@
190 190 /* FIXME: move this macro to <linux/pci.h> */
191 191 #define PCI_BUS(x) (((x) >> 8) & 0xff)
192 192  
  193 +/* Protection domain flags */
  194 +#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
  195 +#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
  196 + domain for an IOMMU */
  197 +
193 198 /*
194 199 * This structure contains generic data for IOMMU protection domains
195 200 * independent of their use.
196 201 */
197 202 struct protection_domain {
198   - spinlock_t lock; /* mostly used to lock the page table*/
199   - u16 id; /* the domain id written to the device table */
200   - int mode; /* paging mode (0-6 levels) */
201   - u64 *pt_root; /* page table root pointer */
202   - void *priv; /* private data */
  203 + spinlock_t lock; /* mostly used to lock the page table */
  204 + u16 id; /* the domain id written to the device table */
  205 + int mode; /* paging mode (0-6 levels) */
  206 + u64 *pt_root; /* page table root pointer */
  207 + unsigned long flags; /* flags to find out type of domain */
  208 + unsigned dev_cnt; /* devices assigned to this domain */
  209 + void *priv; /* private data */
203 210 };
204 211  
205 212 /*
... ... @@ -295,7 +302,7 @@
295 302 bool int_enabled;
296 303  
297 304 /* if one, we need to send a completion wait command */
298   - int need_sync;
  305 + bool need_sync;
299 306  
300 307 /* default dma_ops domain for that IOMMU */
301 308 struct dma_ops_domain *default_dom;
... ... @@ -374,7 +381,7 @@
374 381 extern unsigned long *amd_iommu_pd_alloc_bitmap;
375 382  
376 383 /* will be 1 if device isolation is enabled */
377   -extern int amd_iommu_isolate;
  384 +extern bool amd_iommu_isolate;
378 385  
379 386 /*
380 387 * If true, the addresses will be flushed on unmap time, not when
381 388  
... ... @@ -382,24 +389,40 @@
382 389 */
383 390 extern bool amd_iommu_unmap_flush;
384 391  
385   -/* takes a PCI device id and prints it out in a readable form */
386   -static inline void print_devid(u16 devid, int nl)
387   -{
388   - int bus = devid >> 8;
389   - int dev = devid >> 3 & 0x1f;
390   - int fn = devid & 0x07;
391   -
392   - printk("%02x:%02x.%x", bus, dev, fn);
393   - if (nl)
394   - printk("\n");
395   -}
396   -
397 392 /* takes bus and device/function and returns the device id
398 393 * FIXME: should that be in generic PCI code? */
399 394 static inline u16 calc_devid(u8 bus, u8 devfn)
400 395 {
401 396 return (((u16)bus) << 8) | devfn;
402 397 }
  398 +
  399 +#ifdef CONFIG_AMD_IOMMU_STATS
  400 +
  401 +struct __iommu_counter {
  402 + char *name;
  403 + struct dentry *dent;
  404 + u64 value;
  405 +};
  406 +
  407 +#define DECLARE_STATS_COUNTER(nm) \
  408 + static struct __iommu_counter nm = { \
  409 + .name = #nm, \
  410 + }
  411 +
  412 +#define INC_STATS_COUNTER(name) name.value += 1
  413 +#define ADD_STATS_COUNTER(name, x) name.value += (x)
  414 +#define SUB_STATS_COUNTER(name, x) name.value -= (x)
  415 +
  416 +#else /* CONFIG_AMD_IOMMU_STATS */
  417 +
  418 +#define DECLARE_STATS_COUNTER(name)
  419 +#define INC_STATS_COUNTER(name)
  420 +#define ADD_STATS_COUNTER(name, x)
  421 +#define SUB_STATS_COUNTER(name, x)
  422 +
  423 +static inline void amd_iommu_stats_init(void) { }
  424 +
  425 +#endif /* CONFIG_AMD_IOMMU_STATS */
403 426  
404 427 #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
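
For orientation, a minimal sketch of how these counter macros are meant to be used by driver code; the counter name cnt_example and the function example_hot_path() are hypothetical, modeled on the DECLARE_STATS_COUNTER/INC_STATS_COUNTER usage in amd_iommu.c below. With CONFIG_AMD_IOMMU_STATS disabled, all of the macros expand to nothing, so the hot path carries no overhead:

	/* file scope: defines a zero-initialized u64 counter whose .name
	 * string is "cnt_example" (hypothetical counter name) */
	DECLARE_STATS_COUNTER(cnt_example);

	static void example_hot_path(void)
	{
		/* a plain increment with CONFIG_AMD_IOMMU_STATS=y,
		 * compiled away entirely when the option is off */
		INC_STATS_COUNTER(cnt_example);
	}

	/* to show up under /sys/kernel/debug/amd-iommu/, the counter must
	 * also be registered via amd_iommu_stats_add(&cnt_example) from
	 * amd_iommu_stats_init(), as the driver does for its own counters */
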
arch/x86/kernel/amd_iommu.c
... ... @@ -20,8 +20,12 @@
20 20 #include <linux/pci.h>
21 21 #include <linux/gfp.h>
22 22 #include <linux/bitops.h>
  23 +#include <linux/debugfs.h>
23 24 #include <linux/scatterlist.h>
24 25 #include <linux/iommu-helper.h>
  26 +#ifdef CONFIG_IOMMU_API
  27 +#include <linux/iommu.h>
  28 +#endif
25 29 #include <asm/proto.h>
26 30 #include <asm/iommu.h>
27 31 #include <asm/gart.h>
... ... @@ -38,6 +42,10 @@
38 42 static LIST_HEAD(iommu_pd_list);
39 43 static DEFINE_SPINLOCK(iommu_pd_list_lock);
40 44  
  45 +#ifdef CONFIG_IOMMU_API
  46 +static struct iommu_ops amd_iommu_ops;
  47 +#endif
  48 +
41 49 /*
42 50 * general struct to manage commands send to an IOMMU
43 51 */
44 52  
... ... @@ -47,7 +55,69 @@
47 55  
48 56 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
49 57 struct unity_map_entry *e);
  58 +static struct dma_ops_domain *find_protection_domain(u16 devid);
50 59  
  60 +
  61 +#ifdef CONFIG_AMD_IOMMU_STATS
  62 +
  63 +/*
  64 + * Initialization code for statistics collection
  65 + */
  66 +
  67 +DECLARE_STATS_COUNTER(compl_wait);
  68 +DECLARE_STATS_COUNTER(cnt_map_single);
  69 +DECLARE_STATS_COUNTER(cnt_unmap_single);
  70 +DECLARE_STATS_COUNTER(cnt_map_sg);
  71 +DECLARE_STATS_COUNTER(cnt_unmap_sg);
  72 +DECLARE_STATS_COUNTER(cnt_alloc_coherent);
  73 +DECLARE_STATS_COUNTER(cnt_free_coherent);
  74 +DECLARE_STATS_COUNTER(cross_page);
  75 +DECLARE_STATS_COUNTER(domain_flush_single);
  76 +DECLARE_STATS_COUNTER(domain_flush_all);
  77 +DECLARE_STATS_COUNTER(alloced_io_mem);
  78 +DECLARE_STATS_COUNTER(total_map_requests);
  79 +
  80 +static struct dentry *stats_dir;
  81 +static struct dentry *de_isolate;
  82 +static struct dentry *de_fflush;
  83 +
  84 +static void amd_iommu_stats_add(struct __iommu_counter *cnt)
  85 +{
  86 + if (stats_dir == NULL)
  87 + return;
  88 +
  89 + cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
  90 + &cnt->value);
  91 +}
  92 +
  93 +static void amd_iommu_stats_init(void)
  94 +{
  95 + stats_dir = debugfs_create_dir("amd-iommu", NULL);
  96 + if (stats_dir == NULL)
  97 + return;
  98 +
  99 + de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
  100 + (u32 *)&amd_iommu_isolate);
  101 +
  102 + de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
  103 + (u32 *)&amd_iommu_unmap_flush);
  104 +
  105 + amd_iommu_stats_add(&compl_wait);
  106 + amd_iommu_stats_add(&cnt_map_single);
  107 + amd_iommu_stats_add(&cnt_unmap_single);
  108 + amd_iommu_stats_add(&cnt_map_sg);
  109 + amd_iommu_stats_add(&cnt_unmap_sg);
  110 + amd_iommu_stats_add(&cnt_alloc_coherent);
  111 + amd_iommu_stats_add(&cnt_free_coherent);
  112 + amd_iommu_stats_add(&cross_page);
  113 + amd_iommu_stats_add(&domain_flush_single);
  114 + amd_iommu_stats_add(&domain_flush_all);
  115 + amd_iommu_stats_add(&alloced_io_mem);
  116 + amd_iommu_stats_add(&total_map_requests);
  117 +}
  118 +
  119 +#endif
  120 +
51 121 /* returns !0 if the IOMMU is caching non-present entries in its TLB */
52 122 static int iommu_has_npcache(struct amd_iommu *iommu)
53 123 {
54 124  
... ... @@ -189,13 +259,55 @@
189 259 spin_lock_irqsave(&iommu->lock, flags);
190 260 ret = __iommu_queue_command(iommu, cmd);
191 261 if (!ret)
192   - iommu->need_sync = 1;
  262 + iommu->need_sync = true;
193 263 spin_unlock_irqrestore(&iommu->lock, flags);
194 264  
195 265 return ret;
196 266 }
197 267  
198 268 /*
  269 + * This function waits until an IOMMU has completed a completion
  270 + * wait command
  271 + */
  272 +static void __iommu_wait_for_completion(struct amd_iommu *iommu)
  273 +{
  274 + int ready = 0;
  275 + unsigned status = 0;
  276 + unsigned long i = 0;
  277 +
  278 + INC_STATS_COUNTER(compl_wait);
  279 +
  280 + while (!ready && (i < EXIT_LOOP_COUNT)) {
  281 + ++i;
  282 + /* wait for the bit to become one */
  283 + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
  284 + ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
  285 + }
  286 +
  287 + /* set bit back to zero */
  288 + status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
  289 + writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
  290 +
  291 + if (unlikely(i == EXIT_LOOP_COUNT))
  292 + panic("AMD IOMMU: Completion wait loop failed\n");
  293 +}
  294 +
  295 +/*
  296 + * This function queues a completion wait command into the command
  297 + * buffer of an IOMMU
  298 + */
  299 +static int __iommu_completion_wait(struct amd_iommu *iommu)
  300 +{
  301 + struct iommu_cmd cmd;
  302 +
  303 + memset(&cmd, 0, sizeof(cmd));
  304 + cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
  305 + CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
  306 +
  307 + return __iommu_queue_command(iommu, &cmd);
  308 +}
  309 +
  310 +/*
199 311 * This function is called whenever we need to ensure that the IOMMU has
200 312 * completed execution of all commands we sent. It sends a
201 313 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
202 314  
203 315  
204 316  
205 317  
206 318  
... ... @@ -204,41 +316,23 @@
204 316 */
205 317 static int iommu_completion_wait(struct amd_iommu *iommu)
206 318 {
207   - int ret = 0, ready = 0;
208   - unsigned status = 0;
209   - struct iommu_cmd cmd;
210   - unsigned long flags, i = 0;
  319 + int ret = 0;
  320 + unsigned long flags;
211 321  
212   - memset(&cmd, 0, sizeof(cmd));
213   - cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
214   - CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
215   -
216 322 spin_lock_irqsave(&iommu->lock, flags);
217 323  
218 324 if (!iommu->need_sync)
219 325 goto out;
220 326  
221   - iommu->need_sync = 0;
  327 + ret = __iommu_completion_wait(iommu);
222 328  
223   - ret = __iommu_queue_command(iommu, &cmd);
  329 + iommu->need_sync = false;
224 330  
225 331 if (ret)
226 332 goto out;
227 333  
228   - while (!ready && (i < EXIT_LOOP_COUNT)) {
229   - ++i;
230   - /* wait for the bit to become one */
231   - status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
232   - ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
233   - }
  334 + __iommu_wait_for_completion(iommu);
234 335  
235   - /* set bit back to zero */
236   - status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
237   - writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
238   -
239   - if (unlikely(i == EXIT_LOOP_COUNT))
240   - panic("AMD IOMMU: Completion wait loop failed\n");
241   -
242 336 out:
243 337 spin_unlock_irqrestore(&iommu->lock, flags);
244 338  
... ... @@ -264,6 +358,21 @@
264 358 return ret;
265 359 }
266 360  
  361 +static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
  362 + u16 domid, int pde, int s)
  363 +{
  364 + memset(cmd, 0, sizeof(*cmd));
  365 + address &= PAGE_MASK;
  366 + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
  367 + cmd->data[1] |= domid;
  368 + cmd->data[2] = lower_32_bits(address);
  369 + cmd->data[3] = upper_32_bits(address);
  370 + if (s) /* size bit - we flush more than one 4kb page */
  371 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
  372 + if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
  373 + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
  374 +}
  375 +
267 376 /*
268 377 * Generic command send function for invalidating TLB entries
269 378 */
... ... @@ -273,16 +382,7 @@
273 382 struct iommu_cmd cmd;
274 383 int ret;
275 384  
276   - memset(&cmd, 0, sizeof(cmd));
277   - address &= PAGE_MASK;
278   - CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
279   - cmd.data[1] |= domid;
280   - cmd.data[2] = lower_32_bits(address);
281   - cmd.data[3] = upper_32_bits(address);
282   - if (s) /* size bit - we flush more than one 4kb page */
283   - cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
284   - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
285   - cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
  385 + __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
286 386  
287 387 ret = iommu_queue_command(iommu, &cmd);
288 388  
289 389  
... ... @@ -321,9 +421,37 @@
321 421 {
322 422 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
323 423  
  424 + INC_STATS_COUNTER(domain_flush_single);
  425 +
324 426 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
325 427 }
326 428  
  429 +#ifdef CONFIG_IOMMU_API
  430 +/*
  431 + * This function is used to flush the IO/TLB for a given protection domain
  432 + * on every IOMMU in the system
  433 + */
  434 +static void iommu_flush_domain(u16 domid)
  435 +{
  436 + unsigned long flags;
  437 + struct amd_iommu *iommu;
  438 + struct iommu_cmd cmd;
  439 +
  440 + INC_STATS_COUNTER(domain_flush_all);
  441 +
  442 + __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
  443 + domid, 1, 1);
  444 +
  445 + list_for_each_entry(iommu, &amd_iommu_list, list) {
  446 + spin_lock_irqsave(&iommu->lock, flags);
  447 + __iommu_queue_command(iommu, &cmd);
  448 + __iommu_completion_wait(iommu);
  449 + __iommu_wait_for_completion(iommu);
  450 + spin_unlock_irqrestore(&iommu->lock, flags);
  451 + }
  452 +}
  453 +#endif
  454 +
327 455 /****************************************************************************
328 456 *
329 457 * The functions below are used to create the page table mappings for
... ... @@ -338,10 +466,10 @@
338 466 * supporting all features of AMD IOMMU page tables like level skipping
339 467 * and full 64 bit address spaces.
340 468 */
341   -static int iommu_map(struct protection_domain *dom,
342   - unsigned long bus_addr,
343   - unsigned long phys_addr,
344   - int prot)
  469 +static int iommu_map_page(struct protection_domain *dom,
  470 + unsigned long bus_addr,
  471 + unsigned long phys_addr,
  472 + int prot)
345 473 {
346 474 u64 __pte, *pte, *page;
347 475  
... ... @@ -388,6 +516,30 @@
388 516 return 0;
389 517 }
390 518  
  519 +#ifdef CONFIG_IOMMU_API
  520 +static void iommu_unmap_page(struct protection_domain *dom,
  521 + unsigned long bus_addr)
  522 +{
  523 + u64 *pte;
  524 +
  525 + pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
  526 +
  527 + if (!IOMMU_PTE_PRESENT(*pte))
  528 + return;
  529 +
  530 + pte = IOMMU_PTE_PAGE(*pte);
  531 + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
  532 +
  533 + if (!IOMMU_PTE_PRESENT(*pte))
  534 + return;
  535 +
  536 + pte = IOMMU_PTE_PAGE(*pte);
  537 + pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
  538 +
  539 + *pte = 0;
  540 +}
  541 +#endif
  542 +
391 543 /*
392 544 * This function checks if a specific unity mapping entry is needed for
393 545 * this specific IOMMU.
... ... @@ -440,7 +592,7 @@
440 592  
441 593 for (addr = e->address_start; addr < e->address_end;
442 594 addr += PAGE_SIZE) {
443   - ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
  595 + ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
444 596 if (ret)
445 597 return ret;
446 598 /*
... ... @@ -571,6 +723,18 @@
571 723 return id;
572 724 }
573 725  
  726 +#ifdef CONFIG_IOMMU_API
  727 +static void domain_id_free(int id)
  728 +{
  729 + unsigned long flags;
  730 +
  731 + write_lock_irqsave(&amd_iommu_devtable_lock, flags);
  732 + if (id > 0 && id < MAX_DOMAIN_ID)
  733 + __clear_bit(id, amd_iommu_pd_alloc_bitmap);
  734 + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
  735 +}
  736 +#endif
  737 +
574 738 /*
575 739 * Used to reserve address ranges in the aperture (e.g. for exclusion
576 740 * ranges).
577 741  
... ... @@ -587,12 +751,12 @@
587 751 iommu_area_reserve(dom->bitmap, start_page, pages);
588 752 }
589 753  
590   -static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
  754 +static void free_pagetable(struct protection_domain *domain)
591 755 {
592 756 int i, j;
593 757 u64 *p1, *p2, *p3;
594 758  
595   - p1 = dma_dom->domain.pt_root;
  759 + p1 = domain->pt_root;
596 760  
597 761 if (!p1)
598 762 return;
... ... @@ -613,6 +777,8 @@
613 777 }
614 778  
615 779 free_page((unsigned long)p1);
  780 +
  781 + domain->pt_root = NULL;
616 782 }
617 783  
618 784 /*
... ... @@ -624,7 +790,7 @@
624 790 if (!dom)
625 791 return;
626 792  
627   - dma_ops_free_pagetable(dom);
  793 + free_pagetable(&dom->domain);
628 794  
629 795 kfree(dom->pte_pages);
630 796  
... ... @@ -663,6 +829,7 @@
663 829 goto free_dma_dom;
664 830 dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
665 831 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
  832 + dma_dom->domain.flags = PD_DMA_OPS_MASK;
666 833 dma_dom->domain.priv = dma_dom;
667 834 if (!dma_dom->domain.pt_root)
668 835 goto free_dma_dom;
... ... @@ -725,6 +892,15 @@
725 892 }
726 893  
727 894 /*
  895 + * little helper function to check whether a given protection domain is a
  896 + * dma_ops domain
  897 + */
  898 +static bool dma_ops_domain(struct protection_domain *domain)
  899 +{
  900 + return domain->flags & PD_DMA_OPS_MASK;
  901 +}
  902 +
  903 +/*
728 904 * Find out the protection domain structure for a given PCI device. This
729 905 * will give us the pointer to the page table root for example.
730 906 */
731 907  
732 908  
... ... @@ -744,14 +920,15 @@
744 920 * If a device is not yet associated with a domain, this function
745 921 * assigns it to one and makes it visible to the hardware
746 922 */
747   -static void set_device_domain(struct amd_iommu *iommu,
748   - struct protection_domain *domain,
749   - u16 devid)
  923 +static void attach_device(struct amd_iommu *iommu,
  924 + struct protection_domain *domain,
  925 + u16 devid)
750 926 {
751 927 unsigned long flags;
752   -
753 928 u64 pte_root = virt_to_phys(domain->pt_root);
754 929  
  930 + domain->dev_cnt += 1;
  931 +
755 932 pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
756 933 << DEV_ENTRY_MODE_SHIFT;
757 934 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
... ... @@ -767,6 +944,116 @@
767 944 iommu_queue_inv_dev_entry(iommu, devid);
768 945 }
769 946  
  947 +/*
  948 + * Removes a device from a protection domain (unlocked)
  949 + */
  950 +static void __detach_device(struct protection_domain *domain, u16 devid)
  951 +{
  952 +
  953 + /* lock domain */
  954 + spin_lock(&domain->lock);
  955 +
  956 + /* remove domain from the lookup table */
  957 + amd_iommu_pd_table[devid] = NULL;
  958 +
  959 + /* remove entry from the device table seen by the hardware */
  960 + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
  961 + amd_iommu_dev_table[devid].data[1] = 0;
  962 + amd_iommu_dev_table[devid].data[2] = 0;
  963 +
  964 + /* decrease reference counter */
  965 + domain->dev_cnt -= 1;
  966 +
  967 + /* ready */
  968 + spin_unlock(&domain->lock);
  969 +}
  970 +
  971 +/*
  972 + * Removes a device from a protection domain (with devtable_lock held)
  973 + */
  974 +static void detach_device(struct protection_domain *domain, u16 devid)
  975 +{
  976 + unsigned long flags;
  977 +
  978 + /* lock device table */
  979 + write_lock_irqsave(&amd_iommu_devtable_lock, flags);
  980 + __detach_device(domain, devid);
  981 + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
  982 +}
  983 +
  984 +static int device_change_notifier(struct notifier_block *nb,
  985 + unsigned long action, void *data)
  986 +{
  987 + struct device *dev = data;
  988 + struct pci_dev *pdev = to_pci_dev(dev);
  989 + u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
  990 + struct protection_domain *domain;
  991 + struct dma_ops_domain *dma_domain;
  992 + struct amd_iommu *iommu;
  993 + int order = amd_iommu_aperture_order;
  994 + unsigned long flags;
  995 +
  996 + if (devid > amd_iommu_last_bdf)
  997 + goto out;
  998 +
  999 + devid = amd_iommu_alias_table[devid];
  1000 +
  1001 + iommu = amd_iommu_rlookup_table[devid];
  1002 + if (iommu == NULL)
  1003 + goto out;
  1004 +
  1005 + domain = domain_for_device(devid);
  1006 +
  1007 + if (domain && !dma_ops_domain(domain))
  1008 + WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
  1009 + "to a non-dma-ops domain\n", dev_name(dev));
  1010 +
  1011 + switch (action) {
  1012 + case BUS_NOTIFY_BOUND_DRIVER:
  1013 + if (domain)
  1014 + goto out;
  1015 + dma_domain = find_protection_domain(devid);
  1016 + if (!dma_domain)
  1017 + dma_domain = iommu->default_dom;
  1018 + attach_device(iommu, &dma_domain->domain, devid);
  1019 + printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
  1020 + "device %s\n", dma_domain->domain.id, dev_name(dev));
  1021 + break;
  1022 + case BUS_NOTIFY_UNBIND_DRIVER:
  1023 + if (!domain)
  1024 + goto out;
  1025 + detach_device(domain, devid);
  1026 + break;
  1027 + case BUS_NOTIFY_ADD_DEVICE:
  1028 + /* allocate a protection domain if a device is added */
  1029 + dma_domain = find_protection_domain(devid);
  1030 + if (dma_domain)
  1031 + goto out;
  1032 + dma_domain = dma_ops_domain_alloc(iommu, order);
  1033 + if (!dma_domain)
  1034 + goto out;
  1035 + dma_domain->target_dev = devid;
  1036 +
  1037 + spin_lock_irqsave(&iommu_pd_list_lock, flags);
  1038 + list_add_tail(&dma_domain->list, &iommu_pd_list);
  1039 + spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
  1040 +
  1041 + break;
  1042 + default:
  1043 + goto out;
  1044 + }
  1045 +
  1046 + iommu_queue_inv_dev_entry(iommu, devid);
  1047 + iommu_completion_wait(iommu);
  1048 +
  1049 +out:
  1050 + return 0;
  1051 +}
  1052 +
  1053 +struct notifier_block device_nb = {
  1054 + .notifier_call = device_change_notifier,
  1055 +};
  1056 +
770 1057 /*****************************************************************************
771 1058 *
772 1059 * The next functions belong to the dma_ops mapping/unmapping code.
... ... @@ -802,7 +1089,6 @@
802 1089 list_for_each_entry(entry, &iommu_pd_list, list) {
803 1090 if (entry->target_dev == devid) {
804 1091 ret = entry;
805   - list_del(&ret->list);
806 1092 break;
807 1093 }
808 1094 }
809 1095  
810 1096  
... ... @@ -853,14 +1139,13 @@
853 1139 if (!dma_dom)
854 1140 dma_dom = (*iommu)->default_dom;
855 1141 *domain = &dma_dom->domain;
856   - set_device_domain(*iommu, *domain, *bdf);
  1142 + attach_device(*iommu, *domain, *bdf);
857 1143 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
858   - "device ", (*domain)->id);
859   - print_devid(_bdf, 1);
  1144 + "device %s\n", (*domain)->id, dev_name(dev));
860 1145 }
861 1146  
862 1147 if (domain_for_device(_bdf) == NULL)
863   - set_device_domain(*iommu, *domain, _bdf);
  1148 + attach_device(*iommu, *domain, _bdf);
864 1149  
865 1150 return 1;
866 1151 }
... ... @@ -946,6 +1231,11 @@
946 1231 pages = iommu_num_pages(paddr, size, PAGE_SIZE);
947 1232 paddr &= PAGE_MASK;
948 1233  
  1234 + INC_STATS_COUNTER(total_map_requests);
  1235 +
  1236 + if (pages > 1)
  1237 + INC_STATS_COUNTER(cross_page);
  1238 +
949 1239 if (align)
950 1240 align_mask = (1UL << get_order(size)) - 1;
951 1241  
... ... @@ -962,6 +1252,8 @@
962 1252 }
963 1253 address += offset;
964 1254  
  1255 + ADD_STATS_COUNTER(alloced_io_mem, size);
  1256 +
965 1257 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
966 1258 iommu_flush_tlb(iommu, dma_dom->domain.id);
967 1259 dma_dom->need_flush = false;
... ... @@ -998,6 +1290,8 @@
998 1290 start += PAGE_SIZE;
999 1291 }
1000 1292  
  1293 + SUB_STATS_COUNTER(alloced_io_mem, size);
  1294 +
1001 1295 dma_ops_free_addresses(dma_dom, dma_addr, pages);
1002 1296  
1003 1297 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
... ... @@ -1019,6 +1313,8 @@
1019 1313 dma_addr_t addr;
1020 1314 u64 dma_mask;
1021 1315  
  1316 + INC_STATS_COUNTER(cnt_map_single);
  1317 +
1022 1318 if (!check_device(dev))
1023 1319 return bad_dma_address;
1024 1320  
... ... @@ -1030,6 +1326,9 @@
1030 1326 /* device not handled by any AMD IOMMU */
1031 1327 return (dma_addr_t)paddr;
1032 1328  
  1329 + if (!dma_ops_domain(domain))
  1330 + return bad_dma_address;
  1331 +
1033 1332 spin_lock_irqsave(&domain->lock, flags);
1034 1333 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
1035 1334 dma_mask);
1036 1335  
... ... @@ -1055,11 +1354,16 @@
1055 1354 struct protection_domain *domain;
1056 1355 u16 devid;
1057 1356  
  1357 + INC_STATS_COUNTER(cnt_unmap_single);
  1358 +
1058 1359 if (!check_device(dev) ||
1059 1360 !get_device_resources(dev, &iommu, &domain, &devid))
1060 1361 /* device not handled by any AMD IOMMU */
1061 1362 return;
1062 1363  
  1364 + if (!dma_ops_domain(domain))
  1365 + return;
  1366 +
1063 1367 spin_lock_irqsave(&domain->lock, flags);
1064 1368  
1065 1369 __unmap_single(iommu, domain->priv, dma_addr, size, dir);
... ... @@ -1104,6 +1408,8 @@
1104 1408 int mapped_elems = 0;
1105 1409 u64 dma_mask;
1106 1410  
  1411 + INC_STATS_COUNTER(cnt_map_sg);
  1412 +
1107 1413 if (!check_device(dev))
1108 1414 return 0;
1109 1415  
... ... @@ -1114,6 +1420,9 @@
1114 1420 if (!iommu || !domain)
1115 1421 return map_sg_no_iommu(dev, sglist, nelems, dir);
1116 1422  
  1423 + if (!dma_ops_domain(domain))
  1424 + return 0;
  1425 +
1117 1426 spin_lock_irqsave(&domain->lock, flags);
1118 1427  
1119 1428 for_each_sg(sglist, s, nelems, i) {
1120 1429  
... ... @@ -1163,10 +1472,15 @@
1163 1472 u16 devid;
1164 1473 int i;
1165 1474  
  1475 + INC_STATS_COUNTER(cnt_unmap_sg);
  1476 +
1166 1477 if (!check_device(dev) ||
1167 1478 !get_device_resources(dev, &iommu, &domain, &devid))
1168 1479 return;
1169 1480  
  1481 + if (!dma_ops_domain(domain))
  1482 + return;
  1483 +
1170 1484 spin_lock_irqsave(&domain->lock, flags);
1171 1485  
1172 1486 for_each_sg(sglist, s, nelems, i) {
... ... @@ -1194,6 +1508,8 @@
1194 1508 phys_addr_t paddr;
1195 1509 u64 dma_mask = dev->coherent_dma_mask;
1196 1510  
  1511 + INC_STATS_COUNTER(cnt_alloc_coherent);
  1512 +
1197 1513 if (!check_device(dev))
1198 1514 return NULL;
1199 1515  
... ... @@ -1212,6 +1528,9 @@
1212 1528 return virt_addr;
1213 1529 }
1214 1530  
  1531 + if (!dma_ops_domain(domain))
  1532 + goto out_free;
  1533 +
1215 1534 if (!dma_mask)
1216 1535 dma_mask = *dev->dma_mask;
1217 1536  
1218 1537  
1219 1538  
... ... @@ -1220,18 +1539,20 @@
1220 1539 *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
1221 1540 size, DMA_BIDIRECTIONAL, true, dma_mask);
1222 1541  
1223   - if (*dma_addr == bad_dma_address) {
1224   - free_pages((unsigned long)virt_addr, get_order(size));
1225   - virt_addr = NULL;
1226   - goto out;
1227   - }
  1542 + if (*dma_addr == bad_dma_address)
  1543 + goto out_free;
1228 1544  
1229 1545 iommu_completion_wait(iommu);
1230 1546  
1231   -out:
1232 1547 spin_unlock_irqrestore(&domain->lock, flags);
1233 1548  
1234 1549 return virt_addr;
  1550 +
  1551 +out_free:
  1552 +
  1553 + free_pages((unsigned long)virt_addr, get_order(size));
  1554 +
  1555 + return NULL;
1235 1556 }
1236 1557  
1237 1558 /*
... ... @@ -1245,6 +1566,8 @@
1245 1566 struct protection_domain *domain;
1246 1567 u16 devid;
1247 1568  
  1569 + INC_STATS_COUNTER(cnt_free_coherent);
  1570 +
1248 1571 if (!check_device(dev))
1249 1572 return;
1250 1573  
... ... @@ -1253,6 +1576,9 @@
1253 1576 if (!iommu || !domain)
1254 1577 goto free_mem;
1255 1578  
  1579 + if (!dma_ops_domain(domain))
  1580 + goto free_mem;
  1581 +
1256 1582 spin_lock_irqsave(&domain->lock, flags);
1257 1583  
1258 1584 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
... ... @@ -1296,7 +1622,7 @@
1296 1622 * we don't need to preallocate the protection domains anymore.
1297 1623 * For now we have to.
1298 1624 */
1299   -void prealloc_protection_domains(void)
  1625 +static void prealloc_protection_domains(void)
1300 1626 {
1301 1627 struct pci_dev *dev = NULL;
1302 1628 struct dma_ops_domain *dma_dom;
... ... @@ -1305,7 +1631,7 @@
1305 1631 u16 devid;
1306 1632  
1307 1633 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1308   - devid = (dev->bus->number << 8) | dev->devfn;
  1634 + devid = calc_devid(dev->bus->number, dev->devfn);
1309 1635 if (devid > amd_iommu_last_bdf)
1310 1636 continue;
1311 1637 devid = amd_iommu_alias_table[devid];
... ... @@ -1352,6 +1678,7 @@
1352 1678 iommu->default_dom = dma_ops_domain_alloc(iommu, order);
1353 1679 if (iommu->default_dom == NULL)
1354 1680 return -ENOMEM;
  1681 + iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
1355 1682 ret = iommu_init_unity_mappings(iommu);
1356 1683 if (ret)
1357 1684 goto free_domains;
... ... @@ -1375,6 +1702,14 @@
1375 1702 /* Make the driver finally visible to the drivers */
1376 1703 dma_ops = &amd_iommu_dma_ops;
1377 1704  
  1705 +#ifdef CONFIG_IOMMU_API
  1706 + register_iommu(&amd_iommu_ops);
  1707 +#endif
  1708 +
  1709 + bus_register_notifier(&pci_bus_type, &device_nb);
  1710 +
  1711 + amd_iommu_stats_init();
  1712 +
1378 1713 return 0;
1379 1714  
1380 1715 free_domains:
... ... @@ -1386,4 +1721,228 @@
1386 1721  
1387 1722 return ret;
1388 1723 }
  1724 +
  1725 +/*****************************************************************************
  1726 + *
  1727 + * The following functions belong to the exported interface of AMD IOMMU
  1728 + *
  1729 + * This interface allows access to lower level functions of the IOMMU
  1730 + * like protection domain handling and assignment of devices to domains
  1731 + * which is not possible with the dma_ops interface.
  1732 + *
  1733 + *****************************************************************************/
  1734 +
  1735 +#ifdef CONFIG_IOMMU_API
  1736 +
  1737 +static void cleanup_domain(struct protection_domain *domain)
  1738 +{
  1739 + unsigned long flags;
  1740 + u16 devid;
  1741 +
  1742 + write_lock_irqsave(&amd_iommu_devtable_lock, flags);
  1743 +
  1744 + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
  1745 + if (amd_iommu_pd_table[devid] == domain)
  1746 + __detach_device(domain, devid);
  1747 +
  1748 + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
  1749 +}
  1750 +
  1751 +static int amd_iommu_domain_init(struct iommu_domain *dom)
  1752 +{
  1753 + struct protection_domain *domain;
  1754 +
  1755 + domain = kzalloc(sizeof(*domain), GFP_KERNEL);
  1756 + if (!domain)
  1757 + return -ENOMEM;
  1758 +
  1759 + spin_lock_init(&domain->lock);
  1760 + domain->mode = PAGE_MODE_3_LEVEL;
  1761 + domain->id = domain_id_alloc();
  1762 + if (!domain->id)
  1763 + goto out_free;
  1764 + domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
  1765 + if (!domain->pt_root)
  1766 + goto out_free;
  1767 +
  1768 + dom->priv = domain;
  1769 +
  1770 + return 0;
  1771 +
  1772 +out_free:
  1773 + kfree(domain);
  1774 +
  1775 + return -ENOMEM;
  1776 +}
  1777 +
  1778 +static void amd_iommu_domain_destroy(struct iommu_domain *dom)
  1779 +{
  1780 + struct protection_domain *domain = dom->priv;
  1781 +
  1782 + if (!domain)
  1783 + return;
  1784 +
  1785 + if (domain->dev_cnt > 0)
  1786 + cleanup_domain(domain);
  1787 +
  1788 + BUG_ON(domain->dev_cnt != 0);
  1789 +
  1790 + free_pagetable(domain);
  1791 +
  1792 + domain_id_free(domain->id);
  1793 +
  1794 + kfree(domain);
  1795 +
  1796 + dom->priv = NULL;
  1797 +}
  1798 +
  1799 +static void amd_iommu_detach_device(struct iommu_domain *dom,
  1800 + struct device *dev)
  1801 +{
  1802 + struct protection_domain *domain = dom->priv;
  1803 + struct amd_iommu *iommu;
  1804 + struct pci_dev *pdev;
  1805 + u16 devid;
  1806 +
  1807 + if (dev->bus != &pci_bus_type)
  1808 + return;
  1809 +
  1810 + pdev = to_pci_dev(dev);
  1811 +
  1812 + devid = calc_devid(pdev->bus->number, pdev->devfn);
  1813 +
  1814 + if (devid > 0)
  1815 + detach_device(domain, devid);
  1816 +
  1817 + iommu = amd_iommu_rlookup_table[devid];
  1818 + if (!iommu)
  1819 + return;
  1820 +
  1821 + iommu_queue_inv_dev_entry(iommu, devid);
  1822 + iommu_completion_wait(iommu);
  1823 +}
  1824 +
  1825 +static int amd_iommu_attach_device(struct iommu_domain *dom,
  1826 + struct device *dev)
  1827 +{
  1828 + struct protection_domain *domain = dom->priv;
  1829 + struct protection_domain *old_domain;
  1830 + struct amd_iommu *iommu;
  1831 + struct pci_dev *pdev;
  1832 + u16 devid;
  1833 +
  1834 + if (dev->bus != &pci_bus_type)
  1835 + return -EINVAL;
  1836 +
  1837 + pdev = to_pci_dev(dev);
  1838 +
  1839 + devid = calc_devid(pdev->bus->number, pdev->devfn);
  1840 +
  1841 + if (devid >= amd_iommu_last_bdf ||
  1842 + devid != amd_iommu_alias_table[devid])
  1843 + return -EINVAL;
  1844 +
  1845 + iommu = amd_iommu_rlookup_table[devid];
  1846 + if (!iommu)
  1847 + return -EINVAL;
  1848 +
  1849 + old_domain = domain_for_device(devid);
  1850 + if (old_domain)
  1851 + return -EBUSY;
  1852 +
  1853 + attach_device(iommu, domain, devid);
  1854 +
  1855 + iommu_completion_wait(iommu);
  1856 +
  1857 + return 0;
  1858 +}
  1859 +
  1860 +static int amd_iommu_map_range(struct iommu_domain *dom,
  1861 + unsigned long iova, phys_addr_t paddr,
  1862 + size_t size, int iommu_prot)
  1863 +{
  1864 + struct protection_domain *domain = dom->priv;
  1865 + unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
  1866 + int prot = 0;
  1867 + int ret;
  1868 +
  1869 + if (iommu_prot & IOMMU_READ)
  1870 + prot |= IOMMU_PROT_IR;
  1871 + if (iommu_prot & IOMMU_WRITE)
  1872 + prot |= IOMMU_PROT_IW;
  1873 +
  1874 + iova &= PAGE_MASK;
  1875 + paddr &= PAGE_MASK;
  1876 +
  1877 + for (i = 0; i < npages; ++i) {
  1878 + ret = iommu_map_page(domain, iova, paddr, prot);
  1879 + if (ret)
  1880 + return ret;
  1881 +
  1882 + iova += PAGE_SIZE;
  1883 + paddr += PAGE_SIZE;
  1884 + }
  1885 +
  1886 + return 0;
  1887 +}
  1888 +
  1889 +static void amd_iommu_unmap_range(struct iommu_domain *dom,
  1890 + unsigned long iova, size_t size)
  1891 +{
  1892 +
  1893 + struct protection_domain *domain = dom->priv;
  1894 + unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);
  1895 +
  1896 + iova &= PAGE_MASK;
  1897 +
  1898 + for (i = 0; i < npages; ++i) {
  1899 + iommu_unmap_page(domain, iova);
  1900 + iova += PAGE_SIZE;
  1901 + }
  1902 +
  1903 + iommu_flush_domain(domain->id);
  1904 +}
  1905 +
  1906 +static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
  1907 + unsigned long iova)
  1908 +{
  1909 + struct protection_domain *domain = dom->priv;
  1910 + unsigned long offset = iova & ~PAGE_MASK;
  1911 + phys_addr_t paddr;
  1912 + u64 *pte;
  1913 +
  1914 + pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
  1915 +
  1916 + if (!IOMMU_PTE_PRESENT(*pte))
  1917 + return 0;
  1918 +
  1919 + pte = IOMMU_PTE_PAGE(*pte);
  1920 + pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
  1921 +
  1922 + if (!IOMMU_PTE_PRESENT(*pte))
  1923 + return 0;
  1924 +
  1925 + pte = IOMMU_PTE_PAGE(*pte);
  1926 + pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
  1927 +
  1928 + if (!IOMMU_PTE_PRESENT(*pte))
  1929 + return 0;
  1930 +
  1931 + paddr = *pte & IOMMU_PAGE_MASK;
  1932 + paddr |= offset;
  1933 +
  1934 + return paddr;
  1935 +}
  1936 +
  1937 +static struct iommu_ops amd_iommu_ops = {
  1938 + .domain_init = amd_iommu_domain_init,
  1939 + .domain_destroy = amd_iommu_domain_destroy,
  1940 + .attach_dev = amd_iommu_attach_device,
  1941 + .detach_dev = amd_iommu_detach_device,
  1942 + .map = amd_iommu_map_range,
  1943 + .unmap = amd_iommu_unmap_range,
  1944 + .iova_to_phys = amd_iommu_iova_to_phys,
  1945 +};
  1946 +
  1947 +#endif
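
As a rough illustration of the exported interface described above, a hedged sketch of how a consumer (device assignment in a hypervisor is the intended user) might drive these callbacks through the generic layer provided by the merged 'iommu/api' branch. The wrapper names iommu_domain_alloc(), iommu_attach_device(), iommu_detach_device(), iommu_map_range() and iommu_domain_free() are assumed from that branch and are not shown in this diff; only the IOMMU_READ/IOMMU_WRITE flags and the amd_iommu_ops callbacks they reach appear here:

	/* sketch only -- assumes the generic wrappers from <linux/iommu.h>
	 * introduced by the iommu/api branch; error handling trimmed */
	static int example_assign_device(struct pci_dev *pdev,
					 phys_addr_t paddr, size_t size)
	{
		struct iommu_domain *dom;

		dom = iommu_domain_alloc();	/* -> amd_iommu_domain_init()    */
		if (!dom)
			return -ENOMEM;

		if (iommu_attach_device(dom, &pdev->dev)) /* -> amd_iommu_attach_device() */
			goto out_free;

		/* identity-map a region for the device; -> amd_iommu_map_range() */
		if (iommu_map_range(dom, paddr, paddr, size,
				    IOMMU_READ | IOMMU_WRITE))
			goto out_detach;

		return 0;

	out_detach:
		iommu_detach_device(dom, &pdev->dev); /* -> amd_iommu_detach_device()  */
	out_free:
		iommu_domain_free(dom);		/* -> amd_iommu_domain_destroy() */
		return -EIO;
	}
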
arch/x86/kernel/amd_iommu_init.c
... ... @@ -122,7 +122,8 @@
122 122 LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
123 123 we find in ACPI */
124 124 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
125   -int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */
  125 +bool amd_iommu_isolate = true; /* if true, device isolation is
  126 + enabled */
126 127 bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
127 128  
128 129 LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
... ... @@ -245,12 +246,8 @@
245 246 /* Function to enable the hardware */
246 247 void __init iommu_enable(struct amd_iommu *iommu)
247 248 {
248   - printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
249   - "at %02x:%02x.%x cap 0x%hx\n",
250   - iommu->dev->bus->number,
251   - PCI_SLOT(iommu->dev->devfn),
252   - PCI_FUNC(iommu->dev->devfn),
253   - iommu->cap_ptr);
  249 + printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
  250 + dev_name(&iommu->dev->dev), iommu->cap_ptr);
254 251  
255 252 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
256 253 }
257 254  
... ... @@ -1218,9 +1215,9 @@
1218 1215 {
1219 1216 for (; *str; ++str) {
1220 1217 if (strncmp(str, "isolate", 7) == 0)
1221   - amd_iommu_isolate = 1;
  1218 + amd_iommu_isolate = true;
1222 1219 if (strncmp(str, "share", 5) == 0)
1223   - amd_iommu_isolate = 0;
  1220 + amd_iommu_isolate = false;
1224 1221 if (strncmp(str, "fullflush", 9) == 0)
1225 1222 amd_iommu_unmap_flush = true;
1226 1223 }