Commit 4e8b0cf46b2570331a4c4157d53906883c442a22

Authored by Nishanth Aravamudan
Committed by Benjamin Herrenschmidt
1 parent 6edc642ebe

powerpc/pseries: Add support for dynamic dma windows

If firmware allows us to map all of a partition's memory for DMA on a
particular bridge, create a 1:1 mapping of that memory. Add hooks for
dealing with memory hotplug events. Dynamic DMA windows can use page sizes
larger than the default, and we use the largest one possible.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Showing 2 changed files with 591 additions and 0 deletions
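A minimal sketch of how a driver opts in to the new path (the probe routine and device are hypothetical; on pseries, dma_set_mask() reaches the dma_set_mask_pSeriesLP() added below through the ppc_md.dma_set_mask hook, and only a full 64-bit mask attempts window creation):

	#include <linux/pci.h>
	#include <linux/dma-mapping.h>

	/* Hypothetical probe routine; not part of this commit. */
	static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	{
		/* A full 64-bit mask is what makes dma_set_mask_pSeriesLP()
		 * try enable_ddw(); any smaller mask keeps the device on the
		 * 32-bit iommu path. */
		if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
			return -EIO;

		/* With DDW enabled, dma_direct_ops plus the recorded dma
		 * offset give the device a 1:1 view of partition memory. */
		return 0;
	}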

Documentation/kernel-parameters.txt
... ... @@ -626,6 +626,10 @@
626 626 disable= [IPV6]
627 627 See Documentation/networking/ipv6.txt.
628 628  
  629 + disable_ddw [PPC/PSERIES]
  630 + Disable Dynamic DMA Window support. Use this
  631 + to work around buggy firmware.
  632 +
629 633 disable_ipv6= [IPV6]
630 634 See Documentation/networking/ipv6.txt.
631 635  
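As a usage note, the new flag takes no value; appending the bare word to the kernel command line is enough. For example (label and root device hypothetical), in a yaboot.conf stanza:

	image=/vmlinux
		label=linux
		append="root=/dev/sda2 disable_ddw"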
arch/powerpc/platforms/pseries/iommu.c
... ... @@ -33,6 +33,7 @@
33 33 #include <linux/pci.h>
34 34 #include <linux/dma-mapping.h>
35 35 #include <linux/crash_dump.h>
  36 +#include <linux/memory.h>
36 37 #include <asm/io.h>
37 38 #include <asm/prom.h>
38 39 #include <asm/rtas.h>
... ... @@ -45,6 +46,7 @@
45 46 #include <asm/tce.h>
46 47 #include <asm/ppc-pci.h>
47 48 #include <asm/udbg.h>
  49 +#include <asm/mmzone.h>
48 50  
49 51 #include "plpar_wrappers.h"
50 52  
... ... @@ -270,6 +272,152 @@
270 272 return tce_ret;
271 273 }
272 274  
  275 +/* this is compatible with the cells of the device tree property */
  276 +struct dynamic_dma_window_prop {
  277 + __be32 liobn; /* tce table number */
  278 + __be64 dma_base; /* address hi,lo */
  279 + __be32 tce_shift; /* ilog2(tce_page_size) */
  280 + __be32 window_shift; /* ilog2(tce_window_size) */
  281 +};
  282 +
  283 +struct direct_window {
  284 + struct device_node *device;
  285 + const struct dynamic_dma_window_prop *prop;
  286 + struct list_head list;
  287 +};
  288 +
  289 +/* Dynamic DMA Window support */
  290 +struct ddw_query_response {
  291 + u32 windows_available;
  292 + u32 largest_available_block;
  293 + u32 page_size;
  294 + u32 migration_capable;
  295 +};
  296 +
  297 +struct ddw_create_response {
  298 + u32 liobn;
  299 + u32 addr_hi;
  300 + u32 addr_lo;
  301 +};
  302 +
  303 +static LIST_HEAD(direct_window_list);
  304 +/* prevents races between memory on/offline and window creation */
  305 +static DEFINE_SPINLOCK(direct_window_list_lock);
  306 +/* protects initializing window twice for same device */
  307 +static DEFINE_MUTEX(direct_window_init_mutex);
  308 +#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
  309 +
  310 +static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
  311 + unsigned long num_pfn, const void *arg)
  312 +{
  313 + const struct dynamic_dma_window_prop *maprange = arg;
  314 + int rc;
  315 + u64 tce_size, num_tce, dma_offset, next;
  316 + u32 tce_shift;
  317 + long limit;
  318 +
  319 + tce_shift = be32_to_cpu(maprange->tce_shift);
  320 + tce_size = 1ULL << tce_shift;
  321 + next = start_pfn << PAGE_SHIFT;
  322 + num_tce = num_pfn << PAGE_SHIFT;
  323 +
  324 + /* round back to the beginning of the tce page size */
  325 + num_tce += next & (tce_size - 1);
  326 + next &= ~(tce_size - 1);
  327 +
  328 + /* convert to number of tces */
  329 + num_tce |= tce_size - 1;
  330 + num_tce >>= tce_shift;
  331 +
  332 + do {
  333 + /*
  334 + * Set up the page with TCE data, looping through and setting
  335 + * the values.
  336 + */
  337 + limit = min_t(long, num_tce, 512);
  338 + dma_offset = next + be64_to_cpu(maprange->dma_base);
  339 +
  340 + rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn),
  341 + dma_offset,
  342 + 0, limit);
  343 + num_tce -= limit;
  344 + } while (num_tce > 0 && !rc);
  345 +
  346 + return rc;
  347 +}
  348 +
  349 +static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
  350 + unsigned long num_pfn, const void *arg)
  351 +{
  352 + const struct dynamic_dma_window_prop *maprange = arg;
  353 + u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn;
  354 + u32 tce_shift;
  355 + u64 rc = 0;
  356 + long l, limit;
  357 +
  358 + local_irq_disable(); /* to protect tcep and the page behind it */
  359 + tcep = __get_cpu_var(tce_page);
  360 +
  361 + if (!tcep) {
  362 + tcep = (u64 *)__get_free_page(GFP_ATOMIC);
  363 + if (!tcep) {
  364 + local_irq_enable();
  365 + return -ENOMEM;
  366 + }
  367 + __get_cpu_var(tce_page) = tcep;
  368 + }
  369 +
  370 + proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
  371 +
  372 + liobn = (u64)be32_to_cpu(maprange->liobn);
  373 + tce_shift = be32_to_cpu(maprange->tce_shift);
  374 + tce_size = 1ULL << tce_shift;
  375 + next = start_pfn << PAGE_SHIFT;
  376 + num_tce = num_pfn << PAGE_SHIFT;
  377 +
  378 + /* round back to the beginning of the tce page size */
  379 + num_tce += next & (tce_size - 1);
  380 + next &= ~(tce_size - 1);
  381 +
  382 + /* convert to number of tces */
  383 + num_tce |= tce_size - 1;
  384 + num_tce >>= tce_shift;
  385 +
  386 + /* We can map max one pageful of TCEs at a time */
  387 + do {
  388 + /*
  389 + * Set up the page with TCE data, looping through and setting
  390 + * the values.
  391 + */
  392 + limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE);
  393 + dma_offset = next + be64_to_cpu(maprange->dma_base);
  394 +
  395 + for (l = 0; l < limit; l++) {
  396 + tcep[l] = proto_tce | next;
  397 + next += tce_size;
  398 + }
  399 +
  400 + rc = plpar_tce_put_indirect(liobn,
  401 + dma_offset,
  402 + (u64)virt_to_abs(tcep),
  403 + limit);
  404 +
  405 + num_tce -= limit;
  406 + } while (num_tce > 0 && !rc);
  407 +
  408 + /* error cleanup: caller will clear whole range */
  409 +
  410 + local_irq_enable();
  411 + return rc;
  412 +}
  413 +
  414 +static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
  415 + unsigned long num_pfn, void *arg)
  416 +{
  417 + return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
  418 +}
  419 +
  420 +
273 421 #ifdef CONFIG_PCI
274 422 static void iommu_table_setparms(struct pci_controller *phb,
275 423 struct device_node *dn,
... ... @@ -495,6 +643,329 @@
495 643 pci_name(dev));
496 644 }
497 645  
  646 +static int __read_mostly disable_ddw;
  647 +
  648 +static int __init disable_ddw_setup(char *str)
  649 +{
  650 + disable_ddw = 1;
  651 + printk(KERN_INFO "ppc iommu: disabling ddw.\n");
  652 +
  653 + return 0;
  654 +}
  655 +
  656 +early_param("disable_ddw", disable_ddw_setup);
  657 +
  658 +static void remove_ddw(struct device_node *np)
  659 +{
  660 + struct dynamic_dma_window_prop *dwp;
  661 + struct property *win64;
  662 + const u32 *ddr_avail;
  663 + u64 liobn;
  664 + int len, ret;
  665 +
  666 + ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len);
  667 + win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
  668 + if (!win64 || !ddr_avail || len < 3 * sizeof(u32))
  669 + return;
  670 +
  671 + dwp = win64->value;
  672 + liobn = (u64)be32_to_cpu(dwp->liobn);
  673 +
  674 + /* clear the whole window, note the arg is in kernel pages */
  675 + ret = tce_clearrange_multi_pSeriesLP(0,
  676 + 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
  677 + if (ret)
  678 + pr_warning("%s failed to clear tces in window.\n",
  679 + np->full_name);
  680 + else
  681 + pr_debug("%s successfully cleared tces in window.\n",
  682 + np->full_name);
  683 +
  684 + ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn);
  685 + if (ret)
  686 + pr_warning("%s: failed to remove direct window: rtas returned "
  687 + "%d to ibm,remove-pe-dma-window(%x) %llx\n",
  688 + np->full_name, ret, ddr_avail[2], liobn);
  689 + else
  690 + pr_debug("%s: successfully removed direct window: rtas returned "
  691 + "%d to ibm,remove-pe-dma-window(%x) %llx\n",
  692 + np->full_name, ret, ddr_avail[2], liobn);
  693 +}
  694 +
  695 +
  696 +static u64 dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn)
  697 +{
  698 + struct device_node *dn;
  699 + struct pci_dn *pcidn;
  700 + struct direct_window *window;
  701 + const struct dynamic_dma_window_prop *direct64;
  702 + u64 dma_addr = 0;
  703 +
  704 + dn = pci_device_to_OF_node(dev);
  705 + pcidn = PCI_DN(dn);
  706 + spin_lock(&direct_window_list_lock);
  707 + /* check if we already created a window and dupe that config if so */
  708 + list_for_each_entry(window, &direct_window_list, list) {
  709 + if (window->device == pdn) {
  710 + direct64 = window->prop;
  711 + dma_addr = direct64->dma_base;
  712 + break;
  713 + }
  714 + }
  715 + spin_unlock(&direct_window_list_lock);
  716 +
  717 + return dma_addr;
  718 +}
  719 +
  720 +static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn)
  721 +{
  722 + struct device_node *dn;
  723 + struct pci_dn *pcidn;
  724 + int len;
  725 + struct direct_window *window;
  726 + const struct dynamic_dma_window_prop *direct64;
  727 + u64 dma_addr = 0;
  728 +
  729 + dn = pci_device_to_OF_node(dev);
  730 + pcidn = PCI_DN(dn);
  731 + direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
  732 + if (direct64) {
  733 + window = kzalloc(sizeof(*window), GFP_KERNEL);
  734 + if (!window) {
  735 + remove_ddw(pdn);
  736 + } else {
  737 + window->device = pdn;
  738 + window->prop = direct64;
  739 + spin_lock(&direct_window_list_lock);
  740 + list_add(&window->list, &direct_window_list);
  741 + spin_unlock(&direct_window_list_lock);
  742 + dma_addr = direct64->dma_base;
  743 + }
  744 + }
  745 +
  746 + return dma_addr;
  747 +}
  748 +
  749 +static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
  750 + struct ddw_query_response *query)
  751 +{
  752 + struct device_node *dn;
  753 + struct pci_dn *pcidn;
  754 + u32 cfg_addr;
  755 + u64 buid;
  756 + int ret;
  757 +
  758 + /*
  759 + * Get the config address and phb buid of the PE window.
  760 + * Rely on eeh to retrieve this for us.
  761 + * Retrieve them from the pci device, not the node with the
  762 + * dma-window property
  763 + */
  764 + dn = pci_device_to_OF_node(dev);
  765 + pcidn = PCI_DN(dn);
  766 + cfg_addr = pcidn->eeh_config_addr;
  767 + if (pcidn->eeh_pe_config_addr)
  768 + cfg_addr = pcidn->eeh_pe_config_addr;
  769 + buid = pcidn->phb->buid;
  770 + ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query,
  771 + cfg_addr, BUID_HI(buid), BUID_LO(buid));
  772 + dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
  773 + " returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid),
  774 + BUID_LO(buid), ret);
  775 + return ret;
  776 +}
  777 +
  778 +static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
  779 + struct ddw_create_response *create, int page_shift,
  780 + int window_shift)
  781 +{
  782 + struct device_node *dn;
  783 + struct pci_dn *pcidn;
  784 + u32 cfg_addr;
  785 + u64 buid;
  786 + int ret;
  787 +
  788 + /*
  789 + * Get the config address and phb buid of the PE window.
  790 + * Rely on eeh to retrieve this for us.
  791 + * Retrieve them from the pci device, not the node with the
  792 + * dma-window property
  793 + */
  794 + dn = pci_device_to_OF_node(dev);
  795 + pcidn = PCI_DN(dn);
  796 + cfg_addr = pcidn->eeh_config_addr;
  797 + if (pcidn->eeh_pe_config_addr)
  798 + cfg_addr = pcidn->eeh_pe_config_addr;
  799 + buid = pcidn->phb->buid;
  800 +
  801 + do {
  802 + /* extra outputs are LIOBN and dma-addr (hi, lo) */
  803 + ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr,
  804 + BUID_HI(buid), BUID_LO(buid), page_shift, window_shift);
  805 + } while (rtas_busy_delay(ret));
  806 + dev_info(&dev->dev,
  807 + "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
  808 + "(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1],
  809 + cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
  810 + window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
  811 +
  812 + return ret;
  813 +}
  814 +
  815 +/*
  816 + * If the PE supports dynamic dma windows, and there is space for a table
  817 + * that can map all pages in a linear offset, then setup such a table,
  818 + * and record the dma-offset in the struct device.
  819 + *
  820 + * dev: the pci device we are checking
  821 + * pdn: the parent pe node with the ibm,dma_window property
  822 + * Future: also check if we can remap the base window for our base page size
  823 + *
  824 + * returns the dma offset for use by dma_set_mask
  825 + */
  826 +static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
  827 +{
  828 + int len, ret;
  829 + struct ddw_query_response query;
  830 + struct ddw_create_response create;
  831 + int page_shift;
  832 + u64 dma_addr, max_addr;
  833 + struct device_node *dn;
  834 + const u32 *uninitialized_var(ddr_avail);
  835 + struct direct_window *window;
  836 + struct property *uninitialized_var(win64);
  837 + struct dynamic_dma_window_prop *ddwprop;
  838 +
  839 + mutex_lock(&direct_window_init_mutex);
  840 +
  841 + dma_addr = dupe_ddw_if_already_created(dev, pdn);
  842 + if (dma_addr != 0)
  843 + goto out_unlock;
  844 +
  845 + dma_addr = dupe_ddw_if_kexec(dev, pdn);
  846 + if (dma_addr != 0)
  847 + goto out_unlock;
  848 +
  849 + /*
  850 + * the ibm,ddw-applicable property holds the tokens for:
  851 + * ibm,query-pe-dma-window
  852 + * ibm,create-pe-dma-window
  853 + * ibm,remove-pe-dma-window
  854 + * for the given node in that order.
  855 + * the property is actually in the parent, not the PE
  856 + */
  857 + ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
  858 + if (!ddr_avail || len < 3 * sizeof(u32))
  859 + goto out_unlock;
  860 +
  861 + /*
  862 + * Query if there is a second window of size to map the
  863 + * whole partition. Query returns number of windows, largest
  864 + * block assigned to PE (partition endpoint), and two bitmasks
  865 + * of page sizes: supported and supported for migrate-dma.
  866 + */
  867 + dn = pci_device_to_OF_node(dev);
  868 + ret = query_ddw(dev, ddr_avail, &query);
  869 + if (ret != 0)
  870 + goto out_unlock;
  871 +
  872 + if (query.windows_available == 0) {
  873 + /*
  874 + * no additional windows are available for this device.
  875 + * We might be able to reallocate the existing window,
  876 + * trading in for a larger page size.
  877 + */
  878 + dev_dbg(&dev->dev, "no free dynamic windows");
  879 + goto out_unlock;
  880 + }
  881 + if (query.page_size & 4) {
  882 + page_shift = 24; /* 16MB */
  883 + } else if (query.page_size & 2) {
  884 + page_shift = 16; /* 64kB */
  885 + } else if (query.page_size & 1) {
  886 + page_shift = 12; /* 4kB */
  887 + } else {
  888 + dev_dbg(&dev->dev, "no supported direct page size in mask %x",
  889 + query.page_size);
  890 + goto out_unlock;
  891 + }
  892 + /* verify the window * number of ptes will map the partition */
  893 + /* check largest block * page size > max memory hotplug addr */
  894 + max_addr = memory_hotplug_max();
  895 + if (query.largest_available_block < (max_addr >> page_shift)) {
  896 + dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
  897 + "%llu-sized pages\n", max_addr, query.largest_available_block,
  898 + 1ULL << page_shift);
  899 + goto out_unlock;
  900 + }
  901 + len = order_base_2(max_addr);
  902 + win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
  903 + if (!win64) {
  904 + dev_info(&dev->dev,
  905 + "couldn't allocate property for 64bit dma window\n");
  906 + goto out_unlock;
  907 + }
  908 + win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
  909 + win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
  910 + if (!win64->name || !win64->value) {
  911 + dev_info(&dev->dev,
  912 + "couldn't allocate property name and value\n");
  913 + goto out_free_prop;
  914 + }
  915 +
  916 + ret = create_ddw(dev, ddr_avail, &create, page_shift, len);
  917 + if (ret != 0)
  918 + goto out_free_prop;
  919 +
  920 + ddwprop->liobn = cpu_to_be32(create.liobn);
  921 + ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2));
  922 + ddwprop->tce_shift = cpu_to_be32(page_shift);
  923 + ddwprop->window_shift = cpu_to_be32(len);
  924 +
  925 + dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %s\n",
  926 + create.liobn, dn->full_name);
  927 +
  928 + window = kzalloc(sizeof(*window), GFP_KERNEL);
  929 + if (!window)
  930 + goto out_clear_window;
  931 +
  932 + ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
  933 + win64->value, tce_setrange_multi_pSeriesLP_walk);
  934 + if (ret) {
  935 + dev_info(&dev->dev, "failed to map direct window for %s: %d\n",
  936 + dn->full_name, ret);
  937 + goto out_clear_window;
  938 + }
  939 +
  940 + ret = prom_add_property(pdn, win64);
  941 + if (ret) {
  942 + dev_err(&dev->dev, "unable to add dma window property for %s: %d",
  943 + pdn->full_name, ret);
  944 + goto out_clear_window;
  945 + }
  946 +
  947 + window->device = pdn;
  948 + window->prop = ddwprop;
  949 + spin_lock(&direct_window_list_lock);
  950 + list_add(&window->list, &direct_window_list);
  951 + spin_unlock(&direct_window_list_lock);
  952 +
  953 + dma_addr = of_read_number(&create.addr_hi, 2);
  954 + goto out_unlock;
  955 +
  956 +out_clear_window:
  957 + remove_ddw(pdn);
  958 +
  959 +out_free_prop:
  960 + kfree(win64->name);
  961 + kfree(win64->value);
  962 + kfree(win64);
  963 +
  964 +out_unlock:
  965 + mutex_unlock(&direct_window_init_mutex);
  966 + return dma_addr;
  967 +}
  968 +
498 969 static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
499 970 {
500 971 struct device_node *pdn, *dn;
501 972  
502 973  
503 974  
504 975  
... ... @@ -541,23 +1012,137 @@
541 1012  
542 1013 set_iommu_table_base(&dev->dev, pci->iommu_table);
543 1014 }
  1015 +
  1016 +static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
  1017 +{
  1018 + bool ddw_enabled = false;
  1019 + struct device_node *pdn, *dn;
  1020 + struct pci_dev *pdev;
  1021 + const void *dma_window = NULL;
  1022 + u64 dma_offset;
  1023 +
  1024 + if (!dev->dma_mask || !dma_supported(dev, dma_mask))
  1025 + return -EIO;
  1026 +
  1027 + /* only attempt to use a new window if 64-bit DMA is requested */
  1028 + if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
  1029 + pdev = to_pci_dev(dev);
  1030 +
  1031 + dn = pci_device_to_OF_node(pdev);
  1032 + dev_dbg(dev, "node is %s\n", dn->full_name);
  1033 +
  1034 + /*
  1035 + * the device tree might contain the dma-window properties
  1036 + * per-device and not necessarily for the bus. So we need to
  1037 + * search upwards in the tree until we either hit a dma-window
  1038 + * property, OR find a parent with a table already allocated.
  1039 + */
  1040 + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
  1041 + pdn = pdn->parent) {
  1042 + dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
  1043 + if (dma_window)
  1044 + break;
  1045 + }
  1046 + if (pdn && PCI_DN(pdn)) {
  1047 + dma_offset = enable_ddw(pdev, pdn);
  1048 + if (dma_offset != 0) {
  1049 + dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset);
  1050 + set_dma_offset(dev, dma_offset);
  1051 + set_dma_ops(dev, &dma_direct_ops);
  1052 + ddw_enabled = true;
  1053 + }
  1054 + }
  1055 + }
  1056 +
  1057 + /* fall-through to iommu ops */
  1058 + if (!ddw_enabled) {
  1059 + dev_info(dev, "Using 32-bit DMA via iommu\n");
  1060 + set_dma_ops(dev, &dma_iommu_ops);
  1061 + }
  1062 +
  1063 + *dev->dma_mask = dma_mask;
  1064 + return 0;
  1065 +}
  1066 +
544 1067 #else /* CONFIG_PCI */
545 1068 #define pci_dma_bus_setup_pSeries NULL
546 1069 #define pci_dma_dev_setup_pSeries NULL
547 1070 #define pci_dma_bus_setup_pSeriesLP NULL
548 1071 #define pci_dma_dev_setup_pSeriesLP NULL
  1072 +#define dma_set_mask_pSeriesLP NULL
549 1073 #endif /* !CONFIG_PCI */
550 1074  
  1075 +static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
  1076 + void *data)
  1077 +{
  1078 + struct direct_window *window;
  1079 + struct memory_notify *arg = data;
  1080 + int ret = 0;
  1081 +
  1082 + switch (action) {
  1083 + case MEM_GOING_ONLINE:
  1084 + spin_lock(&direct_window_list_lock);
  1085 + list_for_each_entry(window, &direct_window_list, list) {
  1086 + ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
  1087 + arg->nr_pages, window->prop);
  1088 + /* XXX log error */
  1089 + }
  1090 + spin_unlock(&direct_window_list_lock);
  1091 + break;
  1092 + case MEM_CANCEL_ONLINE:
  1093 + case MEM_OFFLINE:
  1094 + spin_lock(&direct_window_list_lock);
  1095 + list_for_each_entry(window, &direct_window_list, list) {
  1096 + ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
  1097 + arg->nr_pages, window->prop);
  1098 + /* XXX log error */
  1099 + }
  1100 + spin_unlock(&direct_window_list_lock);
  1101 + break;
  1102 + default:
  1103 + break;
  1104 + }
  1105 + if (ret && action != MEM_CANCEL_ONLINE)
  1106 + return NOTIFY_BAD;
  1107 +
  1108 + return NOTIFY_OK;
  1109 +}
  1110 +
  1111 +static struct notifier_block iommu_mem_nb = {
  1112 + .notifier_call = iommu_mem_notifier,
  1113 +};
  1114 +
551 1115 static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
552 1116 {
553 1117 int err = NOTIFY_OK;
554 1118 struct device_node *np = node;
555 1119 struct pci_dn *pci = PCI_DN(np);
  1120 + struct direct_window *window;
556 1121  
557 1122 switch (action) {
558 1123 case PSERIES_RECONFIG_REMOVE:
559 1124 if (pci && pci->iommu_table)
560 1125 iommu_free_table(pci->iommu_table, np->full_name);
  1126 +
  1127 + spin_lock(&direct_window_list_lock);
  1128 + list_for_each_entry(window, &direct_window_list, list) {
  1129 + if (window->device == np) {
  1130 + list_del(&window->list);
  1131 + kfree(window);
  1132 + break;
  1133 + }
  1134 + }
  1135 + spin_unlock(&direct_window_list_lock);
  1136 +
  1137 + /*
  1138 + * Because the notifier runs after isolation of the
  1139 + * slot, we are guaranteed any DMA window has already
  1140 + * been revoked and the TCEs have been marked invalid,
  1141 + * so we don't need a call to remove_ddw(np). However,
  1142 + * if an additional notifier action is added before the
  1143 + * isolate call, we should update this code for
  1144 + * completeness with such a call.
  1145 + */
561 1146 break;
562 1147 default:
563 1148 err = NOTIFY_DONE;
... ... @@ -587,6 +1172,7 @@
587 1172 ppc_md.tce_get = tce_get_pSeriesLP;
588 1173 ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
589 1174 ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
  1175 + ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
590 1176 } else {
591 1177 ppc_md.tce_build = tce_build_pSeries;
592 1178 ppc_md.tce_free = tce_free_pSeries;
... ... @@ -597,6 +1183,7 @@
597 1183  
598 1184  
599 1185 pSeries_reconfig_notifier_register(&iommu_reconfig_nb);
  1186 + register_memory_notifier(&iommu_mem_nb);
600 1187  
601 1188 set_pci_dma_ops(&dma_iommu_ops);
602 1189 }
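As a worked example of the sizing logic in enable_ddw() (the numbers are hypothetical): on a partition where memory_hotplug_max() returns 64GB (2^36) and firmware reports 16MB direct pages (query.page_size & 4), page_shift is 24 and len = order_base_2(2^36) = 36. The query must then report largest_available_block >= 2^36 >> 24 = 4096 TCEs, create_ddw() is called with page_shift 24 and window_shift 36, and the resulting linux,direct64-ddr-window-info property records liobn, dma_base, tce_shift = 24 and window_shift = 36 as big-endian cells: 4096 16MB TCEs mapping the whole 64GB linearly from dma_base.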