Commit 4e8b0cf46b2570331a4c4157d53906883c442a22
Committed by
Benjamin Herrenschmidt
1 parent
6edc642ebe
Exists in
master
and in
20 other branches
powerpc/pseries: Add support for dynamic dma windows
If firmware allows us to map all of a partition's memory for DMA on a particular bridge, create a 1:1 mapping of that memory. Add hooks for dealing with hotplug events. Dynamic DMA windows can use larger than the default page size, and we use the largest one possible. Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Showing 2 changed files with 591 additions and 0 deletions Side-by-side Diff
Documentation/kernel-parameters.txt
... | ... | @@ -626,6 +626,10 @@ |
626 | 626 | disable= [IPV6] |
627 | 627 | See Documentation/networking/ipv6.txt. |
628 | 628 | |
629 | + disable_ddw [PPC/PSERIES] | |
630 | + Disable Dynamic DMA Window support. Use this | |
631 | + to work around buggy firmware. | |
632 | + | |
629 | 633 | disable_ipv6= [IPV6] |
630 | 634 | See Documentation/networking/ipv6.txt. |
631 | 635 |
arch/powerpc/platforms/pseries/iommu.c
... | ... | @@ -33,6 +33,7 @@ |
33 | 33 | #include <linux/pci.h> |
34 | 34 | #include <linux/dma-mapping.h> |
35 | 35 | #include <linux/crash_dump.h> |
36 | +#include <linux/memory.h> | |
36 | 37 | #include <asm/io.h> |
37 | 38 | #include <asm/prom.h> |
38 | 39 | #include <asm/rtas.h> |
... | ... | @@ -45,6 +46,7 @@ |
45 | 46 | #include <asm/tce.h> |
46 | 47 | #include <asm/ppc-pci.h> |
47 | 48 | #include <asm/udbg.h> |
49 | +#include <asm/mmzone.h> | |
48 | 50 | |
49 | 51 | #include "plpar_wrappers.h" |
50 | 52 | |
... | ... | @@ -270,6 +272,152 @@ |
270 | 272 | return tce_ret; |
271 | 273 | } |
272 | 274 | |
275 | +/* this is compatible with cells for the device tree property */ | |
276 | +struct dynamic_dma_window_prop { | |
277 | + __be32 liobn; /* tce table number */ | |
278 | + __be64 dma_base; /* address hi,lo */ | |
279 | + __be32 tce_shift; /* ilog2(tce_page_size) */ | |
280 | + __be32 window_shift; /* ilog2(tce_window_size) */ | |
281 | +}; | |
282 | + | |
283 | +struct direct_window { | |
284 | + struct device_node *device; | |
285 | + const struct dynamic_dma_window_prop *prop; | |
286 | + struct list_head list; | |
287 | +}; | |
288 | + | |
289 | +/* Dynamic DMA Window support */ | |
290 | +struct ddw_query_response { | |
291 | + u32 windows_available; | |
292 | + u32 largest_available_block; | |
293 | + u32 page_size; | |
294 | + u32 migration_capable; | |
295 | +}; | |
296 | + | |
297 | +struct ddw_create_response { | |
298 | + u32 liobn; | |
299 | + u32 addr_hi; | |
300 | + u32 addr_lo; | |
301 | +}; | |
302 | + | |
303 | +static LIST_HEAD(direct_window_list); | |
304 | +/* prevents races between memory on/offline and window creation */ | |
305 | +static DEFINE_SPINLOCK(direct_window_list_lock); | |
306 | +/* protects initializing window twice for same device */ | |
307 | +static DEFINE_MUTEX(direct_window_init_mutex); | |
308 | +#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info" | |
309 | + | |
310 | +static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn, | |
311 | + unsigned long num_pfn, const void *arg) | |
312 | +{ | |
313 | + const struct dynamic_dma_window_prop *maprange = arg; | |
314 | + int rc; | |
315 | + u64 tce_size, num_tce, dma_offset, next; | |
316 | + u32 tce_shift; | |
317 | + long limit; | |
318 | + | |
319 | + tce_shift = be32_to_cpu(maprange->tce_shift); | |
320 | + tce_size = 1ULL << tce_shift; | |
321 | + next = start_pfn << PAGE_SHIFT; | |
322 | + num_tce = num_pfn << PAGE_SHIFT; | |
323 | + | |
324 | + /* round back to the beginning of the tce page size */ | |
325 | + num_tce += next & (tce_size - 1); | |
326 | + next &= ~(tce_size - 1); | |
327 | + | |
328 | + /* convert to number of tces */ | |
329 | + num_tce |= tce_size - 1; | |
330 | + num_tce >>= tce_shift; | |
331 | + | |
332 | + do { | |
333 | + /* | |
334 | + * Set up the page with TCE data, looping through and setting | |
335 | + * the values. | |
336 | + */ | |
337 | + limit = min_t(long, num_tce, 512); | |
338 | + dma_offset = next + be64_to_cpu(maprange->dma_base); | |
339 | + | |
340 | + rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn), | |
341 | + dma_offset, | |
342 | + 0, limit); | |
343 | + num_tce -= limit; | |
344 | + } while (num_tce > 0 && !rc); | |
345 | + | |
346 | + return rc; | |
347 | +} | |
348 | + | |
349 | +static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, | |
350 | + unsigned long num_pfn, const void *arg) | |
351 | +{ | |
352 | + const struct dynamic_dma_window_prop *maprange = arg; | |
353 | + u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn; | |
354 | + u32 tce_shift; | |
355 | + u64 rc = 0; | |
356 | + long l, limit; | |
357 | + | |
358 | + local_irq_disable(); /* to protect tcep and the page behind it */ | |
359 | + tcep = __get_cpu_var(tce_page); | |
360 | + | |
361 | + if (!tcep) { | |
362 | + tcep = (u64 *)__get_free_page(GFP_ATOMIC); | |
363 | + if (!tcep) { | |
364 | + local_irq_enable(); | |
365 | + return -ENOMEM; | |
366 | + } | |
367 | + __get_cpu_var(tce_page) = tcep; | |
368 | + } | |
369 | + | |
370 | + proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; | |
371 | + | |
372 | + liobn = (u64)be32_to_cpu(maprange->liobn); | |
373 | + tce_shift = be32_to_cpu(maprange->tce_shift); | |
374 | + tce_size = 1ULL << tce_shift; | |
375 | + next = start_pfn << PAGE_SHIFT; | |
376 | + num_tce = num_pfn << PAGE_SHIFT; | |
377 | + | |
378 | + /* round back to the beginning of the tce page size */ | |
379 | + num_tce += next & (tce_size - 1); | |
380 | + next &= ~(tce_size - 1); | |
381 | + | |
383 | + /* convert to number of tces */ | |
383 | + num_tce |= tce_size - 1; | |
384 | + num_tce >>= tce_shift; | |
385 | + | |
386 | + /* We can map max one pageful of TCEs at a time */ | |
387 | + do { | |
388 | + /* | |
389 | + * Set up the page with TCE data, looping through and setting | |
390 | + * the values. | |
391 | + */ | |
392 | + limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE); | |
393 | + dma_offset = next + be64_to_cpu(maprange->dma_base); | |
394 | + | |
395 | + for (l = 0; l < limit; l++) { | |
396 | + tcep[l] = proto_tce | next; | |
397 | + next += tce_size; | |
398 | + } | |
399 | + | |
400 | + rc = plpar_tce_put_indirect(liobn, | |
401 | + dma_offset, | |
402 | + (u64)virt_to_abs(tcep), | |
403 | + limit); | |
404 | + | |
405 | + num_tce -= limit; | |
406 | + } while (num_tce > 0 && !rc); | |
407 | + | |
408 | + /* error cleanup: caller will clear whole range */ | |
409 | + | |
410 | + local_irq_enable(); | |
411 | + return rc; | |
412 | +} | |
413 | + | |
414 | +static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn, | |
415 | + unsigned long num_pfn, void *arg) | |
416 | +{ | |
417 | + return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg); | |
418 | +} | |
419 | + | |
420 | + | |
273 | 421 | #ifdef CONFIG_PCI |
274 | 422 | static void iommu_table_setparms(struct pci_controller *phb, |
275 | 423 | struct device_node *dn, |
... | ... | @@ -495,6 +643,329 @@ |
495 | 643 | pci_name(dev)); |
496 | 644 | } |
497 | 645 | |
646 | +static int __read_mostly disable_ddw; | |
647 | + | |
648 | +static int __init disable_ddw_setup(char *str) | |
649 | +{ | |
650 | + disable_ddw = 1; | |
651 | + printk(KERN_INFO "ppc iommu: disabling ddw.\n"); | |
652 | + | |
653 | + return 0; | |
654 | +} | |
655 | + | |
656 | +early_param("disable_ddw", disable_ddw_setup); | |
657 | + | |
658 | +static void remove_ddw(struct device_node *np) | |
659 | +{ | |
660 | + struct dynamic_dma_window_prop *dwp; | |
661 | + struct property *win64; | |
662 | + const u32 *ddr_avail; | |
663 | + u64 liobn; | |
664 | + int len, ret; | |
665 | + | |
666 | + ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len); | |
667 | + win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); | |
668 | + if (!win64 || !ddr_avail || len < 3 * sizeof(u32)) | |
669 | + return; | |
670 | + | |
671 | + dwp = win64->value; | |
672 | + liobn = (u64)be32_to_cpu(dwp->liobn); | |
673 | + | |
674 | + /* clear the whole window, note the arg is in kernel pages */ | |
675 | + ret = tce_clearrange_multi_pSeriesLP(0, | |
676 | + 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); | |
677 | + if (ret) | |
678 | + pr_warning("%s failed to clear tces in window.\n", | |
679 | + np->full_name); | |
680 | + else | |
681 | + pr_debug("%s successfully cleared tces in window.\n", | |
682 | + np->full_name); | |
683 | + | |
684 | + ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn); | |
685 | + if (ret) | |
686 | + pr_warning("%s: failed to remove direct window: rtas returned " | |
687 | + "%d to ibm,remove-pe-dma-window(%x) %llx\n", | |
688 | + np->full_name, ret, ddr_avail[2], liobn); | |
689 | + else | |
690 | + pr_debug("%s: successfully removed direct window: rtas returned " | |
691 | + "%d to ibm,remove-pe-dma-window(%x) %llx\n", | |
692 | + np->full_name, ret, ddr_avail[2], liobn); | |
693 | +} | |
694 | + | |
695 | + | |
696 | +static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn) | |
697 | +{ | |
698 | + struct device_node *dn; | |
699 | + struct pci_dn *pcidn; | |
700 | + struct direct_window *window; | |
701 | + const struct dynamic_dma_window_prop *direct64; | |
702 | + u64 dma_addr = 0; | |
703 | + | |
704 | + dn = pci_device_to_OF_node(dev); | |
705 | + pcidn = PCI_DN(dn); | |
706 | + spin_lock(&direct_window_list_lock); | |
707 | + /* check if we already created a window and dupe that config if so */ | |
708 | + list_for_each_entry(window, &direct_window_list, list) { | |
709 | + if (window->device == pdn) { | |
710 | + direct64 = window->prop; | |
711 | + dma_addr = direct64->dma_base; | |
712 | + break; | |
713 | + } | |
714 | + } | |
715 | + spin_unlock(&direct_window_list_lock); | |
716 | + | |
717 | + return dma_addr; | |
718 | +} | |
719 | + | |
720 | +static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn) | |
721 | +{ | |
722 | + struct device_node *dn; | |
723 | + struct pci_dn *pcidn; | |
724 | + int len; | |
725 | + struct direct_window *window; | |
726 | + const struct dynamic_dma_window_prop *direct64; | |
727 | + u64 dma_addr = 0; | |
728 | + | |
729 | + dn = pci_device_to_OF_node(dev); | |
730 | + pcidn = PCI_DN(dn); | |
731 | + direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); | |
732 | + if (direct64) { | |
733 | + window = kzalloc(sizeof(*window), GFP_KERNEL); | |
734 | + if (!window) { | |
735 | + remove_ddw(pdn); | |
736 | + } else { | |
737 | + window->device = pdn; | |
738 | + window->prop = direct64; | |
739 | + spin_lock(&direct_window_list_lock); | |
740 | + list_add(&window->list, &direct_window_list); | |
741 | + spin_unlock(&direct_window_list_lock); | |
742 | + dma_addr = direct64->dma_base; | |
743 | + } | |
744 | + } | |
745 | + | |
746 | + return dma_addr; | |
747 | +} | |
748 | + | |
749 | +static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail, | |
750 | + struct ddw_query_response *query) | |
751 | +{ | |
752 | + struct device_node *dn; | |
753 | + struct pci_dn *pcidn; | |
754 | + u32 cfg_addr; | |
755 | + u64 buid; | |
756 | + int ret; | |
757 | + | |
758 | + /* | |
759 | + * Get the config address and phb buid of the PE window. | |
760 | + * Rely on eeh to retrieve this for us. | |
761 | + * Retrieve them from the pci device, not the node with the | |
762 | + * dma-window property | |
763 | + */ | |
764 | + dn = pci_device_to_OF_node(dev); | |
765 | + pcidn = PCI_DN(dn); | |
766 | + cfg_addr = pcidn->eeh_config_addr; | |
767 | + if (pcidn->eeh_pe_config_addr) | |
768 | + cfg_addr = pcidn->eeh_pe_config_addr; | |
769 | + buid = pcidn->phb->buid; | |
770 | + ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query, | |
771 | + cfg_addr, BUID_HI(buid), BUID_LO(buid)); | |
772 | + dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" | |
773 | + " returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid), | |
774 | + BUID_LO(buid), ret); | |
775 | + return ret; | |
776 | +} | |
777 | + | |
778 | +static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail, | |
779 | + struct ddw_create_response *create, int page_shift, | |
780 | + int window_shift) | |
781 | +{ | |
782 | + struct device_node *dn; | |
783 | + struct pci_dn *pcidn; | |
784 | + u32 cfg_addr; | |
785 | + u64 buid; | |
786 | + int ret; | |
787 | + | |
788 | + /* | |
789 | + * Get the config address and phb buid of the PE window. | |
790 | + * Rely on eeh to retrieve this for us. | |
791 | + * Retrieve them from the pci device, not the node with the | |
792 | + * dma-window property | |
793 | + */ | |
794 | + dn = pci_device_to_OF_node(dev); | |
795 | + pcidn = PCI_DN(dn); | |
796 | + cfg_addr = pcidn->eeh_config_addr; | |
797 | + if (pcidn->eeh_pe_config_addr) | |
798 | + cfg_addr = pcidn->eeh_pe_config_addr; | |
799 | + buid = pcidn->phb->buid; | |
800 | + | |
801 | + do { | |
802 | + /* extra outputs are LIOBN and dma-addr (hi, lo) */ | |
803 | + ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr, | |
804 | + BUID_HI(buid), BUID_LO(buid), page_shift, window_shift); | |
805 | + } while (rtas_busy_delay(ret)); | |
806 | + dev_info(&dev->dev, | |
807 | + "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " | |
808 | + "(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1], | |
809 | + cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, | |
810 | + window_shift, ret, create->liobn, create->addr_hi, create->addr_lo); | |
811 | + | |
812 | + return ret; | |
813 | +} | |
814 | + | |
815 | +/* | |
816 | + * If the PE supports dynamic dma windows, and there is space for a table | |
817 | + * that can map all pages in a linear offset, then setup such a table, | |
818 | + * and record the dma-offset in the struct device. | |
819 | + * | |
820 | + * dev: the pci device we are checking | |
821 | + * pdn: the parent pe node with the ibm,dma_window property | |
822 | + * Future: also check if we can remap the base window for our base page size | |
823 | + * | |
824 | + * returns the dma offset for use by dma_set_mask | |
825 | + */ | |
826 | +static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) | |
827 | +{ | |
828 | + int len, ret; | |
829 | + struct ddw_query_response query; | |
830 | + struct ddw_create_response create; | |
831 | + int page_shift; | |
832 | + u64 dma_addr, max_addr; | |
833 | + struct device_node *dn; | |
834 | + const u32 *uninitialized_var(ddr_avail); | |
835 | + struct direct_window *window; | |
836 | + struct property *uninitialized_var(win64); | |
837 | + struct dynamic_dma_window_prop *ddwprop; | |
838 | + | |
839 | + mutex_lock(&direct_window_init_mutex); | |
840 | + | |
841 | + dma_addr = dupe_ddw_if_already_created(dev, pdn); | |
842 | + if (dma_addr != 0) | |
843 | + goto out_unlock; | |
844 | + | |
845 | + dma_addr = dupe_ddw_if_kexec(dev, pdn); | |
846 | + if (dma_addr != 0) | |
847 | + goto out_unlock; | |
848 | + | |
849 | + /* | |
850 | + * the ibm,ddw-applicable property holds the tokens for: | |
851 | + * ibm,query-pe-dma-window | |
852 | + * ibm,create-pe-dma-window | |
853 | + * ibm,remove-pe-dma-window | |
854 | + * for the given node in that order. | |
855 | + * the property is actually in the parent, not the PE | |
856 | + */ | |
857 | + ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); | |
858 | + if (!ddr_avail || len < 3 * sizeof(u32)) | |
859 | + goto out_unlock; | |
860 | + | |
861 | + /* | |
862 | + * Query if there is a second window of size to map the | |
863 | + * whole partition. Query returns number of windows, largest | |
864 | + * block assigned to PE (partition endpoint), and two bitmasks | |
865 | + * of page sizes: supported and supported for migrate-dma. | |
866 | + */ | |
867 | + dn = pci_device_to_OF_node(dev); | |
868 | + ret = query_ddw(dev, ddr_avail, &query); | |
869 | + if (ret != 0) | |
870 | + goto out_unlock; | |
871 | + | |
872 | + if (query.windows_available == 0) { | |
873 | + /* | |
874 | + * no additional windows are available for this device. | |
875 | + * We might be able to reallocate the existing window, | |
876 | + * trading in for a larger page size. | |
877 | + */ | |
878 | + dev_dbg(&dev->dev, "no free dynamic windows"); | |
879 | + goto out_unlock; | |
880 | + } | |
881 | + if (query.page_size & 4) { | |
882 | + page_shift = 24; /* 16MB */ | |
883 | + } else if (query.page_size & 2) { | |
884 | + page_shift = 16; /* 64kB */ | |
885 | + } else if (query.page_size & 1) { | |
886 | + page_shift = 12; /* 4kB */ | |
887 | + } else { | |
888 | + dev_dbg(&dev->dev, "no supported direct page size in mask %x", | |
889 | + query.page_size); | |
890 | + goto out_unlock; | |
891 | + } | |
892 | + /* verify the window * number of ptes will map the partition */ | |
893 | + /* check largest block * page size > max memory hotplug addr */ | |
894 | + max_addr = memory_hotplug_max(); | |
895 | + if (query.largest_available_block < (max_addr >> page_shift)) { | |
896 | + dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u " | |
897 | + "%llu-sized pages\n", max_addr, query.largest_available_block, | |
898 | + 1ULL << page_shift); | |
899 | + goto out_unlock; | |
900 | + } | |
901 | + len = order_base_2(max_addr); | |
902 | + win64 = kzalloc(sizeof(struct property), GFP_KERNEL); | |
903 | + if (!win64) { | |
904 | + dev_info(&dev->dev, | |
905 | + "couldn't allocate property for 64bit dma window\n"); | |
906 | + goto out_unlock; | |
907 | + } | |
908 | + win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL); | |
909 | + win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL); | |
910 | + if (!win64->name || !win64->value) { | |
911 | + dev_info(&dev->dev, | |
912 | + "couldn't allocate property name and value\n"); | |
913 | + goto out_free_prop; | |
914 | + } | |
915 | + | |
916 | + ret = create_ddw(dev, ddr_avail, &create, page_shift, len); | |
917 | + if (ret != 0) | |
918 | + goto out_free_prop; | |
919 | + | |
920 | + ddwprop->liobn = cpu_to_be32(create.liobn); | |
921 | + ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2)); | |
922 | + ddwprop->tce_shift = cpu_to_be32(page_shift); | |
923 | + ddwprop->window_shift = cpu_to_be32(len); | |
924 | + | |
925 | + dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %s\n", | |
926 | + create.liobn, dn->full_name); | |
927 | + | |
928 | + window = kzalloc(sizeof(*window), GFP_KERNEL); | |
929 | + if (!window) | |
930 | + goto out_clear_window; | |
931 | + | |
932 | + ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, | |
933 | + win64->value, tce_setrange_multi_pSeriesLP_walk); | |
934 | + if (ret) { | |
935 | + dev_info(&dev->dev, "failed to map direct window for %s: %d\n", | |
936 | + dn->full_name, ret); | |
937 | + goto out_clear_window; | |
938 | + } | |
939 | + | |
940 | + ret = prom_add_property(pdn, win64); | |
941 | + if (ret) { | |
942 | + dev_err(&dev->dev, "unable to add dma window property for %s: %d", | |
943 | + pdn->full_name, ret); | |
944 | + goto out_clear_window; | |
945 | + } | |
946 | + | |
947 | + window->device = pdn; | |
948 | + window->prop = ddwprop; | |
949 | + spin_lock(&direct_window_list_lock); | |
950 | + list_add(&window->list, &direct_window_list); | |
951 | + spin_unlock(&direct_window_list_lock); | |
952 | + | |
953 | + dma_addr = of_read_number(&create.addr_hi, 2); | |
954 | + goto out_unlock; | |
955 | + | |
956 | +out_clear_window: | |
957 | + remove_ddw(pdn); | |
958 | + | |
959 | +out_free_prop: | |
960 | + kfree(win64->name); | |
961 | + kfree(win64->value); | |
962 | + kfree(win64); | |
963 | + | |
964 | +out_unlock: | |
965 | + mutex_unlock(&direct_window_init_mutex); | |
966 | + return dma_addr; | |
967 | +} | |
968 | + | |
498 | 969 | static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) |
499 | 970 | { |
500 | 971 | struct device_node *pdn, *dn; |
501 | 972 | |
502 | 973 | |
503 | 974 | |
504 | 975 | |
... | ... | @@ -541,23 +1012,137 @@ |
541 | 1012 | |
542 | 1013 | set_iommu_table_base(&dev->dev, pci->iommu_table); |
543 | 1014 | } |
1015 | + | |
1016 | +static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) | |
1017 | +{ | |
1018 | + bool ddw_enabled = false; | |
1019 | + struct device_node *pdn, *dn; | |
1020 | + struct pci_dev *pdev; | |
1021 | + const void *dma_window = NULL; | |
1022 | + u64 dma_offset; | |
1023 | + | |
1024 | + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) | |
1025 | + return -EIO; | |
1026 | + | |
1027 | + /* only attempt to use a new window if 64-bit DMA is requested */ | |
1028 | + if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) { | |
1029 | + pdev = to_pci_dev(dev); | |
1030 | + | |
1031 | + dn = pci_device_to_OF_node(pdev); | |
1032 | + dev_dbg(dev, "node is %s\n", dn->full_name); | |
1033 | + | |
1034 | + /* | |
1035 | + * the device tree might contain the dma-window properties | |
1036 | + * per-device and not necessarily for the bus. So we need to | |
1037 | + * search upwards in the tree until we either hit a dma-window | |
1038 | + * property, OR find a parent with a table already allocated. | |
1039 | + */ | |
1040 | + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; | |
1041 | + pdn = pdn->parent) { | |
1042 | + dma_window = of_get_property(pdn, "ibm,dma-window", NULL); | |
1043 | + if (dma_window) | |
1044 | + break; | |
1045 | + } | |
1046 | + if (pdn && PCI_DN(pdn)) { | |
1047 | + dma_offset = enable_ddw(pdev, pdn); | |
1048 | + if (dma_offset != 0) { | |
1049 | + dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset); | |
1050 | + set_dma_offset(dev, dma_offset); | |
1051 | + set_dma_ops(dev, &dma_direct_ops); | |
1052 | + ddw_enabled = true; | |
1053 | + } | |
1054 | + } | |
1055 | + } | |
1056 | + | |
1057 | + /* fall-through to iommu ops */ | |
1058 | + if (!ddw_enabled) { | |
1059 | + dev_info(dev, "Using 32-bit DMA via iommu\n"); | |
1060 | + set_dma_ops(dev, &dma_iommu_ops); | |
1061 | + } | |
1062 | + | |
1063 | + *dev->dma_mask = dma_mask; | |
1064 | + return 0; | |
1065 | +} | |
1066 | + | |
544 | 1067 | #else /* CONFIG_PCI */ |
545 | 1068 | #define pci_dma_bus_setup_pSeries NULL |
546 | 1069 | #define pci_dma_dev_setup_pSeries NULL |
547 | 1070 | #define pci_dma_bus_setup_pSeriesLP NULL |
548 | 1071 | #define pci_dma_dev_setup_pSeriesLP NULL |
1072 | +#define dma_set_mask_pSeriesLP NULL | |
549 | 1073 | #endif /* !CONFIG_PCI */ |
550 | 1074 | |
1075 | +static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, | |
1076 | + void *data) | |
1077 | +{ | |
1078 | + struct direct_window *window; | |
1079 | + struct memory_notify *arg = data; | |
1080 | + int ret = 0; | |
1081 | + | |
1082 | + switch (action) { | |
1083 | + case MEM_GOING_ONLINE: | |
1084 | + spin_lock(&direct_window_list_lock); | |
1085 | + list_for_each_entry(window, &direct_window_list, list) { | |
1086 | + ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, | |
1087 | + arg->nr_pages, window->prop); | |
1088 | + /* XXX log error */ | |
1089 | + } | |
1090 | + spin_unlock(&direct_window_list_lock); | |
1091 | + break; | |
1092 | + case MEM_CANCEL_ONLINE: | |
1093 | + case MEM_OFFLINE: | |
1094 | + spin_lock(&direct_window_list_lock); | |
1095 | + list_for_each_entry(window, &direct_window_list, list) { | |
1096 | + ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, | |
1097 | + arg->nr_pages, window->prop); | |
1098 | + /* XXX log error */ | |
1099 | + } | |
1100 | + spin_unlock(&direct_window_list_lock); | |
1101 | + break; | |
1102 | + default: | |
1103 | + break; | |
1104 | + } | |
1105 | + if (ret && action != MEM_CANCEL_ONLINE) | |
1106 | + return NOTIFY_BAD; | |
1107 | + | |
1108 | + return NOTIFY_OK; | |
1109 | +} | |
1110 | + | |
1111 | +static struct notifier_block iommu_mem_nb = { | |
1112 | + .notifier_call = iommu_mem_notifier, | |
1113 | +}; | |
1114 | + | |
551 | 1115 | static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) |
552 | 1116 | { |
553 | 1117 | int err = NOTIFY_OK; |
554 | 1118 | struct device_node *np = node; |
555 | 1119 | struct pci_dn *pci = PCI_DN(np); |
1120 | + struct direct_window *window; | |
556 | 1121 | |
557 | 1122 | switch (action) { |
558 | 1123 | case PSERIES_RECONFIG_REMOVE: |
559 | 1124 | if (pci && pci->iommu_table) |
560 | 1125 | iommu_free_table(pci->iommu_table, np->full_name); |
1126 | + | |
1127 | + spin_lock(&direct_window_list_lock); | |
1128 | + list_for_each_entry(window, &direct_window_list, list) { | |
1129 | + if (window->device == np) { | |
1130 | + list_del(&window->list); | |
1131 | + kfree(window); | |
1132 | + break; | |
1133 | + } | |
1134 | + } | |
1135 | + spin_unlock(&direct_window_list_lock); | |
1136 | + | |
1137 | + /* | |
1138 | + * Because the notifier runs after isolation of the | |
1139 | + * slot, we are guaranteed any DMA window has already | |
1140 | + * been revoked and the TCEs have been marked invalid, | |
1141 | + * so we don't need a call to remove_ddw(np). However, | |
1142 | + * if an additional notifier action is added before the | |
1143 | + * isolate call, we should update this code for | |
1144 | + * completeness with such a call. | |
1145 | + */ | |
561 | 1146 | break; |
562 | 1147 | default: |
563 | 1148 | err = NOTIFY_DONE; |
... | ... | @@ -587,6 +1172,7 @@ |
587 | 1172 | ppc_md.tce_get = tce_get_pSeriesLP; |
588 | 1173 | ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeriesLP; |
589 | 1174 | ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeriesLP; |
1175 | + ppc_md.dma_set_mask = dma_set_mask_pSeriesLP; | |
590 | 1176 | } else { |
591 | 1177 | ppc_md.tce_build = tce_build_pSeries; |
592 | 1178 | ppc_md.tce_free = tce_free_pSeries; |
... | ... | @@ -597,6 +1183,7 @@ |
597 | 1183 | |
598 | 1184 | |
599 | 1185 | pSeries_reconfig_notifier_register(&iommu_reconfig_nb); |
1186 | + register_memory_notifier(&iommu_mem_nb); | |
600 | 1187 | |
601 | 1188 | set_pci_dma_ops(&dma_iommu_ops); |
602 | 1189 | } |