Commit 93899a679fd6b2534b5c297d9316bae039ebcbe1

Authored by Alex Williamson
1 parent 0f905ce2b5

vfio-pci: Fix remove path locking

Locking both the remove() and release() path results in a deadlock
that should have been obvious.  To fix this we can get and hold the
vfio_device reference as we evaluate whether to do a bus/slot reset.
This will automatically block any remove() calls, allowing us to
remove the explict lock.  Fixes 61d792562b53.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Cc: stable@vger.kernel.org	[3.17]

Showing 1 changed file with 57 additions and 79 deletions Side-by-side Diff

drivers/vfio/pci/vfio_pci.c
... ... @@ -876,15 +876,11 @@
876 876 {
877 877 struct vfio_pci_device *vdev;
878 878  
879   - mutex_lock(&driver_lock);
880   -
881 879 vdev = vfio_del_group_dev(&pdev->dev);
882 880 if (vdev) {
883 881 iommu_group_put(pdev->dev.iommu_group);
884 882 kfree(vdev);
885 883 }
886   -
887   - mutex_unlock(&driver_lock);
888 884 }
889 885  
890 886 static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
891 887  
892 888  
893 889  
894 890  
895 891  
896 892  
897 893  
898 894  
899 895  
900 896  
901 897  
902 898  
... ... @@ -927,108 +923,90 @@
927 923 .err_handler = &vfio_err_handlers,
928 924 };
929 925  
930   -/*
931   - * Test whether a reset is necessary and possible. We mark devices as
932   - * needs_reset when they are released, but don't have a function-local reset
933   - * available. If any of these exist in the affected devices, we want to do
934   - * a bus/slot reset. We also need all of the affected devices to be unused,
935   - * so we abort if any device has a non-zero refcnt. driver_lock prevents a
936   - * device from being opened during the scan or unbound from vfio-pci.
937   - */
938   -static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data)
939   -{
940   - bool *needs_reset = data;
941   - struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
942   - int ret = -EBUSY;
  926 +struct vfio_devices {
  927 + struct vfio_device **devices;
  928 + int cur_index;
  929 + int max_index;
  930 +};
943 931  
944   - if (pci_drv == &vfio_pci_driver) {
945   - struct vfio_device *device;
946   - struct vfio_pci_device *vdev;
947   -
948   - device = vfio_device_get_from_dev(&pdev->dev);
949   - if (!device)
950   - return ret;
951   -
952   - vdev = vfio_device_data(device);
953   - if (vdev) {
954   - if (vdev->needs_reset)
955   - *needs_reset = true;
956   -
957   - if (!vdev->refcnt)
958   - ret = 0;
959   - }
960   -
961   - vfio_device_put(device);
962   - }
963   -
964   - /*
965   - * TODO: vfio-core considers groups to be viable even if some devices
966   - * are attached to known drivers, like pci-stub or pcieport. We can't
967   - * freeze devices from being unbound to those drivers like we can
968   - * here though, so it would be racy to test for them. We also can't
969   - * use device_lock() to prevent changes as that would interfere with
970   - * PCI-core taking device_lock during bus reset. For now, we require
971   - * devices to be bound to vfio-pci to get a bus/slot reset on release.
972   - */
973   -
974   - return ret;
975   -}
976   -
977   -/* Clear needs_reset on all affected devices after successful bus/slot reset */
978   -static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data)
  932 +static int vfio_pci_get_devs(struct pci_dev *pdev, void *data)
979 933 {
  934 + struct vfio_devices *devs = data;
980 935 struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
981 936  
982   - if (pci_drv == &vfio_pci_driver) {
983   - struct vfio_device *device;
984   - struct vfio_pci_device *vdev;
  937 + if (pci_drv != &vfio_pci_driver)
  938 + return -EBUSY;
985 939  
986   - device = vfio_device_get_from_dev(&pdev->dev);
987   - if (!device)
988   - return 0;
  940 + if (devs->cur_index == devs->max_index)
  941 + return -ENOSPC;
989 942  
990   - vdev = vfio_device_data(device);
991   - if (vdev)
992   - vdev->needs_reset = false;
  943 + devs->devices[devs->cur_index] = vfio_device_get_from_dev(&pdev->dev);
  944 + if (!devs->devices[devs->cur_index])
  945 + return -EINVAL;
993 946  
994   - vfio_device_put(device);
995   - }
996   -
  947 + devs->cur_index++;
997 948 return 0;
998 949 }
999 950  
1000 951 /*
1001 952 * Attempt to do a bus/slot reset if there are devices affected by a reset for
1002 953 * this device that are needs_reset and all of the affected devices are unused
1003   - * (!refcnt). Callers of this function are required to hold driver_lock such
1004   - * that devices can not be unbound from vfio-pci or opened by a user while we
1005   - * test for and perform a bus/slot reset.
  954 + * (!refcnt). Callers are required to hold driver_lock when calling this to
  955 + * prevent device opens and concurrent bus reset attempts. We prevent device
  956 + * unbinds by acquiring and holding a reference to the vfio_device.
  957 + *
  958 + * NB: vfio-core considers a group to be viable even if some devices are
  959 + * bound to drivers like pci-stub or pcieport. Here we require all devices
  960 + * to be bound to vfio_pci since that's the only way we can be sure they
  961 + * stay put.
1006 962 */
1007 963 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
1008 964 {
  965 + struct vfio_devices devs = { .cur_index = 0 };
  966 + int i = 0, ret = -EINVAL;
1009 967 bool needs_reset = false, slot = false;
1010   - int ret;
  968 + struct vfio_pci_device *tmp;
1011 969  
1012 970 if (!pci_probe_reset_slot(vdev->pdev->slot))
1013 971 slot = true;
1014 972 else if (pci_probe_reset_bus(vdev->pdev->bus))
1015 973 return;
1016 974  
1017   - if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
1018   - vfio_pci_test_bus_reset,
1019   - &needs_reset, slot) || !needs_reset)
  975 + if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
  976 + &i, slot) || !i)
1020 977 return;
1021 978  
1022   - if (slot)
1023   - ret = pci_try_reset_slot(vdev->pdev->slot);
1024   - else
1025   - ret = pci_try_reset_bus(vdev->pdev->bus);
1026   -
1027   - if (ret)
  979 + devs.max_index = i;
  980 + devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL);
  981 + if (!devs.devices)
1028 982 return;
1029 983  
1030   - vfio_pci_for_each_slot_or_bus(vdev->pdev,
1031   - vfio_pci_clear_needs_reset, NULL, slot);
  984 + if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
  985 + vfio_pci_get_devs, &devs, slot))
  986 + goto put_devs;
  987 +
  988 + for (i = 0; i < devs.cur_index; i++) {
  989 + tmp = vfio_device_data(devs.devices[i]);
  990 + if (tmp->needs_reset)
  991 + needs_reset = true;
  992 + if (tmp->refcnt)
  993 + goto put_devs;
  994 + }
  995 +
  996 + if (needs_reset)
  997 + ret = slot ? pci_try_reset_slot(vdev->pdev->slot) :
  998 + pci_try_reset_bus(vdev->pdev->bus);
  999 +
  1000 +put_devs:
  1001 + for (i = 0; i < devs.cur_index; i++) {
  1002 + if (!ret) {
  1003 + tmp = vfio_device_data(devs.devices[i]);
  1004 + tmp->needs_reset = false;
  1005 + }
  1006 + vfio_device_put(devs.devices[i]);
  1007 + }
  1008 +
  1009 + kfree(devs.devices);
1032 1010 }
1033 1011  
1034 1012 static void __exit vfio_pci_cleanup(void)