Blame SOURCES/0002-bus-pci-consider-only-usable-devices-for-IOVA-mode.patch

eb2664
From 25986da4cfa1b20ca6e9f4e39a34e12d72435963 Mon Sep 17 00:00:00 2001
eb2664
From: Ben Walker <benjamin.walker@intel.com>
eb2664
Date: Mon, 18 Nov 2019 15:23:18 +0000
eb2664
Subject: [PATCH 2/4] bus/pci: consider only usable devices for IOVA mode
eb2664
eb2664
When selecting the preferred IOVA mode of the pci bus, the current
eb2664
heuristic ("are devices bound?", "are devices bound to UIO?", "are pmd
eb2664
drivers supporting IOVA as VA?", etc.) should honor the device
eb2664
white/blacklist so that an unwanted device does not impact the decision.
eb2664
eb2664
There is no reason to consider a device which has no driver available.
eb2664
eb2664
This applies to all OSes, so implement this in common code, then call an
eb2664
OS-specific callback.
eb2664
eb2664
On Linux side:
eb2664
- the VFIO special considerations should be evaluated only if VFIO
eb2664
  support is built,
eb2664
- there is no strong requirement to use VA rather than PA if a driver
eb2664
  supports VA, so default to DC in such a case.
eb2664
eb2664
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
eb2664
Signed-off-by: David Marchand <david.marchand@redhat.com>
eb2664
Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>
eb2664
eb2664
(cherry picked from commit 703458e19c16135143b3f30089e1af66100c82dc)
eb2664
Signed-off-by: David Marchand <david.marchand@redhat.com>
eb2664
eb2664
Conflicts:
eb2664
        drivers/bus/pci/linux/pci.c
eb2664
        drivers/bus/pci/pci_common.c
eb2664
---
eb2664
 drivers/bus/pci/bsd/pci.c    |   9 +-
eb2664
 drivers/bus/pci/linux/pci.c  | 185 +++++++++--------------------------
eb2664
 drivers/bus/pci/pci_common.c |  65 ++++++++++++
eb2664
 drivers/bus/pci/private.h    |   8 ++
eb2664
 4 files changed, 126 insertions(+), 141 deletions(-)
eb2664
eb2664
diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c
eb2664
index d09f8ee5a..0f23f12b9 100644
eb2664
--- a/drivers/bus/pci/bsd/pci.c
eb2664
+++ b/drivers/bus/pci/bsd/pci.c
eb2664
@@ -377,11 +377,12 @@ rte_pci_scan(void)
eb2664
 }
eb2664
 
eb2664
-/*
eb2664
- * Get iommu class of PCI devices on the bus.
eb2664
- */
eb2664
 enum rte_iova_mode
eb2664
-rte_pci_get_iommu_class(void)
eb2664
+pci_device_iova_mode(const struct rte_pci_driver *pdrv __rte_unused,
eb2664
+		     const struct rte_pci_device *pdev)
eb2664
 {
eb2664
 	/* Supports only RTE_KDRV_NIC_UIO */
eb2664
+	if (pdev->kdrv != RTE_KDRV_NIC_UIO)
eb2664
+		RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? Defaulting to IOVA as 'PA'\n");
eb2664
+
eb2664
 	return RTE_IOVA_PA;
eb2664
 }
eb2664
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
eb2664
index 74794a3ba..7d73d9de5 100644
eb2664
--- a/drivers/bus/pci/linux/pci.c
eb2664
+++ b/drivers/bus/pci/linux/pci.c
eb2664
@@ -498,91 +498,11 @@ rte_pci_scan(void)
eb2664
 }
eb2664
 
eb2664
-/*
eb2664
- * Is pci device bound to any kdrv
eb2664
- */
eb2664
-static inline int
eb2664
-pci_one_device_is_bound(void)
eb2664
-{
eb2664
-	struct rte_pci_device *dev = NULL;
eb2664
-	int ret = 0;
eb2664
-
eb2664
-	FOREACH_DEVICE_ON_PCIBUS(dev) {
eb2664
-		if (dev->kdrv == RTE_KDRV_UNKNOWN ||
eb2664
-		    dev->kdrv == RTE_KDRV_NONE) {
eb2664
-			continue;
eb2664
-		} else {
eb2664
-			ret = 1;
eb2664
-			break;
eb2664
-		}
eb2664
-	}
eb2664
-	return ret;
eb2664
-}
eb2664
-
eb2664
-/*
eb2664
- * Any one of the device bound to uio
eb2664
- */
eb2664
-static inline int
eb2664
-pci_one_device_bound_uio(void)
eb2664
-{
eb2664
-	struct rte_pci_device *dev = NULL;
eb2664
-	struct rte_devargs *devargs;
eb2664
-	int need_check;
eb2664
-
eb2664
-	FOREACH_DEVICE_ON_PCIBUS(dev) {
eb2664
-		devargs = dev->device.devargs;
eb2664
-
eb2664
-		need_check = 0;
eb2664
-		switch (rte_pci_bus.bus.conf.scan_mode) {
eb2664
-		case RTE_BUS_SCAN_WHITELIST:
eb2664
-			if (devargs && devargs->policy == RTE_DEV_WHITELISTED)
eb2664
-				need_check = 1;
eb2664
-			break;
eb2664
-		case RTE_BUS_SCAN_UNDEFINED:
eb2664
-		case RTE_BUS_SCAN_BLACKLIST:
eb2664
-			if (devargs == NULL ||
eb2664
-			    devargs->policy != RTE_DEV_BLACKLISTED)
eb2664
-				need_check = 1;
eb2664
-			break;
eb2664
-		}
eb2664
-
eb2664
-		if (!need_check)
eb2664
-			continue;
eb2664
-
eb2664
-		if (dev->kdrv == RTE_KDRV_IGB_UIO ||
eb2664
-		   dev->kdrv == RTE_KDRV_UIO_GENERIC) {
eb2664
-			return 1;
eb2664
-		}
eb2664
-	}
eb2664
-	return 0;
eb2664
-}
eb2664
-
eb2664
-/*
eb2664
- * Any one of the device has iova as va
eb2664
- */
eb2664
-static inline int
eb2664
-pci_one_device_has_iova_va(void)
eb2664
-{
eb2664
-	struct rte_pci_device *dev = NULL;
eb2664
-	struct rte_pci_driver *drv = NULL;
eb2664
-
eb2664
-	FOREACH_DRIVER_ON_PCIBUS(drv) {
eb2664
-		if (drv && drv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) {
eb2664
-			FOREACH_DEVICE_ON_PCIBUS(dev) {
eb2664
-				if (dev->kdrv == RTE_KDRV_VFIO &&
eb2664
-				    rte_pci_match(drv, dev))
eb2664
-					return 1;
eb2664
-			}
eb2664
-		}
eb2664
-	}
eb2664
-	return 0;
eb2664
-}
eb2664
-
eb2664
 #if defined(RTE_ARCH_X86)
eb2664
 static bool
eb2664
-pci_one_device_iommu_support_va(struct rte_pci_device *dev)
eb2664
+pci_one_device_iommu_support_va(const struct rte_pci_device *dev)
eb2664
 {
eb2664
 #define VTD_CAP_MGAW_SHIFT	16
eb2664
 #define VTD_CAP_MGAW_MASK	(0x3fULL << VTD_CAP_MGAW_SHIFT)
eb2664
-	struct rte_pci_addr *addr = &dev->addr;
eb2664
+	const struct rte_pci_addr *addr = &dev->addr;
eb2664
 	char filename[PATH_MAX];
eb2664
 	FILE *fp;
eb2664
@@ -628,5 +548,5 @@ pci_one_device_iommu_support_va(struct rte_pci_device *dev)
eb2664
 #elif defined(RTE_ARCH_PPC_64)
eb2664
 static bool
eb2664
-pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev)
eb2664
+pci_one_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev)
eb2664
 {
eb2664
 	return false;
eb2664
@@ -634,5 +554,5 @@ pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev)
eb2664
 #else
eb2664
 static bool
eb2664
-pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev)
eb2664
+pci_one_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev)
eb2664
 {
eb2664
 	return true;
eb2664
@@ -640,66 +560,57 @@ pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev)
eb2664
 #endif
eb2664
 
eb2664
-/*
eb2664
- * All devices IOMMUs support VA as IOVA
eb2664
- */
eb2664
-static bool
eb2664
-pci_devices_iommu_support_va(void)
eb2664
-{
eb2664
-	struct rte_pci_device *dev = NULL;
eb2664
-	struct rte_pci_driver *drv = NULL;
eb2664
-
eb2664
-	FOREACH_DRIVER_ON_PCIBUS(drv) {
eb2664
-		FOREACH_DEVICE_ON_PCIBUS(dev) {
eb2664
-			if (!rte_pci_match(drv, dev))
eb2664
-				continue;
eb2664
-			/*
eb2664
-			 * just one PCI device needs to be checked out because
eb2664
-			 * the IOMMU hardware is the same for all of them.
eb2664
-			 */
eb2664
-			return pci_one_device_iommu_support_va(dev);
eb2664
-		}
eb2664
-	}
eb2664
-	return true;
eb2664
-}
eb2664
-
eb2664
-/*
eb2664
- * Get iommu class of PCI devices on the bus.
eb2664
- */
eb2664
 enum rte_iova_mode
eb2664
-rte_pci_get_iommu_class(void)
eb2664
+pci_device_iova_mode(const struct rte_pci_driver *pdrv,
eb2664
+		     const struct rte_pci_device *pdev)
eb2664
 {
eb2664
-	bool is_bound;
eb2664
-	bool is_vfio_noiommu_enabled = true;
eb2664
-	bool has_iova_va;
eb2664
-	bool is_bound_uio;
eb2664
-	bool iommu_no_va;
eb2664
+	enum rte_iova_mode iova_mode = RTE_IOVA_DC;
eb2664
+	static int iommu_no_va = -1;
eb2664
 
eb2664
-	is_bound = pci_one_device_is_bound();
eb2664
-	if (!is_bound)
eb2664
-		return RTE_IOVA_DC;
eb2664
-
eb2664
-	has_iova_va = pci_one_device_has_iova_va();
eb2664
-	is_bound_uio = pci_one_device_bound_uio();
eb2664
-	iommu_no_va = !pci_devices_iommu_support_va();
eb2664
+	switch (pdev->kdrv) {
eb2664
+	case RTE_KDRV_VFIO: {
eb2664
 #ifdef VFIO_PRESENT
eb2664
-	is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ?
eb2664
-					true : false;
eb2664
+		static int is_vfio_noiommu_enabled = -1;
eb2664
+
eb2664
+		if (is_vfio_noiommu_enabled == -1) {
eb2664
+			if (rte_vfio_noiommu_is_enabled() == 1)
eb2664
+				is_vfio_noiommu_enabled = 1;
eb2664
+			else
eb2664
+				is_vfio_noiommu_enabled = 0;
eb2664
+		}
eb2664
+		if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) {
eb2664
+			iova_mode = RTE_IOVA_PA;
eb2664
+		} else if (is_vfio_noiommu_enabled != 0) {
eb2664
+			RTE_LOG(DEBUG, EAL, "Forcing to 'PA', vfio-noiommu mode configured\n");
eb2664
+			iova_mode = RTE_IOVA_PA;
eb2664
+		}
eb2664
 #endif
eb2664
+		break;
eb2664
+	}
eb2664
 
eb2664
-	if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled &&
eb2664
-			!iommu_no_va)
eb2664
-		return RTE_IOVA_VA;
eb2664
+	case RTE_KDRV_IGB_UIO:
eb2664
+	case RTE_KDRV_UIO_GENERIC:
eb2664
+		iova_mode = RTE_IOVA_PA;
eb2664
+		break;
eb2664
 
eb2664
-	if (has_iova_va) {
eb2664
-		RTE_LOG(WARNING, EAL, "Some devices want iova as va but pa will be used because.. ");
eb2664
-		if (is_vfio_noiommu_enabled)
eb2664
-			RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
eb2664
-		if (is_bound_uio)
eb2664
-			RTE_LOG(WARNING, EAL, "few device bound to UIO\n");
eb2664
-		if (iommu_no_va)
eb2664
-			RTE_LOG(WARNING, EAL, "IOMMU does not support IOVA as VA\n");
eb2664
+	default:
eb2664
+		RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? Defaulting to IOVA as 'PA'\n");
eb2664
+		iova_mode = RTE_IOVA_PA;
eb2664
+		break;
eb2664
 	}
eb2664
 
eb2664
-	return RTE_IOVA_PA;
eb2664
+	if (iova_mode != RTE_IOVA_PA) {
eb2664
+		/*
eb2664
+		 * We can check this only once, because the IOMMU hardware is
eb2664
+		 * the same for all of them.
eb2664
+		 */
eb2664
+		if (iommu_no_va == -1)
eb2664
+			iommu_no_va = pci_one_device_iommu_support_va(pdev)
eb2664
+					? 0 : 1;
eb2664
+		if (iommu_no_va != 0) {
eb2664
+			RTE_LOG(DEBUG, EAL, "Forcing to 'PA', IOMMU does not support IOVA as 'VA'\n");
eb2664
+			iova_mode = RTE_IOVA_PA;
eb2664
+		}
eb2664
+	}
eb2664
+	return iova_mode;
eb2664
 }
eb2664
 
eb2664
diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
eb2664
index 6276e5d69..b8cc6d31a 100644
eb2664
--- a/drivers/bus/pci/pci_common.c
eb2664
+++ b/drivers/bus/pci/pci_common.c
eb2664
@@ -529,4 +529,69 @@ pci_unplug(struct rte_device *dev)
eb2664
 }
eb2664
 
eb2664
+static bool
eb2664
+pci_ignore_device(const struct rte_pci_device *dev)
eb2664
+{
eb2664
+	struct rte_devargs *devargs = dev->device.devargs;
eb2664
+
eb2664
+	switch (rte_pci_bus.bus.conf.scan_mode) {
eb2664
+	case RTE_BUS_SCAN_WHITELIST:
eb2664
+		if (devargs && devargs->policy == RTE_DEV_WHITELISTED)
eb2664
+			return false;
eb2664
+		break;
eb2664
+	case RTE_BUS_SCAN_UNDEFINED:
eb2664
+	case RTE_BUS_SCAN_BLACKLIST:
eb2664
+		if (devargs == NULL ||
eb2664
+		    devargs->policy != RTE_DEV_BLACKLISTED)
eb2664
+			return false;
eb2664
+		break;
eb2664
+	}
eb2664
+	return true;
eb2664
+}
eb2664
+
eb2664
+enum rte_iova_mode
eb2664
+rte_pci_get_iommu_class(void)
eb2664
+{
eb2664
+	enum rte_iova_mode iova_mode = RTE_IOVA_DC;
eb2664
+	const struct rte_pci_device *dev;
eb2664
+	const struct rte_pci_driver *drv;
eb2664
+	bool devices_want_va = false;
eb2664
+	bool devices_want_pa = false;
eb2664
+
eb2664
+	FOREACH_DEVICE_ON_PCIBUS(dev) {
eb2664
+		if (pci_ignore_device(dev))
eb2664
+			continue;
eb2664
+		if (dev->kdrv == RTE_KDRV_UNKNOWN ||
eb2664
+		    dev->kdrv == RTE_KDRV_NONE)
eb2664
+			continue;
eb2664
+		FOREACH_DRIVER_ON_PCIBUS(drv) {
eb2664
+			enum rte_iova_mode dev_iova_mode;
eb2664
+
eb2664
+			if (!rte_pci_match(drv, dev))
eb2664
+				continue;
eb2664
+
eb2664
+			dev_iova_mode = pci_device_iova_mode(drv, dev);
eb2664
+			RTE_LOG(DEBUG, EAL, "PCI driver %s for device "
eb2664
+				PCI_PRI_FMT " wants IOVA as '%s'\n",
eb2664
+				drv->driver.name,
eb2664
+				dev->addr.domain, dev->addr.bus,
eb2664
+				dev->addr.devid, dev->addr.function,
eb2664
+				dev_iova_mode == RTE_IOVA_DC ? "DC" :
eb2664
+				(dev_iova_mode == RTE_IOVA_PA ? "PA" : "VA"));
eb2664
+			if (dev_iova_mode == RTE_IOVA_PA)
eb2664
+				devices_want_pa = true;
eb2664
+			else if (dev_iova_mode == RTE_IOVA_VA)
eb2664
+				devices_want_va = true;
eb2664
+		}
eb2664
+	}
eb2664
+	if (devices_want_pa) {
eb2664
+		iova_mode = RTE_IOVA_PA;
eb2664
+		if (devices_want_va)
eb2664
+			RTE_LOG(WARNING, EAL, "Some devices want 'VA' but forcing 'PA' because other devices want it\n");
eb2664
+	} else if (devices_want_va) {
eb2664
+		iova_mode = RTE_IOVA_VA;
eb2664
+	}
eb2664
+	return iova_mode;
eb2664
+}
eb2664
+
eb2664
 struct rte_pci_bus rte_pci_bus = {
eb2664
 	.bus = {
eb2664
diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
eb2664
index 13c3324bb..8a5524052 100644
eb2664
--- a/drivers/bus/pci/private.h
eb2664
+++ b/drivers/bus/pci/private.h
eb2664
@@ -173,4 +173,12 @@ rte_pci_match(const struct rte_pci_driver *pci_drv,
eb2664
 	      const struct rte_pci_device *pci_dev);
eb2664
 
eb2664
+/**
eb2664
+ * OS specific callback for rte_pci_get_iommu_class
eb2664
+ *
eb2664
+ */
eb2664
+enum rte_iova_mode
eb2664
+pci_device_iova_mode(const struct rte_pci_driver *pci_drv,
eb2664
+		     const struct rte_pci_device *pci_dev);
eb2664
+
eb2664
 /**
eb2664
  * Get iommu class of PCI devices on the bus.
eb2664
-- 
eb2664
2.21.0
eb2664