|
|
eb2664 |
From a6fc8e35d9e72b2acd605b6c6a8b08d2541c0609 Mon Sep 17 00:00:00 2001
|
|
|
eb2664 |
From: Ben Walker <benjamin.walker@intel.com>
|
|
|
eb2664 |
Date: Fri, 14 Jun 2019 11:39:16 +0200
|
|
|
eb2664 |
Subject: [PATCH 1/4] eal: compute IOVA mode based on PA availability
|
|
|
eb2664 |
|
|
|
eb2664 |
Currently, if the bus selects IOVA as PA, the memory init can fail when
|
|
|
eb2664 |
lacking access to physical addresses.
|
|
|
eb2664 |
This makes it quite hard for normal users to understand what is wrong
|
|
|
eb2664 |
since this is the default behavior.
|
|
|
eb2664 |
|
|
|
eb2664 |
Catch this situation earlier in eal init by validating physical addresses
|
|
|
eb2664 |
availability, or select IOVA when no clear preference has been expressed.
|
|
|
eb2664 |
|
|
|
eb2664 |
The bus code is changed so that it reports when it does not care about
|
|
|
eb2664 |
the IOVA mode and lets the eal init decide.
|
|
|
eb2664 |
|
|
|
eb2664 |
In Linux implementation, rework rte_eal_using_phys_addrs() so that it can
|
|
|
eb2664 |
be called earlier but still avoid a circular dependency with
|
|
|
eb2664 |
rte_mem_virt2phys().
|
|
|
eb2664 |
In FreeBSD implementation, rte_eal_using_phys_addrs() always returns
|
|
|
eb2664 |
false, so the detection part is left as is.
|
|
|
eb2664 |
|
|
|
eb2664 |
If librte_kni is compiled in and the KNI kmod is loaded,
|
|
|
eb2664 |
- if the buses requested VA, force to PA if physical addresses are
|
|
|
eb2664 |
available as it was done before,
|
|
|
eb2664 |
- else, keep iova as VA, KNI init will fail later.
|
|
|
eb2664 |
|
|
|
eb2664 |
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
|
|
|
eb2664 |
Signed-off-by: David Marchand <david.marchand@redhat.com>
|
|
|
eb2664 |
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
|
|
|
eb2664 |
|
|
|
eb2664 |
(cherry picked from commit c2361bab70c56f64e50f07946b1b20bf688d782a)
|
|
|
eb2664 |
Signed-off-by: David Marchand <david.marchand@redhat.com>
|
|
|
eb2664 |
---
|
|
|
eb2664 |
lib/librte_eal/bsdapp/eal/eal.c | 10 +++++--
|
|
|
eb2664 |
lib/librte_eal/common/eal_common_bus.c | 4 ---
|
|
|
eb2664 |
lib/librte_eal/common/include/rte_bus.h | 2 +-
|
|
|
eb2664 |
lib/librte_eal/linuxapp/eal/eal.c | 38 ++++++++++++++++++++------
|
|
|
eb2664 |
lib/librte_eal/linuxapp/eal/eal_memory.c | 46 +++++++++-----------------------
|
|
|
eb2664 |
5 files changed, 51 insertions(+), 49 deletions(-)
|
|
|
eb2664 |
|
|
|
eb2664 |
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
|
|
|
eb2664 |
index bfac7fd..14ae853 100644
|
|
|
eb2664 |
--- a/lib/librte_eal/bsdapp/eal/eal.c
|
|
|
eb2664 |
+++ b/lib/librte_eal/bsdapp/eal/eal.c
|
|
|
eb2664 |
@@ -689,13 +689,19 @@ rte_eal_init(int argc, char **argv)
|
|
|
eb2664 |
/* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
|
|
|
eb2664 |
if (internal_config.iova_mode == RTE_IOVA_DC) {
|
|
|
eb2664 |
/* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */
|
|
|
eb2664 |
- rte_eal_get_configuration()->iova_mode =
|
|
|
eb2664 |
- rte_bus_get_iommu_class();
|
|
|
eb2664 |
+ enum rte_iova_mode iova_mode = rte_bus_get_iommu_class();
|
|
|
eb2664 |
+
|
|
|
eb2664 |
+ if (iova_mode == RTE_IOVA_DC)
|
|
|
eb2664 |
+ iova_mode = RTE_IOVA_PA;
|
|
|
eb2664 |
+ rte_eal_get_configuration()->iova_mode = iova_mode;
|
|
|
eb2664 |
} else {
|
|
|
eb2664 |
rte_eal_get_configuration()->iova_mode =
|
|
|
eb2664 |
internal_config.iova_mode;
|
|
|
eb2664 |
}
|
|
|
eb2664 |
|
|
|
eb2664 |
+ RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
|
|
|
eb2664 |
+ rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
|
|
|
eb2664 |
+
|
|
|
eb2664 |
if (internal_config.no_hugetlbfs == 0) {
|
|
|
eb2664 |
/* rte_config isn't initialized yet */
|
|
|
eb2664 |
ret = internal_config.process_type == RTE_PROC_PRIMARY ?
|
|
|
eb2664 |
diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c
|
|
|
eb2664 |
index c8f1901..77f1be1 100644
|
|
|
eb2664 |
--- a/lib/librte_eal/common/eal_common_bus.c
|
|
|
eb2664 |
+++ b/lib/librte_eal/common/eal_common_bus.c
|
|
|
eb2664 |
@@ -237,10 +237,6 @@ rte_bus_get_iommu_class(void)
|
|
|
eb2664 |
mode |= bus->get_iommu_class();
|
|
|
eb2664 |
}
|
|
|
eb2664 |
|
|
|
eb2664 |
- if (mode != RTE_IOVA_VA) {
|
|
|
eb2664 |
- /* Use default IOVA mode */
|
|
|
eb2664 |
- mode = RTE_IOVA_PA;
|
|
|
eb2664 |
- }
|
|
|
eb2664 |
return mode;
|
|
|
eb2664 |
}
|
|
|
eb2664 |
|
|
|
eb2664 |
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
|
|
|
eb2664 |
index 6be4b5c..b87e23b 100644
|
|
|
eb2664 |
--- a/lib/librte_eal/common/include/rte_bus.h
|
|
|
eb2664 |
+++ b/lib/librte_eal/common/include/rte_bus.h
|
|
|
eb2664 |
@@ -348,7 +348,7 @@ struct rte_bus *rte_bus_find_by_name(const char *busname);
|
|
|
eb2664 |
|
|
|
eb2664 |
/**
|
|
|
eb2664 |
* Get the common iommu class of devices bound on to buses available in the
|
|
|
eb2664 |
- * system. The default mode is PA.
|
|
|
eb2664 |
+ * system. RTE_IOVA_DC means that no preference has been expressed.
|
|
|
eb2664 |
*
|
|
|
eb2664 |
* @return
|
|
|
eb2664 |
* enum rte_iova_mode value.
|
|
|
eb2664 |
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
|
|
|
eb2664 |
index 7a08cf1..6899307 100644
|
|
|
eb2664 |
--- a/lib/librte_eal/linuxapp/eal/eal.c
|
|
|
eb2664 |
+++ b/lib/librte_eal/linuxapp/eal/eal.c
|
|
|
eb2664 |
@@ -943,6 +943,7 @@ rte_eal_init(int argc, char **argv)
|
|
|
eb2664 |
static char logid[PATH_MAX];
|
|
|
eb2664 |
char cpuset[RTE_CPU_AFFINITY_STR_LEN];
|
|
|
eb2664 |
char thread_name[RTE_MAX_THREAD_NAME_LEN];
|
|
|
eb2664 |
+ bool phys_addrs;
|
|
|
eb2664 |
|
|
|
eb2664 |
/* checks if the machine is adequate */
|
|
|
eb2664 |
if (!rte_cpu_is_supported()) {
|
|
|
eb2664 |
@@ -1030,25 +1031,46 @@ rte_eal_init(int argc, char **argv)
|
|
|
eb2664 |
return -1;
|
|
|
eb2664 |
}
|
|
|
eb2664 |
|
|
|
eb2664 |
+ phys_addrs = rte_eal_using_phys_addrs() != 0;
|
|
|
eb2664 |
+
|
|
|
eb2664 |
/* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
|
|
|
eb2664 |
if (internal_config.iova_mode == RTE_IOVA_DC) {
|
|
|
eb2664 |
- /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */
|
|
|
eb2664 |
- rte_eal_get_configuration()->iova_mode =
|
|
|
eb2664 |
- rte_bus_get_iommu_class();
|
|
|
eb2664 |
+ /* autodetect the IOVA mapping mode */
|
|
|
eb2664 |
+ enum rte_iova_mode iova_mode = rte_bus_get_iommu_class();
|
|
|
eb2664 |
|
|
|
eb2664 |
+ if (iova_mode == RTE_IOVA_DC) {
|
|
|
eb2664 |
+ iova_mode = phys_addrs ? RTE_IOVA_PA : RTE_IOVA_VA;
|
|
|
eb2664 |
+ RTE_LOG(DEBUG, EAL,
|
|
|
eb2664 |
+ "Buses did not request a specific IOVA mode, using '%s' based on physical addresses availability.\n",
|
|
|
eb2664 |
+ phys_addrs ? "PA" : "VA");
|
|
|
eb2664 |
+ }
|
|
|
eb2664 |
+#ifdef RTE_LIBRTE_KNI
|
|
|
eb2664 |
/* Workaround for KNI which requires physical address to work */
|
|
|
eb2664 |
- if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA &&
|
|
|
eb2664 |
+ if (iova_mode == RTE_IOVA_VA &&
|
|
|
eb2664 |
rte_eal_check_module("rte_kni") == 1) {
|
|
|
eb2664 |
- rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA;
|
|
|
eb2664 |
- RTE_LOG(WARNING, EAL,
|
|
|
eb2664 |
- "Some devices want IOVA as VA but PA will be used because.. "
|
|
|
eb2664 |
- "KNI module inserted\n");
|
|
|
eb2664 |
+ if (phys_addrs) {
|
|
|
eb2664 |
+ iova_mode = RTE_IOVA_PA;
|
|
|
eb2664 |
+ RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module is loaded\n");
|
|
|
eb2664 |
+ } else {
|
|
|
eb2664 |
+ RTE_LOG(DEBUG, EAL, "KNI can not work since physical addresses are unavailable\n");
|
|
|
eb2664 |
+ }
|
|
|
eb2664 |
}
|
|
|
eb2664 |
+#endif
|
|
|
eb2664 |
+ rte_eal_get_configuration()->iova_mode = iova_mode;
|
|
|
eb2664 |
} else {
|
|
|
eb2664 |
rte_eal_get_configuration()->iova_mode =
|
|
|
eb2664 |
internal_config.iova_mode;
|
|
|
eb2664 |
}
|
|
|
eb2664 |
|
|
|
eb2664 |
+ if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) {
|
|
|
eb2664 |
+ rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available");
|
|
|
eb2664 |
+ rte_errno = EINVAL;
|
|
|
eb2664 |
+ return -1;
|
|
|
eb2664 |
+ }
|
|
|
eb2664 |
+
|
|
|
eb2664 |
+ RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
|
|
|
eb2664 |
+ rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
|
|
|
eb2664 |
+
|
|
|
eb2664 |
if (internal_config.no_hugetlbfs == 0) {
|
|
|
eb2664 |
/* rte_config isn't initialized yet */
|
|
|
eb2664 |
ret = internal_config.process_type == RTE_PROC_PRIMARY ?
|
|
|
eb2664 |
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
|
|
|
eb2664 |
index 898bdb7..24d99c0 100644
|
|
|
eb2664 |
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
|
|
|
eb2664 |
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
|
|
|
eb2664 |
@@ -62,34 +62,10 @@
|
|
|
eb2664 |
* zone as well as a physical contiguous zone.
|
|
|
eb2664 |
*/
|
|
|
eb2664 |
|
|
|
eb2664 |
-static bool phys_addrs_available = true;
|
|
|
eb2664 |
+static int phys_addrs_available = -1;
|
|
|
eb2664 |
|
|
|
eb2664 |
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
|
|
|
eb2664 |
|
|
|
eb2664 |
-static void
|
|
|
eb2664 |
-test_phys_addrs_available(void)
|
|
|
eb2664 |
-{
|
|
|
eb2664 |
- uint64_t tmp = 0;
|
|
|
eb2664 |
- phys_addr_t physaddr;
|
|
|
eb2664 |
-
|
|
|
eb2664 |
- if (!rte_eal_has_hugepages()) {
|
|
|
eb2664 |
- RTE_LOG(ERR, EAL,
|
|
|
eb2664 |
- "Started without hugepages support, physical addresses not available\n");
|
|
|
eb2664 |
- phys_addrs_available = false;
|
|
|
eb2664 |
- return;
|
|
|
eb2664 |
- }
|
|
|
eb2664 |
-
|
|
|
eb2664 |
- physaddr = rte_mem_virt2phy(&tmp);
|
|
|
eb2664 |
- if (physaddr == RTE_BAD_PHYS_ADDR) {
|
|
|
eb2664 |
- if (rte_eal_iova_mode() == RTE_IOVA_PA)
|
|
|
eb2664 |
- RTE_LOG(ERR, EAL,
|
|
|
eb2664 |
- "Cannot obtain physical addresses: %s. "
|
|
|
eb2664 |
- "Only vfio will function.\n",
|
|
|
eb2664 |
- strerror(errno));
|
|
|
eb2664 |
- phys_addrs_available = false;
|
|
|
eb2664 |
- }
|
|
|
eb2664 |
-}
|
|
|
eb2664 |
-
|
|
|
eb2664 |
/*
|
|
|
eb2664 |
* Get physical address of any mapped virtual address in the current process.
|
|
|
eb2664 |
*/
|
|
|
eb2664 |
@@ -102,8 +78,7 @@ rte_mem_virt2phy(const void *virtaddr)
|
|
|
eb2664 |
int page_size;
|
|
|
eb2664 |
off_t offset;
|
|
|
eb2664 |
|
|
|
eb2664 |
- /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
|
|
|
eb2664 |
- if (!phys_addrs_available)
|
|
|
eb2664 |
+ if (phys_addrs_available == 0)
|
|
|
eb2664 |
return RTE_BAD_IOVA;
|
|
|
eb2664 |
|
|
|
eb2664 |
/* standard page size */
|
|
|
eb2664 |
@@ -1332,8 +1307,6 @@ eal_legacy_hugepage_init(void)
|
|
|
eb2664 |
int nr_hugefiles, nr_hugepages = 0;
|
|
|
eb2664 |
void *addr;
|
|
|
eb2664 |
|
|
|
eb2664 |
- test_phys_addrs_available();
|
|
|
eb2664 |
-
|
|
|
eb2664 |
memset(used_hp, 0, sizeof(used_hp));
|
|
|
eb2664 |
|
|
|
eb2664 |
/* get pointer to global configuration */
|
|
|
eb2664 |
@@ -1466,7 +1439,7 @@ eal_legacy_hugepage_init(void)
|
|
|
eb2664 |
continue;
|
|
|
eb2664 |
}
|
|
|
eb2664 |
|
|
|
eb2664 |
- if (phys_addrs_available &&
|
|
|
eb2664 |
+ if (rte_eal_using_phys_addrs() &&
|
|
|
eb2664 |
rte_eal_iova_mode() != RTE_IOVA_VA) {
|
|
|
eb2664 |
/* find physical addresses for each hugepage */
|
|
|
eb2664 |
if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
|
|
|
eb2664 |
@@ -1685,8 +1658,6 @@ eal_hugepage_init(void)
|
|
|
eb2664 |
uint64_t memory[RTE_MAX_NUMA_NODES];
|
|
|
eb2664 |
int hp_sz_idx, socket_id;
|
|
|
eb2664 |
|
|
|
eb2664 |
- test_phys_addrs_available();
|
|
|
eb2664 |
-
|
|
|
eb2664 |
memset(used_hp, 0, sizeof(used_hp));
|
|
|
eb2664 |
|
|
|
eb2664 |
for (hp_sz_idx = 0;
|
|
|
eb2664 |
@@ -1812,8 +1783,6 @@ eal_legacy_hugepage_attach(void)
|
|
|
eb2664 |
"into secondary processes\n");
|
|
|
eb2664 |
}
|
|
|
eb2664 |
|
|
|
eb2664 |
- test_phys_addrs_available();
|
|
|
eb2664 |
-
|
|
|
eb2664 |
fd_hugepage = open(eal_hugepage_data_path(), O_RDONLY);
|
|
|
eb2664 |
if (fd_hugepage < 0) {
|
|
|
eb2664 |
RTE_LOG(ERR, EAL, "Could not open %s\n",
|
|
|
eb2664 |
@@ -1953,6 +1922,15 @@ rte_eal_hugepage_attach(void)
|
|
|
eb2664 |
int
|
|
|
eb2664 |
rte_eal_using_phys_addrs(void)
|
|
|
eb2664 |
{
|
|
|
eb2664 |
+ if (phys_addrs_available == -1) {
|
|
|
eb2664 |
+ uint64_t tmp = 0;
|
|
|
eb2664 |
+
|
|
|
eb2664 |
+ if (rte_eal_has_hugepages() != 0 &&
|
|
|
eb2664 |
+ rte_mem_virt2phy(&tmp) != RTE_BAD_PHYS_ADDR)
|
|
|
eb2664 |
+ phys_addrs_available = 1;
|
|
|
eb2664 |
+ else
|
|
|
eb2664 |
+ phys_addrs_available = 0;
|
|
|
eb2664 |
+ }
|
|
|
eb2664 |
return phys_addrs_available;
|
|
|
eb2664 |
}
|
|
|
eb2664 |
|
|
|
eb2664 |
--
|
|
|
eb2664 |
1.8.3.1
|
|
|
eb2664 |
|