|
|
dd2e6b |
From 293c0c4b957f811dc7a099d4bdf8f8acf36f0174 Mon Sep 17 00:00:00 2001
|
|
|
dd2e6b |
From: Alejandro Lucero <alejandro.lucero@netronome.com>
|
|
|
dd2e6b |
Date: Tue, 10 Jul 2018 18:25:50 +0100
|
|
|
dd2e6b |
Subject: [3/5] mem: use address hint for mapping hugepages
|
|
|
dd2e6b |
|
|
|
dd2e6b |
Linux kernel uses a really high address as starting address for
|
|
|
dd2e6b |
serving mmap calls. If there exist addressing limitations and
|
|
|
dd2e6b |
IOVA mode is VA, this starting address is likely too high for
|
|
|
dd2e6b |
those devices. However, it is possible to use a lower address in
|
|
|
dd2e6b |
the process virtual address space as with 64 bits there is a lot
|
|
|
dd2e6b |
of available space.
|
|
|
dd2e6b |
|
|
|
dd2e6b |
This patch adds an address hint as the starting address for 64-bit
|
|
|
dd2e6b |
systems.
|
|
|
dd2e6b |
|
|
|
dd2e6b |
Applicable to v17.11.3 only.
|
|
|
dd2e6b |
|
|
|
dd2e6b |
Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
|
|
|
dd2e6b |
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
|
|
|
dd2e6b |
Acked-by: Eelco Chaudron <echaudro@redhat.com>
|
|
|
dd2e6b |
---
|
|
|
dd2e6b |
lib/librte_eal/linuxapp/eal/eal_memory.c | 55 ++++++++++++++++++++++++++------
|
|
|
dd2e6b |
1 file changed, 46 insertions(+), 9 deletions(-)
|
|
|
dd2e6b |
|
|
|
dd2e6b |
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
|
|
|
dd2e6b |
index 0913895..bac969a 100644
|
|
|
dd2e6b |
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
|
|
|
dd2e6b |
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
|
|
|
dd2e6b |
@@ -88,6 +88,23 @@
|
|
|
dd2e6b |
|
|
|
dd2e6b |
static uint64_t baseaddr_offset;
|
|
|
dd2e6b |
|
|
|
dd2e6b |
+#ifdef RTE_ARCH_64
|
|
|
dd2e6b |
+/*
|
|
|
dd2e6b |
+ * Linux kernel uses a really high address as starting address for serving
|
|
|
dd2e6b |
+ * mmap calls. If there exist addressing limitations and IOVA mode is VA,
|
|
|
dd2e6b |
+ * this starting address is likely too high for those devices. However, it
|
|
|
dd2e6b |
+ * is possible to use a lower address in the process virtual address space
|
|
|
dd2e6b |
+ * as with 64 bits there is a lot of available space.
|
|
|
dd2e6b |
+ *
|
|
|
dd2e6b |
+ * Current known limitations are 39 or 40 bits. Setting the starting address
|
|
|
dd2e6b |
+ * at 4GB implies there are 508GB or 1020GB for mapping the available
|
|
|
dd2e6b |
+ * hugepages. This is likely enough for most systems, although a device with
|
|
|
dd2e6b |
+ * addressing limitations should call rte_dev_check_dma_mask for ensuring all
|
|
|
dd2e6b |
+ * memory is within supported range.
|
|
|
dd2e6b |
+ */
|
|
|
dd2e6b |
+static uint64_t baseaddr = 0x100000000;
|
|
|
dd2e6b |
+#endif
|
|
|
dd2e6b |
+
|
|
|
dd2e6b |
static bool phys_addrs_available = true;
|
|
|
dd2e6b |
|
|
|
dd2e6b |
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
|
|
|
dd2e6b |
@@ -250,6 +267,23 @@ aslr_enabled(void)
|
|
|
dd2e6b |
}
|
|
|
dd2e6b |
}
|
|
|
dd2e6b |
|
|
|
dd2e6b |
+static void *
|
|
|
dd2e6b |
+get_addr_hint(void)
|
|
|
dd2e6b |
+{
|
|
|
dd2e6b |
+ if (internal_config.base_virtaddr != 0) {
|
|
|
dd2e6b |
+ return (void *) (uintptr_t)
|
|
|
dd2e6b |
+ (internal_config.base_virtaddr +
|
|
|
dd2e6b |
+ baseaddr_offset);
|
|
|
dd2e6b |
+ } else {
|
|
|
dd2e6b |
+#ifdef RTE_ARCH_64
|
|
|
dd2e6b |
+ return (void *) (uintptr_t) (baseaddr +
|
|
|
dd2e6b |
+ baseaddr_offset);
|
|
|
dd2e6b |
+#else
|
|
|
dd2e6b |
+ return NULL;
|
|
|
dd2e6b |
+#endif
|
|
|
dd2e6b |
+ }
|
|
|
dd2e6b |
+}
|
|
|
dd2e6b |
+
|
|
|
dd2e6b |
/*
|
|
|
dd2e6b |
* Try to mmap *size bytes in /dev/zero. If it is successful, return the
|
|
|
dd2e6b |
* pointer to the mmap'd area and keep *size unmodified. Else, retry
|
|
|
dd2e6b |
@@ -260,16 +294,10 @@ aslr_enabled(void)
|
|
|
dd2e6b |
static void *
|
|
|
dd2e6b |
get_virtual_area(size_t *size, size_t hugepage_sz)
|
|
|
dd2e6b |
{
|
|
|
dd2e6b |
- void *addr;
|
|
|
dd2e6b |
+ void *addr, *addr_hint;
|
|
|
dd2e6b |
int fd;
|
|
|
dd2e6b |
long aligned_addr;
|
|
|
dd2e6b |
|
|
|
dd2e6b |
- if (internal_config.base_virtaddr != 0) {
|
|
|
dd2e6b |
- addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
|
|
|
dd2e6b |
- baseaddr_offset);
|
|
|
dd2e6b |
- }
|
|
|
dd2e6b |
- else addr = NULL;
|
|
|
dd2e6b |
-
|
|
|
dd2e6b |
RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
|
|
|
dd2e6b |
|
|
|
dd2e6b |
fd = open("/dev/zero", O_RDONLY);
|
|
|
dd2e6b |
@@ -278,7 +306,9 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
|
|
|
dd2e6b |
return NULL;
|
|
|
dd2e6b |
}
|
|
|
dd2e6b |
do {
|
|
|
dd2e6b |
- addr = mmap(addr,
|
|
|
dd2e6b |
+ addr_hint = get_addr_hint();
|
|
|
dd2e6b |
+
|
|
|
dd2e6b |
+ addr = mmap(addr_hint,
|
|
|
dd2e6b |
(*size) + hugepage_sz, PROT_READ,
|
|
|
dd2e6b |
#ifdef RTE_ARCH_PPC_64
|
|
|
dd2e6b |
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
|
|
|
dd2e6b |
@@ -286,8 +316,15 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
|
|
|
dd2e6b |
MAP_PRIVATE,
|
|
|
dd2e6b |
#endif
|
|
|
dd2e6b |
fd, 0);
|
|
|
dd2e6b |
- if (addr == MAP_FAILED)
|
|
|
dd2e6b |
+ if (addr == MAP_FAILED) {
|
|
|
dd2e6b |
+ /* map failed. Let's try with less memory */
|
|
|
dd2e6b |
*size -= hugepage_sz;
|
|
|
dd2e6b |
+ } else if (addr_hint && addr != addr_hint) {
|
|
|
dd2e6b |
+ /* hint was not used. Try with another offset */
|
|
|
dd2e6b |
+ munmap(addr, (*size) + hugepage_sz);
|
|
|
dd2e6b |
+ addr = MAP_FAILED;
|
|
|
dd2e6b |
+ baseaddr_offset += 0x100000000;
|
|
|
dd2e6b |
+ }
|
|
|
dd2e6b |
} while (addr == MAP_FAILED && *size > 0);
|
|
|
dd2e6b |
|
|
|
dd2e6b |
if (addr == MAP_FAILED) {
|
|
|
dd2e6b |
--
|
|
|
dd2e6b |
1.8.3.1
|
|
|
dd2e6b |
|