Blame SOURCES/0003-mem-use-address-hint-for-mapping-hugepages.patch

dd2e6b
From 293c0c4b957f811dc7a099d4bdf8f8acf36f0174 Mon Sep 17 00:00:00 2001
dd2e6b
From: Alejandro Lucero <alejandro.lucero@netronome.com>
dd2e6b
Date: Tue, 10 Jul 2018 18:25:50 +0100
dd2e6b
Subject: [3/5] mem: use address hint for mapping hugepages
dd2e6b
dd2e6b
Linux kernel uses a really high address as starting address for
dd2e6b
serving mmap calls. If there exist addressing limitations and
dd2e6b
IOVA mode is VA, this starting address is likely too high for
dd2e6b
those devices. However, it is possible to use a lower address in
dd2e6b
the process virtual address space as with 64 bits there is a lot
dd2e6b
of available space.
dd2e6b
dd2e6b
This patch adds an address hint as the starting address for 64-bit
dd2e6b
systems.
dd2e6b
dd2e6b
Applicable to v17.11.3 only.
dd2e6b
dd2e6b
Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
dd2e6b
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
dd2e6b
Acked-by: Eelco Chaudron <echaudro@redhat.com>
dd2e6b
---
dd2e6b
 lib/librte_eal/linuxapp/eal/eal_memory.c | 55 ++++++++++++++++++++++++++------
dd2e6b
 1 file changed, 46 insertions(+), 9 deletions(-)
dd2e6b
dd2e6b
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
dd2e6b
index 0913895..bac969a 100644
dd2e6b
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
dd2e6b
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
dd2e6b
@@ -88,6 +88,23 @@
dd2e6b
 
dd2e6b
 static uint64_t baseaddr_offset;
dd2e6b
 
dd2e6b
+#ifdef RTE_ARCH_64
dd2e6b
+/*
dd2e6b
+ * Linux kernel uses a really high address as starting address for serving
dd2e6b
+ * mmap calls. If there exist addressing limitations and IOVA mode is VA,
dd2e6b
+ * this starting address is likely too high for those devices. However, it
dd2e6b
+ * is possible to use a lower address in the process virtual address space
dd2e6b
+ * as with 64 bits there is a lot of available space.
dd2e6b
+ *
dd2e6b
+ * Current known limitations are 39 or 40 bits. Setting the starting address
dd2e6b
+ * at 4GB implies there are 508GB or 1020GB for mapping the available
dd2e6b
+ * hugepages. This is likely enough for most systems, although a device with
dd2e6b
+ * addressing limitations should call rte_dev_check_dma_mask for ensuring all
dd2e6b
+ * memory is within supported range.
dd2e6b
+ */
dd2e6b
+static uint64_t baseaddr = 0x100000000;
dd2e6b
+#endif
dd2e6b
+
dd2e6b
 static bool phys_addrs_available = true;
dd2e6b
 
dd2e6b
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
dd2e6b
@@ -250,6 +267,23 @@ aslr_enabled(void)
dd2e6b
 	}
dd2e6b
 }
dd2e6b
 
dd2e6b
+static void *
dd2e6b
+get_addr_hint(void)
dd2e6b
+{
dd2e6b
+	if (internal_config.base_virtaddr != 0) {
dd2e6b
+		return (void *) (uintptr_t)
dd2e6b
+			    (internal_config.base_virtaddr +
dd2e6b
+			     baseaddr_offset);
dd2e6b
+	} else {
dd2e6b
+#ifdef RTE_ARCH_64
dd2e6b
+		return (void *) (uintptr_t) (baseaddr +
dd2e6b
+				baseaddr_offset);
dd2e6b
+#else
dd2e6b
+		return NULL;
dd2e6b
+#endif
dd2e6b
+	}
dd2e6b
+}
dd2e6b
+
dd2e6b
 /*
dd2e6b
  * Try to mmap *size bytes in /dev/zero. If it is successful, return the
dd2e6b
  * pointer to the mmap'd area and keep *size unmodified. Else, retry
dd2e6b
@@ -260,16 +294,10 @@ aslr_enabled(void)
dd2e6b
 static void *
dd2e6b
 get_virtual_area(size_t *size, size_t hugepage_sz)
dd2e6b
 {
dd2e6b
-	void *addr;
dd2e6b
+	void *addr, *addr_hint;
dd2e6b
 	int fd;
dd2e6b
 	long aligned_addr;
dd2e6b
 
dd2e6b
-	if (internal_config.base_virtaddr != 0) {
dd2e6b
-		addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
dd2e6b
-				baseaddr_offset);
dd2e6b
-	}
dd2e6b
-	else addr = NULL;
dd2e6b
-
dd2e6b
 	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
dd2e6b
 
dd2e6b
 	fd = open("/dev/zero", O_RDONLY);
dd2e6b
@@ -278,7 +306,9 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
dd2e6b
 		return NULL;
dd2e6b
 	}
dd2e6b
 	do {
dd2e6b
-		addr = mmap(addr,
dd2e6b
+		addr_hint = get_addr_hint();
dd2e6b
+
dd2e6b
+		addr = mmap(addr_hint,
dd2e6b
 				(*size) + hugepage_sz, PROT_READ,
dd2e6b
 #ifdef RTE_ARCH_PPC_64
dd2e6b
 				MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
dd2e6b
@@ -286,8 +316,15 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
dd2e6b
 				MAP_PRIVATE,
dd2e6b
 #endif
dd2e6b
 				fd, 0);
dd2e6b
-		if (addr == MAP_FAILED)
dd2e6b
+		if (addr == MAP_FAILED) {
dd2e6b
+			/* map failed. Let's try with less memory */
dd2e6b
 			*size -= hugepage_sz;
dd2e6b
+		} else if (addr_hint && addr != addr_hint) {
dd2e6b
+			/* hint was not used. Try with another offset */
dd2e6b
+			munmap(addr, (*size) + hugepage_sz);
dd2e6b
+			addr = MAP_FAILED;
dd2e6b
+			baseaddr_offset += 0x100000000;
dd2e6b
+		}
dd2e6b
 	} while (addr == MAP_FAILED && *size > 0);
dd2e6b
 
dd2e6b
 	if (addr == MAP_FAILED) {
dd2e6b
-- 
dd2e6b
1.8.3.1
dd2e6b