yeahuh / rpms / qemu-kvm

Forked from rpms/qemu-kvm 2 years ago
Clone
Pablo Greco e6a3ae
From e69f257e657473ba59f48692d387e292a24892bb Mon Sep 17 00:00:00 2001
Pablo Greco e6a3ae
From: "plai@redhat.com" <plai@redhat.com>
Pablo Greco e6a3ae
Date: Tue, 20 Aug 2019 16:12:50 +0100
Pablo Greco e6a3ae
Subject: [PATCH 03/11] mmap-alloc: fix hugetlbfs misaligned length in ppc64
Pablo Greco e6a3ae
Pablo Greco e6a3ae
RH-Author: plai@redhat.com
Pablo Greco e6a3ae
Message-id: <1566317571-5697-4-git-send-email-plai@redhat.com>
Pablo Greco e6a3ae
Patchwork-id: 90082
Pablo Greco e6a3ae
O-Subject: [RHEL8.2 qemu-kvm PATCH 3/4] mmap-alloc: fix hugetlbfs misaligned length in ppc64
Pablo Greco e6a3ae
Bugzilla: 1539282
Pablo Greco e6a3ae
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Pablo Greco e6a3ae
RH-Acked-by: Pankaj Gupta <pagupta@redhat.com>
Pablo Greco e6a3ae
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
Pablo Greco e6a3ae
Pablo Greco e6a3ae
From: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
Pablo Greco e6a3ae
Pablo Greco e6a3ae
The commit 7197fb4058bcb68986bae2bb2c04d6370f3e7218 ("util/mmap-alloc:
Pablo Greco e6a3ae
fix hugetlb support on ppc64") fixed Huge TLB mappings on ppc64.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
However, we still need to consider the underlying huge page size
Pablo Greco e6a3ae
during munmap() because it requires that both address and length be a
Pablo Greco e6a3ae
multiple of the underlying huge page size for Huge TLB mappings.
Pablo Greco e6a3ae
Quote from "Huge page (Huge TLB) mappings" paragraph under NOTES
Pablo Greco e6a3ae
section of the munmap(2) manual:
Pablo Greco e6a3ae
Pablo Greco e6a3ae
  "For munmap(), addr and length must both be a multiple of the
Pablo Greco e6a3ae
  underlying huge page size."
Pablo Greco e6a3ae
Pablo Greco e6a3ae
On ppc64, the munmap() in qemu_ram_munmap() does not work for Huge TLB
Pablo Greco e6a3ae
mappings because the mapped segment can be aligned with the underlying
Pablo Greco e6a3ae
huge page size rather than with the native system page size, as
Pablo Greco e6a3ae
returned by getpagesize().
Pablo Greco e6a3ae
Pablo Greco e6a3ae
This has the side effect of not releasing huge pages back to the pool
Pablo Greco e6a3ae
after a hugetlbfs file-backed memory device is hot-unplugged.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
This patch fixes the situation in qemu_ram_mmap() and
Pablo Greco e6a3ae
qemu_ram_munmap() by considering the underlying page size on ppc64.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
After this patch, memory hot-unplug releases huge pages back to the
Pablo Greco e6a3ae
pool.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Fixes: 7197fb4058bcb68986bae2bb2c04d6370f3e7218
Pablo Greco e6a3ae
Signed-off-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
Pablo Greco e6a3ae
Reviewed-by: Greg Kurz <groug@kaod.org>
Pablo Greco e6a3ae
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Pablo Greco e6a3ae
(cherry picked from commit 53adb9d43e1abba187387a51f238e878e934c647)
Pablo Greco e6a3ae
Signed-off-by: Paul Lai <plai@redhat.com>
Pablo Greco e6a3ae
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
Pablo Greco e6a3ae
---
Pablo Greco e6a3ae
 exec.c                    |  4 ++--
Pablo Greco e6a3ae
 include/qemu/mmap-alloc.h |  2 +-
Pablo Greco e6a3ae
 util/mmap-alloc.c         | 22 ++++++++++++++++------
Pablo Greco e6a3ae
 util/oslib-posix.c        |  2 +-
Pablo Greco e6a3ae
 4 files changed, 20 insertions(+), 10 deletions(-)
Pablo Greco e6a3ae
Pablo Greco e6a3ae
diff --git a/exec.c b/exec.c
Pablo Greco e6a3ae
index a79eaa3..9112d8b 100644
Pablo Greco e6a3ae
--- a/exec.c
Pablo Greco e6a3ae
+++ b/exec.c
Pablo Greco e6a3ae
@@ -1679,7 +1679,7 @@ static void *file_ram_alloc(RAMBlock *block,
Pablo Greco e6a3ae
     if (mem_prealloc) {
Pablo Greco e6a3ae
         os_mem_prealloc(fd, area, memory, smp_cpus, errp);
Pablo Greco e6a3ae
         if (errp && *errp) {
Pablo Greco e6a3ae
-            qemu_ram_munmap(area, memory);
Pablo Greco e6a3ae
+            qemu_ram_munmap(fd, area, memory);
Pablo Greco e6a3ae
             return NULL;
Pablo Greco e6a3ae
         }
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
@@ -2200,7 +2200,7 @@ static void reclaim_ramblock(RAMBlock *block)
Pablo Greco e6a3ae
         xen_invalidate_map_cache_entry(block->host);
Pablo Greco e6a3ae
 #ifndef _WIN32
Pablo Greco e6a3ae
     } else if (block->fd >= 0) {
Pablo Greco e6a3ae
-        qemu_ram_munmap(block->host, block->max_length);
Pablo Greco e6a3ae
+        qemu_ram_munmap(block->fd, block->host, block->max_length);
Pablo Greco e6a3ae
         close(block->fd);
Pablo Greco e6a3ae
 #endif
Pablo Greco e6a3ae
     } else {
Pablo Greco e6a3ae
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
Pablo Greco e6a3ae
index 190688a..eec98d8 100644
Pablo Greco e6a3ae
--- a/include/qemu/mmap-alloc.h
Pablo Greco e6a3ae
+++ b/include/qemu/mmap-alloc.h
Pablo Greco e6a3ae
@@ -28,6 +28,6 @@ void *qemu_ram_mmap(int fd,
Pablo Greco e6a3ae
                     bool shared,
Pablo Greco e6a3ae
                     bool is_pmem);
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
-void qemu_ram_munmap(void *ptr, size_t size);
Pablo Greco e6a3ae
+void qemu_ram_munmap(int fd, void *ptr, size_t size);
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
 #endif
Pablo Greco e6a3ae
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
Pablo Greco e6a3ae
index b29fcee..bbd9077 100644
Pablo Greco e6a3ae
--- a/util/mmap-alloc.c
Pablo Greco e6a3ae
+++ b/util/mmap-alloc.c
Pablo Greco e6a3ae
@@ -82,6 +82,7 @@ void *qemu_ram_mmap(int fd,
Pablo Greco e6a3ae
     int flags;
Pablo Greco e6a3ae
     int guardfd;
Pablo Greco e6a3ae
     size_t offset;
Pablo Greco e6a3ae
+    size_t pagesize;
Pablo Greco e6a3ae
     size_t total;
Pablo Greco e6a3ae
     void *guardptr;
Pablo Greco e6a3ae
     void *ptr;
Pablo Greco e6a3ae
@@ -102,7 +103,8 @@ void *qemu_ram_mmap(int fd,
Pablo Greco e6a3ae
      * anonymous memory is OK.
Pablo Greco e6a3ae
      */
Pablo Greco e6a3ae
     flags = MAP_PRIVATE;
Pablo Greco e6a3ae
-    if (fd == -1 || qemu_fd_getpagesize(fd) == getpagesize()) {
Pablo Greco e6a3ae
+    pagesize = qemu_fd_getpagesize(fd);
Pablo Greco e6a3ae
+    if (fd == -1 || pagesize == getpagesize()) {
Pablo Greco e6a3ae
         guardfd = -1;
Pablo Greco e6a3ae
         flags |= MAP_ANONYMOUS;
Pablo Greco e6a3ae
     } else {
Pablo Greco e6a3ae
@@ -111,6 +113,7 @@ void *qemu_ram_mmap(int fd,
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 #else
Pablo Greco e6a3ae
     guardfd = -1;
Pablo Greco e6a3ae
+    pagesize = getpagesize();
Pablo Greco e6a3ae
     flags = MAP_PRIVATE | MAP_ANONYMOUS;
Pablo Greco e6a3ae
 #endif
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
@@ -122,7 +125,7 @@ void *qemu_ram_mmap(int fd,
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     assert(is_power_of_2(align));
Pablo Greco e6a3ae
     /* Always align to host page size */
Pablo Greco e6a3ae
-    assert(align >= getpagesize());
Pablo Greco e6a3ae
+    assert(align >= pagesize);
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     flags = MAP_FIXED;
Pablo Greco e6a3ae
     flags |= fd == -1 ? MAP_ANONYMOUS : 0;
Pablo Greco e6a3ae
@@ -145,17 +148,24 @@ void *qemu_ram_mmap(int fd,
Pablo Greco e6a3ae
      * a guard page guarding against potential buffer overflows.
Pablo Greco e6a3ae
      */
Pablo Greco e6a3ae
     total -= offset;
Pablo Greco e6a3ae
-    if (total > size + getpagesize()) {
Pablo Greco e6a3ae
-        munmap(ptr + size + getpagesize(), total - size - getpagesize());
Pablo Greco e6a3ae
+    if (total > size + pagesize) {
Pablo Greco e6a3ae
+        munmap(ptr + size + pagesize, total - size - pagesize);
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     return ptr;
Pablo Greco e6a3ae
 }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
-void qemu_ram_munmap(void *ptr, size_t size)
Pablo Greco e6a3ae
+void qemu_ram_munmap(int fd, void *ptr, size_t size)
Pablo Greco e6a3ae
 {
Pablo Greco e6a3ae
+    size_t pagesize;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
     if (ptr) {
Pablo Greco e6a3ae
         /* Unmap both the RAM block and the guard page */
Pablo Greco e6a3ae
-        munmap(ptr, size + getpagesize());
Pablo Greco e6a3ae
+#if defined(__powerpc64__) && defined(__linux__)
Pablo Greco e6a3ae
+        pagesize = qemu_fd_getpagesize(fd);
Pablo Greco e6a3ae
+#else
Pablo Greco e6a3ae
+        pagesize = getpagesize();
Pablo Greco e6a3ae
+#endif
Pablo Greco e6a3ae
+        munmap(ptr, size + pagesize);
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 }
Pablo Greco e6a3ae
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
Pablo Greco e6a3ae
index c36b2bb..7b6db04 100644
Pablo Greco e6a3ae
--- a/util/oslib-posix.c
Pablo Greco e6a3ae
+++ b/util/oslib-posix.c
Pablo Greco e6a3ae
@@ -153,7 +153,7 @@ void qemu_vfree(void *ptr)
Pablo Greco e6a3ae
 void qemu_anon_ram_free(void *ptr, size_t size)
Pablo Greco e6a3ae
 {
Pablo Greco e6a3ae
     trace_qemu_anon_ram_free(ptr, size);
Pablo Greco e6a3ae
-    qemu_ram_munmap(ptr, size);
Pablo Greco e6a3ae
+    qemu_ram_munmap(-1, ptr, size);
Pablo Greco e6a3ae
 }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
 void qemu_set_block(int fd)
Pablo Greco e6a3ae
-- 
Pablo Greco e6a3ae
1.8.3.1
Pablo Greco e6a3ae