render / rpms / libvirt

Forked from rpms/libvirt 11 months ago
Clone
Pablo Greco 40546a
From af079d99de7c556c3b9bb10037dae90e0f23f38a Mon Sep 17 00:00:00 2001
Pablo Greco 40546a
Message-Id: <af079d99de7c556c3b9bb10037dae90e0f23f38a@dist-git>
Pablo Greco 40546a
From: Michal Privoznik <mprivozn@redhat.com>
Pablo Greco 40546a
Date: Tue, 18 Dec 2018 11:47:36 +0100
Pablo Greco 40546a
Subject: [PATCH] qemu: Don't use -mem-prealloc along with .prealloc=yes
Pablo Greco 40546a
MIME-Version: 1.0
Pablo Greco 40546a
Content-Type: text/plain; charset=UTF-8
Pablo Greco 40546a
Content-Transfer-Encoding: 8bit
Pablo Greco 40546a
Pablo Greco 40546a
https://bugzilla.redhat.com/show_bug.cgi?id=1624223
Pablo Greco 40546a
Pablo Greco 40546a
There are two ways to request memory preallocation on the command line:
Pablo Greco 40546a
-mem-prealloc and .prealloc attribute for a memory-backend-file.
Pablo Greco 40546a
However, as it turns out, it's not safe to use both at the same
Pablo Greco 40546a
time. If -mem-prealloc is used then qemu will fully allocate the
Pablo Greco 40546a
memory (this is done by actually touching every page that has
Pablo Greco 40546a
been allocated). Then, if .prealloc=yes is specified,
Pablo Greco 40546a
mbind(flags = MPOL_MF_STRICT | MPOL_MF_MOVE) is called which:
Pablo Greco 40546a
Pablo Greco 40546a
a) has to (possibly) move the memory to a different NUMA node,
Pablo Greco 40546a
b) can have no effect when hugepages are in play (thus ignoring user
Pablo Greco 40546a
request to place memory on desired NUMA nodes).
Pablo Greco 40546a
Pablo Greco 40546a
Prefer -mem-prealloc as it is more backward compatible
Pablo Greco 40546a
compared to switching to "-numa node,memdev= + -object
Pablo Greco 40546a
memory-backend-file".
Pablo Greco 40546a
Pablo Greco 40546a
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Pablo Greco 40546a
Reviewed-by: John Ferlan <jferlan@redhat.com>
Pablo Greco 40546a
(cherry picked from commit c658764decf357ef2a064f09235fb6b8bd027f8b)
Pablo Greco 40546a
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Pablo Greco 40546a
Pablo Greco 40546a
Conflicts:
Pablo Greco 40546a
src/qemu/qemu_command.c:
Pablo Greco 40546a
src/qemu/qemu_domain.c:
Pablo Greco 40546a
src/qemu/qemu_domain.h: Context mostly, the upstream code
Pablo Greco 40546a
                        diverged.
Pablo Greco 40546a
Pablo Greco 40546a
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Pablo Greco 40546a
Message-Id: <37771aafbb9d1855721efde7ade34c7b98fb1fc7.1545129996.git.mprivozn@redhat.com>
Pablo Greco 40546a
Reviewed-by: Ján Tomko <jtomko@redhat.com>
Pablo Greco 40546a
---
Pablo Greco 40546a
 src/qemu/qemu_command.c                       | 26 ++++++++++++-------
Pablo Greco 40546a
 src/qemu/qemu_domain.c                        |  7 +++++
Pablo Greco 40546a
 src/qemu/qemu_domain.h                        |  3 +++
Pablo Greco 40546a
 .../hugepages-numa-default-dimm.args          |  2 +-
Pablo Greco 40546a
 4 files changed, 28 insertions(+), 10 deletions(-)
Pablo Greco 40546a
Pablo Greco 40546a
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
Pablo Greco 40546a
index fa2b904239..7ffc4358e3 100644
Pablo Greco 40546a
--- a/src/qemu/qemu_command.c
Pablo Greco 40546a
+++ b/src/qemu/qemu_command.c
Pablo Greco 40546a
@@ -3185,11 +3185,13 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,
Pablo Greco 40546a
         if (useHugepage) {
Pablo Greco 40546a
             if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &memPath) < 0)
Pablo Greco 40546a
                 goto cleanup;
Pablo Greco 40546a
-            prealloc = true;
Pablo Greco 40546a
+            if (!priv->memPrealloc)
Pablo Greco 40546a
+                prealloc = true;
Pablo Greco 40546a
         } else if (mem->nvdimmPath) {
Pablo Greco 40546a
             if (VIR_STRDUP(memPath, mem->nvdimmPath) < 0)
Pablo Greco 40546a
                 goto cleanup;
Pablo Greco 40546a
-            prealloc = true;
Pablo Greco 40546a
+            if (!priv->memPrealloc)
Pablo Greco 40546a
+                prealloc = true;
Pablo Greco 40546a
         } else {
Pablo Greco 40546a
             /* We can have both pagesize and mem source. If that's the case,
Pablo Greco 40546a
              * prefer hugepages as those are more specific. */
Pablo Greco 40546a
@@ -7603,7 +7605,8 @@ qemuBuildSmpCommandLine(virCommandPtr cmd,
Pablo Greco 40546a
 static int
Pablo Greco 40546a
 qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
Pablo Greco 40546a
                     const virDomainDef *def,
Pablo Greco 40546a
-                    virCommandPtr cmd)
Pablo Greco 40546a
+                    virCommandPtr cmd,
Pablo Greco 40546a
+                    qemuDomainObjPrivatePtr priv)
Pablo Greco 40546a
 {
Pablo Greco 40546a
     const long system_page_size = virGetSystemPageSizeKB();
Pablo Greco 40546a
     char *mem_path = NULL;
Pablo Greco 40546a
@@ -7624,8 +7627,10 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
Pablo Greco 40546a
     if (qemuGetDomainHupageMemPath(def, cfg, def->mem.hugepages[0].size, &mem_path) < 0)
Pablo Greco 40546a
         return -1;
Pablo Greco 40546a
 
Pablo Greco 40546a
-    if (def->mem.allocation != VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE)
Pablo Greco 40546a
+    if (def->mem.allocation != VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) {
Pablo Greco 40546a
         virCommandAddArgList(cmd, "-mem-prealloc", NULL);
Pablo Greco 40546a
+        priv->memPrealloc = true;
Pablo Greco 40546a
+    }
Pablo Greco 40546a
 
Pablo Greco 40546a
     virCommandAddArgList(cmd, "-mem-path", mem_path, NULL);
Pablo Greco 40546a
     VIR_FREE(mem_path);
Pablo Greco 40546a
@@ -7638,7 +7643,8 @@ static int
Pablo Greco 40546a
 qemuBuildMemCommandLine(virCommandPtr cmd,
Pablo Greco 40546a
                         virQEMUDriverConfigPtr cfg,
Pablo Greco 40546a
                         const virDomainDef *def,
Pablo Greco 40546a
-                        virQEMUCapsPtr qemuCaps)
Pablo Greco 40546a
+                        virQEMUCapsPtr qemuCaps,
Pablo Greco 40546a
+                        qemuDomainObjPrivatePtr priv)
Pablo Greco 40546a
 {
Pablo Greco 40546a
     if (qemuDomainDefValidateMemoryHotplug(def, qemuCaps, NULL) < 0)
Pablo Greco 40546a
         return -1;
Pablo Greco 40546a
@@ -7657,15 +7663,17 @@ qemuBuildMemCommandLine(virCommandPtr cmd,
Pablo Greco 40546a
                               virDomainDefGetMemoryInitial(def) / 1024);
Pablo Greco 40546a
     }
Pablo Greco 40546a
 
Pablo Greco 40546a
-    if (def->mem.allocation == VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE)
Pablo Greco 40546a
+    if (def->mem.allocation == VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) {
Pablo Greco 40546a
         virCommandAddArgList(cmd, "-mem-prealloc", NULL);
Pablo Greco 40546a
+        priv->memPrealloc = true;
Pablo Greco 40546a
+    }
Pablo Greco 40546a
 
Pablo Greco 40546a
     /*
Pablo Greco 40546a
      * Add '-mem-path' (and '-mem-prealloc') parameter here if
Pablo Greco 40546a
      * the hugepages and no numa node is specified.
Pablo Greco 40546a
      */
Pablo Greco 40546a
     if (!virDomainNumaGetNodeCount(def->numa) &&
Pablo Greco 40546a
-        qemuBuildMemPathStr(cfg, def, cmd) < 0)
Pablo Greco 40546a
+        qemuBuildMemPathStr(cfg, def, cmd, priv) < 0)
Pablo Greco 40546a
         return -1;
Pablo Greco 40546a
 
Pablo Greco 40546a
     if (def->mem.locked && !virQEMUCapsGet(qemuCaps, QEMU_CAPS_REALTIME_MLOCK)) {
Pablo Greco 40546a
@@ -7772,7 +7780,7 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
Pablo Greco 40546a
     }
Pablo Greco 40546a
 
Pablo Greco 40546a
     if (!needBackend &&
Pablo Greco 40546a
-        qemuBuildMemPathStr(cfg, def, cmd) < 0)
Pablo Greco 40546a
+        qemuBuildMemPathStr(cfg, def, cmd, priv) < 0)
Pablo Greco 40546a
         goto cleanup;
Pablo Greco 40546a
 
Pablo Greco 40546a
     for (i = 0; i < ncells; i++) {
Pablo Greco 40546a
@@ -10445,7 +10453,7 @@ qemuBuildCommandLine(virQEMUDriverPtr driver,
Pablo Greco 40546a
     if (!migrateURI && !snapshot && qemuDomainAlignMemorySizes(def) < 0)
Pablo Greco 40546a
         goto error;
Pablo Greco 40546a
 
Pablo Greco 40546a
-    if (qemuBuildMemCommandLine(cmd, cfg, def, qemuCaps) < 0)
Pablo Greco 40546a
+    if (qemuBuildMemCommandLine(cmd, cfg, def, qemuCaps, priv) < 0)
Pablo Greco 40546a
         goto error;
Pablo Greco 40546a
 
Pablo Greco 40546a
     if (qemuBuildSmpCommandLine(cmd, def) < 0)
Pablo Greco 40546a
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
Pablo Greco 40546a
index 8604385aa2..95b84af78a 100644
Pablo Greco 40546a
--- a/src/qemu/qemu_domain.c
Pablo Greco 40546a
+++ b/src/qemu/qemu_domain.c
Pablo Greco 40546a
@@ -1936,6 +1936,8 @@ qemuDomainObjPrivateDataClear(qemuDomainObjPrivatePtr priv)
Pablo Greco 40546a
     VIR_FREE(priv->libDir);
Pablo Greco 40546a
     VIR_FREE(priv->channelTargetDir);
Pablo Greco 40546a
 
Pablo Greco 40546a
+    priv->memPrealloc = false;
Pablo Greco 40546a
+
Pablo Greco 40546a
     /* remove automatic pinning data */
Pablo Greco 40546a
     virBitmapFree(priv->autoNodeset);
Pablo Greco 40546a
     priv->autoNodeset = NULL;
Pablo Greco 40546a
@@ -2439,6 +2441,9 @@ qemuDomainObjPrivateXMLFormat(virBufferPtr buf,
Pablo Greco 40546a
 
Pablo Greco 40546a
     qemuDomainObjPrivateXMLFormatPR(buf, priv);
Pablo Greco 40546a
 
Pablo Greco 40546a
+    if (priv->memPrealloc)
Pablo Greco 40546a
+        virBufferAddLit(buf, "<memPrealloc/>\n");
Pablo Greco 40546a
+
Pablo Greco 40546a
     if (qemuDomainObjPrivateXMLFormatBlockjobs(buf, vm) < 0)
Pablo Greco 40546a
         return -1;
Pablo Greco 40546a
 
Pablo Greco 40546a
@@ -2934,6 +2939,8 @@ qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt,
Pablo Greco 40546a
     if (qemuDomainObjPrivateXMLParseBlockjobs(priv, ctxt) < 0)
Pablo Greco 40546a
         goto error;
Pablo Greco 40546a
 
Pablo Greco 40546a
+    priv->memPrealloc = virXPathBoolean("boolean(./memPrealloc)", ctxt) == 1;
Pablo Greco 40546a
+
Pablo Greco 40546a
     return 0;
Pablo Greco 40546a
 
Pablo Greco 40546a
  error:
Pablo Greco 40546a
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
Pablo Greco 40546a
index cc406e3ca0..8463a8b706 100644
Pablo Greco 40546a
--- a/src/qemu/qemu_domain.h
Pablo Greco 40546a
+++ b/src/qemu/qemu_domain.h
Pablo Greco 40546a
@@ -367,6 +367,9 @@ struct _qemuDomainObjPrivate {
Pablo Greco 40546a
     /* qemuProcessStartCPUs stores the reason for starting vCPUs here for the
Pablo Greco 40546a
      * RESUME event handler to use it */
Pablo Greco 40546a
     virDomainRunningReason runningReason;
Pablo Greco 40546a
+
Pablo Greco 40546a
+    /* true if global -mem-prealloc appears on cmd line */
Pablo Greco 40546a
+    bool memPrealloc;
Pablo Greco 40546a
 };
Pablo Greco 40546a
 
Pablo Greco 40546a
 # define QEMU_DOMAIN_PRIVATE(vm) \
Pablo Greco 40546a
diff --git a/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args b/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args
Pablo Greco 40546a
index 855966a137..e7294a0882 100644
Pablo Greco 40546a
--- a/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args
Pablo Greco 40546a
+++ b/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args
Pablo Greco 40546a
@@ -13,7 +13,7 @@ QEMU_AUDIO_DRV=none \
Pablo Greco 40546a
 -mem-prealloc \
Pablo Greco 40546a
 -mem-path /dev/hugepages2M/libvirt/qemu/-1-fedora \
Pablo Greco 40546a
 -numa node,nodeid=0,cpus=0-1,mem=1024 \
Pablo Greco 40546a
--object memory-backend-file,id=memdimm0,prealloc=yes,\
Pablo Greco 40546a
+-object memory-backend-file,id=memdimm0,\
Pablo Greco 40546a
 mem-path=/dev/hugepages1G/libvirt/qemu/-1-fedora,size=1073741824,\
Pablo Greco 40546a
 host-nodes=1-3,policy=bind \
Pablo Greco 40546a
 -device pc-dimm,node=0,memdev=memdimm0,id=dimm0,slot=0 \
Pablo Greco 40546a
-- 
Pablo Greco 40546a
2.22.0
Pablo Greco 40546a