Pablo Greco e6a3ae
From 1cfbcbeebc6d9ca1f1f7656fff572bf6ac50de76 Mon Sep 17 00:00:00 2001
Pablo Greco e6a3ae
From: "plai@redhat.com" <plai@redhat.com>
Pablo Greco e6a3ae
Date: Tue, 26 Nov 2019 19:36:52 +0000
Pablo Greco e6a3ae
Subject: [PATCH 08/11] kvm: support -overcommit cpu-pm=on|off
Pablo Greco e6a3ae
Pablo Greco e6a3ae
RH-Author: plai@redhat.com
Pablo Greco e6a3ae
Message-id: <1574797015-32564-5-git-send-email-plai@redhat.com>
Pablo Greco e6a3ae
Patchwork-id: 92697
Pablo Greco e6a3ae
O-Subject: [RHEL8.2 qemu-kvm PATCH 4/7] kvm: support -overcommit cpu-pm=on|off
Pablo Greco e6a3ae
Bugzilla: 1634827
Pablo Greco e6a3ae
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
Pablo Greco e6a3ae
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
Pablo Greco e6a3ae
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
Pablo Greco e6a3ae
Pablo Greco e6a3ae
From: "Michael S. Tsirkin" <mst@redhat.com>
Pablo Greco e6a3ae
Pablo Greco e6a3ae
With this flag, kvm allows guest to control host CPU power state.  This
Pablo Greco e6a3ae
increases latency for other processes using same host CPU in an
Pablo Greco e6a3ae
unpredictable way, but it decreases idle entry/exit times for the
Pablo Greco e6a3ae
running VCPU, so to use it QEMU needs a hint about whether host CPU is
Pablo Greco e6a3ae
overcommitted, hence the flag name.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Follow-up patches will expose this capability to guest
Pablo Greco e6a3ae
(using mwait leaf).
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Based on a patch by Wanpeng Li <kernellwp@gmail.com>.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Pablo Greco e6a3ae
Message-Id: <20180622192148.178309-2-mst@redhat.com>
Pablo Greco e6a3ae
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Pablo Greco e6a3ae
(cherry picked from commit 6f131f13e68d648a8e4f083c667ab1acd88ce4cd)
Pablo Greco e6a3ae
Signed-off-by: Paul Lai <plai@redhat.com>
Pablo Greco e6a3ae
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
Pablo Greco e6a3ae
---
Pablo Greco e6a3ae
 include/sysemu/sysemu.h |  1 +
Pablo Greco e6a3ae
 qemu-options.hx         | 24 ++++++++++++++++++++++++
Pablo Greco e6a3ae
 target/i386/kvm.c       | 23 +++++++++++++++++++++++
Pablo Greco e6a3ae
 vl.c                    | 32 +++++++++++++++++++++++++++++++-
Pablo Greco e6a3ae
 4 files changed, 79 insertions(+), 1 deletion(-)
Pablo Greco e6a3ae
Pablo Greco e6a3ae
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
Pablo Greco e6a3ae
index f20e4f5..f38fad0 100644
Pablo Greco e6a3ae
--- a/include/sysemu/sysemu.h
Pablo Greco e6a3ae
+++ b/include/sysemu/sysemu.h
Pablo Greco e6a3ae
@@ -131,6 +131,7 @@ extern bool boot_strict;
Pablo Greco e6a3ae
 extern uint8_t *boot_splash_filedata;
Pablo Greco e6a3ae
 extern size_t boot_splash_filedata_size;
Pablo Greco e6a3ae
 extern bool enable_mlock;
Pablo Greco e6a3ae
+extern bool enable_cpu_pm;
Pablo Greco e6a3ae
 extern uint8_t qemu_extra_params_fw[2];
Pablo Greco e6a3ae
 extern QEMUClockType rtc_clock;
Pablo Greco e6a3ae
 extern const char *mem_path;
Pablo Greco e6a3ae
diff --git a/qemu-options.hx b/qemu-options.hx
Pablo Greco e6a3ae
index 1243057..99933a0 100644
Pablo Greco e6a3ae
--- a/qemu-options.hx
Pablo Greco e6a3ae
+++ b/qemu-options.hx
Pablo Greco e6a3ae
@@ -3331,6 +3331,30 @@ mlocking qemu-kvm and guest memory can be enabled via @option{mlock=on}
Pablo Greco e6a3ae
 (enabled by default).
Pablo Greco e6a3ae
 ETEXI
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+DEF("overcommit", HAS_ARG, QEMU_OPTION_overcommit,
Pablo Greco e6a3ae
+    "--overcommit [mem-lock=on|off][cpu-pm=on|off]\n"
Pablo Greco e6a3ae
+    "                run qemu with overcommit hints\n"
Pablo Greco e6a3ae
+    "                mem-lock=on|off controls memory lock support (default: off)\n"
Pablo Greco e6a3ae
+    "                cpu-pm=on|off controls cpu power management (default: off)\n",
Pablo Greco e6a3ae
+    QEMU_ARCH_ALL)
Pablo Greco e6a3ae
+STEXI
Pablo Greco e6a3ae
+@item -overcommit mem-lock=on|off
Pablo Greco e6a3ae
+@item -overcommit cpu-pm=on|off
Pablo Greco e6a3ae
+@findex -overcommit
Pablo Greco e6a3ae
+Run qemu with hints about host resource overcommit. The default is
Pablo Greco e6a3ae
+to assume that host overcommits all resources.
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+Locking qemu and guest memory can be enabled via @option{mem-lock=on} (disabled
Pablo Greco e6a3ae
+by default).  This works when host memory is not overcommitted and reduces the
Pablo Greco e6a3ae
+worst-case latency for guest.  This is equivalent to @option{realtime}.
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+Guest ability to manage power state of host cpus (increasing latency for other
Pablo Greco e6a3ae
+processes on the same host cpu, but decreasing latency for guest) can be
Pablo Greco e6a3ae
+enabled via @option{cpu-pm=on} (disabled by default).  This works best when
Pablo Greco e6a3ae
+host CPU is not overcommitted. When used, host estimates of CPU cycle and power
Pablo Greco e6a3ae
+utilization will be incorrect, not taking into account guest idle time.
Pablo Greco e6a3ae
+ETEXI
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
 DEF("gdb", HAS_ARG, QEMU_OPTION_gdb, \
Pablo Greco e6a3ae
     "-gdb dev        wait for gdb connection on 'dev'\n", QEMU_ARCH_ALL)
Pablo Greco e6a3ae
 STEXI
Pablo Greco e6a3ae
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
Pablo Greco e6a3ae
index 107c53b..879c3e0 100644
Pablo Greco e6a3ae
--- a/target/i386/kvm.c
Pablo Greco e6a3ae
+++ b/target/i386/kvm.c
Pablo Greco e6a3ae
@@ -1606,6 +1606,29 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
Pablo Greco e6a3ae
         smram_machine_done.notify = register_smram_listener;
Pablo Greco e6a3ae
         qemu_add_machine_init_done_notifier(&smram_machine_done);
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    if (enable_cpu_pm) {
Pablo Greco e6a3ae
+        int disable_exits = kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS);
Pablo Greco e6a3ae
+        int ret;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+/* Work around for kernel header with a typo. TODO: fix header and drop. */
Pablo Greco e6a3ae
+#if defined(KVM_X86_DISABLE_EXITS_HTL) && !defined(KVM_X86_DISABLE_EXITS_HLT)
Pablo Greco e6a3ae
+#define KVM_X86_DISABLE_EXITS_HLT KVM_X86_DISABLE_EXITS_HTL
Pablo Greco e6a3ae
+#endif
Pablo Greco e6a3ae
+        if (disable_exits) {
Pablo Greco e6a3ae
+            disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT |
Pablo Greco e6a3ae
+                              KVM_X86_DISABLE_EXITS_HLT |
Pablo Greco e6a3ae
+                              KVM_X86_DISABLE_EXITS_PAUSE);
Pablo Greco e6a3ae
+        }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+        ret = kvm_vm_enable_cap(s, KVM_CAP_X86_DISABLE_EXITS, 0,
Pablo Greco e6a3ae
+                                disable_exits);
Pablo Greco e6a3ae
+        if (ret < 0) {
Pablo Greco e6a3ae
+            error_report("kvm: guest stopping CPU not supported: %s",
Pablo Greco e6a3ae
+                         strerror(-ret));
Pablo Greco e6a3ae
+        }
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
     return 0;
Pablo Greco e6a3ae
 }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
diff --git a/vl.c b/vl.c
Pablo Greco e6a3ae
index 932c1cf..aa08ab5 100644
Pablo Greco e6a3ae
--- a/vl.c
Pablo Greco e6a3ae
+++ b/vl.c
Pablo Greco e6a3ae
@@ -150,6 +150,7 @@ ram_addr_t ram_size;
Pablo Greco e6a3ae
 const char *mem_path = NULL;
Pablo Greco e6a3ae
 int mem_prealloc = 0; /* force preallocation of physical target memory */
Pablo Greco e6a3ae
 bool enable_mlock = false;
Pablo Greco e6a3ae
+bool enable_cpu_pm = false;
Pablo Greco e6a3ae
 int nb_nics;
Pablo Greco e6a3ae
 NICInfo nd_table[MAX_NICS];
Pablo Greco e6a3ae
 int autostart;
Pablo Greco e6a3ae
@@ -428,6 +429,22 @@ static QemuOptsList qemu_realtime_opts = {
Pablo Greco e6a3ae
     },
Pablo Greco e6a3ae
 };
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+static QemuOptsList qemu_overcommit_opts = {
Pablo Greco e6a3ae
+    .name = "overcommit",
Pablo Greco e6a3ae
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_overcommit_opts.head),
Pablo Greco e6a3ae
+    .desc = {
Pablo Greco e6a3ae
+        {
Pablo Greco e6a3ae
+            .name = "mem-lock",
Pablo Greco e6a3ae
+            .type = QEMU_OPT_BOOL,
Pablo Greco e6a3ae
+        },
Pablo Greco e6a3ae
+        {
Pablo Greco e6a3ae
+            .name = "cpu-pm",
Pablo Greco e6a3ae
+            .type = QEMU_OPT_BOOL,
Pablo Greco e6a3ae
+        },
Pablo Greco e6a3ae
+        { /* end of list */ }
Pablo Greco e6a3ae
+    },
Pablo Greco e6a3ae
+};
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
 static QemuOptsList qemu_msg_opts = {
Pablo Greco e6a3ae
     .name = "msg",
Pablo Greco e6a3ae
     .head = QTAILQ_HEAD_INITIALIZER(qemu_msg_opts.head),
Pablo Greco e6a3ae
@@ -4089,7 +4106,20 @@ int main(int argc, char **argv, char **envp)
Pablo Greco e6a3ae
                 if (!opts) {
Pablo Greco e6a3ae
                     exit(1);
Pablo Greco e6a3ae
                 }
Pablo Greco e6a3ae
-                enable_mlock = qemu_opt_get_bool(opts, "mlock", true);
Pablo Greco e6a3ae
+                /* Don't override the -overcommit option if set */
Pablo Greco e6a3ae
+                enable_mlock = enable_mlock ||
Pablo Greco e6a3ae
+                    qemu_opt_get_bool(opts, "mlock", true);
Pablo Greco e6a3ae
+                break;
Pablo Greco e6a3ae
+            case QEMU_OPTION_overcommit:
Pablo Greco e6a3ae
+                opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
Pablo Greco e6a3ae
+                                               optarg, false);
Pablo Greco e6a3ae
+                if (!opts) {
Pablo Greco e6a3ae
+                    exit(1);
Pablo Greco e6a3ae
+                }
Pablo Greco e6a3ae
+                /* Don't override the -realtime option if set */
Pablo Greco e6a3ae
+                enable_mlock = enable_mlock ||
Pablo Greco e6a3ae
+                    qemu_opt_get_bool(opts, "mem-lock", false);
Pablo Greco e6a3ae
+                enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", false);
Pablo Greco e6a3ae
                 break;
Pablo Greco e6a3ae
             case QEMU_OPTION_msg:
Pablo Greco e6a3ae
                 opts = qemu_opts_parse_noisily(qemu_find_opts("msg"), optarg,
Pablo Greco e6a3ae
-- 
Pablo Greco e6a3ae
1.8.3.1
Pablo Greco e6a3ae