|
|
016a62 |
From 1cfbcbeebc6d9ca1f1f7656fff572bf6ac50de76 Mon Sep 17 00:00:00 2001
|
|
|
016a62 |
From: "plai@redhat.com" <plai@redhat.com>
|
|
|
016a62 |
Date: Tue, 26 Nov 2019 19:36:52 +0000
|
|
|
016a62 |
Subject: [PATCH 08/11] kvm: support -overcommit cpu-pm=on|off
|
|
|
016a62 |
|
|
|
016a62 |
RH-Author: plai@redhat.com
|
|
|
016a62 |
Message-id: <1574797015-32564-5-git-send-email-plai@redhat.com>
|
|
|
016a62 |
Patchwork-id: 92697
|
|
|
016a62 |
O-Subject: [RHEL8.2 qemu-kvm PATCH 4/7] kvm: support -overcommit cpu-pm=on|off
|
|
|
016a62 |
Bugzilla: 1634827
|
|
|
016a62 |
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
|
|
|
016a62 |
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
016a62 |
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
|
|
016a62 |
|
|
|
016a62 |
From: "Michael S. Tsirkin" <mst@redhat.com>
|
|
|
016a62 |
|
|
|
016a62 |
With this flag, kvm allows guest to control host CPU power state. This
|
|
|
016a62 |
increases latency for other processes using same host CPU in an
|
|
|
016a62 |
unpredictable way, but it decreases idle entry/exit times for the
|
|
|
016a62 |
running VCPU, so to use it QEMU needs a hint about whether host CPU is
|
|
|
016a62 |
overcommitted, hence the flag name.
|
|
|
016a62 |
|
|
|
016a62 |
Follow-up patches will expose this capability to guest
|
|
|
016a62 |
(using mwait leaf).
|
|
|
016a62 |
|
|
|
016a62 |
Based on a patch by Wanpeng Li <kernellwp@gmail.com>.
|
|
|
016a62 |
|
|
|
016a62 |
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
016a62 |
Message-Id: <20180622192148.178309-2-mst@redhat.com>
|
|
|
016a62 |
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
016a62 |
(cherry picked from commit 6f131f13e68d648a8e4f083c667ab1acd88ce4cd)
|
|
|
016a62 |
Signed-off-by: Paul Lai <plai@redhat.com>
|
|
|
016a62 |
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
|
016a62 |
---
|
|
|
016a62 |
include/sysemu/sysemu.h | 1 +
|
|
|
016a62 |
qemu-options.hx | 24 ++++++++++++++++++++++++
|
|
|
016a62 |
target/i386/kvm.c | 23 +++++++++++++++++++++++
|
|
|
016a62 |
vl.c | 32 +++++++++++++++++++++++++++++++-
|
|
|
016a62 |
4 files changed, 79 insertions(+), 1 deletion(-)
|
|
|
016a62 |
|
|
|
016a62 |
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
|
|
|
016a62 |
index f20e4f5..f38fad0 100644
|
|
|
016a62 |
--- a/include/sysemu/sysemu.h
|
|
|
016a62 |
+++ b/include/sysemu/sysemu.h
|
|
|
016a62 |
@@ -131,6 +131,7 @@ extern bool boot_strict;
|
|
|
016a62 |
extern uint8_t *boot_splash_filedata;
|
|
|
016a62 |
extern size_t boot_splash_filedata_size;
|
|
|
016a62 |
extern bool enable_mlock;
|
|
|
016a62 |
+extern bool enable_cpu_pm;
|
|
|
016a62 |
extern uint8_t qemu_extra_params_fw[2];
|
|
|
016a62 |
extern QEMUClockType rtc_clock;
|
|
|
016a62 |
extern const char *mem_path;
|
|
|
016a62 |
diff --git a/qemu-options.hx b/qemu-options.hx
|
|
|
016a62 |
index 1243057..99933a0 100644
|
|
|
016a62 |
--- a/qemu-options.hx
|
|
|
016a62 |
+++ b/qemu-options.hx
|
|
|
016a62 |
@@ -3331,6 +3331,30 @@ mlocking qemu-kvm and guest memory can be enabled via @option{mlock=on}
|
|
|
016a62 |
(enabled by default).
|
|
|
016a62 |
ETEXI
|
|
|
016a62 |
|
|
|
016a62 |
+DEF("overcommit", HAS_ARG, QEMU_OPTION_overcommit,
|
|
|
016a62 |
+ "--overcommit [mem-lock=on|off][cpu-pm=on|off]\n"
|
|
|
016a62 |
+ " run qemu with overcommit hints\n"
|
|
|
016a62 |
+ " mem-lock=on|off controls memory lock support (default: off)\n"
|
|
|
016a62 |
+ " cpu-pm=on|off controls cpu power management (default: off)\n",
|
|
|
016a62 |
+ QEMU_ARCH_ALL)
|
|
|
016a62 |
+STEXI
|
|
|
016a62 |
+@item -overcommit mem-lock=on|off
|
|
|
016a62 |
+@item -overcommit cpu-pm=on|off
|
|
|
016a62 |
+@findex -overcommit
|
|
|
016a62 |
+Run qemu with hints about host resource overcommit. The default is
|
|
|
016a62 |
+to assume that host overcommits all resources.
|
|
|
016a62 |
+
|
|
|
016a62 |
+Locking qemu and guest memory can be enabled via @option{mem-lock=on} (disabled
|
|
|
016a62 |
+by default). This works when host memory is not overcommitted and reduces the
|
|
|
016a62 |
+worst-case latency for guest. This is equivalent to @option{realtime}.
|
|
|
016a62 |
+
|
|
|
016a62 |
+Guest ability to manage power state of host cpus (increasing latency for other
|
|
|
016a62 |
+processes on the same host cpu, but decreasing latency for guest) can be
|
|
|
016a62 |
+enabled via @option{cpu-pm=on} (disabled by default). This works best when
|
|
|
016a62 |
+host CPU is not overcommitted. When used, host estimates of CPU cycle and power
|
|
|
016a62 |
+utilization will be incorrect, not taking into account guest idle time.
|
|
|
016a62 |
+ETEXI
|
|
|
016a62 |
+
|
|
|
016a62 |
DEF("gdb", HAS_ARG, QEMU_OPTION_gdb, \
|
|
|
016a62 |
"-gdb dev wait for gdb connection on 'dev'\n", QEMU_ARCH_ALL)
|
|
|
016a62 |
STEXI
|
|
|
016a62 |
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
|
|
|
016a62 |
index 107c53b..879c3e0 100644
|
|
|
016a62 |
--- a/target/i386/kvm.c
|
|
|
016a62 |
+++ b/target/i386/kvm.c
|
|
|
016a62 |
@@ -1606,6 +1606,29 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
|
|
|
016a62 |
smram_machine_done.notify = register_smram_listener;
|
|
|
016a62 |
qemu_add_machine_init_done_notifier(&smram_machine_done);
|
|
|
016a62 |
}
|
|
|
016a62 |
+
|
|
|
016a62 |
+ if (enable_cpu_pm) {
|
|
|
016a62 |
+ int disable_exits = kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS);
|
|
|
016a62 |
+ int ret;
|
|
|
016a62 |
+
|
|
|
016a62 |
+/* Work around for kernel header with a typo. TODO: fix header and drop. */
|
|
|
016a62 |
+#if defined(KVM_X86_DISABLE_EXITS_HTL) && !defined(KVM_X86_DISABLE_EXITS_HLT)
|
|
|
016a62 |
+#define KVM_X86_DISABLE_EXITS_HLT KVM_X86_DISABLE_EXITS_HTL
|
|
|
016a62 |
+#endif
|
|
|
016a62 |
+ if (disable_exits) {
|
|
|
016a62 |
+ disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT |
|
|
|
016a62 |
+ KVM_X86_DISABLE_EXITS_HLT |
|
|
|
016a62 |
+ KVM_X86_DISABLE_EXITS_PAUSE);
|
|
|
016a62 |
+ }
|
|
|
016a62 |
+
|
|
|
016a62 |
+ ret = kvm_vm_enable_cap(s, KVM_CAP_X86_DISABLE_EXITS, 0,
|
|
|
016a62 |
+ disable_exits);
|
|
|
016a62 |
+ if (ret < 0) {
|
|
|
016a62 |
+ error_report("kvm: guest stopping CPU not supported: %s",
|
|
|
016a62 |
+ strerror(-ret));
|
|
|
016a62 |
+ }
|
|
|
016a62 |
+ }
|
|
|
016a62 |
+
|
|
|
016a62 |
return 0;
|
|
|
016a62 |
}
|
|
|
016a62 |
|
|
|
016a62 |
diff --git a/vl.c b/vl.c
|
|
|
016a62 |
index 932c1cf..aa08ab5 100644
|
|
|
016a62 |
--- a/vl.c
|
|
|
016a62 |
+++ b/vl.c
|
|
|
016a62 |
@@ -150,6 +150,7 @@ ram_addr_t ram_size;
|
|
|
016a62 |
const char *mem_path = NULL;
|
|
|
016a62 |
int mem_prealloc = 0; /* force preallocation of physical target memory */
|
|
|
016a62 |
bool enable_mlock = false;
|
|
|
016a62 |
+bool enable_cpu_pm = false;
|
|
|
016a62 |
int nb_nics;
|
|
|
016a62 |
NICInfo nd_table[MAX_NICS];
|
|
|
016a62 |
int autostart;
|
|
|
016a62 |
@@ -428,6 +429,22 @@ static QemuOptsList qemu_realtime_opts = {
|
|
|
016a62 |
},
|
|
|
016a62 |
};
|
|
|
016a62 |
|
|
|
016a62 |
+static QemuOptsList qemu_overcommit_opts = {
|
|
|
016a62 |
+ .name = "overcommit",
|
|
|
016a62 |
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_overcommit_opts.head),
|
|
|
016a62 |
+ .desc = {
|
|
|
016a62 |
+ {
|
|
|
016a62 |
+ .name = "mem-lock",
|
|
|
016a62 |
+ .type = QEMU_OPT_BOOL,
|
|
|
016a62 |
+ },
|
|
|
016a62 |
+ {
|
|
|
016a62 |
+ .name = "cpu-pm",
|
|
|
016a62 |
+ .type = QEMU_OPT_BOOL,
|
|
|
016a62 |
+ },
|
|
|
016a62 |
+ { /* end of list */ }
|
|
|
016a62 |
+ },
|
|
|
016a62 |
+};
|
|
|
016a62 |
+
|
|
|
016a62 |
static QemuOptsList qemu_msg_opts = {
|
|
|
016a62 |
.name = "msg",
|
|
|
016a62 |
.head = QTAILQ_HEAD_INITIALIZER(qemu_msg_opts.head),
|
|
|
016a62 |
@@ -4089,7 +4106,20 @@ int main(int argc, char **argv, char **envp)
|
|
|
016a62 |
if (!opts) {
|
|
|
016a62 |
exit(1);
|
|
|
016a62 |
}
|
|
|
016a62 |
- enable_mlock = qemu_opt_get_bool(opts, "mlock", true);
|
|
|
016a62 |
+ /* Don't override the -overcommit option if set */
|
|
|
016a62 |
+ enable_mlock = enable_mlock ||
|
|
|
016a62 |
+ qemu_opt_get_bool(opts, "mlock", true);
|
|
|
016a62 |
+ break;
|
|
|
016a62 |
+ case QEMU_OPTION_overcommit:
|
|
|
016a62 |
+ opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
|
|
|
016a62 |
+ optarg, false);
|
|
|
016a62 |
+ if (!opts) {
|
|
|
016a62 |
+ exit(1);
|
|
|
016a62 |
+ }
|
|
|
016a62 |
+ /* Don't override the -realtime option if set */
|
|
|
016a62 |
+ enable_mlock = enable_mlock ||
|
|
|
016a62 |
+ qemu_opt_get_bool(opts, "mem-lock", false);
|
|
|
016a62 |
+ enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", false);
|
|
|
016a62 |
break;
|
|
|
016a62 |
case QEMU_OPTION_msg:
|
|
|
016a62 |
opts = qemu_opts_parse_noisily(qemu_find_opts("msg"), optarg,
|
|
|
016a62 |
--
|
|
|
016a62 |
1.8.3.1
|
|
|
016a62 |
|