|
|
fbe740 |
From 84c5cad5921e96c6106cfd217de2064b64e1464f Mon Sep 17 00:00:00 2001
|
|
|
fbe740 |
Message-Id: <84c5cad5921e96c6106cfd217de2064b64e1464f@dist-git>
|
|
|
fbe740 |
From: Michal Privoznik <mprivozn@redhat.com>
|
|
|
fbe740 |
Date: Thu, 16 Jan 2020 10:03:54 +0100
|
|
|
fbe740 |
Subject: [PATCH] qemu: Stop domain on failed restore
|
|
|
fbe740 |
MIME-Version: 1.0
|
|
|
fbe740 |
Content-Type: text/plain; charset=UTF-8
|
|
|
fbe740 |
Content-Transfer-Encoding: 8bit
|
|
|
fbe740 |
|
|
|
fbe740 |
When resuming a domain from a save file, we read the domain XML
|
|
|
fbe740 |
from the file, add it onto our internal list of domains, start
|
|
|
fbe740 |
the qemu process, let it load the incoming migration stream and
|
|
|
fbe740 |
resume its vCPUs afterwards. If anything goes wrong, the domain
|
|
|
fbe740 |
object is removed from the list of domains and an error is returned
|
|
|
fbe740 |
to the caller. However, the qemu process might be left behind -
|
|
|
fbe740 |
if resuming vCPUs fails (e.g. because qemu is unable to acquire
|
|
|
fbe740 |
a write lock on a disk) then due to a bug the qemu process is not
|
|
|
fbe740 |
killed but the domain object is removed from the list.
|
|
|
fbe740 |
|
|
|
fbe740 |
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1718707
|
|
|
fbe740 |
|
|
|
fbe740 |
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
|
|
|
fbe740 |
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
|
|
|
fbe740 |
(cherry picked from commit 4c581527d431939a63be70c201b4ddab703cddbe)
|
|
|
fbe740 |
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
|
|
|
fbe740 |
Message-Id: <4048f92488a8b8c31c7a17a14b579840a9492328.1579165329.git.mprivozn@redhat.com>
|
|
|
fbe740 |
Reviewed-by: Ján Tomko <jtomko@redhat.com>
|
|
|
fbe740 |
---
|
|
|
fbe740 |
src/qemu/qemu_driver.c | 23 ++++++++++++-----------
|
|
|
fbe740 |
1 file changed, 12 insertions(+), 11 deletions(-)
|
|
|
fbe740 |
|
|
|
fbe740 |
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
|
|
|
fbe740 |
index ce9b1772c1..217d873671 100644
|
|
|
fbe740 |
--- a/src/qemu/qemu_driver.c
|
|
|
fbe740 |
+++ b/src/qemu/qemu_driver.c
|
|
|
fbe740 |
@@ -6800,7 +6800,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
|
|
|
fbe740 |
{
|
|
|
fbe740 |
qemuDomainObjPrivatePtr priv = vm->privateData;
|
|
|
fbe740 |
int ret = -1;
|
|
|
fbe740 |
- bool restored = false;
|
|
|
fbe740 |
+ bool started = false;
|
|
|
fbe740 |
virObjectEventPtr event;
|
|
|
fbe740 |
VIR_AUTOCLOSE intermediatefd = -1;
|
|
|
fbe740 |
g_autoptr(virCommand) cmd = NULL;
|
|
|
fbe740 |
@@ -6808,6 +6808,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
|
|
|
fbe740 |
g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
|
|
|
fbe740 |
virQEMUSaveHeaderPtr header = &data->header;
|
|
|
fbe740 |
g_autoptr(qemuDomainSaveCookie) cookie = NULL;
|
|
|
fbe740 |
+ int rc = 0;
|
|
|
fbe740 |
|
|
|
fbe740 |
if (virSaveCookieParseString(data->cookie, (virObjectPtr *)&cookie,
|
|
|
fbe740 |
virDomainXMLOptionGetSaveCookie(driver->xmlopt)) < 0)
|
|
|
fbe740 |
@@ -6848,12 +6849,12 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
|
|
|
fbe740 |
VIR_NETDEV_VPORT_PROFILE_OP_RESTORE,
|
|
|
fbe740 |
VIR_QEMU_PROCESS_START_PAUSED |
|
|
|
fbe740 |
VIR_QEMU_PROCESS_START_GEN_VMID) == 0)
|
|
|
fbe740 |
- restored = true;
|
|
|
fbe740 |
+ started = true;
|
|
|
fbe740 |
|
|
|
fbe740 |
if (intermediatefd != -1) {
|
|
|
fbe740 |
virErrorPtr orig_err = NULL;
|
|
|
fbe740 |
|
|
|
fbe740 |
- if (!restored) {
|
|
|
fbe740 |
+ if (!started) {
|
|
|
fbe740 |
/* if there was an error setting up qemu, the intermediate
|
|
|
fbe740 |
* process will wait forever to write to stdout, so we
|
|
|
fbe740 |
* must manually kill it and ignore any error related to
|
|
|
fbe740 |
@@ -6864,21 +6865,17 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
|
|
|
fbe740 |
VIR_FORCE_CLOSE(*fd);
|
|
|
fbe740 |
}
|
|
|
fbe740 |
|
|
|
fbe740 |
- if (virCommandWait(cmd, NULL) < 0) {
|
|
|
fbe740 |
- qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, 0);
|
|
|
fbe740 |
- restored = false;
|
|
|
fbe740 |
- }
|
|
|
fbe740 |
+ rc = virCommandWait(cmd, NULL);
|
|
|
fbe740 |
VIR_DEBUG("Decompression binary stderr: %s", NULLSTR(errbuf));
|
|
|
fbe740 |
-
|
|
|
fbe740 |
virErrorRestore(&orig_err);
|
|
|
fbe740 |
}
|
|
|
fbe740 |
if (VIR_CLOSE(*fd) < 0) {
|
|
|
fbe740 |
virReportSystemError(errno, _("cannot close file: %s"), path);
|
|
|
fbe740 |
- restored = false;
|
|
|
fbe740 |
+ rc = -1;
|
|
|
fbe740 |
}
|
|
|
fbe740 |
|
|
|
fbe740 |
- virDomainAuditStart(vm, "restored", restored);
|
|
|
fbe740 |
- if (!restored)
|
|
|
fbe740 |
+ virDomainAuditStart(vm, "restored", started);
|
|
|
fbe740 |
+ if (!started || rc < 0)
|
|
|
fbe740 |
goto cleanup;
|
|
|
fbe740 |
|
|
|
fbe740 |
/* qemuProcessStart doesn't unset the qemu error reporting infrastructure
|
|
|
fbe740 |
@@ -6918,6 +6915,10 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
|
|
|
fbe740 |
ret = 0;
|
|
|
fbe740 |
|
|
|
fbe740 |
cleanup:
|
|
|
fbe740 |
+ if (ret < 0 && started) {
|
|
|
fbe740 |
+ qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED,
|
|
|
fbe740 |
+ asyncJob, VIR_QEMU_PROCESS_STOP_MIGRATED);
|
|
|
fbe740 |
+ }
|
|
|
fbe740 |
if (qemuSecurityRestoreSavedStateLabel(driver, vm, path) < 0)
|
|
|
fbe740 |
VIR_WARN("failed to restore save state label on %s", path);
|
|
|
fbe740 |
return ret;
|
|
|
fbe740 |
--
|
|
|
fbe740 |
2.25.0
|
|
|
fbe740 |
|