c401cc
From aa779e5b9449f52c9a7de83d63fd7f3e2cccbf7a Mon Sep 17 00:00:00 2001
c401cc
Message-Id: <aa779e5b9449f52c9a7de83d63fd7f3e2cccbf7a@dist-git>
c401cc
From: Martin Kletzander <mkletzan@redhat.com>
c401cc
Date: Fri, 7 Feb 2014 11:39:30 +0100
c401cc
Subject: [PATCH] qemu: keep pre-migration domain state after failed migration
c401cc
c401cc
Several code paths shared the same code, which can be moved out into a
c401cc
common function.  In one of those places, qemuMigrationConfirmPhase(),
c401cc
the domain was resumed even if it had not been running before the
c401cc
migration started.
c401cc
c401cc
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1057407
c401cc
c401cc
Signed-off-by: Martin Kletzander <mkletzan@redhat.com>
c401cc
(cherry picked from commit 440a1aa508f7abec635a035158e9b37e179f2db2)
c401cc
c401cc
Conflicts:
c401cc
	src/qemu/qemu_migration.c -- 6ffce0f6
c401cc
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
c401cc
---
c401cc
 src/qemu/qemu_domain.h    |   3 +-
c401cc
 src/qemu/qemu_migration.c | 112 +++++++++++++++++++++++++---------------------
c401cc
 2 files changed, 63 insertions(+), 52 deletions(-)
c401cc
c401cc
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
c401cc
index 04f08a3..9735761 100644
c401cc
--- a/src/qemu/qemu_domain.h
c401cc
+++ b/src/qemu/qemu_domain.h
c401cc
@@ -1,7 +1,7 @@
c401cc
 /*
c401cc
  * qemu_domain.h: QEMU domain private state
c401cc
  *
c401cc
- * Copyright (C) 2006-2013 Red Hat, Inc.
c401cc
+ * Copyright (C) 2006-2014 Red Hat, Inc.
c401cc
  * Copyright (C) 2006 Daniel P. Berrange
c401cc
  *
c401cc
  * This library is free software; you can redistribute it and/or
c401cc
@@ -161,6 +161,7 @@ struct _qemuDomainObjPrivate {
c401cc
     char *origname;
c401cc
     int nbdPort; /* Port used for migration with NBD */
c401cc
     unsigned short migrationPort;
c401cc
+    int preMigrationState;
c401cc
 
c401cc
     virChrdevsPtr devs;
c401cc
 
c401cc
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
c401cc
index 8fd3c9e..03ae9e4 100644
c401cc
--- a/src/qemu/qemu_migration.c
c401cc
+++ b/src/qemu/qemu_migration.c
c401cc
@@ -1078,6 +1078,53 @@ error:
c401cc
     return NULL;
c401cc
 }
c401cc
 
c401cc
+static void
c401cc
+qemuMigrationStoreDomainState(virDomainObjPtr vm)
c401cc
+{
c401cc
+    qemuDomainObjPrivatePtr priv = vm->privateData;
c401cc
+    priv->preMigrationState = virDomainObjGetState(vm, NULL);
c401cc
+
c401cc
+    VIR_DEBUG("Storing pre-migration state=%d domain=%p",
c401cc
+              priv->preMigrationState, vm);
c401cc
+}
c401cc
+
c401cc
+/* Returns true if the domain was resumed, false otherwise */
c401cc
+static bool
c401cc
+qemuMigrationRestoreDomainState(virConnectPtr conn, virDomainObjPtr vm)
c401cc
+{
c401cc
+    virQEMUDriverPtr driver = conn->privateData;
c401cc
+    qemuDomainObjPrivatePtr priv = vm->privateData;
c401cc
+    int state = virDomainObjGetState(vm, NULL);
c401cc
+    bool ret = false;
c401cc
+
c401cc
+    VIR_DEBUG("driver=%p, vm=%p, pre-mig-state=%d, state=%d",
c401cc
+              driver, vm, priv->preMigrationState, state);
c401cc
+
c401cc
+    if (state == VIR_DOMAIN_PAUSED &&
c401cc
+        priv->preMigrationState == VIR_DOMAIN_RUNNING) {
c401cc
+        /* This is basically the only restore possibility that's safe
c401cc
+         * and we should attempt to do */
c401cc
+
c401cc
+        VIR_DEBUG("Restoring pre-migration state due to migration error");
c401cc
+
c401cc
+        /* we got here through some sort of failure; start the domain again */
c401cc
+        if (qemuProcessStartCPUs(driver, vm, conn,
c401cc
+                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
c401cc
+                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
c401cc
+            /* Hm, we already know we are in error here.  We don't want to
c401cc
+             * overwrite the previous error, though, so we just throw something
c401cc
+             * to the logs and hope for the best */
c401cc
+            VIR_ERROR(_("Failed to resume guest %s after failure"), vm->def->name);
c401cc
+            goto cleanup;
c401cc
+        }
c401cc
+        ret = true;
c401cc
+    }
c401cc
+
c401cc
+ cleanup:
c401cc
+    priv->preMigrationState = VIR_DOMAIN_NOSTATE;
c401cc
+    return ret;
c401cc
+}
c401cc
+
c401cc
 /**
c401cc
  * qemuMigrationStartNBDServer:
c401cc
  * @driver: qemu driver
c401cc
@@ -2079,6 +2126,8 @@ qemuMigrationBegin(virConnectPtr conn,
c401cc
         asyncJob = QEMU_ASYNC_JOB_NONE;
c401cc
     }
c401cc
 
c401cc
+    qemuMigrationStoreDomainState(vm);
c401cc
+
c401cc
     if (!virDomainObjIsActive(vm) && !(flags & VIR_MIGRATE_OFFLINE)) {
c401cc
         virReportError(VIR_ERR_OPERATION_INVALID,
c401cc
                        "%s", _("domain is not running"));
c401cc
@@ -2750,22 +2799,12 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver,
c401cc
         /* cancel any outstanding NBD jobs */
c401cc
         qemuMigrationCancelDriveMirror(mig, driver, vm);
c401cc
 
c401cc
-        /* run 'cont' on the destination, which allows migration on qemu
c401cc
-         * >= 0.10.6 to work properly.  This isn't strictly necessary on
c401cc
-         * older qemu's, but it also doesn't hurt anything there
c401cc
-         */
c401cc
-        if (qemuProcessStartCPUs(driver, vm, conn,
c401cc
-                                 VIR_DOMAIN_RUNNING_MIGRATED,
c401cc
-                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
c401cc
-            if (virGetLastError() == NULL)
c401cc
-                virReportError(VIR_ERR_INTERNAL_ERROR,
c401cc
-                               "%s", _("resume operation failed"));
c401cc
-            goto cleanup;
c401cc
+        if (qemuMigrationRestoreDomainState(conn, vm)) {
c401cc
+            event = virDomainEventNewFromObj(vm,
c401cc
+                                             VIR_DOMAIN_EVENT_RESUMED,
c401cc
+                                             VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
c401cc
         }
c401cc
 
c401cc
-        event = virDomainEventNewFromObj(vm,
c401cc
-                                         VIR_DOMAIN_EVENT_RESUMED,
c401cc
-                                         VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
c401cc
         if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) {
c401cc
             VIR_WARN("Failed to save status on vm %s", vm->def->name);
c401cc
             goto cleanup;
c401cc
@@ -4063,7 +4102,6 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
c401cc
 {
c401cc
     virDomainEventPtr event = NULL;
c401cc
     int ret = -1;
c401cc
-    int resume = 0;
c401cc
     virErrorPtr orig_err = NULL;
c401cc
     virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
c401cc
     bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);
c401cc
@@ -4083,7 +4121,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
c401cc
     if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def))
c401cc
         goto endjob;
c401cc
 
c401cc
-    resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;
c401cc
+    qemuMigrationStoreDomainState(vm);
c401cc
 
c401cc
     if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) {
c401cc
         ret = doPeer2PeerMigrate(driver, conn, vm, xmlin,
c401cc
@@ -4110,25 +4148,12 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
c401cc
                                          VIR_DOMAIN_EVENT_STOPPED,
c401cc
                                          VIR_DOMAIN_EVENT_STOPPED_MIGRATED);
c401cc
     }
c401cc
-    resume = 0;
c401cc
 
c401cc
 endjob:
c401cc
     if (ret < 0)
c401cc
         orig_err = virSaveLastError();
c401cc
 
c401cc
-    if (resume && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
c401cc
-        /* we got here through some sort of failure; start the domain again */
c401cc
-        if (qemuProcessStartCPUs(driver, vm, conn,
c401cc
-                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
c401cc
-                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
c401cc
-            /* Hm, we already know we are in error here.  We don't want to
c401cc
-             * overwrite the previous error, though, so we just throw something
c401cc
-             * to the logs and hope for the best
c401cc
-             */
c401cc
-            VIR_ERROR(_("Failed to resume guest %s after failure"),
c401cc
-                      vm->def->name);
c401cc
-        }
c401cc
-
c401cc
+    if (qemuMigrationRestoreDomainState(conn, vm)) {
c401cc
         event = virDomainEventNewFromObj(vm,
c401cc
                                          VIR_DOMAIN_EVENT_RESUMED,
c401cc
                                          VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
c401cc
@@ -4177,7 +4202,6 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver,
c401cc
 {
c401cc
     virDomainEventPtr event = NULL;
c401cc
     int ret = -1;
c401cc
-    bool resume;
c401cc
     bool hasrefs;
c401cc
 
c401cc
     /* If we didn't start the job in the begin phase, start it now. */
c401cc
@@ -4192,32 +4216,18 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver,
c401cc
     virCloseCallbacksUnset(driver->closeCallbacks, vm,
c401cc
                            qemuMigrationCleanup);
c401cc
 
c401cc
-    resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;
c401cc
     ret = doNativeMigrate(driver, vm, uri, cookiein, cookieinlen,
c401cc
                           cookieout, cookieoutlen,
c401cc
                           flags, resource, NULL, graphicsuri);
c401cc
 
c401cc
-    if (ret < 0 && resume &&
c401cc
-        virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
c401cc
-        /* we got here through some sort of failure; start the domain again */
c401cc
-        if (qemuProcessStartCPUs(driver, vm, conn,
c401cc
-                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
c401cc
-                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
c401cc
-            /* Hm, we already know we are in error here.  We don't want to
c401cc
-             * overwrite the previous error, though, so we just throw something
c401cc
-             * to the logs and hope for the best
c401cc
-             */
c401cc
-            VIR_ERROR(_("Failed to resume guest %s after failure"),
c401cc
-                      vm->def->name);
c401cc
+    if (ret < 0) {
c401cc
+        if (qemuMigrationRestoreDomainState(conn, vm)) {
c401cc
+            event = virDomainEventNewFromObj(vm,
c401cc
+                                             VIR_DOMAIN_EVENT_RESUMED,
c401cc
+                                             VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
c401cc
         }
c401cc
-
c401cc
-        event = virDomainEventNewFromObj(vm,
c401cc
-                                         VIR_DOMAIN_EVENT_RESUMED,
c401cc
-                                         VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
c401cc
-    }
c401cc
-
c401cc
-    if (ret < 0)
c401cc
         goto endjob;
c401cc
+    }
c401cc
 
c401cc
     qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE);
c401cc
 
c401cc
-- 
c401cc
1.8.5.4
c401cc