7a3408
From 043fdb6ee0ef322f8cf925d8515d0dd5adfe2ca7 Mon Sep 17 00:00:00 2001
7a3408
Message-Id: <043fdb6ee0ef322f8cf925d8515d0dd5adfe2ca7@dist-git>
7a3408
From: Peter Krempa <pkrempa@redhat.com>
7a3408
Date: Tue, 21 Jul 2015 16:18:33 +0200
7a3408
Subject: [PATCH] virsh: Refactor block job waiting in cmdBlockPull
7a3408
7a3408
https://bugzilla.redhat.com/show_bug.cgi?id=1227551
7a3408
https://bugzilla.redhat.com/show_bug.cgi?id=1197592
7a3408
7a3408
Introduce helper function that will provide logic for waiting for block
7a3408
job completion so the 3 open coded places can be unified and improved.
7a3408
7a3408
This patch introduces the whole logic and uses it to fix
7a3408
cmdBlockJobPull. The vshBlockJobWait function provides common logic for
7a3408
block job waiting that should be robust enough to work across all
7a3408
previous versions of libvirt. Since virsh allows passing user-provided
7a3408
strings as paths of block devices we can't reliably use block job events
7a3408
for detection of block job states so the function contains a great deal
7a3408
of fallback logic.
7a3408
7a3408
(cherry picked from commit 2e7827636476fdf976f17cd234b636973dedffc0)
7a3408
7a3408
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
7a3408
---
7a3408
 tools/virsh-domain.c | 326 ++++++++++++++++++++++++++++++++++++++-------------
7a3408
 1 file changed, 244 insertions(+), 82 deletions(-)
7a3408
7a3408
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
7a3408
index 2f6ad46..385eba2 100644
7a3408
--- a/tools/virsh-domain.c
7a3408
+++ b/tools/virsh-domain.c
7a3408
@@ -53,6 +53,7 @@
7a3408
 #include "virsh-console.h"
7a3408
 #include "virsh-domain-monitor.h"
7a3408
 #include "virerror.h"
7a3408
+#include "virtime.h"
7a3408
 #include "virtypedparam.h"
7a3408
 #include "virxml.h"
7a3408
 
7a3408
@@ -1702,17 +1703,237 @@ static void vshCatchInt(int sig ATTRIBUTE_UNUSED,
7a3408
     intCaught = 1;
7a3408
 }
7a3408
 
7a3408
+
7a3408
+typedef struct _vshBlockJobWaitData vshBlockJobWaitData;
7a3408
+typedef vshBlockJobWaitData *vshBlockJobWaitDataPtr;
7a3408
+struct _vshBlockJobWaitData {
7a3408
+    vshControl *ctl;
7a3408
+    virDomainPtr dom;
7a3408
+    const char *dev;
7a3408
+    const char *job_name;
7a3408
+
7a3408
+    bool verbose;
7a3408
+    unsigned int timeout;
7a3408
+    bool async_abort;
7a3408
+
7a3408
+    int cb_id;
7a3408
+    int cb_id2;
7a3408
+    int status;
7a3408
+};
7a3408
+
7a3408
+
7a3408
 static void
7a3408
 vshBlockJobStatusHandler(virConnectPtr conn ATTRIBUTE_UNUSED,
7a3408
                          virDomainPtr dom ATTRIBUTE_UNUSED,
7a3408
-                         const char *disk ATTRIBUTE_UNUSED,
7a3408
+                         const char *disk,
7a3408
                          int type ATTRIBUTE_UNUSED,
7a3408
                          int status,
7a3408
                          void *opaque)
7a3408
 {
7a3408
-    *(int *) opaque = status;
7a3408
+    vshBlockJobWaitDataPtr data = opaque;
7a3408
+
7a3408
+    if (STREQ_NULLABLE(disk, data->dev))
7a3408
+        data->status = status;
7a3408
 }
7a3408
 
7a3408
+
7a3408
+/**
7a3408
+ * vshBlockJobWaitInit:
7a3408
+ * @ctl: vsh control structure
7a3408
+ * @dom: domain object
7a3408
+ * @dev: block device name to wait for
7a3408
+ * @job_name: block job name to display in user-facing messages
7a3408
+ * @verbose: enable progress reporting
7a3408
+ * @timeout: number of milliseconds to wait before aborting the job
7a3408
+ * @async_abort: abort the job asynchronously
7a3408
+ *
7a3408
+ * Prepares virsh for waiting for completion of a block job. This function
7a3408
+ * registers event handlers for block job events and prepares the data structures
7a3408
+ * for them. A call to vshBlockJobWait then waits for completion of the given
7a3408
+ * block job. This function should be tolerant to different versions of daemon
7a3408
+ * and the reporting capabilities of those.
7a3408
+ *
7a3408
+ * Returns the data structure that holds data needed for block job waiting or
7a3408
+ * NULL in case of error.
7a3408
+ */
7a3408
+static vshBlockJobWaitDataPtr
7a3408
+vshBlockJobWaitInit(vshControl *ctl,
7a3408
+                    virDomainPtr dom,
7a3408
+                    const char *dev,
7a3408
+                    const char *job_name,
7a3408
+                    bool verbose,
7a3408
+                    unsigned int timeout,
7a3408
+                    bool async_abort)
7a3408
+{
7a3408
+    vshBlockJobWaitDataPtr ret;
7a3408
+
7a3408
+    if (VIR_ALLOC(ret) < 0)
7a3408
+        return NULL;
7a3408
+
7a3408
+    ret->ctl = ctl;
7a3408
+    ret->dom = dom;
7a3408
+    ret->dev = dev;
7a3408
+    ret->job_name = job_name;
7a3408
+
7a3408
+    ret->async_abort = async_abort;
7a3408
+    ret->timeout = timeout;
7a3408
+    ret->verbose = verbose;
7a3408
+
7a3408
+    ret->status = -1;
7a3408
+
7a3408
+    virConnectDomainEventGenericCallback cb =
7a3408
+        VIR_DOMAIN_EVENT_CALLBACK(vshBlockJobStatusHandler);
7a3408
+
7a3408
+    if ((ret->cb_id = virConnectDomainEventRegisterAny(ctl->conn, dom,
7a3408
+                                                       VIR_DOMAIN_EVENT_ID_BLOCK_JOB,
7a3408
+                                                       cb, ret, NULL)) < 0)
7a3408
+        vshResetLibvirtError();
7a3408
+
7a3408
+    if ((ret->cb_id2 = virConnectDomainEventRegisterAny(ctl->conn, dom,
7a3408
+                                                        VIR_DOMAIN_EVENT_ID_BLOCK_JOB_2,
7a3408
+                                                        cb, ret, NULL)) < 0)
7a3408
+        vshResetLibvirtError();
7a3408
+
7a3408
+    return ret;
7a3408
+}
7a3408
+
7a3408
+
7a3408
+static void
7a3408
+vshBlockJobWaitFree(vshBlockJobWaitDataPtr data)
7a3408
+{
7a3408
+    if (!data)
7a3408
+        return;
7a3408
+
7a3408
+    if (data->cb_id >= 0)
7a3408
+        virConnectDomainEventDeregisterAny(data->ctl->conn, data->cb_id);
7a3408
+    if (data->cb_id2 >= 0)
7a3408
+        virConnectDomainEventDeregisterAny(data->ctl->conn, data->cb_id2);
7a3408
+
7a3408
+    VIR_FREE(data);
7a3408
+}
7a3408
+
7a3408
+
7a3408
+/**
7a3408
+ * vshBlockJobWait:
7a3408
+ * @data: private data initialized by vshBlockJobWaitInit
7a3408
+ *
7a3408
+ * Waits for the block job to complete. This function prefers to get an event
7a3408
+ * from libvirt but still has fallback means if the device name can't be matched
7a3408
+ *
7a3408
+ * This function returns values from the virConnectDomainEventBlockJobStatus enum
7a3408
+ * or -1 in case of a internal error. Fallback states if a block job vanishes
7a3408
+ * without triggering the event is VIR_DOMAIN_BLOCK_JOB_COMPLETED. For two phase
7a3408
+ * jobs after the retry count for waiting for the event expires is
7a3408
+ * VIR_DOMAIN_BLOCK_JOB_READY.
7a3408
+ */
7a3408
+static int
7a3408
+vshBlockJobWait(vshBlockJobWaitDataPtr data)
7a3408
+{
7a3408
+    /* For two phase jobs like active commit or block copy, the marker reaches
7a3408
+     * 100% and an event fires. In case where virsh would not be able to match
7a3408
+     * the event to the given block job we will wait for the number of retries
7a3408
+     * before claiming that we entered synchronised phase */
7a3408
+    unsigned int retries = 5;
7a3408
+
7a3408
+    struct sigaction sig_action;
7a3408
+    struct sigaction old_sig_action;
7a3408
+    sigset_t sigmask, oldsigmask;
7a3408
+
7a3408
+    unsigned long long start = 0;
7a3408
+    unsigned long long curr = 0;
7a3408
+
7a3408
+    unsigned int abort_flags = 0;
7a3408
+    int ret = -1;
7a3408
+    virDomainBlockJobInfo info;
7a3408
+    int result;
7a3408
+
7a3408
+    if (!data)
7a3408
+        return 0;
7a3408
+
7a3408
+    if (data->async_abort)
7a3408
+        abort_flags |= VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC;
7a3408
+
7a3408
+    sigemptyset(&sigmask);
7a3408
+    sigaddset(&sigmask, SIGINT);
7a3408
+
7a3408
+    intCaught = 0;
7a3408
+    sig_action.sa_sigaction = vshCatchInt;
7a3408
+    sig_action.sa_flags = SA_SIGINFO;
7a3408
+    sigemptyset(&sig_action.sa_mask);
7a3408
+    sigaction(SIGINT, &sig_action, &old_sig_action);
7a3408
+
7a3408
+    if (data->timeout && virTimeMillisNow(&start) < 0) {
7a3408
+        vshSaveLibvirtError();
7a3408
+        return -1;
7a3408
+    }
7a3408
+
7a3408
+    while (true) {
7a3408
+        pthread_sigmask(SIG_BLOCK, &sigmask, &oldsigmask);
7a3408
+        result = virDomainGetBlockJobInfo(data->dom, data->dev, &info, 0);
7a3408
+        pthread_sigmask(SIG_SETMASK, &oldsigmask, NULL);
7a3408
+
7a3408
+        if (result < 0) {
7a3408
+            vshError(data->ctl, _("failed to query job for disk %s"), data->dev);
7a3408
+            goto cleanup;
7a3408
+        }
7a3408
+
7a3408
+        /* if we've got an event for the device we are waiting for we can end
7a3408
+         * the waiting loop */
7a3408
+        if ((data->cb_id >= 0 || data->cb_id2 >= 0) && data->status != -1) {
7a3408
+            ret = data->status;
7a3408
+            goto cleanup;
7a3408
+        }
7a3408
+
7a3408
+        /* since virsh can't guarantee that the path provided by the user will
7a3408
+         * later be matched in the event we will need to keep the fallback
7a3408
+         * approach and claim success if the block job finishes or vanishes. */
7a3408
+        if (result == 0)
7a3408
+            break;
7a3408
+
7a3408
+        /* for two-phase jobs we will try to wait in the synchronized phase
7a3408
+         * for event arrival since 100% completion doesn't necessarily mean that
7a3408
+         * the block job has finished and can be terminated with success */
7a3408
+        if (info.end == info.cur && --retries == 0) {
7a3408
+            ret = VIR_DOMAIN_BLOCK_JOB_READY;
7a3408
+            goto cleanup;
7a3408
+        }
7a3408
+
7a3408
+        if (data->verbose)
7a3408
+            vshPrintJobProgress(data->job_name, info.end - info.cur, info.end);
7a3408
+
7a3408
+        if (data->timeout && virTimeMillisNow(&curr) < 0) {
7a3408
+            vshSaveLibvirtError();
7a3408
+            goto cleanup;
7a3408
+        }
7a3408
+
7a3408
+        if (intCaught || (data->timeout && (curr - start > data->timeout))) {
7a3408
+            if (virDomainBlockJobAbort(data->dom, data->dev, abort_flags) < 0) {
7a3408
+                vshError(data->ctl, _("failed to abort job for disk '%s'"),
7a3408
+                         data->dev);
7a3408
+                goto cleanup;
7a3408
+            }
7a3408
+
7a3408
+            ret = VIR_DOMAIN_BLOCK_JOB_CANCELED;
7a3408
+            goto cleanup;
7a3408
+        }
7a3408
+
7a3408
+        usleep(500 * 1000);
7a3408
+    }
7a3408
+
7a3408
+    ret = VIR_DOMAIN_BLOCK_JOB_COMPLETED;
7a3408
+
7a3408
+ cleanup:
7a3408
+    /* print 100% completed */
7a3408
+    if (data->verbose &&
7a3408
+        (ret == VIR_DOMAIN_BLOCK_JOB_COMPLETED ||
7a3408
+         ret == VIR_DOMAIN_BLOCK_JOB_READY))
7a3408
+        vshPrintJobProgress(data->job_name, 0, 1);
7a3408
+
7a3408
+    sigaction(SIGINT, &old_sig_action, NULL);
7a3408
+    return ret;
7a3408
+}
7a3408
+
7a3408
+
7a3408
 /*
7a3408
  * "blockcommit" command
7a3408
  */
7a3408
@@ -2656,19 +2877,11 @@ cmdBlockPull(vshControl *ctl, const vshCmd *cmd)
7a3408
     bool verbose = vshCommandOptBool(cmd, "verbose");
7a3408
     bool async = vshCommandOptBool(cmd, "async");
7a3408
     int timeout = 0;
7a3408
-    struct sigaction sig_action;
7a3408
-    struct sigaction old_sig_action;
7a3408
-    sigset_t sigmask, oldsigmask;
7a3408
-    struct timeval start;
7a3408
-    struct timeval curr;
7a3408
     const char *path = NULL;
7a3408
     const char *base = NULL;
7a3408
     unsigned long bandwidth = 0;
7a3408
-    bool quit = false;
7a3408
-    int abort_flags = 0;
7a3408
-    int status = -1;
7a3408
-    int cb_id = -1;
7a3408
     unsigned int flags = 0;
7a3408
+    vshBlockJobWaitDataPtr bjWait = NULL;
7a3408
 
7a3408
     VSH_REQUIRE_OPTION("verbose", "wait");
7a3408
     VSH_REQUIRE_OPTION("async", "wait");
7a3408
@@ -2688,35 +2901,13 @@ cmdBlockPull(vshControl *ctl, const vshCmd *cmd)
7a3408
     if (vshCommandOptBool(cmd, "keep-relative"))
7a3408
         flags |= VIR_DOMAIN_BLOCK_REBASE_RELATIVE;
7a3408
 
7a3408
-    if (async)
7a3408
-        abort_flags |= VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC;
7a3408
-
7a3408
-    if (blocking) {
7a3408
-        sigemptyset(&sigmask);
7a3408
-        sigaddset(&sigmask, SIGINT);
7a3408
-
7a3408
-        intCaught = 0;
7a3408
-        sig_action.sa_sigaction = vshCatchInt;
7a3408
-        sig_action.sa_flags = SA_SIGINFO;
7a3408
-        sigemptyset(&sig_action.sa_mask);
7a3408
-        sigaction(SIGINT, &sig_action, &old_sig_action);
7a3408
-
7a3408
-        GETTIMEOFDAY(&start;;
7a3408
-    }
7a3408
-
7a3408
     if (!(dom = vshCommandOptDomain(ctl, cmd, NULL)))
7a3408
         return false;
7a3408
 
7a3408
-    virConnectDomainEventGenericCallback cb =
7a3408
-        VIR_DOMAIN_EVENT_CALLBACK(vshBlockJobStatusHandler);
7a3408
-
7a3408
-    if ((cb_id = virConnectDomainEventRegisterAny(ctl->conn,
7a3408
-                                                  dom,
7a3408
-                                                  VIR_DOMAIN_EVENT_ID_BLOCK_JOB,
7a3408
-                                                  cb,
7a3408
-                                                  &status,
7a3408
-                                                  NULL)) < 0)
7a3408
-        vshResetLibvirtError();
7a3408
+    if (blocking &&
7a3408
+        !(bjWait = vshBlockJobWaitInit(ctl, dom, path, _("Block Pull"), verbose,
7a3408
+                                       timeout, async)))
7a3408
+        goto cleanup;
7a3408
 
7a3408
     if (base || flags) {
7a3408
         if (virDomainBlockRebase(dom, path, base, bandwidth, flags) < 0)
7a3408
@@ -2732,61 +2923,32 @@ cmdBlockPull(vshControl *ctl, const vshCmd *cmd)
7a3408
         goto cleanup;
7a3408
     }
7a3408
 
7a3408
-    while (blocking) {
7a3408
-        virDomainBlockJobInfo info;
7a3408
-        int result;
7a3408
-
7a3408
-        pthread_sigmask(SIG_BLOCK, &sigmask, &oldsigmask);
7a3408
-        result = virDomainGetBlockJobInfo(dom, path, &info, 0);
7a3408
-        pthread_sigmask(SIG_SETMASK, &oldsigmask, NULL);
7a3408
+    /* Execution continues here only if --wait or friends were specified */
7a3408
+    switch (vshBlockJobWait(bjWait)) {
7a3408
+        case -1:
7a3408
+            goto cleanup;
7a3408
 
7a3408
-        if (result < 0) {
7a3408
-            vshError(ctl, _("failed to query job for disk %s"), path);
7a3408
+        case VIR_DOMAIN_BLOCK_JOB_CANCELED:
7a3408
+            vshPrint(ctl, "\n%s", _("Pull aborted"));
7a3408
             goto cleanup;
7a3408
-        }
7a3408
-        if (result == 0)
7a3408
             break;
7a3408
 
7a3408
-        if (verbose)
7a3408
-            vshPrintJobProgress(_("Block Pull"), info.end - info.cur, info.end);
7a3408
-
7a3408
-        GETTIMEOFDAY(&curr);
7a3408
-        if (intCaught || (timeout &&
7a3408
-                          (((int)(curr.tv_sec - start.tv_sec)  * 1000 +
7a3408
-                            (int)(curr.tv_usec - start.tv_usec) / 1000) >
7a3408
-                           timeout))) {
7a3408
-            vshDebug(ctl, VSH_ERR_DEBUG,
7a3408
-                     intCaught ? "interrupted" : "timeout");
7a3408
-            intCaught = 0;
7a3408
-            timeout = 0;
7a3408
-            status = VIR_DOMAIN_BLOCK_JOB_CANCELED;
7a3408
-            if (virDomainBlockJobAbort(dom, path, abort_flags) < 0) {
7a3408
-                vshError(ctl, _("failed to abort job for disk %s"), path);
7a3408
-                goto cleanup;
7a3408
-            }
7a3408
-            if (abort_flags & VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC)
7a3408
-                break;
7a3408
-        } else {
7a3408
-            usleep(500 * 1000);
7a3408
-        }
7a3408
-    }
7a3408
-
7a3408
-    if (status == VIR_DOMAIN_BLOCK_JOB_CANCELED)
7a3408
-        quit = true;
7a3408
+        case VIR_DOMAIN_BLOCK_JOB_FAILED:
7a3408
+            vshPrint(ctl, "\n%s", _("Pull failed"));
7a3408
+            goto cleanup;
7a3408
+            break;
7a3408
 
7a3408
-    if (verbose && !quit) {
7a3408
-        /* printf [100 %] */
7a3408
-        vshPrintJobProgress(_("Block Pull"), 0, 1);
7a3408
+        case VIR_DOMAIN_BLOCK_JOB_READY:
7a3408
+        case VIR_DOMAIN_BLOCK_JOB_COMPLETED:
7a3408
+            vshPrint(ctl, "\n%s", _("Pull complete"));
7a3408
+            break;
7a3408
     }
7a3408
-    vshPrint(ctl, "\n%s", quit ? _("Pull aborted") : _("Pull complete"));
7a3408
 
7a3408
     ret = true;
7a3408
+
7a3408
  cleanup:
7a3408
     virDomainFree(dom);
7a3408
-    if (blocking)
7a3408
-        sigaction(SIGINT, &old_sig_action, NULL);
7a3408
-    if (cb_id >= 0)
7a3408
-        virConnectDomainEventDeregisterAny(ctl->conn, cb_id);
7a3408
+    vshBlockJobWaitFree(bjWait);
7a3408
     return ret;
7a3408
 }
7a3408
 
7a3408
-- 
7a3408
2.5.0
7a3408