Blame 0037-virtiofsd-Fast-path-for-virtio-read.patch

1d442b
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
1d442b
Date: Mon, 27 Jan 2020 19:01:06 +0000
1d442b
Subject: [PATCH] virtiofsd: Fast path for virtio read
1d442b
1d442b
Readv the data straight into the guest's buffer.
1d442b
1d442b
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
1d442b
With fix by:
1d442b
Signed-off-by: Eryu Guan <eguan@linux.alibaba.com>
1d442b
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
1d442b
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
1d442b
(cherry picked from commit eb49d187ef5134483a34c970bbfece28aaa686a7)
1d442b
---
1d442b
 tools/virtiofsd/fuse_lowlevel.c |   5 +
1d442b
 tools/virtiofsd/fuse_virtio.c   | 162 ++++++++++++++++++++++++++++++++
1d442b
 tools/virtiofsd/fuse_virtio.h   |   4 +
1d442b
 3 files changed, 171 insertions(+)
1d442b
1d442b
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
1d442b
index 380d93bd01..4f4684d942 100644
1d442b
--- a/tools/virtiofsd/fuse_lowlevel.c
1d442b
+++ b/tools/virtiofsd/fuse_lowlevel.c
1d442b
@@ -475,6 +475,11 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se,
1d442b
         return fuse_send_msg(se, ch, iov, iov_count);
1d442b
     }
1d442b
 
1d442b
+    if (fuse_lowlevel_is_virtio(se) && buf->count == 1 &&
1d442b
+        buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) {
1d442b
+        return virtio_send_data_iov(se, ch, iov, iov_count, buf, len);
1d442b
+    }
1d442b
+
1d442b
     abort(); /* Will have taken vhost path */
1d442b
     return 0;
1d442b
 }
1d442b
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
1d442b
index f1adeb6345..7e2711b504 100644
1d442b
--- a/tools/virtiofsd/fuse_virtio.c
1d442b
+++ b/tools/virtiofsd/fuse_virtio.c
1d442b
@@ -230,6 +230,168 @@ err:
1d442b
     return ret;
1d442b
 }
1d442b
 
1d442b
+/*
1d442b
+ * Callback from fuse_send_data_iov_* when it's virtio and the buffer
1d442b
+ * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK
1d442b
+ * We need to send the iov and then the buffer.
1d442b
+ * Return 0 on success, or a positive errno value on failure
1d442b
+ */
1d442b
+int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
1d442b
+                         struct iovec *iov, int count, struct fuse_bufvec *buf,
1d442b
+                         size_t len)
1d442b
+{
1d442b
+    int ret = 0;
1d442b
+    VuVirtqElement *elem;
1d442b
+    VuVirtq *q;
1d442b
+
1d442b
+    assert(count >= 1);
1d442b
+    assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
1d442b
+
1d442b
+    struct fuse_out_header *out = iov[0].iov_base;
1d442b
+    /* TODO: Endianness! */
1d442b
+
1d442b
+    size_t iov_len = iov_size(iov, count);
1d442b
+    size_t tosend_len = iov_len + len;
1d442b
+
1d442b
+    out->len = tosend_len;
1d442b
+
1d442b
+    fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__,
1d442b
+             count, len, iov_len);
1d442b
+
1d442b
+    /* unique == 0 is notification which we don't support */
1d442b
+    assert(out->unique);
1d442b
+
1d442b
+    /* For virtio we always have ch */
1d442b
+    assert(ch);
1d442b
+    assert(!ch->qi->reply_sent);
1d442b
+    elem = ch->qi->qe;
1d442b
+    q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx];
1d442b
+
1d442b
+    /* The 'in' part of the elem is to qemu */
1d442b
+    unsigned int in_num = elem->in_num;
1d442b
+    struct iovec *in_sg = elem->in_sg;
1d442b
+    size_t in_len = iov_size(in_sg, in_num);
1d442b
+    fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
1d442b
+             __func__, elem->index, in_num, in_len);
1d442b
+
1d442b
+    /*
1d442b
+     * The elem should have room for a 'fuse_out_header' (out from fuse)
1d442b
+     * plus the data based on the len in the header.
1d442b
+     */
1d442b
+    if (in_len < sizeof(struct fuse_out_header)) {
1d442b
+        fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
1d442b
+                 __func__, elem->index);
1d442b
+        ret = E2BIG;
1d442b
+        goto err;
1d442b
+    }
1d442b
+    if (in_len < tosend_len) {
1d442b
+        fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
1d442b
+                 __func__, elem->index, tosend_len);
1d442b
+        ret = E2BIG;
1d442b
+        goto err;
1d442b
+    }
1d442b
+
1d442b
+    /* TODO: Limit to 'len' */
1d442b
+
1d442b
+    /* First copy the header data from iov into in_sg */
1d442b
+    copy_iov(iov, count, in_sg, in_num, iov_len);
1d442b
+
1d442b
+    /*
1d442b
+     * Build a copy of the in_sg iov so we can skip bits in it,
1d442b
+     * including changing the offsets
1d442b
+     */
1d442b
+    struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num);
1d442b
+    assert(in_sg_cpy);
1d442b
+    memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num);
1d442b
+    /* These get updated as we skip */
1d442b
+    struct iovec *in_sg_ptr = in_sg_cpy;
1d442b
+    int in_sg_cpy_count = in_num;
1d442b
+
1d442b
+    /* skip over parts of in_sg that contained the header iov */
1d442b
+    size_t skip_size = iov_len;
1d442b
+
1d442b
+    size_t in_sg_left = 0;
1d442b
+    do {
1d442b
+        while (skip_size != 0 && in_sg_cpy_count) {
1d442b
+            if (skip_size >= in_sg_ptr[0].iov_len) {
1d442b
+                skip_size -= in_sg_ptr[0].iov_len;
1d442b
+                in_sg_ptr++;
1d442b
+                in_sg_cpy_count--;
1d442b
+            } else {
1d442b
+                in_sg_ptr[0].iov_len -= skip_size;
1d442b
+                in_sg_ptr[0].iov_base += skip_size;
1d442b
+                break;
1d442b
+            }
1d442b
+        }
1d442b
+
1d442b
+        int i;
1d442b
+        for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) {
1d442b
+            in_sg_left += in_sg_ptr[i].iov_len;
1d442b
+        }
1d442b
+        fuse_log(FUSE_LOG_DEBUG,
1d442b
+                 "%s: after skip skip_size=%zd in_sg_cpy_count=%d "
1d442b
+                 "in_sg_left=%zd\n",
1d442b
+                 __func__, skip_size, in_sg_cpy_count, in_sg_left);
1d442b
+        ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count,
1d442b
+                     buf->buf[0].pos);
1d442b
+
1d442b
+        if (ret == -1) {
1d442b
+            ret = errno;
1d442b
+            fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n",
1d442b
+                     __func__, len);
1d442b
+            free(in_sg_cpy);
1d442b
+            goto err;
1d442b
+        }
1d442b
+        fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__,
1d442b
+                 ret, len);
1d442b
+        if (ret < len && ret) {
1d442b
+            fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__);
1d442b
+            /* Skip over this much next time around */
1d442b
+            skip_size = ret;
1d442b
+            buf->buf[0].pos += ret;
1d442b
+            len -= ret;
1d442b
+
1d442b
+            /* Let's do another read */
1d442b
+            continue;
1d442b
+        }
1d442b
+        if (!ret) {
1d442b
+            /* EOF case? */
1d442b
+            fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__,
1d442b
+                     in_sg_left);
1d442b
+            break;
1d442b
+        }
1d442b
+        if (ret != len) {
1d442b
+            fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__);
1d442b
+            ret = EIO;
1d442b
+            free(in_sg_cpy);
1d442b
+            goto err;
1d442b
+        }
1d442b
+        in_sg_left -= ret;
1d442b
+        len -= ret;
1d442b
+    } while (in_sg_left);
1d442b
+    free(in_sg_cpy);
1d442b
+
1d442b
+    /* Need to fix out->len on EOF */
1d442b
+    if (len) {
1d442b
+        struct fuse_out_header *out_sg = in_sg[0].iov_base;
1d442b
+
1d442b
+        tosend_len -= len;
1d442b
+        out_sg->len = tosend_len;
1d442b
+    }
1d442b
+
1d442b
+    ret = 0;
1d442b
+
1d442b
+    vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len);
1d442b
+    vu_queue_notify(&se->virtio_dev->dev, q);
1d442b
+
1d442b
+err:
1d442b
+    if (ret == 0) {
1d442b
+        ch->qi->reply_sent = true;
1d442b
+    }
1d442b
+
1d442b
+    return ret;
1d442b
+}
1d442b
+
1d442b
 /* Thread function for individual queues, created when a queue is 'started' */
1d442b
 static void *fv_queue_thread(void *opaque)
1d442b
 {
1d442b
diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h
1d442b
index 135a14875a..cc676b9193 100644
1d442b
--- a/tools/virtiofsd/fuse_virtio.h
1d442b
+++ b/tools/virtiofsd/fuse_virtio.h
1d442b
@@ -26,4 +26,8 @@ int virtio_loop(struct fuse_session *se);
1d442b
 int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
1d442b
                     struct iovec *iov, int count);
1d442b
 
1d442b
+int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
1d442b
+                         struct iovec *iov, int count,
1d442b
+                         struct fuse_bufvec *buf, size_t len);
1d442b
+
1d442b
 #endif