958e1b
From 55c524d38cafb5856220c76b9573efa6e22dd1f1 Mon Sep 17 00:00:00 2001
958e1b
From: Laszlo Ersek <lersek@redhat.com>
958e1b
Date: Fri, 7 Nov 2014 17:18:01 +0100
958e1b
Subject: [PATCH 14/41] dump: add API to write dump pages
958e1b
958e1b
Message-id: <1415380693-16593-15-git-send-email-lersek@redhat.com>
958e1b
Patchwork-id: 62200
958e1b
O-Subject: [RHEL-7.1 qemu-kvm PATCH 14/26] dump: add API to write dump pages
958e1b
Bugzilla: 1157798
958e1b
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
958e1b
RH-Acked-by: Luiz Capitulino <lcapitulino@redhat.com>
958e1b
RH-Acked-by: dgibson <dgibson@redhat.com>
958e1b
958e1b
From: qiaonuohan <qiaonuohan@cn.fujitsu.com>
958e1b
958e1b
These functions write pages to the vmcore. The vmcore is written page by page.
958e1b
A page descriptor stores the information of a page, including the page's size,
958e1b
offset, compression format, etc.
958e1b
958e1b
Signed-off-by: Qiao Nuohan <qiaonuohan@cn.fujitsu.com>
958e1b
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
958e1b
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
958e1b
(cherry picked from commit d12f57ec6640d36e380367a0ab6ab9f3f29b6d51)
958e1b
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
958e1b
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
958e1b
---
958e1b
 dump.c                | 231 ++++++++++++++++++++++++++++++++++++++++++++++++++
958e1b
 include/sysemu/dump.h |   7 ++
958e1b
 2 files changed, 238 insertions(+)
958e1b
958e1b
diff --git a/dump.c b/dump.c
958e1b
index 926ab84..fc5530f 100644
958e1b
--- a/dump.c
958e1b
+++ b/dump.c
958e1b
@@ -25,6 +25,14 @@
958e1b
 #include "qapi/error.h"
958e1b
 #include "qmp-commands.h"
958e1b
 
958e1b
+#include <zlib.h>
958e1b
+#ifdef CONFIG_LZO
958e1b
+#include <lzo/lzo1x.h>
958e1b
+#endif
958e1b
+#ifdef CONFIG_SNAPPY
958e1b
+#include <snappy-c.h>
958e1b
+#endif
958e1b
+
958e1b
 static uint16_t cpu_convert_to_target16(uint16_t val, int endian)
958e1b
 {
958e1b
     if (endian == ELFDATA2LSB) {
958e1b
@@ -1218,6 +1226,229 @@ static void free_data_cache(DataCache *data_cache)
958e1b
     g_free(data_cache->buf);
958e1b
 }
958e1b
 
958e1b
+static size_t get_len_buf_out(size_t page_size, uint32_t flag_compress)
958e1b
+{
958e1b
+    size_t len_buf_out_zlib, len_buf_out_lzo, len_buf_out_snappy;
958e1b
+    size_t len_buf_out;
958e1b
+
958e1b
+    /* init buf_out */
958e1b
+    len_buf_out_zlib = len_buf_out_lzo = len_buf_out_snappy = 0;
958e1b
+
958e1b
+    /* buf size for zlib */
958e1b
+    len_buf_out_zlib = compressBound(page_size);
958e1b
+
958e1b
+    /* buf size for lzo */
958e1b
+#ifdef CONFIG_LZO
958e1b
+    if (flag_compress & DUMP_DH_COMPRESSED_LZO) {
958e1b
+        if (lzo_init() != LZO_E_OK) {
958e1b
+            /* return 0 to indicate lzo is unavailable */
958e1b
+            return 0;
958e1b
+        }
958e1b
+    }
958e1b
+
958e1b
+    /*
958e1b
+     * LZO will expand incompressible data by a little amount. please check the
958e1b
+     * following URL to see the expansion calculation:
958e1b
+     * http://www.oberhumer.com/opensource/lzo/lzofaq.php
958e1b
+     */
958e1b
+    len_buf_out_lzo = page_size + page_size / 16 + 64 + 3;
958e1b
+#endif
958e1b
+
958e1b
+#ifdef CONFIG_SNAPPY
958e1b
+    /* buf size for snappy */
958e1b
+    len_buf_out_snappy = snappy_max_compressed_length(page_size);
958e1b
+#endif
958e1b
+
958e1b
+    /* get the biggest that can store all kinds of compressed page */
958e1b
+    len_buf_out = MAX(len_buf_out_zlib,
958e1b
+                      MAX(len_buf_out_lzo, len_buf_out_snappy));
958e1b
+
958e1b
+    return len_buf_out;
958e1b
+}
958e1b
+
958e1b
+/*
958e1b
+ * check if the page is all 0
958e1b
+ */
958e1b
+static inline bool is_zero_page(const uint8_t *buf, size_t page_size)
958e1b
+{
958e1b
+    return buffer_is_zero(buf, page_size);
958e1b
+}
958e1b
+
958e1b
+static int write_dump_pages(DumpState *s)
958e1b
+{
958e1b
+    int ret = 0;
958e1b
+    DataCache page_desc, page_data;
958e1b
+    size_t len_buf_out, size_out;
958e1b
+#ifdef CONFIG_LZO
958e1b
+    lzo_bytep wrkmem = NULL;
958e1b
+#endif
958e1b
+    uint8_t *buf_out = NULL;
958e1b
+    off_t offset_desc, offset_data;
958e1b
+    PageDescriptor pd, pd_zero;
958e1b
+    uint8_t *buf;
958e1b
+    int endian = s->dump_info.d_endian;
958e1b
+    GuestPhysBlock *block_iter = NULL;
958e1b
+    uint64_t pfn_iter;
958e1b
+
958e1b
+    /* get offset of page_desc and page_data in dump file */
958e1b
+    offset_desc = s->offset_page;
958e1b
+    offset_data = offset_desc + sizeof(PageDescriptor) * s->num_dumpable;
958e1b
+
958e1b
+    prepare_data_cache(&page_desc, s, offset_desc);
958e1b
+    prepare_data_cache(&page_data, s, offset_data);
958e1b
+
958e1b
+    /* prepare buffer to store compressed data */
958e1b
+    len_buf_out = get_len_buf_out(s->page_size, s->flag_compress);
958e1b
+    if (len_buf_out == 0) {
958e1b
+        dump_error(s, "dump: failed to get length of output buffer.\n");
958e1b
+        goto out;
958e1b
+    }
958e1b
+
958e1b
+#ifdef CONFIG_LZO
958e1b
+    wrkmem = g_malloc(LZO1X_1_MEM_COMPRESS);
958e1b
+#endif
958e1b
+
958e1b
+    buf_out = g_malloc(len_buf_out);
958e1b
+
958e1b
+    /*
958e1b
+     * init zero page's page_desc and page_data, because every zero page
958e1b
+     * uses the same page_data
958e1b
+     */
958e1b
+    pd_zero.size = cpu_convert_to_target32(s->page_size, endian);
958e1b
+    pd_zero.flags = cpu_convert_to_target32(0, endian);
958e1b
+    pd_zero.offset = cpu_convert_to_target64(offset_data, endian);
958e1b
+    pd_zero.page_flags = cpu_convert_to_target64(0, endian);
958e1b
+    buf = g_malloc0(s->page_size);
958e1b
+    ret = write_cache(&page_data, buf, s->page_size, false);
958e1b
+    g_free(buf);
958e1b
+    if (ret < 0) {
958e1b
+        dump_error(s, "dump: failed to write page data(zero page).\n");
958e1b
+        goto out;
958e1b
+    }
958e1b
+
958e1b
+    offset_data += s->page_size;
958e1b
+
958e1b
+    /*
958e1b
+     * dump memory to vmcore page by page. zero page will all be resided in the
958e1b
+     * first page of page section
958e1b
+     */
958e1b
+    while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
958e1b
+        /* check zero page */
958e1b
+        if (is_zero_page(buf, s->page_size)) {
958e1b
+            ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
958e1b
+                              false);
958e1b
+            if (ret < 0) {
958e1b
+                dump_error(s, "dump: failed to write page desc.\n");
958e1b
+                goto out;
958e1b
+            }
958e1b
+        } else {
958e1b
+            /*
958e1b
+             * not zero page, then:
958e1b
+             * 1. compress the page
958e1b
+             * 2. write the compressed page into the cache of page_data
958e1b
+             * 3. get page desc of the compressed page and write it into the
958e1b
+             *    cache of page_desc
958e1b
+             *
958e1b
+             * only one compression format will be used here, for
958e1b
+             * s->flag_compress is set. But when compression fails to work,
958e1b
+             * we fall back to save in plaintext.
958e1b
+             */
958e1b
+             size_out = len_buf_out;
958e1b
+             if ((s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) &&
958e1b
+                    (compress2(buf_out, (uLongf *)&size_out, buf, s->page_size,
958e1b
+                    Z_BEST_SPEED) == Z_OK) && (size_out < s->page_size)) {
958e1b
+                pd.flags = cpu_convert_to_target32(DUMP_DH_COMPRESSED_ZLIB,
958e1b
+                                                   endian);
958e1b
+                pd.size  = cpu_convert_to_target32(size_out, endian);
958e1b
+
958e1b
+                ret = write_cache(&page_data, buf_out, size_out, false);
958e1b
+                if (ret < 0) {
958e1b
+                    dump_error(s, "dump: failed to write page data.\n");
958e1b
+                    goto out;
958e1b
+                }
958e1b
+#ifdef CONFIG_LZO
958e1b
+            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_LZO) &&
958e1b
+                    (lzo1x_1_compress(buf, s->page_size, buf_out,
958e1b
+                    (lzo_uint *)&size_out, wrkmem) == LZO_E_OK) &&
958e1b
+                    (size_out < s->page_size)) {
958e1b
+                pd.flags = cpu_convert_to_target32(DUMP_DH_COMPRESSED_LZO,
958e1b
+                                                   endian);
958e1b
+                pd.size  = cpu_convert_to_target32(size_out, endian);
958e1b
+
958e1b
+                ret = write_cache(&page_data, buf_out, size_out, false);
958e1b
+                if (ret < 0) {
958e1b
+                    dump_error(s, "dump: failed to write page data.\n");
958e1b
+                    goto out;
958e1b
+                }
958e1b
+#endif
958e1b
+#ifdef CONFIG_SNAPPY
958e1b
+            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) &&
958e1b
+                    (snappy_compress((char *)buf, s->page_size,
958e1b
+                    (char *)buf_out, &size_out) == SNAPPY_OK) &&
958e1b
+                    (size_out < s->page_size)) {
958e1b
+                pd.flags = cpu_convert_to_target32(
958e1b
+                                        DUMP_DH_COMPRESSED_SNAPPY, endian);
958e1b
+                pd.size  = cpu_convert_to_target32(size_out, endian);
958e1b
+
958e1b
+                ret = write_cache(&page_data, buf_out, size_out, false);
958e1b
+                if (ret < 0) {
958e1b
+                    dump_error(s, "dump: failed to write page data.\n");
958e1b
+                    goto out;
958e1b
+                }
958e1b
+#endif
958e1b
+            } else {
958e1b
+                /*
958e1b
+                 * fall back to save in plaintext, size_out should be
958e1b
+                 * assigned to s->page_size
958e1b
+                 */
958e1b
+                pd.flags = cpu_convert_to_target32(0, endian);
958e1b
+                size_out = s->page_size;
958e1b
+                pd.size = cpu_convert_to_target32(size_out, endian);
958e1b
+
958e1b
+                ret = write_cache(&page_data, buf, s->page_size, false);
958e1b
+                if (ret < 0) {
958e1b
+                    dump_error(s, "dump: failed to write page data.\n");
958e1b
+                    goto out;
958e1b
+                }
958e1b
+            }
958e1b
+
958e1b
+            /* get and write page desc here */
958e1b
+            pd.page_flags = cpu_convert_to_target64(0, endian);
958e1b
+            pd.offset = cpu_convert_to_target64(offset_data, endian);
958e1b
+            offset_data += size_out;
958e1b
+
958e1b
+            ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false);
958e1b
+            if (ret < 0) {
958e1b
+                dump_error(s, "dump: failed to write page desc.\n");
958e1b
+                goto out;
958e1b
+            }
958e1b
+        }
958e1b
+    }
958e1b
+
958e1b
+    ret = write_cache(&page_desc, NULL, 0, true);
958e1b
+    if (ret < 0) {
958e1b
+        dump_error(s, "dump: failed to sync cache for page_desc.\n");
958e1b
+        goto out;
958e1b
+    }
958e1b
+    ret = write_cache(&page_data, NULL, 0, true);
958e1b
+    if (ret < 0) {
958e1b
+        dump_error(s, "dump: failed to sync cache for page_data.\n");
958e1b
+        goto out;
958e1b
+    }
958e1b
+
958e1b
+out:
958e1b
+    free_data_cache(&page_desc);
958e1b
+    free_data_cache(&page_data);
958e1b
+
958e1b
+#ifdef CONFIG_LZO
958e1b
+    g_free(wrkmem);
958e1b
+#endif
958e1b
+
958e1b
+    g_free(buf_out);
958e1b
+
958e1b
+    return ret;
958e1b
+}
958e1b
+
958e1b
 static ram_addr_t get_start_block(DumpState *s)
958e1b
 {
958e1b
     GuestPhysBlock *block;
958e1b
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
958e1b
index 92a95e4..efab7a3 100644
958e1b
--- a/include/sysemu/dump.h
958e1b
+++ b/include/sysemu/dump.h
958e1b
@@ -151,6 +151,13 @@ typedef struct DataCache {
958e1b
     off_t offset;       /* offset of the file */
958e1b
 } DataCache;
958e1b
 
958e1b
+typedef struct QEMU_PACKED PageDescriptor {
958e1b
+    uint64_t offset;                /* the offset of the page data*/
958e1b
+    uint32_t size;                  /* the size of this dump page */
958e1b
+    uint32_t flags;                 /* flags */
958e1b
+    uint64_t page_flags;            /* page flags */
958e1b
+} PageDescriptor;
958e1b
+
958e1b
 struct GuestPhysBlockList; /* memory_mapping.h */
958e1b
 int cpu_get_dump_info(ArchDumpInfo *info,
958e1b
                       const struct GuestPhysBlockList *guest_phys_blocks);
958e1b
-- 
958e1b
1.8.3.1
958e1b