Blame SOURCES/1107-btrfs-progs-receive-encoded_write-fallback-to-explic.patch

dbfe2d
From 8f5d9e7c4a6b9e923a02c1677854e2792822574a Mon Sep 17 00:00:00 2001
dbfe2d
From: Boris Burkov <boris@bur.io>
dbfe2d
Date: Fri, 21 Aug 2020 00:40:06 -0700
dbfe2d
Subject: [PATCH] btrfs-progs: receive: encoded_write fallback to explicit
dbfe2d
 decode and write
dbfe2d
dbfe2d
An encoded_write can fail if the file system it is being applied to does
dbfe2d
not support encoded writes or if it can't find enough contiguous space
dbfe2d
to accommodate the encoded extent. In those cases, we can likely still
dbfe2d
process an encoded_write by explicitly decoding the data and doing a
dbfe2d
normal write.
dbfe2d
dbfe2d
Add the necessary fallback path for decoding data compressed with zlib,
dbfe2d
lzo, or zstd. zlib and zstd have reusable decoding context data
dbfe2d
structures which we cache in the receive context so that we don't have
dbfe2d
to recreate them on every encoded_write.
dbfe2d
dbfe2d
Finally, add a command line flag for force-decompress which causes
dbfe2d
receive to always use the fallback path rather than first attempting the
dbfe2d
encoded write.
dbfe2d
dbfe2d
Signed-off-by: Boris Burkov <boris@bur.io>
dbfe2d
---
dbfe2d
 Documentation/btrfs-receive.rst |   5 +
dbfe2d
 cmds/receive.c                  | 261 +++++++++++++++++++++++++++++++-
dbfe2d
 2 files changed, 259 insertions(+), 7 deletions(-)
dbfe2d
dbfe2d
diff --git a/Documentation/btrfs-receive.rst b/Documentation/btrfs-receive.rst
dbfe2d
index 86ffdcc6..b9a3cad6 100644
dbfe2d
--- a/Documentation/btrfs-receive.rst
dbfe2d
+++ b/Documentation/btrfs-receive.rst
dbfe2d
@@ -57,6 +57,11 @@ A subvolume is made read-only after the receiving process finishes successfully
dbfe2d
         If */proc* is not accessible, eg. in a chroot environment, use this option to
dbfe2d
         tell us where this filesystem is mounted.
dbfe2d
 
dbfe2d
+--force-decompress
dbfe2d
+        if the stream contains compressed data (see *--compressed-data* in
dbfe2d
+        ``btrfs-send(8)``), always decompress it instead of writing it with
dbfe2d
+        encoded I/O
dbfe2d
+
dbfe2d
 --dump
dbfe2d
         dump the stream metadata, one line per operation
dbfe2d
 
dbfe2d
diff --git a/cmds/receive.c b/cmds/receive.c
dbfe2d
index 8226ca32..5fd939ce 100644
dbfe2d
--- a/cmds/receive.c
dbfe2d
+++ b/cmds/receive.c
dbfe2d
@@ -40,6 +40,10 @@
dbfe2d
 #include <sys/xattr.h>
dbfe2d
 #include <uuid/uuid.h>
dbfe2d
 
dbfe2d
+#include <lzo/lzo1x.h>
dbfe2d
+#include <zlib.h>
dbfe2d
+#include <zstd.h>
dbfe2d
+
dbfe2d
 #include "kernel-shared/ctree.h"
dbfe2d
 #include "ioctl.h"
dbfe2d
 #include "cmds/commands.h"
dbfe2d
@@ -75,6 +79,12 @@ struct btrfs_receive
dbfe2d
 	char cur_subvol_path[PATH_MAX];
dbfe2d
 
dbfe2d
 	int honor_end_cmd;
dbfe2d
+
dbfe2d
+	bool force_decompress;
dbfe2d
+
dbfe2d
+	/* Reuse stream objects for encoded_write decompression fallback */
dbfe2d
+	ZSTD_DStream *zstd_dstream;
dbfe2d
+	z_stream *zlib_stream;
dbfe2d
 };
dbfe2d
 
dbfe2d
 static int finish_subvol(struct btrfs_receive *rctx)
dbfe2d
@@ -985,6 +995,219 @@ static int process_update_extent(const char *path, u64 offset, u64 len,
dbfe2d
 	return 0;
dbfe2d
 }
dbfe2d
 
dbfe2d
+static int decompress_zlib(struct btrfs_receive *rctx, const char *encoded_data,
dbfe2d
+			   u64 encoded_len, char *unencoded_data,
dbfe2d
+			   u64 unencoded_len)
dbfe2d
+{
dbfe2d
+	bool init = false;
dbfe2d
+	int ret;
dbfe2d
+	/* Inflate into unencoded_data using the z_stream cached in rctx. */
dbfe2d
+	if (!rctx->zlib_stream) {
dbfe2d
+		init = true;
dbfe2d
+		rctx->zlib_stream = malloc(sizeof(z_stream));
dbfe2d
+		if (!rctx->zlib_stream) {
dbfe2d
+			error("failed to allocate zlib stream %m");
dbfe2d
+			return -ENOMEM;
dbfe2d
+		}
dbfe2d
+	}
dbfe2d
+	rctx->zlib_stream->next_in = (void *)encoded_data;
dbfe2d
+	rctx->zlib_stream->avail_in = encoded_len;
dbfe2d
+	rctx->zlib_stream->next_out = (void *)unencoded_data;
dbfe2d
+	rctx->zlib_stream->avail_out = unencoded_len;
dbfe2d
+	/* First use runs inflateInit(); a cached stream gets inflateReset(). */
dbfe2d
+	if (init) {
dbfe2d
+		rctx->zlib_stream->zalloc = Z_NULL;
dbfe2d
+		rctx->zlib_stream->zfree = Z_NULL;
dbfe2d
+		rctx->zlib_stream->opaque = Z_NULL;
dbfe2d
+		ret = inflateInit(rctx->zlib_stream);
dbfe2d
+	} else {
dbfe2d
+		ret = inflateReset(rctx->zlib_stream);
dbfe2d
+	}
dbfe2d
+	if (ret != Z_OK) {
dbfe2d
+		error("zlib inflate init failed: %d", ret);
dbfe2d
+		return -EIO;
dbfe2d
+	}
dbfe2d
+	/* Stop at Z_STREAM_END or once input or output space is used up. */
dbfe2d
+	while (rctx->zlib_stream->avail_in > 0 &&
dbfe2d
+	       rctx->zlib_stream->avail_out > 0) {
dbfe2d
+		ret = inflate(rctx->zlib_stream, Z_FINISH);
dbfe2d
+		if (ret == Z_STREAM_END) {
dbfe2d
+			break;
dbfe2d
+		} else if (ret != Z_OK) {
dbfe2d
+			error("zlib inflate failed: %d", ret);
dbfe2d
+			return -EIO;
dbfe2d
+		}
dbfe2d
+	}
dbfe2d
+	return 0;
dbfe2d
+}
dbfe2d
+
dbfe2d
+static int decompress_zstd(struct btrfs_receive *rctx, const char *encoded_buf,
dbfe2d
+			   u64 encoded_len, char *unencoded_buf,
dbfe2d
+			   u64 unencoded_len)
dbfe2d
+{
dbfe2d
+	ZSTD_inBuffer in_buf = {
dbfe2d
+		.src = encoded_buf,
dbfe2d
+		.size = encoded_len
dbfe2d
+	};
dbfe2d
+	ZSTD_outBuffer out_buf = {
dbfe2d
+		.dst = unencoded_buf,
dbfe2d
+		.size = unencoded_len
dbfe2d
+	};
dbfe2d
+	size_t ret;
dbfe2d
+	/* Create the cached DStream on first use; re-init it on every call. */
dbfe2d
+	if (!rctx->zstd_dstream) {
dbfe2d
+		rctx->zstd_dstream = ZSTD_createDStream();
dbfe2d
+		if (!rctx->zstd_dstream) {
dbfe2d
+			error("failed to create zstd dstream");
dbfe2d
+			return -ENOMEM;
dbfe2d
+		}
dbfe2d
+	}
dbfe2d
+	ret = ZSTD_initDStream(rctx->zstd_dstream);
dbfe2d
+	if (ZSTD_isError(ret)) {
dbfe2d
+		error("failed to init zstd stream: %s", ZSTD_getErrorName(ret));
dbfe2d
+		return -EIO;
dbfe2d
+	}
dbfe2d
+	while (in_buf.pos < in_buf.size && out_buf.pos < out_buf.size) {
dbfe2d
+		ret = ZSTD_decompressStream(rctx->zstd_dstream, &out_buf, &in_buf);
dbfe2d
+		if (ret == 0) {
dbfe2d
+			break;
dbfe2d
+		} else if (ZSTD_isError(ret)) {
dbfe2d
+			error("failed to decompress zstd stream: %s",
dbfe2d
+			      ZSTD_getErrorName(ret));
dbfe2d
+			return -EIO;
dbfe2d
+		}
dbfe2d
+	}
dbfe2d
+	return 0;	/* NOTE(review): short output is not an error; tail not cleared here */
dbfe2d
+}
dbfe2d
+
dbfe2d
+static int decompress_lzo(const char *encoded_data, u64 encoded_len,
dbfe2d
+			  char *unencoded_data, u64 unencoded_len,
dbfe2d
+			  unsigned int sector_size)
dbfe2d
+{
dbfe2d
+	uint32_t total_len;
dbfe2d
+	size_t in_pos, out_pos;
dbfe2d
+	/* Leading 4 bytes: little-endian total length of the LZO stream. */
dbfe2d
+	if (encoded_len < 4) {
dbfe2d
+		error("lzo header is truncated");
dbfe2d
+		return -EIO;
dbfe2d
+	}
dbfe2d
+	memcpy(&total_len, encoded_data, 4);
dbfe2d
+	total_len = le32toh(total_len);
dbfe2d
+	if (total_len > encoded_len) {
dbfe2d
+		error("lzo header is invalid");
dbfe2d
+		return -EIO;
dbfe2d
+	}
dbfe2d
+
dbfe2d
+	in_pos = 4;
dbfe2d
+	out_pos = 0;
dbfe2d
+	while (in_pos < total_len && out_pos < unencoded_len) {
dbfe2d
+		size_t sector_remaining;
dbfe2d
+		uint32_t src_len;
dbfe2d
+		lzo_uint dst_len;
dbfe2d
+		int ret;
dbfe2d
+		/* Under 4 bytes left in this input sector: skip to the next. */
dbfe2d
+		sector_remaining = -in_pos % sector_size;
dbfe2d
+		if (sector_remaining < 4) {
dbfe2d
+			if (total_len - in_pos <= sector_remaining)
dbfe2d
+				break;
dbfe2d
+			in_pos += sector_remaining;
dbfe2d
+		}
dbfe2d
+
dbfe2d
+		if (total_len - in_pos < 4) {
dbfe2d
+			error("lzo segment header is truncated");
dbfe2d
+			return -EIO;
dbfe2d
+		}
dbfe2d
+		/* 4-byte little-endian length of the next compressed segment. */
dbfe2d
+		memcpy(&src_len, encoded_data + in_pos, 4);
dbfe2d
+		src_len = le32toh(src_len);
dbfe2d
+		in_pos += 4;
dbfe2d
+		if (src_len > total_len - in_pos) {
dbfe2d
+			error("lzo segment header is invalid");
dbfe2d
+			return -EIO;
dbfe2d
+		}
dbfe2d
+		/* NOTE(review): output cap is sector_size, not the space left. */
dbfe2d
+		dst_len = sector_size;
dbfe2d
+		ret = lzo1x_decompress_safe((void *)(encoded_data + in_pos),
dbfe2d
+					    src_len,
dbfe2d
+					    (void *)(unencoded_data + out_pos),
dbfe2d
+					    &dst_len, NULL);
dbfe2d
+		if (ret != LZO_E_OK) {
dbfe2d
+			error("lzo1x_decompress_safe failed: %d", ret);
dbfe2d
+			return -EIO;
dbfe2d
+		}
dbfe2d
+
dbfe2d
+		in_pos += src_len;
dbfe2d
+		out_pos += dst_len;
dbfe2d
+	}
dbfe2d
+	return 0;
dbfe2d
+}
dbfe2d
+
dbfe2d
+static int decompress_and_write(struct btrfs_receive *rctx,
dbfe2d
+				const char *encoded_data, u64 offset,
dbfe2d
+				u64 encoded_len, u64 unencoded_file_len,
dbfe2d
+				u64 unencoded_len, u64 unencoded_offset,
dbfe2d
+				u32 compression)
dbfe2d
+{
dbfe2d
+	int ret = 0;
dbfe2d
+	size_t pos;
dbfe2d
+	ssize_t w;
dbfe2d
+	char *unencoded_data;
dbfe2d
+	int sector_shift;
dbfe2d
+	/* Decode into a zero-filled scratch buffer, then pwrite the file range. */
dbfe2d
+	unencoded_data = calloc(unencoded_len, 1);
dbfe2d
+	if (!unencoded_data) {
dbfe2d
+		error("allocating space for unencoded data failed: %m");
dbfe2d
+		return -ENOMEM;	/* errno may have been clobbered by error() */
dbfe2d
+	}
dbfe2d
+
dbfe2d
+	switch (compression) {
dbfe2d
+	case BTRFS_ENCODED_IO_COMPRESSION_ZLIB:
dbfe2d
+		ret = decompress_zlib(rctx, encoded_data, encoded_len,
dbfe2d
+				      unencoded_data, unencoded_len);
dbfe2d
+		if (ret)
dbfe2d
+			goto out;
dbfe2d
+		break;
dbfe2d
+	case BTRFS_ENCODED_IO_COMPRESSION_ZSTD:
dbfe2d
+		ret = decompress_zstd(rctx, encoded_data, encoded_len,
dbfe2d
+				      unencoded_data, unencoded_len);
dbfe2d
+		if (ret)
dbfe2d
+			goto out;
dbfe2d
+		break;
dbfe2d
+	case BTRFS_ENCODED_IO_COMPRESSION_LZO_4K:
dbfe2d
+	case BTRFS_ENCODED_IO_COMPRESSION_LZO_8K:
dbfe2d
+	case BTRFS_ENCODED_IO_COMPRESSION_LZO_16K:
dbfe2d
+	case BTRFS_ENCODED_IO_COMPRESSION_LZO_32K:
dbfe2d
+	case BTRFS_ENCODED_IO_COMPRESSION_LZO_64K:
dbfe2d
+		sector_shift =
dbfe2d
+			compression - BTRFS_ENCODED_IO_COMPRESSION_LZO_4K + 12;
dbfe2d
+		ret = decompress_lzo(encoded_data, encoded_len, unencoded_data,
dbfe2d
+				     unencoded_len, 1U << sector_shift);
dbfe2d
+		if (ret)
dbfe2d
+			goto out;
dbfe2d
+		break;
dbfe2d
+	default:
dbfe2d
+		error("unknown compression: %u", compression);
dbfe2d
+		ret = -EOPNOTSUPP;
dbfe2d
+		goto out;
dbfe2d
+	}
dbfe2d
+	/* File data begins unencoded_offset bytes into the buffer. */
dbfe2d
+	pos = 0;
dbfe2d
+	while (pos < unencoded_file_len) {
dbfe2d
+		w = pwrite(rctx->write_fd, unencoded_data + unencoded_offset + pos,
dbfe2d
+			   unencoded_file_len - pos, offset);
dbfe2d
+		if (w < 0) {
dbfe2d
+			ret = -errno;
dbfe2d
+			error("writing unencoded data failed: %m");
dbfe2d
+			goto out;
dbfe2d
+		}
dbfe2d
+		pos += w;
dbfe2d
+		offset += w;
dbfe2d
+	}
dbfe2d
+out:
dbfe2d
+	free(unencoded_data);
dbfe2d
+	return ret;
dbfe2d
+}
dbfe2d
+
dbfe2d
 static int process_encoded_write(const char *path, const void *data, u64 offset,
dbfe2d
 				 u64 len, u64 unencoded_file_len,
dbfe2d
 				 u64 unencoded_len, u64 unencoded_offset,
dbfe2d
@@ -1020,13 +1243,21 @@ static int process_encoded_write(const char *path, const void *data, u64 offset,
dbfe2d
 	if (ret < 0)
dbfe2d
 		return ret;
dbfe2d
 
dbfe2d
-	ret = ioctl(rctx->write_fd, BTRFS_IOC_ENCODED_WRITE, &encoded);
dbfe2d
-	if (ret < 0) {
dbfe2d
-		ret = -errno;
dbfe2d
-		error("encoded_write: writing to %s failed: %m", path);
dbfe2d
-		return ret;
dbfe2d
+	if (!rctx->force_decompress) {
dbfe2d
+		ret = ioctl(rctx->write_fd, BTRFS_IOC_ENCODED_WRITE, &encoded);
dbfe2d
+		if (ret >= 0)
dbfe2d
+			return 0;
dbfe2d
+		/* Fall back for these errors, fail hard for anything else. */
dbfe2d
+		if (errno != ENOSPC && errno != ENOTTY && errno != EINVAL) {
dbfe2d
+			ret = -errno;
dbfe2d
+			error("encoded_write: writing to %s failed: %m", path);
dbfe2d
+			return ret;
dbfe2d
+		}
dbfe2d
 	}
dbfe2d
-	return 0;
dbfe2d
+
dbfe2d
+	return decompress_and_write(rctx, data, offset, len, unencoded_file_len,
dbfe2d
+				    unencoded_len, unencoded_offset,
dbfe2d
+				    compression);
dbfe2d
 }
dbfe2d
 
dbfe2d
 static struct btrfs_send_ops send_ops = {
dbfe2d
@@ -1204,6 +1435,12 @@ out:
dbfe2d
 		close(rctx->dest_dir_fd);
dbfe2d
 		rctx->dest_dir_fd = -1;
dbfe2d
 	}
dbfe2d
+	if (rctx->zstd_dstream)
dbfe2d
+		ZSTD_freeDStream(rctx->zstd_dstream);
dbfe2d
+	if (rctx->zlib_stream) {
dbfe2d
+		inflateEnd(rctx->zlib_stream);
dbfe2d
+		free(rctx->zlib_stream);
dbfe2d
+	}
dbfe2d
 
dbfe2d
 	return ret;
dbfe2d
 }
dbfe2d
@@ -1234,6 +1471,9 @@ static const char * const cmd_receive_usage[] = {
dbfe2d
 	"-m ROOTMOUNT     the root mount point of the destination filesystem.",
dbfe2d
 	"                 If /proc is not accessible, use this to tell us where",
dbfe2d
 	"                 this file system is mounted.",
dbfe2d
+	"--force-decompress",
dbfe2d
+	"                 if the stream contains compressed data, always",
dbfe2d
+	"                 decompress it instead of writing it with encoded I/O",
dbfe2d
 	"--dump           dump stream metadata, one line per operation,",
dbfe2d
 	"                 does not require the MOUNT parameter",
dbfe2d
 	"-v               deprecated, alias for global -v option",
dbfe2d
@@ -1277,12 +1517,16 @@ static int cmd_receive(const struct cmd_struct *cmd, int argc, char **argv)
dbfe2d
 	optind = 0;
dbfe2d
 	while (1) {
dbfe2d
 		int c;
dbfe2d
-		enum { GETOPT_VAL_DUMP = 257 };
dbfe2d
+		enum {
dbfe2d
+			GETOPT_VAL_DUMP = 257,
dbfe2d
+			GETOPT_VAL_FORCE_DECOMPRESS,
dbfe2d
+		};
dbfe2d
 		static const struct option long_opts[] = {
dbfe2d
 			{ "max-errors", required_argument, NULL, 'E' },
dbfe2d
 			{ "chroot", no_argument, NULL, 'C' },
dbfe2d
 			{ "dump", no_argument, NULL, GETOPT_VAL_DUMP },
dbfe2d
 			{ "quiet", no_argument, NULL, 'q' },
dbfe2d
+			{ "force-decompress", no_argument, NULL, GETOPT_VAL_FORCE_DECOMPRESS },
dbfe2d
 			{ NULL, 0, NULL, 0 }
dbfe2d
 		};
dbfe2d
 
dbfe2d
@@ -1325,6 +1569,9 @@ static int cmd_receive(const struct cmd_struct *cmd, int argc, char **argv)
dbfe2d
 		case GETOPT_VAL_DUMP:
dbfe2d
 			dump = 1;
dbfe2d
 			break;
dbfe2d
+		case GETOPT_VAL_FORCE_DECOMPRESS:
dbfe2d
+			rctx.force_decompress = true;
dbfe2d
+			break;
dbfe2d
 		default:
dbfe2d
 			usage_unknown_option(cmd, argv);
dbfe2d
 		}
dbfe2d
-- 
dbfe2d
2.35.1
dbfe2d