linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Omar Sandoval <osandov@osandov.com>
To: linux-btrfs@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Subject: [PATCH 07/11] btrfs-progs: receive: encoded_write fallback to explicit decode and write
Date: Fri, 21 Aug 2020 00:40:06 -0700	[thread overview]
Message-ID: <8e4bf8b613ad4504376e9f00ff87ee2e51222a23.1597994354.git.osandov@osandov.com> (raw)
In-Reply-To: <cover.1597994106.git.osandov@osandov.com>

From: Boris Burkov <boris@bur.io>

An encoded_write can fail if the file system it is being applied to does
not support encoded writes or if it can't find enough contiguous space
to accommodate the encoded extent. In those cases, we can likely still
process an encoded_write by explicitly decoding the data and doing a
normal write.

Add the necessary fallback path for decoding data compressed with zlib,
lzo, or zstd. zlib and zstd have reusable decoding context data
structures which we cache in the receive context so that we don't have
to recreate them on every encoded_write.

Finally, add a command line flag for force-decompress which causes
receive to always use the fallback path rather than first attempting the
encoded write.

Signed-off-by: Boris Burkov <boris@bur.io>
---
 cmds/receive.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 262 insertions(+), 11 deletions(-)

diff --git a/cmds/receive.c b/cmds/receive.c
index cd0f47ec..c67d4653 100644
--- a/cmds/receive.c
+++ b/cmds/receive.c
@@ -41,6 +41,10 @@
 #include <sys/xattr.h>
 #include <uuid/uuid.h>
 
+#include <lzo/lzo1x.h>
+#include <zlib.h>
+#include <zstd.h>
+
 #include "kernel-shared/ctree.h"
 #include "ioctl.h"
 #include "cmds/commands.h"
@@ -82,6 +86,8 @@ struct btrfs_receive
 
 	int honor_end_cmd;
 
+	int force_decompress;
+
 	/*
 	 * Buffer to store capabilities from security.capabilities xattr,
 	 * usually 20 bytes, but make same room for potentially larger
@@ -89,6 +95,10 @@ struct btrfs_receive
 	 */
 	char cached_capabilities[64];
 	int cached_capabilities_len;
+
+	/* Reuse stream objects for encoded_write decompression fallback */
+	ZSTD_DStream *zstd_dstream;
+	z_stream *zlib_stream;
 };
 
 static int finish_subvol(struct btrfs_receive *rctx)
@@ -1073,9 +1083,210 @@ static int process_update_extent(const char *path, u64 offset, u64 len,
 	return 0;
 }
 
+static int decompress_zlib(struct btrfs_receive *rctx, const void *encoded_data,
+			   u64 encoded_len, char *unencoded_data,
+			   u64 unencoded_len)
+{
+	int status = 0;
+	bool init = false;
+	int ret;
+
+	if (!rctx->zlib_stream) {
+		init = true;
+		rctx->zlib_stream = malloc(sizeof(z_stream));
+		if (!rctx->zlib_stream) {
+			error("failed to allocate zlib stream %m");
+			status = -ENOMEM;
+			goto out;
+		}
+	}
+	rctx->zlib_stream->next_in = (void *)encoded_data;
+	rctx->zlib_stream->avail_in = encoded_len;
+	rctx->zlib_stream->next_out = (void *)unencoded_data;
+	rctx->zlib_stream->avail_out = unencoded_len;
+
+	if (!init)
+		ret = inflateReset(rctx->zlib_stream);
+	else {
+		rctx->zlib_stream->zalloc = Z_NULL;
+		rctx->zlib_stream->zfree = Z_NULL;
+		rctx->zlib_stream->opaque = Z_NULL;
+		ret = inflateInit(rctx->zlib_stream);
+	}
+	if (ret != Z_OK) {
+		error("zlib inflate init failed %d", ret);
+		status = -EIO;
+		goto out;
+	}
+
+	while (rctx->zlib_stream->avail_in > 0 &&
+	       rctx->zlib_stream->avail_out > 0) {
+		ret = inflate(rctx->zlib_stream, Z_FINISH);
+		if (ret == Z_STREAM_END) {
+			break;
+		} else if (ret != Z_OK) {
+			error("zlib inflate failed %d", ret);
+			status = -EIO;
+			break;
+		}
+	}
+out:
+	return status;
+}
+
+static int decompress_lzo(const void *encoded_data, u64 encoded_len,
+			  char *unencoded_data, u64 unencoded_len)
+{
+	uint32_t total_len;
+	size_t in_pos, out_pos;
+
+	if (encoded_len < 4) {
+		error("lzo header is truncated");
+		return -EIO;
+	}
+	memcpy(&total_len, encoded_data, 4);
+	total_len = le32toh(total_len);
+	if (total_len > encoded_len) {
+		error("lzo header is invalid");
+		return -EIO;
+	}
+
+	in_pos = 4;
+	out_pos = 0;
+	while (in_pos < total_len && out_pos < unencoded_len) {
+		uint32_t src_len;
+		lzo_uint dst_len = unencoded_len - out_pos;
+		int ret;
+
+		if (total_len - in_pos < 4) {
+			error("lzo segment header is truncated");
+			return -EIO;
+		}
+		memcpy(&src_len, encoded_data + in_pos, 4);
+		src_len = le32toh(src_len);
+		in_pos += 4;
+		if (src_len > total_len - in_pos) {
+			error("lzo segment header is invalid\n");
+			return -EIO;
+		}
+
+		ret = lzo1x_decompress_safe((void *)(encoded_data + in_pos),
+			src_len, (void *)(unencoded_data + out_pos), &dst_len,
+			NULL);
+		if (ret != LZO_E_OK) {
+			error("lzo1x_decompress_safe failed: %d", ret);
+			return -EIO;
+		}
+
+		in_pos += src_len;
+		out_pos += dst_len;
+	}
+	return 0;
+}
+
+static int decompress_zstd(struct btrfs_receive *rctx, const void *encoded_buf,
+			   u64 encoded_len, char *unencoded_buf,
+			   u64 unencoded_len)
+{
+	ZSTD_inBuffer in_buf = {
+		.src = encoded_buf,
+		.size = encoded_len
+	};
+	ZSTD_outBuffer out_buf = {
+		.dst = unencoded_buf,
+		.size = unencoded_len
+	};
+	int status = 0;
+	size_t ret;
+
+	if (!rctx->zstd_dstream) {
+		rctx->zstd_dstream = ZSTD_createDStream();
+		if (!rctx->zstd_dstream) {
+			error("failed to create zstd dstream");
+			status = -ENOMEM;
+			goto out;
+		}
+	}
+	ret = ZSTD_initDStream(rctx->zstd_dstream);
+	if (ZSTD_isError(ret)) {
+		error("failed to init zstd stream %s", ZSTD_getErrorName(ret));
+		status = -EIO;
+		goto out;
+	}
+	while (in_buf.pos < in_buf.size && out_buf.pos < out_buf.size) {
+		ret = ZSTD_decompressStream(rctx->zstd_dstream, &out_buf, &in_buf);
+		if (ret == 0) {
+			break;
+		} else if (ZSTD_isError(ret)) {
+			error("failed to decompress zstd stream: %s",
+			      ZSTD_getErrorName(ret));
+			status = -EIO;
+			goto out;
+		}
+	}
+
+out:
+	return status;
+}
+
+static int decompress_and_write(const void *encoded_data, u64 encoded_len,
+				u64 unencoded_file_len, u64 unencoded_len,
+				u64 unencoded_offset, u32 compression,
+				void *user)
+{
+	int ret = 0;
+	size_t pos;
+	ssize_t w;
+	struct btrfs_receive *rctx = user;
+	char *unencoded_data;
+
+	unencoded_data = calloc(unencoded_len, sizeof(*unencoded_data));
+	if (!unencoded_data) {
+		error("allocating space for unencoded data failed: %m");
+		return -errno;
+	}
+
+	switch (compression) {
+	case ENCODED_IOV_COMPRESSION_ZLIB:
+		ret = decompress_zlib(rctx, encoded_data, encoded_len,
+				  unencoded_data, unencoded_len);
+		if (ret)
+			goto out;
+		break;
+	case ENCODED_IOV_COMPRESSION_LZO:
+		ret = decompress_lzo(encoded_data, encoded_len,
+				 unencoded_data, unencoded_len);
+		if (ret)
+			goto out;
+		break;
+	case ENCODED_IOV_COMPRESSION_ZSTD:
+		ret = decompress_zstd(rctx, encoded_data, encoded_len,
+				  unencoded_data, unencoded_len);
+		if (ret)
+			goto out;
+		break;
+	}
+
+	pos = unencoded_offset;
+	while (pos < unencoded_file_len) {
+		w = pwrite(rctx->write_fd, unencoded_data + pos,
+			   unencoded_file_len - pos, unencoded_offset + pos);
+		if (w < 0) {
+			ret = -errno;
+			error("writing unencoded data failed: %m");
+			goto out;
+		}
+		pos += w;
+	}
+out:
+	free(unencoded_data);
+	return ret;
+}
+
 static int process_encoded_write(const char *path, const void *data, u64 offset,
-	u64 len, u64 unencoded_file_len, u64 unencoded_len,
-	u64 unencoded_offset, u32 compression, u32 encryption, void *user)
+				 u64 len, u64 unencoded_file_len,
+				 u64 unencoded_len, u64 unencoded_offset,
+				 u32 compression, u32 encryption, void *user)
 {
 	int ret;
 	struct btrfs_receive *rctx = user;
@@ -1091,6 +1302,14 @@ static int process_encoded_write(const char *path, const void *data, u64 offset,
 		{ &encoded, sizeof(encoded) },
 		{ (char *)data, len }
 	};
+	bool encoded_write = !rctx->force_decompress;
+	bool decompress = rctx->force_decompress;
+
+	if (encryption) {
+		error("encoded_write: encryption not supported\n");
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
 
 	ret = path_cat_out(full_path, rctx->full_subvol_path, path);
 	if (ret < 0) {
@@ -1102,15 +1321,37 @@ static int process_encoded_write(const char *path, const void *data, u64 offset,
 	if (ret < 0)
 		goto out;
 
-	/*
-	 * NOTE: encoded writes guarantee no partial writes,
-	 * so we don't need to handle that possibility.
-	 */
-	ret = pwritev2(rctx->write_fd, iov, 2, offset, RWF_ENCODED);
-	if (ret < 0) {
-		ret = -errno;
-		error("encoded_write: writing to %s failed: %m", path);
+	if (encoded_write) {
+		/*
+		 * NOTE: encoded writes guarantee no partial writes,
+		 * so we don't need to handle that possibility.
+		 */
+		ret = pwritev2(rctx->write_fd, iov, 2, offset, RWF_ENCODED);
+		if (ret < 0) {
+			/*
+			 * error conditions where fallback to manual decompress
+			 * and write make sense.
+			 */
+			if (errno == ENOSPC ||
+			    errno == EOPNOTSUPP ||
+			    errno == EINVAL)
+				decompress = true;
+			else {
+				ret = -errno;
+				error("encoded_write: writing to %s failed: %m", path);
+				goto out;
+			}
+		}
 	}
+
+	if (decompress) {
+		ret = decompress_and_write(data, len, unencoded_file_len,
+				unencoded_len, unencoded_offset,
+				compression, user);
+		if (ret < 0)
+			goto out;
+	}
+	ret = 0;
 out:
 	return ret;
 }
@@ -1300,6 +1541,12 @@ out:
 		close(rctx->dest_dir_fd);
 		rctx->dest_dir_fd = -1;
 	}
+	if (rctx->zstd_dstream)
+		ZSTD_freeDStream(rctx->zstd_dstream);
+	if (rctx->zlib_stream) {
+		inflateEnd(rctx->zlib_stream);
+		free(rctx->zlib_stream);
+	}
 
 	return ret;
 }
@@ -1373,12 +1620,13 @@ static int cmd_receive(const struct cmd_struct *cmd, int argc, char **argv)
 	optind = 0;
 	while (1) {
 		int c;
-		enum { GETOPT_VAL_DUMP = 257 };
+		enum { GETOPT_VAL_DUMP = 257, GETOPT_VAL_FORCE_DECOMPRESS };
 		static const struct option long_opts[] = {
 			{ "max-errors", required_argument, NULL, 'E' },
 			{ "chroot", no_argument, NULL, 'C' },
 			{ "dump", no_argument, NULL, GETOPT_VAL_DUMP },
 			{ "quiet", no_argument, NULL, 'q' },
+			{ "force-decompress", no_argument, NULL, GETOPT_VAL_FORCE_DECOMPRESS },
 			{ NULL, 0, NULL, 0 }
 		};
 
@@ -1421,6 +1669,9 @@ static int cmd_receive(const struct cmd_struct *cmd, int argc, char **argv)
 		case GETOPT_VAL_DUMP:
 			dump = 1;
 			break;
+		case GETOPT_VAL_FORCE_DECOMPRESS:
+			rctx.force_decompress = 1;
+			break;
 		default:
 			usage_unknown_option(cmd, argv);
 		}
-- 
2.28.0


  parent reply	other threads:[~2020-08-21  7:41 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-21  7:39 [PATCH 0/9] btrfs: implement send/receive of compressed extents without decompressing Omar Sandoval
2020-08-21  7:39 ` [PATCH 1/9] btrfs: send: get rid of i_size logic in send_write() Omar Sandoval
2020-08-21 17:26   ` Filipe Manana
2020-08-24 17:39   ` Josef Bacik
2020-08-21  7:39 ` [PATCH 2/9] btrfs: send: avoid copying file data Omar Sandoval
2020-08-21 17:29   ` Filipe Manana
2020-08-24 21:34     ` Omar Sandoval
2020-08-24 17:47   ` Josef Bacik
2020-09-11 14:13   ` David Sterba
2020-09-14 22:04     ` Omar Sandoval
2020-09-15  8:14       ` David Sterba
2020-08-21  7:39 ` [PATCH 3/9] btrfs: send: use btrfs_file_extent_end() in send_write_or_clone() Omar Sandoval
2020-08-21 17:30   ` Filipe Manana
2020-08-21  7:39 ` [PATCH 4/9] btrfs: add send_stream_version attribute to sysfs Omar Sandoval
2020-08-21  7:39 ` [PATCH 5/9] btrfs: add send stream v2 definitions Omar Sandoval
2020-08-24 17:49   ` Josef Bacik
2020-08-21  7:39 ` [PATCH 6/9] btrfs: send: write larger chunks when using stream v2 Omar Sandoval
2020-08-24 17:57   ` Josef Bacik
2020-08-21  7:39 ` [PATCH 7/9] btrfs: send: allocate send buffer with alloc_page() and vmap() for v2 Omar Sandoval
2020-08-21  7:39 ` [PATCH 8/9] btrfs: send: send compressed extents with encoded writes Omar Sandoval
2020-08-24 17:32   ` Josef Bacik
2020-08-24 17:52     ` Omar Sandoval
2020-08-21  7:39 ` [PATCH 9/9] btrfs: send: enable support for stream v2 and compressed writes Omar Sandoval
2020-08-21  7:40 ` [PATCH 01/11] btrfs-progs: receive: support v2 send stream larger tlv_len Omar Sandoval
2020-08-21  7:40 ` [PATCH 02/11] btrfs-progs: receive: dynamically allocate sctx->read_buf Omar Sandoval
2020-08-21  7:40 ` [PATCH 03/11] btrfs-progs: receive: support v2 send stream DATA tlv format Omar Sandoval
2020-08-21  7:40 ` [PATCH 04/11] btrfs-progs: receive: add send stream v2 cmds and attrs to send.h Omar Sandoval
2020-08-21  7:40 ` [PATCH 05/11] btrfs-progs: receive: add stub implementation for pwritev2 Omar Sandoval
2020-08-21  7:40 ` [PATCH 06/11] btrfs-progs: receive: process encoded_write commands Omar Sandoval
2020-08-21  7:40 ` Omar Sandoval [this message]
2020-08-21  7:40 ` [PATCH 08/11] btrfs-progs: receive: process fallocate commands Omar Sandoval
2020-08-21  7:40 ` [PATCH 09/11] btrfs-progs: receive: process setflags ioctl commands Omar Sandoval
2020-08-21  7:40 ` [PATCH 10/11] btrfs-progs: send: stream v2 ioctl flags Omar Sandoval
2020-08-21  7:40 ` [PATCH 11/11] btrfs-progs: receive: add tests for basic encoded_write send/receive Omar Sandoval
2020-08-24 19:57 ` [PATCH 0/9] btrfs: implement send/receive of compressed extents without decompressing David Sterba
2020-08-24 22:16   ` Omar Sandoval
2020-09-10 11:28 ` David Sterba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8e4bf8b613ad4504376e9f00ff87ee2e51222a23.1597994354.git.osandov@osandov.com \
    --to=osandov@osandov.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).