[PREVIEW] Add x86 zstd kernel compression
diff mbox series

Message ID 20171005192248.640495-1-terrelln@fb.com
State New, archived
Headers show
Series
  • [PREVIEW] Add x86 zstd kernel compression
Related show

Commit Message

Nick Terrell Oct. 5, 2017, 7:22 p.m. UTC
On 10/5/17, 1:18 AM, "René Rebe" <rene@exactcode.com> wrote:
>
> Hi Nick,
>
> On Oct 5, 2017, at 1:29, Nick Terrell <terrelln@fb.com> wrote:
>
>> On 10/4/17, 3:01 AM, "linux-kernel-owner@vger.kernel.org on behalf of René Rebe" <linux-kernel-owner@vger.kernel.org on behalf of rene@exactcode.com> wrote:
>>> Hi,
>>>
>>> I noticed zstd compression was recently added for btrfs and squashfs.
>>> Are there actually already patches floating around for zstd kernel and intird compression?
>>> Looks like that would be a quite nice fit regarding speed and compression ratio, …
>>>
>>> Regards,
>>> 	René
>>
>> I started working on some patches yesterday, and just got zstd kernel,
>> initrd, and initramfs compression working today. I think I'll be ready to
>> send the patches out within a week.
>
> awesome, if you have something to test I could give it a try on my test bench, too.

Here is a preview of the patch, though its not quite ready.
I believe it works, but I still need to clean it up a bit, and split it
into the decompression function, the initrd piece, and the x86 specific
piece. I'll try to get the patch set out this week, and I'll CC you.

> Regards,
> 	René

Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 arch/x86/Kconfig                  |   1 +
 arch/x86/boot/compressed/Makefile |   5 +-
 arch/x86/boot/compressed/misc.c   |   4 +
 arch/x86/boot/header.S            |   8 +-
 arch/x86/include/asm/boot.h       |   6 +-
 include/linux/decompress/mm.h     |   2 +
 include/linux/decompress/unzstd.h |  24 +++
 init/Kconfig                      |  11 +-
 lib/Kconfig                       |   6 +-
 lib/Makefile                      |   1 +
 lib/decompress.c                  |   5 +
 lib/decompress_unzstd.c           | 328 ++++++++++++++++++++++++++++++++++++++
 lib/xxhash.c                      |  21 +--
 lib/zstd/decompress.c             |   2 +
 lib/zstd/fse_decompress.c         |   9 +-
 scripts/Makefile.lib              |  16 ++
 usr/Kconfig                       |  16 ++
 17 files changed, 442 insertions(+), 23 deletions(-)
 create mode 100644 include/linux/decompress/unzstd.h
 create mode 100644 lib/decompress_unzstd.c

--
2.9.5

Comments

Rene Rebe Oct. 5, 2017, 9:15 p.m. UTC | #1
Hi,

On 10/05/2017 09:22 PM, Nick Terrell wrote:
> On 10/5/17, 1:18 AM, "René Rebe" <rene@exactcode.com> wrote:
>>
>> Hi Nick,
>>
>> On Oct 5, 2017, at 1:29, Nick Terrell <terrelln@fb.com> wrote:
>>
>>> On 10/4/17, 3:01 AM, "linux-kernel-owner@vger.kernel.org on behalf of René Rebe" <linux-kernel-owner@vger.kernel.org on behalf of rene@exactcode.com> wrote:
>>>> Hi,
>>>>
>>>> I noticed zstd compression was recently added for btrfs and squashfs.
>>>> Are there actually already patches floating around for zstd kernel and intird compression?
>>>> Looks like that would be a quite nice fit regarding speed and compression ratio, …
>>>>
>>>> Regards,
>>>> 	René
>>>
>>> I started working on some patches yesterday, and just got zstd kernel,
>>> initrd, and initramfs compression working today. I think I'll be ready to
>>> send the patches out within a week.
>>
>> awesome, if you have something to test I could give it a try on my test bench, too.
> 
> Here is a preview of the patch, though its not quite ready.
> I believe it works, but I still need to clean it up a bit, and split it
> into the decompression function, the initrd piece, and the x86 specific
> piece. I'll try to get the patch set out this week, and I'll CC you.

thanks for the preview, which I applied cleanly, modulo this white-space 
chunk:

diff --git a/lib/Kconfig b/lib/Kconfig
index b1445b2..02fdcac 100644
@@ -296,7 +300,7 @@ config GENERIC_ALLOCATOR
   #
   config REED_SOLOMON
   	tristate
-
+
   config REED_SOLOMON_ENC8
   	bool
  to the 4.14-rc2 tarball and it compiled and booted on x86-64,
including a zstd initrd!

Great work, will probably start testing it on my personal
laptop, to use the same compression everywhere.

Regards,
	René

Patch
diff mbox series

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 971feac..e34e3e5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -150,6 +150,7 @@  config X86
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
 	select HAVE_KERNEL_XZ
+	select HAVE_KERNEL_ZSTD
 	select HAVE_KPROBES
 	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_KRETPROBES
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 8a95827..118a423 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -23,7 +23,7 @@  OBJECT_FILES_NON_STANDARD	:= y
 KCOV_INSTRUMENT		:= n

 targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
-	vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
+	vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst

 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ -O2
 KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
@@ -133,6 +133,8 @@  $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lzo)
 $(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lz4)
+$(obj)/vmlinux.bin.zst: $(vmlinux.bin.all-y) FORCE
+	$(call if_changed,zstd)

 suffix-$(CONFIG_KERNEL_GZIP)	:= gz
 suffix-$(CONFIG_KERNEL_BZIP2)	:= bz2
@@ -140,6 +142,7 @@  suffix-$(CONFIG_KERNEL_LZMA)	:= lzma
 suffix-$(CONFIG_KERNEL_XZ)	:= xz
 suffix-$(CONFIG_KERNEL_LZO) 	:= lzo
 suffix-$(CONFIG_KERNEL_LZ4) 	:= lz4
+suffix-$(CONFIG_KERNEL_ZSTD)	:= zst

 quiet_cmd_mkpiggy = MKPIGGY $@
       cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index c14217c..8140895 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -74,6 +74,10 @@  static int lines, cols;
 #ifdef CONFIG_KERNEL_LZ4
 #include "../../../../lib/decompress_unlz4.c"
 #endif
+
+#ifdef CONFIG_KERNEL_ZSTD
+#include "../../../../lib/decompress_unzstd.c"
+#endif
 /*
  * NOTE: When adding a new decompressor, please update the analysis in
  * ../header.S.
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 1bb08ec..11b70e1 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -526,8 +526,14 @@  pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
 # the size-dependent part now grows so fast.
 #
 # extra_bytes = (uncompressed_size >> 8) + 65536
+#
+# ZSTD compressed data grows by at most 3 bytes per 128K, and only has a 22
+# byte fixed overhead but has a maximum block size of 128K, so it needs a
+# larger margin.
+#
+# extra_bytes = (uncompressed_size >> 8) + 131072

-#define ZO_z_extra_bytes	((ZO_z_output_len >> 8) + 65536)
+#define ZO_z_extra_bytes	((ZO_z_output_len >> 8) + 131072)
 #if ZO_z_output_len > ZO_z_input_len
 # define ZO_z_extract_offset	(ZO_z_output_len + ZO_z_extra_bytes - \
 				 ZO_z_input_len)
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index abd06b1..dcfdd4e 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -23,9 +23,11 @@ 
 # error "Invalid value for CONFIG_PHYSICAL_ALIGN"
 #endif

-#ifdef CONFIG_KERNEL_BZIP2
+#if defined(CONFIG_KERNEL_BZIP2)
 # define BOOT_HEAP_SIZE		0x400000
-#else /* !CONFIG_KERNEL_BZIP2 */
+#elif defined(CONFIG_KERNEL_ZSTD)
+# define BOOT_HEAP_SIZE		 0x30000
+#else
 # define BOOT_HEAP_SIZE		 0x10000
 #endif

diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h
index 7925bf0..e6ec175 100644
--- a/include/linux/decompress/mm.h
+++ b/include/linux/decompress/mm.h
@@ -53,6 +53,8 @@  static void *malloc(int size)

 static void free(void *where)
 {
+	if (!where)
+		return;
 	malloc_count--;
 	if (!malloc_count)
 		malloc_ptr = free_mem_ptr;
diff --git a/include/linux/decompress/unzstd.h b/include/linux/decompress/unzstd.h
new file mode 100644
index 0000000..645c367
--- /dev/null
+++ b/include/linux/decompress/unzstd.h
@@ -0,0 +1,24 @@ 
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ */
+
+#ifndef LINUX_DECOMPRESS_UNZSTD_H
+#define LINUX_DECOMPRESS_UNZSTD_H
+
+int unzstd(unsigned char *inbuf, long len,
+	   long (*fill)(void*, unsigned long),
+	   long (*flush)(void*, unsigned long),
+	   unsigned char *output,
+	   long *pos,
+	   void(*error_fn)(char *x));
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index 78cb246..5a2a70a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -127,10 +127,13 @@  config HAVE_KERNEL_LZO
 config HAVE_KERNEL_LZ4
 	bool

+config HAVE_KERNEL_ZSTD
+	bool
+
 choice
 	prompt "Kernel compression mode"
 	default KERNEL_GZIP
-	depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4
+	depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_ZSTD
 	help
 	  The linux kernel is a kind of self-extracting executable.
 	  Several compression algorithms are available, which differ
@@ -209,6 +212,12 @@  config KERNEL_LZ4
 	  is about 8% bigger than LZO. But the decompression speed is
 	  faster than LZO.

+config KERNEL_ZSTD
+	bool "ZSTD"
+	depends on HAVE_KERNEL_ZSTD
+	help
+	  TODO
+
 endchoice

 config DEFAULT_HOSTNAME
diff --git a/lib/Kconfig b/lib/Kconfig
index b1445b2..02fdcac 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -285,6 +285,10 @@  config DECOMPRESS_LZ4
 	select LZ4_DECOMPRESS
 	tristate

+config DECOMPRESS_ZSTD
+	select ZSTD_DECOMPRESS
+	tristate
+
 #
 # Generic allocator support is selected if needed
 #
@@ -296,7 +300,7 @@  config GENERIC_ALLOCATOR
 #
 config REED_SOLOMON
 	tristate
-
+
 config REED_SOLOMON_ENC8
 	bool

diff --git a/lib/Makefile b/lib/Makefile
index dafa796..8afcdc3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -128,6 +128,7 @@  lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o
 lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o
 lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o
 lib-$(CONFIG_DECOMPRESS_LZ4) += decompress_unlz4.o
+lib-$(CONFIG_DECOMPRESS_ZSTD) += decompress_unzstd.o

 obj-$(CONFIG_TEXTSEARCH) += textsearch.o
 obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
diff --git a/lib/decompress.c b/lib/decompress.c
index 62696df..b17bb07 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -12,6 +12,7 @@ 
 #include <linux/decompress/inflate.h>
 #include <linux/decompress/unlzo.h>
 #include <linux/decompress/unlz4.h>
+#include <linux/decompress/unzstd.h>

 #include <linux/types.h>
 #include <linux/string.h>
@@ -36,6 +37,9 @@ 
 #ifndef CONFIG_DECOMPRESS_LZ4
 # define unlz4 NULL
 #endif
+#ifndef CONFIG_DECOMPRESS_ZSTD
+# define unzstd NULL
+#endif

 struct compress_format {
 	unsigned char magic[2];
@@ -51,6 +55,7 @@  static const struct compress_format compressed_formats[] __initconst = {
 	{ {0xfd, 0x37}, "xz", unxz },
 	{ {0x89, 0x4c}, "lzo", unlzo },
 	{ {0x02, 0x21}, "lz4", unlz4 },
+	{ {0x28, 0xb5}, "zstd", unzstd },
 	{ {0, 0}, NULL, NULL }
 };

diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c
new file mode 100644
index 0000000..44fb672
--- /dev/null
+++ b/lib/decompress_unzstd.c
@@ -0,0 +1,328 @@ 
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ */
+
+/*
+ * Important notes about in-place decompression
+ *
+ * At least on x86, the kernel is decompressed in place: the compressed data
+ * is placed to the end of the output buffer, and the decompressor overwrites
+ * most of the compressed data. There must be enough safety margin to
+ * guarantee that the write position is always behind the read position.
+ *
+ * The safety margin for ZSTD with a 128 KB block size is calculated below.
+ * Note that the margin with ZSTD is bigger than with GZIP or XZ!
+ *
+ * The worst case for in-place decompression is that the beginning of
+ * the file is compressed extremely well, and the rest of the file is
+ * uncompressible. Thus, we must look for worst-case expansion when the
+ * compressor is encoding uncompressible data.
+ *
+ * The structure of the .zst file in case of a compresed kernel is as follows.
+ * Maximum sizes (as bytes) of the fields are in parenthesis.
+ *
+ *    Frame Header: (18)
+ *    Blocks: (N)
+ *    Checksum: (4)
+ *
+ * The frame header and checksum overhead is at most 22 bytes.
+ *
+ * ZSTD stores the data in blocks. Each block has a header whose size is
+ * a 3 bytes. After the block header, there is up to 128 KB of payload.
+ * The maximum uncompressed size of the payload is 128 KB. The minimum
+ * uncompressed size of the payload is never less than the payload size
+ * (excluding the block header).
+ *
+ * The assumption, that the uncompressed size of the payload is never
+ * smaller than the payload itself, is valid only when talking about
+ * the payload as a whole. It is possible that the payload has parts where
+ * the decompressor consumes more input than it produces output. Calculating
+ * the worst case for this would be tricky. Instead of trying to do that,
+ * let's simply make sure that the decompressor never overwrites any bytes
+ * of the payload which it is currently reading.
+ *
+ * Now we have enough information to calculate the safety margin. We need
+ *   - 22 bytes for the .zst file format headers;
+ *   - 3 bytes per every 128 KiB of uncompressed size (one block header per
+ *     block); and
+ *   - 128 KiB (biggest possible zstd block size) to make sure that the
+ *     decompressor never overwrites anything from the block it is currently
+ *     reading.
+ *
+ * We get the following formula:
+ *
+ *    safety_margin = 22 + uncompressed_size * 3 / 131072 + 131072
+ *                 <= 22 + (uncompressed_size >> 15) + 131072
+ */
+
+#ifdef STATIC
+#	define PREBOOT
+#	include "xxhash.c"
+#	include "zstd/entropy_common.c"
+#	include "zstd/fse_decompress.c"
+#	include "zstd/huf_decompress.c"
+#	include "zstd/zstd_common.c"
+#	include "zstd/decompress.c"
+#endif
+
+#include <linux/decompress/mm.h>
+#include <linux/zstd.h>
+
+/* 8 MB maximum window size */
+#define ZSTD_WINDOWSIZE_MAX	(1 << 23)
+/* Size of the input and output buffers in multi-call mdoe */
+#define ZSTD_IOBUF_SIZE		4096
+
+static int INIT handle_zstd_error(size_t ret, void (*error)(char *x))
+{
+	const int err = ZSTD_getErrorCode(ret);
+
+	if (!ZSTD_isError(ret))
+		return 0;
+
+	switch (err) {
+	case ZSTD_error_memory_allocation:
+		error("ZSTD decompressor ran out of memory");
+		break;
+	case ZSTD_error_prefix_unknown:
+		error("Input is not in the ZSTD format (wrong magic bytes)");
+		break;
+	case ZSTD_error_dstSize_tooSmall:
+	case ZSTD_error_corruption_detected:
+	case ZSTD_error_checksum_wrong:
+		error("ZSTD-compressed data is corrupt");
+		break;
+	default:
+		error("ZSTD-compressed data is probably corrupt");
+		break;
+	}
+	return -1;
+}
+
+/*
+ * Handle the case where we have the entire input and output in one segment.
+ * We can allocate less memory (no circular buffer for the sliding window),
+ * and avoid some memcpy() calls.
+ */
+static int INIT decompress_single(const u8 *in_buf, long in_len, u8 *out_buf,
+				  long out_len, long *pos,
+				  void (*error)(char *x))
+{
+	const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+	void *wksp = large_malloc(wksp_size);
+	ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wksp_size);
+	int err;
+	size_t ret;
+
+	if (!dctx) {
+		error("Out of memory while allocating ZSTD_DCtx");
+		err = -1;
+		goto out;
+	}
+	/* Find out how large the frame actually is, there may be junk at
+	 * the end of the frame that ZSTD_decompressDCtx() can't handle.
+	 */
+	ret = ZSTD_findFrameCompressedSize(in_buf, in_len);
+	if ((err = handle_zstd_error(ret, error)))
+		goto out;
+	in_len = (long)ret;
+
+	ret = ZSTD_decompressDCtx(dctx, out_buf, out_len, in_buf, in_len);
+	if ((err = handle_zstd_error(ret, error)))
+		goto out;
+
+	if (pos != NULL)
+		*pos = in_len;
+
+	err = 0;
+out:
+	if (wksp)
+		large_free(wksp);
+	return err;
+}
+
+static int INIT __unzstd(unsigned char *in_buf, long in_len,
+			 long (*fill)(void*, unsigned long),
+			 long (*flush)(void*, unsigned long),
+			 unsigned char *out_buf, long out_len,
+			 long *pos,
+			 void (*error)(char *x))
+{
+	ZSTD_inBuffer in;
+	ZSTD_outBuffer out;
+	ZSTD_frameParams params;
+	void *in_allocated = NULL;
+	void *out_allocated = NULL;
+	void *wksp = NULL;
+	size_t wksp_size;
+	ZSTD_DStream *dstream;
+	int err = -1;
+	size_t ret;
+
+	if (!out_len)
+		out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */
+
+	if (fill == NULL && flush == NULL)
+		/* We can decompress faster and with less memory when we have a
+		 * single chunk.
+		 */
+		return decompress_single(in_buf, in_len, out_buf, out_len, pos,
+					 error);
+
+	/* If in_buf is not provided, we must be using fill(), so allocate
+	 * a large enough buffer. If it is provided, it must be at least
+	 * ZSTD_IOBUF_SIZE large.
+	 */
+	if (!in_buf) {
+		in_allocated = malloc(ZSTD_IOBUF_SIZE);
+		if (!in_allocated) {
+			error("Out of memory while allocating input buffer");
+			err = -1;
+			goto out;
+		}
+		in_buf = in_allocated;
+		in_len = 0;
+	}
+	/* Read the first chunk, since we need to decode the frame header */
+	if (fill)
+		in_len = fill(in_buf, ZSTD_IOBUF_SIZE);
+	if (in_len < 0) {
+		error("ZSTD-compressed data is truncated");
+		err = -1;
+		goto out;
+	}
+	/* Set the first non-empty input buffer */
+	in.src = in_buf;
+	in.pos = 0;
+	in.size = in_len;
+	/* Allocate the output buffer if we are using flush(). */
+	if (flush) {
+		out_allocated = malloc(ZSTD_IOBUF_SIZE);
+		if (!out_allocated) {
+			error("Out of memory while allocating output buffer");
+			err = -1;
+			goto out;
+		}
+		out_buf = out_allocated;
+		out_len = ZSTD_IOBUF_SIZE;
+	}
+	/* Set the output buffer */
+	out.dst = out_buf;
+	out.pos = 0;
+	out.size = out_len;
+
+	/* We need to know the window size to allocate the ZSTD_DStream.
+	 * Since we are streaming, we need to allocate a buffer for the sliding
+	 * window. The window size varies from 1 KB to ZSTD_WINDOWSIZE_MAX
+	 * (8 MB), so it is important to use the actual value so as not to
+	 * waste memory when it is smaller.
+	 */
+	ret = ZSTD_getFrameParams(&params, in.src, in.size);
+	if ((err = handle_zstd_error(ret, error)))
+		goto out;
+	if (ret != 0) {
+		error("ZSTD-compressed data has an incomplete frame header");
+		err = -1;
+		goto out;
+	}
+	if (params.windowSize > ZSTD_WINDOWSIZE_MAX) {
+		error("ZSTD-compressed data has too large a window size");
+		err = -1;
+		goto out;
+	}
+
+	/* Allocate the ZSTD_DStream now that we know how much memory is
+	 * required.
+	 */
+	wksp_size = ZSTD_DStreamWorkspaceBound(params.windowSize);
+	wksp = large_malloc(wksp_size);
+	dstream = ZSTD_initDStream(params.windowSize, wksp, wksp_size);
+	if (!dstream) {
+		error("Out of memory while allocating ZSTD_DStream");
+		err = -1;
+		goto out;
+	}
+	/* Decompression loop:
+	 * Read more data if necessary (error if no more data can be read).
+	 * Call the decompression function, which returns 0 when finished.
+	 * Flush any data produced if using flush().
+	 */
+	if (pos != NULL)
+		*pos = 0;
+	do {
+		/* If we need to reload data, either we have fill() and can
+		 * try to get more data, or we don't have the input is
+		 * truncated.
+		 */
+		if (in.pos == in.size) {
+			if (pos != NULL)
+				*pos += in.pos;
+			in_len = fill ? fill(in_buf, ZSTD_IOBUF_SIZE) : -1;
+			if (in_len < 0) {
+				error("ZSTD-compressed data is truncated");
+				err = -1;
+				goto out;
+			}
+			in.pos = 0;
+			in.size = in_len;
+		}
+		/* Returns zero when the frame is complete */
+		ret = ZSTD_decompressStream(dstream, &out, &in);
+		if ((err = handle_zstd_error(ret, error)))
+			goto out;
+		/* Flush all of the data produced if using flush() */
+		if (flush && out.pos > 0) {
+			if (out.pos != flush(out.dst, out.pos)) {
+				error("Failed to flush()");
+				err = -1;
+				goto out;
+			}
+			out.pos = 0;
+		}
+	} while (ret != 0);
+
+	if (pos != NULL)
+		*pos += in.pos;
+
+	err = 0;
+out:
+	if (in_allocated)
+		free(in_allocated);
+	if (out_allocated)
+		free(out_allocated);
+	if (wksp)
+		large_free(wksp);
+	return err;
+}
+
+#ifndef PREBOOT
+STATIC int INIT unzstd(unsigned char *buf, long len,
+		       long (*fill)(void*, unsigned long),
+		       long (*flush)(void*, unsigned long),
+		       unsigned char *out_buf,
+		       long *pos,
+		       void (*error)(char *x))
+{
+	return __unzstd(buf, len, fill, flush, out_buf, 0, pos, error);
+}
+#else
+STATIC int INIT __decompress(unsigned char *buf, long len,
+			     long (*fill)(void*, unsigned long),
+			     long (*flush)(void*, unsigned long),
+			     unsigned char *out_buf, long out_len,
+			     long *pos,
+			     void (*error)(char *x))
+{
+	return __unzstd(buf, len, fill, flush, out_buf, out_len, pos, error);
+}
+#endif
diff --git a/lib/xxhash.c b/lib/xxhash.c
index aa61e2a..7f1d3cb 100644
--- a/lib/xxhash.c
+++ b/lib/xxhash.c
@@ -80,13 +80,11 @@  void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src)
 {
 	memcpy(dst, src, sizeof(*dst));
 }
-EXPORT_SYMBOL(xxh32_copy_state);

 void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src)
 {
 	memcpy(dst, src, sizeof(*dst));
 }
-EXPORT_SYMBOL(xxh64_copy_state);

 /*-***************************
  * Simple Hash Functions
@@ -151,7 +149,6 @@  uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)

 	return h32;
 }
-EXPORT_SYMBOL(xxh32);

 static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
 {
@@ -234,7 +231,6 @@  uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)

 	return h64;
 }
-EXPORT_SYMBOL(xxh64);

 /*-**************************************************
  * Advanced Hash Functions
@@ -251,7 +247,6 @@  void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed)
 	state.v4 = seed - PRIME32_1;
 	memcpy(statePtr, &state, sizeof(state));
 }
-EXPORT_SYMBOL(xxh32_reset);

 void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
 {
@@ -265,7 +260,6 @@  void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
 	state.v4 = seed - PRIME64_1;
 	memcpy(statePtr, &state, sizeof(state));
 }
-EXPORT_SYMBOL(xxh64_reset);

 int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)
 {
@@ -334,7 +328,6 @@  int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)

 	return 0;
 }
-EXPORT_SYMBOL(xxh32_update);

 uint32_t xxh32_digest(const struct xxh32_state *state)
 {
@@ -372,7 +365,6 @@  uint32_t xxh32_digest(const struct xxh32_state *state)

 	return h32;
 }
-EXPORT_SYMBOL(xxh32_digest);

 int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)
 {
@@ -439,7 +431,6 @@  int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)

 	return 0;
 }
-EXPORT_SYMBOL(xxh64_update);

 uint64_t xxh64_digest(const struct xxh64_state *state)
 {
@@ -494,7 +485,19 @@  uint64_t xxh64_digest(const struct xxh64_state *state)

 	return h64;
 }
+
+#ifndef PREBOOT
+EXPORT_SYMBOL(xxh32_copy_state);
+EXPORT_SYMBOL(xxh64_copy_state);
+EXPORT_SYMBOL(xxh32);
+EXPORT_SYMBOL(xxh64);
+EXPORT_SYMBOL(xxh32_reset);
+EXPORT_SYMBOL(xxh64_reset);
+EXPORT_SYMBOL(xxh32_update);
+EXPORT_SYMBOL(xxh32_digest);
+EXPORT_SYMBOL(xxh64_update);
 EXPORT_SYMBOL(xxh64_digest);

 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("xxHash");
+#endif
diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c
index b178467..b7b6599 100644
--- a/lib/zstd/decompress.c
+++ b/lib/zstd/decompress.c
@@ -2487,6 +2487,7 @@  size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB
 	}
 }

+#ifndef PREBOOT
 EXPORT_SYMBOL(ZSTD_DCtxWorkspaceBound);
 EXPORT_SYMBOL(ZSTD_initDCtx);
 EXPORT_SYMBOL(ZSTD_decompressDCtx);
@@ -2526,3 +2527,4 @@  EXPORT_SYMBOL(ZSTD_insertBlock);

 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("Zstd Decompressor");
+#endif
diff --git a/lib/zstd/fse_decompress.c b/lib/zstd/fse_decompress.c
index a84300e..0b35353 100644
--- a/lib/zstd/fse_decompress.c
+++ b/lib/zstd/fse_decompress.c
@@ -47,6 +47,7 @@ 
 ****************************************************************/
 #include "bitstream.h"
 #include "fse.h"
+#include "zstd_internal.h"
 #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/string.h> /* memcpy, memset */
@@ -60,14 +61,6 @@ 
 		enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
 	} /* use only *after* variable declarations */

-/* check and forward error code */
-#define CHECK_F(f)                  \
-	{                           \
-		size_t const e = f; \
-		if (FSE_isError(e)) \
-			return e;   \
-	}
-
 /* **************************************************************
 *  Templates
 ****************************************************************/
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 5e975fe..35df22e 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -360,6 +360,7 @@  cmd_lz4 = (cat $(filter-out FORCE,$^) | \
 	lz4c -l -c1 stdin stdout && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
 	(rm -f $@ ; false)

+
 # U-Boot mkimage
 # ---------------------------------------------------------------------------

@@ -412,6 +413,21 @@  cmd_xzmisc = (cat $(filter-out FORCE,$^) | \
 	xz --check=crc32 --lzma2=dict=1MiB) > $@ || \
 	(rm -f $@ ; false)

+# ZSTD
+# ---------------------------------------------------------------------------
+# Appends the uncompressed size of the data using size_append. The .zst
+# format has the size information available at the beginning of the file too,
+# but it's in a more complex format and it's good to avoid changing the part
+# of the boot code that reads the uncompressed size.
+# Note that the bytes added by size_append will make the zstd tool think that
+# the file is corrupt. This is expected.
+
+quiet_cmd_zstd = ZSTD    $@
+cmd_zstd = (cat $(filter-out FORCE,$^) | \
+	zstd -19 && \
+        $(call size_append, $(filter-out FORCE,$^))) > $@ || \
+	(rm -f $@ ; false)
+
 # ASM offsets
 # ---------------------------------------------------------------------------

diff --git a/usr/Kconfig b/usr/Kconfig
index d53112fd..b7c54de 100644
--- a/usr/Kconfig
+++ b/usr/Kconfig
@@ -105,6 +105,14 @@  config RD_LZ4
 	  Support loading of a LZ4 encoded initial ramdisk or cpio buffer
 	  If unsure, say N.

+config RD_ZSTD
+	bool "Support initial ramdisk/ramfs compressed using ZSTD"
+	default y
+	depends on BLK_DEV_INITRD
+	select DECOMPRESS_ZSTD
+	help
+	  TODO
+
 choice
 	prompt "Built-in initramfs compression mode"
 	depends on INITRAMFS_SOURCE!=""
@@ -213,6 +221,12 @@  config INITRAMFS_COMPRESSION_LZ4
 	  If you choose this, keep in mind that most distros don't provide lz4
 	  by default which could cause a build failure.

+config INITRAMFS_COMPRESSION_ZSTD
+	bool "LZ4"
+	depends on RD_ZSTD
+	help
+	  TODO
+
 endchoice

 config INITRAMFS_COMPRESSION
@@ -225,10 +239,12 @@  config INITRAMFS_COMPRESSION
 	default ".xz"   if INITRAMFS_COMPRESSION_XZ
 	default ".lzo"  if INITRAMFS_COMPRESSION_LZO
 	default ".lz4"  if INITRAMFS_COMPRESSION_LZ4
+	default ".zst"  if INITRAMFS_COMPRESSION_ZSTD
 	default ".gz"   if RD_GZIP
 	default ".lz4"  if RD_LZ4
 	default ".lzo"  if RD_LZO
 	default ".xz"   if RD_XZ
 	default ".lzma" if RD_LZMA
 	default ".bz2"  if RD_BZIP2
+	default ".zst"  if RD_ZSTD
 	default ""