All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] Update LZ4 compressor module
@ 2016-12-20 18:53 Sven Schmidt
  2016-12-20 18:53 ` [PATCH 1/3] crypto: Change lz4 modules to work with new lz4 compressor module version Sven Schmidt
                   ` (10 more replies)
  0 siblings, 11 replies; 104+ messages in thread
From: Sven Schmidt @ 2016-12-20 18:53 UTC (permalink / raw)
  To: akpm; +Cc: bongkyu.kim, sergey.senozhatsky, gregkh, linux-kernel


This patchset is for updating the LZ4 compression module to a version based
on LZ4 v1.7.2 allowing to use the fast compression algorithm aka LZ4 fast
which provides an "acceleration" parameter as a tradeoff between
compression ratio and compression speed.

We will use LZ4 fast in order to support compression in lustre. LZ4 fast empowers
us to do client-side as well as server-side compression/decompression while
being able to provide appropriate parameters to enable users to tune lustre's
behaviour to obtain the best performance/compression/etc. on their behalf
(adapative compression).

Also, it will be useful for other users of LZ4 compression,
as with LZ4 fast it is possible to enable applications to use fast and/or high
compression depending of the usecase. E.g. a developer can use very
high compression (low acceleration) for sending data over a network with
limited rate of transmission or he trades the compression ratio for higher
compression speed.

LZ4 homepage: http://www.lz4.org/
LZ4 source repository: https://github.com/lz4/lz4
Source version: 1.7.2
Compression in lustre: http://wiki.lustre.org/Enhanced_Adaptive_Compression_in_Lustre

Benchmark (taken from [1], Core i5-4300U @1.9GHz):
----------------|--------------|----------------|----------
Compressor      | Compression  | Decompression  | Ratio
----------------|--------------|----------------|----------
memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101

[1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html

[PATCH 1/3] crypto: Change lz4 modules to work with new lz4
[PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions
[PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH 1/3] crypto: Change lz4 modules to work with new lz4 compressor module version
  2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
@ 2016-12-20 18:53 ` Sven Schmidt
  2016-12-21  5:25   ` kbuild test robot
  2016-12-22 13:25   ` Sergey Senozhatsky
  2016-12-20 18:53 ` [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version Sven Schmidt
                   ` (9 subsequent siblings)
  10 siblings, 2 replies; 104+ messages in thread
From: Sven Schmidt @ 2016-12-20 18:53 UTC (permalink / raw)
  To: akpm; +Cc: bongkyu.kim, sergey.senozhatsky, gregkh, linux-kernel, Sven Schmidt

This patch updates the crypto modules using LZ4 compression to work with the
new LZ4 kernel module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 crypto/lz4.c   | 93 +++++++--------------------------------------------------
 crypto/lz4hc.c | 94 +++++++---------------------------------------------------
 2 files changed, 22 insertions(+), 165 deletions(-)

diff --git a/crypto/lz4.c b/crypto/lz4.c
index 99c1b2c..9fa217e 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -23,53 +23,36 @@
 #include <linux/crypto.h>
 #include <linux/vmalloc.h>
 #include <linux/lz4.h>
-#include <crypto/internal/scompress.h>
 
 struct lz4_ctx {
 	void *lz4_comp_mem;
 };
 
-static void *lz4_alloc_ctx(struct crypto_scomp *tfm)
-{
-	void *ctx;
-
-	ctx = vmalloc(LZ4_MEM_COMPRESS);
-	if (!ctx)
-		return ERR_PTR(-ENOMEM);
-
-	return ctx;
-}
-
 static int lz4_init(struct crypto_tfm *tfm)
 {
 	struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	ctx->lz4_comp_mem = lz4_alloc_ctx(NULL);
-	if (IS_ERR(ctx->lz4_comp_mem))
+	ctx->lz4_comp_mem = vmalloc(LZ4_MEM_COMPRESS);
+	if (!ctx->lz4_comp_mem)
 		return -ENOMEM;
 
 	return 0;
 }
 
-static void lz4_free_ctx(struct crypto_scomp *tfm, void *ctx)
-{
-	vfree(ctx);
-}
-
 static void lz4_exit(struct crypto_tfm *tfm)
 {
 	struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lz4_free_ctx(NULL, ctx->lz4_comp_mem);
+	vfree(ctx->lz4_comp_mem);
 }
 
-static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
-				 u8 *dst, unsigned int *dlen, void *ctx)
+static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
+			    unsigned int slen, u8 *dst, unsigned int *dlen)
 {
+	struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
 	size_t tmp_len = *dlen;
 	int err;
 
-	err = lz4_compress(src, slen, dst, &tmp_len, ctx);
+	err = LZ4_compress_default(src, dst, slen, tmp_len, ctx->lz4_comp_mem);
 
 	if (err < 0)
 		return -EINVAL;
@@ -78,29 +61,14 @@ static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
 	return 0;
 }
 
-static int lz4_scompress(struct crypto_scomp *tfm, const u8 *src,
-			 unsigned int slen, u8 *dst, unsigned int *dlen,
-			 void *ctx)
-{
-	return __lz4_compress_crypto(src, slen, dst, dlen, ctx);
-}
-
-static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
-			       unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-	struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	return __lz4_compress_crypto(src, slen, dst, dlen, ctx->lz4_comp_mem);
-}
-
-static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
-				   u8 *dst, unsigned int *dlen, void *ctx)
+static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
+			      unsigned int slen, u8 *dst, unsigned int *dlen)
 {
 	int err;
 	size_t tmp_len = *dlen;
 	size_t __slen = slen;
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
+	err = LZ4_decompress_safe(src, dst, __slen, tmp_len);
 	if (err < 0)
 		return -EINVAL;
 
@@ -108,20 +76,6 @@ static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
 	return err;
 }
 
-static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
-			   unsigned int slen, u8 *dst, unsigned int *dlen,
-			   void *ctx)
-{
-	return __lz4_decompress_crypto(src, slen, dst, dlen, NULL);
-}
-
-static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
-				 unsigned int slen, u8 *dst,
-				 unsigned int *dlen)
-{
-	return __lz4_decompress_crypto(src, slen, dst, dlen, NULL);
-}
-
 static struct crypto_alg alg_lz4 = {
 	.cra_name		= "lz4",
 	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
@@ -135,39 +89,14 @@ static struct crypto_alg alg_lz4 = {
 	.coa_decompress		= lz4_decompress_crypto } }
 };
 
-static struct scomp_alg scomp = {
-	.alloc_ctx		= lz4_alloc_ctx,
-	.free_ctx		= lz4_free_ctx,
-	.compress		= lz4_scompress,
-	.decompress		= lz4_sdecompress,
-	.base			= {
-		.cra_name	= "lz4",
-		.cra_driver_name = "lz4-scomp",
-		.cra_module	 = THIS_MODULE,
-	}
-};
-
 static int __init lz4_mod_init(void)
 {
-	int ret;
-
-	ret = crypto_register_alg(&alg_lz4);
-	if (ret)
-		return ret;
-
-	ret = crypto_register_scomp(&scomp);
-	if (ret) {
-		crypto_unregister_alg(&alg_lz4);
-		return ret;
-	}
-
-	return ret;
+	return crypto_register_alg(&alg_lz4);
 }
 
 static void __exit lz4_mod_fini(void)
 {
 	crypto_unregister_alg(&alg_lz4);
-	crypto_unregister_scomp(&scomp);
 }
 
 module_init(lz4_mod_init);
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 75ffc4a..aa1265c 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -22,53 +22,37 @@
 #include <linux/crypto.h>
 #include <linux/vmalloc.h>
 #include <linux/lz4.h>
-#include <crypto/internal/scompress.h>
 
 struct lz4hc_ctx {
 	void *lz4hc_comp_mem;
 };
 
-static void *lz4hc_alloc_ctx(struct crypto_scomp *tfm)
-{
-	void *ctx;
-
-	ctx = vmalloc(LZ4HC_MEM_COMPRESS);
-	if (!ctx)
-		return ERR_PTR(-ENOMEM);
-
-	return ctx;
-}
-
 static int lz4hc_init(struct crypto_tfm *tfm)
 {
 	struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	ctx->lz4hc_comp_mem = lz4hc_alloc_ctx(NULL);
-	if (IS_ERR(ctx->lz4hc_comp_mem))
+	ctx->lz4hc_comp_mem = vmalloc(LZ4HC_MEM_COMPRESS);
+	if (!ctx->lz4hc_comp_mem)
 		return -ENOMEM;
 
 	return 0;
 }
 
-static void lz4hc_free_ctx(struct crypto_scomp *tfm, void *ctx)
-{
-	vfree(ctx);
-}
-
 static void lz4hc_exit(struct crypto_tfm *tfm)
 {
 	struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	lz4hc_free_ctx(NULL, ctx->lz4hc_comp_mem);
+	vfree(ctx->lz4hc_comp_mem);
 }
 
-static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
-				   u8 *dst, unsigned int *dlen, void *ctx)
+static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
+			    unsigned int slen, u8 *dst, unsigned int *dlen)
 {
+	struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
 	size_t tmp_len = *dlen;
 	int err;
 
-	err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
+	err = LZ4_compress_HC(src, dst, slen, tmp_len, LZ4HC_DEFAULT_CLEVEL, ctx->lz4hc_comp_mem);
 
 	if (err < 0)
 		return -EINVAL;
@@ -77,31 +61,14 @@ static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
 	return 0;
 }
 
-static int lz4hc_scompress(struct crypto_scomp *tfm, const u8 *src,
-			   unsigned int slen, u8 *dst, unsigned int *dlen,
-			   void *ctx)
-{
-	return __lz4hc_compress_crypto(src, slen, dst, dlen, ctx);
-}
-
-static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
-				 unsigned int slen, u8 *dst,
-				 unsigned int *dlen)
-{
-	struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	return __lz4hc_compress_crypto(src, slen, dst, dlen,
-					ctx->lz4hc_comp_mem);
-}
-
-static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
-				     u8 *dst, unsigned int *dlen, void *ctx)
+static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
+			      unsigned int slen, u8 *dst, unsigned int *dlen)
 {
 	int err;
 	size_t tmp_len = *dlen;
 	size_t __slen = slen;
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
+	err = LZ4_decompress_safe(src, dst, __slen, (int)tmp_len);
 	if (err < 0)
 		return -EINVAL;
 
@@ -109,20 +76,6 @@ static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
 	return err;
 }
 
-static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
-			     unsigned int slen, u8 *dst, unsigned int *dlen,
-			     void *ctx)
-{
-	return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL);
-}
-
-static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
-				   unsigned int slen, u8 *dst,
-				   unsigned int *dlen)
-{
-	return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL);
-}
-
 static struct crypto_alg alg_lz4hc = {
 	.cra_name		= "lz4hc",
 	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
@@ -136,39 +89,14 @@ static struct crypto_alg alg_lz4hc = {
 	.coa_decompress		= lz4hc_decompress_crypto } }
 };
 
-static struct scomp_alg scomp = {
-	.alloc_ctx		= lz4hc_alloc_ctx,
-	.free_ctx		= lz4hc_free_ctx,
-	.compress		= lz4hc_scompress,
-	.decompress		= lz4hc_sdecompress,
-	.base			= {
-		.cra_name	= "lz4hc",
-		.cra_driver_name = "lz4hc-scomp",
-		.cra_module	 = THIS_MODULE,
-	}
-};
-
 static int __init lz4hc_mod_init(void)
 {
-	int ret;
-
-	ret = crypto_register_alg(&alg_lz4hc);
-	if (ret)
-		return ret;
-
-	ret = crypto_register_scomp(&scomp);
-	if (ret) {
-		crypto_unregister_alg(&alg_lz4hc);
-		return ret;
-	}
-
-	return ret;
+	return crypto_register_alg(&alg_lz4hc);
 }
 
 static void __exit lz4hc_mod_fini(void)
 {
 	crypto_unregister_alg(&alg_lz4hc);
-	crypto_unregister_scomp(&scomp);
 }
 
 module_init(lz4hc_mod_init);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version
  2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
  2016-12-20 18:53 ` [PATCH 1/3] crypto: Change lz4 modules to work with new lz4 compressor module version Sven Schmidt
@ 2016-12-20 18:53 ` Sven Schmidt
  2016-12-21  5:16   ` kbuild test robot
                     ` (3 more replies)
  2016-12-20 18:53 ` [PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
                   ` (8 subsequent siblings)
  10 siblings, 4 replies; 104+ messages in thread
From: Sven Schmidt @ 2016-12-20 18:53 UTC (permalink / raw)
  To: akpm; +Cc: bongkyu.kim, sergey.senozhatsky, gregkh, linux-kernel, Sven Schmidt

This patch updates the fs/pstore and fs/squashfs to work with the
new LZ4 kernel module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 fs/pstore/platform.c      | 11 +++++------
 fs/squashfs/lz4_wrapper.c |  3 +--
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 729677e..037dc07 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -342,7 +342,7 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_compress(in, inlen, out, &outlen, workspace);
+	ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
 	if (ret) {
 		pr_err("lz4_compress error, ret = %d!\n", ret);
 		return -EIO;
@@ -355,7 +355,7 @@ static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
+	ret = LZ4_decompress_safe(in, out, inlen, outlen);
 	if (ret) {
 		pr_err("lz4_decompress error, ret = %d!\n", ret);
 		return -EIO;
@@ -366,7 +366,7 @@ static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
 
 static void allocate_lz4(void)
 {
-	big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
+	big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
 	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
 	if (big_oops_buf) {
 		workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
@@ -493,7 +493,6 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 		if (!is_locked) {
 			pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
 				       , in_nmi() ? "NMI" : why);
-			return;
 		}
 	} else {
 		spin_lock_irqsave(&psinfo->buf_lock, flags);
@@ -585,8 +584,8 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
 		} else {
 			spin_lock_irqsave(&psinfo->buf_lock, flags);
 		}
-		psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0,
-				  s, 0, c, psinfo);
+		memcpy(psinfo->buf, s, c);
+		psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo);
 		spin_unlock_irqrestore(&psinfo->buf_lock, flags);
 		s += c;
 		c = e - s;
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index ff4468b..c087a63 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -108,8 +108,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 		put_bh(bh[i]);
 	}
 
-	res = lz4_decompress_unknownoutputsize(stream->input, length,
-					stream->output, &dest_len);
+	res = LZ4_decompress_safe(stream->input, stream->output, length, (int)dest_len);
 	if (res)
 		return -EIO;
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
  2016-12-20 18:53 ` [PATCH 1/3] crypto: Change lz4 modules to work with new lz4 compressor module version Sven Schmidt
  2016-12-20 18:53 ` [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version Sven Schmidt
@ 2016-12-20 18:53 ` Sven Schmidt
  2016-12-20 19:52   ` Joe Perches
  2016-12-21  7:09   ` kbuild test robot
  2016-12-22 17:29 ` [PATCH 0/3] Update LZ4 compressor module Greg KH
                   ` (7 subsequent siblings)
  10 siblings, 2 replies; 104+ messages in thread
From: Sven Schmidt @ 2016-12-20 18:53 UTC (permalink / raw)
  To: akpm; +Cc: bongkyu.kim, sergey.senozhatsky, gregkh, linux-kernel, Sven Schmidt

This patch updates LZ4 kernel module to LZ4 v1.7.2 by Yann Collet.
The kernel module is inspired by the previous work by Chanho Min.
The updated LZ4 module will not break existing code since there were alias
methods added to ensure backwards compatibility.

API changes:

New method LZ4_compress_fast which differs from the variant available in kernel
by the new acceleration parameter, allowing to trade compression ratio for
more speedup.

LZ4_decompress_fast is the respective decompression method,
allowing to decompress data compressed with any LZ4 compression method including
LZ4HC.

Also the useful functions LZ4_decompress_safe_partial
LZ4_compress_destsize were added. The latter reverses the logic by trying to
compress as much data as possible from source to dest while the former aims
to decompress partial blocks of data.

The methods lz4_compress and lz4_decompress_unknownoutputsize are now known as
LZ4_compress_default respectivley LZ4_decompress_safe. The old methods are still
available for offering backwards compatibility.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      |  267 +++++++++---
 lib/decompress_unlz4.c   |   11 +-
 lib/lz4/lz4_compress.c   | 1024 +++++++++++++++++++++++++------------------
 lib/lz4/lz4_decompress.c |  627 +++++++++++++--------------
 lib/lz4/lz4defs.h        |  359 +++++++++------
 lib/lz4/lz4hc_compress.c | 1083 +++++++++++++++++++++++++---------------------
 6 files changed, 1941 insertions(+), 1430 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 6b784c5..4b8fce7 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -1,87 +1,218 @@
 #ifndef __LZ4_H__
 #define __LZ4_H__
+
 /*
  * LZ4 Kernel Interface
  *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
+#include <linux/types.h>
+
+/*-************************************************************************
+*  Version
+**************************************************************************/
+#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
+#define LZ4_VERSION_MINOR    7    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
+
+/*-************************************************************************
+*  Constants
+**************************************************************************/
 #define LZ4_MEM_COMPRESS	(16384)
 #define LZ4HC_MEM_COMPRESS	(262144 + (2 * sizeof(unsigned char *)))
 
-/*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
- */
-static inline size_t lz4_compressbound(size_t isize)
-{
-	return isize + (isize / 255) + 16;
+#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+#define LZ4HC_MIN_CLEVEL        3
+#define LZ4HC_DEFAULT_CLEVEL    9
+#define LZ4HC_MAX_CLEVEL        16
+
+/*-************************************************************************
+*  Compression Functions
+**************************************************************************/
+
+/*!LZ4_compressbound() :
+    Provides the maximum size that LZ4 may output in a "worst case" scenario
+    (input data not compressible)
+*/
+static inline size_t LZ4_compressBound(size_t isize) {
+  return LZ4_COMPRESSBOUND(isize);
 }
 
-/*
- * lz4_compress()
- *	src     : source address of the original data
- *	src_len : size of the original data
- *	dst	: output buffer address of the compressed data
- *		This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len : is the output size, which is returned after compress done
- *	workmem : address of the working memory.
- *		This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer and workmem must be already allocated with
- *		the defined size.
- */
-int lz4_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
-  * lz4hc_compress()
-  *	 src	 : source address of the original data
-  *	 src_len : size of the original data
-  *	 dst	 : output buffer address of the compressed data
-  *		This requires 'dst' of size LZ4_COMPRESSBOUND.
-  *	 dst_len : is the output size, which is returned after compress done
-  *	 workmem : address of the working memory.
-  *		This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
-  *	 return  : Success if return 0
-  *		   Error if return (< 0)
-  *	 note :  Destination buffer and workmem must be already allocated with
-  *		 the defined size.
-  */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
+/*! lz4_compressbound() :
+    For backward compabitiliby
+*/
+static inline size_t lz4_compressbound(size_t isize) {
+	return lz4_compressbound(isize);
+}
 
-/*
- * lz4_decompress()
- *	src     : source address of the compressed data
- *	src_len : is the input size, whcih is returned after decompress done
- *	dest	: output buffer address of the decompressed data
- *	actual_dest_len: is the size of uncompressed data, supposing it's known
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
- *		slightly faster than lz4_decompress_unknownoutputsize()
- */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len);
+/*! LZ4_compress_default() :
+    Compresses 'sourceSize' bytes from buffer 'source'
+    into already allocated 'dest' buffer of size 'maxDestSize'.
+    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
+    It also runs faster, so it's a recommended setting.
+    If the function cannot compress 'source' into a more limited 'dest' budget,
+    compression stops *immediately*, and the function result is zero.
+    As a consequence, 'dest' content is not valid.
+    This function never writes outside 'dest' buffer, nor read outside 'source' buffer.
+        sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
+        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
+        workmem : address of the working memory. This requires 'workmem' of size LZ4_MEM_COMPRESS.
+        return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize)
+              or 0 if compression fails */
+int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize, void* wrkmem);
 
-/*
- * lz4_decompress_unknownoutputsize()
- *	src     : source address of the compressed data
- *	src_len : is the input size, therefore the compressed size
- *	dest	: output buffer address of the decompressed data
- *	dest_len: is the max size of the destination buffer, which is
- *			returned with actual size of decompressed data after
- *			decompress done
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
+/*!
+LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
+    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+    An acceleration value of "1" is the same as regular LZ4_compress_default()
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT, which is 1.
+*/
+int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, void* wrkmem, int acceleration);
+
+/*!
+LZ4_compress_destSize() :
+    Reverse the logic, by compressing as much data as possible from 'source' buffer
+    into already allocated buffer 'dest' of size 'targetDestSize'.
+    This function either compresses the entire 'source' content into 'dest' if it's large enough,
+    or fill 'dest' buffer completely with as much data as possible from 'source'.
+        *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'.
+                         New value is necessarily <= old value.
+        workmem : address of the working memory. This requires 'workmem' of size LZ4_MEM_COMPRESS.
+        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
+              or 0 if compression fails
+*/
+int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize, void* wrkmem);
+
+/*!
+ * lz4_compress() :
+    For backwards compabitiliby
+        src     : source address of the original data
+        src_len : size of the original data
+        dst     : output buffer address of the compressed data
+                This requires 'dst' of size LZ4_COMPRESSBOUND.
+        dst_len : is the output size, which is returned after compress done
+        workmem : address of the working memory.
+                This requires 'workmem' of size LZ4_MEM_COMPRESS.
+        return  : Success if return 0
+                  Error if return (< 0)
+        note :  Destination buffer and workmem must be already allocated with
+                the defined size.
  */
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len);
+int lz4_compress(const unsigned char *src, int src_len, unsigned char *dst, int dst_len, void *wrkmem);
+
+/*-************************************************************************
+*  Decompression Functions
+**************************************************************************/
+
+/*!
+LZ4_decompress_fast() :
+    originalSize : is the original and therefore uncompressed size
+    return : the number of bytes read from the source buffer (in other words, the compressed size)
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
+    note : This function fully respect memory boundaries for properly formed compressed data.
+           It is a bit faster than LZ4_decompress_safe().
+           However, it does not provide any protection against intentionally modified data stream (malicious input).
+           Use this function in trusted environment only (data to decode comes from a trusted source).
+*/
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize);
+
+/*!
+LZ4_decompress_safe() :
+    compressedSize : is the precise full size of the compressed block.
+    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
+    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
+             If destination buffer is not large enough, decoding will stop and output an error code (<0).
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function is protected against buffer overflow exploits, including malicious data packets.
+             It never writes outside output buffer, nor reads outside input buffer.
+*/
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+
+/*!
+LZ4_decompress_safe_partial() :
+    This function decompress a compressed block of size 'compressedSize' at position 'source'
+    into destination buffer 'dest' of size 'maxDecompressedSize'.
+    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
+    reducing decompression time.
+    return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize)
+       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
+             Always control how many bytes were decoded.
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
+*/
+int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+
+/*!
+lz4_decompress_unknownoutputsize() :
+    For backwards compabitiliby
+        src     : source address of the compressed data
+        src_len : is the input size, therefore the compressed size
+        dest    : output buffer address of the decompressed data
+        dest_len: is the max size of the destination buffer, which is
+                        returned with actual size of decompressed data after
+                        decompress done
+        return  : Success if return 0
+                  Error if return (< 0)
+        note :  Destination buffer must be already allocated.
+*/
+int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, unsigned char *dest, size_t *dest_len);
+
+/*!
+lz4_decompress() :
+    For backwards compabitiliby
+        src     : source address of the compressed data
+        src_len : is the input size, whcih is returned after decompress done
+        dest    : output buffer address of the decompressed data
+        actual_dest_len: is the size of uncompressed data, supposing it's known
+        return  : Success if return 0
+                  Error if return (< 0)
+        note :  Destination buffer must be already allocated.
+                slightly faster than lz4_decompress_unknownoutputsize()
+*/
+int lz4_decompress(const unsigned char *src, size_t *src_len, unsigned char *dest, size_t actual_dest_len);
+
+/*-************************************************************************
+ *  LZ4 HC Compression
+ **************************************************************************/
+
+/*! LZ4_compress_HC() :
+    Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm. dst` must be already allocated.
+        wrkmem : address of the working memory. This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+        Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)`
+        Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE
+        compressionLevel` : Recommended values are between 4 and 9, although any value between 1 and LZ4HC_MAX_CLEVEL will work.
+                        Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+        @return : the number of bytes written into 'dst' or 0 if compression fails.
+*/
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel, void* wrkmem);
+
+/*! lz4hc_compress()
+    For backwards compabitiliby
+        src     : source address of the original data
+        src_len : size of the original data
+        dst     : output buffer address of the compressed data
+               This requires 'dst' of size LZ4_COMPRESSBOUND.
+        dst_len : is the output size, which is returned after compress done
+        workmem : address of the working memory.
+               This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
+        return  : Success if return 0
+                  Error if return (< 0)
+        note :  Destination buffer and workmem must be already allocated with
+                the defined size.
+*/
+int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len, void *wrkmem);
+
 #endif
diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 036fc88..ee00eb2 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 		error("NULL input pointer and missing fill function");
 		goto exit_1;
 	} else {
-		inp = large_malloc(lz4_compressbound(uncomp_chunksize));
+		inp = large_malloc(LZ4_compressBound(uncomp_chunksize));
 		if (!inp) {
 			error("Could not allocate input buffer");
 			goto exit_1;
@@ -136,7 +136,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			inp += 4;
 			size -= 4;
 		} else {
-			if (chunksize > lz4_compressbound(uncomp_chunksize)) {
+			if (chunksize > LZ4_compressBound(uncomp_chunksize)) {
 				error("chunk length is longer than allocated");
 				goto exit_2;
 			}
@@ -152,11 +152,12 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			out_len -= dest_len;
 		} else
 			dest_len = out_len;
-		ret = lz4_decompress(inp, &chunksize, outp, dest_len);
+
+		ret = LZ4_decompress_fast(inp, outp, chunksize);
 #else
 		dest_len = uncomp_chunksize;
-		ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
-				&dest_len);
+
+		ret = LZ4_decompress_safe(inp, outp, chunksize, dest_len);
 #endif
 		if (ret < 0) {
 			error("Decoding failed");
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 28321d8..551f586 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -1,443 +1,639 @@
 /*
- * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
- *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
- */
-
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-2016, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+   You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+
+
+    Changed for kernel use by:
+    Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+*/
+
+/*-************************************
+*  Includes
+**************************************/
+#include "lz4defs.h"
+#include <linux/lz4.h>
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lz4.h>
 #include <asm/unaligned.h>
-#include "lz4defs.h"
 
 /*
- * LZ4_compressCtx :
- * -----------------
- * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
- * maximum size 'maxOutputSize'.  * If it cannot achieve it, compression
- * will stop, and result of the function will be zero.
- * return : the number of bytes written in buffer 'dest', or 0 if the
- * compression fails
+ * ACCELERATION_DEFAULT :
+ * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
  */
-static inline int lz4_compressctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+#define ACCELERATION_DEFAULT 1
+
+/*-******************************
+*  Compression functions
+********************************/
+static U32 LZ4_hashSequence(U32 sequence, tableType_t const tableType)
+{
+    if (tableType == byU16)
+        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
+    else
+        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
+}
+
+static const U64 prime5bytes = 889523592379ULL;
+static U32 LZ4_hashSequence64(size_t sequence, tableType_t const tableType)
+{
+    const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
+    const U32 hashMask = (1<<hashLog) - 1;
+    return ((sequence * prime5bytes) >> (40 - hashLog)) & hashMask;
+}
+
+static U32 LZ4_hashSequenceT(size_t sequence, tableType_t const tableType)
+{
+    if (LZ4_64bits())
+        return LZ4_hashSequence64(sequence, tableType);
+    return LZ4_hashSequence((U32)sequence, tableType);
+}
+
+static U32 LZ4_hashPosition(const void* p, tableType_t tableType) { return LZ4_hashSequenceT(LZ4_read_ARCH(p), tableType); }
+
+static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase)
+{
+    switch (tableType)
+    {
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
+    }
+}
+
+static void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    U32 const h = LZ4_hashPosition(p, tableType);
+    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
+    if (tableType == byU32) { const U32* const hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
+    { const U16* const hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
+}
+
+static const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    U32 const h = LZ4_hashPosition(p, tableType);
+    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+
+/** LZ4_compress_generic() :
+    inlined, to ensure branches are decided at compilation time */
+FORCE_INLINE int LZ4_compress_generic(
+                 void* const ctx,
+                 const char* const source,
+                 char* const dest,
+                 const int inputSize,
+                 const int maxOutputSize,
+                 const limitedOutput_directive outputLimited,
+                 const tableType_t tableType,
+                 const dict_directive dict,
+                 const dictIssue_directive dictIssue,
+                 const U32 acceleration)
 {
-	HTYPE *hashtable = (HTYPE *)ctx;
-	const u8 *ip = (u8 *)source;
-#if LZ4_ARCH64
-	const BYTE * const base = ip;
-#else
-	const int base = 0;
-#endif
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
-
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
-
-	/* First Byte */
-	hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-	ip++;
-	forwardh = LZ4_HASH_VALUE(ip);
-
-	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
-
-		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (unlikely(forwardip > mflimit))
-				goto _last_literals;
-
-			forwardh = LZ4_HASH_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = ip - base;
-		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
-
-		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source) &&
-			unlikely(ip[-1] == ref[-1])) {
-			ip--;
-			ref--;
-		}
-
-		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
-			(length >> 8) > oend))
-			return 0;
-
-		if (length >= (int)RUN_MASK) {
-			int len;
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
-
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
+    LZ4_stream_t_internal* const dictPtr = (LZ4_stream_t_internal*)ctx;
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* base;
+    const BYTE* lowLimit;
+    const BYTE* const lowRefLimit = ip - dictPtr->dictSize;
+    const BYTE* const dictionary = dictPtr->dictionary;
+    const BYTE* const dictEnd = dictionary + dictPtr->dictSize;
+    const size_t dictDelta = dictEnd - (const BYTE*)source;
+    const BYTE* anchor = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const olimit = op + maxOutputSize;
+
+    U32 forwardH;
+    size_t refDelta=0;
+
+    /* Init conditions */
+    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;   /* Unsupported inputSize, too large (or negative) */
+    switch(dict)
+    {
+    case noDict:
+    default:
+        base = (const BYTE*)source;
+        lowLimit = (const BYTE*)source;
+        break;
+    case withPrefix64k:
+        base = (const BYTE*)source - dictPtr->currentOffset;
+        lowLimit = (const BYTE*)source - dictPtr->dictSize;
+        break;
+    case usingExtDict:
+        base = (const BYTE*)source - dictPtr->currentOffset;
+        lowLimit = (const BYTE*)source;
+        break;
+    }
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
+    if (inputSize<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+
+    /* First Byte */
+    LZ4_putPosition(ip, ctx, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+    /* Main Loop */
+    for ( ; ; ) {
+        const BYTE* match;
+        BYTE* token;
+
+        /* Find a match */
+        {   const BYTE* forwardIp = ip;
+            unsigned step = 1;
+            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
+            do {
+                U32 const h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+
+                match = LZ4_getPositionOnHash(h, ctx, tableType, base);
+                if (dict==usingExtDict) {
+                    if (match < (const BYTE*)source) {
+                        refDelta = dictDelta;
+                        lowLimit = dictionary;
+                    } else {
+                        refDelta = 0;
+                        lowLimit = (const BYTE*)source;
+                }   }
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
+
+            } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0)
+                || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
+                || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) );
+        }
+
+        /* Catch up */
+        while (((ip>anchor) & (match+refDelta > lowLimit)) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; }
+
+        /* Encode Literals */
+        {   unsigned const litLength = (unsigned)(ip - anchor);
+            token = op++;
+            if ((outputLimited) &&  /* Check output buffer overflow */
+                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
+                return 0;
+            if (litLength >= RUN_MASK) {
+                int len = (int)litLength-RUN_MASK;
+                *token = (RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
+
+            /* Copy Literals */
+            LZ4_wildCopy(op, anchor, op+litLength);
+            op+=litLength;
+        }
+
 _next_match:
-		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
-
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
-		while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
-			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
-		/* Encode MatchLength */
-		length = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
-			return 0;
-		if (length >= (int)ML_MASK) {
-			*token += ML_MASK;
-			length -= ML_MASK;
-			for (; length > 509 ; length -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (length > 254) {
-				length -= 255;
-				*op++ = 255;
-			}
-			*op++ = (u8)length;
-		} else
-			*token += length;
-
-		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
-			break;
-		}
-
-		/* Fill table */
-		hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
-
-		/* Test next position */
-		ref = base + hashtable[LZ4_HASH_VALUE(ip)];
-		hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
-			token = op++;
-			*token = 0;
-			goto _next_match;
-		}
-
-		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH_VALUE(ip);
-	}
+        /* Encode Offset */
+        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+
+        /* Encode MatchLength */
+        {   unsigned matchCode;
+
+            if ((dict==usingExtDict) && (lowLimit==dictionary)) {
+                const BYTE* limit;
+                match += refDelta;
+                limit = ip + (dictEnd-match);
+                if (limit > matchlimit) limit = matchlimit;
+                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
+                ip += MINMATCH + matchCode;
+                if (ip==limit) {
+                    unsigned const more = LZ4_count(ip, (const BYTE*)source, matchlimit);
+                    matchCode += more;
+                    ip += more;
+                }
+            } else {
+                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+                ip += MINMATCH + matchCode;
+            }
+
+            if ( outputLimited &&    /* Check output buffer overflow */
+                (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) )
+                return 0;
+            if (matchCode >= ML_MASK) {
+                *token += ML_MASK;
+                matchCode -= ML_MASK;
+                LZ4_write32(op, 0xFFFFFFFF);
+                while (matchCode >= 4*255) op+=4, LZ4_write32(op, 0xFFFFFFFF), matchCode -= 4*255;
+                op += matchCode / 255;
+                *op++ = (BYTE)(matchCode % 255);
+            } else
+                *token += (BYTE)(matchCode);
+        }
+
+        anchor = ip;
+
+        /* Test end of chunk */
+        if (ip > mflimit) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, ctx, tableType, base);
+
+        /* Test next position */
+        match = LZ4_getPosition(ip, ctx, tableType, base);
+        if (dict==usingExtDict) {
+            if (match < (const BYTE*)source) {
+                refDelta = dictDelta;
+                lowLimit = dictionary;
+            } else {
+                refDelta = 0;
+                lowLimit = (const BYTE*)source;
+        }   }
+        LZ4_putPosition(ip, ctx, tableType, base);
+        if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1)
+            && (match+MAX_DISTANCE>=ip)
+            && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) )
+        { token=op++; *token=0; goto _next_match; }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+    }
 
 _last_literals:
-	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (((char *)op - dest) + lastrun + 1
-		+ ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
-		return 0;
-
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
-
-	/* End */
-	return (int)(((char *)op) - dest);
+    /* Encode Last Literals */
+    {   const size_t lastRun = (size_t)(iend - anchor);
+        if ( (outputLimited) &&  /* Check output buffer overflow */
+            ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) )
+            return 0;
+        if (lastRun >= RUN_MASK) {
+            size_t accumulator = lastRun - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRun<<ML_BITS);
+        }
+        memcpy(op, anchor, lastRun);
+        op += lastRun;
+    }
+
+    /* End */
+    return (int) (((char*)op)-dest);
 }
 
-static inline int lz4_compress64kctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+
+int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_resetStream((LZ4_stream_t*)state);
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+
+    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+        if (inputSize < LZ4_64Klimit)
+            return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16,                        noDict, noDictIssue, acceleration);
+        else
+            return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    } else {
+        if (inputSize < LZ4_64Klimit)
+            return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
+        else
+            return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    }
+}
+
+
+int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, void* wrkmem, int acceleration)
 {
-	u16 *hashtable = (u16 *)ctx;
-	const u8 *ip = (u8 *) source;
-	const u8 *anchor = ip;
-	const u8 *const base = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int len, length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
-
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
-
-	/* First Byte */
-	ip++;
-	forwardh = LZ4_HASH64K_VALUE(ip);
-
-	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
-
-		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (forwardip > mflimit)
-				goto _last_literals;
-
-			forwardh = LZ4_HASH64K_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = (u16)(ip - base);
-		} while (A32(ref) != A32(ip));
-
-		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source)
-			&& (ip[-1] == ref[-1])) {
-			ip--;
-			ref--;
-		}
-
-		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* Check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS)
-			+ (length >> 8) > oend))
-			return 0;
-		if (length >= (int)RUN_MASK) {
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
-
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
+    int result = LZ4_compress_fast_extState(wrkmem, source, dest, inputSize, maxOutputSize, acceleration);
+
+    return result;
+}
+
+
+int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize, void* wrkmem)
+{
+    return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, wrkmem, 1);
+}
+
+/*-******************************
+*  *_destSize() variant
+********************************/
+
+static int LZ4_compress_destSize_generic(
+                       void* const ctx,
+                 const char* const src,
+                       char* const dst,
+                       int*  const srcSizePtr,
+                 const int targetDstSize,
+                 const tableType_t tableType)
+{
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* base = (const BYTE*) src;
+    const BYTE* lowLimit = (const BYTE*) src;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + *srcSizePtr;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+
+    BYTE* op = (BYTE*) dst;
+    BYTE* const oend = op + targetDstSize;
+    BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */;
+    BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */);
+    BYTE* const oMaxSeq = oMaxLit - 1 /* token */;
+
+    U32 forwardH;
+
+
+    /* Init conditions */
+    if (targetDstSize < 1) return 0;                                     /* Impossible to store anything */
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;            /* Unsupported input size, too large (or negative) */
+    if ((tableType == byU16) && (*srcSizePtr>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
+    if (*srcSizePtr<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+
+    /* First Byte */
+    *srcSizePtr = 0;
+    LZ4_putPosition(ip, ctx, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+    /* Main Loop */
+    for ( ; ; ) {
+        const BYTE* match;
+        BYTE* token;
+
+        /* Find a match */
+        {   const BYTE* forwardIp = ip;
+            unsigned step = 1;
+            unsigned searchMatchNb = 1 << LZ4_skipTrigger;
+
+            do {
+                U32 h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+
+                match = LZ4_getPositionOnHash(h, ctx, tableType, base);
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
+
+            } while ( ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
+                || (LZ4_read32(match) != LZ4_read32(ip)) );
+        }
+
+        /* Catch up */
+        while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
+
+        /* Encode Literal length */
+        {   unsigned litLength = (unsigned)(ip - anchor);
+            token = op++;
+            if (op + ((litLength+240)/255) + litLength > oMaxLit) {
+                /* Not enough space for a last match */
+                op--;
+                goto _last_literals;
+            }
+            if (litLength>=RUN_MASK) {
+                unsigned len = litLength - RUN_MASK;
+                *token=(RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
+
+            /* Copy Literals */
+            LZ4_wildCopy(op, anchor, op+litLength);
+            op += litLength;
+        }
 
 _next_match:
-		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
-
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
-
-		while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
-			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
-
-		/* Encode MatchLength */
-		len = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
-			return 0;
-		if (len >= (int)ML_MASK) {
-			*token += ML_MASK;
-			len -= ML_MASK;
-			for (; len > 509 ; len -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (len > 254) {
-				len -= 255;
-				*op++ = 255;
-			}
-			*op++ = (u8)len;
-		} else
-			*token += len;
-
-		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
-			break;
-		}
-
-		/* Fill table */
-		hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
-
-		/* Test next position */
-		ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
-		hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
-		if (A32(ref) == A32(ip)) {
-			token = op++;
-			*token = 0;
-			goto _next_match;
-		}
-
-		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH64K_VALUE(ip);
-	}
+        /* Encode Offset */
+        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+
+        /* Encode MatchLength */
+        {   size_t matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+
+            if (op + ((matchLength+240)/255) > oMaxMatch) {
+                /* Match description too long : reduce it */
+                matchLength = (15-1) + (oMaxMatch-op) * 255;
+            }
+            ip += MINMATCH + matchLength;
+
+            if (matchLength>=ML_MASK) {
+                *token += ML_MASK;
+                matchLength -= ML_MASK;
+                while (matchLength >= 255) { matchLength-=255; *op++ = 255; }
+                *op++ = (BYTE)matchLength;
+            }
+            else *token += (BYTE)(matchLength);
+        }
+
+        anchor = ip;
+
+        /* Test end of block */
+        if (ip > mflimit) break;
+        if (op > oMaxSeq) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, ctx, tableType, base);
+
+        /* Test next position */
+        match = LZ4_getPosition(ip, ctx, tableType, base);
+        LZ4_putPosition(ip, ctx, tableType, base);
+        if ( (match+MAX_DISTANCE>=ip)
+            && (LZ4_read32(match)==LZ4_read32(ip)) )
+        { token=op++; *token=0; goto _next_match; }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+    }
 
 _last_literals:
-	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
-		return 0;
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
-	/* End */
-	return (int)(((char *)op) - dest);
+    /* Encode Last Literals */
+    {   size_t lastRunSize = (size_t)(iend - anchor);
+        if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend) {
+            /* adapt lastRunSize to fill 'dst' */
+            lastRunSize  = (oend-op) - 1;
+            lastRunSize -= (lastRunSize+240)/255;
+        }
+        ip = anchor + lastRunSize;
+
+        if (lastRunSize >= RUN_MASK) {
+            size_t accumulator = lastRunSize - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRunSize<<ML_BITS);
+        }
+        memcpy(op, anchor, lastRunSize);
+        op += lastRunSize;
+    }
+
+    /* End */
+    *srcSizePtr = (int) (((const char*)ip)-src);
+    return (int) (((char*)op)-dst);
 }
 
-int lz4_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+
+static int LZ4_compress_destSize_extState (void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
 {
-	int ret = -1;
-	int out_len = 0;
+    LZ4_resetStream((LZ4_stream_t*)state);
+
+    if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
+        return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
+    } else {
+        if (*srcSizePtr < LZ4_64Klimit)
+            return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, byU16);
+        else
+            return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, LZ4_64bits() ? byU32 : byPtr);
+    }
+}
 
-	if (src_len < LZ4_64KLIMIT)
-		out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
-	else
-		out_len = lz4_compressctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
 
-	if (out_len < 0)
-		goto exit;
+int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize, void* wrkmem)
+{
+    return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr, targetDstSize);
+}
 
-	*dst_len = out_len;
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
+{
+    if ((LZ4_dict->currentOffset > 0x80000000) ||
+        ((size_t)LZ4_dict->currentOffset > (size_t)src)) {   /* address space overflow */
+        /* rescale hash table */
+        U32 const delta = LZ4_dict->currentOffset - 64 KB;
+        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+        int i;
+        for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
+            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
+            else LZ4_dict->hashTable[i] -= delta;
+        }
+        LZ4_dict->currentOffset = 64 KB;
+        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
+        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+    }
+}
 
-	return 0;
-exit:
-	return ret;
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = (const BYTE*) source;
+    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
+    if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;
+    LZ4_renormDictT(streamPtr, smallest);
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+
+    /* Check overlapping input/dictionary space */
+    {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) {
+            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
+            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
+            streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+        }
+    }
+
+    /* prefix mode : source data follows dictionary */
+    if (dictEnd == (const BYTE*)source) {
+        int result;
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration);
+        else
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration);
+        streamPtr->dictSize += (U32)inputSize;
+        streamPtr->currentOffset += (U32)inputSize;
+        return result;
+    }
+
+    /* external dictionary mode */
+    {   int result;
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration);
+        else
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration);
+        streamPtr->dictionary = (const BYTE*)source;
+        streamPtr->dictSize = (U32)inputSize;
+        streamPtr->currentOffset += (U32)inputSize;
+        return result;
+    }
+}
+
+/*~
+ * for backwards compatibility
+*/
+int lz4_compress(const unsigned char *src, int src_len, unsigned char *dst, int dst_len, void *wrkmem) {
+    return LZ4_compress_default(src, dst, src_len, dst_len, wrkmem);
+}
+
+/* Hidden debug function, to force external dictionary mode */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize)
+{
+    LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_dict;
+    int result;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = dictEnd;
+    if (smallest > (const BYTE*) source) smallest = (const BYTE*) source;
+    LZ4_renormDictT((LZ4_stream_t_internal*)LZ4_dict, smallest);
+
+    result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+
+    streamPtr->dictionary = (const BYTE*)source;
+    streamPtr->dictSize = (U32)inputSize;
+    streamPtr->currentOffset += (U32)inputSize;
+
+    return result;
+}
+
+
+int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
+{
+    LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict;
+    const BYTE* previousDictEnd = dict->dictionary + dict->dictSize;
+
+    if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB */
+    if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;
+
+    memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+    dict->dictionary = (const BYTE*)safeBuffer;
+    dict->dictSize = (U32)dictSize;
+
+    return dictSize;
 }
-EXPORT_SYMBOL(lz4_compress);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 compressor");
+
+/* Kernel exports */
+EXPORT_SYMBOL(LZ4_compress_default);
+EXPORT_SYMBOL(LZ4_compress_fast);
+EXPORT_SYMBOL(LZ4_compress_destSize);
+EXPORT_SYMBOL(lz4_compress);
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 6d940c7..bcadc31 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -1,343 +1,326 @@
 /*
- * LZ4 Decompressor for Linux kernel
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * Based on LZ4 implementation by Yann Collet.
- *
- * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You can contact the author at :
- *  - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- *  - LZ4 source repository : http://code.google.com/p/lz4/
- */
-
-#ifndef STATIC
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-2016, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+   You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+
+    Changed for kernel use by:
+    Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+*/
+
+/*-************************************
+*  Includes
+**************************************/
+#include "lz4defs.h"
+#include <linux/lz4.h>
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#endif
-#include <linux/lz4.h>
-
 #include <asm/unaligned.h>
 
-#include "lz4defs.h"
-
-static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-static int lz4_uncompress(const char *source, char *dest, int osize)
+/*-*****************************
+*  Decompression functions
+*******************************/
+/*! LZ4_decompress_generic() :
+ *  This generic decompression function cover all use cases.
+ *  It shall be instantiated several times, using different sets of directives
+ *  Note that it is important this generic function is really inlined,
+ *  in order to remove useless branches during compilation optimization.
+ */
+FORCE_INLINE int LZ4_decompress_generic(
+                 const char* const source,
+                 char* const dest,
+                 int inputSize,
+                 int outputSize,         /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
+
+                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
+                 int partialDecoding,    /* full, partial */
+                 int targetOutputSize,   /* only used if partialDecoding==partial */
+                 int dict,               /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* == dest when no prefix */
+                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
+                 const size_t dictSize         /* note : = 0 if noDict */
+                 )
 {
-	const BYTE *ip = (const BYTE *) source;
-	const BYTE *ref;
-	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + osize;
-	BYTE *cpy;
-	unsigned token;
-	size_t length;
-
-	while (1) {
-
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
-		if (length == RUN_MASK) {
-			size_t len;
-
-			len = *ip++;
-			for (; len == 255; length += 255)
-				len = *ip++;
-			if (unlikely(length > (size_t)(length + len)))
-				goto _output_error;
-			length += len;
-		}
-
-		/* copy literals */
-		cpy = op + length;
-		if (unlikely(cpy > oend - COPYLENGTH)) {
-			/*
-			 * Error: not enough place for another match
-			 * (min 4) + 5 literals
-			 */
-			if (cpy != oend)
-				goto _output_error;
-
-			memcpy(op, ip, length);
-			ip += length;
-			break; /* EOF */
-		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
-
-		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-
-		/* Error: offset create reference outside destination buffer */
-		if (unlikely(ref < (BYTE *const) dest))
-			goto _output_error;
-
-		/* get matchlength */
-		length = token & ML_MASK;
-		if (length == ML_MASK) {
-			for (; *ip == 255; length += 255)
-				ip++;
-			if (unlikely(length > (size_t)(length + *ip)))
-				goto _output_error;
-			length += *ip++;
-		}
-
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-			op[0] = ref[0];
-			op[1] = ref[1];
-			op[2] = ref[2];
-			op[3] = ref[3];
-			op += 4;
-			ref += 4;
-			ref -= dec32table[op-ref];
-			PUT4(ref, op);
-			op += STEPSIZE - 4;
-			ref -= dec64;
-		} else {
-			LZ4_COPYSTEP(ref, op);
-		}
-		cpy = op + length - (STEPSIZE - 4);
-		if (cpy > (oend - COPYLENGTH)) {
-
-			/* Error: request to write beyond destination buffer */
-			if (cpy > oend)
-				goto _output_error;
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
-				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
-		}
-		LZ4_SECURECOPY(ref, op, cpy);
-		op = cpy; /* correction */
-	}
-	/* end of decoding */
-	return (int) (((char *)ip) - source);
-
-	/* write overflow error detected */
+    /* Local Variables */
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + outputSize;
+    BYTE* cpy;
+    BYTE* oexit = op + targetOutputSize;
+    const BYTE* const lowLimit = lowPrefix - dictSize;
+
+    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
+    const unsigned dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
+    const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+    const int safeDecode = (endOnInput==endOnInputSize);
+    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+
+
+    /* Special cases */
+    if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT;                        /* targetOutputSize too high => decode everything */
+    if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
+    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+
+    /* Main Loop : decode sequences */
+    while (1) {
+        unsigned token;
+        size_t length;
+        const BYTE* match;
+        size_t offset;
+
+        /* get literal length */
+        token = *ip++;
+        if ((length=(token>>ML_BITS)) == RUN_MASK) {
+            unsigned s;
+            do {
+                s = *ip++;
+                length += s;
+            } while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) & (s==255) );
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error;   /* overflow detection */
+            if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error;   /* overflow detection */
+        }
+
+        /* copy literals */
+        cpy = op+length;
+        if ( ((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
+            || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
+        {
+            if (partialDecoding) {
+                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
+                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
+            } else {
+                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
+                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
+            }
+            memcpy(op, ip, length);
+            ip += length;
+            op += length;
+            break;     /* Necessarily EOF, due to parsing restrictions */
+        }
+        LZ4_wildCopy(op, ip, cpy);
+        ip += length; op = cpy;
+
+        /* get offset */
+        offset = LZ4_readLE16(ip); ip+=2;
+        match = op - offset;
+        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside buffers */
+
+        /* get matchlength */
+        length = token & ML_MASK;
+        if (length == ML_MASK) {
+            unsigned s;
+            do {
+                s = *ip++;
+                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
+                length += s;
+            } while (s==255);
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error;   /* overflow detection */
+        }
+        length += MINMATCH;
+
+        /* check external dictionary */
+        if ((dict==usingExtDict) && (match < lowPrefix)) {
+            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;   /* doesn't respect parsing restriction */
+
+            if (length <= (size_t)(lowPrefix-match)) {
+                /* match can be copied as a single segment from external dictionary */
+                memmove(op, dictEnd - (lowPrefix-match), length);
+                op += length;
+            } else {
+                /* match encompass external dictionary and current block */
+                size_t const copySize = (size_t)(lowPrefix-match);
+                size_t const restSize = length - copySize;
+                memcpy(op, dictEnd - copySize, copySize);
+                op += copySize;
+                if (restSize > (size_t)(op-lowPrefix)) {  /* overlap copy */
+                    BYTE* const endOfMatch = op + restSize;
+                    const BYTE* copyFrom = lowPrefix;
+                    while (op < endOfMatch) *op++ = *copyFrom++;
+                } else {
+                    memcpy(op, lowPrefix, restSize);
+                    op += restSize;
+            }   }
+            continue;
+        }
+
+        /* copy match within block */
+        cpy = op + length;
+        if (unlikely(offset<8)) {
+            const int dec64 = dec64table[offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[offset];
+            memcpy(op+4, match, 4);
+            match -= dec64;
+        } else { LZ4_copy8(op, match); match+=8; }
+        op += 8;
+
+        if (unlikely(cpy>oend-12)) {
+            BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
+            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+            if (op < oCopyLimit) {
+                LZ4_wildCopy(op, match, oCopyLimit);
+                match += oCopyLimit - op;
+                op = oCopyLimit;
+            }
+            while (op<cpy) *op++ = *match++;
+        } else {
+            LZ4_copy8(op, match);
+            if (length>16) LZ4_wildCopy(op+8, match+8, cpy);
+        }
+        op=cpy;   /* correction */
+    }
+
+    /* end of decoding */
+    if (endOnInput)
+       return (int) (((char*)op)-dest);     /* Nb of output bytes decoded */
+    else
+       return (int) (((const char*)ip)-source);   /* Nb of input bytes read */
+
+    /* Overflow error detected */
 _output_error:
-	return -1;
+    return (int) (-(((const char*)ip)-source))-1;
 }
 
-static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
-				int isize, size_t maxoutputsize)
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
 {
-	const BYTE *ip = (const BYTE *) source;
-	const BYTE *const iend = ip + isize;
-	const BYTE *ref;
-
-
-	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + maxoutputsize;
-	BYTE *cpy;
-
-	/* Main Loop */
-	while (ip < iend) {
-
-		unsigned token;
-		size_t length;
-
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
-		if (length == RUN_MASK) {
-			int s = 255;
-			while ((ip < iend) && (s == 255)) {
-				s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-			}
-		}
-		/* copy literals */
-		cpy = op + length;
-		if ((cpy > oend - COPYLENGTH) ||
-			(ip + length > iend - COPYLENGTH)) {
-
-			if (cpy > oend)
-				goto _output_error;/* writes beyond buffer */
-
-			if (ip + length != iend)
-				goto _output_error;/*
-						    * Error: LZ4 format requires
-						    * to consume all input
-						    * at this stage
-						    */
-			memcpy(op, ip, length);
-			op += length;
-			break;/* Necessarily EOF, due to parsing restrictions */
-		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
-
-		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-		if (ref < (BYTE * const) dest)
-			goto _output_error;
-			/*
-			 * Error : offset creates reference
-			 * outside of destination buffer
-			 */
-
-		/* get matchlength */
-		length = (token & ML_MASK);
-		if (length == ML_MASK) {
-			while (ip < iend) {
-				int s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-				if (s == 255)
-					continue;
-				break;
-			}
-		}
-
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-				op[0] = ref[0];
-				op[1] = ref[1];
-				op[2] = ref[2];
-				op[3] = ref[3];
-				op += 4;
-				ref += 4;
-				ref -= dec32table[op - ref];
-				PUT4(ref, op);
-				op += STEPSIZE - 4;
-				ref -= dec64;
-		} else {
-			LZ4_COPYSTEP(ref, op);
-		}
-		cpy = op + length - (STEPSIZE-4);
-		if (cpy > oend - COPYLENGTH) {
-			if (cpy > oend)
-				goto _output_error; /* write outside of buf */
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
-				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
-		}
-		LZ4_SECURECOPY(ref, op, cpy);
-		op = cpy; /* correction */
-	}
-	/* end of decoding */
-	return (int) (((char *) op) - dest);
-
-	/* write overflow error detected */
-_output_error:
-	return -1;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
 }
 
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len)
+int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
 {
-	int ret = -1;
-	int input_len = 0;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
+}
 
-	input_len = lz4_uncompress(src, dest, actual_dest_len);
-	if (input_len < 0)
-		goto exit_0;
-	*src_len = input_len;
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
+}
 
-	return 0;
-exit_0:
-	return ret;
+/*!
+ * LZ4_setStreamDecode() :
+ * Use this function to instruct where to find the dictionary.
+ * This function is not necessary if previous data is still available where it was decoded.
+ * Loading a size of 0 is allowed (same effect as no dictionary).
+ * Return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    lz4sd->prefixSize = (size_t) dictSize;
+    lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
+    lz4sd->externalDict = NULL;
+    lz4sd->extDictSize  = 0;
+    return 1;
 }
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress);
-#endif
 
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len)
+/*
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in "streaming" mode.
+    Previously decoded blocks must still be available at the memory position where they were decoded.
+    If it's not possible, save the relevant part of decoded data into a safe buffer,
+    and indicate where it stands using LZ4_setStreamDecode()
+*/
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
 {
-	int ret = -1;
-	int out_len = 0;
-
-	out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
-					*dest_len);
-	if (out_len < 0)
-		goto exit_0;
-	*dest_len = out_len;
-
-	return 0;
-exit_0:
-	return ret;
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    int result;
+
+    if (lz4sd->prefixEnd == (BYTE*)dest) {
+        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                        endOnInputSize, full, 0,
+                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += result;
+        lz4sd->prefixEnd  += result;
+    } else {
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                        endOnInputSize, full, 0,
+                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = result;
+        lz4sd->prefixEnd  = (BYTE*)dest + result;
+    }
+
+    return result;
+}
+
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    int result;
+
+    if (lz4sd->prefixEnd == (BYTE*)dest) {
+        result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                                        endOnOutputSize, full, 0,
+                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += originalSize;
+        lz4sd->prefixEnd  += originalSize;
+    } else {
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = (BYTE*)dest - lz4sd->extDictSize;
+        result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                                        endOnOutputSize, full, 0,
+                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = originalSize;
+        lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
+    }
+
+    return result;
+}
+
+/*
+ * for backwards compatibility
+ */
+int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, unsigned char *dest, size_t *dest_len) {
+    return LZ4_decompress_safe(src, dest, (int)src_len, (int)*dest_len);
+}
+
+/*
+ * for backwards compatibility
+ */
+int lz4_decompress(const unsigned char *src, size_t *src_len, unsigned char *dest, size_t actual_dest_len) {
+    return LZ4_decompress_fast(src, dest, (int)actual_dest_len);
 }
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4 Decompressor");
-#endif
+MODULE_DESCRIPTION("LZ4 decompressor");
+
+/* Kernel exports */
+EXPORT_SYMBOL(LZ4_decompress_fast);
+EXPORT_SYMBOL(LZ4_decompress_safe);
+EXPORT_SYMBOL(LZ4_decompress_safe_partial);
+EXPORT_SYMBOL(lz4_decompress);
+EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index c79d7ea..a024344 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -1,157 +1,272 @@
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
 /*
- * lz4defs.h -- architecture specific defines
+ * lz4defs.h -- common and architecture specific defines
  *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
+ #include <asm/unaligned.h>
+
+#define FORCE_INLINE static inline
+
 /*
  * Detects 64 bits mode
- */
+*/
 #if defined(CONFIG_64BIT)
 #define LZ4_ARCH64 1
 #else
 #define LZ4_ARCH64 0
 #endif
 
+static inline unsigned LZ4_64bits(void) { return LZ4_ARCH64; }
+
 /*
- * Architecture-specific macros
+ * Little/big endian
  */
-#define BYTE	u8
-typedef struct _U16_S { u16 v; } U16_S;
-typedef struct _U32_S { u32 v; } U32_S;
-typedef struct _U64_S { u64 v; } U64_S;
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-
-#define A16(x) (((U16_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A64(x) (((U64_S *)(x))->v)
-
-#define PUT4(s, d) (A32(d) = A32(s))
-#define PUT8(s, d) (A64(d) = A64(s))
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - A16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
-	do {	\
-		A16(p) = v; \
-		p += 2; \
-	} while (0)
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
-#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
-#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
-
-#define PUT4(s, d) \
-	put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
-#define PUT8(s, d) \
-	put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - get_unaligned_le16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)			\
-	do {						\
-		put_unaligned_le16(v, (u16 *)(p));	\
-		p += 2;					\
-	} while (0)
+#ifdef __LITTLE_ENDIAN
+#define LZ4_isLittleEndian(void) (true)
+#else
+#define LZ4_isLittleEndian(void) (false)
 #endif
 
-#define COPYLENGTH 8
-#define ML_BITS  4
-#define ML_MASK  ((1U << ML_BITS) - 1)
-#define RUN_BITS (8 - ML_BITS)
-#define RUN_MASK ((1U << RUN_BITS) - 1)
-#define MEMORY_USAGE	14
-#define MINMATCH	4
-#define SKIPSTRENGTH	6
-#define LASTLITERALS	5
-#define MFLIMIT		(COPYLENGTH + MINMATCH)
-#define MINLENGTH	(MFLIMIT + 1)
-#define MAXD_LOG	16
-#define MAXD		(1 << MAXD_LOG)
-#define MAXD_MASK	(u32)(MAXD - 1)
-#define MAX_DISTANCE	(MAXD - 1)
-#define HASH_LOG	(MAXD_LOG - 1)
-#define HASHTABLESIZE	(1 << HASH_LOG)
-#define MAX_NB_ATTEMPTS	256
-#define OPTIMAL_ML	(int)((ML_MASK-1)+MINMATCH)
-#define LZ4_64KLIMIT	((1<<16) + (MFLIMIT - 1))
-#define HASHLOG64K	((MEMORY_USAGE - 2) + 1)
-#define HASH64KTABLESIZE	(1U << HASHLOG64K)
-#define LZ4_HASH_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - (MEMORY_USAGE-2)))
-#define LZ4_HASH64K_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASHLOG64K))
-#define HASH_VALUE(p)		(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASH_LOG))
-
-#if LZ4_ARCH64/* 64-bit */
-#define STEPSIZE 8
-
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT8(s, d);	\
-		d += 8;		\
-		s += 8;		\
-	} while (0)
-
-#define LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
-
-#define LZ4_SECURECOPY(s, d, e)			\
-	do {					\
-		if (d < e) {			\
-			LZ4_WILDCOPY(s, d, e);	\
-		}				\
-	} while (0)
-#define HTYPE u32
+/*-************************************
+*  Tuning parameter
+**************************************/
+/*! * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 10
 
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+/*-************************************
+*  Memory routines
+**************************************/
+#include <linux/slab.h>
+#include <linux/string.h>   /* memset, memcpy */
+#define MEM_INIT       memset
+
+/*-************************************
+*  Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
 #endif
 
-#else	/* 32-bit */
-#define STEPSIZE 4
+/*-************************************
+*  Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+#define STEPSIZE sizeof(size_t)
+
+#define ML_BITS  4
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+#define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static inline allocation */
+
+static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */
+
+/*-************************************
+*  Reading and writing into memory
+**************************************/
+
+static inline U16 LZ4_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline U32 LZ4_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline size_t LZ4_read_ARCH(const void* memPtr)
+{
+    size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline void LZ4_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline void LZ4_write32(void* memPtr, U32 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline U16 LZ4_readLE16(const void* memPtr)
+{
+    if (LZ4_isLittleEndian()) {
+        return LZ4_read16(memPtr);
+    } else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)((U16)p[0] + (p[1]<<8));
+    }
+}
+
+static inline void LZ4_writeLE16(void* memPtr, U16 value)
+{
+    if (LZ4_isLittleEndian()) {
+        LZ4_write16(memPtr, value);
+    } else {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE) value;
+        p[1] = (BYTE)(value>>8);
+    }
+}
 
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT4(s, d);	\
-		d += 4;		\
-		s += 4;		\
-	} while (0)
+static inline void LZ4_copy8(void* dst, const void* src)
+{
+    memcpy(dst,src,8);
+}
 
-#define LZ4_COPYPACKET(s, d)		\
-	do {				\
-		LZ4_COPYSTEP(s, d);	\
-		LZ4_COPYSTEP(s, d);	\
-	} while (0)
+/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */
+static inline void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
 
-#define LZ4_SECURECOPY	LZ4_WILDCOPY
-#define HTYPE const u8*
+#if 0
+    const size_t l2 = 8 - (((size_t)d) & (sizeof(void*)-1));
+    LZ4_copy8(d,s); if (d>e-9) return;
+    d+=l2; s+=l2;
+#endif /* join to align */
 
+    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
+}
+
+#if LZ4_ARCH64
+#ifdef __BIG_ENDIAN
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#endif
+#else
 #ifdef __BIG_ENDIAN
 #define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
 #else
 #define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
 #endif
-
 #endif
 
-#define LZ4_WILDCOPY(s, d, e)		\
-	do {				\
-		LZ4_COPYPACKET(s, d);	\
-	} while (d < e)
-
-#define LZ4_BLINDCOPY(s, d, l)	\
-	do {	\
-		u8 *e = (d) + l;	\
-		LZ4_WILDCOPY(s, d, e);	\
-		d = e;	\
-	} while (0)
+static inline unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const pStart = pIn;
+
+    while (likely(pIn<pInLimit-(STEPSIZE-1))) {
+        size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
+        pIn += LZ4_NBCOMMONBYTES(diff);
+        return (unsigned)(pIn - pStart);
+    }
+
+    if (LZ4_64bits()) if ((pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (unsigned)(pIn - pStart);
+}
+
+/*-************************************
+*  Common Utils
+**************************************/
+#define LZ4_static_ASSERT(c)    { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+/*^**********************************************
+ * !!!!!!   static inline LINKING ONLY   !!!!!!
+ ***********************************************/
+
+typedef struct {
+    uint32_t hashTable[LZ4_HASH_SIZE_U32];
+    uint32_t currentOffset;
+    uint32_t initCheck;
+    const uint8_t* dictionary;
+    uint8_t* bufferStart;   /* obsolete, used for slideInputBuffer */
+    uint32_t dictSize;
+} LZ4_stream_t_internal;
+
+typedef struct {
+    const uint8_t* externalDict;
+    size_t extDictSize;
+    const uint8_t* prefixEnd;
+    size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
+
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+/*-**********************************************
+*  Streaming Decompression
+************************************************/
+#define LZ4_STREAMDECODESIZE_U64  4
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
+
+/*-*********************************************
+*  Streaming Compression
+***********************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(long long))
+/*!
+ * LZ4_stream_t :
+ * information structure to track an LZ4 stream.
+ * important : init this structure content before first use !
+ * note : only allocated directly the structure if you are static inlineally linking LZ4
+ *        If you are using liblz4 as a DLL, please use below construction methods instead.
+ */
+typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;
+
+typedef union LZ4_streamHC_u LZ4_streamHC_t;   /* incomplete type (defined later) */
+
+/*-******************************
+*  Streaming functions
+********************************/
+
+static inline void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
+{
+    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+#endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index f344f76..5474372 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -1,539 +1,624 @@
 /*
- * LZ4 HC - High Compression Mode of LZ4
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
- *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
- */
-
+    LZ4 HC - High Compression Mode of LZ4
+    Copyright (C) 2011-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+       - LZ4 source repository : https://github.com/lz4/lz4
+       - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+
+    Changed for kernel use by:
+    Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+*/
+/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
+
+/*-************************************
+*  Includes
+**************************************/
+#include "lz4defs.h"
+#include <linux/lz4.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lz4.h>
-#include <asm/unaligned.h>
-#include "lz4defs.h"
 
-struct lz4hc_data {
-	const u8 *base;
-	HTYPE hashtable[HASHTABLESIZE];
-	u16 chaintable[MAXD];
-	const u8 *nexttoupdate;
-} __attribute__((__packed__));
+/* *************************************
+*  Tuning Parameter
+***************************************/
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE-1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#include <stdint.h>
 
-static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
+typedef struct
 {
-	memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
-	memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
+    uint32_t   hashTable[LZ4HC_HASHTABLESIZE];
+    uint16_t   chainTable[LZ4HC_MAXD];
+    const uint8_t* end;        /* next block here to continue on current prefix */
+    const uint8_t* base;       /* All index relative to this position */
+    const uint8_t* dictBase;   /* alternate base for extDict */
+    uint8_t* inputBuffer;      /* deprecated */
+    uint32_t   dictLimit;        /* below that point, need extDict */
+    uint32_t   lowLimit;         /* below that point, no more dict */
+    uint32_t   nextToUpdate;     /* index from which to continue dictionary update */
+    uint32_t   compressionLevel;
+} LZ4HC_CCtx_internal;
 
-#if LZ4_ARCH64
-	hc4->nexttoupdate = base + 1;
 #else
-	hc4->nexttoupdate = base;
+
+typedef struct
+{
+    unsigned int   hashTable[LZ4HC_HASHTABLESIZE];
+    unsigned short   chainTable[LZ4HC_MAXD];
+    const unsigned char* end;        /* next block here to continue on current prefix */
+    const unsigned char* base;       /* All index relative to this position */
+    const unsigned char* dictBase;   /* alternate base for extDict */
+    unsigned char* inputBuffer;      /* deprecated */
+    unsigned int   dictLimit;        /* below that point, need extDict */
+    unsigned int   lowLimit;         /* below that point, no more dict */
+    unsigned int   nextToUpdate;     /* index from which to continue dictionary update */
+    unsigned int   compressionLevel;
+} LZ4HC_CCtx_internal;
+
+#endif
+
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
+union LZ4_streamHC_u {
+    size_t table[LZ4_STREAMHCSIZE_SIZET];
+    LZ4HC_CCtx_internal internal_donotuse;
+};   /* previously typedef'd to LZ4_streamHC_t */
+
+/* *************************************
+*  Local Compiler Options
+***************************************/
+#if defined(__GNUC__)
+#  pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#if defined (__clang__)
+#  pragma clang diagnostic ignored "-Wunused-function"
 #endif
-	hc4->base = base;
-	return 1;
+
+
+/* *************************************
+*  Common LZ4 definition
+***************************************/
+#define LZ4_COMMONDEFS_ONLY
+//#include "lz4.c"
+
+
+/* *************************************
+*  Local Constants
+***************************************/
+#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+
+
+/**************************************
+*  Local Macros
+**************************************/
+#define HASH_FUNCTION(i)       (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
+/* #define DELTANEXTU16(p)        chainTable[(p) & LZ4HC_MAXD_MASK] */   /* flexible, LZ4HC_MAXD dependent */
+#define DELTANEXTU16(p)        chainTable[(U16)(p)]   /* faster */
+
+static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
+
+
+
+/**************************************
+*  HC Compression
+**************************************/
+static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
+{
+    MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
+    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+    hc4->nextToUpdate = 64 KB;
+    hc4->base = start - 64 KB;
+    hc4->end = start;
+    hc4->dictBase = start - 64 KB;
+    hc4->dictLimit = 64 KB;
+    hc4->lowLimit = 64 KB;
 }
 
+
 /* Update chains up to ip (excluded) */
-static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
+FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
 {
-	u16 *chaintable = hc4->chaintable;
-	HTYPE *hashtable  = hc4->hashtable;
-#if LZ4_ARCH64
-	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-
-	while (hc4->nexttoupdate < ip) {
-		const u8 *p = hc4->nexttoupdate;
-		size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
-		if (delta > MAX_DISTANCE)
-			delta = MAX_DISTANCE;
-		chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
-		hashtable[HASH_VALUE(p)] = (p) - base;
-		hc4->nexttoupdate++;
-	}
+    U16* const chainTable = hc4->chainTable;
+    U32* const hashTable  = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    U32 const target = (U32)(ip - base);
+    U32 idx = hc4->nextToUpdate;
+
+    while (idx < target) {
+        U32 const h = LZ4HC_hashPtr(base+idx);
+        size_t delta = idx - hashTable[h];
+        if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
+        DELTANEXTU16(idx) = (U16)delta;
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    hc4->nextToUpdate = target;
 }
 
-static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
-		const u8 *const matchlimit)
+
+FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* hc4,   /* Index table will be updated */
+                                               const BYTE* ip, const BYTE* const iLimit,
+                                               const BYTE** matchpos,
+                                               const int maxNbAttempts)
 {
-	const u8 *p1t = p1;
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const BYTE* const dictBase = hc4->dictBase;
+    const U32 dictLimit = hc4->dictLimit;
+    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
+    U32 matchIndex;
+    int nbAttempts=maxNbAttempts;
+    size_t ml=0;
+
+    /* HC4 match finder */
+    LZ4HC_Insert(hc4, ip);
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+    while ((matchIndex>=lowLimit) && (nbAttempts)) {
+        nbAttempts--;
+        if (matchIndex >= dictLimit) {
+            const BYTE* const match = base + matchIndex;
+            if (*(match+ml) == *(ip+ml)
+                && (LZ4_read32(match) == LZ4_read32(ip)))
+            {
+                size_t const mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
+                if (mlt > ml) { ml = mlt; *matchpos = match; }
+            }
+        } else {
+            const BYTE* const match = dictBase + matchIndex;
+            if (LZ4_read32(match) == LZ4_read32(ip)) {
+                size_t mlt;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);
+                if (vLimit > iLimit) vLimit = iLimit;
+                mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iLimit))
+                    mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit);
+                if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; }   /* virtual matchpos */
+            }
+        }
+        matchIndex -= DELTANEXTU16(matchIndex);
+    }
+
+    return (int)ml;
+}
 
-	while (p1t < matchlimit - (STEPSIZE - 1)) {
-#if LZ4_ARCH64
-		u64 diff = A64(p2) ^ A64(p1t);
-#else
-		u32 diff = A32(p2) ^ A32(p1t);
-#endif
-		if (!diff) {
-			p1t += STEPSIZE;
-			p2 += STEPSIZE;
-			continue;
-		}
-		p1t += LZ4_NBCOMMONBYTES(diff);
-		return p1t - p1;
-	}
-#if LZ4_ARCH64
-	if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
-		p1t += 4;
-		p2 += 4;
-	}
-#endif
 
-	if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
-		p1t += 2;
-		p2 += 2;
-	}
-	if ((p1t < matchlimit) && (*p2 == *p1t))
-		p1t++;
-	return p1t - p1;
+FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
+    LZ4HC_CCtx_internal* hc4,
+    const BYTE* const ip,
+    const BYTE* const iLowLimit,
+    const BYTE* const iHighLimit,
+    int longest,
+    const BYTE** matchpos,
+    const BYTE** startpos,
+    const int maxNbAttempts)
+{
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const U32 dictLimit = hc4->dictLimit;
+    const BYTE* const lowPrefixPtr = base + dictLimit;
+    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
+    const BYTE* const dictBase = hc4->dictBase;
+    U32   matchIndex;
+    int nbAttempts = maxNbAttempts;
+    int delta = (int)(ip-iLowLimit);
+
+
+    /* First Match */
+    LZ4HC_Insert(hc4, ip);
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+    while ((matchIndex>=lowLimit) && (nbAttempts)) {
+        nbAttempts--;
+        if (matchIndex >= dictLimit) {
+            const BYTE* matchPtr = base + matchIndex;
+            if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {
+                if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+                    int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+                    int back = 0;
+
+                    while ((ip+back > iLowLimit)
+                           && (matchPtr+back > lowPrefixPtr)
+                           && (ip[back-1] == matchPtr[back-1]))
+                            back--;
+
+                    mlt -= back;
+
+                    if (mlt > longest) {
+                        longest = (int)mlt;
+                        *matchpos = matchPtr+back;
+                        *startpos = ip+back;
+                    }
+                }
+            }
+        } else {
+            const BYTE* const matchPtr = dictBase + matchIndex;
+            if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+                size_t mlt;
+                int back=0;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);
+                if (vLimit > iHighLimit) vLimit = iHighLimit;
+                mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
+                    mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
+                while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--;
+                mlt -= back;
+                if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
+            }
+        }
+        matchIndex -= DELTANEXTU16(matchIndex);
+    }
+
+    return longest;
 }
 
-static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
-		const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
+FORCE_INLINE int LZ4HC_encodeSequence (
+    const BYTE** ip,
+    BYTE** op,
+    const BYTE** anchor,
+    int matchLength,
+    const BYTE* const match,
+    limitedOutput_directive limitedOutputBuffer,
+    BYTE* oend)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-	const u8 *ref;
-#if LZ4_ARCH64
-	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	int nbattempts = MAX_NB_ATTEMPTS;
-	size_t repl = 0, ml = 0;
-	u16 delta;
-
-	/* HC4 match finder */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	/* potential repetition */
-	if (ref >= ip-4) {
-		/* confirmed */
-		if (A32(ref) == A32(ip)) {
-			delta = (u16)(ip-ref);
-			repl = ml  = lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
-			*matchpos = ref;
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-
-	while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
-		nbattempts--;
-		if (*(ref + ml) == *(ip + ml)) {
-			if (A32(ref) == A32(ip)) {
-				size_t mlt =
-					lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
-				if (mlt > ml) {
-					ml = mlt;
-					*matchpos = ref;
-				}
-			}
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-
-	/* Complete table */
-	if (repl) {
-		const BYTE *ptr = ip;
-		const BYTE *end;
-		end = ip + repl - (MINMATCH-1);
-		/* Pre-Load */
-		while (ptr < end - delta) {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			ptr++;
-		}
-		do {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			/* Head of chain */
-			hashtable[HASH_VALUE(ptr)] = (ptr) - base;
-			ptr++;
-		} while (ptr < end);
-		hc4->nexttoupdate = end;
-	}
-
-	return (int)ml;
+    int length;
+    BYTE* token;
+
+    /* Encode Literal length */
+    length = (int)(*ip - *anchor);
+    token = (*op)++;
+    if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
+    if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; }
+    else *token = (BYTE)(length<<ML_BITS);
+
+    /* Copy Literals */
+    LZ4_wildCopy(*op, *anchor, (*op) + length);
+    *op += length;
+
+    /* Encode Offset */
+    LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
+
+    /* Encode MatchLength */
+    length = (int)(matchLength-MINMATCH);
+    if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
+    if (length>=(int)ML_MASK) {
+        *token += ML_MASK;
+        length -= ML_MASK;
+        for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; }
+        if (length > 254) { length-=255; *(*op)++ = 255; }
+        *(*op)++ = (BYTE)length;
+    } else {
+        *token += (BYTE)(length);
+    }
+
+    /* Prepare next loop */
+    *ip += matchLength;
+    *anchor = *ip;
+
+    return 0;
 }
 
-static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
-	const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
-	const u8 **matchpos, const u8 **startpos)
+
+static int LZ4HC_compress_generic (
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const source,
+    char* const dest,
+    int const inputSize,
+    int const maxOutputSize,
+    int compressionLevel,
+    limitedOutput_directive limit
+    )
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-#if LZ4_ARCH64
-	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	const u8 *ref;
-	int nbattempts = MAX_NB_ATTEMPTS;
-	int delta = (int)(ip - startlimit);
-
-	/* First Match */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
-		&& (nbattempts)) {
-		nbattempts--;
-		if (*(startlimit + longest) == *(ref - delta + longest)) {
-			if (A32(ref) == A32(ip)) {
-				const u8 *reft = ref + MINMATCH;
-				const u8 *ipt = ip + MINMATCH;
-				const u8 *startt = ip;
-
-				while (ipt < matchlimit-(STEPSIZE - 1)) {
-					#if LZ4_ARCH64
-					u64 diff = A64(reft) ^ A64(ipt);
-					#else
-					u32 diff = A32(reft) ^ A32(ipt);
-					#endif
-
-					if (!diff) {
-						ipt += STEPSIZE;
-						reft += STEPSIZE;
-						continue;
-					}
-					ipt += LZ4_NBCOMMONBYTES(diff);
-					goto _endcount;
-				}
-				#if LZ4_ARCH64
-				if ((ipt < (matchlimit - 3))
-					&& (A32(reft) == A32(ipt))) {
-					ipt += 4;
-					reft += 4;
-				}
-				ipt += 2;
-				#endif
-				if ((ipt < (matchlimit - 1))
-					&& (A16(reft) == A16(ipt))) {
-					reft += 2;
-				}
-				if ((ipt < matchlimit) && (*reft == *ipt))
-					ipt++;
-_endcount:
-				reft = ref;
-
-				while ((startt > startlimit)
-					&& (reft > hc4->base)
-					&& (startt[-1] == reft[-1])) {
-					startt--;
-					reft--;
-				}
-
-				if ((ipt - startt) > longest) {
-					longest = (int)(ipt - startt);
-					*matchpos = reft;
-					*startpos = startt;
-				}
-			}
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-	return longest;
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + maxOutputSize;
+
+    unsigned maxNbAttempts;
+    int   ml, ml2, ml3, ml0;
+    const BYTE* ref = NULL;
+    const BYTE* start2 = NULL;
+    const BYTE* ref2 = NULL;
+    const BYTE* start3 = NULL;
+    const BYTE* ref3 = NULL;
+    const BYTE* start0;
+    const BYTE* ref0;
+
+    /* init */
+    if (compressionLevel > LZ4HC_MAX_CLEVEL) compressionLevel = LZ4HC_MAX_CLEVEL;
+    if (compressionLevel < 1) compressionLevel = LZ4HC_DEFAULT_CLEVEL;
+    maxNbAttempts = 1 << (compressionLevel-1);
+    ctx->end += inputSize;
+
+    ip++;
+
+    /* Main Loop */
+    while (ip < mflimit) {
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
+        if (!ml) { ip++; continue; }
+
+        /* saved, in case we would skip too much */
+        start0 = ip;
+        ref0 = ref;
+        ml0 = ml;
+
+_Search2:
+        if (ip+ml < mflimit)
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts);
+        else ml2 = ml;
+
+        if (ml2 == ml) { /* No better match */
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start0 < ip) {
+            if (start2 < ip + ml0) {  /* empirical */
+                ip = start0;
+                ref = ref0;
+                ml = ml0;
+            }
+        }
+
+        /* Here, start0==ip */
+        if ((start2 - ip) < 3) {  /* First Match too small : removed */
+            ml = ml2;
+            ip = start2;
+            ref =ref2;
+            goto _Search2;
+        }
+
+_Search3:
+        /*
+        * Currently we have :
+        * ml2 > ml1, and
+        * ip1+3 <= ip2 (usually < ip1+ml1)
+        */
+        if ((start2 - ip) < OPTIMAL_ML) {
+            int correction;
+            int new_ml = ml;
+            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+            correction = new_ml - (int)(start2 - ip);
+            if (correction > 0) {
+                start2 += correction;
+                ref2 += correction;
+                ml2 -= correction;
+            }
+        }
+        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
+
+        if (start2 + ml2 < mflimit)
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
+        else ml3 = ml2;
+
+        if (ml3 == ml2) {  /* No better match : 2 sequences to encode */
+            /* ip & ref are known; Now for ml */
+            if (start2 < ip+ml)  ml = (int)(start2 - ip);
+            /* Now, encode 2 sequences */
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            ip = start2;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start3 < ip+ml+3) {  /* Not enough space for match 2 : remove it */
+            if (start3 >= (ip+ml)) {  /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+                if (start2 < ip+ml) {
+                    int correction = (int)(ip+ml - start2);
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                    if (ml2 < MINMATCH) {
+                        start2 = start3;
+                        ref2 = ref3;
+                        ml2 = ml3;
+                    }
+                }
+
+                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+                ip  = start3;
+                ref = ref3;
+                ml  = ml3;
+
+                start0 = start2;
+                ref0 = ref2;
+                ml0 = ml2;
+                goto _Search2;
+            }
+
+            start2 = start3;
+            ref2 = ref3;
+            ml2 = ml3;
+            goto _Search3;
+        }
+
+        /*
+        * OK, now we have 3 ascending matches; let's write at least the first one
+        * ip & ref are known; Now for ml
+        */
+        if (start2 < ip+ml) {
+            if ((start2 - ip) < (int)ML_MASK) {
+                int correction;
+                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+                correction = ml - (int)(start2 - ip);
+                if (correction > 0) {
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                }
+            } else {
+                ml = (int)(start2 - ip);
+            }
+        }
+        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+
+        ip = start2;
+        ref = ref2;
+        ml = ml2;
+
+        start2 = start3;
+        ref2 = ref3;
+        ml2 = ml3;
+
+        goto _Search3;
+    }
+
+    /* Encode Last Literals */
+    {   int lastRun = (int)(iend - anchor);
+        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  /* Check output limit */
+        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+        else *op++ = (BYTE)(lastRun<<ML_BITS);
+        memcpy(op, anchor, iend - anchor);
+        op += iend-anchor;
+    }
+
+    /* End */
+    return (int) (((char*)op)-dest);
+}
+
+
+int LZ4_sizeofStateHC(void) { return sizeof(LZ4_streamHC_t); }
+
+int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
+{
+    LZ4HC_CCtx_internal* ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
+    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
+    LZ4HC_init (ctx, (const BYTE*)src);
+    if (maxDstSize < LZ4_compressBound(srcSize))
+        return LZ4HC_compress_generic (ctx, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput);
+    else
+        return LZ4HC_compress_generic (ctx, src, dst, srcSize, maxDstSize, compressionLevel, notLimited);
 }
 
-static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
-		int ml, const u8 *ref)
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel, void* wrkmem)
 {
-	int length, len;
-	u8 *token;
-
-	/* Encode Literal length */
-	length = (int)(*ip - *anchor);
-	token = (*op)++;
-	if (length >= (int)RUN_MASK) {
-		*token = (RUN_MASK << ML_BITS);
-		len = length - RUN_MASK;
-		for (; len > 254 ; len -= 255)
-			*(*op)++ = 255;
-		*(*op)++ = (u8)len;
-	} else
-		*token = (length << ML_BITS);
-
-	/* Copy Literals */
-	LZ4_BLINDCOPY(*anchor, *op, length);
-
-	/* Encode Offset */
-	LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
-
-	/* Encode MatchLength */
-	len = (int)(ml - MINMATCH);
-	if (len >= (int)ML_MASK) {
-		*token += ML_MASK;
-		len -= ML_MASK;
-		for (; len > 509 ; len -= 510) {
-			*(*op)++ = 255;
-			*(*op)++ = 255;
-		}
-		if (len > 254) {
-			len -= 255;
-			*(*op)++ = 255;
-		}
-		*(*op)++ = (u8)len;
-	} else
-		*token += len;
-
-	/* Prepare next loop */
-	*ip += ml;
-	*anchor = *ip;
-
-	return 0;
+    return LZ4_compress_HC_extStateHC(wrkmem, src, dst, srcSize, maxDstSize, compressionLevel);
 }
 
-static int lz4_compresshcctx(struct lz4hc_data *ctx,
-		const char *source,
-		char *dest,
-		int isize)
+int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len, void *wrkmem)
 {
-	const u8 *ip = (const u8 *)source;
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	const u8 *const matchlimit = (iend - LASTLITERALS);
-
-	u8 *op = (u8 *)dest;
-
-	int ml, ml2, ml3, ml0;
-	const u8 *ref = NULL;
-	const u8 *start2 = NULL;
-	const u8 *ref2 = NULL;
-	const u8 *start3 = NULL;
-	const u8 *ref3 = NULL;
-	const u8 *start0;
-	const u8 *ref0;
-	int lastrun;
-
-	ip++;
-
-	/* Main Loop */
-	while (ip < mflimit) {
-		ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
-		if (!ml) {
-			ip++;
-			continue;
-		}
-
-		/* saved, in case we would skip too much */
-		start0 = ip;
-		ref0 = ref;
-		ml0 = ml;
-_search2:
-		if (ip+ml < mflimit)
-			ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
-				ip + 1, matchlimit, ml, &ref2, &start2);
-		else
-			ml2 = ml;
-		/* No better match */
-		if (ml2 == ml) {
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-			continue;
-		}
-
-		if (start0 < ip) {
-			/* empirical */
-			if (start2 < ip + ml0) {
-				ip = start0;
-				ref = ref0;
-				ml = ml0;
-			}
-		}
-		/*
-		 * Here, start0==ip
-		 * First Match too small : removed
-		 */
-		if ((start2 - ip) < 3) {
-			ml = ml2;
-			ip = start2;
-			ref = ref2;
-			goto _search2;
-		}
-
-_search3:
-		/*
-		 * Currently we have :
-		 * ml2 > ml1, and
-		 * ip1+3 <= ip2 (usually < ip1+ml1)
-		 */
-		if ((start2 - ip) < OPTIMAL_ML) {
-			int correction;
-			int new_ml = ml;
-			if (new_ml > OPTIMAL_ML)
-				new_ml = OPTIMAL_ML;
-			if (ip + new_ml > start2 + ml2 - MINMATCH)
-				new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
-			correction = new_ml - (int)(start2 - ip);
-			if (correction > 0) {
-				start2 += correction;
-				ref2 += correction;
-				ml2 -= correction;
-			}
-		}
-		/*
-		 * Now, we have start2 = ip+new_ml,
-		 * with new_ml=min(ml, OPTIMAL_ML=18)
-		 */
-		if (start2 + ml2 < mflimit)
-			ml3 = lz4hc_insertandgetwidermatch(ctx,
-				start2 + ml2 - 3, start2, matchlimit,
-				ml2, &ref3, &start3);
-		else
-			ml3 = ml2;
-
-		/* No better match : 2 sequences to encode */
-		if (ml3 == ml2) {
-			/* ip & ref are known; Now for ml */
-			if (start2 < ip+ml)
-				ml = (int)(start2 - ip);
-
-			/* Now, encode 2 sequences */
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-			ip = start2;
-			lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
-			continue;
-		}
-
-		/* Not enough space for match 2 : remove it */
-		if (start3 < ip + ml + 3) {
-			/*
-			 * can write Seq1 immediately ==> Seq2 is removed,
-			 * so Seq3 becomes Seq1
-			 */
-			if (start3 >= (ip + ml)) {
-				if (start2 < ip + ml) {
-					int correction =
-						(int)(ip + ml - start2);
-					start2 += correction;
-					ref2 += correction;
-					ml2 -= correction;
-					if (ml2 < MINMATCH) {
-						start2 = start3;
-						ref2 = ref3;
-						ml2 = ml3;
-					}
-				}
-
-				lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-				ip  = start3;
-				ref = ref3;
-				ml  = ml3;
-
-				start0 = start2;
-				ref0 = ref2;
-				ml0 = ml2;
-				goto _search2;
-			}
-
-			start2 = start3;
-			ref2 = ref3;
-			ml2 = ml3;
-			goto _search3;
-		}
-
-		/*
-		 * OK, now we have 3 ascending matches; let's write at least
-		 * the first one ip & ref are known; Now for ml
-		 */
-		if (start2 < ip + ml) {
-			if ((start2 - ip) < (int)ML_MASK) {
-				int correction;
-				if (ml > OPTIMAL_ML)
-					ml = OPTIMAL_ML;
-				if (ip + ml > start2 + ml2 - MINMATCH)
-					ml = (int)(start2 - ip) + ml2
-						- MINMATCH;
-				correction = ml - (int)(start2 - ip);
-				if (correction > 0) {
-					start2 += correction;
-					ref2 += correction;
-					ml2 -= correction;
-				}
-			} else
-				ml = (int)(start2 - ip);
-		}
-		lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-
-		ip = start2;
-		ref = ref2;
-		ml = ml2;
-
-		start2 = start3;
-		ref2 = ref3;
-		ml2 = ml3;
-
-		goto _search3;
-	}
-
-	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8) lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
-	/* End */
-	return (int) (((char *)op) - dest);
+  return LZ4_compress_HC(src, dst, (int)src_len, (int)((size_t)dst_len), LZ4HC_DEFAULT_CLEVEL, wrkmem);
 }
 
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+/**************************************
+*  Streaming Functions
+**************************************/
+
+int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
 {
-	int ret = -1;
-	int out_len = 0;
+    LZ4HC_CCtx_internal* ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    if (dictSize > 64 KB) {
+        dictionary += dictSize - 64 KB;
+        dictSize = 64 KB;
+    }
+    LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
+    if (dictSize >= 4) LZ4HC_Insert (ctxPtr, (const BYTE*)dictionary +(dictSize-3));
+    ctxPtr->end = (const BYTE*)dictionary + dictSize;
+    return dictSize;
+}
+
 
-	struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
-	lz4hc_init(hc4, (const u8 *)src);
-	out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
-		(char *)dst, (int)src_len);
+/* compression */
 
-	if (out_len < 0)
-		goto exit;
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
+{
+    if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* Referencing remaining dictionary content */
+    /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
+    ctxPtr->lowLimit  = ctxPtr->dictLimit;
+    ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+    ctxPtr->dictBase  = ctxPtr->base;
+    ctxPtr->base = newBlock - ctxPtr->dictLimit;
+    ctxPtr->end  = newBlock;
+    ctxPtr->nextToUpdate = ctxPtr->dictLimit;   /* match referencing will resume from there */
+}
 
-	*dst_len = out_len;
-	return 0;
+static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
+                                            const char* source, char* dest,
+                                            int inputSize, int maxOutputSize, limitedOutput_directive limit)
+{
+    LZ4HC_CCtx_internal* ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    /* auto-init if forgotten */
+    if (ctxPtr->base == NULL) LZ4HC_init (ctxPtr, (const BYTE*) source);
+
+    /* Check overflow */
+    if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) {
+        size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
+        if (dictSize > 64 KB) dictSize = 64 KB;
+        LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
+    }
+
+    /* Check if blocks follow each other */
+    if ((const BYTE*)source != ctxPtr->end) LZ4HC_setExternalDict(ctxPtr, (const BYTE*)source);
+
+    /* Check overlapping input/dictionary space */
+    {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+        const BYTE* const dictEnd   = ctxPtr->dictBase + ctxPtr->dictLimit;
+        if ((sourceEnd > dictBegin) && ((const BYTE*)source < dictEnd)) {
+            if (sourceEnd > dictEnd) sourceEnd = dictEnd;
+            ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+            if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
+        }
+    }
+
+    return LZ4HC_compress_generic (ctxPtr, source, dest, inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
 
-exit:
-	return ret;
+int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    if (maxOutputSize < LZ4_compressBound(inputSize))
+        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput);
+    else
+        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, notLimited);
 }
-EXPORT_SYMBOL(lz4hc_compress);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4HC compressor");
+MODULE_DESCRIPTION("LZ4 HC compressor");
+
+/* Kernel exports */
+EXPORT_SYMBOL(LZ4_compress_HC);
+EXPORT_SYMBOL(lz4hc_compress);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2016-12-20 18:53 ` [PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
@ 2016-12-20 19:52   ` Joe Perches
  2016-12-21 20:04     ` Sven Schmidt
  2016-12-21  7:09   ` kbuild test robot
  1 sibling, 1 reply; 104+ messages in thread
From: Joe Perches @ 2016-12-20 19:52 UTC (permalink / raw)
  To: Sven Schmidt, akpm; +Cc: bongkyu.kim, sergey.senozhatsky, gregkh, linux-kernel

On Tue, 2016-12-20 at 19:53 +0100, Sven Schmidt wrote:
> This patch updates LZ4 kernel module to LZ4 v1.7.2 by Yann Collet.
> The kernel module is inspired by the previous work by Chanho Min.
> The updated LZ4 module will not break existing code since there were alias
> methods added to ensure backwards compatibility.

[]

> diff --git a/include/linux/lz4.h b/include/linux/lz4.h
[]
> @@ -1,87 +1,218 @@
>  #ifndef __LZ4_H__
>  #define __LZ4_H__
> +
>  /*
>   * LZ4 Kernel Interface
>   *
> - * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
> + * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>

Deleting copyright notices is poor form and shouldn't be done.

I didn't look at the rest

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version
  2016-12-20 18:53 ` [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version Sven Schmidt
@ 2016-12-21  5:16   ` kbuild test robot
  2016-12-21  5:18   ` kbuild test robot
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 104+ messages in thread
From: kbuild test robot @ 2016-12-21  5:16 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: kbuild-all, akpm, bongkyu.kim, sergey.senozhatsky, gregkh,
	linux-kernel, Sven Schmidt

[-- Attachment #1: Type: text/plain, Size: 2836 bytes --]

Hi Sven,

[auto build test ERROR on linus/master]
[also build test ERROR on next-20161221]
[cannot apply to v4.9]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Sven-Schmidt/Update-LZ4-compressor-module/20161221-130109
config: x86_64-randconfig-x006-201651 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

Note: the linux-review/Sven-Schmidt/Update-LZ4-compressor-module/20161221-130109 HEAD 08652114d6d8867d2e8b3545b5e9f8a2fe11a5d7 builds fine.
      It only hurts bisectibility.

All errors (new ones prefixed by >>):

   fs/pstore/platform.c: In function 'compress_lz4':
>> fs/pstore/platform.c:345:8: error: implicit declaration of function 'LZ4_compress_default' [-Werror=implicit-function-declaration]
     ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
           ^~~~~~~~~~~~~~~~~~~~
   fs/pstore/platform.c: In function 'decompress_lz4':
>> fs/pstore/platform.c:358:8: error: implicit declaration of function 'LZ4_decompress_safe' [-Werror=implicit-function-declaration]
     ret = LZ4_decompress_safe(in, out, inlen, outlen);
           ^~~~~~~~~~~~~~~~~~~
   fs/pstore/platform.c: In function 'allocate_lz4':
>> fs/pstore/platform.c:369:20: error: implicit declaration of function 'LZ4_compressBound' [-Werror=implicit-function-declaration]
     big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
                       ^~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/LZ4_compress_default +345 fs/pstore/platform.c

   339	
   340	#ifdef CONFIG_PSTORE_LZ4_COMPRESS
   341	static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
   342	{
   343		int ret;
   344	
 > 345		ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
   346		if (ret) {
   347			pr_err("lz4_compress error, ret = %d!\n", ret);
   348			return -EIO;
   349		}
   350	
   351		return outlen;
   352	}
   353	
   354	static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
   355	{
   356		int ret;
   357	
 > 358		ret = LZ4_decompress_safe(in, out, inlen, outlen);
   359		if (ret) {
   360			pr_err("lz4_decompress error, ret = %d!\n", ret);
   361			return -EIO;
   362		}
   363	
   364		return outlen;
   365	}
   366	
   367	static void allocate_lz4(void)
   368	{
 > 369		big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
   370		big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
   371		if (big_oops_buf) {
   372			workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 23709 bytes --]

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version
  2016-12-20 18:53 ` [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version Sven Schmidt
  2016-12-21  5:16   ` kbuild test robot
@ 2016-12-21  5:18   ` kbuild test robot
  2016-12-22 13:21   ` Sergey Senozhatsky
  2016-12-22 13:27   ` Sergey Senozhatsky
  3 siblings, 0 replies; 104+ messages in thread
From: kbuild test robot @ 2016-12-21  5:18 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: kbuild-all, akpm, bongkyu.kim, sergey.senozhatsky, gregkh,
	linux-kernel, Sven Schmidt

[-- Attachment #1: Type: text/plain, Size: 1567 bytes --]

Hi Sven,

[auto build test ERROR on linus/master]
[also build test ERROR on next-20161221]
[cannot apply to v4.9]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Sven-Schmidt/Update-LZ4-compressor-module/20161221-130109
config: x86_64-randconfig-x004-201651 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

Note: the linux-review/Sven-Schmidt/Update-LZ4-compressor-module/20161221-130109 HEAD 08652114d6d8867d2e8b3545b5e9f8a2fe11a5d7 builds fine.
      It only hurts bisectibility.

All errors (new ones prefixed by >>):

   fs/squashfs/lz4_wrapper.c: In function 'lz4_uncompress':
>> fs/squashfs/lz4_wrapper.c:111:8: error: implicit declaration of function 'LZ4_decompress_safe' [-Werror=implicit-function-declaration]
     res = LZ4_decompress_safe(stream->input, stream->output, length, (int)dest_len);
           ^~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/LZ4_decompress_safe +111 fs/squashfs/lz4_wrapper.c

   105			buff += avail;
   106			bytes -= avail;
   107			offset = 0;
   108			put_bh(bh[i]);
   109		}
   110	
 > 111		res = LZ4_decompress_safe(stream->input, stream->output, length, (int)dest_len);
   112		if (res)
   113			return -EIO;
   114	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 27240 bytes --]

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/3] crypto: Change lz4 modules to work with new lz4 compressor module version
  2016-12-20 18:53 ` [PATCH 1/3] crypto: Change lz4 modules to work with new lz4 compressor module version Sven Schmidt
@ 2016-12-21  5:25   ` kbuild test robot
  2016-12-22 13:25   ` Sergey Senozhatsky
  1 sibling, 0 replies; 104+ messages in thread
From: kbuild test robot @ 2016-12-21  5:25 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: kbuild-all, akpm, bongkyu.kim, sergey.senozhatsky, gregkh,
	linux-kernel, Sven Schmidt

[-- Attachment #1: Type: text/plain, Size: 3282 bytes --]

Hi Sven,

[auto build test ERROR on linus/master]
[also build test ERROR on next-20161221]
[cannot apply to v4.9]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Sven-Schmidt/Update-LZ4-compressor-module/20161221-130109
config: i386-randconfig-x003-201651 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

Note: the linux-review/Sven-Schmidt/Update-LZ4-compressor-module/20161221-130109 HEAD 08652114d6d8867d2e8b3545b5e9f8a2fe11a5d7 builds fine.
      It only hurts bisectibility.

All errors (new ones prefixed by >>):

   crypto/lz4.c: In function 'lz4_compress_crypto':
>> crypto/lz4.c:55:8: error: implicit declaration of function 'LZ4_compress_default' [-Werror=implicit-function-declaration]
     err = LZ4_compress_default(src, dst, slen, tmp_len, ctx->lz4_comp_mem);
           ^~~~~~~~~~~~~~~~~~~~
   crypto/lz4.c: In function 'lz4_decompress_crypto':
>> crypto/lz4.c:71:8: error: implicit declaration of function 'LZ4_decompress_safe' [-Werror=implicit-function-declaration]
     err = LZ4_decompress_safe(src, dst, __slen, tmp_len);
           ^~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors
--
   crypto/lz4hc.c: In function 'lz4hc_compress_crypto':
>> crypto/lz4hc.c:55:8: error: implicit declaration of function 'LZ4_compress_HC' [-Werror=implicit-function-declaration]
     err = LZ4_compress_HC(src, dst, slen, tmp_len, LZ4HC_DEFAULT_CLEVEL, ctx->lz4hc_comp_mem);
           ^~~~~~~~~~~~~~~
>> crypto/lz4hc.c:55:49: error: 'LZ4HC_DEFAULT_CLEVEL' undeclared (first use in this function)
     err = LZ4_compress_HC(src, dst, slen, tmp_len, LZ4HC_DEFAULT_CLEVEL, ctx->lz4hc_comp_mem);
                                                    ^~~~~~~~~~~~~~~~~~~~
   crypto/lz4hc.c:55:49: note: each undeclared identifier is reported only once for each function it appears in
   crypto/lz4hc.c: In function 'lz4hc_decompress_crypto':
>> crypto/lz4hc.c:71:8: error: implicit declaration of function 'LZ4_decompress_safe' [-Werror=implicit-function-declaration]
     err = LZ4_decompress_safe(src, dst, __slen, (int)tmp_len);
           ^~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/LZ4_compress_default +55 crypto/lz4.c

    49				    unsigned int slen, u8 *dst, unsigned int *dlen)
    50	{
    51		struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
    52		size_t tmp_len = *dlen;
    53		int err;
    54	
  > 55		err = LZ4_compress_default(src, dst, slen, tmp_len, ctx->lz4_comp_mem);
    56	
    57		if (err < 0)
    58			return -EINVAL;
    59	
    60		*dlen = tmp_len;
    61		return 0;
    62	}
    63	
    64	static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
    65				      unsigned int slen, u8 *dst, unsigned int *dlen)
    66	{
    67		int err;
    68		size_t tmp_len = *dlen;
    69		size_t __slen = slen;
    70	
  > 71		err = LZ4_decompress_safe(src, dst, __slen, tmp_len);
    72		if (err < 0)
    73			return -EINVAL;
    74	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 24652 bytes --]

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2016-12-20 18:53 ` [PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
  2016-12-20 19:52   ` Joe Perches
@ 2016-12-21  7:09   ` kbuild test robot
  1 sibling, 0 replies; 104+ messages in thread
From: kbuild test robot @ 2016-12-21  7:09 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: kbuild-all, akpm, bongkyu.kim, sergey.senozhatsky, gregkh,
	linux-kernel, Sven Schmidt

[-- Attachment #1: Type: text/plain, Size: 2647 bytes --]

Hi Sven,

[auto build test ERROR on linus/master]
[also build test ERROR on next-20161221]
[cannot apply to v4.9]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Sven-Schmidt/Update-LZ4-compressor-module/20161221-130109
config: x86_64-randconfig-r0-12211341 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   In file included from /usr/lib/gcc/x86_64-linux-gnu/6/include/stdint.h:11:0,
                    from arch/x86/boot/compressed/../../../../lib/lz4/lz4defs.h:60,
                    from arch/x86/boot/compressed/../../../../lib/lz4/lz4_decompress.c:36,
                    from arch/x86/boot/compressed/../../../../lib/decompress_unlz4.c:13,
                    from arch/x86/boot/compressed/misc.c:75:
>> /usr/lib/gcc/x86_64-linux-gnu/6/include/stdint-gcc.h:43:24: error: conflicting types for 'int64_t'
    typedef __INT64_TYPE__ int64_t;
                           ^~~~~~~
   In file included from include/uapi/linux/screen_info.h:4:0,
                    from include/linux/screen_info.h:4,
                    from arch/x86/boot/compressed/misc.h:15,
                    from arch/x86/boot/compressed/misc.c:14:
   include/linux/types.h:113:17: note: previous declaration of 'int64_t' was here
    typedef  __s64  int64_t;
                    ^~~~~~~
   In file included from /usr/lib/gcc/x86_64-linux-gnu/6/include/stdint.h:11:0,
                    from arch/x86/boot/compressed/../../../../lib/lz4/lz4defs.h:60,
                    from arch/x86/boot/compressed/../../../../lib/lz4/lz4_decompress.c:36,
                    from arch/x86/boot/compressed/../../../../lib/decompress_unlz4.c:13,
                    from arch/x86/boot/compressed/misc.c:75:
>> /usr/lib/gcc/x86_64-linux-gnu/6/include/stdint-gcc.h:55:25: error: conflicting types for 'uint64_t'
    typedef __UINT64_TYPE__ uint64_t;
                            ^~~~~~~~
   In file included from include/uapi/linux/screen_info.h:4:0,
                    from include/linux/screen_info.h:4,
                    from arch/x86/boot/compressed/misc.h:15,
                    from arch/x86/boot/compressed/misc.c:14:
   include/linux/types.h:111:17: note: previous declaration of 'uint64_t' was here
    typedef  __u64  uint64_t;
                    ^~~~~~~~

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 21153 bytes --]

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2016-12-20 19:52   ` Joe Perches
@ 2016-12-21 20:04     ` Sven Schmidt
  2016-12-22 17:31       ` Greg KH
  0 siblings, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2016-12-21 20:04 UTC (permalink / raw)
  To: joe
  Cc: akpm, bongkyu.kim, sergey.senozhatsky, gregkh, linux-kernel,
	Sven Schmidt

On 12/20/2016 08:52 PM, Joe Perches wrote:
> On Tue, 2016-12-20 at 19:53 +0100, Sven Schmidt wrote:
>> This patch updates LZ4 kernel module to LZ4 v1.7.2 by Yann Collet.
>> The kernel module is inspired by the previous work by Chanho Min.
>> The updated LZ4 module will not break existing code since there were alias
>> methods added to ensure backwards compatibility.
>
>[]
>
>> diff --git a/include/linux/lz4.h b/include/linux/lz4.h
> []
>> @@ -1,87 +1,218 @@
>>  #ifndef __LZ4_H__
>>  #define __LZ4_H__
>> +
>>  /*
>>   * LZ4 Kernel Interface
>>   *
>> - * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
>> + * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
>
> Deleting copyright notices is poor form and shouldn't be done.
>
> I didn't look at the rest
>

Thanks Joe for your time. If you would take a look at the rest, you may notice
that I changed almost the whole file. That's why I also changed the copyright note.
If you think I should keep the original note, I will do that.

What I did change:
- I included additional information stating that the file is based on Yann Collets
original header file (especially in matters of the function comments)

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version
  2016-12-20 18:53 ` [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version Sven Schmidt
  2016-12-21  5:16   ` kbuild test robot
  2016-12-21  5:18   ` kbuild test robot
@ 2016-12-22 13:21   ` Sergey Senozhatsky
  2016-12-22 15:31     ` Sven Schmidt
  2016-12-22 13:27   ` Sergey Senozhatsky
  3 siblings, 1 reply; 104+ messages in thread
From: Sergey Senozhatsky @ 2016-12-22 13:21 UTC (permalink / raw)
  To: Sven Schmidt; +Cc: akpm, bongkyu.kim, sergey.senozhatsky, gregkh, linux-kernel

On (12/20/16 19:53), Sven Schmidt wrote:
> @@ -366,7 +366,7 @@ static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
>  
>  static void allocate_lz4(void)
>  {
> -	big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
> +	big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
>  	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
>  	if (big_oops_buf) {
>  		workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
> @@ -493,7 +493,6 @@ static void pstore_dump(struct kmsg_dumper *dumper,
>  		if (!is_locked) {
>  			pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
>  				       , in_nmi() ? "NMI" : why);
> -			return;

why did you remove this "failed to lock buf in critical path" return?


>  		}
>  	} else {
>  		spin_lock_irqsave(&psinfo->buf_lock, flags);
> @@ -585,8 +584,8 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
>  		} else {
>  			spin_lock_irqsave(&psinfo->buf_lock, flags);
>  		}
> -		psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0,
> -				  s, 0, c, psinfo);
> +		memcpy(psinfo->buf, s, c);
> +		psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo);

why extra memcpy()?

	-ss

>  		spin_unlock_irqrestore(&psinfo->buf_lock, flags);
>  		s += c;
>  		c = e - s;
> diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
> index ff4468b..c087a63 100644
> --- a/fs/squashfs/lz4_wrapper.c
> +++ b/fs/squashfs/lz4_wrapper.c
> @@ -108,8 +108,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
>  		put_bh(bh[i]);
>  	}
>  
> -	res = lz4_decompress_unknownoutputsize(stream->input, length,
> -					stream->output, &dest_len);
> +	res = LZ4_decompress_safe(stream->input, stream->output, length, (int)dest_len);
>  	if (res)
>  		return -EIO;

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/3] crypto: Change lz4 modules to work with new lz4 compressor module version
  2016-12-20 18:53 ` [PATCH 1/3] crypto: Change lz4 modules to work with new lz4 compressor module version Sven Schmidt
  2016-12-21  5:25   ` kbuild test robot
@ 2016-12-22 13:25   ` Sergey Senozhatsky
  1 sibling, 0 replies; 104+ messages in thread
From: Sergey Senozhatsky @ 2016-12-22 13:25 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: akpm, bongkyu.kim, sergey.senozhatsky, gregkh, linux-kernel,
	Herbert Xu, linux-crypto


Cc Herbert and linux-crypto on this

https://marc.info/?l=linux-kernel&m=148226084823926


On (12/20/16 19:53), Sven Schmidt wrote:
> 
> This patch updates the crypto modules using LZ4 compression to work with the
> new LZ4 kernel module version.
> 
> Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
> ---
>  crypto/lz4.c   | 93 +++++++--------------------------------------------------
>  crypto/lz4hc.c | 94 +++++++---------------------------------------------------
>  2 files changed, 22 insertions(+), 165 deletions(-)
> 
> diff --git a/crypto/lz4.c b/crypto/lz4.c
> index 99c1b2c..9fa217e 100644
> --- a/crypto/lz4.c
> +++ b/crypto/lz4.c
> @@ -23,53 +23,36 @@
>  #include <linux/crypto.h>
>  #include <linux/vmalloc.h>
>  #include <linux/lz4.h>
> -#include <crypto/internal/scompress.h>
>  
>  struct lz4_ctx {
>  	void *lz4_comp_mem;
>  };
>  
> -static void *lz4_alloc_ctx(struct crypto_scomp *tfm)
> -{
> -	void *ctx;
> -
> -	ctx = vmalloc(LZ4_MEM_COMPRESS);
> -	if (!ctx)
> -		return ERR_PTR(-ENOMEM);
> -
> -	return ctx;
> -}
> -
>  static int lz4_init(struct crypto_tfm *tfm)
>  {
>  	struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
>  
> -	ctx->lz4_comp_mem = lz4_alloc_ctx(NULL);
> -	if (IS_ERR(ctx->lz4_comp_mem))
> +	ctx->lz4_comp_mem = vmalloc(LZ4_MEM_COMPRESS);
> +	if (!ctx->lz4_comp_mem)
>  		return -ENOMEM;
>  
>  	return 0;
>  }
>  
> -static void lz4_free_ctx(struct crypto_scomp *tfm, void *ctx)
> -{
> -	vfree(ctx);
> -}
> -
>  static void lz4_exit(struct crypto_tfm *tfm)
>  {
>  	struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
> -
> -	lz4_free_ctx(NULL, ctx->lz4_comp_mem);
> +	vfree(ctx->lz4_comp_mem);
>  }
>  
> -static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
> -				 u8 *dst, unsigned int *dlen, void *ctx)
> +static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
> +			    unsigned int slen, u8 *dst, unsigned int *dlen)
>  {
> +	struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
>  	size_t tmp_len = *dlen;
>  	int err;
>  
> -	err = lz4_compress(src, slen, dst, &tmp_len, ctx);
> +	err = LZ4_compress_default(src, dst, slen, tmp_len, ctx->lz4_comp_mem);
>  
>  	if (err < 0)
>  		return -EINVAL;
> @@ -78,29 +61,14 @@ static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
>  	return 0;
>  }
>  
> -static int lz4_scompress(struct crypto_scomp *tfm, const u8 *src,
> -			 unsigned int slen, u8 *dst, unsigned int *dlen,
> -			 void *ctx)
> -{
> -	return __lz4_compress_crypto(src, slen, dst, dlen, ctx);
> -}
> -
> -static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
> -			       unsigned int slen, u8 *dst, unsigned int *dlen)
> -{
> -	struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
> -
> -	return __lz4_compress_crypto(src, slen, dst, dlen, ctx->lz4_comp_mem);
> -}
> -
> -static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
> -				   u8 *dst, unsigned int *dlen, void *ctx)
> +static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
> +			      unsigned int slen, u8 *dst, unsigned int *dlen)
>  {
>  	int err;
>  	size_t tmp_len = *dlen;
>  	size_t __slen = slen;
>  
> -	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
> +	err = LZ4_decompress_safe(src, dst, __slen, tmp_len);
>  	if (err < 0)
>  		return -EINVAL;
>  
> @@ -108,20 +76,6 @@ static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
>  	return err;
>  }
>  
> -static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
> -			   unsigned int slen, u8 *dst, unsigned int *dlen,
> -			   void *ctx)
> -{
> -	return __lz4_decompress_crypto(src, slen, dst, dlen, NULL);
> -}
> -
> -static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
> -				 unsigned int slen, u8 *dst,
> -				 unsigned int *dlen)
> -{
> -	return __lz4_decompress_crypto(src, slen, dst, dlen, NULL);
> -}
> -
>  static struct crypto_alg alg_lz4 = {
>  	.cra_name		= "lz4",
>  	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
> @@ -135,39 +89,14 @@ static struct crypto_alg alg_lz4 = {
>  	.coa_decompress		= lz4_decompress_crypto } }
>  };
>  
> -static struct scomp_alg scomp = {
> -	.alloc_ctx		= lz4_alloc_ctx,
> -	.free_ctx		= lz4_free_ctx,
> -	.compress		= lz4_scompress,
> -	.decompress		= lz4_sdecompress,
> -	.base			= {
> -		.cra_name	= "lz4",
> -		.cra_driver_name = "lz4-scomp",
> -		.cra_module	 = THIS_MODULE,
> -	}
> -};
> -
>  static int __init lz4_mod_init(void)
>  {
> -	int ret;
> -
> -	ret = crypto_register_alg(&alg_lz4);
> -	if (ret)
> -		return ret;
> -
> -	ret = crypto_register_scomp(&scomp);
> -	if (ret) {
> -		crypto_unregister_alg(&alg_lz4);
> -		return ret;
> -	}
> -
> -	return ret;
> +	return crypto_register_alg(&alg_lz4);
>  }
>  
>  static void __exit lz4_mod_fini(void)
>  {
>  	crypto_unregister_alg(&alg_lz4);
> -	crypto_unregister_scomp(&scomp);
>  }
>  
>  module_init(lz4_mod_init);
> diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
> index 75ffc4a..aa1265c 100644
> --- a/crypto/lz4hc.c
> +++ b/crypto/lz4hc.c
> @@ -22,53 +22,37 @@
>  #include <linux/crypto.h>
>  #include <linux/vmalloc.h>
>  #include <linux/lz4.h>
> -#include <crypto/internal/scompress.h>
>  
>  struct lz4hc_ctx {
>  	void *lz4hc_comp_mem;
>  };
>  
> -static void *lz4hc_alloc_ctx(struct crypto_scomp *tfm)
> -{
> -	void *ctx;
> -
> -	ctx = vmalloc(LZ4HC_MEM_COMPRESS);
> -	if (!ctx)
> -		return ERR_PTR(-ENOMEM);
> -
> -	return ctx;
> -}
> -
>  static int lz4hc_init(struct crypto_tfm *tfm)
>  {
>  	struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
>  
> -	ctx->lz4hc_comp_mem = lz4hc_alloc_ctx(NULL);
> -	if (IS_ERR(ctx->lz4hc_comp_mem))
> +	ctx->lz4hc_comp_mem = vmalloc(LZ4HC_MEM_COMPRESS);
> +	if (!ctx->lz4hc_comp_mem)
>  		return -ENOMEM;
>  
>  	return 0;
>  }
>  
> -static void lz4hc_free_ctx(struct crypto_scomp *tfm, void *ctx)
> -{
> -	vfree(ctx);
> -}
> -
>  static void lz4hc_exit(struct crypto_tfm *tfm)
>  {
>  	struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
>  
> -	lz4hc_free_ctx(NULL, ctx->lz4hc_comp_mem);
> +	vfree(ctx->lz4hc_comp_mem);
>  }
>  
> -static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
> -				   u8 *dst, unsigned int *dlen, void *ctx)
> +static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
> +			    unsigned int slen, u8 *dst, unsigned int *dlen)
>  {
> +	struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
>  	size_t tmp_len = *dlen;
>  	int err;
>  
> -	err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
> +	err = LZ4_compress_HC(src, dst, slen, tmp_len, LZ4HC_DEFAULT_CLEVEL, ctx->lz4hc_comp_mem);
>  
>  	if (err < 0)
>  		return -EINVAL;
> @@ -77,31 +61,14 @@ static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
>  	return 0;
>  }
>  
> -static int lz4hc_scompress(struct crypto_scomp *tfm, const u8 *src,
> -			   unsigned int slen, u8 *dst, unsigned int *dlen,
> -			   void *ctx)
> -{
> -	return __lz4hc_compress_crypto(src, slen, dst, dlen, ctx);
> -}
> -
> -static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
> -				 unsigned int slen, u8 *dst,
> -				 unsigned int *dlen)
> -{
> -	struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
> -
> -	return __lz4hc_compress_crypto(src, slen, dst, dlen,
> -					ctx->lz4hc_comp_mem);
> -}
> -
> -static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
> -				     u8 *dst, unsigned int *dlen, void *ctx)
> +static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
> +			      unsigned int slen, u8 *dst, unsigned int *dlen)
>  {
>  	int err;
>  	size_t tmp_len = *dlen;
>  	size_t __slen = slen;
>  
> -	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
> +	err = LZ4_decompress_safe(src, dst, __slen, (int)tmp_len);
>  	if (err < 0)
>  		return -EINVAL;
>  
> @@ -109,20 +76,6 @@ static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
>  	return err;
>  }
>  
> -static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
> -			     unsigned int slen, u8 *dst, unsigned int *dlen,
> -			     void *ctx)
> -{
> -	return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL);
> -}
> -
> -static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
> -				   unsigned int slen, u8 *dst,
> -				   unsigned int *dlen)
> -{
> -	return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL);
> -}
> -
>  static struct crypto_alg alg_lz4hc = {
>  	.cra_name		= "lz4hc",
>  	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
> @@ -136,39 +89,14 @@ static struct crypto_alg alg_lz4hc = {
>  	.coa_decompress		= lz4hc_decompress_crypto } }
>  };
>  
> -static struct scomp_alg scomp = {
> -	.alloc_ctx		= lz4hc_alloc_ctx,
> -	.free_ctx		= lz4hc_free_ctx,
> -	.compress		= lz4hc_scompress,
> -	.decompress		= lz4hc_sdecompress,
> -	.base			= {
> -		.cra_name	= "lz4hc",
> -		.cra_driver_name = "lz4hc-scomp",
> -		.cra_module	 = THIS_MODULE,
> -	}
> -};
> -
>  static int __init lz4hc_mod_init(void)
>  {
> -	int ret;
> -
> -	ret = crypto_register_alg(&alg_lz4hc);
> -	if (ret)
> -		return ret;
> -
> -	ret = crypto_register_scomp(&scomp);
> -	if (ret) {
> -		crypto_unregister_alg(&alg_lz4hc);
> -		return ret;
> -	}
> -
> -	return ret;
> +	return crypto_register_alg(&alg_lz4hc);
>  }
>  
>  static void __exit lz4hc_mod_fini(void)
>  {
>  	crypto_unregister_alg(&alg_lz4hc);
> -	crypto_unregister_scomp(&scomp);
>  }
>  
>  module_init(lz4hc_mod_init);
> -- 
> 2.1.4
> 

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version
  2016-12-20 18:53 ` [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version Sven Schmidt
                     ` (2 preceding siblings ...)
  2016-12-22 13:21   ` Sergey Senozhatsky
@ 2016-12-22 13:27   ` Sergey Senozhatsky
  3 siblings, 0 replies; 104+ messages in thread
From: Sergey Senozhatsky @ 2016-12-22 13:27 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: akpm, bongkyu.kim, sergey.senozhatsky, gregkh, linux-kernel,
	Anton Vorontsov, Colin Cross, Kees Cook, Tony Luck

Cc Anton, Colin, Kees, Tony on this

https://marc.info/?l=linux-kernel&m=148226086323932

On (12/20/16 19:53), Sven Schmidt wrote:
> 
> This patch updates the fs/pstore and fs/squashfs to work with the
> new LZ4 kernel module version.
> 
> Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
> ---
>  fs/pstore/platform.c      | 11 +++++------
>  fs/squashfs/lz4_wrapper.c |  3 +--
>  2 files changed, 6 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
> index 729677e..037dc07 100644
> --- a/fs/pstore/platform.c
> +++ b/fs/pstore/platform.c
> @@ -342,7 +342,7 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
>  {
>  	int ret;
>  
> -	ret = lz4_compress(in, inlen, out, &outlen, workspace);
> +	ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
>  	if (ret) {
>  		pr_err("lz4_compress error, ret = %d!\n", ret);
>  		return -EIO;
> @@ -355,7 +355,7 @@ static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
>  {
>  	int ret;
>  
> -	ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
> +	ret = LZ4_decompress_safe(in, out, inlen, outlen);
>  	if (ret) {
>  		pr_err("lz4_decompress error, ret = %d!\n", ret);
>  		return -EIO;
> @@ -366,7 +366,7 @@ static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
>  
>  static void allocate_lz4(void)
>  {
> -	big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
> +	big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
>  	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
>  	if (big_oops_buf) {
>  		workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
> @@ -493,7 +493,6 @@ static void pstore_dump(struct kmsg_dumper *dumper,
>  		if (!is_locked) {
>  			pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
>  				       , in_nmi() ? "NMI" : why);
> -			return;
>  		}
>  	} else {
>  		spin_lock_irqsave(&psinfo->buf_lock, flags);
> @@ -585,8 +584,8 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
>  		} else {
>  			spin_lock_irqsave(&psinfo->buf_lock, flags);
>  		}
> -		psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0,
> -				  s, 0, c, psinfo);
> +		memcpy(psinfo->buf, s, c);
> +		psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo);
>  		spin_unlock_irqrestore(&psinfo->buf_lock, flags);
>  		s += c;
>  		c = e - s;
> diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
> index ff4468b..c087a63 100644
> --- a/fs/squashfs/lz4_wrapper.c
> +++ b/fs/squashfs/lz4_wrapper.c
> @@ -108,8 +108,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
>  		put_bh(bh[i]);
>  	}
>  
> -	res = lz4_decompress_unknownoutputsize(stream->input, length,
> -					stream->output, &dest_len);
> +	res = LZ4_decompress_safe(stream->input, stream->output, length, (int)dest_len);
>  	if (res)
>  		return -EIO;
>  
> -- 
> 2.1.4
> 

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version
  2016-12-22 13:21   ` Sergey Senozhatsky
@ 2016-12-22 15:31     ` Sven Schmidt
  2016-12-22 15:36       ` Sergey Senozhatsky
  0 siblings, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2016-12-22 15:31 UTC (permalink / raw)
  To: sergey.senozhatsky
  Cc: akpm, bongkyu.kim, gregkh, ccross, keescook, anton, linux-kernel,
	tony.luck, Sven Schmidt

On 12/22/2016 02:21 PM, Sergey Senozhatsky wrote:
> On (12/20/16 19:53), Sven Schmidt wrote:
>> @@ -366,7 +366,7 @@ static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
>>  
>>  static void allocate_lz4(void)
>>  {
>> -	big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
>> +	big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
>>  	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
>>  	if (big_oops_buf) {
>>  		workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
>> @@ -493,7 +493,6 @@ static void pstore_dump(struct kmsg_dumper *dumper,
>>  		if (!is_locked) {
>>  			pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
>>  				       , in_nmi() ? "NMI" : why);
>> -			return;
> 
> why did you remove this "failed to lock buf in critical path" return?
> 
> 
>>  		}
>>  	} else {
>>  		spin_lock_irqsave(&psinfo->buf_lock, flags);
>> @@ -585,8 +584,8 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
>>  		} else {
>>  			spin_lock_irqsave(&psinfo->buf_lock, flags);
>>  		}
>> -		psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0,
>> -				  s, 0, c, psinfo);
>> +		memcpy(psinfo->buf, s, c);
>> +		psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo);
> 
> why extra memcpy()?
> 
> 	-ss
> 
>>  		spin_unlock_irqrestore(&psinfo->buf_lock, flags);
>>  		s += c;
>>  		c = e - s;
>> diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
>> index ff4468b..c087a63 100644
>> --- a/fs/squashfs/lz4_wrapper.c
>> +++ b/fs/squashfs/lz4_wrapper.c
>> @@ -108,8 +108,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
>>  		put_bh(bh[i]);
>>  	}
>>  
>> -	res = lz4_decompress_unknownoutputsize(stream->input, length,
>> -					stream->output, &dest_len);
>> +	res = LZ4_decompress_safe(stream->input, stream->output, length, (int)dest_len);
>>  	if (res)
>>  		return -EIO;
>

Hey Sergey and thanks for your time. I'm afraid I messed up the diffstat on this.
Actually I just replaced the function calls and corrected the use of return values.
I will send an updated patchset later since there also have been errors
reported from the buildbots.

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version
  2016-12-22 15:31     ` Sven Schmidt
@ 2016-12-22 15:36       ` Sergey Senozhatsky
  0 siblings, 0 replies; 104+ messages in thread
From: Sergey Senozhatsky @ 2016-12-22 15:36 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: sergey.senozhatsky, akpm, bongkyu.kim, gregkh, ccross, keescook,
	anton, linux-kernel, tony.luck

On (12/22/16 16:31), Sven Schmidt wrote:
> Hey Sergey and thanks for your time. I'm afraid I messed up the diffstat on this.
> Actually I just replaced the function calls and corrected the use of return values.
> I will send an updated patchset later since there also have been errors
> reported from the buildbots.

and please use scripts/get_maintainer.pl so maintainers are in Cc list.

	-ss

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 0/3] Update LZ4 compressor module
  2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
                   ` (2 preceding siblings ...)
  2016-12-20 18:53 ` [PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
@ 2016-12-22 17:29 ` Greg KH
  2016-12-22 18:35   ` Sven Schmidt
  2017-01-07 16:55 ` [PATCH v2 0/4] " Sven Schmidt
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 104+ messages in thread
From: Greg KH @ 2016-12-22 17:29 UTC (permalink / raw)
  To: Sven Schmidt; +Cc: akpm, bongkyu.kim, sergey.senozhatsky, linux-kernel

On Tue, Dec 20, 2016 at 07:53:09PM +0100, Sven Schmidt wrote:
> 
> This patchset is for updating the LZ4 compression module to a version based
> on LZ4 v1.7.2 allowing to use the fast compression algorithm aka LZ4 fast
> which provides an "acceleration" parameter as a tradeoff between
> compression ratio and compression speed.

But why do this?

> We will use LZ4 fast in order to support compression in lustre. LZ4 fast empowers
> us to do client-side as well as server-side compression/decompression while
> being able to provide appropriate parameters to enable users to tune lustre's
> behaviour to obtain the best performance/compression/etc. on their behalf
> (adapative compression).

We don't care about lustre, especially as it is not merged into the main
portion of the kernel tree :)

Seriously, work on fixing up the known issues in lustre before adding
additional features, I've only been saying this for a few _years_ now...

> Also, it will be useful for other users of LZ4 compression,
> as with LZ4 fast it is possible to enable applications to use fast and/or high
> compression depending of the usecase. E.g. a developer can use very
> high compression (low acceleration) for sending data over a network with
> limited rate of transmission or he trades the compression ratio for higher
> compression speed.

This whole patch series is broken, always test-build your code, there's
nothing we could do with these patches even if we wanted to :(

greg k-h

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2016-12-21 20:04     ` Sven Schmidt
@ 2016-12-22 17:31       ` Greg KH
  2016-12-22 18:39         ` Sven Schmidt
  0 siblings, 1 reply; 104+ messages in thread
From: Greg KH @ 2016-12-22 17:31 UTC (permalink / raw)
  To: Sven Schmidt; +Cc: joe, akpm, bongkyu.kim, sergey.senozhatsky, linux-kernel

On Wed, Dec 21, 2016 at 09:04:02PM +0100, Sven Schmidt wrote:
> On 12/20/2016 08:52 PM, Joe Perches wrote:
> > On Tue, 2016-12-20 at 19:53 +0100, Sven Schmidt wrote:
> >> This patch updates LZ4 kernel module to LZ4 v1.7.2 by Yann Collet.
> >> The kernel module is inspired by the previous work by Chanho Min.
> >> The updated LZ4 module will not break existing code since there were alias
> >> methods added to ensure backwards compatibility.
> >
> >[]
> >
> >> diff --git a/include/linux/lz4.h b/include/linux/lz4.h
> > []
> >> @@ -1,87 +1,218 @@
> >>  #ifndef __LZ4_H__
> >>  #define __LZ4_H__
> >> +
> >>  /*
> >>   * LZ4 Kernel Interface
> >>   *
> >> - * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
> >> + * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
> >
> > Deleting copyright notices is poor form and shouldn't be done.
> >
> > I didn't look at the rest
> >
> 
> Thanks Joe for your time. If you would take a look at the rest, you may notice
> that I changed almost the whole file. That's why I also changed the copyright note.
> If you think I should keep the original note, I will do that.

You have to, please consult a lawyer if you have questions about
copyright issues.  Or better yet, there's a free presentation/course
online about copyright on the linuxfoundation.org website somewhere that
should answer all of your questions here.  I recommend taking a few
minutes and going through that.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 0/3] Update LZ4 compressor module
  2016-12-22 17:29 ` [PATCH 0/3] Update LZ4 compressor module Greg KH
@ 2016-12-22 18:35   ` Sven Schmidt
  2016-12-23 20:53     ` Greg KH
  0 siblings, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2016-12-22 18:35 UTC (permalink / raw)
  To: gregkh; +Cc: akpm, bongkyu.kim, sergey.senozhatsky, linux-kernel, Sven Schmidt

On 12/22/2016 06:29 PM, Greg KH wrote:
> On Tue, Dec 20, 2016 at 07:53:09PM +0100, Sven Schmidt wrote:
>>
>> This patchset is for updating the LZ4 compression module to a version based
>> on LZ4 v1.7.2 allowing to use the fast compression algorithm aka LZ4 fast
>> which provides an "acceleration" parameter as a tradeoff between
>> compression ratio and compression speed.
>
> But why do this?
>
>> We will use LZ4 fast in order to support compression in lustre. LZ4 fast empowers
>> us to do client-side as well as server-side compression/decompression while
>> being able to provide appropriate parameters to enable users to tune lustre's
>> behaviour to obtain the best performance/compression/etc. on their behalf
>> (adapative compression).
>
> We don't care about lustre, especially as it is not merged into the main
> portion of the kernel tree :)
>
> Seriously, work on fixing up the known issues in lustre before adding
> additional features, I've only been saying this for a few _years_ now...
>

Hey Greg and thanks for your time. Actually, we're not the guys behind lustre,
hence I'd leave fixing the bugs in lustre to them ;)

I'm working with the research group for scientific computing on the University
of Hamburg on the German Climate Computing Centre. The research group is working
with high performance storage systems etc. In this case, we investigate
data reduction techniques in behalf of the increasing gap between computational speed,
network speed and storage capacity. That's why we're ultimately aiming
for compression support in lustre.

Initial studies have shown that an adequate compression ratio for scientific data
can be archieved using LZ4. Since the currently available version of LZ4
in the kernel is about three years old, we would love if you accept 
our work on getting a more current version into the kernel.

>> Also, it will be useful for other users of LZ4 compression,
>> as with LZ4 fast it is possible to enable applications to use fast and/or high
>> compression depending of the usecase. E.g. a developer can use very
>> high compression (low acceleration) for sending data over a network with
>> limited rate of transmission or he trades the compression ratio for higher
>> compression speed.
>
> This whole patch series is broken, always test-build your code, there's
> nothing we could do with these patches even if we wanted to :(
>
> greg k-h
>

I'm sorry, this is my first patchset, so please be kind :(
Already got rid of the issues on my local machine and reviewed the output from
the buildbots. Will send an updated patchset later!

Thanks
Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2016-12-22 17:31       ` Greg KH
@ 2016-12-22 18:39         ` Sven Schmidt
  0 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2016-12-22 18:39 UTC (permalink / raw)
  To: gregkh
  Cc: joe, akpm, bongkyu.kim, sergey.senozhatsky, linux-kernel, Sven Schmidt

On 12/22/2016 06:31 PM, Greg KH wrote:
>On Wed, Dec 21, 2016 at 09:04:02PM +0100, Sven Schmidt wrote:
>> On 12/20/2016 08:52 PM, Joe Perches wrote:
>> > On Tue, 2016-12-20 at 19:53 +0100, Sven Schmidt wrote:
>> >> This patch updates LZ4 kernel module to LZ4 v1.7.2 by Yann Collet.
>> >> The kernel module is inspired by the previous work by Chanho Min.
>> >> The updated LZ4 module will not break existing code since there were alias
>> >> methods added to ensure backwards compatibility.
>> >
>> >[]
>> >
>> >> diff --git a/include/linux/lz4.h b/include/linux/lz4.h
>> > []
>> >> @@ -1,87 +1,218 @@
>> >>  #ifndef __LZ4_H__
>> >>  #define __LZ4_H__
>> >> +
>> >>  /*
>> >>   * LZ4 Kernel Interface
>> >>   *
>> >> - * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
>> >> + * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
>> >
>> > Deleting copyright notices is poor form and shouldn't be done.
>> >
>> > I didn't look at the rest
>> >
>>
>> Thanks Joe for your time. If you would take a look at the rest, you may notice
>> that I changed almost the whole file. That's why I also changed the copyright note.
>> If you think I should keep the original note, I will do that.
>
>You have to, please consult a lawyer if you have questions about
>copyright issues.  Or better yet, there's a free presentation/course
>online about copyright on the linuxfoundation.org website somewhere that
>should answer all of your questions here.  I recommend taking a few
>minutes and going through that.
>
>thanks,
>
>greg k-h
>

Will do, thanks Greg!

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 0/3] Update LZ4 compressor module
  2016-12-22 18:35   ` Sven Schmidt
@ 2016-12-23 20:53     ` Greg KH
  0 siblings, 0 replies; 104+ messages in thread
From: Greg KH @ 2016-12-23 20:53 UTC (permalink / raw)
  To: Sven Schmidt; +Cc: akpm, bongkyu.kim, sergey.senozhatsky, linux-kernel

On Thu, Dec 22, 2016 at 07:35:14PM +0100, Sven Schmidt wrote:
> On 12/22/2016 06:29 PM, Greg KH wrote:
> > On Tue, Dec 20, 2016 at 07:53:09PM +0100, Sven Schmidt wrote:
> >>
> >> This patchset is for updating the LZ4 compression module to a version based
> >> on LZ4 v1.7.2 allowing to use the fast compression algorithm aka LZ4 fast
> >> which provides an "acceleration" parameter as a tradeoff between
> >> compression ratio and compression speed.
> >
> > But why do this?
> >
> >> We will use LZ4 fast in order to support compression in lustre. LZ4 fast empowers
> >> us to do client-side as well as server-side compression/decompression while
> >> being able to provide appropriate parameters to enable users to tune lustre's
> >> behaviour to obtain the best performance/compression/etc. on their behalf
> >> (adapative compression).
> >
> > We don't care about lustre, especially as it is not merged into the main
> > portion of the kernel tree :)
> >
> > Seriously, work on fixing up the known issues in lustre before adding
> > additional features, I've only been saying this for a few _years_ now...
> >
> 
> Hey Greg and thanks for your time. Actually, we're not the guys behind lustre,
> hence I'd leave fixing the bugs in lustre to them ;)
> 
> I'm working with the research group for scientific computing on the University
> of Hamburg on the German Climate Computing Centre. The research group is working
> with high performance storage systems etc. In this case, we investigate
> data reduction techniques in behalf of the increasing gap between computational speed,
> network speed and storage capacity. That's why we're ultimately aiming
> for compression support in lustre.
> 
> Initial studies have shown that an adequate compression ratio for scientific data
> can be archieved using LZ4. Since the currently available version of LZ4
> in the kernel is about three years old, we would love if you accept 
> our work on getting a more current version into the kernel.

But the LZ4 code isn't really used much in the kernel today, so if you
change it, what is really going to benefit?  And if you want to archive
things, you aren't using the in-kernel lz4 code, right?

If you want to add support for compression for lustre, great, I suggest
working with those developers, but note, I can't take any new features
for lustre until that code is out of drivers/staging/.

> >> Also, it will be useful for other users of LZ4 compression,
> >> as with LZ4 fast it is possible to enable applications to use fast and/or high
> >> compression depending of the usecase. E.g. a developer can use very
> >> high compression (low acceleration) for sending data over a network with
> >> limited rate of transmission or he trades the compression ratio for higher
> >> compression speed.
> >
> > This whole patch series is broken, always test-build your code, there's
> > nothing we could do with these patches even if we wanted to :(
> >
> > greg k-h
> >
> 
> I'm sorry, this is my first patchset, so please be kind :(

Not a problem, just always test-build your patches, in series, so that
we don't get grumpy for obvious problems :)

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v2 0/4] Update LZ4 compressor module
  2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
                   ` (3 preceding siblings ...)
  2016-12-22 17:29 ` [PATCH 0/3] Update LZ4 compressor module Greg KH
@ 2017-01-07 16:55 ` Sven Schmidt
  2017-01-07 16:55   ` [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
                     ` (3 more replies)
  2017-01-21 15:09 ` [PATCH v3 0/4] Update LZ4 compressor module Sven Schmidt
                   ` (5 subsequent siblings)
  10 siblings, 4 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-07 16:55 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip


This patchset is for updating the LZ4 compression module to a version based
on LZ4 v1.7.2 allowing to use the fast compression algorithm aka LZ4 fast
which provides an "acceleration" parameter as a tradeoff between
high compression ratio and high compression speed.

We want to use LZ4 fast in order to support compression in lustre
and (mostly, based on that) investigate data reduction techniques in behalf of
storage systems.

Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
it is possible to enable applications to use fast and/or high compression
depending on the usecase.
For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
LZ4 in the kernel.

LZ4 homepage: http://www.lz4.org/
LZ4 source repository: https://github.com/lz4/lz4
Source version: 1.7.2

Benchmark (taken from [1], Core i5-4300U @1.9GHz):
----------------|--------------|----------------|----------
Compressor      | Compression  | Decompression  | Ratio
----------------|--------------|----------------|----------
memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101

[1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html

[PATCHv2 1/4] lib: Update LZ4  compressor module based on LZ4 v1.7.2
[PATCHv2 2/4] lib: Update decompress_unlz4 wrapper to work with new LZ4 module
[PATCHv2 3/4] crypto: Change lz4 modules to work with new LZ4 module
[PATCHv2 4/4] fs/pstore: fs/squashfs: Change LZ4 compressor functions to work with new LZ4 module

v2:
- Changed order of the patches since in the initial patchset the lz4.h was in the
  last patch but was referenced by the other ones
- Split lib/decompress_unlz4.c in an own patch
- Fixed errors reported by the buildbot
- Further refactorings
- Added more appropriate copyright note to include/linux/lz4.h

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2017-01-07 16:55 ` [PATCH v2 0/4] " Sven Schmidt
@ 2017-01-07 16:55   ` Sven Schmidt
  2017-01-08 11:22     ` Greg KH
  2017-01-08 11:25     ` Greg KH
  2017-01-07 16:55   ` [PATCH v2 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
                     ` (2 subsequent siblings)
  3 siblings, 2 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-07 16:55 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt

This patch updates LZ4 kernel module to LZ4 v1.7.2 by Yann Collet.
The kernel module is inspired by the previous work by Chanho Min.
The updated LZ4 module will not break existing code since there were alias
methods added to ensure backwards compatibility.

API changes:

New method LZ4_compress_fast which differs from the variant available in kernel
by the new acceleration parameter, allowing to trade compression ratio for
more speedup.

LZ4_decompress_fast is the respective decompression method,
allowing to decompress data compressed with any LZ4 compression method including
LZ4HC.

Also the useful functions LZ4_decompress_safe_partial
LZ4_compress_destsize were added. The latter reverses the logic by trying to
compress as much data as possible from source to dest while the former aims
to decompress partial blocks of data.

The methods lz4_compress and lz4_decompress_unknownoutputsize are now known as
LZ4_compress_default respectivley LZ4_decompress_safe. The old methods are still
available for offering backwards compatibility.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      |  307 ++++++++++----
 lib/lz4/lz4_compress.c   | 1024 +++++++++++++++++++++++++++------------------
 lib/lz4/lz4_decompress.c |  631 ++++++++++++++--------------
 lib/lz4/lz4defs.h        |  378 +++++++++++------
 lib/lz4/lz4hc_compress.c | 1045 ++++++++++++++++++++++++----------------------
 5 files changed, 1938 insertions(+), 1447 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 6b784c5..20280f8 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -1,87 +1,238 @@
+/* LZ4 Kernel Interface
+
+Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License version 2 as
+published by the Free Software Foundation.
+
+This file is based on the original header file for LZ4 - Fast LZ compression algorithm.
+
+LZ4 - Fast LZ compression algorithm
+Copyright (C) 2011-2016, Yann Collet.
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+You can contact the author at :
+ - LZ4 homepage : http://www.lz4.org
+ - LZ4 source repository : https://github.com/lz4/lz4
+*/
+
 #ifndef __LZ4_H__
 #define __LZ4_H__
-/*
- * LZ4 Kernel Interface
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+
+#include <linux/types.h>
+
+/*-************************************************************************
+*  Constants
+**************************************************************************/
 #define LZ4_MEM_COMPRESS	(16384)
 #define LZ4HC_MEM_COMPRESS	(262144 + (2 * sizeof(unsigned char *)))
 
-/*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
- */
-static inline size_t lz4_compressbound(size_t isize)
-{
-	return isize + (isize / 255) + 16;
+#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+#define LZ4HC_MIN_CLEVEL        3
+#define LZ4HC_DEFAULT_CLEVEL    9
+#define LZ4HC_MAX_CLEVEL        16
+
+/*-************************************************************************
+*  Compression Functions
+**************************************************************************/
+
+/*!LZ4_compressbound() :
+    Provides the maximum size that LZ4 may output in a "worst case" scenario
+    (input data not compressible)
+*/
+static inline int LZ4_compressBound(int isize) {
+  return LZ4_COMPRESSBOUND(isize);
 }
 
-/*
- * lz4_compress()
- *	src     : source address of the original data
- *	src_len : size of the original data
- *	dst	: output buffer address of the compressed data
- *		This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len : is the output size, which is returned after compress done
- *	workmem : address of the working memory.
- *		This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer and workmem must be already allocated with
- *		the defined size.
- */
-int lz4_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
-  * lz4hc_compress()
-  *	 src	 : source address of the original data
-  *	 src_len : size of the original data
-  *	 dst	 : output buffer address of the compressed data
-  *		This requires 'dst' of size LZ4_COMPRESSBOUND.
-  *	 dst_len : is the output size, which is returned after compress done
-  *	 workmem : address of the working memory.
-  *		This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
-  *	 return  : Success if return 0
-  *		   Error if return (< 0)
-  *	 note :  Destination buffer and workmem must be already allocated with
-  *		 the defined size.
-  */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
-/*
- * lz4_decompress()
- *	src     : source address of the compressed data
- *	src_len : is the input size, whcih is returned after decompress done
- *	dest	: output buffer address of the decompressed data
- *	actual_dest_len: is the size of uncompressed data, supposing it's known
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
- *		slightly faster than lz4_decompress_unknownoutputsize()
- */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len);
-
-/*
- * lz4_decompress_unknownoutputsize()
- *	src     : source address of the compressed data
- *	src_len : is the input size, therefore the compressed size
- *	dest	: output buffer address of the decompressed data
- *	dest_len: is the max size of the destination buffer, which is
- *			returned with actual size of decompressed data after
- *			decompress done
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
+/*! lz4_compressbound() :
+    For backward compabitiliby
+*/
+static inline size_t lz4_compressbound(size_t isize) {
+	return (int)LZ4_COMPRESSBOUND(isize);
+}
+
+/*! LZ4_compress_default() :
+    Compresses 'sourceSize' bytes from buffer 'source'
+    into already allocated 'dest' buffer of size 'maxDestSize'.
+    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
+    It also runs faster, so it's a recommended setting.
+    If the function cannot compress 'source' into a more limited 'dest' budget,
+    compression stops *immediately*, and the function result is zero.
+    As a consequence, 'dest' content is not valid.
+    This function never writes outside 'dest' buffer, nor read outside 'source' buffer.
+        sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
+        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
+        workmem : address of the working memory. This requires 'workmem' of size LZ4_MEM_COMPRESS.
+        return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize)
+              or 0 if compression fails */
+int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize, void* wrkmem);
+
+/*!
+LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
+    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+    An acceleration value of "1" is the same as regular LZ4_compress_default()
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT, which is 1.
+*/
+int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, void* wrkmem, int acceleration);
+
+/*!
+LZ4_compress_destSize() :
+    Reverse the logic, by compressing as much data as possible from 'source' buffer
+    into already allocated buffer 'dest' of size 'targetDestSize'.
+    This function either compresses the entire 'source' content into 'dest' if it's large enough,
+    or fill 'dest' buffer completely with as much data as possible from 'source'.
+        *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'.
+                         New value is necessarily <= old value.
+        workmem : address of the working memory. This requires 'workmem' of size LZ4_MEM_COMPRESS.
+        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
+              or 0 if compression fails
+*/
+int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize, void* wrkmem);
+
+/*!
+ * lz4_compress() :
+    For backwards compabitiliby
+        src     : source address of the original data
+        src_len : size of the original data
+        dst     : output buffer address of the compressed data
+                This requires 'dst' of size LZ4_COMPRESSBOUND.
+        dst_len : is the output size, which is returned after compress done
+        workmem : address of the working memory.
+                This requires 'workmem' of size LZ4_MEM_COMPRESS.
+        return  : Success if return 0
+                  Error if return (< 0)
+        note :  Destination buffer and workmem must be already allocated with
+                the defined size.
  */
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len);
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len, void *wrkmem);
+
+/*-************************************************************************
+*  Decompression Functions
+**************************************************************************/
+
+/*!
+LZ4_decompress_fast() :
+    originalSize : is the original and therefore uncompressed size
+    return : the number of bytes read from the source buffer (in other words, the compressed size)
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
+    note : This function fully respect memory boundaries for properly formed compressed data.
+           It is a bit faster than LZ4_decompress_safe().
+           However, it does not provide any protection against intentionally modified data stream (malicious input).
+           Use this function in trusted environment only (data to decode comes from a trusted source).
+*/
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize);
+
+/*!
+LZ4_decompress_safe() :
+    compressedSize : is the precise full size of the compressed block.
+    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
+    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
+             If destination buffer is not large enough, decoding will stop and output an error code (<0).
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function is protected against buffer overflow exploits, including malicious data packets.
+             It never writes outside output buffer, nor reads outside input buffer.
+*/
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+
+/*!
+LZ4_decompress_safe_partial() :
+    This function decompress a compressed block of size 'compressedSize' at position 'source'
+    into destination buffer 'dest' of size 'maxDecompressedSize'.
+    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
+    reducing decompression time.
+    return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize)
+       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
+             Always control how many bytes were decoded.
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
+*/
+int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+
+/*!
+lz4_decompress_unknownoutputsize() :
+    For backwards compabitiliby
+        src     : source address of the compressed data
+        src_len : is the input size, therefore the compressed size
+        dest    : output buffer address of the decompressed data
+        dest_len: is the max size of the destination buffer, which is
+                        returned with actual size of decompressed data after
+                        decompress done
+        return  : Success if return 0
+                  Error if return (< 0)
+        note :  Destination buffer must be already allocated.
+*/
+int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, unsigned char *dest, size_t *dest_len);
+
+/*!
+lz4_decompress() :
+    For backwards compabitiliby
+        src     : source address of the compressed data
+        src_len : is the input size, which is returned after decompress done
+        dest    : output buffer address of the decompressed data
+        actual_dest_len: is the size of uncompressed data, supposing it's known
+        return  : Success if return 0
+                  Error if return (< 0)
+        note :  Destination buffer must be already allocated.
+                slightly faster than lz4_decompress_unknownoutputsize()
+*/
+int lz4_decompress(const unsigned char *src, size_t *src_len, unsigned char *dest, size_t actual_dest_len);
+
+/*-************************************************************************
+ *  LZ4 HC Compression
+ **************************************************************************/
+
+/*! LZ4_compress_HC() :
+    Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm. dst` must be already allocated.
+        wrkmem : address of the working memory. This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+        Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)`
+        Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE
+        compressionLevel` : Recommended values are between 4 and 9, although any value between 1 and LZ4HC_MAX_CLEVEL will work.
+                        Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+        @return : the number of bytes written into 'dst' or 0 if compression fails.
+*/
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel, void* wrkmem);
+
+/*! lz4hc_compress()
+    For backwards compabitiliby
+        src     : source address of the original data
+        src_len : size of the original data
+        dst     : output buffer address of the compressed data
+               This requires 'dst' of size LZ4_COMPRESSBOUND.
+        dst_len : is the output size, which is returned after compress done
+        workmem : address of the working memory.
+               This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
+        return  : Success if return 0
+                  Error if return (< 0)
+        note :  Destination buffer and workmem must be already allocated with
+                the defined size.
+*/
+int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len, void *wrkmem);
+
 #endif
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 28321d8..1cba405 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -1,443 +1,639 @@
 /*
- * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
- *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
- */
-
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-2016, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+   You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+
+    Changed for kernel use by:
+    Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+*/
+
+/*-************************************
+*  Includes
+**************************************/
+#include "lz4defs.h"
+#include <linux/lz4.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lz4.h>
 #include <asm/unaligned.h>
-#include "lz4defs.h"
 
 /*
- * LZ4_compressCtx :
- * -----------------
- * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
- * maximum size 'maxOutputSize'.  * If it cannot achieve it, compression
- * will stop, and result of the function will be zero.
- * return : the number of bytes written in buffer 'dest', or 0 if the
- * compression fails
+ * ACCELERATION_DEFAULT :
+ * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
  */
-static inline int lz4_compressctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+#define ACCELERATION_DEFAULT 1
+
+/*-******************************
+*  Compression functions
+********************************/
+static U32 LZ4_hashSequence(U32 sequence, tableType_t const tableType)
+{
+    if (tableType == byU16)
+        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
+    else
+        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
+}
+
+static const U64 prime5bytes = 889523592379ULL;
+static U32 LZ4_hashSequence64(size_t sequence, tableType_t const tableType)
+{
+    const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
+    const U32 hashMask = (1<<hashLog) - 1;
+    return ((sequence * prime5bytes) >> (40 - hashLog)) & hashMask;
+}
+
+static U32 LZ4_hashSequenceT(size_t sequence, tableType_t const tableType)
+{
+    if (LZ4_64bits())
+        return LZ4_hashSequence64(sequence, tableType);
+    return LZ4_hashSequence((U32)sequence, tableType);
+}
+
+static U32 LZ4_hashPosition(const void* p, tableType_t tableType) { return LZ4_hashSequenceT(LZ4_read_ARCH(p), tableType); }
+
+static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase)
+{
+    switch (tableType)
+    {
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
+    }
+}
+
+static void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    U32 const h = LZ4_hashPosition(p, tableType);
+    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
+    if (tableType == byU32) { const U32* const hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
+    { const U16* const hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
+}
+
+static const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    U32 const h = LZ4_hashPosition(p, tableType);
+    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+
+/** LZ4_compress_generic() :
+    inlined, to ensure branches are decided at compilation time */
+static inline int LZ4_compress_generic(
+                 void* const ctx,
+                 const char* const source,
+                 char* const dest,
+                 const int inputSize,
+                 const int maxOutputSize,
+                 const limitedOutput_directive outputLimited,
+                 const tableType_t tableType,
+                 const dict_directive dict,
+                 const dictIssue_directive dictIssue,
+                 const U32 acceleration)
 {
-	HTYPE *hashtable = (HTYPE *)ctx;
-	const u8 *ip = (u8 *)source;
-#if LZ4_ARCH64
-	const BYTE * const base = ip;
-#else
-	const int base = 0;
-#endif
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
-
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
-
-	/* First Byte */
-	hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-	ip++;
-	forwardh = LZ4_HASH_VALUE(ip);
-
-	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
-
-		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (unlikely(forwardip > mflimit))
-				goto _last_literals;
-
-			forwardh = LZ4_HASH_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = ip - base;
-		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
-
-		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source) &&
-			unlikely(ip[-1] == ref[-1])) {
-			ip--;
-			ref--;
-		}
-
-		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
-			(length >> 8) > oend))
-			return 0;
-
-		if (length >= (int)RUN_MASK) {
-			int len;
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
-
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
+    LZ4_stream_t_internal* const dictPtr = (LZ4_stream_t_internal*)ctx;
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* base;
+    const BYTE* lowLimit;
+    const BYTE* const lowRefLimit = ip - dictPtr->dictSize;
+    const BYTE* const dictionary = dictPtr->dictionary;
+    const BYTE* const dictEnd = dictionary + dictPtr->dictSize;
+    const size_t dictDelta = dictEnd - (const BYTE*)source;
+    const BYTE* anchor = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const olimit = op + maxOutputSize;
+
+    U32 forwardH;
+    size_t refDelta=0;
+
+    /* Init conditions */
+    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;   /* Unsupported inputSize, too large (or negative) */
+    switch(dict)
+    {
+    case noDict:
+    default:
+        base = (const BYTE*)source;
+        lowLimit = (const BYTE*)source;
+        break;
+    case withPrefix64k:
+        base = (const BYTE*)source - dictPtr->currentOffset;
+        lowLimit = (const BYTE*)source - dictPtr->dictSize;
+        break;
+    case usingExtDict:
+        base = (const BYTE*)source - dictPtr->currentOffset;
+        lowLimit = (const BYTE*)source;
+        break;
+    }
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
+    if (inputSize<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+
+    /* First Byte */
+    LZ4_putPosition(ip, ctx, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+    /* Main Loop */
+    for ( ; ; ) {
+        const BYTE* match;
+        BYTE* token;
+
+        /* Find a match */
+        {   const BYTE* forwardIp = ip;
+            unsigned step = 1;
+            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
+            do {
+                U32 const h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+
+                match = LZ4_getPositionOnHash(h, ctx, tableType, base);
+                if (dict==usingExtDict) {
+                    if (match < (const BYTE*)source) {
+                        refDelta = dictDelta;
+                        lowLimit = dictionary;
+                    } else {
+                        refDelta = 0;
+                        lowLimit = (const BYTE*)source;
+                }   }
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
+
+            } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0)
+                || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
+                || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) );
+        }
+
+        /* Catch up */
+        while (((ip>anchor) & (match+refDelta > lowLimit)) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; }
+
+        /* Encode Literals */
+        {   unsigned const litLength = (unsigned)(ip - anchor);
+            token = op++;
+            if ((outputLimited) &&  /* Check output buffer overflow */
+                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
+                return 0;
+            if (litLength >= RUN_MASK) {
+                int len = (int)litLength-RUN_MASK;
+                *token = (RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
+
+            /* Copy Literals */
+            LZ4_wildCopy(op, anchor, op+litLength);
+            op+=litLength;
+        }
+
 _next_match:
-		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
-
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
-		while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
-			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
-		/* Encode MatchLength */
-		length = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
-			return 0;
-		if (length >= (int)ML_MASK) {
-			*token += ML_MASK;
-			length -= ML_MASK;
-			for (; length > 509 ; length -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (length > 254) {
-				length -= 255;
-				*op++ = 255;
-			}
-			*op++ = (u8)length;
-		} else
-			*token += length;
-
-		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
-			break;
-		}
-
-		/* Fill table */
-		hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
-
-		/* Test next position */
-		ref = base + hashtable[LZ4_HASH_VALUE(ip)];
-		hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
-			token = op++;
-			*token = 0;
-			goto _next_match;
-		}
-
-		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH_VALUE(ip);
-	}
+        /* Encode Offset */
+        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+
+        /* Encode MatchLength */
+        {   unsigned matchCode;
+
+            if ((dict==usingExtDict) && (lowLimit==dictionary)) {
+                const BYTE* limit;
+                match += refDelta;
+                limit = ip + (dictEnd-match);
+                if (limit > matchlimit) limit = matchlimit;
+                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
+                ip += MINMATCH + matchCode;
+                if (ip==limit) {
+                    unsigned const more = LZ4_count(ip, (const BYTE*)source, matchlimit);
+                    matchCode += more;
+                    ip += more;
+                }
+            } else {
+                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+                ip += MINMATCH + matchCode;
+            }
+
+            if ( outputLimited &&    /* Check output buffer overflow */
+                (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) )
+                return 0;
+            if (matchCode >= ML_MASK) {
+                *token += ML_MASK;
+                matchCode -= ML_MASK;
+                LZ4_write32(op, 0xFFFFFFFF);
+                while (matchCode >= 4*255) op+=4, LZ4_write32(op, 0xFFFFFFFF), matchCode -= 4*255;
+                op += matchCode / 255;
+                *op++ = (BYTE)(matchCode % 255);
+            } else
+                *token += (BYTE)(matchCode);
+        }
+
+        anchor = ip;
+
+        /* Test end of chunk */
+        if (ip > mflimit) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, ctx, tableType, base);
+
+        /* Test next position */
+        match = LZ4_getPosition(ip, ctx, tableType, base);
+        if (dict==usingExtDict) {
+            if (match < (const BYTE*)source) {
+                refDelta = dictDelta;
+                lowLimit = dictionary;
+            } else {
+                refDelta = 0;
+                lowLimit = (const BYTE*)source;
+        }   }
+        LZ4_putPosition(ip, ctx, tableType, base);
+        if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1)
+            && (match+MAX_DISTANCE>=ip)
+            && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) )
+        { token=op++; *token=0; goto _next_match; }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+    }
 
 _last_literals:
-	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (((char *)op - dest) + lastrun + 1
-		+ ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
-		return 0;
-
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
-
-	/* End */
-	return (int)(((char *)op) - dest);
+    /* Encode Last Literals */
+    {   const size_t lastRun = (size_t)(iend - anchor);
+        if ( (outputLimited) &&  /* Check output buffer overflow */
+            ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) )
+            return 0;
+        if (lastRun >= RUN_MASK) {
+            size_t accumulator = lastRun - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRun<<ML_BITS);
+        }
+        memcpy(op, anchor, lastRun);
+        op += lastRun;
+    }
+
+    /* End */
+    return (int) (((char*)op)-dest);
+}
+
+
+int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_resetStream((LZ4_stream_t*)state);
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+
+    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+        if (inputSize < LZ4_64Klimit)
+            return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16,                        noDict, noDictIssue, acceleration);
+        else
+            return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    } else {
+        if (inputSize < LZ4_64Klimit)
+            return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
+        else
+            return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    }
+}
+
+
+int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, void* wrkmem, int acceleration)
+{
+    int result = LZ4_compress_fast_extState(wrkmem, source, dest, inputSize, maxOutputSize, acceleration);
+
+    return result;
 }
 
-static inline int lz4_compress64kctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+
+int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize, void* wrkmem)
 {
-	u16 *hashtable = (u16 *)ctx;
-	const u8 *ip = (u8 *) source;
-	const u8 *anchor = ip;
-	const u8 *const base = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int len, length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
-
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
-
-	/* First Byte */
-	ip++;
-	forwardh = LZ4_HASH64K_VALUE(ip);
-
-	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
-
-		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (forwardip > mflimit)
-				goto _last_literals;
-
-			forwardh = LZ4_HASH64K_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = (u16)(ip - base);
-		} while (A32(ref) != A32(ip));
-
-		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source)
-			&& (ip[-1] == ref[-1])) {
-			ip--;
-			ref--;
-		}
-
-		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* Check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS)
-			+ (length >> 8) > oend))
-			return 0;
-		if (length >= (int)RUN_MASK) {
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
-
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
+    return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, wrkmem, 1);
+}
+
+/*-******************************
+*  *_destSize() variant
+********************************/
+
+static int LZ4_compress_destSize_generic(
+                       void* const ctx,
+                 const char* const src,
+                       char* const dst,
+                       int*  const srcSizePtr,
+                 const int targetDstSize,
+                 const tableType_t tableType)
+{
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* base = (const BYTE*) src;
+    const BYTE* lowLimit = (const BYTE*) src;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + *srcSizePtr;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+
+    BYTE* op = (BYTE*) dst;
+    BYTE* const oend = op + targetDstSize;
+    BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */;
+    BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */);
+    BYTE* const oMaxSeq = oMaxLit - 1 /* token */;
+
+    U32 forwardH;
+
+
+    /* Init conditions */
+    if (targetDstSize < 1) return 0;                                     /* Impossible to store anything */
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;            /* Unsupported input size, too large (or negative) */
+    if ((tableType == byU16) && (*srcSizePtr>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
+    if (*srcSizePtr<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+
+    /* First Byte */
+    *srcSizePtr = 0;
+    LZ4_putPosition(ip, ctx, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+    /* Main Loop */
+    for ( ; ; ) {
+        const BYTE* match;
+        BYTE* token;
+
+        /* Find a match */
+        {   const BYTE* forwardIp = ip;
+            unsigned step = 1;
+            unsigned searchMatchNb = 1 << LZ4_skipTrigger;
+
+            do {
+                U32 h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+
+                match = LZ4_getPositionOnHash(h, ctx, tableType, base);
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
+
+            } while ( ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
+                || (LZ4_read32(match) != LZ4_read32(ip)) );
+        }
+
+        /* Catch up */
+        while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
+
+        /* Encode Literal length */
+        {   unsigned litLength = (unsigned)(ip - anchor);
+            token = op++;
+            if (op + ((litLength+240)/255) + litLength > oMaxLit) {
+                /* Not enough space for a last match */
+                op--;
+                goto _last_literals;
+            }
+            if (litLength>=RUN_MASK) {
+                unsigned len = litLength - RUN_MASK;
+                *token=(RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
+
+            /* Copy Literals */
+            LZ4_wildCopy(op, anchor, op+litLength);
+            op += litLength;
+        }
 
 _next_match:
-		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
-
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
-
-		while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
-			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
-
-		/* Encode MatchLength */
-		len = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
-			return 0;
-		if (len >= (int)ML_MASK) {
-			*token += ML_MASK;
-			len -= ML_MASK;
-			for (; len > 509 ; len -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (len > 254) {
-				len -= 255;
-				*op++ = 255;
-			}
-			*op++ = (u8)len;
-		} else
-			*token += len;
-
-		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
-			break;
-		}
-
-		/* Fill table */
-		hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
-
-		/* Test next position */
-		ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
-		hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
-		if (A32(ref) == A32(ip)) {
-			token = op++;
-			*token = 0;
-			goto _next_match;
-		}
-
-		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH64K_VALUE(ip);
-	}
+        /* Encode Offset */
+        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+
+        /* Encode MatchLength */
+        {   size_t matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+
+            if (op + ((matchLength+240)/255) > oMaxMatch) {
+                /* Match description too long : reduce it */
+                matchLength = (15-1) + (oMaxMatch-op) * 255;
+            }
+            ip += MINMATCH + matchLength;
+
+            if (matchLength>=ML_MASK) {
+                *token += ML_MASK;
+                matchLength -= ML_MASK;
+                while (matchLength >= 255) { matchLength-=255; *op++ = 255; }
+                *op++ = (BYTE)matchLength;
+            }
+            else *token += (BYTE)(matchLength);
+        }
+
+        anchor = ip;
+
+        /* Test end of block */
+        if (ip > mflimit) break;
+        if (op > oMaxSeq) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, ctx, tableType, base);
+
+        /* Test next position */
+        match = LZ4_getPosition(ip, ctx, tableType, base);
+        LZ4_putPosition(ip, ctx, tableType, base);
+        if ( (match+MAX_DISTANCE>=ip)
+            && (LZ4_read32(match)==LZ4_read32(ip)) )
+        { token=op++; *token=0; goto _next_match; }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+    }
 
 _last_literals:
-	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
-		return 0;
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
-	/* End */
-	return (int)(((char *)op) - dest);
+    /* Encode Last Literals */
+    {   size_t lastRunSize = (size_t)(iend - anchor);
+        if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend) {
+            /* adapt lastRunSize to fill 'dst' */
+            lastRunSize  = (oend-op) - 1;
+            lastRunSize -= (lastRunSize+240)/255;
+        }
+        ip = anchor + lastRunSize;
+
+        if (lastRunSize >= RUN_MASK) {
+            size_t accumulator = lastRunSize - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRunSize<<ML_BITS);
+        }
+        memcpy(op, anchor, lastRunSize);
+        op += lastRunSize;
+    }
+
+    /* End */
+    *srcSizePtr = (int) (((const char*)ip)-src);
+    return (int) (((char*)op)-dst);
 }
 
-int lz4_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+
+static int LZ4_compress_destSize_extState (void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
 {
-	int ret = -1;
-	int out_len = 0;
+    LZ4_resetStream((LZ4_stream_t*)state);
+
+    if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
+        return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
+    } else {
+        if (*srcSizePtr < LZ4_64Klimit)
+            return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, byU16);
+        else
+            return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, LZ4_64bits() ? byU32 : byPtr);
+    }
+}
 
-	if (src_len < LZ4_64KLIMIT)
-		out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
-	else
-		out_len = lz4_compressctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
 
-	if (out_len < 0)
-		goto exit;
+int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize, void* wrkmem)
+{
+    return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr, targetDstSize);
+}
 
-	*dst_len = out_len;
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
+{
+    if ((LZ4_dict->currentOffset > 0x80000000) ||
+        ((size_t)LZ4_dict->currentOffset > (size_t)src)) {   /* address space overflow */
+        /* rescale hash table */
+        U32 const delta = LZ4_dict->currentOffset - 64 KB;
+        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+        int i;
+        for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
+            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
+            else LZ4_dict->hashTable[i] -= delta;
+        }
+        LZ4_dict->currentOffset = 64 KB;
+        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
+        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+    }
+}
 
-	return 0;
-exit:
-	return ret;
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = (const BYTE*) source;
+    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
+    if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;
+    LZ4_renormDictT(streamPtr, smallest);
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
+
+    /* Check overlapping input/dictionary space */
+    {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) {
+            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
+            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
+            streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+        }
+    }
+
+    /* prefix mode : source data follows dictionary */
+    if (dictEnd == (const BYTE*)source) {
+        int result;
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration);
+        else
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration);
+        streamPtr->dictSize += (U32)inputSize;
+        streamPtr->currentOffset += (U32)inputSize;
+        return result;
+    }
+
+    /* external dictionary mode */
+    {   int result;
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration);
+        else
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration);
+        streamPtr->dictionary = (const BYTE*)source;
+        streamPtr->dictSize = (U32)inputSize;
+        streamPtr->currentOffset += (U32)inputSize;
+        return result;
+    }
+}
+
+/*~
+ * for backwards compatibility
+*/
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len, void *wrkmem) {
+    *dst_len = LZ4_compress_default(src, dst, (int)src_len, (int)((size_t)dst_len), wrkmem);
+
+    return (int)((size_t)dst_len);
+}
+
+/* Hidden debug function, to force external dictionary mode */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize)
+{
+    LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_dict;
+    int result;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = dictEnd;
+    if (smallest > (const BYTE*) source) smallest = (const BYTE*) source;
+    LZ4_renormDictT((LZ4_stream_t_internal*)LZ4_dict, smallest);
+
+    result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+
+    streamPtr->dictionary = (const BYTE*)source;
+    streamPtr->dictSize = (U32)inputSize;
+    streamPtr->currentOffset += (U32)inputSize;
+
+    return result;
+}
+
+
+int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
+{
+    LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict;
+    const BYTE* previousDictEnd = dict->dictionary + dict->dictSize;
+
+    if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB */
+    if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;
+
+    memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+    dict->dictionary = (const BYTE*)safeBuffer;
+    dict->dictSize = (U32)dictSize;
+
+    return dictSize;
 }
-EXPORT_SYMBOL(lz4_compress);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 compressor");
+
+/* Kernel exports */
+EXPORT_SYMBOL(LZ4_compress_default);
+EXPORT_SYMBOL(LZ4_compress_fast);
+EXPORT_SYMBOL(LZ4_compress_destSize);
+EXPORT_SYMBOL(lz4_compress);
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 6d940c7..e0fdb9cb 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -1,343 +1,330 @@
 /*
- * LZ4 Decompressor for Linux kernel
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * Based on LZ4 implementation by Yann Collet.
- *
- * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You can contact the author at :
- *  - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- *  - LZ4 source repository : http://code.google.com/p/lz4/
- */
-
-#ifndef STATIC
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-2016, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+   You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+
+    Changed for kernel use by:
+    Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+*/
+
+/*-************************************
+*  Includes
+**************************************/
+#include "lz4defs.h"
+#include <linux/lz4.h>
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#endif
-#include <linux/lz4.h>
-
 #include <asm/unaligned.h>
 
-#include "lz4defs.h"
-
-static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-static int lz4_uncompress(const char *source, char *dest, int osize)
+/*-*****************************
+*  Decompression functions
+*******************************/
+/*! LZ4_decompress_generic() :
+ *  This generic decompression function cover all use cases.
+ *  It shall be instantiated several times, using different sets of directives
+ *  Note that it is important this generic function is really inlined,
+ *  in order to remove useless branches during compilation optimization.
+ */
+static inline int LZ4_decompress_generic(
+                 const char* const source,
+                 char* const dest,
+                 int inputSize,
+                 int outputSize,         /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
+
+                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
+                 int partialDecoding,    /* full, partial */
+                 int targetOutputSize,   /* only used if partialDecoding==partial */
+                 int dict,               /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* == dest when no prefix */
+                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
+                 const size_t dictSize         /* note : = 0 if noDict */
+                 )
 {
-	const BYTE *ip = (const BYTE *) source;
-	const BYTE *ref;
-	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + osize;
-	BYTE *cpy;
-	unsigned token;
-	size_t length;
-
-	while (1) {
-
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
-		if (length == RUN_MASK) {
-			size_t len;
-
-			len = *ip++;
-			for (; len == 255; length += 255)
-				len = *ip++;
-			if (unlikely(length > (size_t)(length + len)))
-				goto _output_error;
-			length += len;
-		}
-
-		/* copy literals */
-		cpy = op + length;
-		if (unlikely(cpy > oend - COPYLENGTH)) {
-			/*
-			 * Error: not enough place for another match
-			 * (min 4) + 5 literals
-			 */
-			if (cpy != oend)
-				goto _output_error;
-
-			memcpy(op, ip, length);
-			ip += length;
-			break; /* EOF */
-		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
-
-		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-
-		/* Error: offset create reference outside destination buffer */
-		if (unlikely(ref < (BYTE *const) dest))
-			goto _output_error;
-
-		/* get matchlength */
-		length = token & ML_MASK;
-		if (length == ML_MASK) {
-			for (; *ip == 255; length += 255)
-				ip++;
-			if (unlikely(length > (size_t)(length + *ip)))
-				goto _output_error;
-			length += *ip++;
-		}
-
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-			op[0] = ref[0];
-			op[1] = ref[1];
-			op[2] = ref[2];
-			op[3] = ref[3];
-			op += 4;
-			ref += 4;
-			ref -= dec32table[op-ref];
-			PUT4(ref, op);
-			op += STEPSIZE - 4;
-			ref -= dec64;
-		} else {
-			LZ4_COPYSTEP(ref, op);
-		}
-		cpy = op + length - (STEPSIZE - 4);
-		if (cpy > (oend - COPYLENGTH)) {
-
-			/* Error: request to write beyond destination buffer */
-			if (cpy > oend)
-				goto _output_error;
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
-				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
-		}
-		LZ4_SECURECOPY(ref, op, cpy);
-		op = cpy; /* correction */
-	}
-	/* end of decoding */
-	return (int) (((char *)ip) - source);
-
-	/* write overflow error detected */
+    /* Local Variables */
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + outputSize;
+    BYTE* cpy;
+    BYTE* oexit = op + targetOutputSize;
+    const BYTE* const lowLimit = lowPrefix - dictSize;
+
+    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
+    const unsigned dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
+    const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+    const int safeDecode = (endOnInput==endOnInputSize);
+    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+
+
+    /* Special cases */
+    if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT;                        /* targetOutputSize too high => decode everything */
+    if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
+    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+
+    /* Main Loop : decode sequences */
+    while (1) {
+        unsigned token;
+        size_t length;
+        const BYTE* match;
+        size_t offset;
+
+        /* get literal length */
+        token = *ip++;
+        if ((length=(token>>ML_BITS)) == RUN_MASK) {
+            unsigned s;
+            do {
+                s = *ip++;
+                length += s;
+            } while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) & (s==255) );
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error;   /* overflow detection */
+            if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error;   /* overflow detection */
+        }
+
+        /* copy literals */
+        cpy = op+length;
+        if ( ((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
+            || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
+        {
+            if (partialDecoding) {
+                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
+                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
+            } else {
+                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
+                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
+            }
+            memcpy(op, ip, length);
+            ip += length;
+            op += length;
+            break;     /* Necessarily EOF, due to parsing restrictions */
+        }
+        LZ4_wildCopy(op, ip, cpy);
+        ip += length; op = cpy;
+
+        /* get offset */
+        offset = LZ4_readLE16(ip); ip+=2;
+        match = op - offset;
+        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside buffers */
+
+        /* get matchlength */
+        length = token & ML_MASK;
+        if (length == ML_MASK) {
+            unsigned s;
+            do {
+                s = *ip++;
+                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
+                length += s;
+            } while (s==255);
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error;   /* overflow detection */
+        }
+        length += MINMATCH;
+
+        /* check external dictionary */
+        if ((dict==usingExtDict) && (match < lowPrefix)) {
+            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;   /* doesn't respect parsing restriction */
+
+            if (length <= (size_t)(lowPrefix-match)) {
+                /* match can be copied as a single segment from external dictionary */
+                memmove(op, dictEnd - (lowPrefix-match), length);
+                op += length;
+            } else {
+                /* match encompass external dictionary and current block */
+                size_t const copySize = (size_t)(lowPrefix-match);
+                size_t const restSize = length - copySize;
+                memcpy(op, dictEnd - copySize, copySize);
+                op += copySize;
+                if (restSize > (size_t)(op-lowPrefix)) {  /* overlap copy */
+                    BYTE* const endOfMatch = op + restSize;
+                    const BYTE* copyFrom = lowPrefix;
+                    while (op < endOfMatch) *op++ = *copyFrom++;
+                } else {
+                    memcpy(op, lowPrefix, restSize);
+                    op += restSize;
+            }   }
+            continue;
+        }
+
+        /* copy match within block */
+        cpy = op + length;
+        if (unlikely(offset<8)) {
+            const int dec64 = dec64table[offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[offset];
+            memcpy(op+4, match, 4);
+            match -= dec64;
+        } else { LZ4_copy8(op, match); match+=8; }
+        op += 8;
+
+        if (unlikely(cpy>oend-12)) {
+            BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
+            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+            if (op < oCopyLimit) {
+                LZ4_wildCopy(op, match, oCopyLimit);
+                match += oCopyLimit - op;
+                op = oCopyLimit;
+            }
+            while (op<cpy) *op++ = *match++;
+        } else {
+            LZ4_copy8(op, match);
+            if (length>16) LZ4_wildCopy(op+8, match+8, cpy);
+        }
+        op=cpy;   /* correction */
+    }
+
+    /* end of decoding */
+    if (endOnInput)
+       return (int) (((char*)op)-dest);     /* Nb of output bytes decoded */
+    else
+       return (int) (((const char*)ip)-source);   /* Nb of input bytes read */
+
+    /* Overflow error detected */
 _output_error:
-	return -1;
+    return (int) (-(((const char*)ip)-source))-1;
 }
 
-static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
-				int isize, size_t maxoutputsize)
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
 {
-	const BYTE *ip = (const BYTE *) source;
-	const BYTE *const iend = ip + isize;
-	const BYTE *ref;
-
-
-	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + maxoutputsize;
-	BYTE *cpy;
-
-	/* Main Loop */
-	while (ip < iend) {
-
-		unsigned token;
-		size_t length;
-
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
-		if (length == RUN_MASK) {
-			int s = 255;
-			while ((ip < iend) && (s == 255)) {
-				s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-			}
-		}
-		/* copy literals */
-		cpy = op + length;
-		if ((cpy > oend - COPYLENGTH) ||
-			(ip + length > iend - COPYLENGTH)) {
-
-			if (cpy > oend)
-				goto _output_error;/* writes beyond buffer */
-
-			if (ip + length != iend)
-				goto _output_error;/*
-						    * Error: LZ4 format requires
-						    * to consume all input
-						    * at this stage
-						    */
-			memcpy(op, ip, length);
-			op += length;
-			break;/* Necessarily EOF, due to parsing restrictions */
-		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
-
-		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-		if (ref < (BYTE * const) dest)
-			goto _output_error;
-			/*
-			 * Error : offset creates reference
-			 * outside of destination buffer
-			 */
-
-		/* get matchlength */
-		length = (token & ML_MASK);
-		if (length == ML_MASK) {
-			while (ip < iend) {
-				int s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-				if (s == 255)
-					continue;
-				break;
-			}
-		}
-
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-				op[0] = ref[0];
-				op[1] = ref[1];
-				op[2] = ref[2];
-				op[3] = ref[3];
-				op += 4;
-				ref += 4;
-				ref -= dec32table[op - ref];
-				PUT4(ref, op);
-				op += STEPSIZE - 4;
-				ref -= dec64;
-		} else {
-			LZ4_COPYSTEP(ref, op);
-		}
-		cpy = op + length - (STEPSIZE-4);
-		if (cpy > oend - COPYLENGTH) {
-			if (cpy > oend)
-				goto _output_error; /* write outside of buf */
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
-				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
-		}
-		LZ4_SECURECOPY(ref, op, cpy);
-		op = cpy; /* correction */
-	}
-	/* end of decoding */
-	return (int) (((char *) op) - dest);
-
-	/* write overflow error detected */
-_output_error:
-	return -1;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
 }
 
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len)
+int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
 {
-	int ret = -1;
-	int input_len = 0;
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
+}
 
-	input_len = lz4_uncompress(src, dest, actual_dest_len);
-	if (input_len < 0)
-		goto exit_0;
-	*src_len = input_len;
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
+}
 
-	return 0;
-exit_0:
-	return ret;
+/*!
+ * LZ4_setStreamDecode() :
+ * Use this function to instruct where to find the dictionary.
+ * This function is not necessary if previous data is still available where it was decoded.
+ * Loading a size of 0 is allowed (same effect as no dictionary).
+ * Return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    lz4sd->prefixSize = (size_t) dictSize;
+    lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
+    lz4sd->externalDict = NULL;
+    lz4sd->extDictSize  = 0;
+    return 1;
 }
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress);
-#endif
 
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len)
+/*
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in "streaming" mode.
+    Previously decoded blocks must still be available at the memory position where they were decoded.
+    If it's not possible, save the relevant part of decoded data into a safe buffer,
+    and indicate where it stands using LZ4_setStreamDecode()
+*/
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
 {
-	int ret = -1;
-	int out_len = 0;
-
-	out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
-					*dest_len);
-	if (out_len < 0)
-		goto exit_0;
-	*dest_len = out_len;
-
-	return 0;
-exit_0:
-	return ret;
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    int result;
+
+    if (lz4sd->prefixEnd == (BYTE*)dest) {
+        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                        endOnInputSize, full, 0,
+                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += result;
+        lz4sd->prefixEnd  += result;
+    } else {
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                        endOnInputSize, full, 0,
+                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = result;
+        lz4sd->prefixEnd  = (BYTE*)dest + result;
+    }
+
+    return result;
+}
+
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    int result;
+
+    if (lz4sd->prefixEnd == (BYTE*)dest) {
+        result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                                        endOnOutputSize, full, 0,
+                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += originalSize;
+        lz4sd->prefixEnd  += originalSize;
+    } else {
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = (BYTE*)dest - lz4sd->extDictSize;
+        result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                                        endOnOutputSize, full, 0,
+                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = originalSize;
+        lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
+    }
+
+    return result;
+}
+
+/*
+ * for backwards compatibility
+ */
+int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, unsigned char *dest, size_t *dest_len) {
+   *dest_len = LZ4_decompress_safe(src, dest, (int)src_len, (int)((size_t)dest_len));
+
+   return (int)((size_t)(dest_len));
+}
+
+/*
+ * for backwards compatibility
+ */
+int lz4_decompress(const unsigned char *src, size_t *src_len, unsigned char *dest, size_t actual_dest_len) {
+    *src_len = LZ4_decompress_fast(src, dest, (int)actual_dest_len);
+
+    return (int)((size_t)(src_len));
 }
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4 Decompressor");
-#endif
+MODULE_DESCRIPTION("LZ4 decompressor");
+
+/* Kernel exports */
+EXPORT_SYMBOL(LZ4_decompress_fast);
+EXPORT_SYMBOL(LZ4_decompress_safe);
+EXPORT_SYMBOL(LZ4_decompress_safe_partial);
+EXPORT_SYMBOL(lz4_decompress);
+EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index c79d7ea..567998d 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -1,157 +1,277 @@
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
 /*
- * lz4defs.h -- architecture specific defines
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+   lz4defs.h -- common and architecture specific defines for the kernel usage
+
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-2016, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+   You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+
+    Created for kernel usage by:
+    Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+*/
+
+#include <asm/unaligned.h>
 
 /*
  * Detects 64 bits mode
- */
+*/
 #if defined(CONFIG_64BIT)
 #define LZ4_ARCH64 1
 #else
 #define LZ4_ARCH64 0
 #endif
 
+static inline unsigned LZ4_64bits(void) { return LZ4_ARCH64; }
+
 /*
- * Architecture-specific macros
+ * Little/big endian
  */
-#define BYTE	u8
-typedef struct _U16_S { u16 v; } U16_S;
-typedef struct _U32_S { u32 v; } U32_S;
-typedef struct _U64_S { u64 v; } U64_S;
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-
-#define A16(x) (((U16_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A64(x) (((U64_S *)(x))->v)
-
-#define PUT4(s, d) (A32(d) = A32(s))
-#define PUT8(s, d) (A64(d) = A64(s))
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - A16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
-	do {	\
-		A16(p) = v; \
-		p += 2; \
-	} while (0)
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
-#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
-#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
-
-#define PUT4(s, d) \
-	put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
-#define PUT8(s, d) \
-	put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - get_unaligned_le16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)			\
-	do {						\
-		put_unaligned_le16(v, (u16 *)(p));	\
-		p += 2;					\
-	} while (0)
+#ifdef __LITTLE_ENDIAN
+#define LZ4_isLittleEndian(void) (true)
+#else
+#define LZ4_isLittleEndian(void) (false)
 #endif
 
-#define COPYLENGTH 8
+/*-************************************
+*  Tuning parameter
+**************************************/
+/*! * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 10
+
+/*-************************************
+*  Memory routines
+**************************************/
+#include <linux/slab.h>
+#include <linux/string.h>   /* memset, memcpy */
+#define MEM_INIT       memset
+
+/*-************************************
+*  Basic Types
+**************************************/
+#include <linux/types.h>
+
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+
+/*-************************************
+*  Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+#define STEPSIZE sizeof(size_t)
+
 #define ML_BITS  4
-#define ML_MASK  ((1U << ML_BITS) - 1)
-#define RUN_BITS (8 - ML_BITS)
-#define RUN_MASK ((1U << RUN_BITS) - 1)
-#define MEMORY_USAGE	14
-#define MINMATCH	4
-#define SKIPSTRENGTH	6
-#define LASTLITERALS	5
-#define MFLIMIT		(COPYLENGTH + MINMATCH)
-#define MINLENGTH	(MFLIMIT + 1)
-#define MAXD_LOG	16
-#define MAXD		(1 << MAXD_LOG)
-#define MAXD_MASK	(u32)(MAXD - 1)
-#define MAX_DISTANCE	(MAXD - 1)
-#define HASH_LOG	(MAXD_LOG - 1)
-#define HASHTABLESIZE	(1 << HASH_LOG)
-#define MAX_NB_ATTEMPTS	256
-#define OPTIMAL_ML	(int)((ML_MASK-1)+MINMATCH)
-#define LZ4_64KLIMIT	((1<<16) + (MFLIMIT - 1))
-#define HASHLOG64K	((MEMORY_USAGE - 2) + 1)
-#define HASH64KTABLESIZE	(1U << HASHLOG64K)
-#define LZ4_HASH_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - (MEMORY_USAGE-2)))
-#define LZ4_HASH64K_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASHLOG64K))
-#define HASH_VALUE(p)		(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASH_LOG))
-
-#if LZ4_ARCH64/* 64-bit */
-#define STEPSIZE 8
-
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT8(s, d);	\
-		d += 8;		\
-		s += 8;		\
-	} while (0)
-
-#define LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
-
-#define LZ4_SECURECOPY(s, d, e)			\
-	do {					\
-		if (d < e) {			\
-			LZ4_WILDCOPY(s, d, e);	\
-		}				\
-	} while (0)
-#define HTYPE u32
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
 
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
-#else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
-#endif
+#define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static inline allocation */
+
+static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */
+
+/*-************************************
+*  Reading and writing into memory
+**************************************/
+
+static inline U16 LZ4_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline U32 LZ4_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline size_t LZ4_read_ARCH(const void* memPtr)
+{
+    size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
 
-#else	/* 32-bit */
-#define STEPSIZE 4
+static inline void LZ4_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
 
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT4(s, d);	\
-		d += 4;		\
-		s += 4;		\
-	} while (0)
+static inline void LZ4_write32(void* memPtr, U32 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
 
-#define LZ4_COPYPACKET(s, d)		\
-	do {				\
-		LZ4_COPYSTEP(s, d);	\
-		LZ4_COPYSTEP(s, d);	\
-	} while (0)
+static inline U16 LZ4_readLE16(const void* memPtr)
+{
+    if (LZ4_isLittleEndian()) {
+        return LZ4_read16(memPtr);
+    } else {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)((U16)p[0] + (p[1]<<8));
+    }
+}
 
-#define LZ4_SECURECOPY	LZ4_WILDCOPY
-#define HTYPE const u8*
+static inline void LZ4_writeLE16(void* memPtr, U16 value)
+{
+    if (LZ4_isLittleEndian()) {
+        LZ4_write16(memPtr, value);
+    } else {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE) value;
+        p[1] = (BYTE)(value>>8);
+    }
+}
 
+static inline void LZ4_copy8(void* dst, const void* src)
+{
+    memcpy(dst,src,8);
+}
+
+/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */
+static inline void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
+
+#if 0
+    const size_t l2 = 8 - (((size_t)d) & (sizeof(void*)-1));
+    LZ4_copy8(d,s); if (d>e-9) return;
+    d+=l2; s+=l2;
+#endif /* join to align */
+
+    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
+}
+
+#if LZ4_ARCH64
+#ifdef __BIG_ENDIAN
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#endif
+#else
 #ifdef __BIG_ENDIAN
 #define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
 #else
 #define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
 #endif
-
 #endif
 
-#define LZ4_WILDCOPY(s, d, e)		\
-	do {				\
-		LZ4_COPYPACKET(s, d);	\
-	} while (d < e)
-
-#define LZ4_BLINDCOPY(s, d, l)	\
-	do {	\
-		u8 *e = (d) + l;	\
-		LZ4_WILDCOPY(s, d, e);	\
-		d = e;	\
-	} while (0)
+static inline unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const pStart = pIn;
+
+    while (likely(pIn<pInLimit-(STEPSIZE-1))) {
+        size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
+        pIn += LZ4_NBCOMMONBYTES(diff);
+        return (unsigned)(pIn - pStart);
+    }
+
+    if (LZ4_64bits()) if ((pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (unsigned)(pIn - pStart);
+}
+
+typedef struct {
+    uint32_t hashTable[LZ4_HASH_SIZE_U32];
+    uint32_t currentOffset;
+    uint32_t initCheck;
+    const uint8_t* dictionary;
+    uint8_t* bufferStart;   /* obsolete, used for slideInputBuffer */
+    uint32_t dictSize;
+} LZ4_stream_t_internal;
+
+typedef struct {
+    const uint8_t* externalDict;
+    size_t extDictSize;
+    const uint8_t* prefixEnd;
+    size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
+
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+/*-**********************************************
+*  Streaming Decompression
+************************************************/
+#define LZ4_STREAMDECODESIZE_U64  4
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
+
+/*-*********************************************
+*  Streaming Compression
+***********************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(long long))
+
+/*!
+ * LZ4_stream_t :
+ * information structure to track an LZ4 stream.
+ * important : init this structure content before first use !
+ * note : only allocated directly the structure if you are static inlineally linking LZ4
+ *        If you are using liblz4 as a DLL, please use below construction methods instead.
+ */
+typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;
+
+/*-******************************
+*  Streaming functions
+********************************/
+
+static inline void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
+{
+    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+#endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index f344f76..e630100 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -1,539 +1,576 @@
 /*
- * LZ4 HC - High Compression Mode of LZ4
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
- *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
- */
-
+    LZ4 HC - High Compression Mode of LZ4
+    Copyright (C) 2011-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+       - LZ4 source repository : https://github.com/lz4/lz4
+       - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+
+    Changed for kernel use by:
+    Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
+*/
+
+/*-************************************
+*  Includes
+**************************************/
+#include "lz4defs.h"
+#include <linux/lz4.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lz4.h>
-#include <asm/unaligned.h>
-#include "lz4defs.h"
 
-struct lz4hc_data {
-	const u8 *base;
-	HTYPE hashtable[HASHTABLESIZE];
-	u16 chaintable[MAXD];
-	const u8 *nexttoupdate;
-} __attribute__((__packed__));
+/* *************************************
+*  Local Constants and types
+***************************************/
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE-1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
 
-static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
+typedef struct
+{
+    unsigned int   hashTable[LZ4HC_HASHTABLESIZE];
+    unsigned short   chainTable[LZ4HC_MAXD];
+    const unsigned char* end;        /* next block here to continue on current prefix */
+    const unsigned char* base;       /* All index relative to this position */
+    const unsigned char* dictBase;   /* alternate base for extDict */
+    unsigned char* inputBuffer;      /* deprecated */
+    unsigned int   dictLimit;        /* below that point, need extDict */
+    unsigned int   lowLimit;         /* below that point, no more dict */
+    unsigned int   nextToUpdate;     /* index from which to continue dictionary update */
+    unsigned int   compressionLevel;
+} LZ4HC_CCtx_internal;
+
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
+
+typedef union LZ4_streamHC_u LZ4_streamHC_t;
+
+union LZ4_streamHC_u {
+    size_t table[LZ4_STREAMHCSIZE_SIZET];
+    LZ4HC_CCtx_internal internal_donotuse;
+};   /* previously typedef'd to LZ4_streamHC_t */
+
+#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+
+/**************************************
+*  Local Macros
+**************************************/
+#define HASH_FUNCTION(i)       (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
+#define DELTANEXTU16(p)        chainTable[(U16)(p)]   /* faster */
+
+static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
+
+/**************************************
+*  HC Compression
+**************************************/
+static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
 {
-	memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
-	memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
-
-#if LZ4_ARCH64
-	hc4->nexttoupdate = base + 1;
-#else
-	hc4->nexttoupdate = base;
-#endif
-	hc4->base = base;
-	return 1;
+    MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
+    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+    hc4->nextToUpdate = 64 KB;
+    hc4->base = start - 64 KB;
+    hc4->end = start;
+    hc4->dictBase = start - 64 KB;
+    hc4->dictLimit = 64 KB;
+    hc4->lowLimit = 64 KB;
 }
 
 /* Update chains up to ip (excluded) */
-static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
+static inline void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
 {
-	u16 *chaintable = hc4->chaintable;
-	HTYPE *hashtable  = hc4->hashtable;
-#if LZ4_ARCH64
-	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-
-	while (hc4->nexttoupdate < ip) {
-		const u8 *p = hc4->nexttoupdate;
-		size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
-		if (delta > MAX_DISTANCE)
-			delta = MAX_DISTANCE;
-		chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
-		hashtable[HASH_VALUE(p)] = (p) - base;
-		hc4->nexttoupdate++;
-	}
+    U16* const chainTable = hc4->chainTable;
+    U32* const hashTable  = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    U32 const target = (U32)(ip - base);
+    U32 idx = hc4->nextToUpdate;
+
+    while (idx < target) {
+        U32 const h = LZ4HC_hashPtr(base+idx);
+        size_t delta = idx - hashTable[h];
+        if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
+        DELTANEXTU16(idx) = (U16)delta;
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    hc4->nextToUpdate = target;
 }
 
-static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
-		const u8 *const matchlimit)
+static inline int LZ4HC_InsertAndFindBestMatch (LZ4HC_CCtx_internal* hc4,   /* Index table will be updated */
+                                               const BYTE* ip, const BYTE* const iLimit,
+                                               const BYTE** matchpos,
+                                               const int maxNbAttempts)
 {
-	const u8 *p1t = p1;
-
-	while (p1t < matchlimit - (STEPSIZE - 1)) {
-#if LZ4_ARCH64
-		u64 diff = A64(p2) ^ A64(p1t);
-#else
-		u32 diff = A32(p2) ^ A32(p1t);
-#endif
-		if (!diff) {
-			p1t += STEPSIZE;
-			p2 += STEPSIZE;
-			continue;
-		}
-		p1t += LZ4_NBCOMMONBYTES(diff);
-		return p1t - p1;
-	}
-#if LZ4_ARCH64
-	if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
-		p1t += 4;
-		p2 += 4;
-	}
-#endif
-
-	if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
-		p1t += 2;
-		p2 += 2;
-	}
-	if ((p1t < matchlimit) && (*p2 == *p1t))
-		p1t++;
-	return p1t - p1;
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const BYTE* const dictBase = hc4->dictBase;
+    const U32 dictLimit = hc4->dictLimit;
+    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
+    U32 matchIndex;
+    int nbAttempts=maxNbAttempts;
+    size_t ml=0;
+
+    /* HC4 match finder */
+    LZ4HC_Insert(hc4, ip);
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+    while ((matchIndex>=lowLimit) && (nbAttempts)) {
+        nbAttempts--;
+        if (matchIndex >= dictLimit) {
+            const BYTE* const match = base + matchIndex;
+            if (*(match+ml) == *(ip+ml)
+                && (LZ4_read32(match) == LZ4_read32(ip)))
+            {
+                size_t const mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
+                if (mlt > ml) { ml = mlt; *matchpos = match; }
+            }
+        } else {
+            const BYTE* const match = dictBase + matchIndex;
+            if (LZ4_read32(match) == LZ4_read32(ip)) {
+                size_t mlt;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);
+                if (vLimit > iLimit) vLimit = iLimit;
+                mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iLimit))
+                    mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit);
+                if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; }   /* virtual matchpos */
+            }
+        }
+        matchIndex -= DELTANEXTU16(matchIndex);
+    }
+
+    return (int)ml;
 }
 
-static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
-		const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
+
+static inline int LZ4HC_InsertAndGetWiderMatch (
+    LZ4HC_CCtx_internal* hc4,
+    const BYTE* const ip,
+    const BYTE* const iLowLimit,
+    const BYTE* const iHighLimit,
+    int longest,
+    const BYTE** matchpos,
+    const BYTE** startpos,
+    const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-	const u8 *ref;
-#if LZ4_ARCH64
-	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	int nbattempts = MAX_NB_ATTEMPTS;
-	size_t repl = 0, ml = 0;
-	u16 delta;
-
-	/* HC4 match finder */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	/* potential repetition */
-	if (ref >= ip-4) {
-		/* confirmed */
-		if (A32(ref) == A32(ip)) {
-			delta = (u16)(ip-ref);
-			repl = ml  = lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
-			*matchpos = ref;
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-
-	while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
-		nbattempts--;
-		if (*(ref + ml) == *(ip + ml)) {
-			if (A32(ref) == A32(ip)) {
-				size_t mlt =
-					lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
-				if (mlt > ml) {
-					ml = mlt;
-					*matchpos = ref;
-				}
-			}
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-
-	/* Complete table */
-	if (repl) {
-		const BYTE *ptr = ip;
-		const BYTE *end;
-		end = ip + repl - (MINMATCH-1);
-		/* Pre-Load */
-		while (ptr < end - delta) {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			ptr++;
-		}
-		do {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			/* Head of chain */
-			hashtable[HASH_VALUE(ptr)] = (ptr) - base;
-			ptr++;
-		} while (ptr < end);
-		hc4->nexttoupdate = end;
-	}
-
-	return (int)ml;
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const U32 dictLimit = hc4->dictLimit;
+    const BYTE* const lowPrefixPtr = base + dictLimit;
+    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
+    const BYTE* const dictBase = hc4->dictBase;
+    U32   matchIndex;
+    int nbAttempts = maxNbAttempts;
+    int delta = (int)(ip-iLowLimit);
+
+
+    /* First Match */
+    LZ4HC_Insert(hc4, ip);
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+    while ((matchIndex>=lowLimit) && (nbAttempts)) {
+        nbAttempts--;
+        if (matchIndex >= dictLimit) {
+            const BYTE* matchPtr = base + matchIndex;
+            if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {
+                if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+                    int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+                    int back = 0;
+
+                    while ((ip+back > iLowLimit)
+                           && (matchPtr+back > lowPrefixPtr)
+                           && (ip[back-1] == matchPtr[back-1]))
+                            back--;
+
+                    mlt -= back;
+
+                    if (mlt > longest) {
+                        longest = (int)mlt;
+                        *matchpos = matchPtr+back;
+                        *startpos = ip+back;
+                    }
+                }
+            }
+        } else {
+            const BYTE* const matchPtr = dictBase + matchIndex;
+            if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+                size_t mlt;
+                int back=0;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);
+                if (vLimit > iHighLimit) vLimit = iHighLimit;
+                mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
+                    mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
+                while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--;
+                mlt -= back;
+                if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
+            }
+        }
+        matchIndex -= DELTANEXTU16(matchIndex);
+    }
+
+    return longest;
 }
 
-static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
-	const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
-	const u8 **matchpos, const u8 **startpos)
+static inline int LZ4HC_encodeSequence (
+    const BYTE** ip,
+    BYTE** op,
+    const BYTE** anchor,
+    int matchLength,
+    const BYTE* const match,
+    limitedOutput_directive limitedOutputBuffer,
+    BYTE* oend)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-#if LZ4_ARCH64
-	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	const u8 *ref;
-	int nbattempts = MAX_NB_ATTEMPTS;
-	int delta = (int)(ip - startlimit);
-
-	/* First Match */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
-		&& (nbattempts)) {
-		nbattempts--;
-		if (*(startlimit + longest) == *(ref - delta + longest)) {
-			if (A32(ref) == A32(ip)) {
-				const u8 *reft = ref + MINMATCH;
-				const u8 *ipt = ip + MINMATCH;
-				const u8 *startt = ip;
-
-				while (ipt < matchlimit-(STEPSIZE - 1)) {
-					#if LZ4_ARCH64
-					u64 diff = A64(reft) ^ A64(ipt);
-					#else
-					u32 diff = A32(reft) ^ A32(ipt);
-					#endif
-
-					if (!diff) {
-						ipt += STEPSIZE;
-						reft += STEPSIZE;
-						continue;
-					}
-					ipt += LZ4_NBCOMMONBYTES(diff);
-					goto _endcount;
-				}
-				#if LZ4_ARCH64
-				if ((ipt < (matchlimit - 3))
-					&& (A32(reft) == A32(ipt))) {
-					ipt += 4;
-					reft += 4;
-				}
-				ipt += 2;
-				#endif
-				if ((ipt < (matchlimit - 1))
-					&& (A16(reft) == A16(ipt))) {
-					reft += 2;
-				}
-				if ((ipt < matchlimit) && (*reft == *ipt))
-					ipt++;
-_endcount:
-				reft = ref;
-
-				while ((startt > startlimit)
-					&& (reft > hc4->base)
-					&& (startt[-1] == reft[-1])) {
-					startt--;
-					reft--;
-				}
-
-				if ((ipt - startt) > longest) {
-					longest = (int)(ipt - startt);
-					*matchpos = reft;
-					*startpos = startt;
-				}
-			}
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-	return longest;
+    int length;
+    BYTE* token;
+
+    /* Encode Literal length */
+    length = (int)(*ip - *anchor);
+    token = (*op)++;
+    if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
+    if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; }
+    else *token = (BYTE)(length<<ML_BITS);
+
+    /* Copy Literals */
+    LZ4_wildCopy(*op, *anchor, (*op) + length);
+    *op += length;
+
+    /* Encode Offset */
+    LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
+
+    /* Encode MatchLength */
+    length = (int)(matchLength-MINMATCH);
+    if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
+    if (length>=(int)ML_MASK) {
+        *token += ML_MASK;
+        length -= ML_MASK;
+        for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; }
+        if (length > 254) { length-=255; *(*op)++ = 255; }
+        *(*op)++ = (BYTE)length;
+    } else {
+        *token += (BYTE)(length);
+    }
+
+    /* Prepare next loop */
+    *ip += matchLength;
+    *anchor = *ip;
+
+    return 0;
 }
 
-static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
-		int ml, const u8 *ref)
+static int LZ4HC_compress_generic (
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const source,
+    char* const dest,
+    int const inputSize,
+    int const maxOutputSize,
+    int compressionLevel,
+    limitedOutput_directive limit
+    )
 {
-	int length, len;
-	u8 *token;
-
-	/* Encode Literal length */
-	length = (int)(*ip - *anchor);
-	token = (*op)++;
-	if (length >= (int)RUN_MASK) {
-		*token = (RUN_MASK << ML_BITS);
-		len = length - RUN_MASK;
-		for (; len > 254 ; len -= 255)
-			*(*op)++ = 255;
-		*(*op)++ = (u8)len;
-	} else
-		*token = (length << ML_BITS);
-
-	/* Copy Literals */
-	LZ4_BLINDCOPY(*anchor, *op, length);
-
-	/* Encode Offset */
-	LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
-
-	/* Encode MatchLength */
-	len = (int)(ml - MINMATCH);
-	if (len >= (int)ML_MASK) {
-		*token += ML_MASK;
-		len -= ML_MASK;
-		for (; len > 509 ; len -= 510) {
-			*(*op)++ = 255;
-			*(*op)++ = 255;
-		}
-		if (len > 254) {
-			len -= 255;
-			*(*op)++ = 255;
-		}
-		*(*op)++ = (u8)len;
-	} else
-		*token += len;
-
-	/* Prepare next loop */
-	*ip += ml;
-	*anchor = *ip;
-
-	return 0;
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + maxOutputSize;
+
+    unsigned maxNbAttempts;
+    int   ml, ml2, ml3, ml0;
+    const BYTE* ref = NULL;
+    const BYTE* start2 = NULL;
+    const BYTE* ref2 = NULL;
+    const BYTE* start3 = NULL;
+    const BYTE* ref3 = NULL;
+    const BYTE* start0;
+    const BYTE* ref0;
+
+    /* init */
+    if (compressionLevel > LZ4HC_MAX_CLEVEL) compressionLevel = LZ4HC_MAX_CLEVEL;
+    if (compressionLevel < 1) compressionLevel = LZ4HC_DEFAULT_CLEVEL;
+    maxNbAttempts = 1 << (compressionLevel-1);
+    ctx->end += inputSize;
+
+    ip++;
+
+    /* Main Loop */
+    while (ip < mflimit) {
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
+        if (!ml) { ip++; continue; }
+
+        /* saved, in case we would skip too much */
+        start0 = ip;
+        ref0 = ref;
+        ml0 = ml;
+
+_Search2:
+        if (ip+ml < mflimit)
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts);
+        else ml2 = ml;
+
+        if (ml2 == ml) { /* No better match */
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start0 < ip) {
+            if (start2 < ip + ml0) {  /* empirical */
+                ip = start0;
+                ref = ref0;
+                ml = ml0;
+            }
+        }
+
+        /* Here, start0==ip */
+        if ((start2 - ip) < 3) {  /* First Match too small : removed */
+            ml = ml2;
+            ip = start2;
+            ref =ref2;
+            goto _Search2;
+        }
+
+_Search3:
+        /*
+        * Currently we have :
+        * ml2 > ml1, and
+        * ip1+3 <= ip2 (usually < ip1+ml1)
+        */
+        if ((start2 - ip) < OPTIMAL_ML) {
+            int correction;
+            int new_ml = ml;
+            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+            correction = new_ml - (int)(start2 - ip);
+            if (correction > 0) {
+                start2 += correction;
+                ref2 += correction;
+                ml2 -= correction;
+            }
+        }
+        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
+
+        if (start2 + ml2 < mflimit)
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
+        else ml3 = ml2;
+
+        if (ml3 == ml2) {  /* No better match : 2 sequences to encode */
+            /* ip & ref are known; Now for ml */
+            if (start2 < ip+ml)  ml = (int)(start2 - ip);
+            /* Now, encode 2 sequences */
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            ip = start2;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start3 < ip+ml+3) {  /* Not enough space for match 2 : remove it */
+            if (start3 >= (ip+ml)) {  /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+                if (start2 < ip+ml) {
+                    int correction = (int)(ip+ml - start2);
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                    if (ml2 < MINMATCH) {
+                        start2 = start3;
+                        ref2 = ref3;
+                        ml2 = ml3;
+                    }
+                }
+
+                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+                ip  = start3;
+                ref = ref3;
+                ml  = ml3;
+
+                start0 = start2;
+                ref0 = ref2;
+                ml0 = ml2;
+                goto _Search2;
+            }
+
+            start2 = start3;
+            ref2 = ref3;
+            ml2 = ml3;
+            goto _Search3;
+        }
+
+        /*
+        * OK, now we have 3 ascending matches; let's write at least the first one
+        * ip & ref are known; Now for ml
+        */
+        if (start2 < ip+ml) {
+            if ((start2 - ip) < (int)ML_MASK) {
+                int correction;
+                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+                correction = ml - (int)(start2 - ip);
+                if (correction > 0) {
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                }
+            } else {
+                ml = (int)(start2 - ip);
+            }
+        }
+        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+
+        ip = start2;
+        ref = ref2;
+        ml = ml2;
+
+        start2 = start3;
+        ref2 = ref3;
+        ml2 = ml3;
+
+        goto _Search3;
+    }
+
+    /* Encode Last Literals */
+    {   int lastRun = (int)(iend - anchor);
+        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  /* Check output limit */
+        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+        else *op++ = (BYTE)(lastRun<<ML_BITS);
+        memcpy(op, anchor, iend - anchor);
+        op += iend-anchor;
+    }
+
+    /* End */
+    return (int) (((char*)op)-dest);
 }
 
-static int lz4_compresshcctx(struct lz4hc_data *ctx,
-		const char *source,
-		char *dest,
-		int isize)
+int LZ4_sizeofStateHC(void) { return sizeof(LZ4_streamHC_t); }
+
+int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
 {
-	const u8 *ip = (const u8 *)source;
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	const u8 *const matchlimit = (iend - LASTLITERALS);
-
-	u8 *op = (u8 *)dest;
-
-	int ml, ml2, ml3, ml0;
-	const u8 *ref = NULL;
-	const u8 *start2 = NULL;
-	const u8 *ref2 = NULL;
-	const u8 *start3 = NULL;
-	const u8 *ref3 = NULL;
-	const u8 *start0;
-	const u8 *ref0;
-	int lastrun;
-
-	ip++;
-
-	/* Main Loop */
-	while (ip < mflimit) {
-		ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
-		if (!ml) {
-			ip++;
-			continue;
-		}
-
-		/* saved, in case we would skip too much */
-		start0 = ip;
-		ref0 = ref;
-		ml0 = ml;
-_search2:
-		if (ip+ml < mflimit)
-			ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
-				ip + 1, matchlimit, ml, &ref2, &start2);
-		else
-			ml2 = ml;
-		/* No better match */
-		if (ml2 == ml) {
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-			continue;
-		}
-
-		if (start0 < ip) {
-			/* empirical */
-			if (start2 < ip + ml0) {
-				ip = start0;
-				ref = ref0;
-				ml = ml0;
-			}
-		}
-		/*
-		 * Here, start0==ip
-		 * First Match too small : removed
-		 */
-		if ((start2 - ip) < 3) {
-			ml = ml2;
-			ip = start2;
-			ref = ref2;
-			goto _search2;
-		}
-
-_search3:
-		/*
-		 * Currently we have :
-		 * ml2 > ml1, and
-		 * ip1+3 <= ip2 (usually < ip1+ml1)
-		 */
-		if ((start2 - ip) < OPTIMAL_ML) {
-			int correction;
-			int new_ml = ml;
-			if (new_ml > OPTIMAL_ML)
-				new_ml = OPTIMAL_ML;
-			if (ip + new_ml > start2 + ml2 - MINMATCH)
-				new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
-			correction = new_ml - (int)(start2 - ip);
-			if (correction > 0) {
-				start2 += correction;
-				ref2 += correction;
-				ml2 -= correction;
-			}
-		}
-		/*
-		 * Now, we have start2 = ip+new_ml,
-		 * with new_ml=min(ml, OPTIMAL_ML=18)
-		 */
-		if (start2 + ml2 < mflimit)
-			ml3 = lz4hc_insertandgetwidermatch(ctx,
-				start2 + ml2 - 3, start2, matchlimit,
-				ml2, &ref3, &start3);
-		else
-			ml3 = ml2;
-
-		/* No better match : 2 sequences to encode */
-		if (ml3 == ml2) {
-			/* ip & ref are known; Now for ml */
-			if (start2 < ip+ml)
-				ml = (int)(start2 - ip);
-
-			/* Now, encode 2 sequences */
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-			ip = start2;
-			lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
-			continue;
-		}
-
-		/* Not enough space for match 2 : remove it */
-		if (start3 < ip + ml + 3) {
-			/*
-			 * can write Seq1 immediately ==> Seq2 is removed,
-			 * so Seq3 becomes Seq1
-			 */
-			if (start3 >= (ip + ml)) {
-				if (start2 < ip + ml) {
-					int correction =
-						(int)(ip + ml - start2);
-					start2 += correction;
-					ref2 += correction;
-					ml2 -= correction;
-					if (ml2 < MINMATCH) {
-						start2 = start3;
-						ref2 = ref3;
-						ml2 = ml3;
-					}
-				}
-
-				lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-				ip  = start3;
-				ref = ref3;
-				ml  = ml3;
-
-				start0 = start2;
-				ref0 = ref2;
-				ml0 = ml2;
-				goto _search2;
-			}
-
-			start2 = start3;
-			ref2 = ref3;
-			ml2 = ml3;
-			goto _search3;
-		}
-
-		/*
-		 * OK, now we have 3 ascending matches; let's write at least
-		 * the first one ip & ref are known; Now for ml
-		 */
-		if (start2 < ip + ml) {
-			if ((start2 - ip) < (int)ML_MASK) {
-				int correction;
-				if (ml > OPTIMAL_ML)
-					ml = OPTIMAL_ML;
-				if (ip + ml > start2 + ml2 - MINMATCH)
-					ml = (int)(start2 - ip) + ml2
-						- MINMATCH;
-				correction = ml - (int)(start2 - ip);
-				if (correction > 0) {
-					start2 += correction;
-					ref2 += correction;
-					ml2 -= correction;
-				}
-			} else
-				ml = (int)(start2 - ip);
-		}
-		lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-
-		ip = start2;
-		ref = ref2;
-		ml = ml2;
-
-		start2 = start3;
-		ref2 = ref3;
-		ml2 = ml3;
-
-		goto _search3;
-	}
-
-	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8) lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
-	/* End */
-	return (int) (((char *)op) - dest);
+    LZ4HC_CCtx_internal* ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
+    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
+    LZ4HC_init (ctx, (const BYTE*)src);
+    if (maxDstSize < LZ4_compressBound(srcSize))
+        return LZ4HC_compress_generic (ctx, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput);
+    else
+        return LZ4HC_compress_generic (ctx, src, dst, srcSize, maxDstSize, compressionLevel, notLimited);
 }
 
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel, void* wrkmem)
 {
-	int ret = -1;
-	int out_len = 0;
+    return LZ4_compress_HC_extStateHC(wrkmem, src, dst, srcSize, maxDstSize, compressionLevel);
+}
 
-	struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
-	lz4hc_init(hc4, (const u8 *)src);
-	out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
-		(char *)dst, (int)src_len);
+int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len, void *wrkmem)
+{
+  *dst_len = LZ4_compress_HC(src, dst, (int)src_len, (int)((size_t)dst_len), LZ4HC_DEFAULT_CLEVEL, wrkmem);
 
-	if (out_len < 0)
-		goto exit;
+  return (int)((size_t)dst_len);
+}
 
-	*dst_len = out_len;
-	return 0;
+/**************************************
+*  Streaming Functions
+**************************************/
 
-exit:
-	return ret;
+int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
+{
+    LZ4HC_CCtx_internal* ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    if (dictSize > 64 KB) {
+        dictionary += dictSize - 64 KB;
+        dictSize = 64 KB;
+    }
+    LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
+    if (dictSize >= 4) LZ4HC_Insert (ctxPtr, (const BYTE*)dictionary +(dictSize-3));
+    ctxPtr->end = (const BYTE*)dictionary + dictSize;
+    return dictSize;
+}
+
+
+/* compression */
+
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
+{
+    if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* Referencing remaining dictionary content */
+    /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
+    ctxPtr->lowLimit  = ctxPtr->dictLimit;
+    ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+    ctxPtr->dictBase  = ctxPtr->base;
+    ctxPtr->base = newBlock - ctxPtr->dictLimit;
+    ctxPtr->end  = newBlock;
+    ctxPtr->nextToUpdate = ctxPtr->dictLimit;   /* match referencing will resume from there */
+}
+
+static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
+                                            const char* source, char* dest,
+                                            int inputSize, int maxOutputSize, limitedOutput_directive limit)
+{
+    LZ4HC_CCtx_internal* ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    /* auto-init if forgotten */
+    if (ctxPtr->base == NULL) LZ4HC_init (ctxPtr, (const BYTE*) source);
+
+    /* Check overflow */
+    if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) {
+        size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
+        if (dictSize > 64 KB) dictSize = 64 KB;
+        LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
+    }
+
+    /* Check if blocks follow each other */
+    if ((const BYTE*)source != ctxPtr->end) LZ4HC_setExternalDict(ctxPtr, (const BYTE*)source);
+
+    /* Check overlapping input/dictionary space */
+    {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+        const BYTE* const dictEnd   = ctxPtr->dictBase + ctxPtr->dictLimit;
+        if ((sourceEnd > dictBegin) && ((const BYTE*)source < dictEnd)) {
+            if (sourceEnd > dictEnd) sourceEnd = dictEnd;
+            ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+            if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
+        }
+    }
+
+    return LZ4HC_compress_generic (ctxPtr, source, dest, inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    if (maxOutputSize < LZ4_compressBound(inputSize))
+        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput);
+    else
+        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, notLimited);
 }
-EXPORT_SYMBOL(lz4hc_compress);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4HC compressor");
+MODULE_DESCRIPTION("LZ4 HC compressor");
+
+/* Kernel exports */
+EXPORT_SYMBOL(LZ4_compress_HC);
+EXPORT_SYMBOL(lz4hc_compress);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v2 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version
  2017-01-07 16:55 ` [PATCH v2 0/4] " Sven Schmidt
  2017-01-07 16:55   ` [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
@ 2017-01-07 16:55   ` Sven Schmidt
  2017-01-08 11:23     ` Greg KH
  2017-01-07 16:55   ` [PATCH v2 3/4] crypto: Change LZ4 modules " Sven Schmidt
  2017-01-07 16:55   ` [PATCH v2 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to comply " Sven Schmidt
  3 siblings, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2017-01-07 16:55 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt

This patch updates the unlz4 wrapper to work with the new LZ4 kernel module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 lib/decompress_unlz4.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 036fc88..1b0baf3 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 		error("NULL input pointer and missing fill function");
 		goto exit_1;
 	} else {
-		inp = large_malloc(lz4_compressbound(uncomp_chunksize));
+		inp = large_malloc(LZ4_compressBound(uncomp_chunksize));
 		if (!inp) {
 			error("Could not allocate input buffer");
 			goto exit_1;
@@ -136,7 +136,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			inp += 4;
 			size -= 4;
 		} else {
-			if (chunksize > lz4_compressbound(uncomp_chunksize)) {
+			if (chunksize > LZ4_compressBound(uncomp_chunksize)) {
 				error("chunk length is longer than allocated");
 				goto exit_2;
 			}
@@ -152,11 +152,14 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			out_len -= dest_len;
 		} else
 			dest_len = out_len;
-		ret = lz4_decompress(inp, &chunksize, outp, dest_len);
+
+		ret = LZ4_decompress_fast(inp, outp, dest_len);
+		chunksize = ret;
 #else
 		dest_len = uncomp_chunksize;
-		ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
-				&dest_len);
+
+		ret = LZ4_decompress_safe(inp, outp, chunksize, dest_len);
+		dest_len = ret;
 #endif
 		if (ret < 0) {
 			error("Decoding failed");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v2 3/4] crypto: Change LZ4 modules to work with new LZ4 module version
  2017-01-07 16:55 ` [PATCH v2 0/4] " Sven Schmidt
  2017-01-07 16:55   ` [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
  2017-01-07 16:55   ` [PATCH v2 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
@ 2017-01-07 16:55   ` Sven Schmidt
  2017-01-07 16:55   ` [PATCH v2 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to comply " Sven Schmidt
  3 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-07 16:55 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt

This patch updates the crypto modules using LZ4 compression to work with the
new LZ4 module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 crypto/lz4.c   | 27 ++++++++++++++-------------
 crypto/lz4hc.c | 25 +++++++++++++------------
 2 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/crypto/lz4.c b/crypto/lz4.c
index 99c1b2c..b969e5f 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -66,15 +66,16 @@ static void lz4_exit(struct crypto_tfm *tfm)
 static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
 				 u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len;
 
-	err = lz4_compress(src, slen, dst, &tmp_len, ctx);
+	out_len = LZ4_compress_default(src, dst, slen, (int)((size_t)dlen), ctx);
 
-	if (err < 0)
-		return -EINVAL;
+	if (out_len == 0) {
+		// out_len is 0 means an error occured
+			return -EINVAL;
+	}
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -96,16 +97,16 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len;
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
+	out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen));
+	if (out_len < 0) {
+		// out_len of less than 0 means an error occured
 		return -EINVAL;
+	}
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return out_len;
 }
 
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 75ffc4a..bf2ceb7 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -65,15 +65,16 @@ static void lz4hc_exit(struct crypto_tfm *tfm)
 static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len;
 
-	err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
+	out_len = LZ4_compress_HC(src, dst, slen, (int)((size_t)dlen), LZ4HC_DEFAULT_CLEVEL, ctx);
 
-	if (err < 0)
+	if (out_len == 0) {
+		// out_len of 0 -> error
 		return -EINVAL;
+	}
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -97,16 +98,16 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
 				     u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len;
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
+	out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen));
+  if (out_len < 0) {
+		// out_len of less than 0 means an error occured
 		return -EINVAL;
+	}
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return out_len;
 }
 
 static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v2 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to comply with new LZ4 module version
  2017-01-07 16:55 ` [PATCH v2 0/4] " Sven Schmidt
                     ` (2 preceding siblings ...)
  2017-01-07 16:55   ` [PATCH v2 3/4] crypto: Change LZ4 modules " Sven Schmidt
@ 2017-01-07 16:55   ` Sven Schmidt
  2017-01-07 21:33     ` Kees Cook
  3 siblings, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2017-01-07 16:55 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt

This patch updates fs/pstore and fs/squashfs to use the updated functions from
the new LZ4 module.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 fs/pstore/platform.c      | 14 ++++++++------
 fs/squashfs/lz4_wrapper.c | 12 ++++++------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 729677e..a0d8ca8 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -342,31 +342,33 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_compress(in, inlen, out, &outlen, workspace);
+	ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
 	if (ret) {
+		// ret is 0 means an error occured
 		pr_err("lz4_compress error, ret = %d!\n", ret);
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
-	if (ret) {
+	ret = LZ4_decompress_safe(in, out, inlen, outlen);
+	if (ret < 0) {
+		// return value is < 0 in case of error
 		pr_err("lz4_decompress error, ret = %d!\n", ret);
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static void allocate_lz4(void)
 {
-	big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
+	big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
 	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
 	if (big_oops_buf) {
 		workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index ff4468b..a512399 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	struct squashfs_lz4 *stream = strm;
 	void *buff = stream->input, *data;
 	int avail, i, bytes = length, res;
-	size_t dest_len = output->length;
 
 	for (i = 0; i < b; i++) {
 		avail = min(bytes, msblk->devblksize - offset);
@@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 		put_bh(bh[i]);
 	}
 
-	res = lz4_decompress_unknownoutputsize(stream->input, length,
-					stream->output, &dest_len);
-	if (res)
+	res = LZ4_decompress_safe(stream->input, stream->output, length, output->length);
+	if (res < 0) {
+		// res of less than 0 means an error occured
 		return -EIO;
+	}
 
-	bytes = dest_len;
+	bytes = res;
 	data = squashfs_first_page(output);
 	buff = stream->output;
 	while (data) {
@@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	}
 	squashfs_finish_page(output);
 
-	return dest_len;
+	return res;
 }
 
 const struct squashfs_decompressor squashfs_lz4_comp_ops = {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to comply with new LZ4 module version
  2017-01-07 16:55   ` [PATCH v2 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to comply " Sven Schmidt
@ 2017-01-07 21:33     ` Kees Cook
  2017-01-10  9:45       ` Sven Schmidt
  0 siblings, 1 reply; 104+ messages in thread
From: Kees Cook @ 2017-01-07 21:33 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: Andrew Morton, bongkyu.kim, rsalvaterra, Sergey Senozhatsky,
	Greg KH, LKML, Herbert Xu, David S. Miller, linux-crypto,
	Anton Vorontsov, Colin Cross, Tony Luck, phillip

On Sat, Jan 7, 2017 at 8:55 AM, Sven Schmidt
<4sschmid@informatik.uni-hamburg.de> wrote:
> This patch updates fs/pstore and fs/squashfs to use the updated functions from
> the new LZ4 module.
>
> Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
> ---
>  fs/pstore/platform.c      | 14 ++++++++------
>  fs/squashfs/lz4_wrapper.c | 12 ++++++------
>  2 files changed, 14 insertions(+), 12 deletions(-)
>
> diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
> index 729677e..a0d8ca8 100644
> --- a/fs/pstore/platform.c
> +++ b/fs/pstore/platform.c
> @@ -342,31 +342,33 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
>  {
>         int ret;
>
> -       ret = lz4_compress(in, inlen, out, &outlen, workspace);
> +       ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
>         if (ret) {
> +               // ret is 0 means an error occured

If that's true, then shouldn't the "if" logic be changed? Also, here
and in all following comments are C++ style instead of kernel C-style.
This should be "/* ret == 0 means an error occured */", though really,
that should be obvious from the code and the comment isn't really
needed.

>                 pr_err("lz4_compress error, ret = %d!\n", ret);

If it's always going to be zero here, is there a better place to get
details on why it failed?

>                 return -EIO;
>         }
>
> -       return outlen;
> +       return ret;
>  }
>
>  static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
>  {
>         int ret;
>
> -       ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
> -       if (ret) {
> +       ret = LZ4_decompress_safe(in, out, inlen, outlen);
> +       if (ret < 0) {
> +               // return value is < 0 in case of error
>                 pr_err("lz4_decompress error, ret = %d!\n", ret);
>                 return -EIO;
>         }
>
> -       return outlen;
> +       return ret;
>  }
>
>  static void allocate_lz4(void)
>  {
> -       big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
> +       big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
>         big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
>         if (big_oops_buf) {
>                 workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
> diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
> index ff4468b..a512399 100644
> --- a/fs/squashfs/lz4_wrapper.c
> +++ b/fs/squashfs/lz4_wrapper.c
> @@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
>         struct squashfs_lz4 *stream = strm;
>         void *buff = stream->input, *data;
>         int avail, i, bytes = length, res;
> -       size_t dest_len = output->length;
>
>         for (i = 0; i < b; i++) {
>                 avail = min(bytes, msblk->devblksize - offset);
> @@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
>                 put_bh(bh[i]);
>         }
>
> -       res = lz4_decompress_unknownoutputsize(stream->input, length,
> -                                       stream->output, &dest_len);
> -       if (res)
> +       res = LZ4_decompress_safe(stream->input, stream->output, length, output->length);
> +       if (res < 0) {
> +               // res of less than 0 means an error occured
>                 return -EIO;
> +       }
>
> -       bytes = dest_len;
> +       bytes = res;
>         data = squashfs_first_page(output);
>         buff = stream->output;
>         while (data) {
> @@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
>         }
>         squashfs_finish_page(output);
>
> -       return dest_len;
> +       return res;
>  }
>
>  const struct squashfs_decompressor squashfs_lz4_comp_ops = {
> --
> 2.1.4
>

-Kees

-- 
Kees Cook
Nexus Security

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2017-01-07 16:55   ` [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
@ 2017-01-08 11:22     ` Greg KH
  2017-01-10  9:32       ` Sven Schmidt
  2017-01-08 11:25     ` Greg KH
  1 sibling, 1 reply; 104+ messages in thread
From: Greg KH @ 2017-01-08 11:22 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, linux-kernel,
	herbert, davem, linux-crypto, anton, ccross, keescook, tony.luck,
	phillip

On Sat, Jan 07, 2017 at 05:55:42PM +0100, Sven Schmidt wrote:
> +/*!LZ4_compressbound() :
> +    Provides the maximum size that LZ4 may output in a "worst case" scenario
> +    (input data not compressible)
> +*/

Odd coding style, please use kerneldoc format if you are going to have
comments like this.

> +static inline int LZ4_compressBound(int isize) {
> +  return LZ4_COMPRESSBOUND(isize);

And follow the proper kernel coding style rules, putting your patches
through scripts/checkpatch.pl should help you out here.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version
  2017-01-07 16:55   ` [PATCH v2 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
@ 2017-01-08 11:23     ` Greg KH
  0 siblings, 0 replies; 104+ messages in thread
From: Greg KH @ 2017-01-08 11:23 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, linux-kernel,
	herbert, davem, linux-crypto, anton, ccross, keescook, tony.luck,
	phillip

On Sat, Jan 07, 2017 at 05:55:43PM +0100, Sven Schmidt wrote:
> This patch updates the unlz4 wrapper to work with the new LZ4 kernel module version.
> 
> Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
> ---
>  lib/decompress_unlz4.c | 13 ++++++++-----
>  1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
> index 036fc88..1b0baf3 100644
> --- a/lib/decompress_unlz4.c
> +++ b/lib/decompress_unlz4.c
> @@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
>  		error("NULL input pointer and missing fill function");
>  		goto exit_1;
>  	} else {
> -		inp = large_malloc(lz4_compressbound(uncomp_chunksize));
> +		inp = large_malloc(LZ4_compressBound(uncomp_chunksize));

Having functions differ by different cases of the characters is ripe for
abuse and confusion.  Please never do that, especially as these "new"
functions you created don't follow the correct kernel coding style rules
:(

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2017-01-07 16:55   ` [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
  2017-01-08 11:22     ` Greg KH
@ 2017-01-08 11:25     ` Greg KH
  2017-01-08 11:33       ` Rui Salvaterra
  2017-01-10  9:21       ` Sven Schmidt
  1 sibling, 2 replies; 104+ messages in thread
From: Greg KH @ 2017-01-08 11:25 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, linux-kernel,
	herbert, davem, linux-crypto, anton, ccross, keescook, tony.luck,
	phillip

On Sat, Jan 07, 2017 at 05:55:42PM +0100, Sven Schmidt wrote:
> This patch updates LZ4 kernel module to LZ4 v1.7.2 by Yann Collet.
> The kernel module is inspired by the previous work by Chanho Min.
> The updated LZ4 module will not break existing code since there were alias
> methods added to ensure backwards compatibility.

Meta-comment.  Does this update include all of the security fixes that
we have made over the past few years to the lz4 code?  I don't want to
be adding back insecure functions that will cause us problems.

Specifically look at the changes I made in 2014 in this directory for an
example of what I am talking about here.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2017-01-08 11:25     ` Greg KH
@ 2017-01-08 11:33       ` Rui Salvaterra
  2017-01-10  9:21       ` Sven Schmidt
  1 sibling, 0 replies; 104+ messages in thread
From: Rui Salvaterra @ 2017-01-08 11:33 UTC (permalink / raw)
  To: Greg KH
  Cc: Sven Schmidt, akpm, bongkyu.kim, sergey.senozhatsky,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip

On 8 January 2017 at 11:25, Greg KH <gregkh@linuxfoundation.org> wrote:
> On Sat, Jan 07, 2017 at 05:55:42PM +0100, Sven Schmidt wrote:
>> This patch updates LZ4 kernel module to LZ4 v1.7.2 by Yann Collet.
>> The kernel module is inspired by the previous work by Chanho Min.
>> The updated LZ4 module will not break existing code since there were alias
>> methods added to ensure backwards compatibility.
>
> Meta-comment.  Does this update include all of the security fixes that
> we have made over the past few years to the lz4 code?  I don't want to
> be adding back insecure functions that will cause us problems.
>
> Specifically look at the changes I made in 2014 in this directory for an
> example of what I am talking about here.
>
> thanks,
>
> greg k-h

Also, this series must be tested on big endian, to make sure the last
fixes we made don't regress.


Thanks,

Rui

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2017-01-08 11:25     ` Greg KH
  2017-01-08 11:33       ` Rui Salvaterra
@ 2017-01-10  9:21       ` Sven Schmidt
  2017-01-10 10:00         ` Greg KH
  1 sibling, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2017-01-10  9:21 UTC (permalink / raw)
  To: gregkh
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, linux-kernel,
	herbert, davem, linux-crypto, anton, ccross, keescook, tony.luck,
	phillip, Sven Schmidt

On 01/08/2017 12:25 PM, Greg KH wrote:
>On Sat, Jan 07, 2017 at 05:55:42PM +0100, Sven Schmidt wrote:
>> This patch updates LZ4 kernel module to LZ4 v1.7.2 by Yann Collet.
>> The kernel module is inspired by the previous work by Chanho Min.
>> The updated LZ4 module will not break existing code since there were alias
>> methods added to ensure backwards compatibility.
>
> Meta-comment.  Does this update include all of the security fixes that
> we have made over the past few years to the lz4 code?  I don't want to
> be adding back insecure functions that will cause us problems.
>
> Specifically look at the changes I made in 2014 in this directory for an
> example of what I am talking about here.
>

Hi Greg,

it doesn't. I didn't have that in mind until now.
But I had a look at the related commits after I received your E-Mail and will review what I have to change
(and, obviously, do the changes :)).

Thanks,

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2017-01-08 11:22     ` Greg KH
@ 2017-01-10  9:32       ` Sven Schmidt
  2017-01-10  9:59         ` Greg KH
  0 siblings, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2017-01-10  9:32 UTC (permalink / raw)
  To: gregkh
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, linux-kernel,
	herbert, davem, linux-crypto, anton, ccross, keescook, tony.luck,
	phillip, Sven Schmidt

On 01/08/2017 12:23 PM, Greg KH wrote:
> On Sat, Jan 07, 2017 at 05:55:43PM +0100, Sven Schmidt wrote:
>> This patch updates the unlz4 wrapper to work with the new LZ4 kernel module version.
>>
>> Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
>> ---
>>  lib/decompress_unlz4.c | 13 ++++++++-----
>>  1 file changed, 8 insertions(+), 5 deletions(-)
>>
>> diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
>> index 036fc88..1b0baf3 100644
>> --- a/lib/decompress_unlz4.c
>> +++ b/lib/decompress_unlz4.c
>> @@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
>>  		error("NULL input pointer and missing fill function");
>>  		goto exit_1;
>>  	} else {
>> -		inp = large_malloc(lz4_compressbound(uncomp_chunksize));
>> +		inp = large_malloc(LZ4_compressBound(uncomp_chunksize));
>
> Having functions differ by different cases of the characters is ripe for
> abuse and confusion.  Please never do that, especially as these "new"
> functions you created don't follow the correct kernel coding style rules
> :(
>
> thanks,
>
> greg k-h

Hi Greg,

you're right about that. I think I put a little too much effort in keeping old function names here.
I will get rid of that.

I also want to quote your previous E-Mail here:

> And follow the proper kernel coding style rules, putting your patches
> through scripts/checkpatch.pl should help you out here.

Sorry, I didn't know about that particular script. I already put Patches 2, 3 and 4 through checkpatch.pl;
patch 1 is a little more to rework but I will, of course, do that.

Thanks,

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to comply with new LZ4 module version
  2017-01-07 21:33     ` Kees Cook
@ 2017-01-10  9:45       ` Sven Schmidt
  0 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-10  9:45 UTC (permalink / raw)
  To: keescook
  Cc: gregkh, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	tony.luck, phillip, Sven Schmidt

Hi Kees,

On 01/07/2017 10:33 PM, Kees Cook wrote:
 >On Sat, Jan 7, 2017 at 8:55 AM, Sven Schmidt
 ><4sschmid@informatik.uni-hamburg.de> wrote:
 >> This patch updates fs/pstore and fs/squashfs to use the updated functions from
 >> the new LZ4 module.
 >>
 >> Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
 >> ---
 >>  fs/pstore/platform.c      | 14 ++++++++------
 >>  fs/squashfs/lz4_wrapper.c | 12 ++++++------
 >>  2 files changed, 14 insertions(+), 12 deletions(-)
 >>
 >> diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
 >> index 729677e..a0d8ca8 100644
 >> --- a/fs/pstore/platform.c
 >> +++ b/fs/pstore/platform.c
 >> @@ -342,31 +342,33 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
 >>  {
 >>         int ret;
 >>
 >> -       ret = lz4_compress(in, inlen, out, &outlen, workspace);
 >> +       ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
 >>         if (ret) {
 >> +               // ret is 0 means an error occured
 >
 >If that's true, then shouldn't the "if" logic be changed? Also, here
 >and in all following comments are C++ style instead of kernel C-style.
 >This should be "/* ret == 0 means an error occured */", though really,
 >that should be obvious from the code and the comment isn't really
 >needed.

indeed, it should. I fixed that one.

 >>                 pr_err("lz4_compress error, ret = %d!\n", ret);
 >If it's always going to be zero here, is there a better place to get
 >details on why it failed?

 It is always going to be zero. Honestly, after looking at the current LZ4 in kernel again
 I don't get why there actually was a need to print the return value since lz4_compress
 will (as far as I see) always return -1 in case of an error while the new lz4_compress_fast/default
 will return 0 in such case. Maybe I should just stick with the error?

 Thanks,

 Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2017-01-10  9:32       ` Sven Schmidt
@ 2017-01-10  9:59         ` Greg KH
  0 siblings, 0 replies; 104+ messages in thread
From: Greg KH @ 2017-01-10  9:59 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, linux-kernel,
	herbert, davem, linux-crypto, anton, ccross, keescook, tony.luck,
	phillip

On Tue, Jan 10, 2017 at 10:32:17AM +0100, Sven Schmidt wrote:
> On 01/08/2017 12:23 PM, Greg KH wrote:
> > And follow the proper kernel coding style rules, putting your patches
> > through scripts/checkpatch.pl should help you out here.
> 
> Sorry, I didn't know about that particular script. I already put
> Patches 2, 3 and 4 through checkpatch.pl; patch 1 is a little more to
> rework but I will, of course, do that.

You do know about Documentation/SubmittingPatches, right?  If not, take
a look at that as well :)

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2017-01-10  9:21       ` Sven Schmidt
@ 2017-01-10 10:00         ` Greg KH
  2017-01-10 10:50           ` Willy Tarreau
  0 siblings, 1 reply; 104+ messages in thread
From: Greg KH @ 2017-01-10 10:00 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, linux-kernel,
	herbert, davem, linux-crypto, anton, ccross, keescook, tony.luck,
	phillip

On Tue, Jan 10, 2017 at 10:21:16AM +0100, Sven Schmidt wrote:
> On 01/08/2017 12:25 PM, Greg KH wrote:
> >On Sat, Jan 07, 2017 at 05:55:42PM +0100, Sven Schmidt wrote:
> >> This patch updates LZ4 kernel module to LZ4 v1.7.2 by Yann Collet.
> >> The kernel module is inspired by the previous work by Chanho Min.
> >> The updated LZ4 module will not break existing code since there were alias
> >> methods added to ensure backwards compatibility.
> >
> > Meta-comment.  Does this update include all of the security fixes that
> > we have made over the past few years to the lz4 code?  I don't want to
> > be adding back insecure functions that will cause us problems.
> >
> > Specifically look at the changes I made in 2014 in this directory for an
> > example of what I am talking about here.
> >
> 
> Hi Greg,
> 
> it doesn't. I didn't have that in mind until now.

Ick, those changes never got made "upstream"?  Not good, but makes sense
as we couldn't really find an "upstream" when we made them :(

As you took this code from somewhere, you might want to also push your
changes for these issues there as well, so that others don't run into
them in the future.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2.
  2017-01-10 10:00         ` Greg KH
@ 2017-01-10 10:50           ` Willy Tarreau
  0 siblings, 0 replies; 104+ messages in thread
From: Willy Tarreau @ 2017-01-10 10:50 UTC (permalink / raw)
  To: Greg KH
  Cc: Sven Schmidt, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip

On Tue, Jan 10, 2017 at 11:00:32AM +0100, Greg KH wrote:
> On Tue, Jan 10, 2017 at 10:21:16AM +0100, Sven Schmidt wrote:
> > On 01/08/2017 12:25 PM, Greg KH wrote:
> > >On Sat, Jan 07, 2017 at 05:55:42PM +0100, Sven Schmidt wrote:
> > >> This patch updates LZ4 kernel module to LZ4 v1.7.2 by Yann Collet.
> > >> The kernel module is inspired by the previous work by Chanho Min.
> > >> The updated LZ4 module will not break existing code since there were alias
> > >> methods added to ensure backwards compatibility.
> > >
> > > Meta-comment.  Does this update include all of the security fixes that
> > > we have made over the past few years to the lz4 code?  I don't want to
> > > be adding back insecure functions that will cause us problems.
> > >
> > > Specifically look at the changes I made in 2014 in this directory for an
> > > example of what I am talking about here.
> > >
> > 
> > Hi Greg,
> > 
> > it doesn't. I didn't have that in mind until now.
> 
> Ick, those changes never got made "upstream"?  Not good, but makes sense
> as we couldn't really find an "upstream" when we made them :(

I *seem* to remember that some of these changes were specific to our
implementation, and were discovered during a review after we worked on
the the LZO implementation bugs, though I could be wrong.

If this is the case, it is one more reason for being extra careful.

> As you took this code from somewhere, you might want to also push your
> changes for these issues there as well, so that others don't run into
> them in the future.

Agreed!

Willy

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v3 0/4] Update LZ4 compressor module
  2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
                   ` (4 preceding siblings ...)
  2017-01-07 16:55 ` [PATCH v2 0/4] " Sven Schmidt
@ 2017-01-21 15:09 ` Sven Schmidt
  2017-01-21 15:09   ` [PATCH 1/4] lib: " Sven Schmidt
                     ` (3 more replies)
  2017-01-22 19:35 ` [PATCH v4 0/4] Update LZ4 compressor module Sven Schmidt
                   ` (4 subsequent siblings)
  10 siblings, 4 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-21 15:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip


This patchset is for updating the LZ4 compression module to a version based
on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
which provides an "acceleration" parameter as a tradeoff between
high compression ratio and high compression speed.

We want to use LZ4 fast in order to support compression in lustre
and (mostly, based on that) investigate data reduction techniques in behalf of
storage systems.

Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
it is possible to enable applications to use fast and/or high compression
depending on the usecase.
For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
LZ4 in the kernel.

LZ4 homepage: http://www.lz4.org/
LZ4 source repository: https://github.com/lz4/lz4
Source version: 1.7.3

Benchmark (taken from [1], Core i5-4300U @1.9GHz):
----------------|--------------|----------------|----------
Compressor      | Compression  | Decompression  | Ratio
----------------|--------------|----------------|----------
memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101

[1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html

[PATCHv2 1/4] lib: Update LZ4  compressor module based on LZ4 v1.7.2
[PATCHv2 2/4] lib: Update decompress_unlz4 wrapper to work with new LZ4 module
[PATCHv2 3/4] crypto: Change lz4 modules to work with new LZ4 module
[PATCHv2 4/4] fs/pstore: fs/squashfs: Change LZ4 compressor functions to work with new LZ4 module

v2:
- Changed order of the patches since in the initial patchset the lz4.h was in the
  last patch but was referenced by the other ones
- Split lib/decompress_unlz4.c in an own patch
- Fixed errors reported by the buildbot
- Further refactorings
- Added more appropriate copyright note to include/linux/lz4.h

v3:
- Adjusted the code to satisfy kernel coding style (checkpatch.pl)
- Made sure the changes to LZ4 in Kernel (overflow checks etc.)
are included in the new module (they are)
- Removed the second LZ4_compressBound function with related name but
different return type
- Corrected version number (was LZ4 1.7.3)

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH 1/4] lib: Update LZ4 compressor module
  2017-01-21 15:09 ` [PATCH v3 0/4] Update LZ4 compressor module Sven Schmidt
@ 2017-01-21 15:09   ` Sven Schmidt
  2017-01-21 15:56     ` kbuild test robot
                       ` (3 more replies)
  2017-01-21 15:09   ` [PATCH 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
                     ` (2 subsequent siblings)
  3 siblings, 4 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-21 15:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt

This patch updates LZ4 kernel module to LZ4 v1.7.3 by Yann Collet.
The kernel module is inspired by the previous work by Chanho Min.
The updated LZ4 module will not break existing code since there were alias
methods added to ensure backwards compatibility.

API changes:

New method LZ4_compress_fast which differs from the variant available
in kernel by the new acceleration parameter,
allowing to trade compression ratio for more compression speed
and vice versa.

LZ4_decompress_fast is the respective decompression method, featuring a very
fast decoder (multiple GB/s per core), able to reach RAM speed in multi-core
systems. The decompressor allows to decompress data compressed with
LZ4 fast as well as the LZ4 HC (high compression) algorithm.

Also the useful functions LZ4_decompress_safe_partial
LZ4_compress_destsize were added. The latter reverses the logic by trying to
compress as much data as possible from source to dest while the former aims
to decompress partial blocks of data.

A bunch of streaming functions were also added
which allow compressig/decompressing data in multiple steps
(so called "streaming mode").

The methods lz4_compress and lz4_decompress_unknownoutputsize
are now known as LZ4_compress_default respectivley LZ4_decompress_safe.
The old methods are still available for providing backwards compatibility.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      |  581 +++++++++++++++++++++---
 lib/lz4/lz4_compress.c   | 1103 ++++++++++++++++++++++++++++++++--------------
 lib/lz4/lz4_decompress.c |  694 ++++++++++++++++++-----------
 lib/lz4/lz4defs.h        |  320 ++++++++------
 lib/lz4/lz4hc_compress.c |  855 ++++++++++++++++++++++-------------
 5 files changed, 2477 insertions(+), 1076 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 6b784c5..5bd21c1 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -1,87 +1,546 @@
-#ifndef __LZ4_H__
-#define __LZ4_H__
-/*
- * LZ4 Kernel Interface
+/* LZ4 Kernel Interface
  *
  * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ *
+ * This file is based on the original header file
+ * for LZ4 - Fast LZ compression algorithm.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *		* Redistributions of source code must retain the above copyright
+ *		  notice, this list of conditions and the following disclaimer.
+ *		* Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
+ */
+
+#ifndef __LZ4_H__
+#define __LZ4_H__
+
+#include <linux/types.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
+/*-************************************************************************
+ *	CONSTANTS
+ **************************************************************************/
+/*
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes
+ * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 10
+
+#define LZ4_MAX_INPUT_SIZE	0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)	(\
+	(unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
+	? 0 \
+	: (isize) + ((isize)/255) + 16)
+
+#define LZ4_ACCELERATION_DEFAULT 1
+#define LZ4_HASHLOG	 (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+#define LZ4HC_MIN_CLEVEL				3
+#define LZ4HC_DEFAULT_CLEVEL		9
+#define LZ4HC_MAX_CLEVEL				16
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE-1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+/*-************************************************************************
+ *	STREAMING CONSTANTS AND STRUCTURES
+ **************************************************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE	(LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
+
+#define LZ4_STREAMDECODESIZE_U64	4
+#define LZ4_STREAMDECODESIZE		 (LZ4_STREAMDECODESIZE_U64 * \
+	sizeof(unsigned long long))
+
+/*
+ * LZ4_stream_t :
+ * information structure to track an LZ4 stream.
+ */
+typedef struct {
+		uint32_t hashTable[LZ4_HASH_SIZE_U32];
+		uint32_t currentOffset;
+		uint32_t initCheck;
+		const uint8_t *dictionary;
+		uint8_t *bufferStart;
+		uint32_t dictSize;
+} LZ4_stream_t_internal;
+typedef union {
+		unsigned long long table[LZ4_STREAMSIZE_U64];
+		LZ4_stream_t_internal internal_donotuse;
+} LZ4_stream_t;
+
+/*
+ * LZ4_streamHC_t :
+ * information structure to track an LZ4HC stream.
+ */
+typedef struct {
+		unsigned int	 hashTable[LZ4HC_HASHTABLESIZE];
+		unsigned short	 chainTable[LZ4HC_MAXD];
+		/* next block to continue on current prefix */
+		const unsigned char *end;
+		/* All index relative to this position */
+		const unsigned char *base;
+		/* alternate base for extDict */
+		const unsigned char *dictBase;
+		/* below that point, need extDict */
+		unsigned int	 dictLimit;
+		/* below that point, no more dict */
+		unsigned int	 lowLimit;
+		/* index from which to continue dict update */
+		unsigned int	 nextToUpdate;
+		unsigned int	 compressionLevel;
+} LZ4HC_CCtx_internal;
+typedef union {
+		size_t table[LZ4_STREAMHCSIZE_SIZET];
+		LZ4HC_CCtx_internal internal_donotuse;
+} LZ4_streamHC_t;
+
+/*
+ * LZ4_streamDecode_t :
+ * information structure to track an LZ4 stream during decompression.
+ * init this structure using LZ4_setStreamDecode
+ * (or memset()) before first use
  */
-#define LZ4_MEM_COMPRESS	(16384)
-#define LZ4HC_MEM_COMPRESS	(262144 + (2 * sizeof(unsigned char *)))
+typedef struct {
+		const uint8_t *externalDict;
+		size_t extDictSize;
+		const uint8_t *prefixEnd;
+		size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+typedef union {
+		unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+		LZ4_streamDecode_t_internal internal_donotuse;
+} LZ4_streamDecode_t;
+
+/*-************************************************************************
+ *	SIZE OF STATE
+ **************************************************************************/
+#define LZ4_MEM_COMPRESS	LZ4_STREAMSIZE
+#define LZ4HC_MEM_COMPRESS	LZ4_STREAMHCSIZE
+
+/*-************************************************************************
+ *	Compression Functions
+ **************************************************************************/
 
 /*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
+ * LZ4_compressBound() :
+ *	Provides the maximum size that LZ4 may output in a "worst case" scenario
+ *	(input data not compressible)
  */
-static inline size_t lz4_compressbound(size_t isize)
+static inline int LZ4_compressBound(size_t isize)
 {
-	return isize + (isize / 255) + 16;
+	return (int)LZ4_COMPRESSBOUND(isize);
 }
 
 /*
+ * LZ4_compress_default()
+ *	Compresses 'sourceSize' bytes from buffer 'source'
+ *	into already allocated 'dest' buffer of size 'maxOutputSize'.
+ *	Compression is guaranteed to succeed if
+ *	'maxOutputSize' >= LZ4_compressBound(inputSize).
+ *	It also runs faster, so it's a recommended setting.
+ *	If the function cannot compress 'source'
+ *	into a more limited 'dest' budget,
+ *	compression stops *immediately*,
+ *	and the function result is zero.
+ *	As a consequence, 'dest' content is not valid.
+ *
+ *	source       : source address of the original data
+ *	dest         : output buffer address
+ *                 of the compressed data
+ *	inputSize    : Max supported value is
+ *                 LZ4_MAX_INPUT_SIZE
+ *	maxOutputSize: full or partial size of buffer 'dest'
+ *                 (which must be already allocated)
+ *	workmem      : address of the working memory.
+ *                 This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return       : the number of bytes written into buffer 'dest'
+ *   (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem);
+
+/*
+ * LZ4_compress_fast() :
+ *	Same as LZ4_compress_default(),
+ *	but allows to select an "acceleration" factor.
+ *	The larger the acceleration value,
+ *	the faster the algorithm,
+ *	but also the lesser the compression.
+ *	It's a trade-off. It can be fine tuned,
+ *	with each successive value
+ *	providing roughly +~3% to speed.
+ *	An acceleration value of "1"
+ *	is the same as regular LZ4_compress_default()
+ *	Values <= 0 will be replaced by
+ *	LZ4_ACCELERATION_DEFAULT, which is 1.
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem, int acceleration);
+
+/*
+ * LZ4_compress_destSize()
+ * Reverse the logic, by compressing as much data as possible
+ * from 'source' buffer into already allocated buffer 'dest'
+ * of size 'targetDestSize'.
+ * This function either compresses the entire 'source' content into 'dest'
+ * if it's large enough, or fill 'dest' buffer completely with as much data as
+ * ossible from 'source'.
+ *
+ *	source	      : source address of the original data
+ *	dest	        : output buffer address of the compressed data
+ *	inputSize	    : Max supported value is LZ4_MAX_INPUT_SIZE
+ *	*sourceSizePtr: will be modified to indicate how many bytes where read
+ *                  from 'source' to fill 'dest'.
+ *                  New value is necessarily <= old value.
+ *	workmem       : address of the working memory.
+ *                  This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
+			 or 0 if compression fails
+ */
+int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
+	int targetDestSize, void *wrkmem);
+
+/*
  * lz4_compress()
- *	src     : source address of the original data
- *	src_len : size of the original data
- *	dst	: output buffer address of the compressed data
- *		This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len : is the output size, which is returned after compress done
- *	workmem : address of the working memory.
- *		This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer and workmem must be already allocated with
+ *	src    : source address of the original data
+ *	src_len: size of the original data
+ *	dst    : output buffer address of the compressed data
+ *           This requires 'dst' of size LZ4_COMPRESSBOUND.
+ *	dst_len: is the output size, which is returned after compress done
+ *	workmem: address of the working memory.
+ *           This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return	: Success if return 0
+ *            Error if return (< 0)
+ *	note :	Destination buffer and workmem must be already allocated with
  *		the defined size.
  */
-int lz4_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
-  * lz4hc_compress()
-  *	 src	 : source address of the original data
-  *	 src_len : size of the original data
-  *	 dst	 : output buffer address of the compressed data
-  *		This requires 'dst' of size LZ4_COMPRESSBOUND.
-  *	 dst_len : is the output size, which is returned after compress done
-  *	 workmem : address of the working memory.
-  *		This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
-  *	 return  : Success if return 0
-  *		   Error if return (< 0)
-  *	 note :  Destination buffer and workmem must be already allocated with
-  *		 the defined size.
-  */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
-/*
- * lz4_decompress()
- *	src     : source address of the compressed data
- *	src_len : is the input size, whcih is returned after decompress done
- *	dest	: output buffer address of the decompressed data
- *	actual_dest_len: is the size of uncompressed data, supposing it's known
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
- *		slightly faster than lz4_decompress_unknownoutputsize()
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*-************************************************************************
+ *	Decompression Functions
+ **************************************************************************/
+
+/*
+ * LZ4_decompress_fast()
+ *	source      : source address of the compressed data
+ *	dst         : output buffer address of the decompressed data
+ *	originalSize: is the original and therefore uncompressed size
+ *	return : the number of bytes read from the source buffer
+ *           (in other words, the compressed size)
+ *           If the source stream is detected malformed, the function will
+ *           stop decoding and return a negative result.
+ *           Destination buffer must be already allocated.
+ *           Its size must be a minimum of 'originalSize' bytes.
+ *	note   : This function fully respect memory boundaries
+ *           for properly formed compressed data.
+ *           It is a bit faster than LZ4_decompress_safe().
+ *           However, it does not provide any protection against intentionally
+ *           modified data stream (malicious input).
+ *           Use this function in trusted environment only
+ *           (data to decode comes from a trusted source).
  */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len);
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
+
+/*
+ * LZ4_decompress_safe()
+ *	source             : source address of the compressed data
+ *	dest               : output buffer address of the decompressed data
+ *	compressedSize     : is the precise full size of the compressed block.
+ *	maxDecompressedSize: is the size of destination buffer,
+ *                       which must be already allocated.
+ *	return : the number of bytes decompressed into destination buffer
+ *           (necessarily <= maxDecompressedSize)
+ *           If destination buffer is not large enough, decoding will stop
+ *           and output an error code (<0).
+ *           If the source stream is detected malformed, the function will
+ *           stop decoding and return a negative result.
+ *           This function is protected against buffer overflow exploits,
+ *           including malicious data packets. It never writes outside
+ *           output buffer, nor reads outside input buffer.
+ */
+int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+
+/*
+ * LZ4_decompress_safe_partial() :
+ * This function decompress a compressed block of size 'compressedSize'
+ * at position 'source' into destination buffer 'dest'
+ * of size 'maxDecompressedSize'.
+ * The function tries to stop decompressing operation as soon as
+ * 'targetOutputSize' has been reached, reducing decompression time.
+ *
+ *	source             : source address of the compressed data
+ *	dest               : output buffer address of the decompressed data
+ *	compressedSize     : is the precise full size of the compressed block.
+ *	targetOutputSize   : the decompression operation will try
+ *                       to stop as soon as 'targetOutputSize'
+ *                       has been reached
+ *	maxDecompressedSize: is the size of destination buffer,
+ *                       which must be already allocated.
+ *	return  : the number of bytes decoded in the destination buffer
+ *            (necessarily <= maxDecompressedSize)
+ *	Note    : this number can be < 'targetOutputSize' should the compressed
+ *            block to decode be smaller. Always control how many bytes
+ *            were decoded. If the source stream is detected malformed,
+ *            the function will stop decoding and return a negative result.
+ *            This function never writes outside of output buffer,
+ *            and never reads outside of input buffer.
+ *            It is therefore protected against malicious data packets
+ */
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
 
 /*
- * lz4_decompress_unknownoutputsize()
+ * lz4_decompress_unknownoutputsize() :
  *	src     : source address of the compressed data
  *	src_len : is the input size, therefore the compressed size
- *	dest	: output buffer address of the decompressed data
+ *	dest    : output buffer address of the decompressed data
  *	dest_len: is the max size of the destination buffer, which is
- *			returned with actual size of decompressed data after
- *			decompress done
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
+ *            returned with actual size of decompressed data after
+ *            decompress done
+ * return: Success if return 0
+ *         Error if return (< 0)
+ * note:   Destination buffer must be already allocated.
  */
 int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len);
+	unsigned char *dest, size_t *dest_len);
+
+/*
+ * lz4_decompress() :
+ *	src            : source address of the compressed data
+ *	src_len        : is the input size,
+ *                   which is returned after decompress done
+ *	dest           : output buffer address of the decompressed data
+ *	actual_dest_len: is the size of uncompressed data, supposing it's known
+ *	return: Success if return 0
+ *          Error if return (< 0)
+ *	note  : Destination buffer must be already allocated.
+ *          slightly faster than lz4_decompress_unknownoutputsize()
+ */
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len);
+
+/*-************************************************************************
+ *	LZ4 HC Compression
+ **************************************************************************/
+
+/*! LZ4_compress_HC() :
+ * Compress data from `src` into `dst`, using the more powerful
+ * but slower "HC" algorithm. dst` must be already allocated.
+ * Compression is guaranteed to succeed if
+ * `dstCapacity >= LZ4_compressBound(srcSize)
+ * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE
+ *
+ *	src             : source address of the original data
+ *	dst             : output buffer address of the compressed data
+ *	srcSize         : Max supported value is LZ4_MAX_INPUT_SIZE
+ *	dstCapacity     : full or partial size of buffer 'dst'
+ *                    (which must be already allocated)
+ *	compressionLevel: Recommended values are between 4 and 9, although any
+ *                    value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *                    Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ *	wrkmem          : address of the working memory.
+ *                    This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+ *	return : the number of bytes written into 'dst'
+ *           or 0 if compression fails.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
+	int compressionLevel, void *wrkmem);
+
+/*
+ * lz4hc_compress()
+ *	src    : source address of the original data
+ *	src_len: size of the original data
+ *	dst    : output buffer address of the compressed data
+ *           This requires 'dst' of size LZ4_COMPRESSBOUND.
+ *	dst_len: is the output size, which is returned after compress done
+ *	workmem: address of the working memory.
+ *           This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
+ * return	: Success if return 0
+ *          Error if return (< 0)
+ * note   : Destination buffer and workmem must be already allocated with
+ *          the defined size.
+ */
+int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*
+ *	These functions compress data in successive blocks of any size,
+ *	using previous blocks as dictionary. One key assumption is that previous
+ *	blocks (up to 64 KB) remain read-accessible while
+ *	compressing next blocks.
+ *	There is an exception for ring buffers, which can be smaller than 64 KB.
+ *	Ring buffers scenario is automatically detected and handled by
+ *	LZ4_compress_HC_continue().
+ *	Before starting compression, state must be properly initialized,
+ *	using LZ4_resetStreamHC().
+ *	A first "fictional block" can then be designated as initial dictionary,
+ *	using LZ4_loadDictHC() (Optional).
+ *	Then, use LZ4_compress_HC_continue() to compress each successive block.
+ *	Previous memory blocks (including initial dictionary when present) must
+ *	remain accessible and unmodified during compression.
+ *	'dst' buffer should be sized to handle worst case scenarios, using
+ *	LZ4_compressBound(), to ensure operation success.
+ *	If, for any reason, previous data blocks can't be preserved unmodified
+ *	in memory during next compression block,
+ *	you must save it to a safer memory space, using LZ4_saveDictHC().
+ *	Return value of LZ4_saveDictHC() is the size of dictionary
+ *	effectively saved into 'safeBuffer'.
+ */
+void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
+int	LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
+	int dictSize);
+int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize);
+int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
+	int maxDictSize);
+
+/*-*********************************************
+ *	Streaming Compression Functions
+ ***********************************************/
+/*
+ * LZ4_resetStream() :
+ *	An LZ4_stream_t structure can be allocated once
+ *	and re-used multiple times.
+ *	Use this function to init an allocated `LZ4_stream_t` structure
+ *	and start a new compression.
+ */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
+
+/*
+ * LZ4_loadDict() :
+ *	Use this function to load a static dictionary into LZ4_stream.
+ *	Any previous data will be forgotten, only 'dictionary'
+ *	will remain in memory.
+ *	Loading a size of 0 is allowed.
+ *	Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
+	int dictSize);
+
+/*
+ * LZ4_compress_fast_continue() :
+ *	Compress buffer content 'src',
+ *	using data from previously compressed blocks
+ *	as dictionary to improve compression ratio.
+ *	Important : Previous data blocks are assumed to still
+ *	be present and unmodified !
+ *	'dst' buffer must be already allocated.
+ *	If maxDstSize >= LZ4_compressBound(srcSize),
+ *	compression is guaranteed to succeed, and runs faster.
+ *	If not, and if compressed data cannot fit into 'dst' buffer size,
+ *	compression stops, and function returns a zero.
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize, int acceleration);
+
+/*
+ * LZ4_saveDict() :
+ *	If previously compressed data block is not guaranteed
+ *	to remain available at its memory location,
+ *	save it into a safer place (char *safeBuffer).
+ *	Note : you don't need to call LZ4_loadDict() afterwards,
+ *         dictionary is immediately usable, you can therefore call
+ *         LZ4_compress_fast_continue().
+ *	Return : saved dictionary size in bytes (necessarily <= dictSize),
+ *           or 0 if error.
+ */
+int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
+
+/*
+ * LZ4_setStreamDecode() :
+ *	Use this function to instruct where to find the dictionary.
+ *	Setting a size of 0 is allowed (same effect as reset).
+ *	@return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize);
+
+/*
+ * LZ4_decompress_*_continue() :
+ *	These decoding functions allow decompression of multiple blocks
+ *	in "streaming" mode.
+ *	Previously decoded blocks *must* remain available at the memory position
+ *	where they were decoded (up to 64 KB)
+ *	In the case of a ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize);
+
+/*
+ * LZ4_decompress_*_usingDict() :
+ *	These decoding functions work the same as
+ *	a combination of LZ4_setStreamDecode() followed by
+ *	LZ4_decompress_*_continue()
+ *	They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize, const char *dictStart,
+	int dictSize);
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize);
+
 #endif
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 28321d8..4cd6d1b 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -1,19 +1,16 @@
 /*
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *		* Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *		* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,417 +22,873 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lz4.h>
 #include <asm/unaligned.h>
-#include "lz4defs.h"
 
-/*
- * LZ4_compressCtx :
- * -----------------
- * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
- * maximum size 'maxOutputSize'.  * If it cannot achieve it, compression
- * will stop, and result of the function will be zero.
- * return : the number of bytes written in buffer 'dest', or 0 if the
- * compression fails
- */
-static inline int lz4_compressctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+/*-******************************
+ *	Compression functions
+ ********************************/
+static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
 {
-	HTYPE *hashtable = (HTYPE *)ctx;
-	const u8 *ip = (u8 *)source;
-#if LZ4_ARCH64
-	const BYTE * const base = ip;
+	if (tableType == byU16)
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH*8) - (LZ4_HASHLOG + 1)));
+	else
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH*8) - LZ4_HASHLOG));
+}
+
+static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+{
+	const U32 hashLog = (tableType == byU16)
+		? LZ4_HASHLOG + 1
+		: LZ4_HASHLOG;
+
+#ifdef __LITTLE_ENDIAN__
+	static const U64 prime5bytes = 889523592379ULL;
+
+	return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
 #else
-	const int base = 0;
+	static const U64 prime8bytes = 11400714785074694791ULL;
+
+	return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
 #endif
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+}
+
+static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
+{
+#if LZ4_ARCH64
+	if (tableType == byU32)
+		return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+#endif
+
+	return LZ4_hash4(LZ4_read32(p), tableType);
+}
+
+static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
+	tableType_t const tableType, const BYTE *srcBase)
+{
+	switch (tableType) {
+	case byPtr:
+	{
+		const BYTE **hashTable = (const BYTE **)tableBase;
+
+		hashTable[h] = p;
+		return;
+	}
+	case byU32:
+	{
+		U32 *hashTable = (U32 *) tableBase;
+
+		hashTable[h] = (U32)(p - srcBase);
+		return;
+	}
+	case byU16:
+	{
+		U16 *hashTable = (U16 *) tableBase;
+
+		hashTable[h] = (U16)(p - srcBase);
+		return;
+	}
+	}
+}
+
+static inline void LZ4_putPosition(const BYTE *p, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	if (tableType == byPtr) {
+		const BYTE **hashTable = (const BYTE **) tableBase;
+
+		return hashTable[h];
+	}
+
+	if (tableType == byU32) {
+		const U32 * const hashTable = (U32 *) tableBase;
+
+		return hashTable[h] + srcBase;
+	}
+
+	{
+		/* default, to ensure a return */
+		const U16 * const hashTable = (U16 *) tableBase;
+		return hashTable[h] + srcBase;
+	}
+}
+
+static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+
+/*
+ * LZ4_compress_generic() :
+ * inlined, to ensure branches are decided at compilation time
+ */
+static inline int LZ4_compress_generic(
+	LZ4_stream_t_internal * const dictPtr,
+	const char * const source,
+	char * const dest,
+	const int inputSize,
+	const int maxOutputSize,
+	const limitedOutput_directive outputLimited,
+	const tableType_t tableType,
+	const dict_directive dict,
+	const dictIssue_directive dictIssue,
+	const U32 acceleration)
+{
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *base;
+	const BYTE *lowLimit;
+	const BYTE * const lowRefLimit = ip - dictPtr->dictSize;
+	const BYTE * const dictionary = dictPtr->dictionary;
+	const BYTE * const dictEnd = dictionary + dictPtr->dictSize;
+	const size_t dictDelta = dictEnd - (const BYTE *)source;
+	const BYTE *anchor = (const BYTE *) source;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dest;
+	BYTE * const olimit = op + maxOutputSize;
+
+	U32 forwardH;
+	size_t refDelta = 0;
+
+	/* Init conditions */
+	if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) {
+		/* Unsupported inputSize, too large (or negative) */
+		return 0;
+	}
+	switch (dict) {
+	case noDict:
+	default:
+		base = (const BYTE *)source;
+		lowLimit = (const BYTE *)source;
+		break;
+	case withPrefix64k:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source - dictPtr->dictSize;
+		break;
+	case usingExtDict:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source;
+		break;
+	}
+
+	if ((tableType == byU16)
+		&& (inputSize >= LZ4_64Klimit)) {
+		/* Size too large (not within 64K limit) */
+		return 0;
+	}
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+	if (inputSize < LZ4_minLength) {
+		/* Input too small, no compression (all literals) */
+		goto _last_literals;
+	}
 
 	/* First Byte */
-	hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-	ip++;
-	forwardh = LZ4_HASH_VALUE(ip);
+	LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (unlikely(forwardip > mflimit))
-				goto _last_literals;
-
-			forwardh = LZ4_HASH_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = ip - base;
-		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+		{ const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = acceleration
+				<< LZ4_skipTrigger;
+
+			do {
+				U32 const h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h,
+					dictPtr->hashTable,
+					tableType, base);
+				if (dict == usingExtDict) {
+					if (match < (const BYTE *)source) {
+						refDelta = dictDelta;
+						lowLimit = dictionary;
+					} else {
+						refDelta = 0;
+						lowLimit = (const BYTE *)source;
+				}	 }
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
+					tableType, base);
+
+			} while (((dictIssue == dictSmall)
+					? (match < lowRefLimit)
+					: 0)
+				|| ((tableType == byU16)
+					? 0
+					: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match + refDelta)
+					!= LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source) &&
-			unlikely(ip[-1] == ref[-1])) {
+		while (((ip > anchor) & (match + refDelta > lowLimit))
+			&& (unlikely(ip[-1] == match[refDelta - 1]))) {
 			ip--;
-			ref--;
-		}
+			match--;
+			}
 
-		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
-			(length >> 8) > oend))
-			return 0;
+		/* Encode Literals */
+		{ unsigned const int litLength = (unsigned int)(ip - anchor);
 
-		if (length >= (int)RUN_MASK) {
-			int len;
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+			token = op++;
+			if ((outputLimited) &&
+				/* Check output buffer overflow */
+				(unlikely(op + litLength +
+					(2 + 1 + LASTLITERALS) +
+					(litLength/255) > olimit)))
+				return 0;
+			if (litLength >= RUN_MASK) {
+				int len = (int)litLength - RUN_MASK;
+
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255 ; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength<<ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
-		while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
-			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 		/* Encode MatchLength */
-		length = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
-			return 0;
-		if (length >= (int)ML_MASK) {
-			*token += ML_MASK;
-			length -= ML_MASK;
-			for (; length > 509 ; length -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (length > 254) {
-				length -= 255;
-				*op++ = 255;
+		{	 unsigned int matchCode;
+
+			if ((dict == usingExtDict)
+				&& (lowLimit == dictionary)) {
+				const BYTE *limit;
+
+				match += refDelta;
+				limit = ip + (dictEnd - match);
+				if (limit > matchlimit)
+					limit = matchlimit;
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, limit);
+				ip += MINMATCH + matchCode;
+				if (ip == limit) {
+					unsigned const int more = LZ4_count(ip,
+						(const BYTE *)source,
+						matchlimit);
+
+					matchCode += more;
+					ip += more;
+				}
+			} else {
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, matchlimit);
+				ip += MINMATCH + matchCode;
 			}
-			*op++ = (u8)length;
-		} else
-			*token += length;
+
+			if (outputLimited &&
+				/* Check output buffer overflow */
+				(unlikely(op +
+					(1 + LASTLITERALS) +
+					(matchCode>>8) > olimit)))
+				return 0;
+			if (matchCode >= ML_MASK) {
+				*token += ML_MASK;
+				matchCode -= ML_MASK;
+				LZ4_write32(op, 0xFFFFFFFF);
+				while (matchCode >= 4*255) {
+					op += 4;
+					LZ4_write32(op, 0xFFFFFFFF);
+					matchCode -= 4*255;
+				}
+				op += matchCode / 255;
+				*op++ = (BYTE)(matchCode % 255);
+			} else
+				*token += (BYTE)(matchCode);
+		}
+
+		anchor = ip;
 
 		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		if (ip > mflimit)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
+		LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH_VALUE(ip)];
-		hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
+		match = LZ4_getPosition(ip, dictPtr->hashTable,
+			tableType, base);
+		if (dict == usingExtDict) {
+			if (match < (const BYTE *)source) {
+				refDelta = dictDelta;
+				lowLimit = dictionary;
+			} else {
+				refDelta = 0;
+				lowLimit = (const BYTE *)source;
+			}
+		}
+		LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+		if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
+			&& (match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
 			token = op++;
 			*token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (((char *)op - dest) + lastrun + 1
-		+ ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
-		return 0;
-
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{	 size_t const lastRun = (size_t)(iend - anchor);
+		if ((outputLimited) &&
+			/* Check output buffer overflow */
+			((op - (BYTE *)dest) + lastRun + 1 +
+			((lastRun + 255 - RUN_MASK)/255) > (U32)maxOutputSize))
+			return 0;
+		if (lastRun >= RUN_MASK) {
+			size_t accumulator = lastRun - RUN_MASK;
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255 ; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRun);
+		op += lastRun;
+	}
 
 	/* End */
-	return (int)(((char *)op) - dest);
+	return (int) (((char *)op) - dest);
 }
 
-static inline int lz4_compress64kctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
+	int inputSize, int maxOutputSize, int acceleration)
 {
-	u16 *hashtable = (u16 *)ctx;
-	const u8 *ip = (u8 *) source;
-	const u8 *anchor = ip;
-	const u8 *const base = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int len, length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+	#if LZ4_ARCH64
+	tableType_t tableType = byU32;
+	#else
+	tableType_t tableType = byPtr;
+	#endif
+
+	LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
+
+	LZ4_resetStream((LZ4_stream_t *)state);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
+
+	if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, tableType, noDict,
+				noDictIssue, acceleration);
+	} else {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, tableType, noDict,
+				noDictIssue, acceleration);
+	}
+}
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem, int acceleration)
+{
+	return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize,
+		maxOutputSize, acceleration);
+}
+EXPORT_SYMBOL(LZ4_compress_fast);
+
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem)
+{
+	return LZ4_compress_fast(source, dest, inputSize,
+		maxOutputSize, wrkmem, 1);
+}
+EXPORT_SYMBOL(LZ4_compress_default);
+
+/*-******************************
+ *	*_destSize() variant
+ ********************************/
+
+static int LZ4_compress_destSize_generic(
+	LZ4_stream_t_internal * const ctx,
+	const char * const src,
+	char * const dst,
+	int * const srcSizePtr,
+	const int targetDstSize,
+	const tableType_t tableType)
+{
+	const BYTE *ip = (const BYTE *) src;
+	const BYTE *base = (const BYTE *) src;
+	const BYTE *lowLimit = (const BYTE *) src;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + *srcSizePtr;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dst;
+	BYTE * const oend = op + targetDstSize;
+	BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */
+		- 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */;
+	BYTE * const oMaxMatch = op + targetDstSize
+		- (LASTLITERALS + 1 /* token */);
+	BYTE * const oMaxSeq = oMaxLit - 1 /* token */;
+
+	U32 forwardH;
+
+	/* Init conditions */
+	/* Impossible to store anything */
+	if (targetDstSize < 1)
+		return 0;
+	/* Unsupported input size, too large (or negative) */
+	if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE)
+		return 0;
+	/* Size too large (not within 64K limit) */
+	if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit))
+		return 0;
+	/* Input too small, no compression (all literals) */
+	if (*srcSizePtr < LZ4_minLength)
+		goto _last_literals;
 
 	/* First Byte */
-	ip++;
-	forwardh = LZ4_HASH64K_VALUE(ip);
+	*srcSizePtr = 0;
+	LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (forwardip > mflimit)
-				goto _last_literals;
-
-			forwardh = LZ4_HASH64K_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = (u16)(ip - base);
-		} while (A32(ref) != A32(ip));
+		{	 const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = 1 << LZ4_skipTrigger;
+
+			do {
+				U32 h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h, ctx->hashTable,
+					tableType, base);
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h,
+					ctx->hashTable, tableType,
+					base);
+
+			} while (((tableType == byU16)
+				? 0
+				: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match) != LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source)
-			&& (ip[-1] == ref[-1])) {
-			ip--;
-			ref--;
-		}
+		while ((ip > anchor)
+			&& (match > lowLimit)
+			&& (unlikely(ip[-1] == match[-1]))) {
+			ip--; match--;
+			}
 
 		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* Check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS)
-			+ (length >> 8) > oend))
-			return 0;
-		if (length >= (int)RUN_MASK) {
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+		{	 unsigned int litLength = (unsigned int)(ip - anchor);
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
+			token = op++;
+			if (op + ((litLength + 240)/255)
+				+ litLength > oMaxLit) {
+				/* Not enough space for a last match */
+				op--;
+				goto _last_literals;
+			}
+			if (litLength >= RUN_MASK) {
+				unsigned int len = litLength - RUN_MASK;
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255 ; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength<<ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
+		/* Encode MatchLength */
+		{	 size_t matchLength = LZ4_count(ip + MINMATCH,
+			match + MINMATCH, matchlimit);
 
-		while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
+			if (op + ((matchLength + 240)/255) > oMaxMatch) {
+				/* Match description too long : reduce it */
+				matchLength = (15 - 1) + (oMaxMatch - op) * 255;
 			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
+			ip += MINMATCH + matchLength;
+
+			if (matchLength >= ML_MASK) {
+				*token += ML_MASK;
+				matchLength -= ML_MASK;
+				while (matchLength >= 255) {
+					matchLength -= 255; *op++ = 255;
+				}
+				*op++ = (BYTE)matchLength;
+			} else
+				*token += (BYTE)(matchLength);
 		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 
-		/* Encode MatchLength */
-		len = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
-			return 0;
-		if (len >= (int)ML_MASK) {
-			*token += ML_MASK;
-			len -= ML_MASK;
-			for (; len > 509 ; len -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (len > 254) {
-				len -= 255;
-				*op++ = 255;
-			}
-			*op++ = (u8)len;
-		} else
-			*token += len;
+		anchor = ip;
 
-		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		/* Test end of block */
+		if (ip > mflimit)
+			break;
+		if (op > oMaxSeq)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
+		LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
-		hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
-		if (A32(ref) == A32(ip)) {
-			token = op++;
-			*token = 0;
+		match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
+		LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+
+		if ((match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match) == LZ4_read32(ip))) {
+			token = op++; *token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH64K_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
-		return 0;
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{	 size_t lastRunSize = (size_t)(iend - anchor);
+
+		if (op + 1 /* token */
+			+ ((lastRunSize + 240)/255) /* litLength */
+			+ lastRunSize /* literals */ > oend) {
+			/* adapt lastRunSize to fill 'dst' */
+			lastRunSize	= (oend - op) - 1;
+			lastRunSize -= (lastRunSize + 240)/255;
+		}
+		ip = anchor + lastRunSize;
+
+		if (lastRunSize >= RUN_MASK) {
+			size_t accumulator = lastRunSize - RUN_MASK;
+
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255 ; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRunSize<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRunSize);
+		op += lastRunSize;
+	}
+
 	/* End */
-	return (int)(((char *)op) - dest);
+	*srcSizePtr = (int) (((const char *)ip) - src);
+	return (int) (((char *)op) - dst);
 }
 
-int lz4_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_destSize_extState(LZ4_stream_t *state, const char *src,
+	char *dst, int *srcSizePtr, int targetDstSize)
 {
-	int ret = -1;
-	int out_len = 0;
+	#if LZ4_ARCH64
+	tableType_t tableType = byU32;
+	#else
+	tableType_t tableType = byPtr;
+	#endif
+
+	LZ4_resetStream(state);
+
+	if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {
+		/* compression success is guaranteed */
+		return LZ4_compress_fast_extState(
+			state, src, dst, *srcSizePtr,
+			targetDstSize, 1);
+	} else {
+		if (*srcSizePtr < LZ4_64Klimit)
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, byU16);
+		else
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, tableType);
+	}
+}
 
-	if (src_len < LZ4_64KLIMIT)
-		out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
-	else
-		out_len = lz4_compressctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
 
-	if (out_len < 0)
-		goto exit;
+int LZ4_compress_destSize(const char *src, char *dst, int *srcSizePtr,
+	int targetDstSize, void *wrkmem)
+{
+	return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr,
+		targetDstSize);
+}
+EXPORT_SYMBOL(LZ4_compress_destSize);
+
+/*-******************************
+ *	Streaming functions
+ ********************************/
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream)
+{
+	memset(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+#define HASH_UNIT sizeof(reg_t)
+int LZ4_loadDict(LZ4_stream_t *LZ4_dict,
+	const char *dictionary, int dictSize)
+{
+	LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse;
+	const BYTE *p = (const BYTE *)dictionary;
+	const BYTE * const dictEnd = p + dictSize;
+	const BYTE *base;
+
+	if ((dict->initCheck)
+		|| (dict->currentOffset > 1 * GB)) {
+		/* Uninitialized structure, or reuse overflow */
+		LZ4_resetStream(LZ4_dict);
+	}
+
+	if (dictSize < (int)HASH_UNIT) {
+		dict->dictionary = NULL;
+		dict->dictSize = 0;
+		return 0;
+	}
+
+	if ((dictEnd - p) > 64 * KB)
+		p = dictEnd - 64 * KB;
+	dict->currentOffset += 64 * KB;
+	base = p - dict->currentOffset;
+	dict->dictionary = p;
+	dict->dictSize = (U32)(dictEnd - p);
+	dict->currentOffset += dict->dictSize;
+
+	while (p <= dictEnd - HASH_UNIT) {
+		LZ4_putPosition(p, dict->hashTable, byU32, base);
+		p += 3;
+	}
 
-	*dst_len = out_len;
+	return dict->dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDict);
+
+static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict,
+	const BYTE *src)
+{
+	if ((LZ4_dict->currentOffset > 0x80000000) ||
+		((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {
+		/* address space overflow */
+		/* rescale hash table */
+		U32 const delta = LZ4_dict->currentOffset - 64 * KB;
+		const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+		int i;
+
+		for (i = 0; i < LZ4_HASH_SIZE_U32; i++) {
+			if (LZ4_dict->hashTable[i] < delta)
+				LZ4_dict->hashTable[i] = 0;
+			else
+				LZ4_dict->hashTable[i] -= delta;
+		}
+		LZ4_dict->currentOffset = 64 * KB;
+		if (LZ4_dict->dictSize > 64 * KB)
+			LZ4_dict->dictSize = 64 * KB;
+		LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+	}
+}
+
+int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
+{
+	LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse;
+	const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize;
+
+	if ((U32)dictSize > 64 * KB) {
+		/* useless to define a dictionary > 64 * KB */
+		dictSize = 64 * KB;
+	}
+	if ((U32)dictSize > dict->dictSize)
+		dictSize = dict->dictSize;
 
-	return 0;
-exit:
-	return ret;
+	memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+	dict->dictionary = (const BYTE *)safeBuffer;
+	dict->dictSize = (U32)dictSize;
+
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDict);
+
+int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
+	char *dest, int inputSize, int maxOutputSize, int acceleration)
+{
+	LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse;
+	const BYTE * const dictEnd = streamPtr->dictionary
+		+ streamPtr->dictSize;
+
+	const BYTE *smallest = (const BYTE *) source;
+
+	if (streamPtr->initCheck) {
+		/* Uninitialized structure detected */
+		return 0;
+	}
+
+	if ((streamPtr->dictSize > 0) && (smallest > dictEnd))
+		smallest = dictEnd;
+
+	LZ4_renormDictT(streamPtr, smallest);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
+
+	/* Check overlapping input/dictionary space */
+	{	 const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+
+		if ((sourceEnd > streamPtr->dictionary)
+			&& (sourceEnd < dictEnd)) {
+			streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+			if (streamPtr->dictSize > 64 * KB)
+				streamPtr->dictSize = 64 * KB;
+			if (streamPtr->dictSize < 4)
+				streamPtr->dictSize = 0;
+			streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+		}
+	}
+
+	/* prefix mode : source data follows dictionary */
+	if (dictEnd == (const BYTE *)source) {
+		int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, dictSmall,	acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, noDictIssue, acceleration);
+		}
+		streamPtr->dictSize += (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+
+	/* external dictionary mode */
+	{ int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, dictSmall, acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, noDictIssue, acceleration);
+		}
+		streamPtr->dictionary = (const BYTE *)source;
+		streamPtr->dictSize = (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+}
+EXPORT_SYMBOL(LZ4_compress_fast_continue);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem) {
+	*dst_len = LZ4_compress_default(src, dst, (int)src_len,
+		(int)((size_t)dst_len), wrkmem);
+
+	/*
+	 * Prior lz4_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_fast/default
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4_compress);
 
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 6d940c7..590057b 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -1,25 +1,16 @@
 /*
- * LZ4 Decompressor for Linux kernel
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * Based on LZ4 implementation by Yann Collet.
- *
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *    * Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *    * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -31,313 +22,508 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  You can contact the author at :
- *  - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- *  - LZ4 source repository : http://code.google.com/p/lz4/
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#ifndef STATIC
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#endif
-#include <linux/lz4.h>
-
 #include <asm/unaligned.h>
 
-#include "lz4defs.h"
-
-static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-static int lz4_uncompress(const char *source, char *dest, int osize)
+/*-*****************************
+ *	Decompression functions
+ *******************************/
+/* LZ4_decompress_generic() :
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is important this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+static inline int LZ4_decompress_generic(
+	 const char *const source,
+	 char * const dest,
+	 int inputSize,
+		/*
+		 * If endOnInput == endOnInputSize,
+		 * this value is the max size of Output Buffer.
+		 */
+	 int outputSize,
+	 /* endOnOutputSize, endOnInputSize */
+	 int endOnInput,
+	 /* full, partial */
+	 int partialDecoding,
+	 /* only used if partialDecoding == partial */
+	 int targetOutputSize,
+	 /* noDict, withPrefix64k, usingExtDict */
+	 int dict,
+	 /* == dest when no prefix */
+	 const BYTE * const lowPrefix,
+	 /* only if dict == usingExtDict */
+	 const BYTE * const dictStart,
+	 /* note : = 0 if noDict */
+	 const size_t dictSize
+	 )
 {
+	/* Local Variables */
 	const BYTE *ip = (const BYTE *) source;
-	const BYTE *ref;
+	const BYTE * const iend = ip + inputSize;
+
 	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + osize;
+	BYTE * const oend = op + outputSize;
 	BYTE *cpy;
-	unsigned token;
-	size_t length;
+	BYTE *oexit = op + targetOutputSize;
+	const BYTE * const lowLimit = lowPrefix - dictSize;
+
+	const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
+	const unsigned int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
+	const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+	const int safeDecode = (endOnInput == endOnInputSize);
+	const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
 
+	/* Special cases */
+	/* targetOutputSize too high => decode everything */
+	if ((partialDecoding) && (oexit > oend - MFLIMIT))
+		oexit = oend - MFLIMIT;
+
+	/* Empty output buffer */
+	if ((endOnInput) && (unlikely(outputSize == 0)))
+		return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
+
+	if ((!endOnInput) && (unlikely(outputSize == 0)))
+		return (*ip == 0 ? 1 : -1);
+
+	/* Main Loop : decode sequences */
 	while (1) {
+		size_t length;
+		const BYTE *match;
+		size_t offset;
+
+		/* get literal length */
+		unsigned int const token = *ip++;
+
+		length = token>>ML_BITS;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
 		if (length == RUN_MASK) {
-			size_t len;
+			unsigned int s;
 
-			len = *ip++;
-			for (; len == 255; length += 255)
-				len = *ip++;
-			if (unlikely(length > (size_t)(length + len)))
+			do {
+				s = *ip++;
+				length += s;
+			} while (likely(endOnInput
+				? ip < iend - RUN_MASK
+				: 1) & (s == 255));
+
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)(op))) {
+				/* overflow detection */
 				goto _output_error;
-			length += len;
+			}
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(ip + length) < (size_t)(ip))) {
+				/* overflow detection */
+				goto _output_error;
+			}
 		}
 
 		/* copy literals */
 		cpy = op + length;
-		if (unlikely(cpy > oend - COPYLENGTH)) {
-			/*
-			 * Error: not enough place for another match
-			 * (min 4) + 5 literals
-			 */
-			if (cpy != oend)
-				goto _output_error;
-
+		if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
+			|| (ip + length > iend - (2 + 1 + LASTLITERALS))))
+			|| ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
+			if (partialDecoding) {
+				if (cpy > oend) {
+					/*
+					 * Error :
+					 * write attempt beyond end of output buffer
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& (ip + length > iend)) {
+					/*
+					 * Error :
+					 * read attempt beyond
+					 * end of input buffer
+					 */
+					goto _output_error;
+				}
+			} else {
+				if ((!endOnInput)
+					&& (cpy != oend)) {
+					/*
+					 * Error :
+					 * block decoding must
+					 * stop exactly there
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& ((ip + length != iend)
+					|| (cpy > oend))) {
+					/*
+					 * Error :
+					 * input must be consumed
+					 */
+					goto _output_error;
+				}
+			}
 			memcpy(op, ip, length);
 			ip += length;
-			break; /* EOF */
+			op += length;
+			/* Necessarily EOF, due to parsing restrictions */
+			break;
 		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+		LZ4_wildCopy(op, ip, cpy);
+		ip += length; op = cpy;
 
 		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-
-		/* Error: offset create reference outside destination buffer */
-		if (unlikely(ref < (BYTE *const) dest))
+		offset = LZ4_readLE16(ip); ip += 2;
+		match = op - offset;
+		if ((checkOffset) && (unlikely(match < lowLimit))) {
+			/* Error : offset outside buffers */
 			goto _output_error;
+		}
+		/* costs ~1%; silence an msan warning when offset == 0 */
+		LZ4_write32(op, (U32)offset);
 
 		/* get matchlength */
 		length = token & ML_MASK;
 		if (length == ML_MASK) {
-			for (; *ip == 255; length += 255)
-				ip++;
-			if (unlikely(length > (size_t)(length + *ip)))
+			unsigned int s;
+
+			do {
+			s = *ip++;
+			if ((endOnInput) && (ip > iend - LASTLITERALS))
+				goto _output_error;
+			length += s;
+			} while (s == 255);
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)op)) {
+				/* overflow detection */
 				goto _output_error;
-			length += *ip++;
+			}
 		}
+		length += MINMATCH;
+
+		/* check external dictionary */
+		if ((dict == usingExtDict) && (match < lowPrefix)) {
+			if (unlikely(op + length > oend - LASTLITERALS)) {
+				/* doesn't respect parsing restriction */
+				goto _output_error;
+			}
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-			op[0] = ref[0];
-			op[1] = ref[1];
-			op[2] = ref[2];
-			op[3] = ref[3];
-			op += 4;
-			ref += 4;
-			ref -= dec32table[op-ref];
-			PUT4(ref, op);
-			op += STEPSIZE - 4;
-			ref -= dec64;
+			if (length <= (size_t)(lowPrefix - match)) {
+			/*
+			 * match can be copied as a single segment
+			 * from external dictionary
+			 */
+			memmove(op, dictEnd - (lowPrefix - match), length);
+			op += length;
+			} else {
+				/*
+				 * match encompass external
+				 * dictionary and current block
+				 */
+				size_t const copySize = (size_t)(lowPrefix - match);
+				size_t const restSize = length - copySize;
+
+				memcpy(op, dictEnd - copySize, copySize);
+				op += copySize;
+
+				if (restSize > (size_t)(op - lowPrefix)) {
+					/* overlap copy */
+					BYTE * const endOfMatch = op + restSize;
+					const BYTE *copyFrom = lowPrefix;
+
+					while (op < endOfMatch)
+						*op++ = *copyFrom++;
+				} else {
+					memcpy(op, lowPrefix, restSize);
+					op += restSize;
+				}
+			}
+			continue;
+		}
+
+		/* copy match within block */
+		cpy = op + length;
+		if (unlikely(offset < 8)) {
+			const int dec64 = dec64table[offset];
+
+			op[0] = match[0];
+			op[1] = match[1];
+			op[2] = match[2];
+			op[3] = match[3];
+			match += dec32table[offset];
+			memcpy(op + 4, match, 4);
+			match -= dec64;
 		} else {
-			LZ4_COPYSTEP(ref, op);
+			LZ4_copy8(op, match); match += 8;
 		}
-		cpy = op + length - (STEPSIZE - 4);
-		if (cpy > (oend - COPYLENGTH)) {
 
-			/* Error: request to write beyond destination buffer */
-			if (cpy > oend)
-				goto _output_error;
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
+		op += 8;
+
+		if (unlikely(cpy > oend - 12)) {
+			BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
+
+			if (cpy > oend - LASTLITERALS) {
+				/*
+				 * Error : last LASTLITERALS bytes
+				 * must be literals (uncompressed)
+				 */
 				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
+			}
+			if (op < oCopyLimit) {
+				LZ4_wildCopy(op, match, oCopyLimit);
+				match += oCopyLimit - op;
+				op = oCopyLimit;
+			}
 			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
+				*op++ = *match++;
+		} else {
+			LZ4_copy8(op, match);
+			if (length > 16)
+				LZ4_wildCopy(op + 8, match + 8, cpy);
 		}
-		LZ4_SECURECOPY(ref, op, cpy);
 		op = cpy; /* correction */
 	}
+
 	/* end of decoding */
-	return (int) (((char *)ip) - source);
+	if (endOnInput) {
+		/* Nb of output bytes decoded */
+		return (int) (((char *)op) - dest);
+	} else {
+		/* Nb of input bytes read */
+		return (int) (((const char *)ip) - source);
+	}
 
-	/* write overflow error detected */
+	/* Overflow error detected */
 _output_error:
 	return -1;
 }
 
-static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
-				int isize, size_t maxoutputsize)
+int LZ4_decompress_safe(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize)
 {
-	const BYTE *ip = (const BYTE *) source;
-	const BYTE *const iend = ip + isize;
-	const BYTE *ref;
-
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, full, 0,
+		noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe);
 
-	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + maxoutputsize;
-	BYTE *cpy;
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize)
+{
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, partial,
+		targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_partial);
 
-	/* Main Loop */
-	while (ip < iend) {
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
+{
+	return LZ4_decompress_generic(source, dest, 0, originalSize,
+		endOnOutputSize, full, 0, withPrefix64k,
+		(BYTE *)(dest - 64 * KB), NULL, 64 * KB);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast);
 
-		unsigned token;
-		size_t length;
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
-		if (length == RUN_MASK) {
-			int s = 255;
-			while ((ip < iend) && (s == 255)) {
-				s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-			}
-		}
-		/* copy literals */
-		cpy = op + length;
-		if ((cpy > oend - COPYLENGTH) ||
-			(ip + length > iend - COPYLENGTH)) {
-
-			if (cpy > oend)
-				goto _output_error;/* writes beyond buffer */
-
-			if (ip + length != iend)
-				goto _output_error;/*
-						    * Error: LZ4 format requires
-						    * to consume all input
-						    * at this stage
-						    */
-			memcpy(op, ip, length);
-			op += length;
-			break;/* Necessarily EOF, due to parsing restrictions */
-		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+	lz4sd->prefixSize = (size_t) dictSize;
+	lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize;
+	lz4sd->externalDict = NULL;
+	lz4sd->extDictSize	= 0;
+	return 1;
+}
+EXPORT_SYMBOL(LZ4_setStreamDecode);
 
-		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-		if (ref < (BYTE * const) dest)
-			goto _output_error;
-			/*
-			 * Error : offset creates reference
-			 * outside of destination buffer
-			 */
+/*
+ * *_continue() :
+ * These decoding functions allow decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks must still be available at the memory
+ * position where they were decoded.
+ * If it's not possible, save the relevant part of
+ * decoded data into a safe buffer,
+ * and indicate where it stands using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize, int maxOutputSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize,
+			maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict,
+			lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += result;
+		lz4sd->prefixEnd	+= result;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = result;
+		lz4sd->prefixEnd	= (BYTE *)dest + result;
+	}
 
-		/* get matchlength */
-		length = (token & ML_MASK);
-		if (length == ML_MASK) {
-			while (ip < iend) {
-				int s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-				if (s == 255)
-					continue;
-				break;
-			}
-		}
+	return result;
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_continue);
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-				op[0] = ref[0];
-				op[1] = ref[1];
-				op[2] = ref[2];
-				op[3] = ref[3];
-				op += 4;
-				ref += 4;
-				ref -= dec32table[op - ref];
-				PUT4(ref, op);
-				op += STEPSIZE - 4;
-				ref -= dec64;
-		} else {
-			LZ4_COPYSTEP(ref, op);
-		}
-		cpy = op + length - (STEPSIZE-4);
-		if (cpy > oend - COPYLENGTH) {
-			if (cpy > oend)
-				goto _output_error; /* write outside of buf */
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
-				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
-		}
-		LZ4_SECURECOPY(ref, op, cpy);
-		op = cpy; /* correction */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict,
+			lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict, lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += originalSize;
+		lz4sd->prefixEnd	+= originalSize;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = originalSize;
+		lz4sd->prefixEnd	= (BYTE *)dest + originalSize;
 	}
-	/* end of decoding */
-	return (int) (((char *) op) - dest);
 
-	/* write overflow error detected */
-_output_error:
-	return -1;
+	return result;
 }
+EXPORT_SYMBOL(LZ4_decompress_fast_continue);
 
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len)
+/*
+ * Advanced decoding functions :
+ * *_usingDict() :
+ * These decoding functions work the same as "_continue" ones,
+ * the dictionary must be explicitly provided within parameters
+ */
+static inline int LZ4_decompress_usingDict_generic(const char *source,
+	char *dest, int compressedSize, int maxOutputSize, int safe,
+	const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int input_len = 0;
-
-	input_len = lz4_uncompress(src, dest, actual_dest_len);
-	if (input_len < 0)
-		goto exit_0;
-	*src_len = input_len;
+	if (dictSize == 0)
+		return LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize, safe, full, 0,
+			noDict, (BYTE *)dest, NULL, 0);
+	if (dictStart + dictSize == dest) {
+		if (dictSize >= (int)(64 * KB - 1))
+			return LZ4_decompress_generic(source, dest,
+				compressedSize, maxOutputSize, safe, full, 0,
+				withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
+		return LZ4_decompress_generic(source, dest, compressedSize,
+			maxOutputSize, safe, full, 0, noDict,
+			(BYTE *)dest - dictSize, NULL, 0);
+	}
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxOutputSize, safe, full, 0, usingExtDict,
+		(BYTE *)dest, (const BYTE *)dictStart, dictSize);
+}
 
-	return 0;
-exit_0:
-	return ret;
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxOutputSize,
+	const char *dictStart, int dictSize)
+{
+	return LZ4_decompress_usingDict_generic(source, dest,
+		compressedSize, maxOutputSize, 1, dictStart, dictSize);
 }
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress);
-#endif
+EXPORT_SYMBOL(LZ4_decompress_safe_usingDict);
 
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len)
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int out_len = 0;
-
-	out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
-					*dest_len);
-	if (out_len < 0)
-		goto exit_0;
-	*dest_len = out_len;
-
-	return 0;
-exit_0:
-	return ret;
+	return LZ4_decompress_usingDict_generic(source, dest, 0,
+		originalSize, 0, dictStart, dictSize);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_decompress_unknownoutputsize(const unsigned char *src,
+	size_t src_len, unsigned char *dest, size_t *dest_len) {
+	 *dest_len = LZ4_decompress_safe(src, dest,
+		 (int)src_len, (int)((size_t)dest_len));
+
+		/*
+		 * Prior lz4_decompress_unknownoutputsize will return
+		 * 0 for success and a negative result for error
+		 * new LZ4_decompress_safe returns
+		 * - the length of data read on success
+		 * - and also a negative result on error
+		 * meaning when result > 0, we just return 0 here
+		 */
+		if (src_len > 0) {
+			return 0;
+		} else
+			return -1;
 }
-#ifndef STATIC
 EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
 
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len) {
+	*src_len = LZ4_decompress_fast(src, dest, (int)actual_dest_len);
+
+	/*
+	 * Prior lz4_decompress will return
+	 * 0 for success and a negative result for error
+	 * new LZ4_decompress_fast returns
+	 * - the length of data read on success
+	 * - and also a negative result on error
+	 * meaning when result > 0, we just return 0 here
+	 */
+	if (src_len > 0)
+		return 0;
+	else
+		return -1;
+}
+EXPORT_SYMBOL(lz4_decompress);
+
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4 Decompressor");
-#endif
+MODULE_DESCRIPTION("LZ4 decompressor");
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index c79d7ea..b0d21b5 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -1,157 +1,219 @@
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
 /*
- * lz4defs.h -- architecture specific defines
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * lz4defs.h -- common and architecture specific defines for the kernel usage
+
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *    * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+#include <asm/unaligned.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
 /*
  * Detects 64 bits mode
- */
+*/
 #if defined(CONFIG_64BIT)
 #define LZ4_ARCH64 1
 #else
 #define LZ4_ARCH64 0
 #endif
 
-/*
- * Architecture-specific macros
- */
-#define BYTE	u8
-typedef struct _U16_S { u16 v; } U16_S;
-typedef struct _U32_S { u32 v; } U32_S;
-typedef struct _U64_S { u64 v; } U64_S;
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-
-#define A16(x) (((U16_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A64(x) (((U64_S *)(x))->v)
-
-#define PUT4(s, d) (A32(d) = A32(s))
-#define PUT8(s, d) (A64(d) = A64(s))
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - A16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
-	do {	\
-		A16(p) = v; \
-		p += 2; \
-	} while (0)
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
-#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
-#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
-
-#define PUT4(s, d) \
-	put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
-#define PUT8(s, d) \
-	put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - get_unaligned_le16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)			\
-	do {						\
-		put_unaligned_le16(v, (u16 *)(p));	\
-		p += 2;					\
-	} while (0)
+/*-************************************
+ *	Basic Types
+ **************************************/
+#include <linux/types.h>
+
+typedef	uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef	int32_t S32;
+typedef uint64_t U64;
+typedef uintptr_t uptrval;
+
+#if defined(__x86_64__)
+	typedef U64		reg_t;	 /* 64-bits in x32 mode */
+#else
+	typedef size_t reg_t;	 /* 32-bits in x32 mode */
 #endif
 
-#define COPYLENGTH 8
-#define ML_BITS  4
-#define ML_MASK  ((1U << ML_BITS) - 1)
-#define RUN_BITS (8 - ML_BITS)
-#define RUN_MASK ((1U << RUN_BITS) - 1)
-#define MEMORY_USAGE	14
-#define MINMATCH	4
-#define SKIPSTRENGTH	6
-#define LASTLITERALS	5
-#define MFLIMIT		(COPYLENGTH + MINMATCH)
-#define MINLENGTH	(MFLIMIT + 1)
-#define MAXD_LOG	16
-#define MAXD		(1 << MAXD_LOG)
-#define MAXD_MASK	(u32)(MAXD - 1)
-#define MAX_DISTANCE	(MAXD - 1)
-#define HASH_LOG	(MAXD_LOG - 1)
-#define HASHTABLESIZE	(1 << HASH_LOG)
-#define MAX_NB_ATTEMPTS	256
-#define OPTIMAL_ML	(int)((ML_MASK-1)+MINMATCH)
-#define LZ4_64KLIMIT	((1<<16) + (MFLIMIT - 1))
-#define HASHLOG64K	((MEMORY_USAGE - 2) + 1)
-#define HASH64KTABLESIZE	(1U << HASHLOG64K)
-#define LZ4_HASH_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - (MEMORY_USAGE-2)))
-#define LZ4_HASH64K_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASHLOG64K))
-#define HASH_VALUE(p)		(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASH_LOG))
-
-#if LZ4_ARCH64/* 64-bit */
-#define STEPSIZE 8
-
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT8(s, d);	\
-		d += 8;		\
-		s += 8;		\
-	} while (0)
-
-#define LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
-
-#define LZ4_SECURECOPY(s, d, e)			\
-	do {					\
-		if (d < e) {			\
-			LZ4_WILDCOPY(s, d, e);	\
-		}				\
-	} while (0)
-#define HTYPE u32
-
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+/*-************************************
+ *	Constants
+ **************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB (1<<10)
+#define MB (1<<20)
+#define GB (1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1<<MAXD_LOG) - 1)
+#define STEPSIZE sizeof(size_t)
+
+#define ML_BITS	4
+#define ML_MASK	((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;
+
+/*-************************************
+ *	Reading and writing into memory
+ **************************************/
+
+static inline U16 LZ4_read16(const void *memPtr)
+{
+		U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline U32 LZ4_read32(const void *memPtr)
+{
+		U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline size_t LZ4_read_ARCH(const void *memPtr)
+{
+		size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline void LZ4_write16(void *memPtr, U16 value)
+{
+		memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline void LZ4_write32(void *memPtr, U32 value)
+{
+		memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline U16 LZ4_readLE16(const void *memPtr)
+{
+#ifdef __LITTLE_ENDIAN__
+		return LZ4_read16(memPtr);
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
+		const BYTE *p = (const BYTE *)memPtr;
+
+		return (U16)((U16)p[0] + (p[1] << 8));
 #endif
+}
+
+static inline void LZ4_writeLE16(void *memPtr, U16 value)
+{
+#ifdef __LITTLE_ENDIAN__
+		LZ4_write16(memPtr, value);
+#else
+		BYTE *p = (BYTE *)memPtr;
 
-#else	/* 32-bit */
-#define STEPSIZE 4
+		p[0] = (BYTE) value;
+		p[1] = (BYTE)(value>>8);
+#endif
+}
 
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT4(s, d);	\
-		d += 4;		\
-		s += 4;		\
-	} while (0)
+static inline void LZ4_copy8(void *dst, const void *src)
+{
+		memcpy(dst, src, 8);
+}
 
-#define LZ4_COPYPACKET(s, d)		\
-	do {				\
-		LZ4_COPYSTEP(s, d);	\
-		LZ4_COPYSTEP(s, d);	\
-	} while (0)
+/*
+ * customized variant of memcpy,
+ * which can overwrite up to 7 bytes beyond dstEnd
+ */
+static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
+{
+		BYTE *d = (BYTE *)dstPtr;
+		const BYTE *s = (const BYTE *)srcPtr;
+		BYTE *const e = (BYTE *)dstEnd;
 
-#define LZ4_SECURECOPY	LZ4_WILDCOPY
-#define HTYPE const u8*
+		do { LZ4_copy8(d, s); d += 8; s += 8; } while (d < e);
+}
 
-#ifdef __BIG_ENDIAN
+#if LZ4_ARCH64
+#ifdef __BIG_ENDIAN__
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#endif
+#else
+#ifdef __BIG_ENDIAN__
 #define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
 #else
 #define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
 #endif
+#endif
 
+static inline unsigned int LZ4_count(const BYTE *pIn, const BYTE *pMatch,
+	const BYTE *pInLimit)
+{
+	const BYTE *const pStart = pIn;
+
+	while (likely(pIn < pInLimit-(STEPSIZE-1))) {
+		size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+
+		if (!diff) {
+			pIn += STEPSIZE;
+			pMatch += STEPSIZE;
+			continue;
+		}
+		pIn += LZ4_NBCOMMONBYTES(diff);
+		return (unsigned int)(pIn - pStart);
+	}
+
+#ifdef LZ4_ARCH64
+	if ((pIn < (pInLimit-3))
+		&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+		pIn += 4; pMatch += 4;
+	}
 #endif
+	if ((pIn < (pInLimit-1))
+		&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+		pIn += 2; pMatch += 2;
+	}
+	if ((pIn < pInLimit) && (*pMatch == *pIn))
+		pIn++;
+	return (unsigned int)(pIn - pStart);
+}
+
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
 
-#define LZ4_WILDCOPY(s, d, e)		\
-	do {				\
-		LZ4_COPYPACKET(s, d);	\
-	} while (d < e)
-
-#define LZ4_BLINDCOPY(s, d, l)	\
-	do {	\
-		u8 *e = (d) + l;	\
-		LZ4_WILDCOPY(s, d, e);	\
-		d = e;	\
-	} while (0)
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+#endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index f344f76..407fd33 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -1,19 +1,17 @@
 /*
  * LZ4 HC - High Compression Mode of LZ4
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Copyright (C) 2011-2015, Yann Collet.
  *
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *		* Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *		* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,323 +23,353 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+/*-************************************
+ *	Dependencies
+ **************************************/
 #include <linux/lz4.h>
-#include <asm/unaligned.h>
 #include "lz4defs.h"
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h> /* memset */
 
-struct lz4hc_data {
-	const u8 *base;
-	HTYPE hashtable[HASHTABLESIZE];
-	u16 chaintable[MAXD];
-	const u8 *nexttoupdate;
-} __attribute__((__packed__));
+/* *************************************
+ *	Local Constants and types
+ ***************************************/
 
-static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
+#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH)
+
+#define HASH_FUNCTION(i)	(((i) * 2654435761U) \
+	>> ((MINMATCH*8) - LZ4HC_HASH_LOG))
+#define DELTANEXTU16(p)	chainTable[(U16)(p)] /* faster */
+
+static U32 LZ4HC_hashPtr(const void *ptr)
 {
-	memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
-	memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
-
-#if LZ4_ARCH64
-	hc4->nexttoupdate = base + 1;
-#else
-	hc4->nexttoupdate = base;
-#endif
-	hc4->base = base;
-	return 1;
+	return HASH_FUNCTION(LZ4_read32(ptr));
+}
+
+/**************************************
+ *	HC Compression
+ **************************************/
+static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
+{
+	memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable));
+	memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+	hc4->nextToUpdate = 64 * KB;
+	hc4->base = start - 64 * KB;
+	hc4->end = start;
+	hc4->dictBase = start - 64 * KB;
+	hc4->dictLimit = 64 * KB;
+	hc4->lowLimit = 64 * KB;
 }
 
 /* Update chains up to ip (excluded) */
-static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
+static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+	const BYTE *ip)
 {
-	u16 *chaintable = hc4->chaintable;
-	HTYPE *hashtable  = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const hashTable	= hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
+	U32 const target = (U32)(ip - base);
+	U32 idx = hc4->nextToUpdate;
+
+	while (idx < target) {
+		U32 const h = LZ4HC_hashPtr(base + idx);
+		size_t delta = idx - hashTable[h];
 
-	while (hc4->nexttoupdate < ip) {
-		const u8 *p = hc4->nexttoupdate;
-		size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
 		if (delta > MAX_DISTANCE)
 			delta = MAX_DISTANCE;
-		chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
-		hashtable[HASH_VALUE(p)] = (p) - base;
-		hc4->nexttoupdate++;
-	}
-}
 
-static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
-		const u8 *const matchlimit)
-{
-	const u8 *p1t = p1;
-
-	while (p1t < matchlimit - (STEPSIZE - 1)) {
-#if LZ4_ARCH64
-		u64 diff = A64(p2) ^ A64(p1t);
-#else
-		u32 diff = A32(p2) ^ A32(p1t);
-#endif
-		if (!diff) {
-			p1t += STEPSIZE;
-			p2 += STEPSIZE;
-			continue;
-		}
-		p1t += LZ4_NBCOMMONBYTES(diff);
-		return p1t - p1;
-	}
-#if LZ4_ARCH64
-	if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
-		p1t += 4;
-		p2 += 4;
-	}
-#endif
+		DELTANEXTU16(idx) = (U16)delta;
 
-	if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
-		p1t += 2;
-		p2 += 2;
+		hashTable[h] = idx;
+		idx++;
 	}
-	if ((p1t < matchlimit) && (*p2 == *p1t))
-		p1t++;
-	return p1t - p1;
+
+	hc4->nextToUpdate = target;
 }
 
-static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
-		const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
+static inline int LZ4HC_InsertAndFindBestMatch(
+	LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
+	const BYTE *ip,
+	const BYTE * const iLimit,
+	const BYTE **matchpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-	const u8 *ref;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	int nbattempts = MAX_NB_ATTEMPTS;
-	size_t repl = 0, ml = 0;
-	u16 delta;
+	const BYTE * const dictBase = hc4->dictBase;
+	const U32 dictLimit = hc4->dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	U32 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	size_t ml = 0;
 
 	/* HC4 match finder */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	/* potential repetition */
-	if (ref >= ip-4) {
-		/* confirmed */
-		if (A32(ref) == A32(ip)) {
-			delta = (u16)(ip-ref);
-			repl = ml  = lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
-			*matchpos = ref;
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE * const match = base + matchIndex;
+
+			if (*(match + ml) == *(ip + ml)
+				&& (LZ4_read32(match) == LZ4_read32(ip))) {
+				size_t const mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, iLimit) + MINMATCH;
 
-	while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
-		nbattempts--;
-		if (*(ref + ml) == *(ip + ml)) {
-			if (A32(ref) == A32(ip)) {
-				size_t mlt =
-					lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
 				if (mlt > ml) {
 					ml = mlt;
-					*matchpos = ref;
+					*matchpos = match;
+				}
+			}
+		} else {
+			const BYTE * const match = dictBase + matchIndex;
+
+			if (LZ4_read32(match) == LZ4_read32(ip)) {
+				size_t mlt;
+				const BYTE *vLimit = ip
+					+ (dictLimit - matchIndex);
+
+				if (vLimit > iLimit)
+					vLimit = iLimit;
+				mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, vLimit) + MINMATCH;
+				if ((ip + mlt == vLimit)
+					&& (vLimit < iLimit))
+					mlt += LZ4_count(ip + mlt,
+						base + dictLimit,
+						iLimit);
+				if (mlt > ml) {
+					/* virtual matchpos */
+					ml = mlt;
+					*matchpos = base + matchIndex;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-
-	/* Complete table */
-	if (repl) {
-		const BYTE *ptr = ip;
-		const BYTE *end;
-		end = ip + repl - (MINMATCH-1);
-		/* Pre-Load */
-		while (ptr < end - delta) {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			ptr++;
-		}
-		do {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			/* Head of chain */
-			hashtable[HASH_VALUE(ptr)] = (ptr) - base;
-			ptr++;
-		} while (ptr < end);
-		hc4->nexttoupdate = end;
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
 
 	return (int)ml;
 }
 
-static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
-	const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
-	const u8 **matchpos, const u8 **startpos)
+static inline int LZ4HC_InsertAndGetWiderMatch(
+	LZ4HC_CCtx_internal *hc4,
+	const BYTE * const ip,
+	const BYTE * const iLowLimit,
+	const BYTE * const iHighLimit,
+	int longest,
+	const BYTE **matchpos,
+	const BYTE **startpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	const u8 *ref;
-	int nbattempts = MAX_NB_ATTEMPTS;
-	int delta = (int)(ip - startlimit);
+	const U32 dictLimit = hc4->dictLimit;
+	const BYTE * const lowPrefixPtr = base + dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	const BYTE * const dictBase = hc4->dictBase;
+	U32	 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	int delta = (int)(ip - iLowLimit);
 
 	/* First Match */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
-		&& (nbattempts)) {
-		nbattempts--;
-		if (*(startlimit + longest) == *(ref - delta + longest)) {
-			if (A32(ref) == A32(ip)) {
-				const u8 *reft = ref + MINMATCH;
-				const u8 *ipt = ip + MINMATCH;
-				const u8 *startt = ip;
-
-				while (ipt < matchlimit-(STEPSIZE - 1)) {
-					#if LZ4_ARCH64
-					u64 diff = A64(reft) ^ A64(ipt);
-					#else
-					u32 diff = A32(reft) ^ A32(ipt);
-					#endif
-
-					if (!diff) {
-						ipt += STEPSIZE;
-						reft += STEPSIZE;
-						continue;
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE *matchPtr = base + matchIndex;
+
+			if (*(iLowLimit + longest)
+				== *(matchPtr - delta + longest)) {
+				if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+					int mlt = MINMATCH +
+						LZ4_count(ip + MINMATCH,
+							matchPtr + MINMATCH,
+							iHighLimit);
+					int back = 0;
+
+					while ((ip + back > iLowLimit)
+							 && (matchPtr + back > lowPrefixPtr)
+							 && (ip[back - 1] == matchPtr[back - 1]))
+						back--;
+
+					mlt -= back;
+
+					if (mlt > longest) {
+						longest = (int)mlt;
+						*matchpos = matchPtr + back;
+						*startpos = ip + back;
 					}
-					ipt += LZ4_NBCOMMONBYTES(diff);
-					goto _endcount;
-				}
-				#if LZ4_ARCH64
-				if ((ipt < (matchlimit - 3))
-					&& (A32(reft) == A32(ipt))) {
-					ipt += 4;
-					reft += 4;
-				}
-				ipt += 2;
-				#endif
-				if ((ipt < (matchlimit - 1))
-					&& (A16(reft) == A16(ipt))) {
-					reft += 2;
 				}
-				if ((ipt < matchlimit) && (*reft == *ipt))
-					ipt++;
-_endcount:
-				reft = ref;
-
-				while ((startt > startlimit)
-					&& (reft > hc4->base)
-					&& (startt[-1] == reft[-1])) {
-					startt--;
-					reft--;
-				}
-
-				if ((ipt - startt) > longest) {
-					longest = (int)(ipt - startt);
-					*matchpos = reft;
-					*startpos = startt;
+			}
+		} else {
+			const BYTE * const matchPtr = dictBase + matchIndex;
+
+			if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+				size_t mlt;
+				int back = 0;
+				const BYTE *vLimit = ip + (dictLimit - matchIndex);
+
+				if (vLimit > iHighLimit)
+					vLimit = iHighLimit;
+
+				mlt = LZ4_count(ip + MINMATCH,
+					matchPtr + MINMATCH, vLimit) + MINMATCH;
+
+				if ((ip + mlt == vLimit) && (vLimit < iHighLimit))
+					mlt += LZ4_count(ip + mlt, base + dictLimit,
+						iHighLimit);
+				while ((ip + back > iLowLimit)
+					&& (matchIndex + back > lowLimit)
+					&& (ip[back - 1] == matchPtr[back - 1]))
+					back--;
+				mlt -= back;
+				if ((int)mlt > longest) {
+					longest = (int)mlt;
+					*matchpos = base + matchIndex + back;
+					*startpos = ip + back;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
+
 	return longest;
 }
 
-static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
-		int ml, const u8 *ref)
+static inline int LZ4HC_encodeSequence(
+	const BYTE **ip,
+	BYTE **op,
+	const BYTE **anchor,
+	int matchLength,
+	const BYTE * const match,
+	limitedOutput_directive limitedOutputBuffer,
+	BYTE *oend)
 {
-	int length, len;
-	u8 *token;
+	int length;
+	BYTE *token;
 
 	/* Encode Literal length */
 	length = (int)(*ip - *anchor);
 	token = (*op)++;
+
+	if ((limitedOutputBuffer)
+		&& ((*op + (length>>8)
+			+ length + (2 + 1 + LASTLITERALS)) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
 	if (length >= (int)RUN_MASK) {
-		*token = (RUN_MASK << ML_BITS);
+		int len;
+
+		*token = (RUN_MASK<<ML_BITS);
 		len = length - RUN_MASK;
-		for (; len > 254 ; len -= 255)
+		for (; len > 254 ; len -= 255) {
 			*(*op)++ = 255;
-		*(*op)++ = (u8)len;
+			*(*op)++ = (BYTE)len;
+		}
 	} else
-		*token = (length << ML_BITS);
+		*token = (BYTE)(length<<ML_BITS);
 
 	/* Copy Literals */
-	LZ4_BLINDCOPY(*anchor, *op, length);
+	LZ4_wildCopy(*op, *anchor, (*op) + length);
+	*op += length;
 
 	/* Encode Offset */
-	LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
+	LZ4_writeLE16(*op, (U16)(*ip - match)); *op += 2;
 
 	/* Encode MatchLength */
-	len = (int)(ml - MINMATCH);
-	if (len >= (int)ML_MASK) {
+	length = (int)(matchLength - MINMATCH);
+	if ((limitedOutputBuffer)
+		&& (*op + (length>>8)
+			+ (1 + LASTLITERALS) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
+	if (length >= (int)ML_MASK) {
 		*token += ML_MASK;
-		len -= ML_MASK;
-		for (; len > 509 ; len -= 510) {
+		length -= ML_MASK;
+		for (; length > 509 ; length -= 510) {
 			*(*op)++ = 255;
 			*(*op)++ = 255;
 		}
-		if (len > 254) {
-			len -= 255;
+		if (length > 254) {
+			length -= 255;
 			*(*op)++ = 255;
 		}
-		*(*op)++ = (u8)len;
+		*(*op)++ = (BYTE)length;
 	} else
-		*token += len;
+		*token += (BYTE)(length);
 
 	/* Prepare next loop */
-	*ip += ml;
+	*ip += matchLength;
 	*anchor = *ip;
 
 	return 0;
 }
 
-static int lz4_compresshcctx(struct lz4hc_data *ctx,
-		const char *source,
-		char *dest,
-		int isize)
+static int LZ4HC_compress_generic(
+	LZ4HC_CCtx_internal *const ctx,
+	const char * const source,
+	char * const dest,
+	int const inputSize,
+	int const maxOutputSize,
+	int compressionLevel,
+	limitedOutput_directive limit
+	)
 {
-	const u8 *ip = (const u8 *)source;
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	const u8 *const matchlimit = (iend - LASTLITERALS);
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = (iend - LASTLITERALS);
 
-	u8 *op = (u8 *)dest;
+	BYTE *op = (BYTE *) dest;
+	BYTE * const oend = op + maxOutputSize;
 
+	unsigned int maxNbAttempts;
 	int ml, ml2, ml3, ml0;
-	const u8 *ref = NULL;
-	const u8 *start2 = NULL;
-	const u8 *ref2 = NULL;
-	const u8 *start3 = NULL;
-	const u8 *ref3 = NULL;
-	const u8 *start0;
-	const u8 *ref0;
-	int lastrun;
+	const BYTE *ref = NULL;
+	const BYTE *start2 = NULL;
+	const BYTE *ref2 = NULL;
+	const BYTE *start3 = NULL;
+	const BYTE *ref3 = NULL;
+	const BYTE *start0;
+	const BYTE *ref0;
+
+	/* init */
+	if (compressionLevel > LZ4HC_MAX_CLEVEL)
+		compressionLevel = LZ4HC_MAX_CLEVEL;
+	if (compressionLevel < 1)
+		compressionLevel = LZ4HC_DEFAULT_CLEVEL;
+	maxNbAttempts = 1 << (compressionLevel - 1);
+	ctx->end += inputSize;
 
 	ip++;
 
 	/* Main Loop */
 	while (ip < mflimit) {
-		ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
+		ml = LZ4HC_InsertAndFindBestMatch(ctx, ip,
+			matchlimit, (&ref), maxNbAttempts);
 		if (!ml) {
 			ip++;
 			continue;
@@ -351,46 +379,52 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		start0 = ip;
 		ref0 = ref;
 		ml0 = ml;
-_search2:
-		if (ip+ml < mflimit)
-			ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
-				ip + 1, matchlimit, ml, &ref2, &start2);
+
+_Search2:
+		if (ip + ml < mflimit)
+			ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				ip + ml - 2, ip + 0,
+				matchlimit, ml, &ref2,
+				&start2, maxNbAttempts);
 		else
 			ml2 = ml;
-		/* No better match */
+
 		if (ml2 == ml) {
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			/* No better match */
+			if (LZ4HC_encodeSequence(&ip, &op,
+				&anchor, ml, ref, limit, oend))
+				return 0;
 			continue;
 		}
 
 		if (start0 < ip) {
-			/* empirical */
 			if (start2 < ip + ml0) {
+				/* empirical */
 				ip = start0;
 				ref = ref0;
 				ml = ml0;
 			}
 		}
-		/*
-		 * Here, start0==ip
-		 * First Match too small : removed
-		 */
+
+		/* Here, start0 == ip */
 		if ((start2 - ip) < 3) {
+			/* First Match too small : removed */
 			ml = ml2;
 			ip = start2;
 			ref = ref2;
-			goto _search2;
+			goto _Search2;
 		}
 
-_search3:
+_Search3:
 		/*
-		 * Currently we have :
-		 * ml2 > ml1, and
-		 * ip1+3 <= ip2 (usually < ip1+ml1)
-		 */
+		* Currently we have :
+		* ml2 > ml1, and
+		* ip1 + 3 <= ip2 (usually < ip1 + ml1)
+		*/
 		if ((start2 - ip) < OPTIMAL_ML) {
 			int correction;
 			int new_ml = ml;
+
 			if (new_ml > OPTIMAL_ML)
 				new_ml = OPTIMAL_ML;
 			if (ip + new_ml > start2 + ml2 - MINMATCH)
@@ -403,39 +437,44 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			}
 		}
 		/*
-		 * Now, we have start2 = ip+new_ml,
-		 * with new_ml=min(ml, OPTIMAL_ML=18)
+		 * Now, we have start2 = ip + new_ml,
+		 * with new_ml = min(ml, OPTIMAL_ML = 18)
 		 */
+
 		if (start2 + ml2 < mflimit)
-			ml3 = lz4hc_insertandgetwidermatch(ctx,
-				start2 + ml2 - 3, start2, matchlimit,
-				ml2, &ref3, &start3);
+			ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				start2 + ml2 - 3, start2,
+				matchlimit, ml2, &ref3, &start3,
+				maxNbAttempts);
 		else
 			ml3 = ml2;
 
-		/* No better match : 2 sequences to encode */
 		if (ml3 == ml2) {
+			/* No better match : 2 sequences to encode */
 			/* ip & ref are known; Now for ml */
-			if (start2 < ip+ml)
+			if (start2 < ip + ml)
 				ml = (int)(start2 - ip);
-
 			/* Now, encode 2 sequences */
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml, ref, limit, oend))
+				return 0;
 			ip = start2;
-			lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml2, ref2, limit, oend))
+				return 0;
 			continue;
 		}
 
-		/* Not enough space for match 2 : remove it */
 		if (start3 < ip + ml + 3) {
-			/*
-			 * can write Seq1 immediately ==> Seq2 is removed,
-			 * so Seq3 becomes Seq1
-			 */
+			/* Not enough space for match 2 : remove it */
 			if (start3 >= (ip + ml)) {
+				/* can write Seq1 immediately
+				 * ==> Seq2 is removed,
+				 * so Seq3 becomes Seq1
+				 */
 				if (start2 < ip + ml) {
-					int correction =
-						(int)(ip + ml - start2);
+					int correction = (int)(ip + ml - start2);
+
 					start2 += correction;
 					ref2 += correction;
 					ml2 -= correction;
@@ -446,35 +485,38 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 					}
 				}
 
-				lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-				ip  = start3;
+				if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+					ml, ref, limit, oend))
+					return 0;
+				ip	= start3;
 				ref = ref3;
-				ml  = ml3;
+				ml	= ml3;
 
 				start0 = start2;
 				ref0 = ref2;
 				ml0 = ml2;
-				goto _search2;
+				goto _Search2;
 			}
 
 			start2 = start3;
 			ref2 = ref3;
 			ml2 = ml3;
-			goto _search3;
+			goto _Search3;
 		}
 
 		/*
-		 * OK, now we have 3 ascending matches; let's write at least
-		 * the first one ip & ref are known; Now for ml
-		 */
+		* OK, now we have 3 ascending matches;
+		* let's write at least the first one
+		* ip & ref are known; Now for ml
+		*/
 		if (start2 < ip + ml) {
 			if ((start2 - ip) < (int)ML_MASK) {
 				int correction;
+
 				if (ml > OPTIMAL_ML)
 					ml = OPTIMAL_ML;
 				if (ip + ml > start2 + ml2 - MINMATCH)
-					ml = (int)(start2 - ip) + ml2
-						- MINMATCH;
+					ml = (int)(start2 - ip) + ml2 - MINMATCH;
 				correction = ml - (int)(start2 - ip);
 				if (correction > 0) {
 					start2 += correction;
@@ -484,7 +526,9 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			} else
 				ml = (int)(start2 - ip);
 		}
-		lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+		if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml,
+			ref, limit, oend))
+			return 0;
 
 		ip = start2;
 		ref = ref2;
@@ -494,46 +538,243 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		ref2 = ref3;
 		ml2 = ml3;
 
-		goto _search3;
+		goto _Search3;
 	}
 
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8) lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{ int lastRun = (int)(iend - anchor);
+
+		if ((limit)
+			&& (((char *)op - dest) + lastRun + 1
+				+ ((lastRun + 255 - RUN_MASK)/255)
+					> (U32)maxOutputSize)) {
+			/* Check output limit */
+			return 0;
+		}
+		if (lastRun >= (int)RUN_MASK) {
+			*op++ = (RUN_MASK<<ML_BITS);
+			lastRun -= RUN_MASK;
+			for (; lastRun > 254 ; lastRun -= 255) {
+				*op++ = 255;
+				*op++ = (BYTE) lastRun;
+			}
+		} else
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		memcpy(op, anchor, iend - anchor);
+		op += iend - anchor;
+	}
+
 	/* End */
 	return (int) (((char *)op) - dest);
 }
 
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+int LZ4_compress_HC_extStateHC(
+	void *state,
+	const char *src,
+	char *dst,
+	int srcSize,
+	int maxDstSize,
+	int compressionLevel)
 {
-	int ret = -1;
-	int out_len = 0;
+	LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t *)state)->internal_donotuse;
 
-	struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
-	lz4hc_init(hc4, (const u8 *)src);
-	out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
-		(char *)dst, (int)src_len);
+	if (((size_t)(state)&(sizeof(void *) - 1)) != 0) {
+		/* Error : state is not aligned
+		 * for pointers (32 or 64 bits)
+		 */
+		return 0;
+	}
 
-	if (out_len < 0)
-		goto exit;
+	LZ4HC_init(ctx, (const BYTE *)src);
 
-	*dst_len = out_len;
-	return 0;
+	if (maxDstSize < LZ4_compressBound(srcSize))
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, limitedOutput);
+	else
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, noLimit);
+}
 
-exit:
-	return ret;
+int LZ4_compress_HC(const char *src, char *dst, int srcSize,
+	int maxDstSize, int compressionLevel, void *wrkmem)
+{
+	return LZ4_compress_HC_extStateHC(wrkmem, src, dst,
+		srcSize, maxDstSize, compressionLevel);
+}
+EXPORT_SYMBOL(LZ4_compress_HC);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4hc_compress(const unsigned char *src, size_t src_len,
+	unsigned char *dst, size_t *dst_len, void *wrkmem)
+{
+	*dst_len = LZ4_compress_HC(src, dst, (int)src_len,
+		(int)((size_t)dst_len), LZ4HC_DEFAULT_CLEVEL, wrkmem);
+
+	/*
+	 * Prior lz4hc_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_HC
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4hc_compress);
 
+/**************************************
+ *	Streaming Functions
+ **************************************/
+void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel)
+{
+	LZ4_streamHCPtr->internal_donotuse.base = NULL;
+	LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned int)compressionLevel;
+}
+
+int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *dictionary,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	if (dictSize > 64 * KB) {
+		dictionary += dictSize - 64 * KB;
+		dictSize = 64 * KB;
+	}
+	LZ4HC_init(ctxPtr, (const BYTE *)dictionary);
+	if (dictSize >= 4)
+		LZ4HC_Insert(ctxPtr, (const BYTE *)dictionary + (dictSize - 3));
+	ctxPtr->end = (const BYTE *)dictionary + dictSize;
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDictHC);
+
+/* compression */
+
+static void LZ4HC_setExternalDict(
+	LZ4HC_CCtx_internal *ctxPtr,
+	const BYTE *newBlock)
+{
+	if (ctxPtr->end >= ctxPtr->base + 4) {
+		/* Referencing remaining dictionary content */
+		LZ4HC_Insert(ctxPtr, ctxPtr->end - 3);
+	}
+
+	/*
+	 * Only one memory segment for extDict,
+	 * so any previous extDict is lost at this stage
+	 */
+	ctxPtr->lowLimit	= ctxPtr->dictLimit;
+	ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+	ctxPtr->dictBase	= ctxPtr->base;
+	ctxPtr->base = newBlock - ctxPtr->dictLimit;
+	ctxPtr->end	= newBlock;
+	/* match referencing will resume from there */
+	ctxPtr->nextToUpdate = ctxPtr->dictLimit;
+}
+EXPORT_SYMBOL(LZ4HC_setExternalDict);
+
+static int LZ4_compressHC_continue_generic(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize,
+	limitedOutput_directive limit)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	/* auto - init if forgotten */
+	if (ctxPtr->base == NULL)
+		LZ4HC_init(ctxPtr, (const BYTE *) source);
+
+	/* Check overflow */
+	if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) {
+		size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base)
+			- ctxPtr->dictLimit;
+		if (dictSize > 64 * KB)
+			dictSize = 64 * KB;
+		LZ4_loadDictHC(LZ4_streamHCPtr,
+			(const char *)(ctxPtr->end) - dictSize, (int)dictSize);
+	}
+
+	/* Check if blocks follow each other */
+	if ((const BYTE *)source != ctxPtr->end)
+		LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source);
+
+	/* Check overlapping input/dictionary space */
+	{ const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+		const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+		const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
+
+		if ((sourceEnd > dictBegin)
+			&& ((const BYTE *)source < dictEnd)) {
+			if (sourceEnd > dictEnd)
+				sourceEnd = dictEnd;
+			ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+
+			if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4)
+				ctxPtr->lowLimit = ctxPtr->dictLimit;
+		}
+	}
+
+	return LZ4HC_compress_generic(ctxPtr, source, dest,
+		inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize)
+{
+	if (maxOutputSize < LZ4_compressBound(inputSize))
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, limitedOutput);
+	else
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, noLimit);
+}
+EXPORT_SYMBOL(LZ4_compress_HC_continue);
+
+/* dictionary saving */
+
+int LZ4_saveDictHC(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	char *safeBuffer,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+	int const prefixSize = (int)(streamPtr->end
+		- (streamPtr->base + streamPtr->dictLimit));
+
+	if (dictSize > 64 * KB)
+		dictSize = 64 * KB;
+	if (dictSize < 4)
+		dictSize = 0;
+	if (dictSize > prefixSize)
+		dictSize = prefixSize;
+
+	memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+
+	{ U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
+
+		streamPtr->end = (const BYTE *)safeBuffer + dictSize;
+		streamPtr->base = streamPtr->end - endIndex;
+		streamPtr->dictLimit = endIndex - dictSize;
+		streamPtr->lowLimit = endIndex - dictSize;
+
+		if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+			streamPtr->nextToUpdate = streamPtr->dictLimit;
+	}
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDictHC);
+
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4HC compressor");
+MODULE_DESCRIPTION("LZ4 HC compressor");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version
  2017-01-21 15:09 ` [PATCH v3 0/4] Update LZ4 compressor module Sven Schmidt
  2017-01-21 15:09   ` [PATCH 1/4] lib: " Sven Schmidt
@ 2017-01-21 15:09   ` Sven Schmidt
  2017-01-21 15:09   ` [PATCH 3/4] crypto: Change LZ4 modules " Sven Schmidt
  2017-01-21 15:09   ` [PATCH 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
  3 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-21 15:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt

This patch updates the unlz4 wrapper to work with the
updated LZ4 kernel module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 lib/decompress_unlz4.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 036fc88..1b0baf3 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 		error("NULL input pointer and missing fill function");
 		goto exit_1;
 	} else {
-		inp = large_malloc(lz4_compressbound(uncomp_chunksize));
+		inp = large_malloc(LZ4_compressBound(uncomp_chunksize));
 		if (!inp) {
 			error("Could not allocate input buffer");
 			goto exit_1;
@@ -136,7 +136,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			inp += 4;
 			size -= 4;
 		} else {
-			if (chunksize > lz4_compressbound(uncomp_chunksize)) {
+			if (chunksize > LZ4_compressBound(uncomp_chunksize)) {
 				error("chunk length is longer than allocated");
 				goto exit_2;
 			}
@@ -152,11 +152,14 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			out_len -= dest_len;
 		} else
 			dest_len = out_len;
-		ret = lz4_decompress(inp, &chunksize, outp, dest_len);
+
+		ret = LZ4_decompress_fast(inp, outp, dest_len);
+		chunksize = ret;
 #else
 		dest_len = uncomp_chunksize;
-		ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
-				&dest_len);
+
+		ret = LZ4_decompress_safe(inp, outp, chunksize, dest_len);
+		dest_len = ret;
 #endif
 		if (ret < 0) {
 			error("Decoding failed");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH 3/4] crypto: Change LZ4 modules to work with new LZ4 module version
  2017-01-21 15:09 ` [PATCH v3 0/4] Update LZ4 compressor module Sven Schmidt
  2017-01-21 15:09   ` [PATCH 1/4] lib: " Sven Schmidt
  2017-01-21 15:09   ` [PATCH 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
@ 2017-01-21 15:09   ` Sven Schmidt
  2017-01-21 15:09   ` [PATCH 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
  3 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-21 15:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt

This patch updates the crypto modules using LZ4 compression
to work with the new LZ4 module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 crypto/lz4.c   | 21 ++++++++-------------
 crypto/lz4hc.c | 21 ++++++++-------------
 2 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/crypto/lz4.c b/crypto/lz4.c
index 99c1b2c..40fd2c2 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -66,15 +66,13 @@ static void lz4_exit(struct crypto_tfm *tfm)
 static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
 				 u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_default(src, dst,
+		slen, (int)((size_t)dlen), ctx);
 
-	err = lz4_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (!out_len)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -96,16 +94,13 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen));
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
+	if (out_len < 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return out_len;
 }
 
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 75ffc4a..6f16f96 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -65,15 +65,13 @@ static void lz4hc_exit(struct crypto_tfm *tfm)
 static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_HC(src, dst, slen,
+		(int)((size_t)dlen), LZ4HC_DEFAULT_CLEVEL, ctx);
 
-	err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (out_len == 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -97,16 +95,13 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
 				     u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen));
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
+	if (out_len < 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return out_len;
 }
 
 static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
  2017-01-21 15:09 ` [PATCH v3 0/4] Update LZ4 compressor module Sven Schmidt
                     ` (2 preceding siblings ...)
  2017-01-21 15:09   ` [PATCH 3/4] crypto: Change LZ4 modules " Sven Schmidt
@ 2017-01-21 15:09   ` Sven Schmidt
  3 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-21 15:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt

This patch updates fs/pstore and fs/squashfs to use the updated
functions from the new LZ4 module.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 fs/pstore/platform.c      | 14 +++++++-------
 fs/squashfs/lz4_wrapper.c | 12 ++++++------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 729677e..85c4c58 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -342,31 +342,31 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_compress(in, inlen, out, &outlen, workspace);
-	if (ret) {
+	ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
+	if (!ret) {
 		pr_err("lz4_compress error, ret = %d!\n", ret);
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
-	if (ret) {
+	ret = LZ4_decompress_safe(in, out, inlen, outlen);
+	if (ret < 0) {
 		pr_err("lz4_decompress error, ret = %d!\n", ret);
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static void allocate_lz4(void)
 {
-	big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
+	big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
 	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
 	if (big_oops_buf) {
 		workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index ff4468b..95da653 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	struct squashfs_lz4 *stream = strm;
 	void *buff = stream->input, *data;
 	int avail, i, bytes = length, res;
-	size_t dest_len = output->length;
 
 	for (i = 0; i < b; i++) {
 		avail = min(bytes, msblk->devblksize - offset);
@@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 		put_bh(bh[i]);
 	}
 
-	res = lz4_decompress_unknownoutputsize(stream->input, length,
-					stream->output, &dest_len);
-	if (res)
+	res = LZ4_decompress_safe(stream->input, stream->output,
+		length, output->length);
+
+	if (res < 0)
 		return -EIO;
 
-	bytes = dest_len;
+	bytes = res;
 	data = squashfs_first_page(output);
 	buff = stream->output;
 	while (data) {
@@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	}
 	squashfs_finish_page(output);
 
-	return dest_len;
+	return res;
 }
 
 const struct squashfs_decompressor squashfs_lz4_comp_ops = {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/4] lib: Update LZ4 compressor module
  2017-01-21 15:09   ` [PATCH 1/4] lib: " Sven Schmidt
@ 2017-01-21 15:56     ` kbuild test robot
  2017-01-21 16:16     ` kbuild test robot
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 104+ messages in thread
From: kbuild test robot @ 2017-01-21 15:56 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: kbuild-all, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck, phillip, Sven Schmidt

[-- Attachment #1: Type: text/plain, Size: 2113 bytes --]

Hi Sven,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.10-rc4 next-20170120]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Sven-Schmidt/Update-LZ4-compressor-module/20170121-231418
config: i386-randconfig-r0-201703 (attached as .config)
compiler: gcc-5 (Debian 5.4.1-2) 5.4.1 20160904
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

Note: the linux-review/Sven-Schmidt/Update-LZ4-compressor-module/20170121-231418 HEAD 0472409e2a1c442b51502961aa6d83b866218953 builds fine.
      It only hurts bisectibility.

All errors (new ones prefixed by >>):

   fs/pstore/platform.c: In function 'allocate_lz4':
>> fs/pstore/platform.c:369:20: error: implicit declaration of function 'lz4_compressbound' [-Werror=implicit-function-declaration]
     big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
                       ^
   cc1: some warnings being treated as errors

vim +/lz4_compressbound +369 fs/pstore/platform.c

8cfc8ddc Geliang Tang 2016-02-18  363  
8cfc8ddc Geliang Tang 2016-02-18  364  	return outlen;
8cfc8ddc Geliang Tang 2016-02-18  365  }
8cfc8ddc Geliang Tang 2016-02-18  366  
8cfc8ddc Geliang Tang 2016-02-18  367  static void allocate_lz4(void)
8cfc8ddc Geliang Tang 2016-02-18  368  {
8cfc8ddc Geliang Tang 2016-02-18 @369  	big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
8cfc8ddc Geliang Tang 2016-02-18  370  	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
8cfc8ddc Geliang Tang 2016-02-18  371  	if (big_oops_buf) {
8cfc8ddc Geliang Tang 2016-02-18  372  		workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);

:::::: The code at line 369 was first introduced by commit
:::::: 8cfc8ddc99df9509a46043b14af81f5c6a223eab pstore: add lzo/lz4 compression support

:::::: TO: Geliang Tang <geliangtang@163.com>
:::::: CC: Kees Cook <keescook@chromium.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 24765 bytes --]

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/4] lib: Update LZ4 compressor module
  2017-01-21 15:09   ` [PATCH 1/4] lib: " Sven Schmidt
  2017-01-21 15:56     ` kbuild test robot
@ 2017-01-21 16:16     ` kbuild test robot
  2017-01-21 17:38     ` kbuild test robot
  2017-01-22 11:05     ` Greg KH
  3 siblings, 0 replies; 104+ messages in thread
From: kbuild test robot @ 2017-01-21 16:16 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: kbuild-all, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck, phillip, Sven Schmidt

[-- Attachment #1: Type: text/plain, Size: 5885 bytes --]

Hi Sven,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.10-rc4 next-20170120]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Sven-Schmidt/Update-LZ4-compressor-module/20170121-231418
config: x86_64-lkp (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

Note: the linux-review/Sven-Schmidt/Update-LZ4-compressor-module/20170121-231418 HEAD 0472409e2a1c442b51502961aa6d83b866218953 builds fine.
      It only hurts bisectibility.

All errors (new ones prefixed by >>):

   In file included from lib/decompress_unlz4.c:19:0:
   lib/decompress_unlz4.c: In function 'unlz4':
>> lib/decompress_unlz4.c:75:22: error: implicit declaration of function 'lz4_compressbound' [-Werror=implicit-function-declaration]
      inp = large_malloc(lz4_compressbound(uncomp_chunksize));
                         ^
   include/linux/decompress/mm.h:83:33: note: in definition of macro 'large_malloc'
    #define large_malloc(a) vmalloc(a)
                                    ^
   cc1: some warnings being treated as errors

vim +/lz4_compressbound +75 lib/decompress_unlz4.c

e76e1fdf Kyungsik Lee 2013-07-08  13  #include "lz4/lz4_decompress.c"
e76e1fdf Kyungsik Lee 2013-07-08  14  #else
e76e1fdf Kyungsik Lee 2013-07-08  15  #include <linux/decompress/unlz4.h>
e76e1fdf Kyungsik Lee 2013-07-08  16  #endif
e76e1fdf Kyungsik Lee 2013-07-08  17  #include <linux/types.h>
e76e1fdf Kyungsik Lee 2013-07-08  18  #include <linux/lz4.h>
e76e1fdf Kyungsik Lee 2013-07-08 @19  #include <linux/decompress/mm.h>
e76e1fdf Kyungsik Lee 2013-07-08  20  #include <linux/compiler.h>
e76e1fdf Kyungsik Lee 2013-07-08  21  
e76e1fdf Kyungsik Lee 2013-07-08  22  #include <asm/unaligned.h>
e76e1fdf Kyungsik Lee 2013-07-08  23  
e76e1fdf Kyungsik Lee 2013-07-08  24  /*
e76e1fdf Kyungsik Lee 2013-07-08  25   * Note: Uncompressed chunk size is used in the compressor side
e76e1fdf Kyungsik Lee 2013-07-08  26   * (userspace side for compression).
e76e1fdf Kyungsik Lee 2013-07-08  27   * It is hardcoded because there is not proper way to extract it
e76e1fdf Kyungsik Lee 2013-07-08  28   * from the binary stream which is generated by the preliminary
e76e1fdf Kyungsik Lee 2013-07-08  29   * version of LZ4 tool so far.
e76e1fdf Kyungsik Lee 2013-07-08  30   */
e76e1fdf Kyungsik Lee 2013-07-08  31  #define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20)
e76e1fdf Kyungsik Lee 2013-07-08  32  #define ARCHIVE_MAGICNUMBER 0x184C2102
e76e1fdf Kyungsik Lee 2013-07-08  33  
d97b07c5 Yinghai Lu   2014-08-08  34  STATIC inline int INIT unlz4(u8 *input, long in_len,
d97b07c5 Yinghai Lu   2014-08-08  35  				long (*fill)(void *, unsigned long),
d97b07c5 Yinghai Lu   2014-08-08  36  				long (*flush)(void *, unsigned long),
d97b07c5 Yinghai Lu   2014-08-08  37  				u8 *output, long *posp,
e76e1fdf Kyungsik Lee 2013-07-08  38  				void (*error) (char *x))
e76e1fdf Kyungsik Lee 2013-07-08  39  {
e76e1fdf Kyungsik Lee 2013-07-08  40  	int ret = -1;
e76e1fdf Kyungsik Lee 2013-07-08  41  	size_t chunksize = 0;
e76e1fdf Kyungsik Lee 2013-07-08  42  	size_t uncomp_chunksize = LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE;
e76e1fdf Kyungsik Lee 2013-07-08  43  	u8 *inp;
e76e1fdf Kyungsik Lee 2013-07-08  44  	u8 *inp_start;
e76e1fdf Kyungsik Lee 2013-07-08  45  	u8 *outp;
d97b07c5 Yinghai Lu   2014-08-08  46  	long size = in_len;
e76e1fdf Kyungsik Lee 2013-07-08  47  #ifdef PREBOOT
e76e1fdf Kyungsik Lee 2013-07-08  48  	size_t out_len = get_unaligned_le32(input + in_len);
e76e1fdf Kyungsik Lee 2013-07-08  49  #endif
e76e1fdf Kyungsik Lee 2013-07-08  50  	size_t dest_len;
e76e1fdf Kyungsik Lee 2013-07-08  51  
e76e1fdf Kyungsik Lee 2013-07-08  52  
e76e1fdf Kyungsik Lee 2013-07-08  53  	if (output) {
e76e1fdf Kyungsik Lee 2013-07-08  54  		outp = output;
e76e1fdf Kyungsik Lee 2013-07-08  55  	} else if (!flush) {
e76e1fdf Kyungsik Lee 2013-07-08  56  		error("NULL output pointer and no flush function provided");
e76e1fdf Kyungsik Lee 2013-07-08  57  		goto exit_0;
e76e1fdf Kyungsik Lee 2013-07-08  58  	} else {
e76e1fdf Kyungsik Lee 2013-07-08  59  		outp = large_malloc(uncomp_chunksize);
e76e1fdf Kyungsik Lee 2013-07-08  60  		if (!outp) {
e76e1fdf Kyungsik Lee 2013-07-08  61  			error("Could not allocate output buffer");
e76e1fdf Kyungsik Lee 2013-07-08  62  			goto exit_0;
e76e1fdf Kyungsik Lee 2013-07-08  63  		}
e76e1fdf Kyungsik Lee 2013-07-08  64  	}
e76e1fdf Kyungsik Lee 2013-07-08  65  
e76e1fdf Kyungsik Lee 2013-07-08  66  	if (input && fill) {
e76e1fdf Kyungsik Lee 2013-07-08  67  		error("Both input pointer and fill function provided,");
e76e1fdf Kyungsik Lee 2013-07-08  68  		goto exit_1;
e76e1fdf Kyungsik Lee 2013-07-08  69  	} else if (input) {
e76e1fdf Kyungsik Lee 2013-07-08  70  		inp = input;
e76e1fdf Kyungsik Lee 2013-07-08  71  	} else if (!fill) {
e76e1fdf Kyungsik Lee 2013-07-08  72  		error("NULL input pointer and missing fill function");
e76e1fdf Kyungsik Lee 2013-07-08  73  		goto exit_1;
e76e1fdf Kyungsik Lee 2013-07-08  74  	} else {
e76e1fdf Kyungsik Lee 2013-07-08 @75  		inp = large_malloc(lz4_compressbound(uncomp_chunksize));
e76e1fdf Kyungsik Lee 2013-07-08  76  		if (!inp) {
e76e1fdf Kyungsik Lee 2013-07-08  77  			error("Could not allocate input buffer");
e76e1fdf Kyungsik Lee 2013-07-08  78  			goto exit_1;

:::::: The code at line 75 was first introduced by commit
:::::: e76e1fdfa8f8dc1ea6699923cf5d92b5bee9c936 lib: add support for LZ4-compressed kernel

:::::: TO: Kyungsik Lee <kyungsik.lee@lge.com>
:::::: CC: Linus Torvalds <torvalds@linux-foundation.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 24663 bytes --]

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/4] lib: Update LZ4 compressor module
  2017-01-21 15:09   ` [PATCH 1/4] lib: " Sven Schmidt
  2017-01-21 15:56     ` kbuild test robot
  2017-01-21 16:16     ` kbuild test robot
@ 2017-01-21 17:38     ` kbuild test robot
  2017-01-22 11:05     ` Greg KH
  3 siblings, 0 replies; 104+ messages in thread
From: kbuild test robot @ 2017-01-21 17:38 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: kbuild-all, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck, phillip, Sven Schmidt

Hi Sven,

[auto build test WARNING on linus/master]
[also build test WARNING on v4.10-rc4 next-20170120]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Sven-Schmidt/Update-LZ4-compressor-module/20170121-231418
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

   include/linux/compiler.h:253:8: sparse: attribute 'no_sanitize_address': unknown attribute
>> lib/lz4/lz4_decompress.c:521:21: sparse: incompatible types for operation (>)
   lib/lz4/lz4_decompress.c:521:21:    left side has type unsigned long [usertype] *src_len
   lib/lz4/lz4_decompress.c:521:21:    right side has type int

vim +521 lib/lz4/lz4_decompress.c

   505				return -1;
   506	}
   507	EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
   508	
   509	int lz4_decompress(const unsigned char *src, size_t *src_len,
   510		unsigned char *dest, size_t actual_dest_len) {
   511		*src_len = LZ4_decompress_fast(src, dest, (int)actual_dest_len);
   512	
   513		/*
   514		 * Prior lz4_decompress will return
   515		 * 0 for success and a negative result for error
   516		 * new LZ4_decompress_fast returns
   517		 * - the length of data read on success
   518		 * - and also a negative result on error
   519		 * meaning when result > 0, we just return 0 here
   520		 */
 > 521		if (src_len > 0)
   522			return 0;
   523		else
   524			return -1;
   525	}
   526	EXPORT_SYMBOL(lz4_decompress);
   527	
   528	MODULE_LICENSE("Dual BSD/GPL");
   529	MODULE_DESCRIPTION("LZ4 decompressor");

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH 1/4] lib: Update LZ4 compressor module
  2017-01-21 15:09   ` [PATCH 1/4] lib: " Sven Schmidt
                       ` (2 preceding siblings ...)
  2017-01-21 17:38     ` kbuild test robot
@ 2017-01-22 11:05     ` Greg KH
  3 siblings, 0 replies; 104+ messages in thread
From: Greg KH @ 2017-01-22 11:05 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, linux-kernel,
	herbert, davem, linux-crypto, anton, ccross, keescook, tony.luck,
	phillip

On Sat, Jan 21, 2017 at 04:09:08PM +0100, Sven Schmidt wrote:
> This patch updates LZ4 kernel module to LZ4 v1.7.3 by Yann Collet.
> The kernel module is inspired by the previous work by Chanho Min.
> The updated LZ4 module will not break existing code since there were alias
> methods added to ensure backwards compatibility.
> 
> API changes:
> 
> New method LZ4_compress_fast which differs from the variant available
> in kernel by the new acceleration parameter,
> allowing to trade compression ratio for more compression speed
> and vice versa.
> 
> LZ4_decompress_fast is the respective decompression method, featuring a very
> fast decoder (multiple GB/s per core), able to reach RAM speed in multi-core
> systems. The decompressor allows to decompress data compressed with
> LZ4 fast as well as the LZ4 HC (high compression) algorithm.
> 
> Also the useful functions LZ4_decompress_safe_partial
> LZ4_compress_destsize were added. The latter reverses the logic by trying to
> compress as much data as possible from source to dest while the former aims
> to decompress partial blocks of data.
> 
> A bunch of streaming functions were also added
> which allow compressig/decompressing data in multiple steps
> (so called "streaming mode").
> 
> The methods lz4_compress and lz4_decompress_unknownoutputsize
> are now known as LZ4_compress_default respectivley LZ4_decompress_safe.
> The old methods are still available for providing backwards compatibility.
> 
> Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>

Please fix up so that it doesn't break the build as reported by the
kbuild tool...

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v4 0/4] Update LZ4 compressor module
  2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
                   ` (5 preceding siblings ...)
  2017-01-21 15:09 ` [PATCH v3 0/4] Update LZ4 compressor module Sven Schmidt
@ 2017-01-22 19:35 ` Sven Schmidt
  2017-01-22 19:35   ` [PATCH v4 1/4] lib: " Sven Schmidt
                     ` (3 more replies)
  2017-01-26  7:57 ` [PATCH v5 0/5] Update LZ4 compressor module Sven Schmidt
                   ` (3 subsequent siblings)
  10 siblings, 4 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-22 19:35 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip


This patchset is for updating the LZ4 compression module to a version based
on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
which provides an "acceleration" parameter as a tradeoff between
high compression ratio and high compression speed.

We want to use LZ4 fast in order to support compression in lustre
and (mostly, based on that) investigate data reduction techniques in behalf of
storage systems.

Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
it is possible to enable applications to use fast and/or high compression
depending on the usecase.
For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
LZ4 in the kernel.

LZ4 homepage: http://www.lz4.org/
LZ4 source repository: https://github.com/lz4/lz4
Source version: 1.7.3

Benchmark (taken from [1], Core i5-4300U @1.9GHz):
----------------|--------------|----------------|----------
Compressor      | Compression  | Decompression  | Ratio
----------------|--------------|----------------|----------
memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101

[1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html
fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version

[PATCH 1/4] lib: Update LZ4 compressor module
[PATCH 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version
[PATCH 3/4] crypto: Change LZ4 modules to work with new LZ4 module version
[PATCH 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version

v2:
- Changed order of the patches since in the initial patchset the lz4.h was in the
  last patch but was referenced by the other ones
- Split lib/decompress_unlz4.c in an own patch
- Fixed errors reported by the buildbot
- Further refactorings
- Added more appropriate copyright note to include/linux/lz4.h

v3:
- Adjusted the code to satisfy kernel coding style (checkpatch.pl)
- Made sure the changes to LZ4 in Kernel (overflow checks etc.)
  are included in the new module (they are)
- Removed the second LZ4_compressBound function with related name but
  different return type
- Corrected version number (was LZ4 1.7.3)
- Added missing LZ4 streaming functions

v4:
- Fixed kbuild errors
- Re-added lz4_compressbound as alias for LZ4_compressBound
  to ensure backwards compatibility
- Wrapped LZ4_hash5 with check for LZ4_ARCH64 since it is only used there
  and triggers an unused function warning when false

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v4 1/4] lib: Update LZ4 compressor module
  2017-01-22 19:35 ` [PATCH v4 0/4] Update LZ4 compressor module Sven Schmidt
@ 2017-01-22 19:35   ` Sven Schmidt
  2017-01-24  0:23     ` Andrew Morton
  2017-01-22 19:35   ` [PATCH v4 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2017-01-22 19:35 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt

This patch updates LZ4 kernel module to LZ4 v1.7.3 by Yann Collet.
The kernel module is inspired by the previous work by Chanho Min.
The updated LZ4 module will not break existing code since there were alias
methods added to ensure backwards compatibility.

API changes:

New method LZ4_compress_fast which differs from the variant available
in kernel by the new acceleration parameter,
allowing to trade compression ratio for more compression speed
and vice versa.

LZ4_decompress_fast is the respective decompression method, featuring a very
fast decoder (multiple GB/s per core), able to reach RAM speed in multi-core
systems. The decompressor allows to decompress data compressed with
LZ4 fast as well as the LZ4 HC (high compression) algorithm.

Also the useful functions LZ4_decompress_safe_partial
LZ4_compress_destsize were added. The latter reverses the logic by trying to
compress as much data as possible from source to dest while the former aims
to decompress partial blocks of data.

A bunch of streaming functions were also added
which allow compressig/decompressing data in multiple steps
(so called "streaming mode").

The methods lz4_compress and lz4_decompress_unknownoutputsize
are now known as LZ4_compress_default respectivley LZ4_decompress_safe.
The old methods are still available for providing backwards compatibility.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      |  589 ++++++++++++++++++++++---
 lib/lz4/lz4_compress.c   | 1103 ++++++++++++++++++++++++++++++++--------------
 lib/lz4/lz4_decompress.c |  694 ++++++++++++++++++-----------
 lib/lz4/lz4defs.h        |  320 ++++++++------
 lib/lz4/lz4hc_compress.c |  855 ++++++++++++++++++++++-------------
 5 files changed, 2486 insertions(+), 1075 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 6b784c5..2072255 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -1,87 +1,554 @@
-#ifndef __LZ4_H__
-#define __LZ4_H__
-/*
- * LZ4 Kernel Interface
+/* LZ4 Kernel Interface
  *
  * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ *
+ * This file is based on the original header file
+ * for LZ4 - Fast LZ compression algorithm.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *		* Redistributions of source code must retain the above copyright
+ *		  notice, this list of conditions and the following disclaimer.
+ *		* Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
+ */
+
+#ifndef __LZ4_H__
+#define __LZ4_H__
+
+#include <linux/types.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
+/*-************************************************************************
+ *	CONSTANTS
+ **************************************************************************/
+/*
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes
+ * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 10
+
+#define LZ4_MAX_INPUT_SIZE	0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)	(\
+	(unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
+	? 0 \
+	: (isize) + ((isize)/255) + 16)
+
+#define LZ4_ACCELERATION_DEFAULT 1
+#define LZ4_HASHLOG	 (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+#define LZ4HC_MIN_CLEVEL				3
+#define LZ4HC_DEFAULT_CLEVEL		9
+#define LZ4HC_MAX_CLEVEL				16
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE-1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+/*-************************************************************************
+ *	STREAMING CONSTANTS AND STRUCTURES
+ **************************************************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE	(LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
+
+#define LZ4_STREAMDECODESIZE_U64	4
+#define LZ4_STREAMDECODESIZE		 (LZ4_STREAMDECODESIZE_U64 * \
+	sizeof(unsigned long long))
+
+/*
+ * LZ4_stream_t :
+ * information structure to track an LZ4 stream.
+ */
+typedef struct {
+		uint32_t hashTable[LZ4_HASH_SIZE_U32];
+		uint32_t currentOffset;
+		uint32_t initCheck;
+		const uint8_t *dictionary;
+		uint8_t *bufferStart;
+		uint32_t dictSize;
+} LZ4_stream_t_internal;
+typedef union {
+		unsigned long long table[LZ4_STREAMSIZE_U64];
+		LZ4_stream_t_internal internal_donotuse;
+} LZ4_stream_t;
+
+/*
+ * LZ4_streamHC_t :
+ * information structure to track an LZ4HC stream.
  */
-#define LZ4_MEM_COMPRESS	(16384)
-#define LZ4HC_MEM_COMPRESS	(262144 + (2 * sizeof(unsigned char *)))
+typedef struct {
+		unsigned int	 hashTable[LZ4HC_HASHTABLESIZE];
+		unsigned short	 chainTable[LZ4HC_MAXD];
+		/* next block to continue on current prefix */
+		const unsigned char *end;
+		/* All index relative to this position */
+		const unsigned char *base;
+		/* alternate base for extDict */
+		const unsigned char *dictBase;
+		/* below that point, need extDict */
+		unsigned int	 dictLimit;
+		/* below that point, no more dict */
+		unsigned int	 lowLimit;
+		/* index from which to continue dict update */
+		unsigned int	 nextToUpdate;
+		unsigned int	 compressionLevel;
+} LZ4HC_CCtx_internal;
+typedef union {
+		size_t table[LZ4_STREAMHCSIZE_SIZET];
+		LZ4HC_CCtx_internal internal_donotuse;
+} LZ4_streamHC_t;
 
 /*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
+ * LZ4_streamDecode_t :
+ * information structure to track an LZ4 stream during decompression.
+ * init this structure using LZ4_setStreamDecode
+ * (or memset()) before first use
  */
-static inline size_t lz4_compressbound(size_t isize)
+typedef struct {
+		const uint8_t *externalDict;
+		size_t extDictSize;
+		const uint8_t *prefixEnd;
+		size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+typedef union {
+		unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+		LZ4_streamDecode_t_internal internal_donotuse;
+} LZ4_streamDecode_t;
+
+/*-************************************************************************
+ *	SIZE OF STATE
+ **************************************************************************/
+#define LZ4_MEM_COMPRESS	LZ4_STREAMSIZE
+#define LZ4HC_MEM_COMPRESS	LZ4_STREAMHCSIZE
+
+/*-************************************************************************
+ *	Compression Functions
+ **************************************************************************/
+
+/*
+ * LZ4_compressBound() :
+ *	Provides the maximum size that LZ4 may output in a "worst case" scenario
+ *	(input data not compressible)
+ */
+static inline int LZ4_compressBound(size_t isize)
+{
+	return LZ4_COMPRESSBOUND(isize);
+}
+
+/*
+ * For backward compatibility
+ */
+static inline int lz4_compressbound(size_t isize)
 {
-	return isize + (isize / 255) + 16;
+	return LZ4_COMPRESSBOUND(isize);
 }
 
 /*
+ * LZ4_compress_default()
+ *	Compresses 'sourceSize' bytes from buffer 'source'
+ *	into already allocated 'dest' buffer of size 'maxOutputSize'.
+ *	Compression is guaranteed to succeed if
+ *	'maxOutputSize' >= LZ4_compressBound(inputSize).
+ *	It also runs faster, so it's a recommended setting.
+ *	If the function cannot compress 'source'
+ *	into a more limited 'dest' budget,
+ *	compression stops *immediately*,
+ *	and the function result is zero.
+ *	As a consequence, 'dest' content is not valid.
+ *
+ *	source       : source address of the original data
+ *	dest         : output buffer address
+ *                 of the compressed data
+ *	inputSize    : Max supported value is
+ *                 LZ4_MAX_INPUT_SIZE
+ *	maxOutputSize: full or partial size of buffer 'dest'
+ *                 (which must be already allocated)
+ *	workmem      : address of the working memory.
+ *                 This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return       : the number of bytes written into buffer 'dest'
+ *   (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem);
+
+/*
+ * LZ4_compress_fast() :
+ *	Same as LZ4_compress_default(),
+ *	but allows to select an "acceleration" factor.
+ *	The larger the acceleration value,
+ *	the faster the algorithm,
+ *	but also the lesser the compression.
+ *	It's a trade-off. It can be fine tuned,
+ *	with each successive value
+ *	providing roughly +~3% to speed.
+ *	An acceleration value of "1"
+ *	is the same as regular LZ4_compress_default()
+ *	Values <= 0 will be replaced by
+ *	LZ4_ACCELERATION_DEFAULT, which is 1.
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem, int acceleration);
+
+/*
+ * LZ4_compress_destSize()
+ * Reverse the logic, by compressing as much data as possible
+ * from 'source' buffer into already allocated buffer 'dest'
+ * of size 'targetDestSize'.
+ * This function either compresses the entire 'source' content into 'dest'
+ * if it's large enough, or fill 'dest' buffer completely with as much data as
+ * ossible from 'source'.
+ *
+ *	source	      : source address of the original data
+ *	dest	        : output buffer address of the compressed data
+ *	inputSize	    : Max supported value is LZ4_MAX_INPUT_SIZE
+ *	*sourceSizePtr: will be modified to indicate how many bytes where read
+ *                  from 'source' to fill 'dest'.
+ *                  New value is necessarily <= old value.
+ *	workmem       : address of the working memory.
+ *                  This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
+			 or 0 if compression fails
+ */
+int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
+	int targetDestSize, void *wrkmem);
+
+/*
  * lz4_compress()
- *	src     : source address of the original data
- *	src_len : size of the original data
- *	dst	: output buffer address of the compressed data
- *		This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len : is the output size, which is returned after compress done
- *	workmem : address of the working memory.
- *		This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer and workmem must be already allocated with
+ *	src    : source address of the original data
+ *	src_len: size of the original data
+ *	dst    : output buffer address of the compressed data
+ *           This requires 'dst' of size LZ4_COMPRESSBOUND.
+ *	dst_len: is the output size, which is returned after compress done
+ *	workmem: address of the working memory.
+ *           This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return	: Success if return 0
+ *            Error if return (< 0)
+ *	note :	Destination buffer and workmem must be already allocated with
  *		the defined size.
  */
-int lz4_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
-  * lz4hc_compress()
-  *	 src	 : source address of the original data
-  *	 src_len : size of the original data
-  *	 dst	 : output buffer address of the compressed data
-  *		This requires 'dst' of size LZ4_COMPRESSBOUND.
-  *	 dst_len : is the output size, which is returned after compress done
-  *	 workmem : address of the working memory.
-  *		This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
-  *	 return  : Success if return 0
-  *		   Error if return (< 0)
-  *	 note :  Destination buffer and workmem must be already allocated with
-  *		 the defined size.
-  */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
-/*
- * lz4_decompress()
- *	src     : source address of the compressed data
- *	src_len : is the input size, whcih is returned after decompress done
- *	dest	: output buffer address of the decompressed data
- *	actual_dest_len: is the size of uncompressed data, supposing it's known
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
- *		slightly faster than lz4_decompress_unknownoutputsize()
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*-************************************************************************
+ *	Decompression Functions
+ **************************************************************************/
+
+/*
+ * LZ4_decompress_fast()
+ *	source      : source address of the compressed data
+ *	dst         : output buffer address of the decompressed data
+ *	originalSize: is the original and therefore uncompressed size
+ *	return : the number of bytes read from the source buffer
+ *           (in other words, the compressed size)
+ *           If the source stream is detected malformed, the function will
+ *           stop decoding and return a negative result.
+ *           Destination buffer must be already allocated.
+ *           Its size must be a minimum of 'originalSize' bytes.
+ *	note   : This function fully respect memory boundaries
+ *           for properly formed compressed data.
+ *           It is a bit faster than LZ4_decompress_safe().
+ *           However, it does not provide any protection against intentionally
+ *           modified data stream (malicious input).
+ *           Use this function in trusted environment only
+ *           (data to decode comes from a trusted source).
  */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len);
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
+
+/*
+ * LZ4_decompress_safe()
+ *	source             : source address of the compressed data
+ *	dest               : output buffer address of the decompressed data
+ *	compressedSize     : is the precise full size of the compressed block.
+ *	maxDecompressedSize: is the size of destination buffer,
+ *                       which must be already allocated.
+ *	return : the number of bytes decompressed into destination buffer
+ *           (necessarily <= maxDecompressedSize)
+ *           If destination buffer is not large enough, decoding will stop
+ *           and output an error code (<0).
+ *           If the source stream is detected malformed, the function will
+ *           stop decoding and return a negative result.
+ *           This function is protected against buffer overflow exploits,
+ *           including malicious data packets. It never writes outside
+ *           output buffer, nor reads outside input buffer.
+ */
+int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
 
 /*
- * lz4_decompress_unknownoutputsize()
+ * LZ4_decompress_safe_partial() :
+ * This function decompress a compressed block of size 'compressedSize'
+ * at position 'source' into destination buffer 'dest'
+ * of size 'maxDecompressedSize'.
+ * The function tries to stop decompressing operation as soon as
+ * 'targetOutputSize' has been reached, reducing decompression time.
+ *
+ *	source             : source address of the compressed data
+ *	dest               : output buffer address of the decompressed data
+ *	compressedSize     : is the precise full size of the compressed block.
+ *	targetOutputSize   : the decompression operation will try
+ *                       to stop as soon as 'targetOutputSize'
+ *                       has been reached
+ *	maxDecompressedSize: is the size of destination buffer,
+ *                       which must be already allocated.
+ *	return  : the number of bytes decoded in the destination buffer
+ *            (necessarily <= maxDecompressedSize)
+ *	Note    : this number can be < 'targetOutputSize' should the compressed
+ *            block to decode be smaller. Always control how many bytes
+ *            were decoded. If the source stream is detected malformed,
+ *            the function will stop decoding and return a negative result.
+ *            This function never writes outside of output buffer,
+ *            and never reads outside of input buffer.
+ *            It is therefore protected against malicious data packets
+ */
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+
+/*
+ * lz4_decompress_unknownoutputsize() :
  *	src     : source address of the compressed data
  *	src_len : is the input size, therefore the compressed size
- *	dest	: output buffer address of the decompressed data
+ *	dest    : output buffer address of the decompressed data
  *	dest_len: is the max size of the destination buffer, which is
- *			returned with actual size of decompressed data after
- *			decompress done
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
+ *            returned with actual size of decompressed data after
+ *            decompress done
+ * return: Success if return 0
+ *         Error if return (< 0)
+ * note:   Destination buffer must be already allocated.
  */
 int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len);
+	unsigned char *dest, size_t *dest_len);
+
+/*
+ * lz4_decompress() :
+ *	src            : source address of the compressed data
+ *	src_len        : is the input size,
+ *                   which is returned after decompress done
+ *	dest           : output buffer address of the decompressed data
+ *	actual_dest_len: is the size of uncompressed data, supposing it's known
+ *	return: Success if return 0
+ *          Error if return (< 0)
+ *	note  : Destination buffer must be already allocated.
+ *          slightly faster than lz4_decompress_unknownoutputsize()
+ */
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len);
+
+/*-************************************************************************
+ *	LZ4 HC Compression
+ **************************************************************************/
+
+/*! LZ4_compress_HC() :
+ * Compress data from `src` into `dst`, using the more powerful
+ * but slower "HC" algorithm. dst` must be already allocated.
+ * Compression is guaranteed to succeed if
+ * `dstCapacity >= LZ4_compressBound(srcSize)
+ * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE
+ *
+ *	src             : source address of the original data
+ *	dst             : output buffer address of the compressed data
+ *	srcSize         : Max supported value is LZ4_MAX_INPUT_SIZE
+ *	dstCapacity     : full or partial size of buffer 'dst'
+ *                    (which must be already allocated)
+ *	compressionLevel: Recommended values are between 4 and 9, although any
+ *                    value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *                    Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ *	wrkmem          : address of the working memory.
+ *                    This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+ *	return : the number of bytes written into 'dst'
+ *           or 0 if compression fails.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
+	int compressionLevel, void *wrkmem);
+
+/*
+ * lz4hc_compress()
+ *	src    : source address of the original data
+ *	src_len: size of the original data
+ *	dst    : output buffer address of the compressed data
+ *           This requires 'dst' of size LZ4_COMPRESSBOUND.
+ *	dst_len: is the output size, which is returned after compress done
+ *	workmem: address of the working memory.
+ *           This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
+ * return	: Success if return 0
+ *          Error if return (< 0)
+ * note   : Destination buffer and workmem must be already allocated with
+ *          the defined size.
+ */
+int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*
+ *	These functions compress data in successive blocks of any size,
+ *	using previous blocks as dictionary. One key assumption is that previous
+ *	blocks (up to 64 KB) remain read-accessible while
+ *	compressing next blocks.
+ *	There is an exception for ring buffers, which can be smaller than 64 KB.
+ *	Ring buffers scenario is automatically detected and handled by
+ *	LZ4_compress_HC_continue().
+ *	Before starting compression, state must be properly initialized,
+ *	using LZ4_resetStreamHC().
+ *	A first "fictional block" can then be designated as initial dictionary,
+ *	using LZ4_loadDictHC() (Optional).
+ *	Then, use LZ4_compress_HC_continue() to compress each successive block.
+ *	Previous memory blocks (including initial dictionary when present) must
+ *	remain accessible and unmodified during compression.
+ *	'dst' buffer should be sized to handle worst case scenarios, using
+ *	LZ4_compressBound(), to ensure operation success.
+ *	If, for any reason, previous data blocks can't be preserved unmodified
+ *	in memory during next compression block,
+ *	you must save it to a safer memory space, using LZ4_saveDictHC().
+ *	Return value of LZ4_saveDictHC() is the size of dictionary
+ *	effectively saved into 'safeBuffer'.
+ */
+void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
+int	LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
+	int dictSize);
+int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize);
+int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
+	int maxDictSize);
+
+/*-*********************************************
+ *	Streaming Compression Functions
+ ***********************************************/
+/*
+ * LZ4_resetStream() :
+ *	An LZ4_stream_t structure can be allocated once
+ *	and re-used multiple times.
+ *	Use this function to init an allocated `LZ4_stream_t` structure
+ *	and start a new compression.
+ */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
+
+/*
+ * LZ4_loadDict() :
+ *	Use this function to load a static dictionary into LZ4_stream.
+ *	Any previous data will be forgotten, only 'dictionary'
+ *	will remain in memory.
+ *	Loading a size of 0 is allowed.
+ *	Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
+	int dictSize);
+
+/*
+ * LZ4_compress_fast_continue() :
+ *	Compress buffer content 'src',
+ *	using data from previously compressed blocks
+ *	as dictionary to improve compression ratio.
+ *	Important : Previous data blocks are assumed to still
+ *	be present and unmodified !
+ *	'dst' buffer must be already allocated.
+ *	If maxDstSize >= LZ4_compressBound(srcSize),
+ *	compression is guaranteed to succeed, and runs faster.
+ *	If not, and if compressed data cannot fit into 'dst' buffer size,
+ *	compression stops, and function returns a zero.
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize, int acceleration);
+
+/*
+ * LZ4_saveDict() :
+ *	If previously compressed data block is not guaranteed
+ *	to remain available at its memory location,
+ *	save it into a safer place (char *safeBuffer).
+ *	Note : you don't need to call LZ4_loadDict() afterwards,
+ *         dictionary is immediately usable, you can therefore call
+ *         LZ4_compress_fast_continue().
+ *	Return : saved dictionary size in bytes (necessarily <= dictSize),
+ *           or 0 if error.
+ */
+int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
+
+/*
+ * LZ4_setStreamDecode() :
+ *	Use this function to instruct where to find the dictionary.
+ *	Setting a size of 0 is allowed (same effect as reset).
+ *	@return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize);
+
+/*
+ * LZ4_decompress_*_continue() :
+ *	These decoding functions allow decompression of multiple blocks
+ *	in "streaming" mode.
+ *	Previously decoded blocks *must* remain available at the memory position
+ *	where they were decoded (up to 64 KB)
+ *	In the case of a ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize);
+
+/*
+ * LZ4_decompress_*_usingDict() :
+ *	These decoding functions work the same as
+ *	a combination of LZ4_setStreamDecode() followed by
+ *	LZ4_decompress_*_continue()
+ *	They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize, const char *dictStart,
+	int dictSize);
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize);
+
 #endif
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 28321d8..e595896 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -1,19 +1,16 @@
 /*
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *		* Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *		* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,417 +22,875 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lz4.h>
 #include <asm/unaligned.h>
-#include "lz4defs.h"
 
-/*
- * LZ4_compressCtx :
- * -----------------
- * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
- * maximum size 'maxOutputSize'.  * If it cannot achieve it, compression
- * will stop, and result of the function will be zero.
- * return : the number of bytes written in buffer 'dest', or 0 if the
- * compression fails
- */
-static inline int lz4_compressctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+/*-******************************
+ *	Compression functions
+ ********************************/
+static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
 {
-	HTYPE *hashtable = (HTYPE *)ctx;
-	const u8 *ip = (u8 *)source;
+	if (tableType == byU16)
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH*8) - (LZ4_HASHLOG + 1)));
+	else
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH*8) - LZ4_HASHLOG));
+}
+
 #if LZ4_ARCH64
-	const BYTE * const base = ip;
+static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+{
+	const U32 hashLog = (tableType == byU16)
+		? LZ4_HASHLOG + 1
+		: LZ4_HASHLOG;
+
+#ifdef __LITTLE_ENDIAN__
+	static const U64 prime5bytes = 889523592379ULL;
+
+	return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
 #else
-	const int base = 0;
+	static const U64 prime8bytes = 11400714785074694791ULL;
+
+	return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
 #endif
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+}
+#endif
+
+static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
+{
+#if LZ4_ARCH64
+	if (tableType == byU32)
+		return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+#endif
+
+	return LZ4_hash4(LZ4_read32(p), tableType);
+}
+
+static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
+	tableType_t const tableType, const BYTE *srcBase)
+{
+	switch (tableType) {
+	case byPtr:
+	{
+		const BYTE **hashTable = (const BYTE **)tableBase;
+
+		hashTable[h] = p;
+		return;
+	}
+	case byU32:
+	{
+		U32 *hashTable = (U32 *) tableBase;
+
+		hashTable[h] = (U32)(p - srcBase);
+		return;
+	}
+	case byU16:
+	{
+		U16 *hashTable = (U16 *) tableBase;
+
+		hashTable[h] = (U16)(p - srcBase);
+		return;
+	}
+	}
+}
+
+static inline void LZ4_putPosition(const BYTE *p, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	if (tableType == byPtr) {
+		const BYTE **hashTable = (const BYTE **) tableBase;
+
+		return hashTable[h];
+	}
+
+	if (tableType == byU32) {
+		const U32 * const hashTable = (U32 *) tableBase;
+
+		return hashTable[h] + srcBase;
+	}
+
+	{
+		/* default, to ensure a return */
+		const U16 * const hashTable = (U16 *) tableBase;
+		return hashTable[h] + srcBase;
+	}
+}
+
+static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+
+/*
+ * LZ4_compress_generic() :
+ * inlined, to ensure branches are decided at compilation time
+ */
+static inline int LZ4_compress_generic(
+	LZ4_stream_t_internal * const dictPtr,
+	const char * const source,
+	char * const dest,
+	const int inputSize,
+	const int maxOutputSize,
+	const limitedOutput_directive outputLimited,
+	const tableType_t tableType,
+	const dict_directive dict,
+	const dictIssue_directive dictIssue,
+	const U32 acceleration)
+{
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *base;
+	const BYTE *lowLimit;
+	const BYTE * const lowRefLimit = ip - dictPtr->dictSize;
+	const BYTE * const dictionary = dictPtr->dictionary;
+	const BYTE * const dictEnd = dictionary + dictPtr->dictSize;
+	const size_t dictDelta = dictEnd - (const BYTE *)source;
+	const BYTE *anchor = (const BYTE *) source;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dest;
+	BYTE * const olimit = op + maxOutputSize;
+
+	U32 forwardH;
+	size_t refDelta = 0;
+
+	/* Init conditions */
+	if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) {
+		/* Unsupported inputSize, too large (or negative) */
+		return 0;
+	}
+	switch (dict) {
+	case noDict:
+	default:
+		base = (const BYTE *)source;
+		lowLimit = (const BYTE *)source;
+		break;
+	case withPrefix64k:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source - dictPtr->dictSize;
+		break;
+	case usingExtDict:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source;
+		break;
+	}
+
+	if ((tableType == byU16)
+		&& (inputSize >= LZ4_64Klimit)) {
+		/* Size too large (not within 64K limit) */
+		return 0;
+	}
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+	if (inputSize < LZ4_minLength) {
+		/* Input too small, no compression (all literals) */
+		goto _last_literals;
+	}
 
 	/* First Byte */
-	hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-	ip++;
-	forwardh = LZ4_HASH_VALUE(ip);
+	LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (unlikely(forwardip > mflimit))
-				goto _last_literals;
-
-			forwardh = LZ4_HASH_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = ip - base;
-		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+		{ const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = acceleration
+				<< LZ4_skipTrigger;
+
+			do {
+				U32 const h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h,
+					dictPtr->hashTable,
+					tableType, base);
+				if (dict == usingExtDict) {
+					if (match < (const BYTE *)source) {
+						refDelta = dictDelta;
+						lowLimit = dictionary;
+					} else {
+						refDelta = 0;
+						lowLimit = (const BYTE *)source;
+				}	 }
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
+					tableType, base);
+
+			} while (((dictIssue == dictSmall)
+					? (match < lowRefLimit)
+					: 0)
+				|| ((tableType == byU16)
+					? 0
+					: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match + refDelta)
+					!= LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source) &&
-			unlikely(ip[-1] == ref[-1])) {
+		while (((ip > anchor) & (match + refDelta > lowLimit))
+			&& (unlikely(ip[-1] == match[refDelta - 1]))) {
 			ip--;
-			ref--;
-		}
+			match--;
+			}
 
-		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
-			(length >> 8) > oend))
-			return 0;
+		/* Encode Literals */
+		{ unsigned const int litLength = (unsigned int)(ip - anchor);
 
-		if (length >= (int)RUN_MASK) {
-			int len;
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+			token = op++;
+			if ((outputLimited) &&
+				/* Check output buffer overflow */
+				(unlikely(op + litLength +
+					(2 + 1 + LASTLITERALS) +
+					(litLength/255) > olimit)))
+				return 0;
+			if (litLength >= RUN_MASK) {
+				int len = (int)litLength - RUN_MASK;
+
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255 ; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength<<ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
-		while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
-			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 		/* Encode MatchLength */
-		length = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
-			return 0;
-		if (length >= (int)ML_MASK) {
-			*token += ML_MASK;
-			length -= ML_MASK;
-			for (; length > 509 ; length -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (length > 254) {
-				length -= 255;
-				*op++ = 255;
+		{	 unsigned int matchCode;
+
+			if ((dict == usingExtDict)
+				&& (lowLimit == dictionary)) {
+				const BYTE *limit;
+
+				match += refDelta;
+				limit = ip + (dictEnd - match);
+				if (limit > matchlimit)
+					limit = matchlimit;
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, limit);
+				ip += MINMATCH + matchCode;
+				if (ip == limit) {
+					unsigned const int more = LZ4_count(ip,
+						(const BYTE *)source,
+						matchlimit);
+
+					matchCode += more;
+					ip += more;
+				}
+			} else {
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, matchlimit);
+				ip += MINMATCH + matchCode;
 			}
-			*op++ = (u8)length;
-		} else
-			*token += length;
+
+			if (outputLimited &&
+				/* Check output buffer overflow */
+				(unlikely(op +
+					(1 + LASTLITERALS) +
+					(matchCode>>8) > olimit)))
+				return 0;
+			if (matchCode >= ML_MASK) {
+				*token += ML_MASK;
+				matchCode -= ML_MASK;
+				LZ4_write32(op, 0xFFFFFFFF);
+				while (matchCode >= 4*255) {
+					op += 4;
+					LZ4_write32(op, 0xFFFFFFFF);
+					matchCode -= 4*255;
+				}
+				op += matchCode / 255;
+				*op++ = (BYTE)(matchCode % 255);
+			} else
+				*token += (BYTE)(matchCode);
+		}
+
+		anchor = ip;
 
 		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		if (ip > mflimit)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
+		LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH_VALUE(ip)];
-		hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
+		match = LZ4_getPosition(ip, dictPtr->hashTable,
+			tableType, base);
+		if (dict == usingExtDict) {
+			if (match < (const BYTE *)source) {
+				refDelta = dictDelta;
+				lowLimit = dictionary;
+			} else {
+				refDelta = 0;
+				lowLimit = (const BYTE *)source;
+			}
+		}
+		LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+		if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
+			&& (match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
 			token = op++;
 			*token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (((char *)op - dest) + lastrun + 1
-		+ ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
-		return 0;
-
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{	 size_t const lastRun = (size_t)(iend - anchor);
+		if ((outputLimited) &&
+			/* Check output buffer overflow */
+			((op - (BYTE *)dest) + lastRun + 1 +
+			((lastRun + 255 - RUN_MASK)/255) > (U32)maxOutputSize))
+			return 0;
+		if (lastRun >= RUN_MASK) {
+			size_t accumulator = lastRun - RUN_MASK;
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255 ; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRun);
+		op += lastRun;
+	}
 
 	/* End */
-	return (int)(((char *)op) - dest);
+	return (int) (((char *)op) - dest);
 }
 
-static inline int lz4_compress64kctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
+	int inputSize, int maxOutputSize, int acceleration)
 {
-	u16 *hashtable = (u16 *)ctx;
-	const u8 *ip = (u8 *) source;
-	const u8 *anchor = ip;
-	const u8 *const base = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int len, length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+	#if LZ4_ARCH64
+	tableType_t tableType = byU32;
+	#else
+	tableType_t tableType = byPtr;
+	#endif
+
+	LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
+
+	LZ4_resetStream((LZ4_stream_t *)state);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
+
+	if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, tableType, noDict,
+				noDictIssue, acceleration);
+	} else {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, tableType, noDict,
+				noDictIssue, acceleration);
+	}
+}
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem, int acceleration)
+{
+	return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize,
+		maxOutputSize, acceleration);
+}
+EXPORT_SYMBOL(LZ4_compress_fast);
+
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem)
+{
+	return LZ4_compress_fast(source, dest, inputSize,
+		maxOutputSize, wrkmem, 1);
+}
+EXPORT_SYMBOL(LZ4_compress_default);
+
+/*-******************************
+ *	*_destSize() variant
+ ********************************/
+
+static int LZ4_compress_destSize_generic(
+	LZ4_stream_t_internal * const ctx,
+	const char * const src,
+	char * const dst,
+	int * const srcSizePtr,
+	const int targetDstSize,
+	const tableType_t tableType)
+{
+	const BYTE *ip = (const BYTE *) src;
+	const BYTE *base = (const BYTE *) src;
+	const BYTE *lowLimit = (const BYTE *) src;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + *srcSizePtr;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dst;
+	BYTE * const oend = op + targetDstSize;
+	BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */
+		- 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */;
+	BYTE * const oMaxMatch = op + targetDstSize
+		- (LASTLITERALS + 1 /* token */);
+	BYTE * const oMaxSeq = oMaxLit - 1 /* token */;
+
+	U32 forwardH;
+
+	/* Init conditions */
+	/* Impossible to store anything */
+	if (targetDstSize < 1)
+		return 0;
+	/* Unsupported input size, too large (or negative) */
+	if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE)
+		return 0;
+	/* Size too large (not within 64K limit) */
+	if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit))
+		return 0;
+	/* Input too small, no compression (all literals) */
+	if (*srcSizePtr < LZ4_minLength)
+		goto _last_literals;
 
 	/* First Byte */
-	ip++;
-	forwardh = LZ4_HASH64K_VALUE(ip);
+	*srcSizePtr = 0;
+	LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (forwardip > mflimit)
-				goto _last_literals;
-
-			forwardh = LZ4_HASH64K_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = (u16)(ip - base);
-		} while (A32(ref) != A32(ip));
+		{	 const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = 1 << LZ4_skipTrigger;
+
+			do {
+				U32 h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h, ctx->hashTable,
+					tableType, base);
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h,
+					ctx->hashTable, tableType,
+					base);
+
+			} while (((tableType == byU16)
+				? 0
+				: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match) != LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source)
-			&& (ip[-1] == ref[-1])) {
-			ip--;
-			ref--;
-		}
+		while ((ip > anchor)
+			&& (match > lowLimit)
+			&& (unlikely(ip[-1] == match[-1]))) {
+			ip--; match--;
+			}
 
 		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* Check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS)
-			+ (length >> 8) > oend))
-			return 0;
-		if (length >= (int)RUN_MASK) {
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+		{	 unsigned int litLength = (unsigned int)(ip - anchor);
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
+			token = op++;
+			if (op + ((litLength + 240)/255)
+				+ litLength > oMaxLit) {
+				/* Not enough space for a last match */
+				op--;
+				goto _last_literals;
+			}
+			if (litLength >= RUN_MASK) {
+				unsigned int len = litLength - RUN_MASK;
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255 ; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength<<ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
+		/* Encode MatchLength */
+		{	 size_t matchLength = LZ4_count(ip + MINMATCH,
+			match + MINMATCH, matchlimit);
 
-		while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
+			if (op + ((matchLength + 240)/255) > oMaxMatch) {
+				/* Match description too long : reduce it */
+				matchLength = (15 - 1) + (oMaxMatch - op) * 255;
 			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
+			ip += MINMATCH + matchLength;
+
+			if (matchLength >= ML_MASK) {
+				*token += ML_MASK;
+				matchLength -= ML_MASK;
+				while (matchLength >= 255) {
+					matchLength -= 255; *op++ = 255;
+				}
+				*op++ = (BYTE)matchLength;
+			} else
+				*token += (BYTE)(matchLength);
 		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 
-		/* Encode MatchLength */
-		len = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
-			return 0;
-		if (len >= (int)ML_MASK) {
-			*token += ML_MASK;
-			len -= ML_MASK;
-			for (; len > 509 ; len -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (len > 254) {
-				len -= 255;
-				*op++ = 255;
-			}
-			*op++ = (u8)len;
-		} else
-			*token += len;
+		anchor = ip;
 
-		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		/* Test end of block */
+		if (ip > mflimit)
+			break;
+		if (op > oMaxSeq)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
+		LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
-		hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
-		if (A32(ref) == A32(ip)) {
-			token = op++;
-			*token = 0;
+		match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
+		LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+
+		if ((match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match) == LZ4_read32(ip))) {
+			token = op++; *token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH64K_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
-		return 0;
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{	 size_t lastRunSize = (size_t)(iend - anchor);
+
+		if (op + 1 /* token */
+			+ ((lastRunSize + 240)/255) /* litLength */
+			+ lastRunSize /* literals */ > oend) {
+			/* adapt lastRunSize to fill 'dst' */
+			lastRunSize	= (oend - op) - 1;
+			lastRunSize -= (lastRunSize + 240)/255;
+		}
+		ip = anchor + lastRunSize;
+
+		if (lastRunSize >= RUN_MASK) {
+			size_t accumulator = lastRunSize - RUN_MASK;
+
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255 ; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRunSize<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRunSize);
+		op += lastRunSize;
+	}
+
 	/* End */
-	return (int)(((char *)op) - dest);
+	*srcSizePtr = (int) (((const char *)ip) - src);
+	return (int) (((char *)op) - dst);
 }
 
-int lz4_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_destSize_extState(LZ4_stream_t *state, const char *src,
+	char *dst, int *srcSizePtr, int targetDstSize)
 {
-	int ret = -1;
-	int out_len = 0;
+	#if LZ4_ARCH64
+	tableType_t tableType = byU32;
+	#else
+	tableType_t tableType = byPtr;
+	#endif
+
+	LZ4_resetStream(state);
+
+	if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {
+		/* compression success is guaranteed */
+		return LZ4_compress_fast_extState(
+			state, src, dst, *srcSizePtr,
+			targetDstSize, 1);
+	} else {
+		if (*srcSizePtr < LZ4_64Klimit)
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, byU16);
+		else
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, tableType);
+	}
+}
 
-	if (src_len < LZ4_64KLIMIT)
-		out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
-	else
-		out_len = lz4_compressctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
 
-	if (out_len < 0)
-		goto exit;
+int LZ4_compress_destSize(const char *src, char *dst, int *srcSizePtr,
+	int targetDstSize, void *wrkmem)
+{
+	return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr,
+		targetDstSize);
+}
+EXPORT_SYMBOL(LZ4_compress_destSize);
+
+/*-******************************
+ *	Streaming functions
+ ********************************/
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream)
+{
+	memset(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+#define HASH_UNIT sizeof(reg_t)
+int LZ4_loadDict(LZ4_stream_t *LZ4_dict,
+	const char *dictionary, int dictSize)
+{
+	LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse;
+	const BYTE *p = (const BYTE *)dictionary;
+	const BYTE * const dictEnd = p + dictSize;
+	const BYTE *base;
+
+	if ((dict->initCheck)
+		|| (dict->currentOffset > 1 * GB)) {
+		/* Uninitialized structure, or reuse overflow */
+		LZ4_resetStream(LZ4_dict);
+	}
+
+	if (dictSize < (int)HASH_UNIT) {
+		dict->dictionary = NULL;
+		dict->dictSize = 0;
+		return 0;
+	}
+
+	if ((dictEnd - p) > 64 * KB)
+		p = dictEnd - 64 * KB;
+	dict->currentOffset += 64 * KB;
+	base = p - dict->currentOffset;
+	dict->dictionary = p;
+	dict->dictSize = (U32)(dictEnd - p);
+	dict->currentOffset += dict->dictSize;
+
+	while (p <= dictEnd - HASH_UNIT) {
+		LZ4_putPosition(p, dict->hashTable, byU32, base);
+		p += 3;
+	}
 
-	*dst_len = out_len;
+	return dict->dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDict);
+
+static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict,
+	const BYTE *src)
+{
+	if ((LZ4_dict->currentOffset > 0x80000000) ||
+		((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {
+		/* address space overflow */
+		/* rescale hash table */
+		U32 const delta = LZ4_dict->currentOffset - 64 * KB;
+		const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+		int i;
+
+		for (i = 0; i < LZ4_HASH_SIZE_U32; i++) {
+			if (LZ4_dict->hashTable[i] < delta)
+				LZ4_dict->hashTable[i] = 0;
+			else
+				LZ4_dict->hashTable[i] -= delta;
+		}
+		LZ4_dict->currentOffset = 64 * KB;
+		if (LZ4_dict->dictSize > 64 * KB)
+			LZ4_dict->dictSize = 64 * KB;
+		LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+	}
+}
+
+int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
+{
+	LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse;
+	const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize;
+
+	if ((U32)dictSize > 64 * KB) {
+		/* useless to define a dictionary > 64 * KB */
+		dictSize = 64 * KB;
+	}
+	if ((U32)dictSize > dict->dictSize)
+		dictSize = dict->dictSize;
 
-	return 0;
-exit:
-	return ret;
+	memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+	dict->dictionary = (const BYTE *)safeBuffer;
+	dict->dictSize = (U32)dictSize;
+
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDict);
+
+int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
+	char *dest, int inputSize, int maxOutputSize, int acceleration)
+{
+	LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse;
+	const BYTE * const dictEnd = streamPtr->dictionary
+		+ streamPtr->dictSize;
+
+	const BYTE *smallest = (const BYTE *) source;
+
+	if (streamPtr->initCheck) {
+		/* Uninitialized structure detected */
+		return 0;
+	}
+
+	if ((streamPtr->dictSize > 0) && (smallest > dictEnd))
+		smallest = dictEnd;
+
+	LZ4_renormDictT(streamPtr, smallest);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
+
+	/* Check overlapping input/dictionary space */
+	{	 const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+
+		if ((sourceEnd > streamPtr->dictionary)
+			&& (sourceEnd < dictEnd)) {
+			streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+			if (streamPtr->dictSize > 64 * KB)
+				streamPtr->dictSize = 64 * KB;
+			if (streamPtr->dictSize < 4)
+				streamPtr->dictSize = 0;
+			streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+		}
+	}
+
+	/* prefix mode : source data follows dictionary */
+	if (dictEnd == (const BYTE *)source) {
+		int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, dictSmall,	acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, noDictIssue, acceleration);
+		}
+		streamPtr->dictSize += (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+
+	/* external dictionary mode */
+	{ int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, dictSmall, acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, noDictIssue, acceleration);
+		}
+		streamPtr->dictionary = (const BYTE *)source;
+		streamPtr->dictSize = (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+}
+EXPORT_SYMBOL(LZ4_compress_fast_continue);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem) {
+	*dst_len = LZ4_compress_default(src, dst, (int)src_len,
+		(int)((size_t)dst_len), wrkmem);
+
+	/*
+	 * Prior lz4_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_fast/default
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4_compress);
 
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 6d940c7..ca71375 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -1,25 +1,16 @@
 /*
- * LZ4 Decompressor for Linux kernel
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * Based on LZ4 implementation by Yann Collet.
- *
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *    * Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *    * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -31,313 +22,508 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  You can contact the author at :
- *  - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- *  - LZ4 source repository : http://code.google.com/p/lz4/
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#ifndef STATIC
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#endif
-#include <linux/lz4.h>
-
 #include <asm/unaligned.h>
 
-#include "lz4defs.h"
-
-static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-static int lz4_uncompress(const char *source, char *dest, int osize)
+/*-*****************************
+ *	Decompression functions
+ *******************************/
+/* LZ4_decompress_generic() :
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is important this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+static inline int LZ4_decompress_generic(
+	 const char *const source,
+	 char * const dest,
+	 int inputSize,
+		/*
+		 * If endOnInput == endOnInputSize,
+		 * this value is the max size of Output Buffer.
+		 */
+	 int outputSize,
+	 /* endOnOutputSize, endOnInputSize */
+	 int endOnInput,
+	 /* full, partial */
+	 int partialDecoding,
+	 /* only used if partialDecoding == partial */
+	 int targetOutputSize,
+	 /* noDict, withPrefix64k, usingExtDict */
+	 int dict,
+	 /* == dest when no prefix */
+	 const BYTE * const lowPrefix,
+	 /* only if dict == usingExtDict */
+	 const BYTE * const dictStart,
+	 /* note : = 0 if noDict */
+	 const size_t dictSize
+	 )
 {
+	/* Local Variables */
 	const BYTE *ip = (const BYTE *) source;
-	const BYTE *ref;
+	const BYTE * const iend = ip + inputSize;
+
 	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + osize;
+	BYTE * const oend = op + outputSize;
 	BYTE *cpy;
-	unsigned token;
-	size_t length;
+	BYTE *oexit = op + targetOutputSize;
+	const BYTE * const lowLimit = lowPrefix - dictSize;
+
+	const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
+	const unsigned int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
+	const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+	const int safeDecode = (endOnInput == endOnInputSize);
+	const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
 
+	/* Special cases */
+	/* targetOutputSize too high => decode everything */
+	if ((partialDecoding) && (oexit > oend - MFLIMIT))
+		oexit = oend - MFLIMIT;
+
+	/* Empty output buffer */
+	if ((endOnInput) && (unlikely(outputSize == 0)))
+		return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
+
+	if ((!endOnInput) && (unlikely(outputSize == 0)))
+		return (*ip == 0 ? 1 : -1);
+
+	/* Main Loop : decode sequences */
 	while (1) {
+		size_t length;
+		const BYTE *match;
+		size_t offset;
+
+		/* get literal length */
+		unsigned int const token = *ip++;
+
+		length = token>>ML_BITS;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
 		if (length == RUN_MASK) {
-			size_t len;
+			unsigned int s;
 
-			len = *ip++;
-			for (; len == 255; length += 255)
-				len = *ip++;
-			if (unlikely(length > (size_t)(length + len)))
+			do {
+				s = *ip++;
+				length += s;
+			} while (likely(endOnInput
+				? ip < iend - RUN_MASK
+				: 1) & (s == 255));
+
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)(op))) {
+				/* overflow detection */
 				goto _output_error;
-			length += len;
+			}
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(ip + length) < (size_t)(ip))) {
+				/* overflow detection */
+				goto _output_error;
+			}
 		}
 
 		/* copy literals */
 		cpy = op + length;
-		if (unlikely(cpy > oend - COPYLENGTH)) {
-			/*
-			 * Error: not enough place for another match
-			 * (min 4) + 5 literals
-			 */
-			if (cpy != oend)
-				goto _output_error;
-
+		if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
+			|| (ip + length > iend - (2 + 1 + LASTLITERALS))))
+			|| ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
+			if (partialDecoding) {
+				if (cpy > oend) {
+					/*
+					 * Error :
+					 * write attempt beyond end of output buffer
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& (ip + length > iend)) {
+					/*
+					 * Error :
+					 * read attempt beyond
+					 * end of input buffer
+					 */
+					goto _output_error;
+				}
+			} else {
+				if ((!endOnInput)
+					&& (cpy != oend)) {
+					/*
+					 * Error :
+					 * block decoding must
+					 * stop exactly there
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& ((ip + length != iend)
+					|| (cpy > oend))) {
+					/*
+					 * Error :
+					 * input must be consumed
+					 */
+					goto _output_error;
+				}
+			}
 			memcpy(op, ip, length);
 			ip += length;
-			break; /* EOF */
+			op += length;
+			/* Necessarily EOF, due to parsing restrictions */
+			break;
 		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+		LZ4_wildCopy(op, ip, cpy);
+		ip += length; op = cpy;
 
 		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-
-		/* Error: offset create reference outside destination buffer */
-		if (unlikely(ref < (BYTE *const) dest))
+		offset = LZ4_readLE16(ip); ip += 2;
+		match = op - offset;
+		if ((checkOffset) && (unlikely(match < lowLimit))) {
+			/* Error : offset outside buffers */
 			goto _output_error;
+		}
+		/* costs ~1%; silence an msan warning when offset == 0 */
+		LZ4_write32(op, (U32)offset);
 
 		/* get matchlength */
 		length = token & ML_MASK;
 		if (length == ML_MASK) {
-			for (; *ip == 255; length += 255)
-				ip++;
-			if (unlikely(length > (size_t)(length + *ip)))
+			unsigned int s;
+
+			do {
+			s = *ip++;
+			if ((endOnInput) && (ip > iend - LASTLITERALS))
+				goto _output_error;
+			length += s;
+			} while (s == 255);
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)op)) {
+				/* overflow detection */
 				goto _output_error;
-			length += *ip++;
+			}
 		}
+		length += MINMATCH;
+
+		/* check external dictionary */
+		if ((dict == usingExtDict) && (match < lowPrefix)) {
+			if (unlikely(op + length > oend - LASTLITERALS)) {
+				/* doesn't respect parsing restriction */
+				goto _output_error;
+			}
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-			op[0] = ref[0];
-			op[1] = ref[1];
-			op[2] = ref[2];
-			op[3] = ref[3];
-			op += 4;
-			ref += 4;
-			ref -= dec32table[op-ref];
-			PUT4(ref, op);
-			op += STEPSIZE - 4;
-			ref -= dec64;
+			if (length <= (size_t)(lowPrefix - match)) {
+			/*
+			 * match can be copied as a single segment
+			 * from external dictionary
+			 */
+			memmove(op, dictEnd - (lowPrefix - match), length);
+			op += length;
+			} else {
+				/*
+				 * match encompass external
+				 * dictionary and current block
+				 */
+				size_t const copySize = (size_t)(lowPrefix - match);
+				size_t const restSize = length - copySize;
+
+				memcpy(op, dictEnd - copySize, copySize);
+				op += copySize;
+
+				if (restSize > (size_t)(op - lowPrefix)) {
+					/* overlap copy */
+					BYTE * const endOfMatch = op + restSize;
+					const BYTE *copyFrom = lowPrefix;
+
+					while (op < endOfMatch)
+						*op++ = *copyFrom++;
+				} else {
+					memcpy(op, lowPrefix, restSize);
+					op += restSize;
+				}
+			}
+			continue;
+		}
+
+		/* copy match within block */
+		cpy = op + length;
+		if (unlikely(offset < 8)) {
+			const int dec64 = dec64table[offset];
+
+			op[0] = match[0];
+			op[1] = match[1];
+			op[2] = match[2];
+			op[3] = match[3];
+			match += dec32table[offset];
+			memcpy(op + 4, match, 4);
+			match -= dec64;
 		} else {
-			LZ4_COPYSTEP(ref, op);
+			LZ4_copy8(op, match); match += 8;
 		}
-		cpy = op + length - (STEPSIZE - 4);
-		if (cpy > (oend - COPYLENGTH)) {
 
-			/* Error: request to write beyond destination buffer */
-			if (cpy > oend)
-				goto _output_error;
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
+		op += 8;
+
+		if (unlikely(cpy > oend - 12)) {
+			BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
+
+			if (cpy > oend - LASTLITERALS) {
+				/*
+				 * Error : last LASTLITERALS bytes
+				 * must be literals (uncompressed)
+				 */
 				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
+			}
+			if (op < oCopyLimit) {
+				LZ4_wildCopy(op, match, oCopyLimit);
+				match += oCopyLimit - op;
+				op = oCopyLimit;
+			}
 			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
+				*op++ = *match++;
+		} else {
+			LZ4_copy8(op, match);
+			if (length > 16)
+				LZ4_wildCopy(op + 8, match + 8, cpy);
 		}
-		LZ4_SECURECOPY(ref, op, cpy);
 		op = cpy; /* correction */
 	}
+
 	/* end of decoding */
-	return (int) (((char *)ip) - source);
+	if (endOnInput) {
+		/* Nb of output bytes decoded */
+		return (int) (((char *)op) - dest);
+	} else {
+		/* Nb of input bytes read */
+		return (int) (((const char *)ip) - source);
+	}
 
-	/* write overflow error detected */
+	/* Overflow error detected */
 _output_error:
 	return -1;
 }
 
-static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
-				int isize, size_t maxoutputsize)
+int LZ4_decompress_safe(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize)
 {
-	const BYTE *ip = (const BYTE *) source;
-	const BYTE *const iend = ip + isize;
-	const BYTE *ref;
-
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, full, 0,
+		noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe);
 
-	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + maxoutputsize;
-	BYTE *cpy;
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize)
+{
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, partial,
+		targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_partial);
 
-	/* Main Loop */
-	while (ip < iend) {
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
+{
+	return LZ4_decompress_generic(source, dest, 0, originalSize,
+		endOnOutputSize, full, 0, withPrefix64k,
+		(BYTE *)(dest - 64 * KB), NULL, 64 * KB);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast);
 
-		unsigned token;
-		size_t length;
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
-		if (length == RUN_MASK) {
-			int s = 255;
-			while ((ip < iend) && (s == 255)) {
-				s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-			}
-		}
-		/* copy literals */
-		cpy = op + length;
-		if ((cpy > oend - COPYLENGTH) ||
-			(ip + length > iend - COPYLENGTH)) {
-
-			if (cpy > oend)
-				goto _output_error;/* writes beyond buffer */
-
-			if (ip + length != iend)
-				goto _output_error;/*
-						    * Error: LZ4 format requires
-						    * to consume all input
-						    * at this stage
-						    */
-			memcpy(op, ip, length);
-			op += length;
-			break;/* Necessarily EOF, due to parsing restrictions */
-		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+	lz4sd->prefixSize = (size_t) dictSize;
+	lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize;
+	lz4sd->externalDict = NULL;
+	lz4sd->extDictSize	= 0;
+	return 1;
+}
+EXPORT_SYMBOL(LZ4_setStreamDecode);
 
-		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-		if (ref < (BYTE * const) dest)
-			goto _output_error;
-			/*
-			 * Error : offset creates reference
-			 * outside of destination buffer
-			 */
+/*
+ * *_continue() :
+ * These decoding functions allow decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks must still be available at the memory
+ * position where they were decoded.
+ * If it's not possible, save the relevant part of
+ * decoded data into a safe buffer,
+ * and indicate where it stands using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize, int maxOutputSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize,
+			maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict,
+			lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += result;
+		lz4sd->prefixEnd	+= result;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = result;
+		lz4sd->prefixEnd	= (BYTE *)dest + result;
+	}
 
-		/* get matchlength */
-		length = (token & ML_MASK);
-		if (length == ML_MASK) {
-			while (ip < iend) {
-				int s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-				if (s == 255)
-					continue;
-				break;
-			}
-		}
+	return result;
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_continue);
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-				op[0] = ref[0];
-				op[1] = ref[1];
-				op[2] = ref[2];
-				op[3] = ref[3];
-				op += 4;
-				ref += 4;
-				ref -= dec32table[op - ref];
-				PUT4(ref, op);
-				op += STEPSIZE - 4;
-				ref -= dec64;
-		} else {
-			LZ4_COPYSTEP(ref, op);
-		}
-		cpy = op + length - (STEPSIZE-4);
-		if (cpy > oend - COPYLENGTH) {
-			if (cpy > oend)
-				goto _output_error; /* write outside of buf */
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
-				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
-		}
-		LZ4_SECURECOPY(ref, op, cpy);
-		op = cpy; /* correction */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict,
+			lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict, lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += originalSize;
+		lz4sd->prefixEnd	+= originalSize;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = originalSize;
+		lz4sd->prefixEnd	= (BYTE *)dest + originalSize;
 	}
-	/* end of decoding */
-	return (int) (((char *) op) - dest);
 
-	/* write overflow error detected */
-_output_error:
-	return -1;
+	return result;
 }
+EXPORT_SYMBOL(LZ4_decompress_fast_continue);
 
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len)
+/*
+ * Advanced decoding functions :
+ * *_usingDict() :
+ * These decoding functions work the same as "_continue" ones,
+ * the dictionary must be explicitly provided within parameters
+ */
+static inline int LZ4_decompress_usingDict_generic(const char *source,
+	char *dest, int compressedSize, int maxOutputSize, int safe,
+	const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int input_len = 0;
-
-	input_len = lz4_uncompress(src, dest, actual_dest_len);
-	if (input_len < 0)
-		goto exit_0;
-	*src_len = input_len;
+	if (dictSize == 0)
+		return LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize, safe, full, 0,
+			noDict, (BYTE *)dest, NULL, 0);
+	if (dictStart + dictSize == dest) {
+		if (dictSize >= (int)(64 * KB - 1))
+			return LZ4_decompress_generic(source, dest,
+				compressedSize, maxOutputSize, safe, full, 0,
+				withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
+		return LZ4_decompress_generic(source, dest, compressedSize,
+			maxOutputSize, safe, full, 0, noDict,
+			(BYTE *)dest - dictSize, NULL, 0);
+	}
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxOutputSize, safe, full, 0, usingExtDict,
+		(BYTE *)dest, (const BYTE *)dictStart, dictSize);
+}
 
-	return 0;
-exit_0:
-	return ret;
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxOutputSize,
+	const char *dictStart, int dictSize)
+{
+	return LZ4_decompress_usingDict_generic(source, dest,
+		compressedSize, maxOutputSize, 1, dictStart, dictSize);
 }
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress);
-#endif
+EXPORT_SYMBOL(LZ4_decompress_safe_usingDict);
 
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len)
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int out_len = 0;
-
-	out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
-					*dest_len);
-	if (out_len < 0)
-		goto exit_0;
-	*dest_len = out_len;
-
-	return 0;
-exit_0:
-	return ret;
+	return LZ4_decompress_usingDict_generic(source, dest, 0,
+		originalSize, 0, dictStart, dictSize);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_decompress_unknownoutputsize(const unsigned char *src,
+	size_t src_len, unsigned char *dest, size_t *dest_len) {
+	 *dest_len = LZ4_decompress_safe(src, dest,
+		 (int)src_len, (int)((size_t)dest_len));
+
+		/*
+		 * Prior lz4_decompress_unknownoutputsize will return
+		 * 0 for success and a negative result for error
+		 * new LZ4_decompress_safe returns
+		 * - the length of data read on success
+		 * - and also a negative result on error
+		 * meaning when result > 0, we just return 0 here
+		 */
+		if (src_len > 0) {
+			return 0;
+		} else
+			return -1;
 }
-#ifndef STATIC
 EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
 
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len) {
+	*src_len = LZ4_decompress_fast(src, dest, (int)actual_dest_len);
+
+	/*
+	 * Prior lz4_decompress will return
+	 * 0 for success and a negative result for error
+	 * new LZ4_decompress_fast returns
+	 * - the length of data read on success
+	 * - and also a negative result on error
+	 * meaning when result > 0, we just return 0 here
+	 */
+	if ((int)((size_t)src_len) > 0)
+		return 0;
+	else
+		return -1;
+}
+EXPORT_SYMBOL(lz4_decompress);
+
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4 Decompressor");
-#endif
+MODULE_DESCRIPTION("LZ4 decompressor");
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index c79d7ea..b0d21b5 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -1,157 +1,219 @@
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
 /*
- * lz4defs.h -- architecture specific defines
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * lz4defs.h -- common and architecture specific defines for the kernel usage
+
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *    * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+#include <asm/unaligned.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
 /*
  * Detects 64 bits mode
- */
+*/
 #if defined(CONFIG_64BIT)
 #define LZ4_ARCH64 1
 #else
 #define LZ4_ARCH64 0
 #endif
 
-/*
- * Architecture-specific macros
- */
-#define BYTE	u8
-typedef struct _U16_S { u16 v; } U16_S;
-typedef struct _U32_S { u32 v; } U32_S;
-typedef struct _U64_S { u64 v; } U64_S;
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-
-#define A16(x) (((U16_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A64(x) (((U64_S *)(x))->v)
-
-#define PUT4(s, d) (A32(d) = A32(s))
-#define PUT8(s, d) (A64(d) = A64(s))
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - A16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
-	do {	\
-		A16(p) = v; \
-		p += 2; \
-	} while (0)
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
-#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
-#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
-
-#define PUT4(s, d) \
-	put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
-#define PUT8(s, d) \
-	put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - get_unaligned_le16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)			\
-	do {						\
-		put_unaligned_le16(v, (u16 *)(p));	\
-		p += 2;					\
-	} while (0)
+/*-************************************
+ *	Basic Types
+ **************************************/
+#include <linux/types.h>
+
+typedef	uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef	int32_t S32;
+typedef uint64_t U64;
+typedef uintptr_t uptrval;
+
+#if defined(__x86_64__)
+	typedef U64		reg_t;	 /* 64-bits in x32 mode */
+#else
+	typedef size_t reg_t;	 /* 32-bits in x32 mode */
 #endif
 
-#define COPYLENGTH 8
-#define ML_BITS  4
-#define ML_MASK  ((1U << ML_BITS) - 1)
-#define RUN_BITS (8 - ML_BITS)
-#define RUN_MASK ((1U << RUN_BITS) - 1)
-#define MEMORY_USAGE	14
-#define MINMATCH	4
-#define SKIPSTRENGTH	6
-#define LASTLITERALS	5
-#define MFLIMIT		(COPYLENGTH + MINMATCH)
-#define MINLENGTH	(MFLIMIT + 1)
-#define MAXD_LOG	16
-#define MAXD		(1 << MAXD_LOG)
-#define MAXD_MASK	(u32)(MAXD - 1)
-#define MAX_DISTANCE	(MAXD - 1)
-#define HASH_LOG	(MAXD_LOG - 1)
-#define HASHTABLESIZE	(1 << HASH_LOG)
-#define MAX_NB_ATTEMPTS	256
-#define OPTIMAL_ML	(int)((ML_MASK-1)+MINMATCH)
-#define LZ4_64KLIMIT	((1<<16) + (MFLIMIT - 1))
-#define HASHLOG64K	((MEMORY_USAGE - 2) + 1)
-#define HASH64KTABLESIZE	(1U << HASHLOG64K)
-#define LZ4_HASH_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - (MEMORY_USAGE-2)))
-#define LZ4_HASH64K_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASHLOG64K))
-#define HASH_VALUE(p)		(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASH_LOG))
-
-#if LZ4_ARCH64/* 64-bit */
-#define STEPSIZE 8
-
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT8(s, d);	\
-		d += 8;		\
-		s += 8;		\
-	} while (0)
-
-#define LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
-
-#define LZ4_SECURECOPY(s, d, e)			\
-	do {					\
-		if (d < e) {			\
-			LZ4_WILDCOPY(s, d, e);	\
-		}				\
-	} while (0)
-#define HTYPE u32
-
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+/*-************************************
+ *	Constants
+ **************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB (1<<10)
+#define MB (1<<20)
+#define GB (1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1<<MAXD_LOG) - 1)
+#define STEPSIZE sizeof(size_t)
+
+#define ML_BITS	4
+#define ML_MASK	((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;
+
+/*-************************************
+ *	Reading and writing into memory
+ **************************************/
+
+static inline U16 LZ4_read16(const void *memPtr)
+{
+		U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline U32 LZ4_read32(const void *memPtr)
+{
+		U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline size_t LZ4_read_ARCH(const void *memPtr)
+{
+		size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline void LZ4_write16(void *memPtr, U16 value)
+{
+		memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline void LZ4_write32(void *memPtr, U32 value)
+{
+		memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline U16 LZ4_readLE16(const void *memPtr)
+{
+#ifdef __LITTLE_ENDIAN__
+		return LZ4_read16(memPtr);
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
+		const BYTE *p = (const BYTE *)memPtr;
+
+		return (U16)((U16)p[0] + (p[1] << 8));
 #endif
+}
+
+static inline void LZ4_writeLE16(void *memPtr, U16 value)
+{
+#ifdef __LITTLE_ENDIAN__
+		LZ4_write16(memPtr, value);
+#else
+		BYTE *p = (BYTE *)memPtr;
 
-#else	/* 32-bit */
-#define STEPSIZE 4
+		p[0] = (BYTE) value;
+		p[1] = (BYTE)(value>>8);
+#endif
+}
 
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT4(s, d);	\
-		d += 4;		\
-		s += 4;		\
-	} while (0)
+static inline void LZ4_copy8(void *dst, const void *src)
+{
+		memcpy(dst, src, 8);
+}
 
-#define LZ4_COPYPACKET(s, d)		\
-	do {				\
-		LZ4_COPYSTEP(s, d);	\
-		LZ4_COPYSTEP(s, d);	\
-	} while (0)
+/*
+ * customized variant of memcpy,
+ * which can overwrite up to 7 bytes beyond dstEnd
+ */
+static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
+{
+		BYTE *d = (BYTE *)dstPtr;
+		const BYTE *s = (const BYTE *)srcPtr;
+		BYTE *const e = (BYTE *)dstEnd;
 
-#define LZ4_SECURECOPY	LZ4_WILDCOPY
-#define HTYPE const u8*
+		do { LZ4_copy8(d, s); d += 8; s += 8; } while (d < e);
+}
 
-#ifdef __BIG_ENDIAN
+#if LZ4_ARCH64
+#ifdef __BIG_ENDIAN__
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#endif
+#else
+#ifdef __BIG_ENDIAN__
 #define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
 #else
 #define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
 #endif
+#endif
 
+static inline unsigned int LZ4_count(const BYTE *pIn, const BYTE *pMatch,
+	const BYTE *pInLimit)
+{
+	const BYTE *const pStart = pIn;
+
+	while (likely(pIn < pInLimit-(STEPSIZE-1))) {
+		size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+
+		if (!diff) {
+			pIn += STEPSIZE;
+			pMatch += STEPSIZE;
+			continue;
+		}
+		pIn += LZ4_NBCOMMONBYTES(diff);
+		return (unsigned int)(pIn - pStart);
+	}
+
+#ifdef LZ4_ARCH64
+	if ((pIn < (pInLimit-3))
+		&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+		pIn += 4; pMatch += 4;
+	}
 #endif
+	if ((pIn < (pInLimit-1))
+		&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+		pIn += 2; pMatch += 2;
+	}
+	if ((pIn < pInLimit) && (*pMatch == *pIn))
+		pIn++;
+	return (unsigned int)(pIn - pStart);
+}
+
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
 
-#define LZ4_WILDCOPY(s, d, e)		\
-	do {				\
-		LZ4_COPYPACKET(s, d);	\
-	} while (d < e)
-
-#define LZ4_BLINDCOPY(s, d, l)	\
-	do {	\
-		u8 *e = (d) + l;	\
-		LZ4_WILDCOPY(s, d, e);	\
-		d = e;	\
-	} while (0)
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+#endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index f344f76..880c778 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -1,19 +1,17 @@
 /*
  * LZ4 HC - High Compression Mode of LZ4
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Copyright (C) 2011-2015, Yann Collet.
  *
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *		* Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *		* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,323 +23,353 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+/*-************************************
+ *	Dependencies
+ **************************************/
 #include <linux/lz4.h>
-#include <asm/unaligned.h>
 #include "lz4defs.h"
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h> /* memset */
 
-struct lz4hc_data {
-	const u8 *base;
-	HTYPE hashtable[HASHTABLESIZE];
-	u16 chaintable[MAXD];
-	const u8 *nexttoupdate;
-} __attribute__((__packed__));
+/* *************************************
+ *	Local Constants and types
+ ***************************************/
 
-static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
+#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH)
+
+#define HASH_FUNCTION(i)	(((i) * 2654435761U) \
+	>> ((MINMATCH*8) - LZ4HC_HASH_LOG))
+#define DELTANEXTU16(p)	chainTable[(U16)(p)] /* faster */
+
+static U32 LZ4HC_hashPtr(const void *ptr)
 {
-	memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
-	memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
-
-#if LZ4_ARCH64
-	hc4->nexttoupdate = base + 1;
-#else
-	hc4->nexttoupdate = base;
-#endif
-	hc4->base = base;
-	return 1;
+	return HASH_FUNCTION(LZ4_read32(ptr));
+}
+
+/**************************************
+ *	HC Compression
+ **************************************/
+static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
+{
+	memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable));
+	memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+	hc4->nextToUpdate = 64 * KB;
+	hc4->base = start - 64 * KB;
+	hc4->end = start;
+	hc4->dictBase = start - 64 * KB;
+	hc4->dictLimit = 64 * KB;
+	hc4->lowLimit = 64 * KB;
 }
 
 /* Update chains up to ip (excluded) */
-static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
+static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+	const BYTE *ip)
 {
-	u16 *chaintable = hc4->chaintable;
-	HTYPE *hashtable  = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const hashTable	= hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
+	U32 const target = (U32)(ip - base);
+	U32 idx = hc4->nextToUpdate;
+
+	while (idx < target) {
+		U32 const h = LZ4HC_hashPtr(base + idx);
+		size_t delta = idx - hashTable[h];
 
-	while (hc4->nexttoupdate < ip) {
-		const u8 *p = hc4->nexttoupdate;
-		size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
 		if (delta > MAX_DISTANCE)
 			delta = MAX_DISTANCE;
-		chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
-		hashtable[HASH_VALUE(p)] = (p) - base;
-		hc4->nexttoupdate++;
-	}
-}
 
-static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
-		const u8 *const matchlimit)
-{
-	const u8 *p1t = p1;
-
-	while (p1t < matchlimit - (STEPSIZE - 1)) {
-#if LZ4_ARCH64
-		u64 diff = A64(p2) ^ A64(p1t);
-#else
-		u32 diff = A32(p2) ^ A32(p1t);
-#endif
-		if (!diff) {
-			p1t += STEPSIZE;
-			p2 += STEPSIZE;
-			continue;
-		}
-		p1t += LZ4_NBCOMMONBYTES(diff);
-		return p1t - p1;
-	}
-#if LZ4_ARCH64
-	if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
-		p1t += 4;
-		p2 += 4;
-	}
-#endif
+		DELTANEXTU16(idx) = (U16)delta;
 
-	if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
-		p1t += 2;
-		p2 += 2;
+		hashTable[h] = idx;
+		idx++;
 	}
-	if ((p1t < matchlimit) && (*p2 == *p1t))
-		p1t++;
-	return p1t - p1;
+
+	hc4->nextToUpdate = target;
 }
 
-static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
-		const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
+static inline int LZ4HC_InsertAndFindBestMatch(
+	LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
+	const BYTE *ip,
+	const BYTE * const iLimit,
+	const BYTE **matchpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-	const u8 *ref;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	int nbattempts = MAX_NB_ATTEMPTS;
-	size_t repl = 0, ml = 0;
-	u16 delta;
+	const BYTE * const dictBase = hc4->dictBase;
+	const U32 dictLimit = hc4->dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	U32 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	size_t ml = 0;
 
 	/* HC4 match finder */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	/* potential repetition */
-	if (ref >= ip-4) {
-		/* confirmed */
-		if (A32(ref) == A32(ip)) {
-			delta = (u16)(ip-ref);
-			repl = ml  = lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
-			*matchpos = ref;
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE * const match = base + matchIndex;
+
+			if (*(match + ml) == *(ip + ml)
+				&& (LZ4_read32(match) == LZ4_read32(ip))) {
+				size_t const mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, iLimit) + MINMATCH;
 
-	while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
-		nbattempts--;
-		if (*(ref + ml) == *(ip + ml)) {
-			if (A32(ref) == A32(ip)) {
-				size_t mlt =
-					lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
 				if (mlt > ml) {
 					ml = mlt;
-					*matchpos = ref;
+					*matchpos = match;
+				}
+			}
+		} else {
+			const BYTE * const match = dictBase + matchIndex;
+
+			if (LZ4_read32(match) == LZ4_read32(ip)) {
+				size_t mlt;
+				const BYTE *vLimit = ip
+					+ (dictLimit - matchIndex);
+
+				if (vLimit > iLimit)
+					vLimit = iLimit;
+				mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, vLimit) + MINMATCH;
+				if ((ip + mlt == vLimit)
+					&& (vLimit < iLimit))
+					mlt += LZ4_count(ip + mlt,
+						base + dictLimit,
+						iLimit);
+				if (mlt > ml) {
+					/* virtual matchpos */
+					ml = mlt;
+					*matchpos = base + matchIndex;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-
-	/* Complete table */
-	if (repl) {
-		const BYTE *ptr = ip;
-		const BYTE *end;
-		end = ip + repl - (MINMATCH-1);
-		/* Pre-Load */
-		while (ptr < end - delta) {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			ptr++;
-		}
-		do {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			/* Head of chain */
-			hashtable[HASH_VALUE(ptr)] = (ptr) - base;
-			ptr++;
-		} while (ptr < end);
-		hc4->nexttoupdate = end;
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
 
 	return (int)ml;
 }
 
-static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
-	const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
-	const u8 **matchpos, const u8 **startpos)
+static inline int LZ4HC_InsertAndGetWiderMatch(
+	LZ4HC_CCtx_internal *hc4,
+	const BYTE * const ip,
+	const BYTE * const iLowLimit,
+	const BYTE * const iHighLimit,
+	int longest,
+	const BYTE **matchpos,
+	const BYTE **startpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	const u8 *ref;
-	int nbattempts = MAX_NB_ATTEMPTS;
-	int delta = (int)(ip - startlimit);
+	const U32 dictLimit = hc4->dictLimit;
+	const BYTE * const lowPrefixPtr = base + dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	const BYTE * const dictBase = hc4->dictBase;
+	U32	 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	int delta = (int)(ip - iLowLimit);
 
 	/* First Match */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
-		&& (nbattempts)) {
-		nbattempts--;
-		if (*(startlimit + longest) == *(ref - delta + longest)) {
-			if (A32(ref) == A32(ip)) {
-				const u8 *reft = ref + MINMATCH;
-				const u8 *ipt = ip + MINMATCH;
-				const u8 *startt = ip;
-
-				while (ipt < matchlimit-(STEPSIZE - 1)) {
-					#if LZ4_ARCH64
-					u64 diff = A64(reft) ^ A64(ipt);
-					#else
-					u32 diff = A32(reft) ^ A32(ipt);
-					#endif
-
-					if (!diff) {
-						ipt += STEPSIZE;
-						reft += STEPSIZE;
-						continue;
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE *matchPtr = base + matchIndex;
+
+			if (*(iLowLimit + longest)
+				== *(matchPtr - delta + longest)) {
+				if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+					int mlt = MINMATCH +
+						LZ4_count(ip + MINMATCH,
+							matchPtr + MINMATCH,
+							iHighLimit);
+					int back = 0;
+
+					while ((ip + back > iLowLimit)
+							 && (matchPtr + back > lowPrefixPtr)
+							 && (ip[back - 1] == matchPtr[back - 1]))
+						back--;
+
+					mlt -= back;
+
+					if (mlt > longest) {
+						longest = (int)mlt;
+						*matchpos = matchPtr + back;
+						*startpos = ip + back;
 					}
-					ipt += LZ4_NBCOMMONBYTES(diff);
-					goto _endcount;
-				}
-				#if LZ4_ARCH64
-				if ((ipt < (matchlimit - 3))
-					&& (A32(reft) == A32(ipt))) {
-					ipt += 4;
-					reft += 4;
-				}
-				ipt += 2;
-				#endif
-				if ((ipt < (matchlimit - 1))
-					&& (A16(reft) == A16(ipt))) {
-					reft += 2;
 				}
-				if ((ipt < matchlimit) && (*reft == *ipt))
-					ipt++;
-_endcount:
-				reft = ref;
-
-				while ((startt > startlimit)
-					&& (reft > hc4->base)
-					&& (startt[-1] == reft[-1])) {
-					startt--;
-					reft--;
-				}
-
-				if ((ipt - startt) > longest) {
-					longest = (int)(ipt - startt);
-					*matchpos = reft;
-					*startpos = startt;
+			}
+		} else {
+			const BYTE * const matchPtr = dictBase + matchIndex;
+
+			if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+				size_t mlt;
+				int back = 0;
+				const BYTE *vLimit = ip + (dictLimit - matchIndex);
+
+				if (vLimit > iHighLimit)
+					vLimit = iHighLimit;
+
+				mlt = LZ4_count(ip + MINMATCH,
+					matchPtr + MINMATCH, vLimit) + MINMATCH;
+
+				if ((ip + mlt == vLimit) && (vLimit < iHighLimit))
+					mlt += LZ4_count(ip + mlt, base + dictLimit,
+						iHighLimit);
+				while ((ip + back > iLowLimit)
+					&& (matchIndex + back > lowLimit)
+					&& (ip[back - 1] == matchPtr[back - 1]))
+					back--;
+				mlt -= back;
+				if ((int)mlt > longest) {
+					longest = (int)mlt;
+					*matchpos = base + matchIndex + back;
+					*startpos = ip + back;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
+
 	return longest;
 }
 
-static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
-		int ml, const u8 *ref)
+static inline int LZ4HC_encodeSequence(
+	const BYTE **ip,
+	BYTE **op,
+	const BYTE **anchor,
+	int matchLength,
+	const BYTE * const match,
+	limitedOutput_directive limitedOutputBuffer,
+	BYTE *oend)
 {
-	int length, len;
-	u8 *token;
+	int length;
+	BYTE *token;
 
 	/* Encode Literal length */
 	length = (int)(*ip - *anchor);
 	token = (*op)++;
+
+	if ((limitedOutputBuffer)
+		&& ((*op + (length>>8)
+			+ length + (2 + 1 + LASTLITERALS)) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
 	if (length >= (int)RUN_MASK) {
-		*token = (RUN_MASK << ML_BITS);
+		int len;
+
+		*token = (RUN_MASK<<ML_BITS);
 		len = length - RUN_MASK;
-		for (; len > 254 ; len -= 255)
+		for (; len > 254 ; len -= 255) {
 			*(*op)++ = 255;
-		*(*op)++ = (u8)len;
+			*(*op)++ = (BYTE)len;
+		}
 	} else
-		*token = (length << ML_BITS);
+		*token = (BYTE)(length<<ML_BITS);
 
 	/* Copy Literals */
-	LZ4_BLINDCOPY(*anchor, *op, length);
+	LZ4_wildCopy(*op, *anchor, (*op) + length);
+	*op += length;
 
 	/* Encode Offset */
-	LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
+	LZ4_writeLE16(*op, (U16)(*ip - match)); *op += 2;
 
 	/* Encode MatchLength */
-	len = (int)(ml - MINMATCH);
-	if (len >= (int)ML_MASK) {
+	length = (int)(matchLength - MINMATCH);
+	if ((limitedOutputBuffer)
+		&& (*op + (length>>8)
+			+ (1 + LASTLITERALS) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
+	if (length >= (int)ML_MASK) {
 		*token += ML_MASK;
-		len -= ML_MASK;
-		for (; len > 509 ; len -= 510) {
+		length -= ML_MASK;
+		for (; length > 509 ; length -= 510) {
 			*(*op)++ = 255;
 			*(*op)++ = 255;
 		}
-		if (len > 254) {
-			len -= 255;
+		if (length > 254) {
+			length -= 255;
 			*(*op)++ = 255;
 		}
-		*(*op)++ = (u8)len;
+		*(*op)++ = (BYTE)length;
 	} else
-		*token += len;
+		*token += (BYTE)(length);
 
 	/* Prepare next loop */
-	*ip += ml;
+	*ip += matchLength;
 	*anchor = *ip;
 
 	return 0;
 }
 
-static int lz4_compresshcctx(struct lz4hc_data *ctx,
-		const char *source,
-		char *dest,
-		int isize)
+static int LZ4HC_compress_generic(
+	LZ4HC_CCtx_internal *const ctx,
+	const char * const source,
+	char * const dest,
+	int const inputSize,
+	int const maxOutputSize,
+	int compressionLevel,
+	limitedOutput_directive limit
+	)
 {
-	const u8 *ip = (const u8 *)source;
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	const u8 *const matchlimit = (iend - LASTLITERALS);
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = (iend - LASTLITERALS);
 
-	u8 *op = (u8 *)dest;
+	BYTE *op = (BYTE *) dest;
+	BYTE * const oend = op + maxOutputSize;
 
+	unsigned int maxNbAttempts;
 	int ml, ml2, ml3, ml0;
-	const u8 *ref = NULL;
-	const u8 *start2 = NULL;
-	const u8 *ref2 = NULL;
-	const u8 *start3 = NULL;
-	const u8 *ref3 = NULL;
-	const u8 *start0;
-	const u8 *ref0;
-	int lastrun;
+	const BYTE *ref = NULL;
+	const BYTE *start2 = NULL;
+	const BYTE *ref2 = NULL;
+	const BYTE *start3 = NULL;
+	const BYTE *ref3 = NULL;
+	const BYTE *start0;
+	const BYTE *ref0;
+
+	/* init */
+	if (compressionLevel > LZ4HC_MAX_CLEVEL)
+		compressionLevel = LZ4HC_MAX_CLEVEL;
+	if (compressionLevel < 1)
+		compressionLevel = LZ4HC_DEFAULT_CLEVEL;
+	maxNbAttempts = 1 << (compressionLevel - 1);
+	ctx->end += inputSize;
 
 	ip++;
 
 	/* Main Loop */
 	while (ip < mflimit) {
-		ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
+		ml = LZ4HC_InsertAndFindBestMatch(ctx, ip,
+			matchlimit, (&ref), maxNbAttempts);
 		if (!ml) {
 			ip++;
 			continue;
@@ -351,46 +379,52 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		start0 = ip;
 		ref0 = ref;
 		ml0 = ml;
-_search2:
-		if (ip+ml < mflimit)
-			ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
-				ip + 1, matchlimit, ml, &ref2, &start2);
+
+_Search2:
+		if (ip + ml < mflimit)
+			ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				ip + ml - 2, ip + 0,
+				matchlimit, ml, &ref2,
+				&start2, maxNbAttempts);
 		else
 			ml2 = ml;
-		/* No better match */
+
 		if (ml2 == ml) {
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			/* No better match */
+			if (LZ4HC_encodeSequence(&ip, &op,
+				&anchor, ml, ref, limit, oend))
+				return 0;
 			continue;
 		}
 
 		if (start0 < ip) {
-			/* empirical */
 			if (start2 < ip + ml0) {
+				/* empirical */
 				ip = start0;
 				ref = ref0;
 				ml = ml0;
 			}
 		}
-		/*
-		 * Here, start0==ip
-		 * First Match too small : removed
-		 */
+
+		/* Here, start0 == ip */
 		if ((start2 - ip) < 3) {
+			/* First Match too small : removed */
 			ml = ml2;
 			ip = start2;
 			ref = ref2;
-			goto _search2;
+			goto _Search2;
 		}
 
-_search3:
+_Search3:
 		/*
-		 * Currently we have :
-		 * ml2 > ml1, and
-		 * ip1+3 <= ip2 (usually < ip1+ml1)
-		 */
+		* Currently we have :
+		* ml2 > ml1, and
+		* ip1 + 3 <= ip2 (usually < ip1 + ml1)
+		*/
 		if ((start2 - ip) < OPTIMAL_ML) {
 			int correction;
 			int new_ml = ml;
+
 			if (new_ml > OPTIMAL_ML)
 				new_ml = OPTIMAL_ML;
 			if (ip + new_ml > start2 + ml2 - MINMATCH)
@@ -403,39 +437,44 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			}
 		}
 		/*
-		 * Now, we have start2 = ip+new_ml,
-		 * with new_ml=min(ml, OPTIMAL_ML=18)
+		 * Now, we have start2 = ip + new_ml,
+		 * with new_ml = min(ml, OPTIMAL_ML = 18)
 		 */
+
 		if (start2 + ml2 < mflimit)
-			ml3 = lz4hc_insertandgetwidermatch(ctx,
-				start2 + ml2 - 3, start2, matchlimit,
-				ml2, &ref3, &start3);
+			ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				start2 + ml2 - 3, start2,
+				matchlimit, ml2, &ref3, &start3,
+				maxNbAttempts);
 		else
 			ml3 = ml2;
 
-		/* No better match : 2 sequences to encode */
 		if (ml3 == ml2) {
+			/* No better match : 2 sequences to encode */
 			/* ip & ref are known; Now for ml */
-			if (start2 < ip+ml)
+			if (start2 < ip + ml)
 				ml = (int)(start2 - ip);
-
 			/* Now, encode 2 sequences */
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml, ref, limit, oend))
+				return 0;
 			ip = start2;
-			lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml2, ref2, limit, oend))
+				return 0;
 			continue;
 		}
 
-		/* Not enough space for match 2 : remove it */
 		if (start3 < ip + ml + 3) {
-			/*
-			 * can write Seq1 immediately ==> Seq2 is removed,
-			 * so Seq3 becomes Seq1
-			 */
+			/* Not enough space for match 2 : remove it */
 			if (start3 >= (ip + ml)) {
+				/* can write Seq1 immediately
+				 * ==> Seq2 is removed,
+				 * so Seq3 becomes Seq1
+				 */
 				if (start2 < ip + ml) {
-					int correction =
-						(int)(ip + ml - start2);
+					int correction = (int)(ip + ml - start2);
+
 					start2 += correction;
 					ref2 += correction;
 					ml2 -= correction;
@@ -446,35 +485,38 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 					}
 				}
 
-				lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-				ip  = start3;
+				if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+					ml, ref, limit, oend))
+					return 0;
+				ip	= start3;
 				ref = ref3;
-				ml  = ml3;
+				ml	= ml3;
 
 				start0 = start2;
 				ref0 = ref2;
 				ml0 = ml2;
-				goto _search2;
+				goto _Search2;
 			}
 
 			start2 = start3;
 			ref2 = ref3;
 			ml2 = ml3;
-			goto _search3;
+			goto _Search3;
 		}
 
 		/*
-		 * OK, now we have 3 ascending matches; let's write at least
-		 * the first one ip & ref are known; Now for ml
-		 */
+		* OK, now we have 3 ascending matches;
+		* let's write at least the first one
+		* ip & ref are known; Now for ml
+		*/
 		if (start2 < ip + ml) {
 			if ((start2 - ip) < (int)ML_MASK) {
 				int correction;
+
 				if (ml > OPTIMAL_ML)
 					ml = OPTIMAL_ML;
 				if (ip + ml > start2 + ml2 - MINMATCH)
-					ml = (int)(start2 - ip) + ml2
-						- MINMATCH;
+					ml = (int)(start2 - ip) + ml2 - MINMATCH;
 				correction = ml - (int)(start2 - ip);
 				if (correction > 0) {
 					start2 += correction;
@@ -484,7 +526,9 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			} else
 				ml = (int)(start2 - ip);
 		}
-		lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+		if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml,
+			ref, limit, oend))
+			return 0;
 
 		ip = start2;
 		ref = ref2;
@@ -494,46 +538,243 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		ref2 = ref3;
 		ml2 = ml3;
 
-		goto _search3;
+		goto _Search3;
 	}
 
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8) lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{ int lastRun = (int)(iend - anchor);
+
+		if ((limit)
+			&& (((char *)op - dest) + lastRun + 1
+				+ ((lastRun + 255 - RUN_MASK)/255)
+					> (U32)maxOutputSize)) {
+			/* Check output limit */
+			return 0;
+		}
+		if (lastRun >= (int)RUN_MASK) {
+			*op++ = (RUN_MASK<<ML_BITS);
+			lastRun -= RUN_MASK;
+			for (; lastRun > 254 ; lastRun -= 255) {
+				*op++ = 255;
+				*op++ = (BYTE) lastRun;
+			}
+		} else
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		memcpy(op, anchor, iend - anchor);
+		op += iend - anchor;
+	}
+
 	/* End */
 	return (int) (((char *)op) - dest);
 }
 
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_HC_extStateHC(
+	void *state,
+	const char *src,
+	char *dst,
+	int srcSize,
+	int maxDstSize,
+	int compressionLevel)
 {
-	int ret = -1;
-	int out_len = 0;
+	LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t *)state)->internal_donotuse;
 
-	struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
-	lz4hc_init(hc4, (const u8 *)src);
-	out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
-		(char *)dst, (int)src_len);
+	if (((size_t)(state)&(sizeof(void *) - 1)) != 0) {
+		/* Error : state is not aligned
+		 * for pointers (32 or 64 bits)
+		 */
+		return 0;
+	}
 
-	if (out_len < 0)
-		goto exit;
+	LZ4HC_init(ctx, (const BYTE *)src);
 
-	*dst_len = out_len;
-	return 0;
+	if (maxDstSize < LZ4_compressBound(srcSize))
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, limitedOutput);
+	else
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, noLimit);
+}
 
-exit:
-	return ret;
+int LZ4_compress_HC(const char *src, char *dst, int srcSize,
+	int maxDstSize, int compressionLevel, void *wrkmem)
+{
+	return LZ4_compress_HC_extStateHC(wrkmem, src, dst,
+		srcSize, maxDstSize, compressionLevel);
+}
+EXPORT_SYMBOL(LZ4_compress_HC);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4hc_compress(const unsigned char *src, size_t src_len,
+	unsigned char *dst, size_t *dst_len, void *wrkmem)
+{
+	*dst_len = LZ4_compress_HC(src, dst, (int)src_len,
+		(int)((size_t)dst_len), LZ4HC_DEFAULT_CLEVEL, wrkmem);
+
+	/*
+	 * Prior lz4hc_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_HC
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4hc_compress);
 
+/**************************************
+ *	Streaming Functions
+ **************************************/
+void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel)
+{
+	LZ4_streamHCPtr->internal_donotuse.base = NULL;
+	LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned int)compressionLevel;
+}
+
+int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *dictionary,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	if (dictSize > 64 * KB) {
+		dictionary += dictSize - 64 * KB;
+		dictSize = 64 * KB;
+	}
+	LZ4HC_init(ctxPtr, (const BYTE *)dictionary);
+	if (dictSize >= 4)
+		LZ4HC_Insert(ctxPtr, (const BYTE *)dictionary + (dictSize - 3));
+	ctxPtr->end = (const BYTE *)dictionary + dictSize;
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDictHC);
+
+/* compression */
+
+static void LZ4HC_setExternalDict(
+	LZ4HC_CCtx_internal *ctxPtr,
+	const BYTE *newBlock)
+{
+	if (ctxPtr->end >= ctxPtr->base + 4) {
+		/* Referencing remaining dictionary content */
+		LZ4HC_Insert(ctxPtr, ctxPtr->end - 3);
+	}
+
+	/*
+	 * Only one memory segment for extDict,
+	 * so any previous extDict is lost at this stage
+	 */
+	ctxPtr->lowLimit	= ctxPtr->dictLimit;
+	ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+	ctxPtr->dictBase	= ctxPtr->base;
+	ctxPtr->base = newBlock - ctxPtr->dictLimit;
+	ctxPtr->end	= newBlock;
+	/* match referencing will resume from there */
+	ctxPtr->nextToUpdate = ctxPtr->dictLimit;
+}
+EXPORT_SYMBOL(LZ4HC_setExternalDict);
+
+static int LZ4_compressHC_continue_generic(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize,
+	limitedOutput_directive limit)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	/* auto - init if forgotten */
+	if (ctxPtr->base == NULL)
+		LZ4HC_init(ctxPtr, (const BYTE *) source);
+
+	/* Check overflow */
+	if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) {
+		size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base)
+			- ctxPtr->dictLimit;
+		if (dictSize > 64 * KB)
+			dictSize = 64 * KB;
+		LZ4_loadDictHC(LZ4_streamHCPtr,
+			(const char *)(ctxPtr->end) - dictSize, (int)dictSize);
+	}
+
+	/* Check if blocks follow each other */
+	if ((const BYTE *)source != ctxPtr->end)
+		LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source);
+
+	/* Check overlapping input/dictionary space */
+	{ const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+		const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+		const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
+
+		if ((sourceEnd > dictBegin)
+			&& ((const BYTE *)source < dictEnd)) {
+			if (sourceEnd > dictEnd)
+				sourceEnd = dictEnd;
+			ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+
+			if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4)
+				ctxPtr->lowLimit = ctxPtr->dictLimit;
+		}
+	}
+
+	return LZ4HC_compress_generic(ctxPtr, source, dest,
+		inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize)
+{
+	if (maxOutputSize < LZ4_compressBound(inputSize))
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, limitedOutput);
+	else
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, noLimit);
+}
+EXPORT_SYMBOL(LZ4_compress_HC_continue);
+
+/* dictionary saving */
+
+int LZ4_saveDictHC(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	char *safeBuffer,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+	int const prefixSize = (int)(streamPtr->end
+		- (streamPtr->base + streamPtr->dictLimit));
+
+	if (dictSize > 64 * KB)
+		dictSize = 64 * KB;
+	if (dictSize < 4)
+		dictSize = 0;
+	if (dictSize > prefixSize)
+		dictSize = prefixSize;
+
+	memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+
+	{ U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
+
+		streamPtr->end = (const BYTE *)safeBuffer + dictSize;
+		streamPtr->base = streamPtr->end - endIndex;
+		streamPtr->dictLimit = endIndex - dictSize;
+		streamPtr->lowLimit = endIndex - dictSize;
+
+		if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+			streamPtr->nextToUpdate = streamPtr->dictLimit;
+	}
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDictHC);
+
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4HC compressor");
+MODULE_DESCRIPTION("LZ4 HC compressor");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v4 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version
  2017-01-22 19:35 ` [PATCH v4 0/4] Update LZ4 compressor module Sven Schmidt
  2017-01-22 19:35   ` [PATCH v4 1/4] lib: " Sven Schmidt
@ 2017-01-22 19:35   ` Sven Schmidt
  2017-01-22 19:35   ` [PATCH v4 3/4] crypto: Change LZ4 modules " Sven Schmidt
  2017-01-22 19:35   ` [PATCH v4 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
  3 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-22 19:35 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt

This patch updates the unlz4 wrapper to work with the
updated LZ4 kernel module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 lib/decompress_unlz4.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 036fc88..1b0baf3 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 		error("NULL input pointer and missing fill function");
 		goto exit_1;
 	} else {
-		inp = large_malloc(lz4_compressbound(uncomp_chunksize));
+		inp = large_malloc(LZ4_compressBound(uncomp_chunksize));
 		if (!inp) {
 			error("Could not allocate input buffer");
 			goto exit_1;
@@ -136,7 +136,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			inp += 4;
 			size -= 4;
 		} else {
-			if (chunksize > lz4_compressbound(uncomp_chunksize)) {
+			if (chunksize > LZ4_compressBound(uncomp_chunksize)) {
 				error("chunk length is longer than allocated");
 				goto exit_2;
 			}
@@ -152,11 +152,14 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			out_len -= dest_len;
 		} else
 			dest_len = out_len;
-		ret = lz4_decompress(inp, &chunksize, outp, dest_len);
+
+		ret = LZ4_decompress_fast(inp, outp, dest_len);
+		chunksize = ret;
 #else
 		dest_len = uncomp_chunksize;
-		ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
-				&dest_len);
+
+		ret = LZ4_decompress_safe(inp, outp, chunksize, dest_len);
+		dest_len = ret;
 #endif
 		if (ret < 0) {
 			error("Decoding failed");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v4 3/4] crypto: Change LZ4 modules to work with new LZ4 module version
  2017-01-22 19:35 ` [PATCH v4 0/4] Update LZ4 compressor module Sven Schmidt
  2017-01-22 19:35   ` [PATCH v4 1/4] lib: " Sven Schmidt
  2017-01-22 19:35   ` [PATCH v4 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
@ 2017-01-22 19:35   ` Sven Schmidt
  2017-01-22 19:35   ` [PATCH v4 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
  3 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-22 19:35 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt

This patch updates the crypto modules using LZ4 compression
to work with the new LZ4 module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 crypto/lz4.c   | 21 ++++++++-------------
 crypto/lz4hc.c | 21 ++++++++-------------
 2 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/crypto/lz4.c b/crypto/lz4.c
index 99c1b2c..40fd2c2 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -66,15 +66,13 @@ static void lz4_exit(struct crypto_tfm *tfm)
 static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
 				 u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_default(src, dst,
+		slen, (int)((size_t)dlen), ctx);
 
-	err = lz4_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (!out_len)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -96,16 +94,13 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen));
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
+	if (out_len < 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return out_len;
 }
 
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 75ffc4a..6f16f96 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -65,15 +65,13 @@ static void lz4hc_exit(struct crypto_tfm *tfm)
 static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_HC(src, dst, slen,
+		(int)((size_t)dlen), LZ4HC_DEFAULT_CLEVEL, ctx);
 
-	err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (out_len == 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -97,16 +95,13 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
 				     u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen));
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
+	if (out_len < 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return out_len;
 }
 
 static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v4 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
  2017-01-22 19:35 ` [PATCH v4 0/4] Update LZ4 compressor module Sven Schmidt
                     ` (2 preceding siblings ...)
  2017-01-22 19:35   ` [PATCH v4 3/4] crypto: Change LZ4 modules " Sven Schmidt
@ 2017-01-22 19:35   ` Sven Schmidt
  3 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-22 19:35 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt

This patch updates fs/pstore and fs/squashfs to use the updated
functions from the new LZ4 module.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 fs/pstore/platform.c      | 14 +++++++-------
 fs/squashfs/lz4_wrapper.c | 12 ++++++------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 729677e..85c4c58 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -342,31 +342,31 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_compress(in, inlen, out, &outlen, workspace);
-	if (ret) {
+	ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
+	if (!ret) {
 		pr_err("lz4_compress error, ret = %d!\n", ret);
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
-	if (ret) {
+	ret = LZ4_decompress_safe(in, out, inlen, outlen);
+	if (ret < 0) {
 		pr_err("lz4_decompress error, ret = %d!\n", ret);
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static void allocate_lz4(void)
 {
-	big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
+	big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
 	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
 	if (big_oops_buf) {
 		workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index ff4468b..95da653 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	struct squashfs_lz4 *stream = strm;
 	void *buff = stream->input, *data;
 	int avail, i, bytes = length, res;
-	size_t dest_len = output->length;
 
 	for (i = 0; i < b; i++) {
 		avail = min(bytes, msblk->devblksize - offset);
@@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 		put_bh(bh[i]);
 	}
 
-	res = lz4_decompress_unknownoutputsize(stream->input, length,
-					stream->output, &dest_len);
-	if (res)
+	res = LZ4_decompress_safe(stream->input, stream->output,
+		length, output->length);
+
+	if (res < 0)
 		return -EIO;
 
-	bytes = dest_len;
+	bytes = res;
 	data = squashfs_first_page(output);
 	buff = stream->output;
 	while (data) {
@@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	}
 	squashfs_finish_page(output);
 
-	return dest_len;
+	return res;
 }
 
 const struct squashfs_decompressor squashfs_lz4_comp_ops = {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* Re: [PATCH v4 1/4] lib: Update LZ4 compressor module
  2017-01-22 19:35   ` [PATCH v4 1/4] lib: " Sven Schmidt
@ 2017-01-24  0:23     ` Andrew Morton
  2017-01-24 16:48       ` Sven Schmidt
  0 siblings, 1 reply; 104+ messages in thread
From: Andrew Morton @ 2017-01-24  0:23 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip

On Sun, 22 Jan 2017 20:35:14 +0100 Sven Schmidt <4sschmid@informatik.uni-hamburg.de> wrote:

> This patch updates LZ4 kernel module to LZ4 v1.7.3 by Yann Collet.
> The kernel module is inspired by the previous work by Chanho Min.
> The updated LZ4 module will not break existing code since there were alias
> methods added to ensure backwards compatibility.
> 
> API changes:
> 
> New method LZ4_compress_fast which differs from the variant available
> in kernel by the new acceleration parameter,
> allowing to trade compression ratio for more compression speed
> and vice versa.
> 
> LZ4_decompress_fast is the respective decompression method, featuring a very
> fast decoder (multiple GB/s per core), able to reach RAM speed in multi-core
> systems. The decompressor allows to decompress data compressed with
> LZ4 fast as well as the LZ4 HC (high compression) algorithm.
> 
> Also the useful functions LZ4_decompress_safe_partial
> LZ4_compress_destsize were added. The latter reverses the logic by trying to
> compress as much data as possible from source to dest while the former aims
> to decompress partial blocks of data.
> 
> A bunch of streaming functions were also added
> which allow compressig/decompressing data in multiple steps
> (so called "streaming mode").
> 
> The methods lz4_compress and lz4_decompress_unknownoutputsize
> are now known as LZ4_compress_default respectivley LZ4_decompress_safe.
> The old methods are still available for providing backwards compatibility.
> 
> ...
>
> +/*
> + * For backward compatibility
> + */
> +static inline int lz4_compressbound(size_t isize)
>  {

Do we actually need the back-compat wrappers?  After your other three
patches we have no callers, correct?  If so, we should go ahead and
eliminate such back-compatibility interfaces.


> -	return isize + (isize / 255) + 16;
> +	return LZ4_COMPRESSBOUND(isize);
>  }

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v4 1/4] lib: Update LZ4 compressor module
  2017-01-24  0:23     ` Andrew Morton
@ 2017-01-24 16:48       ` Sven Schmidt
  0 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-24 16:48 UTC (permalink / raw)
  To: Andrew Morton
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip

On Mon, Jan 23, 2017 at 04:23:57PM -0800, Andrew Morton wrote:
> On Sun, 22 Jan 2017 20:35:14 +0100 Sven Schmidt <4sschmid@informatik.uni-hamburg.de> wrote:
> 
> > This patch updates LZ4 kernel module to LZ4 v1.7.3 by Yann Collet.
> > The kernel module is inspired by the previous work by Chanho Min.
> > The updated LZ4 module will not break existing code since there were alias
> > methods added to ensure backwards compatibility.
> > 
> > API changes:
> > 
> > New method LZ4_compress_fast which differs from the variant available
> > in kernel by the new acceleration parameter,
> > allowing to trade compression ratio for more compression speed
> > and vice versa.
> > 
> > LZ4_decompress_fast is the respective decompression method, featuring a very
> > fast decoder (multiple GB/s per core), able to reach RAM speed in multi-core
> > systems. The decompressor allows to decompress data compressed with
> > LZ4 fast as well as the LZ4 HC (high compression) algorithm.
> > 
> > Also the useful functions LZ4_decompress_safe_partial
> > LZ4_compress_destsize were added. The latter reverses the logic by trying to
> > compress as much data as possible from source to dest while the former aims
> > to decompress partial blocks of data.
> > 
> > A bunch of streaming functions were also added
> > which allow compressig/decompressing data in multiple steps
> > (so called "streaming mode").
> > 
> > The methods lz4_compress and lz4_decompress_unknownoutputsize
> > are now known as LZ4_compress_default respectivley LZ4_decompress_safe.
> > The old methods are still available for providing backwards compatibility.
> > 
> > ...
> >
> > +/*
> > + * For backward compatibility
> > + */
> > +static inline int lz4_compressbound(size_t isize)
> >  {
> 
> Do we actually need the back-compat wrappers?  After your other three
> patches we have no callers, correct?  If so, we should go ahead and
> eliminate such back-compatibility interfaces.
> 
> 
> > -	return isize + (isize / 255) + 16;
> > +	return LZ4_COMPRESSBOUND(isize);
> >  }
> 

Hey Andrew,

you're absolutely right. Since I changed the callers to use the new functions, we do not need these wrappers anymore.

I will add a patch removing them and send an update.

Thanks,

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v5 0/5] Update LZ4 compressor module
  2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
                   ` (6 preceding siblings ...)
  2017-01-22 19:35 ` [PATCH v4 0/4] Update LZ4 compressor module Sven Schmidt
@ 2017-01-26  7:57 ` Sven Schmidt
  2017-01-26  7:57   ` [PATCH v5 1/5] lib: " Sven Schmidt
                     ` (5 more replies)
  2017-01-27 22:01 ` [PATCH v6 " Sven Schmidt
                   ` (2 subsequent siblings)
  10 siblings, 6 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-26  7:57 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck


This patchset is for updating the LZ4 compression module to a version based
on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
which provides an "acceleration" parameter as a tradeoff between
high compression ratio and high compression speed.

We want to use LZ4 fast in order to support compression in lustre
and (mostly, based on that) investigate data reduction techniques in behalf of
storage systems.

Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
it is possible to enable applications to use fast and/or high compression
depending on the usecase.
For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
LZ4 in the kernel.

LZ4 homepage: http://www.lz4.org/
LZ4 source repository: https://github.com/lz4/lz4
Source version: 1.7.3

Benchmark (taken from [1], Core i5-4300U @1.9GHz):
----------------|--------------|----------------|----------
Compressor      | Compression  | Decompression  | Ratio
----------------|--------------|----------------|----------
memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101

[1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html
fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version

[PATCH 1/5] lib: Update LZ4 compressor module
[PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
[PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
[PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
[PATCH 5/5] lib/lz4: Remove back-compat wrappers

v2:
- Changed order of the patches since in the initial patchset the lz4.h was in the
  last patch but was referenced by the other ones
- Split lib/decompress_unlz4.c in an own patch
- Fixed errors reported by the buildbot
- Further refactorings
- Added more appropriate copyright note to include/linux/lz4.h

v3:
- Adjusted the code to satisfy kernel coding style (checkpatch.pl)
- Made sure the changes to LZ4 in Kernel (overflow checks etc.)
  are included in the new module (they are)
- Removed the second LZ4_compressBound function with related name but
  different return type
- Corrected version number (was LZ4 1.7.3)
- Added missing LZ4 streaming functions

v4:
- Fixed kbuild errors
- Re-added lz4_compressbound as alias for LZ4_compressBound
  to ensure backwards compatibility
- Wrapped LZ4_hash5 with check for LZ4_ARCH64 since it is only used there
  and triggers an unused function warning when false

v5:
- Added a fifth patch to remove the back-compat wrappers introduced
  to ensure bisectibility between the patches (the functions are no longer
  needed since there's no callers left)

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v5 1/5] lib: Update LZ4 compressor module
  2017-01-26  7:57 ` [PATCH v5 0/5] Update LZ4 compressor module Sven Schmidt
@ 2017-01-26  7:57   ` Sven Schmidt
  2017-01-26  7:57   ` [PATCH v5 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-26  7:57 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch updates LZ4 kernel module to LZ4 v1.7.3 by Yann Collet.
The kernel module is inspired by the previous work by Chanho Min.
The updated LZ4 module will not break existing code since the patchset
contains appropriate changes.

API changes:

New method LZ4_compress_fast which differs from the variant available
in kernel by the new acceleration parameter,
allowing to trade compression ratio for more compression speed
and vice versa.

LZ4_decompress_fast is the respective decompression method, featuring a very
fast decoder (multiple GB/s per core), able to reach RAM speed in multi-core
systems. The decompressor allows to decompress data compressed with
LZ4 fast as well as the LZ4 HC (high compression) algorithm.

Also the useful functions LZ4_decompress_safe_partial
LZ4_compress_destsize were added. The latter reverses the logic by trying to
compress as much data as possible from source to dest while the former aims
to decompress partial blocks of data.

A bunch of streaming functions were also added
which allow compressig/decompressing data in multiple steps
(so called "streaming mode").

The methods lz4_compress and lz4_decompress_unknownoutputsize
are now known as LZ4_compress_default respectivley LZ4_decompress_safe.
The old methods will be removed since there's no callers left in the code.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      |  589 ++++++++++++++++++++++---
 lib/lz4/lz4_compress.c   | 1103 ++++++++++++++++++++++++++++++++--------------
 lib/lz4/lz4_decompress.c |  694 ++++++++++++++++++-----------
 lib/lz4/lz4defs.h        |  320 ++++++++------
 lib/lz4/lz4hc_compress.c |  855 ++++++++++++++++++++++-------------
 5 files changed, 2486 insertions(+), 1075 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 6b784c5..2072255 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -1,87 +1,554 @@
-#ifndef __LZ4_H__
-#define __LZ4_H__
-/*
- * LZ4 Kernel Interface
+/* LZ4 Kernel Interface
  *
  * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ *
+ * This file is based on the original header file
+ * for LZ4 - Fast LZ compression algorithm.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *		* Redistributions of source code must retain the above copyright
+ *		  notice, this list of conditions and the following disclaimer.
+ *		* Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
+ */
+
+#ifndef __LZ4_H__
+#define __LZ4_H__
+
+#include <linux/types.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
+/*-************************************************************************
+ *	CONSTANTS
+ **************************************************************************/
+/*
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes
+ * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 10
+
+#define LZ4_MAX_INPUT_SIZE	0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)	(\
+	(unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
+	? 0 \
+	: (isize) + ((isize)/255) + 16)
+
+#define LZ4_ACCELERATION_DEFAULT 1
+#define LZ4_HASHLOG	 (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+#define LZ4HC_MIN_CLEVEL				3
+#define LZ4HC_DEFAULT_CLEVEL		9
+#define LZ4HC_MAX_CLEVEL				16
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE-1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+/*-************************************************************************
+ *	STREAMING CONSTANTS AND STRUCTURES
+ **************************************************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE	(LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
+
+#define LZ4_STREAMDECODESIZE_U64	4
+#define LZ4_STREAMDECODESIZE		 (LZ4_STREAMDECODESIZE_U64 * \
+	sizeof(unsigned long long))
+
+/*
+ * LZ4_stream_t :
+ * information structure to track an LZ4 stream.
+ */
+typedef struct {
+		uint32_t hashTable[LZ4_HASH_SIZE_U32];
+		uint32_t currentOffset;
+		uint32_t initCheck;
+		const uint8_t *dictionary;
+		uint8_t *bufferStart;
+		uint32_t dictSize;
+} LZ4_stream_t_internal;
+typedef union {
+		unsigned long long table[LZ4_STREAMSIZE_U64];
+		LZ4_stream_t_internal internal_donotuse;
+} LZ4_stream_t;
+
+/*
+ * LZ4_streamHC_t :
+ * information structure to track an LZ4HC stream.
  */
-#define LZ4_MEM_COMPRESS	(16384)
-#define LZ4HC_MEM_COMPRESS	(262144 + (2 * sizeof(unsigned char *)))
+typedef struct {
+		unsigned int	 hashTable[LZ4HC_HASHTABLESIZE];
+		unsigned short	 chainTable[LZ4HC_MAXD];
+		/* next block to continue on current prefix */
+		const unsigned char *end;
+		/* All index relative to this position */
+		const unsigned char *base;
+		/* alternate base for extDict */
+		const unsigned char *dictBase;
+		/* below that point, need extDict */
+		unsigned int	 dictLimit;
+		/* below that point, no more dict */
+		unsigned int	 lowLimit;
+		/* index from which to continue dict update */
+		unsigned int	 nextToUpdate;
+		unsigned int	 compressionLevel;
+} LZ4HC_CCtx_internal;
+typedef union {
+		size_t table[LZ4_STREAMHCSIZE_SIZET];
+		LZ4HC_CCtx_internal internal_donotuse;
+} LZ4_streamHC_t;
 
 /*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
+ * LZ4_streamDecode_t :
+ * information structure to track an LZ4 stream during decompression.
+ * init this structure using LZ4_setStreamDecode
+ * (or memset()) before first use
  */
-static inline size_t lz4_compressbound(size_t isize)
+typedef struct {
+		const uint8_t *externalDict;
+		size_t extDictSize;
+		const uint8_t *prefixEnd;
+		size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+typedef union {
+		unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+		LZ4_streamDecode_t_internal internal_donotuse;
+} LZ4_streamDecode_t;
+
+/*-************************************************************************
+ *	SIZE OF STATE
+ **************************************************************************/
+#define LZ4_MEM_COMPRESS	LZ4_STREAMSIZE
+#define LZ4HC_MEM_COMPRESS	LZ4_STREAMHCSIZE
+
+/*-************************************************************************
+ *	Compression Functions
+ **************************************************************************/
+
+/*
+ * LZ4_compressBound() :
+ *	Provides the maximum size that LZ4 may output in a "worst case" scenario
+ *	(input data not compressible)
+ */
+static inline int LZ4_compressBound(size_t isize)
+{
+	return LZ4_COMPRESSBOUND(isize);
+}
+
+/*
+ * For backward compatibility
+ */
+static inline int lz4_compressbound(size_t isize)
 {
-	return isize + (isize / 255) + 16;
+	return LZ4_COMPRESSBOUND(isize);
 }
 
 /*
+ * LZ4_compress_default()
+ *	Compresses 'sourceSize' bytes from buffer 'source'
+ *	into already allocated 'dest' buffer of size 'maxOutputSize'.
+ *	Compression is guaranteed to succeed if
+ *	'maxOutputSize' >= LZ4_compressBound(inputSize).
+ *	It also runs faster, so it's a recommended setting.
+ *	If the function cannot compress 'source'
+ *	into a more limited 'dest' budget,
+ *	compression stops *immediately*,
+ *	and the function result is zero.
+ *	As a consequence, 'dest' content is not valid.
+ *
+ *	source       : source address of the original data
+ *	dest         : output buffer address
+ *                 of the compressed data
+ *	inputSize    : Max supported value is
+ *                 LZ4_MAX_INPUT_SIZE
+ *	maxOutputSize: full or partial size of buffer 'dest'
+ *                 (which must be already allocated)
+ *	workmem      : address of the working memory.
+ *                 This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return       : the number of bytes written into buffer 'dest'
+ *   (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem);
+
+/*
+ * LZ4_compress_fast() :
+ *	Same as LZ4_compress_default(),
+ *	but allows to select an "acceleration" factor.
+ *	The larger the acceleration value,
+ *	the faster the algorithm,
+ *	but also the lesser the compression.
+ *	It's a trade-off. It can be fine tuned,
+ *	with each successive value
+ *	providing roughly +~3% to speed.
+ *	An acceleration value of "1"
+ *	is the same as regular LZ4_compress_default()
+ *	Values <= 0 will be replaced by
+ *	LZ4_ACCELERATION_DEFAULT, which is 1.
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem, int acceleration);
+
+/*
+ * LZ4_compress_destSize()
+ * Reverse the logic, by compressing as much data as possible
+ * from 'source' buffer into already allocated buffer 'dest'
+ * of size 'targetDestSize'.
+ * This function either compresses the entire 'source' content into 'dest'
+ * if it's large enough, or fill 'dest' buffer completely with as much data as
+ * ossible from 'source'.
+ *
+ *	source	      : source address of the original data
+ *	dest	        : output buffer address of the compressed data
+ *	inputSize	    : Max supported value is LZ4_MAX_INPUT_SIZE
+ *	*sourceSizePtr: will be modified to indicate how many bytes where read
+ *                  from 'source' to fill 'dest'.
+ *                  New value is necessarily <= old value.
+ *	workmem       : address of the working memory.
+ *                  This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
+			 or 0 if compression fails
+ */
+int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
+	int targetDestSize, void *wrkmem);
+
+/*
  * lz4_compress()
- *	src     : source address of the original data
- *	src_len : size of the original data
- *	dst	: output buffer address of the compressed data
- *		This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len : is the output size, which is returned after compress done
- *	workmem : address of the working memory.
- *		This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer and workmem must be already allocated with
+ *	src    : source address of the original data
+ *	src_len: size of the original data
+ *	dst    : output buffer address of the compressed data
+ *           This requires 'dst' of size LZ4_COMPRESSBOUND.
+ *	dst_len: is the output size, which is returned after compress done
+ *	workmem: address of the working memory.
+ *           This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return	: Success if return 0
+ *            Error if return (< 0)
+ *	note :	Destination buffer and workmem must be already allocated with
  *		the defined size.
  */
-int lz4_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
-  * lz4hc_compress()
-  *	 src	 : source address of the original data
-  *	 src_len : size of the original data
-  *	 dst	 : output buffer address of the compressed data
-  *		This requires 'dst' of size LZ4_COMPRESSBOUND.
-  *	 dst_len : is the output size, which is returned after compress done
-  *	 workmem : address of the working memory.
-  *		This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
-  *	 return  : Success if return 0
-  *		   Error if return (< 0)
-  *	 note :  Destination buffer and workmem must be already allocated with
-  *		 the defined size.
-  */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
-/*
- * lz4_decompress()
- *	src     : source address of the compressed data
- *	src_len : is the input size, whcih is returned after decompress done
- *	dest	: output buffer address of the decompressed data
- *	actual_dest_len: is the size of uncompressed data, supposing it's known
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
- *		slightly faster than lz4_decompress_unknownoutputsize()
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*-************************************************************************
+ *	Decompression Functions
+ **************************************************************************/
+
+/*
+ * LZ4_decompress_fast()
+ *	source      : source address of the compressed data
+ *	dst         : output buffer address of the decompressed data
+ *	originalSize: is the original and therefore uncompressed size
+ *	return : the number of bytes read from the source buffer
+ *           (in other words, the compressed size)
+ *           If the source stream is detected malformed, the function will
+ *           stop decoding and return a negative result.
+ *           Destination buffer must be already allocated.
+ *           Its size must be a minimum of 'originalSize' bytes.
+ *	note   : This function fully respect memory boundaries
+ *           for properly formed compressed data.
+ *           It is a bit faster than LZ4_decompress_safe().
+ *           However, it does not provide any protection against intentionally
+ *           modified data stream (malicious input).
+ *           Use this function in trusted environment only
+ *           (data to decode comes from a trusted source).
  */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len);
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
+
+/*
+ * LZ4_decompress_safe()
+ *	source             : source address of the compressed data
+ *	dest               : output buffer address of the decompressed data
+ *	compressedSize     : is the precise full size of the compressed block.
+ *	maxDecompressedSize: is the size of destination buffer,
+ *                       which must be already allocated.
+ *	return : the number of bytes decompressed into destination buffer
+ *           (necessarily <= maxDecompressedSize)
+ *           If destination buffer is not large enough, decoding will stop
+ *           and output an error code (<0).
+ *           If the source stream is detected malformed, the function will
+ *           stop decoding and return a negative result.
+ *           This function is protected against buffer overflow exploits,
+ *           including malicious data packets. It never writes outside
+ *           output buffer, nor reads outside input buffer.
+ */
+int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
 
 /*
- * lz4_decompress_unknownoutputsize()
+ * LZ4_decompress_safe_partial() :
+ * This function decompress a compressed block of size 'compressedSize'
+ * at position 'source' into destination buffer 'dest'
+ * of size 'maxDecompressedSize'.
+ * The function tries to stop decompressing operation as soon as
+ * 'targetOutputSize' has been reached, reducing decompression time.
+ *
+ *	source             : source address of the compressed data
+ *	dest               : output buffer address of the decompressed data
+ *	compressedSize     : is the precise full size of the compressed block.
+ *	targetOutputSize   : the decompression operation will try
+ *                       to stop as soon as 'targetOutputSize'
+ *                       has been reached
+ *	maxDecompressedSize: is the size of destination buffer,
+ *                       which must be already allocated.
+ *	return  : the number of bytes decoded in the destination buffer
+ *            (necessarily <= maxDecompressedSize)
+ *	Note    : this number can be < 'targetOutputSize' should the compressed
+ *            block to decode be smaller. Always control how many bytes
+ *            were decoded. If the source stream is detected malformed,
+ *            the function will stop decoding and return a negative result.
+ *            This function never writes outside of output buffer,
+ *            and never reads outside of input buffer.
+ *            It is therefore protected against malicious data packets
+ */
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+
+/*
+ * lz4_decompress_unknownoutputsize() :
  *	src     : source address of the compressed data
  *	src_len : is the input size, therefore the compressed size
- *	dest	: output buffer address of the decompressed data
+ *	dest    : output buffer address of the decompressed data
  *	dest_len: is the max size of the destination buffer, which is
- *			returned with actual size of decompressed data after
- *			decompress done
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
+ *            returned with actual size of decompressed data after
+ *            decompress done
+ * return: Success if return 0
+ *         Error if return (< 0)
+ * note:   Destination buffer must be already allocated.
  */
 int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len);
+	unsigned char *dest, size_t *dest_len);
+
+/*
+ * lz4_decompress() :
+ *	src            : source address of the compressed data
+ *	src_len        : is the input size,
+ *                   which is returned after decompress done
+ *	dest           : output buffer address of the decompressed data
+ *	actual_dest_len: is the size of uncompressed data, supposing it's known
+ *	return: Success if return 0
+ *          Error if return (< 0)
+ *	note  : Destination buffer must be already allocated.
+ *          slightly faster than lz4_decompress_unknownoutputsize()
+ */
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len);
+
+/*-************************************************************************
+ *	LZ4 HC Compression
+ **************************************************************************/
+
+/*! LZ4_compress_HC() :
+ * Compress data from `src` into `dst`, using the more powerful
+ * but slower "HC" algorithm. dst` must be already allocated.
+ * Compression is guaranteed to succeed if
+ * `dstCapacity >= LZ4_compressBound(srcSize)
+ * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE
+ *
+ *	src             : source address of the original data
+ *	dst             : output buffer address of the compressed data
+ *	srcSize         : Max supported value is LZ4_MAX_INPUT_SIZE
+ *	dstCapacity     : full or partial size of buffer 'dst'
+ *                    (which must be already allocated)
+ *	compressionLevel: Recommended values are between 4 and 9, although any
+ *                    value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *                    Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ *	wrkmem          : address of the working memory.
+ *                    This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+ *	return : the number of bytes written into 'dst'
+ *           or 0 if compression fails.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
+	int compressionLevel, void *wrkmem);
+
+/*
+ * lz4hc_compress()
+ *	src    : source address of the original data
+ *	src_len: size of the original data
+ *	dst    : output buffer address of the compressed data
+ *           This requires 'dst' of size LZ4_COMPRESSBOUND.
+ *	dst_len: is the output size, which is returned after compress done
+ *	workmem: address of the working memory.
+ *           This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
+ * return	: Success if return 0
+ *          Error if return (< 0)
+ * note   : Destination buffer and workmem must be already allocated with
+ *          the defined size.
+ */
+int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*
+ *	These functions compress data in successive blocks of any size,
+ *	using previous blocks as dictionary. One key assumption is that previous
+ *	blocks (up to 64 KB) remain read-accessible while
+ *	compressing next blocks.
+ *	There is an exception for ring buffers, which can be smaller than 64 KB.
+ *	Ring buffers scenario is automatically detected and handled by
+ *	LZ4_compress_HC_continue().
+ *	Before starting compression, state must be properly initialized,
+ *	using LZ4_resetStreamHC().
+ *	A first "fictional block" can then be designated as initial dictionary,
+ *	using LZ4_loadDictHC() (Optional).
+ *	Then, use LZ4_compress_HC_continue() to compress each successive block.
+ *	Previous memory blocks (including initial dictionary when present) must
+ *	remain accessible and unmodified during compression.
+ *	'dst' buffer should be sized to handle worst case scenarios, using
+ *	LZ4_compressBound(), to ensure operation success.
+ *	If, for any reason, previous data blocks can't be preserved unmodified
+ *	in memory during next compression block,
+ *	you must save it to a safer memory space, using LZ4_saveDictHC().
+ *	Return value of LZ4_saveDictHC() is the size of dictionary
+ *	effectively saved into 'safeBuffer'.
+ */
+void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
+int	LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
+	int dictSize);
+int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize);
+int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
+	int maxDictSize);
+
+/*-*********************************************
+ *	Streaming Compression Functions
+ ***********************************************/
+/*
+ * LZ4_resetStream() :
+ *	An LZ4_stream_t structure can be allocated once
+ *	and re-used multiple times.
+ *	Use this function to init an allocated `LZ4_stream_t` structure
+ *	and start a new compression.
+ */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
+
+/*
+ * LZ4_loadDict() :
+ *	Use this function to load a static dictionary into LZ4_stream.
+ *	Any previous data will be forgotten, only 'dictionary'
+ *	will remain in memory.
+ *	Loading a size of 0 is allowed.
+ *	Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
+	int dictSize);
+
+/*
+ * LZ4_compress_fast_continue() :
+ *	Compress buffer content 'src',
+ *	using data from previously compressed blocks
+ *	as dictionary to improve compression ratio.
+ *	Important : Previous data blocks are assumed to still
+ *	be present and unmodified !
+ *	'dst' buffer must be already allocated.
+ *	If maxDstSize >= LZ4_compressBound(srcSize),
+ *	compression is guaranteed to succeed, and runs faster.
+ *	If not, and if compressed data cannot fit into 'dst' buffer size,
+ *	compression stops, and function returns a zero.
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize, int acceleration);
+
+/*
+ * LZ4_saveDict() :
+ *	If previously compressed data block is not guaranteed
+ *	to remain available at its memory location,
+ *	save it into a safer place (char *safeBuffer).
+ *	Note : you don't need to call LZ4_loadDict() afterwards,
+ *         dictionary is immediately usable, you can therefore call
+ *         LZ4_compress_fast_continue().
+ *	Return : saved dictionary size in bytes (necessarily <= dictSize),
+ *           or 0 if error.
+ */
+int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
+
+/*
+ * LZ4_setStreamDecode() :
+ *	Use this function to instruct where to find the dictionary.
+ *	Setting a size of 0 is allowed (same effect as reset).
+ *	@return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize);
+
+/*
+ * LZ4_decompress_*_continue() :
+ *	These decoding functions allow decompression of multiple blocks
+ *	in "streaming" mode.
+ *	Previously decoded blocks *must* remain available at the memory position
+ *	where they were decoded (up to 64 KB)
+ *	In the case of a ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize);
+
+/*
+ * LZ4_decompress_*_usingDict() :
+ *	These decoding functions work the same as
+ *	a combination of LZ4_setStreamDecode() followed by
+ *	LZ4_decompress_*_continue()
+ *	They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize, const char *dictStart,
+	int dictSize);
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize);
+
 #endif
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 28321d8..e595896 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -1,19 +1,16 @@
 /*
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *		* Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *		* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,417 +22,875 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lz4.h>
 #include <asm/unaligned.h>
-#include "lz4defs.h"
 
-/*
- * LZ4_compressCtx :
- * -----------------
- * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
- * maximum size 'maxOutputSize'.  * If it cannot achieve it, compression
- * will stop, and result of the function will be zero.
- * return : the number of bytes written in buffer 'dest', or 0 if the
- * compression fails
- */
-static inline int lz4_compressctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+/*-******************************
+ *	Compression functions
+ ********************************/
+static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
 {
-	HTYPE *hashtable = (HTYPE *)ctx;
-	const u8 *ip = (u8 *)source;
+	if (tableType == byU16)
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH*8) - (LZ4_HASHLOG + 1)));
+	else
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH*8) - LZ4_HASHLOG));
+}
+
 #if LZ4_ARCH64
-	const BYTE * const base = ip;
+static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+{
+	const U32 hashLog = (tableType == byU16)
+		? LZ4_HASHLOG + 1
+		: LZ4_HASHLOG;
+
+#ifdef __LITTLE_ENDIAN__
+	static const U64 prime5bytes = 889523592379ULL;
+
+	return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
 #else
-	const int base = 0;
+	static const U64 prime8bytes = 11400714785074694791ULL;
+
+	return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
 #endif
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+}
+#endif
+
+static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
+{
+#if LZ4_ARCH64
+	if (tableType == byU32)
+		return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+#endif
+
+	return LZ4_hash4(LZ4_read32(p), tableType);
+}
+
+static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
+	tableType_t const tableType, const BYTE *srcBase)
+{
+	switch (tableType) {
+	case byPtr:
+	{
+		const BYTE **hashTable = (const BYTE **)tableBase;
+
+		hashTable[h] = p;
+		return;
+	}
+	case byU32:
+	{
+		U32 *hashTable = (U32 *) tableBase;
+
+		hashTable[h] = (U32)(p - srcBase);
+		return;
+	}
+	case byU16:
+	{
+		U16 *hashTable = (U16 *) tableBase;
+
+		hashTable[h] = (U16)(p - srcBase);
+		return;
+	}
+	}
+}
+
+static inline void LZ4_putPosition(const BYTE *p, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	if (tableType == byPtr) {
+		const BYTE **hashTable = (const BYTE **) tableBase;
+
+		return hashTable[h];
+	}
+
+	if (tableType == byU32) {
+		const U32 * const hashTable = (U32 *) tableBase;
+
+		return hashTable[h] + srcBase;
+	}
+
+	{
+		/* default, to ensure a return */
+		const U16 * const hashTable = (U16 *) tableBase;
+		return hashTable[h] + srcBase;
+	}
+}
+
+static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+
+/*
+ * LZ4_compress_generic() :
+ * inlined, to ensure branches are decided at compilation time
+ */
+static inline int LZ4_compress_generic(
+	LZ4_stream_t_internal * const dictPtr,
+	const char * const source,
+	char * const dest,
+	const int inputSize,
+	const int maxOutputSize,
+	const limitedOutput_directive outputLimited,
+	const tableType_t tableType,
+	const dict_directive dict,
+	const dictIssue_directive dictIssue,
+	const U32 acceleration)
+{
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *base;
+	const BYTE *lowLimit;
+	const BYTE * const lowRefLimit = ip - dictPtr->dictSize;
+	const BYTE * const dictionary = dictPtr->dictionary;
+	const BYTE * const dictEnd = dictionary + dictPtr->dictSize;
+	const size_t dictDelta = dictEnd - (const BYTE *)source;
+	const BYTE *anchor = (const BYTE *) source;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dest;
+	BYTE * const olimit = op + maxOutputSize;
+
+	U32 forwardH;
+	size_t refDelta = 0;
+
+	/* Init conditions */
+	if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) {
+		/* Unsupported inputSize, too large (or negative) */
+		return 0;
+	}
+	switch (dict) {
+	case noDict:
+	default:
+		base = (const BYTE *)source;
+		lowLimit = (const BYTE *)source;
+		break;
+	case withPrefix64k:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source - dictPtr->dictSize;
+		break;
+	case usingExtDict:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source;
+		break;
+	}
+
+	if ((tableType == byU16)
+		&& (inputSize >= LZ4_64Klimit)) {
+		/* Size too large (not within 64K limit) */
+		return 0;
+	}
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+	if (inputSize < LZ4_minLength) {
+		/* Input too small, no compression (all literals) */
+		goto _last_literals;
+	}
 
 	/* First Byte */
-	hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-	ip++;
-	forwardh = LZ4_HASH_VALUE(ip);
+	LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (unlikely(forwardip > mflimit))
-				goto _last_literals;
-
-			forwardh = LZ4_HASH_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = ip - base;
-		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+		{ const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = acceleration
+				<< LZ4_skipTrigger;
+
+			do {
+				U32 const h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h,
+					dictPtr->hashTable,
+					tableType, base);
+				if (dict == usingExtDict) {
+					if (match < (const BYTE *)source) {
+						refDelta = dictDelta;
+						lowLimit = dictionary;
+					} else {
+						refDelta = 0;
+						lowLimit = (const BYTE *)source;
+				}	 }
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
+					tableType, base);
+
+			} while (((dictIssue == dictSmall)
+					? (match < lowRefLimit)
+					: 0)
+				|| ((tableType == byU16)
+					? 0
+					: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match + refDelta)
+					!= LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source) &&
-			unlikely(ip[-1] == ref[-1])) {
+		while (((ip > anchor) & (match + refDelta > lowLimit))
+			&& (unlikely(ip[-1] == match[refDelta - 1]))) {
 			ip--;
-			ref--;
-		}
+			match--;
+			}
 
-		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
-			(length >> 8) > oend))
-			return 0;
+		/* Encode Literals */
+		{ unsigned const int litLength = (unsigned int)(ip - anchor);
 
-		if (length >= (int)RUN_MASK) {
-			int len;
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+			token = op++;
+			if ((outputLimited) &&
+				/* Check output buffer overflow */
+				(unlikely(op + litLength +
+					(2 + 1 + LASTLITERALS) +
+					(litLength/255) > olimit)))
+				return 0;
+			if (litLength >= RUN_MASK) {
+				int len = (int)litLength - RUN_MASK;
+
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255 ; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength<<ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
-		while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
-			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 		/* Encode MatchLength */
-		length = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
-			return 0;
-		if (length >= (int)ML_MASK) {
-			*token += ML_MASK;
-			length -= ML_MASK;
-			for (; length > 509 ; length -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (length > 254) {
-				length -= 255;
-				*op++ = 255;
+		{	 unsigned int matchCode;
+
+			if ((dict == usingExtDict)
+				&& (lowLimit == dictionary)) {
+				const BYTE *limit;
+
+				match += refDelta;
+				limit = ip + (dictEnd - match);
+				if (limit > matchlimit)
+					limit = matchlimit;
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, limit);
+				ip += MINMATCH + matchCode;
+				if (ip == limit) {
+					unsigned const int more = LZ4_count(ip,
+						(const BYTE *)source,
+						matchlimit);
+
+					matchCode += more;
+					ip += more;
+				}
+			} else {
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, matchlimit);
+				ip += MINMATCH + matchCode;
 			}
-			*op++ = (u8)length;
-		} else
-			*token += length;
+
+			if (outputLimited &&
+				/* Check output buffer overflow */
+				(unlikely(op +
+					(1 + LASTLITERALS) +
+					(matchCode>>8) > olimit)))
+				return 0;
+			if (matchCode >= ML_MASK) {
+				*token += ML_MASK;
+				matchCode -= ML_MASK;
+				LZ4_write32(op, 0xFFFFFFFF);
+				while (matchCode >= 4*255) {
+					op += 4;
+					LZ4_write32(op, 0xFFFFFFFF);
+					matchCode -= 4*255;
+				}
+				op += matchCode / 255;
+				*op++ = (BYTE)(matchCode % 255);
+			} else
+				*token += (BYTE)(matchCode);
+		}
+
+		anchor = ip;
 
 		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		if (ip > mflimit)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
+		LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH_VALUE(ip)];
-		hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
+		match = LZ4_getPosition(ip, dictPtr->hashTable,
+			tableType, base);
+		if (dict == usingExtDict) {
+			if (match < (const BYTE *)source) {
+				refDelta = dictDelta;
+				lowLimit = dictionary;
+			} else {
+				refDelta = 0;
+				lowLimit = (const BYTE *)source;
+			}
+		}
+		LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+		if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
+			&& (match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
 			token = op++;
 			*token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (((char *)op - dest) + lastrun + 1
-		+ ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
-		return 0;
-
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{	 size_t const lastRun = (size_t)(iend - anchor);
+		if ((outputLimited) &&
+			/* Check output buffer overflow */
+			((op - (BYTE *)dest) + lastRun + 1 +
+			((lastRun + 255 - RUN_MASK)/255) > (U32)maxOutputSize))
+			return 0;
+		if (lastRun >= RUN_MASK) {
+			size_t accumulator = lastRun - RUN_MASK;
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255 ; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRun);
+		op += lastRun;
+	}
 
 	/* End */
-	return (int)(((char *)op) - dest);
+	return (int) (((char *)op) - dest);
 }
 
-static inline int lz4_compress64kctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
+	int inputSize, int maxOutputSize, int acceleration)
 {
-	u16 *hashtable = (u16 *)ctx;
-	const u8 *ip = (u8 *) source;
-	const u8 *anchor = ip;
-	const u8 *const base = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int len, length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+	#if LZ4_ARCH64
+	tableType_t tableType = byU32;
+	#else
+	tableType_t tableType = byPtr;
+	#endif
+
+	LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
+
+	LZ4_resetStream((LZ4_stream_t *)state);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
+
+	if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, tableType, noDict,
+				noDictIssue, acceleration);
+	} else {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, tableType, noDict,
+				noDictIssue, acceleration);
+	}
+}
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem, int acceleration)
+{
+	return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize,
+		maxOutputSize, acceleration);
+}
+EXPORT_SYMBOL(LZ4_compress_fast);
+
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem)
+{
+	return LZ4_compress_fast(source, dest, inputSize,
+		maxOutputSize, wrkmem, 1);
+}
+EXPORT_SYMBOL(LZ4_compress_default);
+
+/*-******************************
+ *	*_destSize() variant
+ ********************************/
+
+static int LZ4_compress_destSize_generic(
+	LZ4_stream_t_internal * const ctx,
+	const char * const src,
+	char * const dst,
+	int * const srcSizePtr,
+	const int targetDstSize,
+	const tableType_t tableType)
+{
+	const BYTE *ip = (const BYTE *) src;
+	const BYTE *base = (const BYTE *) src;
+	const BYTE *lowLimit = (const BYTE *) src;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + *srcSizePtr;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dst;
+	BYTE * const oend = op + targetDstSize;
+	BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */
+		- 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */;
+	BYTE * const oMaxMatch = op + targetDstSize
+		- (LASTLITERALS + 1 /* token */);
+	BYTE * const oMaxSeq = oMaxLit - 1 /* token */;
+
+	U32 forwardH;
+
+	/* Init conditions */
+	/* Impossible to store anything */
+	if (targetDstSize < 1)
+		return 0;
+	/* Unsupported input size, too large (or negative) */
+	if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE)
+		return 0;
+	/* Size too large (not within 64K limit) */
+	if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit))
+		return 0;
+	/* Input too small, no compression (all literals) */
+	if (*srcSizePtr < LZ4_minLength)
+		goto _last_literals;
 
 	/* First Byte */
-	ip++;
-	forwardh = LZ4_HASH64K_VALUE(ip);
+	*srcSizePtr = 0;
+	LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (forwardip > mflimit)
-				goto _last_literals;
-
-			forwardh = LZ4_HASH64K_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = (u16)(ip - base);
-		} while (A32(ref) != A32(ip));
+		{	 const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = 1 << LZ4_skipTrigger;
+
+			do {
+				U32 h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h, ctx->hashTable,
+					tableType, base);
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h,
+					ctx->hashTable, tableType,
+					base);
+
+			} while (((tableType == byU16)
+				? 0
+				: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match) != LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source)
-			&& (ip[-1] == ref[-1])) {
-			ip--;
-			ref--;
-		}
+		while ((ip > anchor)
+			&& (match > lowLimit)
+			&& (unlikely(ip[-1] == match[-1]))) {
+			ip--; match--;
+			}
 
 		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* Check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS)
-			+ (length >> 8) > oend))
-			return 0;
-		if (length >= (int)RUN_MASK) {
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+		{	 unsigned int litLength = (unsigned int)(ip - anchor);
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
+			token = op++;
+			if (op + ((litLength + 240)/255)
+				+ litLength > oMaxLit) {
+				/* Not enough space for a last match */
+				op--;
+				goto _last_literals;
+			}
+			if (litLength >= RUN_MASK) {
+				unsigned int len = litLength - RUN_MASK;
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255 ; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength<<ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
+		/* Encode MatchLength */
+		{	 size_t matchLength = LZ4_count(ip + MINMATCH,
+			match + MINMATCH, matchlimit);
 
-		while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
+			if (op + ((matchLength + 240)/255) > oMaxMatch) {
+				/* Match description too long : reduce it */
+				matchLength = (15 - 1) + (oMaxMatch - op) * 255;
 			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
+			ip += MINMATCH + matchLength;
+
+			if (matchLength >= ML_MASK) {
+				*token += ML_MASK;
+				matchLength -= ML_MASK;
+				while (matchLength >= 255) {
+					matchLength -= 255; *op++ = 255;
+				}
+				*op++ = (BYTE)matchLength;
+			} else
+				*token += (BYTE)(matchLength);
 		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 
-		/* Encode MatchLength */
-		len = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
-			return 0;
-		if (len >= (int)ML_MASK) {
-			*token += ML_MASK;
-			len -= ML_MASK;
-			for (; len > 509 ; len -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (len > 254) {
-				len -= 255;
-				*op++ = 255;
-			}
-			*op++ = (u8)len;
-		} else
-			*token += len;
+		anchor = ip;
 
-		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		/* Test end of block */
+		if (ip > mflimit)
+			break;
+		if (op > oMaxSeq)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
+		LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
-		hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
-		if (A32(ref) == A32(ip)) {
-			token = op++;
-			*token = 0;
+		match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
+		LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+
+		if ((match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match) == LZ4_read32(ip))) {
+			token = op++; *token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH64K_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
-		return 0;
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{	 size_t lastRunSize = (size_t)(iend - anchor);
+
+		if (op + 1 /* token */
+			+ ((lastRunSize + 240)/255) /* litLength */
+			+ lastRunSize /* literals */ > oend) {
+			/* adapt lastRunSize to fill 'dst' */
+			lastRunSize	= (oend - op) - 1;
+			lastRunSize -= (lastRunSize + 240)/255;
+		}
+		ip = anchor + lastRunSize;
+
+		if (lastRunSize >= RUN_MASK) {
+			size_t accumulator = lastRunSize - RUN_MASK;
+
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255 ; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRunSize<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRunSize);
+		op += lastRunSize;
+	}
+
 	/* End */
-	return (int)(((char *)op) - dest);
+	*srcSizePtr = (int) (((const char *)ip) - src);
+	return (int) (((char *)op) - dst);
 }
 
-int lz4_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_destSize_extState(LZ4_stream_t *state, const char *src,
+	char *dst, int *srcSizePtr, int targetDstSize)
 {
-	int ret = -1;
-	int out_len = 0;
+	#if LZ4_ARCH64
+	tableType_t tableType = byU32;
+	#else
+	tableType_t tableType = byPtr;
+	#endif
+
+	LZ4_resetStream(state);
+
+	if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {
+		/* compression success is guaranteed */
+		return LZ4_compress_fast_extState(
+			state, src, dst, *srcSizePtr,
+			targetDstSize, 1);
+	} else {
+		if (*srcSizePtr < LZ4_64Klimit)
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, byU16);
+		else
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, tableType);
+	}
+}
 
-	if (src_len < LZ4_64KLIMIT)
-		out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
-	else
-		out_len = lz4_compressctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
 
-	if (out_len < 0)
-		goto exit;
+int LZ4_compress_destSize(const char *src, char *dst, int *srcSizePtr,
+	int targetDstSize, void *wrkmem)
+{
+	return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr,
+		targetDstSize);
+}
+EXPORT_SYMBOL(LZ4_compress_destSize);
+
+/*-******************************
+ *	Streaming functions
+ ********************************/
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream)
+{
+	memset(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+#define HASH_UNIT sizeof(reg_t)
+int LZ4_loadDict(LZ4_stream_t *LZ4_dict,
+	const char *dictionary, int dictSize)
+{
+	LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse;
+	const BYTE *p = (const BYTE *)dictionary;
+	const BYTE * const dictEnd = p + dictSize;
+	const BYTE *base;
+
+	if ((dict->initCheck)
+		|| (dict->currentOffset > 1 * GB)) {
+		/* Uninitialized structure, or reuse overflow */
+		LZ4_resetStream(LZ4_dict);
+	}
+
+	if (dictSize < (int)HASH_UNIT) {
+		dict->dictionary = NULL;
+		dict->dictSize = 0;
+		return 0;
+	}
+
+	if ((dictEnd - p) > 64 * KB)
+		p = dictEnd - 64 * KB;
+	dict->currentOffset += 64 * KB;
+	base = p - dict->currentOffset;
+	dict->dictionary = p;
+	dict->dictSize = (U32)(dictEnd - p);
+	dict->currentOffset += dict->dictSize;
+
+	while (p <= dictEnd - HASH_UNIT) {
+		LZ4_putPosition(p, dict->hashTable, byU32, base);
+		p += 3;
+	}
 
-	*dst_len = out_len;
+	return dict->dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDict);
+
+static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict,
+	const BYTE *src)
+{
+	if ((LZ4_dict->currentOffset > 0x80000000) ||
+		((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {
+		/* address space overflow */
+		/* rescale hash table */
+		U32 const delta = LZ4_dict->currentOffset - 64 * KB;
+		const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+		int i;
+
+		for (i = 0; i < LZ4_HASH_SIZE_U32; i++) {
+			if (LZ4_dict->hashTable[i] < delta)
+				LZ4_dict->hashTable[i] = 0;
+			else
+				LZ4_dict->hashTable[i] -= delta;
+		}
+		LZ4_dict->currentOffset = 64 * KB;
+		if (LZ4_dict->dictSize > 64 * KB)
+			LZ4_dict->dictSize = 64 * KB;
+		LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+	}
+}
+
+int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
+{
+	LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse;
+	const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize;
+
+	if ((U32)dictSize > 64 * KB) {
+		/* useless to define a dictionary > 64 * KB */
+		dictSize = 64 * KB;
+	}
+	if ((U32)dictSize > dict->dictSize)
+		dictSize = dict->dictSize;
 
-	return 0;
-exit:
-	return ret;
+	memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+	dict->dictionary = (const BYTE *)safeBuffer;
+	dict->dictSize = (U32)dictSize;
+
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDict);
+
+int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
+	char *dest, int inputSize, int maxOutputSize, int acceleration)
+{
+	LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse;
+	const BYTE * const dictEnd = streamPtr->dictionary
+		+ streamPtr->dictSize;
+
+	const BYTE *smallest = (const BYTE *) source;
+
+	if (streamPtr->initCheck) {
+		/* Uninitialized structure detected */
+		return 0;
+	}
+
+	if ((streamPtr->dictSize > 0) && (smallest > dictEnd))
+		smallest = dictEnd;
+
+	LZ4_renormDictT(streamPtr, smallest);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
+
+	/* Check overlapping input/dictionary space */
+	{	 const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+
+		if ((sourceEnd > streamPtr->dictionary)
+			&& (sourceEnd < dictEnd)) {
+			streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+			if (streamPtr->dictSize > 64 * KB)
+				streamPtr->dictSize = 64 * KB;
+			if (streamPtr->dictSize < 4)
+				streamPtr->dictSize = 0;
+			streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+		}
+	}
+
+	/* prefix mode : source data follows dictionary */
+	if (dictEnd == (const BYTE *)source) {
+		int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, dictSmall,	acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, noDictIssue, acceleration);
+		}
+		streamPtr->dictSize += (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+
+	/* external dictionary mode */
+	{ int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, dictSmall, acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, noDictIssue, acceleration);
+		}
+		streamPtr->dictionary = (const BYTE *)source;
+		streamPtr->dictSize = (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+}
+EXPORT_SYMBOL(LZ4_compress_fast_continue);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem) {
+	*dst_len = LZ4_compress_default(src, dst, (int)src_len,
+		(int)((size_t)dst_len), wrkmem);
+
+	/*
+	 * Prior lz4_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_fast/default
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4_compress);
 
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 6d940c7..ca71375 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -1,25 +1,16 @@
 /*
- * LZ4 Decompressor for Linux kernel
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * Based on LZ4 implementation by Yann Collet.
- *
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *    * Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *    * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -31,313 +22,508 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  You can contact the author at :
- *  - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- *  - LZ4 source repository : http://code.google.com/p/lz4/
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#ifndef STATIC
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#endif
-#include <linux/lz4.h>
-
 #include <asm/unaligned.h>
 
-#include "lz4defs.h"
-
-static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-static int lz4_uncompress(const char *source, char *dest, int osize)
+/*-*****************************
+ *	Decompression functions
+ *******************************/
+/* LZ4_decompress_generic() :
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is important this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+static inline int LZ4_decompress_generic(
+	 const char *const source,
+	 char * const dest,
+	 int inputSize,
+		/*
+		 * If endOnInput == endOnInputSize,
+		 * this value is the max size of Output Buffer.
+		 */
+	 int outputSize,
+	 /* endOnOutputSize, endOnInputSize */
+	 int endOnInput,
+	 /* full, partial */
+	 int partialDecoding,
+	 /* only used if partialDecoding == partial */
+	 int targetOutputSize,
+	 /* noDict, withPrefix64k, usingExtDict */
+	 int dict,
+	 /* == dest when no prefix */
+	 const BYTE * const lowPrefix,
+	 /* only if dict == usingExtDict */
+	 const BYTE * const dictStart,
+	 /* note : = 0 if noDict */
+	 const size_t dictSize
+	 )
 {
+	/* Local Variables */
 	const BYTE *ip = (const BYTE *) source;
-	const BYTE *ref;
+	const BYTE * const iend = ip + inputSize;
+
 	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + osize;
+	BYTE * const oend = op + outputSize;
 	BYTE *cpy;
-	unsigned token;
-	size_t length;
+	BYTE *oexit = op + targetOutputSize;
+	const BYTE * const lowLimit = lowPrefix - dictSize;
+
+	const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
+	const unsigned int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
+	const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+	const int safeDecode = (endOnInput == endOnInputSize);
+	const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
 
+	/* Special cases */
+	/* targetOutputSize too high => decode everything */
+	if ((partialDecoding) && (oexit > oend - MFLIMIT))
+		oexit = oend - MFLIMIT;
+
+	/* Empty output buffer */
+	if ((endOnInput) && (unlikely(outputSize == 0)))
+		return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
+
+	if ((!endOnInput) && (unlikely(outputSize == 0)))
+		return (*ip == 0 ? 1 : -1);
+
+	/* Main Loop : decode sequences */
 	while (1) {
+		size_t length;
+		const BYTE *match;
+		size_t offset;
+
+		/* get literal length */
+		unsigned int const token = *ip++;
+
+		length = token>>ML_BITS;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
 		if (length == RUN_MASK) {
-			size_t len;
+			unsigned int s;
 
-			len = *ip++;
-			for (; len == 255; length += 255)
-				len = *ip++;
-			if (unlikely(length > (size_t)(length + len)))
+			do {
+				s = *ip++;
+				length += s;
+			} while (likely(endOnInput
+				? ip < iend - RUN_MASK
+				: 1) & (s == 255));
+
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)(op))) {
+				/* overflow detection */
 				goto _output_error;
-			length += len;
+			}
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(ip + length) < (size_t)(ip))) {
+				/* overflow detection */
+				goto _output_error;
+			}
 		}
 
 		/* copy literals */
 		cpy = op + length;
-		if (unlikely(cpy > oend - COPYLENGTH)) {
-			/*
-			 * Error: not enough place for another match
-			 * (min 4) + 5 literals
-			 */
-			if (cpy != oend)
-				goto _output_error;
-
+		if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
+			|| (ip + length > iend - (2 + 1 + LASTLITERALS))))
+			|| ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
+			if (partialDecoding) {
+				if (cpy > oend) {
+					/*
+					 * Error :
+					 * write attempt beyond end of output buffer
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& (ip + length > iend)) {
+					/*
+					 * Error :
+					 * read attempt beyond
+					 * end of input buffer
+					 */
+					goto _output_error;
+				}
+			} else {
+				if ((!endOnInput)
+					&& (cpy != oend)) {
+					/*
+					 * Error :
+					 * block decoding must
+					 * stop exactly there
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& ((ip + length != iend)
+					|| (cpy > oend))) {
+					/*
+					 * Error :
+					 * input must be consumed
+					 */
+					goto _output_error;
+				}
+			}
 			memcpy(op, ip, length);
 			ip += length;
-			break; /* EOF */
+			op += length;
+			/* Necessarily EOF, due to parsing restrictions */
+			break;
 		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+		LZ4_wildCopy(op, ip, cpy);
+		ip += length; op = cpy;
 
 		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-
-		/* Error: offset create reference outside destination buffer */
-		if (unlikely(ref < (BYTE *const) dest))
+		offset = LZ4_readLE16(ip); ip += 2;
+		match = op - offset;
+		if ((checkOffset) && (unlikely(match < lowLimit))) {
+			/* Error : offset outside buffers */
 			goto _output_error;
+		}
+		/* costs ~1%; silence an msan warning when offset == 0 */
+		LZ4_write32(op, (U32)offset);
 
 		/* get matchlength */
 		length = token & ML_MASK;
 		if (length == ML_MASK) {
-			for (; *ip == 255; length += 255)
-				ip++;
-			if (unlikely(length > (size_t)(length + *ip)))
+			unsigned int s;
+
+			do {
+			s = *ip++;
+			if ((endOnInput) && (ip > iend - LASTLITERALS))
+				goto _output_error;
+			length += s;
+			} while (s == 255);
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)op)) {
+				/* overflow detection */
 				goto _output_error;
-			length += *ip++;
+			}
 		}
+		length += MINMATCH;
+
+		/* check external dictionary */
+		if ((dict == usingExtDict) && (match < lowPrefix)) {
+			if (unlikely(op + length > oend - LASTLITERALS)) {
+				/* doesn't respect parsing restriction */
+				goto _output_error;
+			}
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-			op[0] = ref[0];
-			op[1] = ref[1];
-			op[2] = ref[2];
-			op[3] = ref[3];
-			op += 4;
-			ref += 4;
-			ref -= dec32table[op-ref];
-			PUT4(ref, op);
-			op += STEPSIZE - 4;
-			ref -= dec64;
+			if (length <= (size_t)(lowPrefix - match)) {
+			/*
+			 * match can be copied as a single segment
+			 * from external dictionary
+			 */
+			memmove(op, dictEnd - (lowPrefix - match), length);
+			op += length;
+			} else {
+				/*
+				 * match encompass external
+				 * dictionary and current block
+				 */
+				size_t const copySize = (size_t)(lowPrefix - match);
+				size_t const restSize = length - copySize;
+
+				memcpy(op, dictEnd - copySize, copySize);
+				op += copySize;
+
+				if (restSize > (size_t)(op - lowPrefix)) {
+					/* overlap copy */
+					BYTE * const endOfMatch = op + restSize;
+					const BYTE *copyFrom = lowPrefix;
+
+					while (op < endOfMatch)
+						*op++ = *copyFrom++;
+				} else {
+					memcpy(op, lowPrefix, restSize);
+					op += restSize;
+				}
+			}
+			continue;
+		}
+
+		/* copy match within block */
+		cpy = op + length;
+		if (unlikely(offset < 8)) {
+			const int dec64 = dec64table[offset];
+
+			op[0] = match[0];
+			op[1] = match[1];
+			op[2] = match[2];
+			op[3] = match[3];
+			match += dec32table[offset];
+			memcpy(op + 4, match, 4);
+			match -= dec64;
 		} else {
-			LZ4_COPYSTEP(ref, op);
+			LZ4_copy8(op, match); match += 8;
 		}
-		cpy = op + length - (STEPSIZE - 4);
-		if (cpy > (oend - COPYLENGTH)) {
 
-			/* Error: request to write beyond destination buffer */
-			if (cpy > oend)
-				goto _output_error;
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
+		op += 8;
+
+		if (unlikely(cpy > oend - 12)) {
+			BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
+
+			if (cpy > oend - LASTLITERALS) {
+				/*
+				 * Error : last LASTLITERALS bytes
+				 * must be literals (uncompressed)
+				 */
 				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
+			}
+			if (op < oCopyLimit) {
+				LZ4_wildCopy(op, match, oCopyLimit);
+				match += oCopyLimit - op;
+				op = oCopyLimit;
+			}
 			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
+				*op++ = *match++;
+		} else {
+			LZ4_copy8(op, match);
+			if (length > 16)
+				LZ4_wildCopy(op + 8, match + 8, cpy);
 		}
-		LZ4_SECURECOPY(ref, op, cpy);
 		op = cpy; /* correction */
 	}
+
 	/* end of decoding */
-	return (int) (((char *)ip) - source);
+	if (endOnInput) {
+		/* Nb of output bytes decoded */
+		return (int) (((char *)op) - dest);
+	} else {
+		/* Nb of input bytes read */
+		return (int) (((const char *)ip) - source);
+	}
 
-	/* write overflow error detected */
+	/* Overflow error detected */
 _output_error:
 	return -1;
 }
 
-static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
-				int isize, size_t maxoutputsize)
+int LZ4_decompress_safe(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize)
 {
-	const BYTE *ip = (const BYTE *) source;
-	const BYTE *const iend = ip + isize;
-	const BYTE *ref;
-
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, full, 0,
+		noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe);
 
-	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + maxoutputsize;
-	BYTE *cpy;
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize)
+{
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, partial,
+		targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_partial);
 
-	/* Main Loop */
-	while (ip < iend) {
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
+{
+	return LZ4_decompress_generic(source, dest, 0, originalSize,
+		endOnOutputSize, full, 0, withPrefix64k,
+		(BYTE *)(dest - 64 * KB), NULL, 64 * KB);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast);
 
-		unsigned token;
-		size_t length;
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
-		if (length == RUN_MASK) {
-			int s = 255;
-			while ((ip < iend) && (s == 255)) {
-				s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-			}
-		}
-		/* copy literals */
-		cpy = op + length;
-		if ((cpy > oend - COPYLENGTH) ||
-			(ip + length > iend - COPYLENGTH)) {
-
-			if (cpy > oend)
-				goto _output_error;/* writes beyond buffer */
-
-			if (ip + length != iend)
-				goto _output_error;/*
-						    * Error: LZ4 format requires
-						    * to consume all input
-						    * at this stage
-						    */
-			memcpy(op, ip, length);
-			op += length;
-			break;/* Necessarily EOF, due to parsing restrictions */
-		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+	lz4sd->prefixSize = (size_t) dictSize;
+	lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize;
+	lz4sd->externalDict = NULL;
+	lz4sd->extDictSize	= 0;
+	return 1;
+}
+EXPORT_SYMBOL(LZ4_setStreamDecode);
 
-		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-		if (ref < (BYTE * const) dest)
-			goto _output_error;
-			/*
-			 * Error : offset creates reference
-			 * outside of destination buffer
-			 */
+/*
+ * *_continue() :
+ * These decoding functions allow decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks must still be available at the memory
+ * position where they were decoded.
+ * If it's not possible, save the relevant part of
+ * decoded data into a safe buffer,
+ * and indicate where it stands using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize, int maxOutputSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize,
+			maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict,
+			lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += result;
+		lz4sd->prefixEnd	+= result;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = result;
+		lz4sd->prefixEnd	= (BYTE *)dest + result;
+	}
 
-		/* get matchlength */
-		length = (token & ML_MASK);
-		if (length == ML_MASK) {
-			while (ip < iend) {
-				int s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-				if (s == 255)
-					continue;
-				break;
-			}
-		}
+	return result;
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_continue);
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-				op[0] = ref[0];
-				op[1] = ref[1];
-				op[2] = ref[2];
-				op[3] = ref[3];
-				op += 4;
-				ref += 4;
-				ref -= dec32table[op - ref];
-				PUT4(ref, op);
-				op += STEPSIZE - 4;
-				ref -= dec64;
-		} else {
-			LZ4_COPYSTEP(ref, op);
-		}
-		cpy = op + length - (STEPSIZE-4);
-		if (cpy > oend - COPYLENGTH) {
-			if (cpy > oend)
-				goto _output_error; /* write outside of buf */
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
-				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
-		}
-		LZ4_SECURECOPY(ref, op, cpy);
-		op = cpy; /* correction */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict,
+			lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict, lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += originalSize;
+		lz4sd->prefixEnd	+= originalSize;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = originalSize;
+		lz4sd->prefixEnd	= (BYTE *)dest + originalSize;
 	}
-	/* end of decoding */
-	return (int) (((char *) op) - dest);
 
-	/* write overflow error detected */
-_output_error:
-	return -1;
+	return result;
 }
+EXPORT_SYMBOL(LZ4_decompress_fast_continue);
 
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len)
+/*
+ * Advanced decoding functions :
+ * *_usingDict() :
+ * These decoding functions work the same as "_continue" ones,
+ * the dictionary must be explicitly provided within parameters
+ */
+static inline int LZ4_decompress_usingDict_generic(const char *source,
+	char *dest, int compressedSize, int maxOutputSize, int safe,
+	const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int input_len = 0;
-
-	input_len = lz4_uncompress(src, dest, actual_dest_len);
-	if (input_len < 0)
-		goto exit_0;
-	*src_len = input_len;
+	if (dictSize == 0)
+		return LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize, safe, full, 0,
+			noDict, (BYTE *)dest, NULL, 0);
+	if (dictStart + dictSize == dest) {
+		if (dictSize >= (int)(64 * KB - 1))
+			return LZ4_decompress_generic(source, dest,
+				compressedSize, maxOutputSize, safe, full, 0,
+				withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
+		return LZ4_decompress_generic(source, dest, compressedSize,
+			maxOutputSize, safe, full, 0, noDict,
+			(BYTE *)dest - dictSize, NULL, 0);
+	}
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxOutputSize, safe, full, 0, usingExtDict,
+		(BYTE *)dest, (const BYTE *)dictStart, dictSize);
+}
 
-	return 0;
-exit_0:
-	return ret;
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxOutputSize,
+	const char *dictStart, int dictSize)
+{
+	return LZ4_decompress_usingDict_generic(source, dest,
+		compressedSize, maxOutputSize, 1, dictStart, dictSize);
 }
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress);
-#endif
+EXPORT_SYMBOL(LZ4_decompress_safe_usingDict);
 
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len)
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int out_len = 0;
-
-	out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
-					*dest_len);
-	if (out_len < 0)
-		goto exit_0;
-	*dest_len = out_len;
-
-	return 0;
-exit_0:
-	return ret;
+	return LZ4_decompress_usingDict_generic(source, dest, 0,
+		originalSize, 0, dictStart, dictSize);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_decompress_unknownoutputsize(const unsigned char *src,
+	size_t src_len, unsigned char *dest, size_t *dest_len) {
+	 *dest_len = LZ4_decompress_safe(src, dest,
+		 (int)src_len, (int)((size_t)dest_len));
+
+		/*
+		 * Prior lz4_decompress_unknownoutputsize will return
+		 * 0 for success and a negative result for error
+		 * new LZ4_decompress_safe returns
+		 * - the length of data read on success
+		 * - and also a negative result on error
+		 * meaning when result > 0, we just return 0 here
+		 */
+		if (src_len > 0) {
+			return 0;
+		} else
+			return -1;
 }
-#ifndef STATIC
 EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
 
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len) {
+	*src_len = LZ4_decompress_fast(src, dest, (int)actual_dest_len);
+
+	/*
+	 * Prior lz4_decompress will return
+	 * 0 for success and a negative result for error
+	 * new LZ4_decompress_fast returns
+	 * - the length of data read on success
+	 * - and also a negative result on error
+	 * meaning when result > 0, we just return 0 here
+	 */
+	if ((int)((size_t)src_len) > 0)
+		return 0;
+	else
+		return -1;
+}
+EXPORT_SYMBOL(lz4_decompress);
+
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4 Decompressor");
-#endif
+MODULE_DESCRIPTION("LZ4 decompressor");
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index c79d7ea..b0d21b5 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -1,157 +1,219 @@
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
 /*
- * lz4defs.h -- architecture specific defines
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * lz4defs.h -- common and architecture specific defines for the kernel usage
+
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *    * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+#include <asm/unaligned.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
 /*
  * Detects 64 bits mode
- */
+*/
 #if defined(CONFIG_64BIT)
 #define LZ4_ARCH64 1
 #else
 #define LZ4_ARCH64 0
 #endif
 
-/*
- * Architecture-specific macros
- */
-#define BYTE	u8
-typedef struct _U16_S { u16 v; } U16_S;
-typedef struct _U32_S { u32 v; } U32_S;
-typedef struct _U64_S { u64 v; } U64_S;
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-
-#define A16(x) (((U16_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A64(x) (((U64_S *)(x))->v)
-
-#define PUT4(s, d) (A32(d) = A32(s))
-#define PUT8(s, d) (A64(d) = A64(s))
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - A16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
-	do {	\
-		A16(p) = v; \
-		p += 2; \
-	} while (0)
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
-#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
-#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
-
-#define PUT4(s, d) \
-	put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
-#define PUT8(s, d) \
-	put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - get_unaligned_le16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)			\
-	do {						\
-		put_unaligned_le16(v, (u16 *)(p));	\
-		p += 2;					\
-	} while (0)
+/*-************************************
+ *	Basic Types
+ **************************************/
+#include <linux/types.h>
+
+typedef	uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef	int32_t S32;
+typedef uint64_t U64;
+typedef uintptr_t uptrval;
+
+#if defined(__x86_64__)
+	typedef U64		reg_t;	 /* 64-bits in x32 mode */
+#else
+	typedef size_t reg_t;	 /* 32-bits in x32 mode */
 #endif
 
-#define COPYLENGTH 8
-#define ML_BITS  4
-#define ML_MASK  ((1U << ML_BITS) - 1)
-#define RUN_BITS (8 - ML_BITS)
-#define RUN_MASK ((1U << RUN_BITS) - 1)
-#define MEMORY_USAGE	14
-#define MINMATCH	4
-#define SKIPSTRENGTH	6
-#define LASTLITERALS	5
-#define MFLIMIT		(COPYLENGTH + MINMATCH)
-#define MINLENGTH	(MFLIMIT + 1)
-#define MAXD_LOG	16
-#define MAXD		(1 << MAXD_LOG)
-#define MAXD_MASK	(u32)(MAXD - 1)
-#define MAX_DISTANCE	(MAXD - 1)
-#define HASH_LOG	(MAXD_LOG - 1)
-#define HASHTABLESIZE	(1 << HASH_LOG)
-#define MAX_NB_ATTEMPTS	256
-#define OPTIMAL_ML	(int)((ML_MASK-1)+MINMATCH)
-#define LZ4_64KLIMIT	((1<<16) + (MFLIMIT - 1))
-#define HASHLOG64K	((MEMORY_USAGE - 2) + 1)
-#define HASH64KTABLESIZE	(1U << HASHLOG64K)
-#define LZ4_HASH_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - (MEMORY_USAGE-2)))
-#define LZ4_HASH64K_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASHLOG64K))
-#define HASH_VALUE(p)		(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASH_LOG))
-
-#if LZ4_ARCH64/* 64-bit */
-#define STEPSIZE 8
-
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT8(s, d);	\
-		d += 8;		\
-		s += 8;		\
-	} while (0)
-
-#define LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
-
-#define LZ4_SECURECOPY(s, d, e)			\
-	do {					\
-		if (d < e) {			\
-			LZ4_WILDCOPY(s, d, e);	\
-		}				\
-	} while (0)
-#define HTYPE u32
-
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+/*-************************************
+ *	Constants
+ **************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB (1<<10)
+#define MB (1<<20)
+#define GB (1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1<<MAXD_LOG) - 1)
+#define STEPSIZE sizeof(size_t)
+
+#define ML_BITS	4
+#define ML_MASK	((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;
+
+/*-************************************
+ *	Reading and writing into memory
+ **************************************/
+
+static inline U16 LZ4_read16(const void *memPtr)
+{
+		U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline U32 LZ4_read32(const void *memPtr)
+{
+		U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline size_t LZ4_read_ARCH(const void *memPtr)
+{
+		size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline void LZ4_write16(void *memPtr, U16 value)
+{
+		memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline void LZ4_write32(void *memPtr, U32 value)
+{
+		memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline U16 LZ4_readLE16(const void *memPtr)
+{
+#ifdef __LITTLE_ENDIAN__
+		return LZ4_read16(memPtr);
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
+		const BYTE *p = (const BYTE *)memPtr;
+
+		return (U16)((U16)p[0] + (p[1] << 8));
 #endif
+}
+
+static inline void LZ4_writeLE16(void *memPtr, U16 value)
+{
+#ifdef __LITTLE_ENDIAN__
+		LZ4_write16(memPtr, value);
+#else
+		BYTE *p = (BYTE *)memPtr;
 
-#else	/* 32-bit */
-#define STEPSIZE 4
+		p[0] = (BYTE) value;
+		p[1] = (BYTE)(value>>8);
+#endif
+}
 
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT4(s, d);	\
-		d += 4;		\
-		s += 4;		\
-	} while (0)
+static inline void LZ4_copy8(void *dst, const void *src)
+{
+		memcpy(dst, src, 8);
+}
 
-#define LZ4_COPYPACKET(s, d)		\
-	do {				\
-		LZ4_COPYSTEP(s, d);	\
-		LZ4_COPYSTEP(s, d);	\
-	} while (0)
+/*
+ * customized variant of memcpy,
+ * which can overwrite up to 7 bytes beyond dstEnd
+ */
+static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
+{
+		BYTE *d = (BYTE *)dstPtr;
+		const BYTE *s = (const BYTE *)srcPtr;
+		BYTE *const e = (BYTE *)dstEnd;
 
-#define LZ4_SECURECOPY	LZ4_WILDCOPY
-#define HTYPE const u8*
+		do { LZ4_copy8(d, s); d += 8; s += 8; } while (d < e);
+}
 
-#ifdef __BIG_ENDIAN
+#if LZ4_ARCH64
+#ifdef __BIG_ENDIAN__
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#endif
+#else
+#ifdef __BIG_ENDIAN__
 #define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
 #else
 #define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
 #endif
+#endif
 
+static inline unsigned int LZ4_count(const BYTE *pIn, const BYTE *pMatch,
+	const BYTE *pInLimit)
+{
+	const BYTE *const pStart = pIn;
+
+	while (likely(pIn < pInLimit-(STEPSIZE-1))) {
+		size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+
+		if (!diff) {
+			pIn += STEPSIZE;
+			pMatch += STEPSIZE;
+			continue;
+		}
+		pIn += LZ4_NBCOMMONBYTES(diff);
+		return (unsigned int)(pIn - pStart);
+	}
+
+#ifdef LZ4_ARCH64
+	if ((pIn < (pInLimit-3))
+		&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+		pIn += 4; pMatch += 4;
+	}
 #endif
+	if ((pIn < (pInLimit-1))
+		&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+		pIn += 2; pMatch += 2;
+	}
+	if ((pIn < pInLimit) && (*pMatch == *pIn))
+		pIn++;
+	return (unsigned int)(pIn - pStart);
+}
+
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
 
-#define LZ4_WILDCOPY(s, d, e)		\
-	do {				\
-		LZ4_COPYPACKET(s, d);	\
-	} while (d < e)
-
-#define LZ4_BLINDCOPY(s, d, l)	\
-	do {	\
-		u8 *e = (d) + l;	\
-		LZ4_WILDCOPY(s, d, e);	\
-		d = e;	\
-	} while (0)
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+#endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index f344f76..880c778 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -1,19 +1,17 @@
 /*
  * LZ4 HC - High Compression Mode of LZ4
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Copyright (C) 2011-2015, Yann Collet.
  *
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *		* Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *		* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,323 +23,353 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+/*-************************************
+ *	Dependencies
+ **************************************/
 #include <linux/lz4.h>
-#include <asm/unaligned.h>
 #include "lz4defs.h"
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h> /* memset */
 
-struct lz4hc_data {
-	const u8 *base;
-	HTYPE hashtable[HASHTABLESIZE];
-	u16 chaintable[MAXD];
-	const u8 *nexttoupdate;
-} __attribute__((__packed__));
+/* *************************************
+ *	Local Constants and types
+ ***************************************/
 
-static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
+#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH)
+
+#define HASH_FUNCTION(i)	(((i) * 2654435761U) \
+	>> ((MINMATCH*8) - LZ4HC_HASH_LOG))
+#define DELTANEXTU16(p)	chainTable[(U16)(p)] /* faster */
+
+static U32 LZ4HC_hashPtr(const void *ptr)
 {
-	memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
-	memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
-
-#if LZ4_ARCH64
-	hc4->nexttoupdate = base + 1;
-#else
-	hc4->nexttoupdate = base;
-#endif
-	hc4->base = base;
-	return 1;
+	return HASH_FUNCTION(LZ4_read32(ptr));
+}
+
+/**************************************
+ *	HC Compression
+ **************************************/
+static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
+{
+	memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable));
+	memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+	hc4->nextToUpdate = 64 * KB;
+	hc4->base = start - 64 * KB;
+	hc4->end = start;
+	hc4->dictBase = start - 64 * KB;
+	hc4->dictLimit = 64 * KB;
+	hc4->lowLimit = 64 * KB;
 }
 
 /* Update chains up to ip (excluded) */
-static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
+static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+	const BYTE *ip)
 {
-	u16 *chaintable = hc4->chaintable;
-	HTYPE *hashtable  = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const hashTable	= hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
+	U32 const target = (U32)(ip - base);
+	U32 idx = hc4->nextToUpdate;
+
+	while (idx < target) {
+		U32 const h = LZ4HC_hashPtr(base + idx);
+		size_t delta = idx - hashTable[h];
 
-	while (hc4->nexttoupdate < ip) {
-		const u8 *p = hc4->nexttoupdate;
-		size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
 		if (delta > MAX_DISTANCE)
 			delta = MAX_DISTANCE;
-		chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
-		hashtable[HASH_VALUE(p)] = (p) - base;
-		hc4->nexttoupdate++;
-	}
-}
 
-static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
-		const u8 *const matchlimit)
-{
-	const u8 *p1t = p1;
-
-	while (p1t < matchlimit - (STEPSIZE - 1)) {
-#if LZ4_ARCH64
-		u64 diff = A64(p2) ^ A64(p1t);
-#else
-		u32 diff = A32(p2) ^ A32(p1t);
-#endif
-		if (!diff) {
-			p1t += STEPSIZE;
-			p2 += STEPSIZE;
-			continue;
-		}
-		p1t += LZ4_NBCOMMONBYTES(diff);
-		return p1t - p1;
-	}
-#if LZ4_ARCH64
-	if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
-		p1t += 4;
-		p2 += 4;
-	}
-#endif
+		DELTANEXTU16(idx) = (U16)delta;
 
-	if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
-		p1t += 2;
-		p2 += 2;
+		hashTable[h] = idx;
+		idx++;
 	}
-	if ((p1t < matchlimit) && (*p2 == *p1t))
-		p1t++;
-	return p1t - p1;
+
+	hc4->nextToUpdate = target;
 }
 
-static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
-		const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
+static inline int LZ4HC_InsertAndFindBestMatch(
+	LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
+	const BYTE *ip,
+	const BYTE * const iLimit,
+	const BYTE **matchpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-	const u8 *ref;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	int nbattempts = MAX_NB_ATTEMPTS;
-	size_t repl = 0, ml = 0;
-	u16 delta;
+	const BYTE * const dictBase = hc4->dictBase;
+	const U32 dictLimit = hc4->dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	U32 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	size_t ml = 0;
 
 	/* HC4 match finder */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	/* potential repetition */
-	if (ref >= ip-4) {
-		/* confirmed */
-		if (A32(ref) == A32(ip)) {
-			delta = (u16)(ip-ref);
-			repl = ml  = lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
-			*matchpos = ref;
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE * const match = base + matchIndex;
+
+			if (*(match + ml) == *(ip + ml)
+				&& (LZ4_read32(match) == LZ4_read32(ip))) {
+				size_t const mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, iLimit) + MINMATCH;
 
-	while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
-		nbattempts--;
-		if (*(ref + ml) == *(ip + ml)) {
-			if (A32(ref) == A32(ip)) {
-				size_t mlt =
-					lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
 				if (mlt > ml) {
 					ml = mlt;
-					*matchpos = ref;
+					*matchpos = match;
+				}
+			}
+		} else {
+			const BYTE * const match = dictBase + matchIndex;
+
+			if (LZ4_read32(match) == LZ4_read32(ip)) {
+				size_t mlt;
+				const BYTE *vLimit = ip
+					+ (dictLimit - matchIndex);
+
+				if (vLimit > iLimit)
+					vLimit = iLimit;
+				mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, vLimit) + MINMATCH;
+				if ((ip + mlt == vLimit)
+					&& (vLimit < iLimit))
+					mlt += LZ4_count(ip + mlt,
+						base + dictLimit,
+						iLimit);
+				if (mlt > ml) {
+					/* virtual matchpos */
+					ml = mlt;
+					*matchpos = base + matchIndex;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-
-	/* Complete table */
-	if (repl) {
-		const BYTE *ptr = ip;
-		const BYTE *end;
-		end = ip + repl - (MINMATCH-1);
-		/* Pre-Load */
-		while (ptr < end - delta) {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			ptr++;
-		}
-		do {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			/* Head of chain */
-			hashtable[HASH_VALUE(ptr)] = (ptr) - base;
-			ptr++;
-		} while (ptr < end);
-		hc4->nexttoupdate = end;
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
 
 	return (int)ml;
 }
 
-static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
-	const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
-	const u8 **matchpos, const u8 **startpos)
+static inline int LZ4HC_InsertAndGetWiderMatch(
+	LZ4HC_CCtx_internal *hc4,
+	const BYTE * const ip,
+	const BYTE * const iLowLimit,
+	const BYTE * const iHighLimit,
+	int longest,
+	const BYTE **matchpos,
+	const BYTE **startpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	const u8 *ref;
-	int nbattempts = MAX_NB_ATTEMPTS;
-	int delta = (int)(ip - startlimit);
+	const U32 dictLimit = hc4->dictLimit;
+	const BYTE * const lowPrefixPtr = base + dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	const BYTE * const dictBase = hc4->dictBase;
+	U32	 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	int delta = (int)(ip - iLowLimit);
 
 	/* First Match */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
-		&& (nbattempts)) {
-		nbattempts--;
-		if (*(startlimit + longest) == *(ref - delta + longest)) {
-			if (A32(ref) == A32(ip)) {
-				const u8 *reft = ref + MINMATCH;
-				const u8 *ipt = ip + MINMATCH;
-				const u8 *startt = ip;
-
-				while (ipt < matchlimit-(STEPSIZE - 1)) {
-					#if LZ4_ARCH64
-					u64 diff = A64(reft) ^ A64(ipt);
-					#else
-					u32 diff = A32(reft) ^ A32(ipt);
-					#endif
-
-					if (!diff) {
-						ipt += STEPSIZE;
-						reft += STEPSIZE;
-						continue;
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE *matchPtr = base + matchIndex;
+
+			if (*(iLowLimit + longest)
+				== *(matchPtr - delta + longest)) {
+				if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+					int mlt = MINMATCH +
+						LZ4_count(ip + MINMATCH,
+							matchPtr + MINMATCH,
+							iHighLimit);
+					int back = 0;
+
+					while ((ip + back > iLowLimit)
+							 && (matchPtr + back > lowPrefixPtr)
+							 && (ip[back - 1] == matchPtr[back - 1]))
+						back--;
+
+					mlt -= back;
+
+					if (mlt > longest) {
+						longest = (int)mlt;
+						*matchpos = matchPtr + back;
+						*startpos = ip + back;
 					}
-					ipt += LZ4_NBCOMMONBYTES(diff);
-					goto _endcount;
-				}
-				#if LZ4_ARCH64
-				if ((ipt < (matchlimit - 3))
-					&& (A32(reft) == A32(ipt))) {
-					ipt += 4;
-					reft += 4;
-				}
-				ipt += 2;
-				#endif
-				if ((ipt < (matchlimit - 1))
-					&& (A16(reft) == A16(ipt))) {
-					reft += 2;
 				}
-				if ((ipt < matchlimit) && (*reft == *ipt))
-					ipt++;
-_endcount:
-				reft = ref;
-
-				while ((startt > startlimit)
-					&& (reft > hc4->base)
-					&& (startt[-1] == reft[-1])) {
-					startt--;
-					reft--;
-				}
-
-				if ((ipt - startt) > longest) {
-					longest = (int)(ipt - startt);
-					*matchpos = reft;
-					*startpos = startt;
+			}
+		} else {
+			const BYTE * const matchPtr = dictBase + matchIndex;
+
+			if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+				size_t mlt;
+				int back = 0;
+				const BYTE *vLimit = ip + (dictLimit - matchIndex);
+
+				if (vLimit > iHighLimit)
+					vLimit = iHighLimit;
+
+				mlt = LZ4_count(ip + MINMATCH,
+					matchPtr + MINMATCH, vLimit) + MINMATCH;
+
+				if ((ip + mlt == vLimit) && (vLimit < iHighLimit))
+					mlt += LZ4_count(ip + mlt, base + dictLimit,
+						iHighLimit);
+				while ((ip + back > iLowLimit)
+					&& (matchIndex + back > lowLimit)
+					&& (ip[back - 1] == matchPtr[back - 1]))
+					back--;
+				mlt -= back;
+				if ((int)mlt > longest) {
+					longest = (int)mlt;
+					*matchpos = base + matchIndex + back;
+					*startpos = ip + back;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
+
 	return longest;
 }
 
-static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
-		int ml, const u8 *ref)
+static inline int LZ4HC_encodeSequence(
+	const BYTE **ip,
+	BYTE **op,
+	const BYTE **anchor,
+	int matchLength,
+	const BYTE * const match,
+	limitedOutput_directive limitedOutputBuffer,
+	BYTE *oend)
 {
-	int length, len;
-	u8 *token;
+	int length;
+	BYTE *token;
 
 	/* Encode Literal length */
 	length = (int)(*ip - *anchor);
 	token = (*op)++;
+
+	if ((limitedOutputBuffer)
+		&& ((*op + (length>>8)
+			+ length + (2 + 1 + LASTLITERALS)) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
 	if (length >= (int)RUN_MASK) {
-		*token = (RUN_MASK << ML_BITS);
+		int len;
+
+		*token = (RUN_MASK<<ML_BITS);
 		len = length - RUN_MASK;
-		for (; len > 254 ; len -= 255)
+		for (; len > 254 ; len -= 255) {
 			*(*op)++ = 255;
-		*(*op)++ = (u8)len;
+			*(*op)++ = (BYTE)len;
+		}
 	} else
-		*token = (length << ML_BITS);
+		*token = (BYTE)(length<<ML_BITS);
 
 	/* Copy Literals */
-	LZ4_BLINDCOPY(*anchor, *op, length);
+	LZ4_wildCopy(*op, *anchor, (*op) + length);
+	*op += length;
 
 	/* Encode Offset */
-	LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
+	LZ4_writeLE16(*op, (U16)(*ip - match)); *op += 2;
 
 	/* Encode MatchLength */
-	len = (int)(ml - MINMATCH);
-	if (len >= (int)ML_MASK) {
+	length = (int)(matchLength - MINMATCH);
+	if ((limitedOutputBuffer)
+		&& (*op + (length>>8)
+			+ (1 + LASTLITERALS) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
+	if (length >= (int)ML_MASK) {
 		*token += ML_MASK;
-		len -= ML_MASK;
-		for (; len > 509 ; len -= 510) {
+		length -= ML_MASK;
+		for (; length > 509 ; length -= 510) {
 			*(*op)++ = 255;
 			*(*op)++ = 255;
 		}
-		if (len > 254) {
-			len -= 255;
+		if (length > 254) {
+			length -= 255;
 			*(*op)++ = 255;
 		}
-		*(*op)++ = (u8)len;
+		*(*op)++ = (BYTE)length;
 	} else
-		*token += len;
+		*token += (BYTE)(length);
 
 	/* Prepare next loop */
-	*ip += ml;
+	*ip += matchLength;
 	*anchor = *ip;
 
 	return 0;
 }
 
-static int lz4_compresshcctx(struct lz4hc_data *ctx,
-		const char *source,
-		char *dest,
-		int isize)
+static int LZ4HC_compress_generic(
+	LZ4HC_CCtx_internal *const ctx,
+	const char * const source,
+	char * const dest,
+	int const inputSize,
+	int const maxOutputSize,
+	int compressionLevel,
+	limitedOutput_directive limit
+	)
 {
-	const u8 *ip = (const u8 *)source;
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	const u8 *const matchlimit = (iend - LASTLITERALS);
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = (iend - LASTLITERALS);
 
-	u8 *op = (u8 *)dest;
+	BYTE *op = (BYTE *) dest;
+	BYTE * const oend = op + maxOutputSize;
 
+	unsigned int maxNbAttempts;
 	int ml, ml2, ml3, ml0;
-	const u8 *ref = NULL;
-	const u8 *start2 = NULL;
-	const u8 *ref2 = NULL;
-	const u8 *start3 = NULL;
-	const u8 *ref3 = NULL;
-	const u8 *start0;
-	const u8 *ref0;
-	int lastrun;
+	const BYTE *ref = NULL;
+	const BYTE *start2 = NULL;
+	const BYTE *ref2 = NULL;
+	const BYTE *start3 = NULL;
+	const BYTE *ref3 = NULL;
+	const BYTE *start0;
+	const BYTE *ref0;
+
+	/* init */
+	if (compressionLevel > LZ4HC_MAX_CLEVEL)
+		compressionLevel = LZ4HC_MAX_CLEVEL;
+	if (compressionLevel < 1)
+		compressionLevel = LZ4HC_DEFAULT_CLEVEL;
+	maxNbAttempts = 1 << (compressionLevel - 1);
+	ctx->end += inputSize;
 
 	ip++;
 
 	/* Main Loop */
 	while (ip < mflimit) {
-		ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
+		ml = LZ4HC_InsertAndFindBestMatch(ctx, ip,
+			matchlimit, (&ref), maxNbAttempts);
 		if (!ml) {
 			ip++;
 			continue;
@@ -351,46 +379,52 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		start0 = ip;
 		ref0 = ref;
 		ml0 = ml;
-_search2:
-		if (ip+ml < mflimit)
-			ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
-				ip + 1, matchlimit, ml, &ref2, &start2);
+
+_Search2:
+		if (ip + ml < mflimit)
+			ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				ip + ml - 2, ip + 0,
+				matchlimit, ml, &ref2,
+				&start2, maxNbAttempts);
 		else
 			ml2 = ml;
-		/* No better match */
+
 		if (ml2 == ml) {
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			/* No better match */
+			if (LZ4HC_encodeSequence(&ip, &op,
+				&anchor, ml, ref, limit, oend))
+				return 0;
 			continue;
 		}
 
 		if (start0 < ip) {
-			/* empirical */
 			if (start2 < ip + ml0) {
+				/* empirical */
 				ip = start0;
 				ref = ref0;
 				ml = ml0;
 			}
 		}
-		/*
-		 * Here, start0==ip
-		 * First Match too small : removed
-		 */
+
+		/* Here, start0 == ip */
 		if ((start2 - ip) < 3) {
+			/* First Match too small : removed */
 			ml = ml2;
 			ip = start2;
 			ref = ref2;
-			goto _search2;
+			goto _Search2;
 		}
 
-_search3:
+_Search3:
 		/*
-		 * Currently we have :
-		 * ml2 > ml1, and
-		 * ip1+3 <= ip2 (usually < ip1+ml1)
-		 */
+		* Currently we have :
+		* ml2 > ml1, and
+		* ip1 + 3 <= ip2 (usually < ip1 + ml1)
+		*/
 		if ((start2 - ip) < OPTIMAL_ML) {
 			int correction;
 			int new_ml = ml;
+
 			if (new_ml > OPTIMAL_ML)
 				new_ml = OPTIMAL_ML;
 			if (ip + new_ml > start2 + ml2 - MINMATCH)
@@ -403,39 +437,44 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			}
 		}
 		/*
-		 * Now, we have start2 = ip+new_ml,
-		 * with new_ml=min(ml, OPTIMAL_ML=18)
+		 * Now, we have start2 = ip + new_ml,
+		 * with new_ml = min(ml, OPTIMAL_ML = 18)
 		 */
+
 		if (start2 + ml2 < mflimit)
-			ml3 = lz4hc_insertandgetwidermatch(ctx,
-				start2 + ml2 - 3, start2, matchlimit,
-				ml2, &ref3, &start3);
+			ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				start2 + ml2 - 3, start2,
+				matchlimit, ml2, &ref3, &start3,
+				maxNbAttempts);
 		else
 			ml3 = ml2;
 
-		/* No better match : 2 sequences to encode */
 		if (ml3 == ml2) {
+			/* No better match : 2 sequences to encode */
 			/* ip & ref are known; Now for ml */
-			if (start2 < ip+ml)
+			if (start2 < ip + ml)
 				ml = (int)(start2 - ip);
-
 			/* Now, encode 2 sequences */
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml, ref, limit, oend))
+				return 0;
 			ip = start2;
-			lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml2, ref2, limit, oend))
+				return 0;
 			continue;
 		}
 
-		/* Not enough space for match 2 : remove it */
 		if (start3 < ip + ml + 3) {
-			/*
-			 * can write Seq1 immediately ==> Seq2 is removed,
-			 * so Seq3 becomes Seq1
-			 */
+			/* Not enough space for match 2 : remove it */
 			if (start3 >= (ip + ml)) {
+				/* can write Seq1 immediately
+				 * ==> Seq2 is removed,
+				 * so Seq3 becomes Seq1
+				 */
 				if (start2 < ip + ml) {
-					int correction =
-						(int)(ip + ml - start2);
+					int correction = (int)(ip + ml - start2);
+
 					start2 += correction;
 					ref2 += correction;
 					ml2 -= correction;
@@ -446,35 +485,38 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 					}
 				}
 
-				lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-				ip  = start3;
+				if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+					ml, ref, limit, oend))
+					return 0;
+				ip	= start3;
 				ref = ref3;
-				ml  = ml3;
+				ml	= ml3;
 
 				start0 = start2;
 				ref0 = ref2;
 				ml0 = ml2;
-				goto _search2;
+				goto _Search2;
 			}
 
 			start2 = start3;
 			ref2 = ref3;
 			ml2 = ml3;
-			goto _search3;
+			goto _Search3;
 		}
 
 		/*
-		 * OK, now we have 3 ascending matches; let's write at least
-		 * the first one ip & ref are known; Now for ml
-		 */
+		* OK, now we have 3 ascending matches;
+		* let's write at least the first one
+		* ip & ref are known; Now for ml
+		*/
 		if (start2 < ip + ml) {
 			if ((start2 - ip) < (int)ML_MASK) {
 				int correction;
+
 				if (ml > OPTIMAL_ML)
 					ml = OPTIMAL_ML;
 				if (ip + ml > start2 + ml2 - MINMATCH)
-					ml = (int)(start2 - ip) + ml2
-						- MINMATCH;
+					ml = (int)(start2 - ip) + ml2 - MINMATCH;
 				correction = ml - (int)(start2 - ip);
 				if (correction > 0) {
 					start2 += correction;
@@ -484,7 +526,9 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			} else
 				ml = (int)(start2 - ip);
 		}
-		lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+		if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml,
+			ref, limit, oend))
+			return 0;
 
 		ip = start2;
 		ref = ref2;
@@ -494,46 +538,243 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		ref2 = ref3;
 		ml2 = ml3;
 
-		goto _search3;
+		goto _Search3;
 	}
 
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8) lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{ int lastRun = (int)(iend - anchor);
+
+		if ((limit)
+			&& (((char *)op - dest) + lastRun + 1
+				+ ((lastRun + 255 - RUN_MASK)/255)
+					> (U32)maxOutputSize)) {
+			/* Check output limit */
+			return 0;
+		}
+		if (lastRun >= (int)RUN_MASK) {
+			*op++ = (RUN_MASK<<ML_BITS);
+			lastRun -= RUN_MASK;
+			for (; lastRun > 254 ; lastRun -= 255) {
+				*op++ = 255;
+				*op++ = (BYTE) lastRun;
+			}
+		} else
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		memcpy(op, anchor, iend - anchor);
+		op += iend - anchor;
+	}
+
 	/* End */
 	return (int) (((char *)op) - dest);
 }
 
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_HC_extStateHC(
+	void *state,
+	const char *src,
+	char *dst,
+	int srcSize,
+	int maxDstSize,
+	int compressionLevel)
 {
-	int ret = -1;
-	int out_len = 0;
+	LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t *)state)->internal_donotuse;
 
-	struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
-	lz4hc_init(hc4, (const u8 *)src);
-	out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
-		(char *)dst, (int)src_len);
+	if (((size_t)(state)&(sizeof(void *) - 1)) != 0) {
+		/* Error : state is not aligned
+		 * for pointers (32 or 64 bits)
+		 */
+		return 0;
+	}
 
-	if (out_len < 0)
-		goto exit;
+	LZ4HC_init(ctx, (const BYTE *)src);
 
-	*dst_len = out_len;
-	return 0;
+	if (maxDstSize < LZ4_compressBound(srcSize))
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, limitedOutput);
+	else
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, noLimit);
+}
 
-exit:
-	return ret;
+int LZ4_compress_HC(const char *src, char *dst, int srcSize,
+	int maxDstSize, int compressionLevel, void *wrkmem)
+{
+	return LZ4_compress_HC_extStateHC(wrkmem, src, dst,
+		srcSize, maxDstSize, compressionLevel);
+}
+EXPORT_SYMBOL(LZ4_compress_HC);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4hc_compress(const unsigned char *src, size_t src_len,
+	unsigned char *dst, size_t *dst_len, void *wrkmem)
+{
+	*dst_len = LZ4_compress_HC(src, dst, (int)src_len,
+		(int)((size_t)dst_len), LZ4HC_DEFAULT_CLEVEL, wrkmem);
+
+	/*
+	 * Prior lz4hc_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_HC
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4hc_compress);
 
+/**************************************
+ *	Streaming Functions
+ **************************************/
+void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel)
+{
+	LZ4_streamHCPtr->internal_donotuse.base = NULL;
+	LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned int)compressionLevel;
+}
+
+int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *dictionary,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	if (dictSize > 64 * KB) {
+		dictionary += dictSize - 64 * KB;
+		dictSize = 64 * KB;
+	}
+	LZ4HC_init(ctxPtr, (const BYTE *)dictionary);
+	if (dictSize >= 4)
+		LZ4HC_Insert(ctxPtr, (const BYTE *)dictionary + (dictSize - 3));
+	ctxPtr->end = (const BYTE *)dictionary + dictSize;
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDictHC);
+
+/* compression */
+
+static void LZ4HC_setExternalDict(
+	LZ4HC_CCtx_internal *ctxPtr,
+	const BYTE *newBlock)
+{
+	if (ctxPtr->end >= ctxPtr->base + 4) {
+		/* Referencing remaining dictionary content */
+		LZ4HC_Insert(ctxPtr, ctxPtr->end - 3);
+	}
+
+	/*
+	 * Only one memory segment for extDict,
+	 * so any previous extDict is lost at this stage
+	 */
+	ctxPtr->lowLimit	= ctxPtr->dictLimit;
+	ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+	ctxPtr->dictBase	= ctxPtr->base;
+	ctxPtr->base = newBlock - ctxPtr->dictLimit;
+	ctxPtr->end	= newBlock;
+	/* match referencing will resume from there */
+	ctxPtr->nextToUpdate = ctxPtr->dictLimit;
+}
+EXPORT_SYMBOL(LZ4HC_setExternalDict);
+
+static int LZ4_compressHC_continue_generic(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize,
+	limitedOutput_directive limit)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	/* auto - init if forgotten */
+	if (ctxPtr->base == NULL)
+		LZ4HC_init(ctxPtr, (const BYTE *) source);
+
+	/* Check overflow */
+	if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) {
+		size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base)
+			- ctxPtr->dictLimit;
+		if (dictSize > 64 * KB)
+			dictSize = 64 * KB;
+		LZ4_loadDictHC(LZ4_streamHCPtr,
+			(const char *)(ctxPtr->end) - dictSize, (int)dictSize);
+	}
+
+	/* Check if blocks follow each other */
+	if ((const BYTE *)source != ctxPtr->end)
+		LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source);
+
+	/* Check overlapping input/dictionary space */
+	{ const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+		const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+		const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
+
+		if ((sourceEnd > dictBegin)
+			&& ((const BYTE *)source < dictEnd)) {
+			if (sourceEnd > dictEnd)
+				sourceEnd = dictEnd;
+			ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+
+			if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4)
+				ctxPtr->lowLimit = ctxPtr->dictLimit;
+		}
+	}
+
+	return LZ4HC_compress_generic(ctxPtr, source, dest,
+		inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize)
+{
+	if (maxOutputSize < LZ4_compressBound(inputSize))
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, limitedOutput);
+	else
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, noLimit);
+}
+EXPORT_SYMBOL(LZ4_compress_HC_continue);
+
+/* dictionary saving */
+
+int LZ4_saveDictHC(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	char *safeBuffer,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+	int const prefixSize = (int)(streamPtr->end
+		- (streamPtr->base + streamPtr->dictLimit));
+
+	if (dictSize > 64 * KB)
+		dictSize = 64 * KB;
+	if (dictSize < 4)
+		dictSize = 0;
+	if (dictSize > prefixSize)
+		dictSize = prefixSize;
+
+	memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+
+	{ U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
+
+		streamPtr->end = (const BYTE *)safeBuffer + dictSize;
+		streamPtr->base = streamPtr->end - endIndex;
+		streamPtr->dictLimit = endIndex - dictSize;
+		streamPtr->lowLimit = endIndex - dictSize;
+
+		if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+			streamPtr->nextToUpdate = streamPtr->dictLimit;
+	}
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDictHC);
+
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4HC compressor");
+MODULE_DESCRIPTION("LZ4 HC compressor");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v5 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
  2017-01-26  7:57 ` [PATCH v5 0/5] Update LZ4 compressor module Sven Schmidt
  2017-01-26  7:57   ` [PATCH v5 1/5] lib: " Sven Schmidt
@ 2017-01-26  7:57   ` Sven Schmidt
  2017-01-26  7:57   ` [PATCH v5 3/5] crypto: Change LZ4 modules " Sven Schmidt
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-26  7:57 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch updates the unlz4 wrapper to work with the
updated LZ4 kernel module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 lib/decompress_unlz4.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 036fc88..1b0baf3 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 		error("NULL input pointer and missing fill function");
 		goto exit_1;
 	} else {
-		inp = large_malloc(lz4_compressbound(uncomp_chunksize));
+		inp = large_malloc(LZ4_compressBound(uncomp_chunksize));
 		if (!inp) {
 			error("Could not allocate input buffer");
 			goto exit_1;
@@ -136,7 +136,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			inp += 4;
 			size -= 4;
 		} else {
-			if (chunksize > lz4_compressbound(uncomp_chunksize)) {
+			if (chunksize > LZ4_compressBound(uncomp_chunksize)) {
 				error("chunk length is longer than allocated");
 				goto exit_2;
 			}
@@ -152,11 +152,14 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			out_len -= dest_len;
 		} else
 			dest_len = out_len;
-		ret = lz4_decompress(inp, &chunksize, outp, dest_len);
+
+		ret = LZ4_decompress_fast(inp, outp, dest_len);
+		chunksize = ret;
 #else
 		dest_len = uncomp_chunksize;
-		ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
-				&dest_len);
+
+		ret = LZ4_decompress_safe(inp, outp, chunksize, dest_len);
+		dest_len = ret;
 #endif
 		if (ret < 0) {
 			error("Decoding failed");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v5 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
  2017-01-26  7:57 ` [PATCH v5 0/5] Update LZ4 compressor module Sven Schmidt
  2017-01-26  7:57   ` [PATCH v5 1/5] lib: " Sven Schmidt
  2017-01-26  7:57   ` [PATCH v5 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
@ 2017-01-26  7:57   ` Sven Schmidt
  2017-01-26  7:57   ` [PATCH v5 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-26  7:57 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch updates the crypto modules using LZ4 compression
to work with the new LZ4 module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 crypto/lz4.c   | 21 ++++++++-------------
 crypto/lz4hc.c | 21 ++++++++-------------
 2 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/crypto/lz4.c b/crypto/lz4.c
index 99c1b2c..40fd2c2 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -66,15 +66,13 @@ static void lz4_exit(struct crypto_tfm *tfm)
 static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
 				 u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_default(src, dst,
+		slen, (int)((size_t)dlen), ctx);
 
-	err = lz4_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (!out_len)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -96,16 +94,13 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen));
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
+	if (out_len < 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return out_len;
 }
 
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 75ffc4a..6f16f96 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -65,15 +65,13 @@ static void lz4hc_exit(struct crypto_tfm *tfm)
 static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_HC(src, dst, slen,
+		(int)((size_t)dlen), LZ4HC_DEFAULT_CLEVEL, ctx);
 
-	err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (out_len == 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -97,16 +95,13 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
 				     u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen));
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
+	if (out_len < 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return out_len;
 }
 
 static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v5 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
  2017-01-26  7:57 ` [PATCH v5 0/5] Update LZ4 compressor module Sven Schmidt
                     ` (2 preceding siblings ...)
  2017-01-26  7:57   ` [PATCH v5 3/5] crypto: Change LZ4 modules " Sven Schmidt
@ 2017-01-26  7:57   ` Sven Schmidt
  2017-01-26  7:57   ` [PATCH v5 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
  2017-01-26  9:19   ` [PATCH v5 0/5] Update LZ4 compressor module Eric Biggers
  5 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-26  7:57 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch updates fs/pstore and fs/squashfs to use the updated
functions from the new LZ4 module.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 fs/pstore/platform.c      | 14 +++++++-------
 fs/squashfs/lz4_wrapper.c | 12 ++++++------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 729677e..85c4c58 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -342,31 +342,31 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_compress(in, inlen, out, &outlen, workspace);
-	if (ret) {
+	ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
+	if (!ret) {
 		pr_err("lz4_compress error, ret = %d!\n", ret);
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
-	if (ret) {
+	ret = LZ4_decompress_safe(in, out, inlen, outlen);
+	if (ret < 0) {
 		pr_err("lz4_decompress error, ret = %d!\n", ret);
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static void allocate_lz4(void)
 {
-	big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
+	big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
 	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
 	if (big_oops_buf) {
 		workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index ff4468b..95da653 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	struct squashfs_lz4 *stream = strm;
 	void *buff = stream->input, *data;
 	int avail, i, bytes = length, res;
-	size_t dest_len = output->length;
 
 	for (i = 0; i < b; i++) {
 		avail = min(bytes, msblk->devblksize - offset);
@@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 		put_bh(bh[i]);
 	}
 
-	res = lz4_decompress_unknownoutputsize(stream->input, length,
-					stream->output, &dest_len);
-	if (res)
+	res = LZ4_decompress_safe(stream->input, stream->output,
+		length, output->length);
+
+	if (res < 0)
 		return -EIO;
 
-	bytes = dest_len;
+	bytes = res;
 	data = squashfs_first_page(output);
 	buff = stream->output;
 	while (data) {
@@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	}
 	squashfs_finish_page(output);
 
-	return dest_len;
+	return res;
 }
 
 const struct squashfs_decompressor squashfs_lz4_comp_ops = {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v5 5/5] lib/lz4: Remove back-compat wrappers
  2017-01-26  7:57 ` [PATCH v5 0/5] Update LZ4 compressor module Sven Schmidt
                     ` (3 preceding siblings ...)
  2017-01-26  7:57   ` [PATCH v5 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
@ 2017-01-26  7:57   ` Sven Schmidt
  2017-01-26  9:19   ` [PATCH v5 0/5] Update LZ4 compressor module Eric Biggers
  5 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-26  7:57 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch removes the functions introduced as wrappers for providing
backwards compatibility to the prior LZ4 version.
They're not needed anymore since there's no callers left.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      | 73 ------------------------------------------------
 lib/lz4/lz4_compress.c   | 22 ---------------
 lib/lz4/lz4_decompress.c | 42 ----------------------------
 lib/lz4/lz4hc_compress.c | 23 ---------------
 4 files changed, 160 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 2072255..5958f7d 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -173,14 +173,6 @@ static inline int LZ4_compressBound(size_t isize)
 }
 
 /*
- * For backward compatibility
- */
-static inline int lz4_compressbound(size_t isize)
-{
-	return LZ4_COMPRESSBOUND(isize);
-}
-
-/*
  * LZ4_compress_default()
  *	Compresses 'sourceSize' bytes from buffer 'source'
  *	into already allocated 'dest' buffer of size 'maxOutputSize'.
@@ -249,23 +241,6 @@ int LZ4_compress_fast(const char *source, char *dest, int inputSize,
 int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
 	int targetDestSize, void *wrkmem);
 
-/*
- * lz4_compress()
- *	src    : source address of the original data
- *	src_len: size of the original data
- *	dst    : output buffer address of the compressed data
- *           This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len: is the output size, which is returned after compress done
- *	workmem: address of the working memory.
- *           This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *	return	: Success if return 0
- *            Error if return (< 0)
- *	note :	Destination buffer and workmem must be already allocated with
- *		the defined size.
- */
-int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem);
-
 /*-************************************************************************
  *	Decompression Functions
  **************************************************************************/
@@ -340,37 +315,6 @@ int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
 int LZ4_decompress_safe_partial(const char *source, char *dest,
 	int compressedSize, int targetOutputSize, int maxDecompressedSize);
 
-
-/*
- * lz4_decompress_unknownoutputsize() :
- *	src     : source address of the compressed data
- *	src_len : is the input size, therefore the compressed size
- *	dest    : output buffer address of the decompressed data
- *	dest_len: is the max size of the destination buffer, which is
- *            returned with actual size of decompressed data after
- *            decompress done
- * return: Success if return 0
- *         Error if return (< 0)
- * note:   Destination buffer must be already allocated.
- */
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-	unsigned char *dest, size_t *dest_len);
-
-/*
- * lz4_decompress() :
- *	src            : source address of the compressed data
- *	src_len        : is the input size,
- *                   which is returned after decompress done
- *	dest           : output buffer address of the decompressed data
- *	actual_dest_len: is the size of uncompressed data, supposing it's known
- *	return: Success if return 0
- *          Error if return (< 0)
- *	note  : Destination buffer must be already allocated.
- *          slightly faster than lz4_decompress_unknownoutputsize()
- */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-	unsigned char *dest, size_t actual_dest_len);
-
 /*-************************************************************************
  *	LZ4 HC Compression
  **************************************************************************/
@@ -399,23 +343,6 @@ int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
 	int compressionLevel, void *wrkmem);
 
 /*
- * lz4hc_compress()
- *	src    : source address of the original data
- *	src_len: size of the original data
- *	dst    : output buffer address of the compressed data
- *           This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len: is the output size, which is returned after compress done
- *	workmem: address of the working memory.
- *           This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
- * return	: Success if return 0
- *          Error if return (< 0)
- * note   : Destination buffer and workmem must be already allocated with
- *          the defined size.
- */
-int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem);
-
-/*
  *	These functions compress data in successive blocks of any size,
  *	using previous blocks as dictionary. One key assumption is that previous
  *	blocks (up to 64 KB) remain read-accessible while
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index e595896..b625226 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -872,27 +872,5 @@ int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
 }
 EXPORT_SYMBOL(LZ4_compress_fast_continue);
 
-/*-******************************
- *	For backwards compatibility
- ********************************/
-int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem) {
-	*dst_len = LZ4_compress_default(src, dst, (int)src_len,
-		(int)((size_t)dst_len), wrkmem);
-
-	/*
-	 * Prior lz4_compress will return -1 in case of error
-	 * and 0 on success
-	 * while new LZ4_compress_fast/default
-	 * returns 0 in case of error
-	 * and the output length on success
-	 */
-	if (!dst_len)
-		return -1;
-	else
-		return 0;
-}
-EXPORT_SYMBOL(lz4_compress);
-
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 compressor");
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index ca71375..053d09d 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -483,47 +483,5 @@ int LZ4_decompress_fast_usingDict(const char *source, char *dest,
 }
 EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
 
-/*-******************************
- *	For backwards compatibility
- ********************************/
-int lz4_decompress_unknownoutputsize(const unsigned char *src,
-	size_t src_len, unsigned char *dest, size_t *dest_len) {
-	 *dest_len = LZ4_decompress_safe(src, dest,
-		 (int)src_len, (int)((size_t)dest_len));
-
-		/*
-		 * Prior lz4_decompress_unknownoutputsize will return
-		 * 0 for success and a negative result for error
-		 * new LZ4_decompress_safe returns
-		 * - the length of data read on success
-		 * - and also a negative result on error
-		 * meaning when result > 0, we just return 0 here
-		 */
-		if (src_len > 0) {
-			return 0;
-		} else
-			return -1;
-}
-EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
-
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-	unsigned char *dest, size_t actual_dest_len) {
-	*src_len = LZ4_decompress_fast(src, dest, (int)actual_dest_len);
-
-	/*
-	 * Prior lz4_decompress will return
-	 * 0 for success and a negative result for error
-	 * new LZ4_decompress_fast returns
-	 * - the length of data read on success
-	 * - and also a negative result on error
-	 * meaning when result > 0, we just return 0 here
-	 */
-	if ((int)((size_t)src_len) > 0)
-		return 0;
-	else
-		return -1;
-}
-EXPORT_SYMBOL(lz4_decompress);
-
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 decompressor");
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index 880c778..a29dad0 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -603,29 +603,6 @@ int LZ4_compress_HC(const char *src, char *dst, int srcSize,
 }
 EXPORT_SYMBOL(LZ4_compress_HC);
 
-/*-******************************
- *	For backwards compatibility
- ********************************/
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-	unsigned char *dst, size_t *dst_len, void *wrkmem)
-{
-	*dst_len = LZ4_compress_HC(src, dst, (int)src_len,
-		(int)((size_t)dst_len), LZ4HC_DEFAULT_CLEVEL, wrkmem);
-
-	/*
-	 * Prior lz4hc_compress will return -1 in case of error
-	 * and 0 on success
-	 * while new LZ4_compress_HC
-	 * returns 0 in case of error
-	 * and the output length on success
-	 */
-	if (!dst_len)
-		return -1;
-	else
-		return 0;
-}
-EXPORT_SYMBOL(lz4hc_compress);
-
 /**************************************
  *	Streaming Functions
  **************************************/
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* Re: [PATCH v5 0/5] Update LZ4 compressor module
  2017-01-26  7:57 ` [PATCH v5 0/5] Update LZ4 compressor module Sven Schmidt
                     ` (4 preceding siblings ...)
  2017-01-26  7:57   ` [PATCH v5 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
@ 2017-01-26  9:19   ` Eric Biggers
  2017-01-26 14:15     ` Sven Schmidt
  5 siblings, 1 reply; 104+ messages in thread
From: Eric Biggers @ 2017-01-26  9:19 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck

On Thu, Jan 26, 2017 at 08:57:30AM +0100, Sven Schmidt wrote:
> 
> This patchset is for updating the LZ4 compression module to a version based
> on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
> which provides an "acceleration" parameter as a tradeoff between
> high compression ratio and high compression speed.
> 
> We want to use LZ4 fast in order to support compression in lustre
> and (mostly, based on that) investigate data reduction techniques in behalf of
> storage systems.
> 
> Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
> it is possible to enable applications to use fast and/or high compression
> depending on the usecase.
> For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
> LZ4 in the kernel.
> 

Hi Sven,

[For some reason I didn't receive patch 1/5 and had to get it from patchwork...
I'm not sure why.  I'm subscribed to linux-crypto but not linux-kernel.]

The proposed patch defines LZ4_MEMORY_USAGE to 10 which means that LZ4
compression will use a hash table of only 1024 bytes, containing only 256
entries, to find matches.  This differs from upstream LZ4 1.7.3, which uses
LZ4_MEMORY_USAGE of 14, as well as the previous LZ4 included in the Linux
kernel, both of which specify the hash table size to be 16384 bytes, containing
4096 entries.

Given that varying the hash table size is a trade-off between memory usage,
speed, and compression ratio, is this an intentional difference and has it been
benchmarked?

Also, in lz4defs.h:

> #if defined(__x86_64__)
>        typedef U64             reg_t;   /* 64-bits in x32 mode */
> #else
>        typedef size_t reg_t;    /* 32-bits in x32 mode */
> #endif

Are you sure this really needed over just always using size_t?

> #if LZ4_ARCH64
> #ifdef __BIG_ENDIAN__
> #define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
> #else
> #define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
> #endif
> #else
> #ifdef __BIG_ENDIAN__
> #define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
> #else
> #define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
> #endif
> #endif

LZ4_NBCOMMONBYTES() is defined incorrectly for 64-bit little endian; it should
be using __builtin_ctzll().

Nit: can you also clean up the weird indentation (e.g. double tabs) in
lz4defs.h?

Thanks,

Eric

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v5 0/5] Update LZ4 compressor module
  2017-01-26  9:19   ` [PATCH v5 0/5] Update LZ4 compressor module Eric Biggers
@ 2017-01-26 14:15     ` Sven Schmidt
  0 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-26 14:15 UTC (permalink / raw)
  To: Eric Biggers
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck

On Thu, Jan 26, 2017 at 01:19:53AM -0800, Eric Biggers wrote:
> On Thu, Jan 26, 2017 at 08:57:30AM +0100, Sven Schmidt wrote:
> > 
> > This patchset is for updating the LZ4 compression module to a version based
> > on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
> > which provides an "acceleration" parameter as a tradeoff between
> > high compression ratio and high compression speed.
> > 
> > We want to use LZ4 fast in order to support compression in lustre
> > and (mostly, based on that) investigate data reduction techniques in behalf of
> > storage systems.
> > 
> > Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
> > it is possible to enable applications to use fast and/or high compression
> > depending on the usecase.
> > For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
> > LZ4 in the kernel.
> > 
>

Hey Eric,
 
> Hi Sven,
> 
> [For some reason I didn't receive patch 1/5 and had to get it from patchwork...
> I'm not sure why.  I'm subscribed to linux-crypto but not linux-kernel.]

that's weird. I just experienced the first patch takes a little longer to get delivered because of its size.
Please let me know if the problem occurs again.
 
> The proposed patch defines LZ4_MEMORY_USAGE to 10 which means that LZ4
> compression will use a hash table of only 1024 bytes, containing only 256
> entries, to find matches.  This differs from upstream LZ4 1.7.3, which uses
> LZ4_MEMORY_USAGE of 14, as well as the previous LZ4 included in the Linux
> kernel, both of which specify the hash table size to be 16384 bytes, containing
> 4096 entries.
> 
> Given that varying the hash table size is a trade-off between memory usage,
> speed, and compression ratio, is this an intentional difference and has it been
> benchmarked?
> 

I believe I had some troubles with LZ4_MEMORY_USAGE of 14. But I may be wrong.
I will test that again and eventually adapt that value.

> Also, in lz4defs.h:
> 
> > #if defined(__x86_64__)
> >        typedef U64             reg_t;   /* 64-bits in x32 mode */
> > #else
> >        typedef size_t reg_t;    /* 32-bits in x32 mode */
> > #endif
> 
> Are you sure this really needed over just always using size_t?
>

No, actually there's just one use of that value and the upstream version uses size_t instead of reg_t 
in that particular place. So I will replace it with size_t.
 
> > #if LZ4_ARCH64
> > #ifdef __BIG_ENDIAN__
> > #define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
> > #else
> > #define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
> > #endif
> > #else
> > #ifdef __BIG_ENDIAN__
> > #define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
> > #else
> > #define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
> > #endif
> > #endif
> 
> LZ4_NBCOMMONBYTES() is defined incorrectly for 64-bit little endian; it should
> be using __builtin_ctzll().
> 

Indeed! Using the same values in if and else does not make sense at all.
Thank you for pointing that one out. I will fix it.

> Nit: can you also clean up the weird indentation (e.g. double tabs) in
> lz4defs.h?
> 
> Thanks,
> 
> Eric
> 

I'm wondering why checkpatch does not point out this kind of styling problem?
I did fix that in the other files but I think I missed lz4defs.h. Will fix the indentation.

Thanks,

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v6 0/5] Update LZ4 compressor module
  2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
                   ` (7 preceding siblings ...)
  2017-01-26  7:57 ` [PATCH v5 0/5] Update LZ4 compressor module Sven Schmidt
@ 2017-01-27 22:01 ` Sven Schmidt
  2017-01-27 22:02   ` [PATCH v6 1/5] lib: " Sven Schmidt
                     ` (4 more replies)
  2017-02-05 19:09 ` [PATCH v7 0/5] Update LZ4 compressor module Sven Schmidt
  2017-02-15 18:16 ` [PATCH v8 " Sven Schmidt
  10 siblings, 5 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-27 22:01 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck


This patchset is for updating the LZ4 compression module to a version based
on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
which provides an "acceleration" parameter as a tradeoff between
high compression ratio and high compression speed.

We want to use LZ4 fast in order to support compression in lustre
and (mostly, based on that) investigate data reduction techniques in behalf of
storage systems.

Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
it is possible to enable applications to use fast and/or high compression
depending on the usecase.
For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
LZ4 in the kernel.

LZ4 homepage: http://www.lz4.org/
LZ4 source repository: https://github.com/lz4/lz4
Source version: 1.7.3

Benchmark (taken from [1], Core i5-4300U @1.9GHz):
----------------|--------------|----------------|----------
Compressor      | Compression  | Decompression  | Ratio
----------------|--------------|----------------|----------
memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101

[1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html
fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version

[PATCH 1/5] lib: Update LZ4 compressor module
[PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
[PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
[PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
[PATCH 5/5] lib/lz4: Remove back-compat wrappers

v2:
- Changed order of the patches since in the initial patchset the lz4.h was in the
  last patch but was referenced by the other ones
- Split lib/decompress_unlz4.c in an own patch
- Fixed errors reported by the buildbot
- Further refactorings
- Added more appropriate copyright note to include/linux/lz4.h

v3:
- Adjusted the code to satisfy kernel coding style (checkpatch.pl)
- Made sure the changes to LZ4 in Kernel (overflow checks etc.)
  are included in the new module (they are)
- Removed the second LZ4_compressBound function with related name but
  different return type
- Corrected version number (was LZ4 1.7.3)
- Added missing LZ4 streaming functions

v4:
- Fixed kbuild errors
- Re-added lz4_compressbound as alias for LZ4_compressBound
  to ensure backwards compatibility
- Wrapped LZ4_hash5 with check for LZ4_ARCH64 since it is only used there
  and triggers an unused function warning when false

v5:
- Added a fifth patch to remove the back-compat wrappers introduced
  to ensure bisectibility between the patches (the functions are no longer
  needed since there's no callers left)

v6:
- Fixed LZ4_NBCOMMONBYTES() for 64-bit little endian
- Reset LZ4_MEMORY_USAGE to 14 (which is the value used in
  upstream LZ4 as well as the previous kernel module)
- Fixed that weird double-indentation in lz4defs.h and lz4.h
- Adjusted general styling issues in lz4defs.h
  (e.g. lines consisting of more than one instruction)
- Removed the architecture-dependent typedef to reg_t
  since upstream LZ4 is just using size_t and that works fine
- Changed error messages in pstore/platform.c:
  * LZ4_compress_default always returns 0 in case of an error
    (no need to print the return value)
  * LZ4_decompress_safe returns a negative error message
    (return value _does_ matter)

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v6 1/5] lib: Update LZ4 compressor module
  2017-01-27 22:01 ` [PATCH v6 " Sven Schmidt
@ 2017-01-27 22:02   ` Sven Schmidt
  2017-01-31 22:27     ` Jonathan Corbet
  2017-01-27 22:02   ` [PATCH v6 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2017-01-27 22:02 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch updates LZ4 kernel module to LZ4 v1.7.3 by Yann Collet.
The kernel module is inspired by the previous work by Chanho Min.
The updated LZ4 module will not break existing code since the patchset
contains appropriate changes.

API changes:

New method LZ4_compress_fast which differs from the variant available
in kernel by the new acceleration parameter,
allowing to trade compression ratio for more compression speed
and vice versa.

LZ4_decompress_fast is the respective decompression method,
featuring a very fast decoder (multiple GB/s per core),
able to reach RAM speed in multi-core systems.
The decompressor allows to decompress data compressed with
LZ4 fast as well as the LZ4 HC (high compression) algorithm.

Also the useful functions LZ4_decompress_safe_partial
LZ4_compress_destsize were added. The latter reverses the logic by trying to
compress as much data as possible from source to dest while the former aims
to decompress partial blocks of data.

A bunch of streaming functions were also added
which allow compressig/decompressing data in multiple steps
(so called "streaming mode").

The methods lz4_compress and lz4_decompress_unknownoutputsize
are now known as LZ4_compress_default respectivley LZ4_decompress_safe.
The old methods will be removed since there's no callers left in the code.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      |  589 ++++++++++++++++++++++---
 lib/lz4/lz4_compress.c   | 1103 ++++++++++++++++++++++++++++++++--------------
 lib/lz4/lz4_decompress.c |  696 ++++++++++++++++++-----------
 lib/lz4/lz4defs.h        |  334 ++++++++------
 lib/lz4/lz4hc_compress.c |  855 ++++++++++++++++++++++-------------
 5 files changed, 2500 insertions(+), 1077 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 6b784c5..ed59cb9 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -1,87 +1,554 @@
-#ifndef __LZ4_H__
-#define __LZ4_H__
-/*
- * LZ4 Kernel Interface
+/* LZ4 Kernel Interface
  *
  * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ *
+ * This file is based on the original header file
+ * for LZ4 - Fast LZ compression algorithm.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *		* Redistributions of source code must retain the above copyright
+ *		  notice, this list of conditions and the following disclaimer.
+ *		* Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
+ */
+
+#ifndef __LZ4_H__
+#define __LZ4_H__
+
+#include <linux/types.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
+/*-************************************************************************
+ *	CONSTANTS
+ **************************************************************************/
+/*
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes
+ * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 14
+
+#define LZ4_MAX_INPUT_SIZE	0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)	(\
+	(unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
+	? 0 \
+	: (isize) + ((isize)/255) + 16)
+
+#define LZ4_ACCELERATION_DEFAULT 1
+#define LZ4_HASHLOG	 (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+#define LZ4HC_MIN_CLEVEL			3
+#define LZ4HC_DEFAULT_CLEVEL	9
+#define LZ4HC_MAX_CLEVEL			16
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE-1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+/*-************************************************************************
+ *	STREAMING CONSTANTS AND STRUCTURES
+ **************************************************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE	(LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
+
+#define LZ4_STREAMDECODESIZE_U64	4
+#define LZ4_STREAMDECODESIZE		 (LZ4_STREAMDECODESIZE_U64 * \
+	sizeof(unsigned long long))
+
+/*
+ * LZ4_stream_t :
+ * information structure to track an LZ4 stream.
+ */
+typedef struct {
+	uint32_t hashTable[LZ4_HASH_SIZE_U32];
+	uint32_t currentOffset;
+	uint32_t initCheck;
+	const uint8_t *dictionary;
+	uint8_t *bufferStart;
+	uint32_t dictSize;
+} LZ4_stream_t_internal;
+typedef union {
+	unsigned long long table[LZ4_STREAMSIZE_U64];
+	LZ4_stream_t_internal internal_donotuse;
+} LZ4_stream_t;
+
+/*
+ * LZ4_streamHC_t :
+ * information structure to track an LZ4HC stream.
  */
-#define LZ4_MEM_COMPRESS	(16384)
-#define LZ4HC_MEM_COMPRESS	(262144 + (2 * sizeof(unsigned char *)))
+typedef struct {
+	unsigned int	 hashTable[LZ4HC_HASHTABLESIZE];
+	unsigned short	 chainTable[LZ4HC_MAXD];
+	/* next block to continue on current prefix */
+	const unsigned char *end;
+	/* All index relative to this position */
+	const unsigned char *base;
+	/* alternate base for extDict */
+	const unsigned char *dictBase;
+	/* below that point, need extDict */
+	unsigned int	 dictLimit;
+	/* below that point, no more dict */
+	unsigned int	 lowLimit;
+	/* index from which to continue dict update */
+	unsigned int	 nextToUpdate;
+	unsigned int	 compressionLevel;
+} LZ4HC_CCtx_internal;
+typedef union {
+	size_t table[LZ4_STREAMHCSIZE_SIZET];
+	LZ4HC_CCtx_internal internal_donotuse;
+} LZ4_streamHC_t;
 
 /*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
+ * LZ4_streamDecode_t :
+ * information structure to track an LZ4 stream during decompression.
+ * init this structure using LZ4_setStreamDecode
+ * (or memset()) before first use
  */
-static inline size_t lz4_compressbound(size_t isize)
+typedef struct {
+	const uint8_t *externalDict;
+	size_t extDictSize;
+	const uint8_t *prefixEnd;
+	size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+typedef union {
+	unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+	LZ4_streamDecode_t_internal internal_donotuse;
+} LZ4_streamDecode_t;
+
+/*-************************************************************************
+ *	SIZE OF STATE
+ **************************************************************************/
+#define LZ4_MEM_COMPRESS	LZ4_STREAMSIZE
+#define LZ4HC_MEM_COMPRESS	LZ4_STREAMHCSIZE
+
+/*-************************************************************************
+ *	Compression Functions
+ **************************************************************************/
+
+/*
+ * LZ4_compressBound() :
+ *	Provides the maximum size that LZ4 may output in a "worst case" scenario
+ *	(input data not compressible)
+ */
+static inline int LZ4_compressBound(size_t isize)
+{
+	return LZ4_COMPRESSBOUND(isize);
+}
+
+/*
+ * For backward compatibility
+ */
+static inline int lz4_compressbound(size_t isize)
 {
-	return isize + (isize / 255) + 16;
+	return LZ4_COMPRESSBOUND(isize);
 }
 
 /*
+ * LZ4_compress_default()
+ *	Compresses 'sourceSize' bytes from buffer 'source'
+ *	into already allocated 'dest' buffer of size 'maxOutputSize'.
+ *	Compression is guaranteed to succeed if
+ *	'maxOutputSize' >= LZ4_compressBound(inputSize).
+ *	It also runs faster, so it's a recommended setting.
+ *	If the function cannot compress 'source'
+ *	into a more limited 'dest' budget,
+ *	compression stops *immediately*,
+ *	and the function result is zero.
+ *	As a consequence, 'dest' content is not valid.
+ *
+ *	source       : source address of the original data
+ *	dest         : output buffer address
+ *                 of the compressed data
+ *	inputSize    : Max supported value is
+ *                 LZ4_MAX_INPUT_SIZE
+ *	maxOutputSize: full or partial size of buffer 'dest'
+ *                 (which must be already allocated)
+ *	workmem      : address of the working memory.
+ *                 This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return       : the number of bytes written into buffer 'dest'
+ *   (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem);
+
+/*
+ * LZ4_compress_fast() :
+ *	Same as LZ4_compress_default(),
+ *	but allows to select an "acceleration" factor.
+ *	The larger the acceleration value,
+ *	the faster the algorithm,
+ *	but also the lesser the compression.
+ *	It's a trade-off. It can be fine tuned,
+ *	with each successive value
+ *	providing roughly +~3% to speed.
+ *	An acceleration value of "1"
+ *	is the same as regular LZ4_compress_default()
+ *	Values <= 0 will be replaced by
+ *	LZ4_ACCELERATION_DEFAULT, which is 1.
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem, int acceleration);
+
+/*
+ * LZ4_compress_destSize()
+ * Reverse the logic, by compressing as much data as possible
+ * from 'source' buffer into already allocated buffer 'dest'
+ * of size 'targetDestSize'.
+ * This function either compresses the entire 'source' content into 'dest'
+ * if it's large enough, or fill 'dest' buffer completely with as much data as
+ * ossible from 'source'.
+ *
+ *	source	      : source address of the original data
+ *	dest	        : output buffer address of the compressed data
+ *	inputSize	    : Max supported value is LZ4_MAX_INPUT_SIZE
+ *	*sourceSizePtr: will be modified to indicate how many bytes where read
+ *                  from 'source' to fill 'dest'.
+ *                  New value is necessarily <= old value.
+ *	workmem       : address of the working memory.
+ *                  This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
+			 or 0 if compression fails
+ */
+int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
+	int targetDestSize, void *wrkmem);
+
+/*
  * lz4_compress()
- *	src     : source address of the original data
- *	src_len : size of the original data
- *	dst	: output buffer address of the compressed data
- *		This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len : is the output size, which is returned after compress done
- *	workmem : address of the working memory.
- *		This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer and workmem must be already allocated with
+ *	src    : source address of the original data
+ *	src_len: size of the original data
+ *	dst    : output buffer address of the compressed data
+ *           This requires 'dst' of size LZ4_COMPRESSBOUND.
+ *	dst_len: is the output size, which is returned after compress done
+ *	workmem: address of the working memory.
+ *           This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return	: Success if return 0
+ *            Error if return (< 0)
+ *	note :	Destination buffer and workmem must be already allocated with
  *		the defined size.
  */
-int lz4_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
-  * lz4hc_compress()
-  *	 src	 : source address of the original data
-  *	 src_len : size of the original data
-  *	 dst	 : output buffer address of the compressed data
-  *		This requires 'dst' of size LZ4_COMPRESSBOUND.
-  *	 dst_len : is the output size, which is returned after compress done
-  *	 workmem : address of the working memory.
-  *		This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
-  *	 return  : Success if return 0
-  *		   Error if return (< 0)
-  *	 note :  Destination buffer and workmem must be already allocated with
-  *		 the defined size.
-  */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
-/*
- * lz4_decompress()
- *	src     : source address of the compressed data
- *	src_len : is the input size, whcih is returned after decompress done
- *	dest	: output buffer address of the decompressed data
- *	actual_dest_len: is the size of uncompressed data, supposing it's known
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
- *		slightly faster than lz4_decompress_unknownoutputsize()
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*-************************************************************************
+ *	Decompression Functions
+ **************************************************************************/
+
+/*
+ * LZ4_decompress_fast()
+ *	source      : source address of the compressed data
+ *	dst         : output buffer address of the decompressed data
+ *	originalSize: is the original and therefore uncompressed size
+ *	return : the number of bytes read from the source buffer
+ *           (in other words, the compressed size)
+ *           If the source stream is detected malformed, the function will
+ *           stop decoding and return a negative result.
+ *           Destination buffer must be already allocated.
+ *           Its size must be a minimum of 'originalSize' bytes.
+ *	note   : This function fully respect memory boundaries
+ *           for properly formed compressed data.
+ *           It is a bit faster than LZ4_decompress_safe().
+ *           However, it does not provide any protection against intentionally
+ *           modified data stream (malicious input).
+ *           Use this function in trusted environment only
+ *           (data to decode comes from a trusted source).
  */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len);
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
+
+/*
+ * LZ4_decompress_safe()
+ *	source             : source address of the compressed data
+ *	dest               : output buffer address of the decompressed data
+ *	compressedSize     : is the precise full size of the compressed block.
+ *	maxDecompressedSize: is the size of destination buffer,
+ *                       which must be already allocated.
+ *	return : the number of bytes decompressed into destination buffer
+ *           (necessarily <= maxDecompressedSize)
+ *           If destination buffer is not large enough, decoding will stop
+ *           and output an error code (<0).
+ *           If the source stream is detected malformed, the function will
+ *           stop decoding and return a negative result.
+ *           This function is protected against buffer overflow exploits,
+ *           including malicious data packets. It never writes outside
+ *           output buffer, nor reads outside input buffer.
+ */
+int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
 
 /*
- * lz4_decompress_unknownoutputsize()
+ * LZ4_decompress_safe_partial() :
+ * This function decompress a compressed block of size 'compressedSize'
+ * at position 'source' into destination buffer 'dest'
+ * of size 'maxDecompressedSize'.
+ * The function tries to stop decompressing operation as soon as
+ * 'targetOutputSize' has been reached, reducing decompression time.
+ *
+ *	source             : source address of the compressed data
+ *	dest               : output buffer address of the decompressed data
+ *	compressedSize     : is the precise full size of the compressed block.
+ *	targetOutputSize   : the decompression operation will try
+ *                       to stop as soon as 'targetOutputSize'
+ *                       has been reached
+ *	maxDecompressedSize: is the size of destination buffer,
+ *                       which must be already allocated.
+ *	return  : the number of bytes decoded in the destination buffer
+ *            (necessarily <= maxDecompressedSize)
+ *	Note    : this number can be < 'targetOutputSize' should the compressed
+ *            block to decode be smaller. Always control how many bytes
+ *            were decoded. If the source stream is detected malformed,
+ *            the function will stop decoding and return a negative result.
+ *            This function never writes outside of output buffer,
+ *            and never reads outside of input buffer.
+ *            It is therefore protected against malicious data packets
+ */
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+
+/*
+ * lz4_decompress_unknownoutputsize() :
  *	src     : source address of the compressed data
  *	src_len : is the input size, therefore the compressed size
- *	dest	: output buffer address of the decompressed data
+ *	dest    : output buffer address of the decompressed data
  *	dest_len: is the max size of the destination buffer, which is
- *			returned with actual size of decompressed data after
- *			decompress done
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
+ *            returned with actual size of decompressed data after
+ *            decompress done
+ * return: Success if return 0
+ *         Error if return (< 0)
+ * note:   Destination buffer must be already allocated.
  */
 int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len);
+	unsigned char *dest, size_t *dest_len);
+
+/*
+ * lz4_decompress() :
+ *	src            : source address of the compressed data
+ *	src_len        : is the input size,
+ *                   which is returned after decompress done
+ *	dest           : output buffer address of the decompressed data
+ *	actual_dest_len: is the size of uncompressed data, supposing it's known
+ *	return: Success if return 0
+ *          Error if return (< 0)
+ *	note  : Destination buffer must be already allocated.
+ *          slightly faster than lz4_decompress_unknownoutputsize()
+ */
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len);
+
+/*-************************************************************************
+ *	LZ4 HC Compression
+ **************************************************************************/
+
+/*! LZ4_compress_HC() :
+ * Compress data from `src` into `dst`, using the more powerful
+ * but slower "HC" algorithm. dst` must be already allocated.
+ * Compression is guaranteed to succeed if
+ * `dstCapacity >= LZ4_compressBound(srcSize)
+ * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE
+ *
+ *	src             : source address of the original data
+ *	dst             : output buffer address of the compressed data
+ *	srcSize         : Max supported value is LZ4_MAX_INPUT_SIZE
+ *	dstCapacity     : full or partial size of buffer 'dst'
+ *                    (which must be already allocated)
+ *	compressionLevel: Recommended values are between 4 and 9, although any
+ *                    value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *                    Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ *	wrkmem          : address of the working memory.
+ *                    This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+ *	return : the number of bytes written into 'dst'
+ *           or 0 if compression fails.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
+	int compressionLevel, void *wrkmem);
+
+/*
+ * lz4hc_compress()
+ *	src    : source address of the original data
+ *	src_len: size of the original data
+ *	dst    : output buffer address of the compressed data
+ *           This requires 'dst' of size LZ4_COMPRESSBOUND.
+ *	dst_len: is the output size, which is returned after compress done
+ *	workmem: address of the working memory.
+ *           This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
+ * return	: Success if return 0
+ *          Error if return (< 0)
+ * note   : Destination buffer and workmem must be already allocated with
+ *          the defined size.
+ */
+int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*
+ *	These functions compress data in successive blocks of any size,
+ *	using previous blocks as dictionary. One key assumption is that previous
+ *	blocks (up to 64 KB) remain read-accessible while
+ *	compressing next blocks.
+ *	There is an exception for ring buffers, which can be smaller than 64 KB.
+ *	Ring buffers scenario is automatically detected and handled by
+ *	LZ4_compress_HC_continue().
+ *	Before starting compression, state must be properly initialized,
+ *	using LZ4_resetStreamHC().
+ *	A first "fictional block" can then be designated as initial dictionary,
+ *	using LZ4_loadDictHC() (Optional).
+ *	Then, use LZ4_compress_HC_continue() to compress each successive block.
+ *	Previous memory blocks (including initial dictionary when present) must
+ *	remain accessible and unmodified during compression.
+ *	'dst' buffer should be sized to handle worst case scenarios, using
+ *	LZ4_compressBound(), to ensure operation success.
+ *	If, for any reason, previous data blocks can't be preserved unmodified
+ *	in memory during next compression block,
+ *	you must save it to a safer memory space, using LZ4_saveDictHC().
+ *	Return value of LZ4_saveDictHC() is the size of dictionary
+ *	effectively saved into 'safeBuffer'.
+ */
+void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
+int	LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
+	int dictSize);
+int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize);
+int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
+	int maxDictSize);
+
+/*-*********************************************
+ *	Streaming Compression Functions
+ ***********************************************/
+/*
+ * LZ4_resetStream() :
+ *	An LZ4_stream_t structure can be allocated once
+ *	and re-used multiple times.
+ *	Use this function to init an allocated `LZ4_stream_t` structure
+ *	and start a new compression.
+ */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
+
+/*
+ * LZ4_loadDict() :
+ *	Use this function to load a static dictionary into LZ4_stream.
+ *	Any previous data will be forgotten, only 'dictionary'
+ *	will remain in memory.
+ *	Loading a size of 0 is allowed.
+ *	Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
+	int dictSize);
+
+/*
+ * LZ4_compress_fast_continue() :
+ *	Compress buffer content 'src',
+ *	using data from previously compressed blocks
+ *	as dictionary to improve compression ratio.
+ *	Important : Previous data blocks are assumed to still
+ *	be present and unmodified !
+ *	'dst' buffer must be already allocated.
+ *	If maxDstSize >= LZ4_compressBound(srcSize),
+ *	compression is guaranteed to succeed, and runs faster.
+ *	If not, and if compressed data cannot fit into 'dst' buffer size,
+ *	compression stops, and function returns a zero.
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize, int acceleration);
+
+/*
+ * LZ4_saveDict() :
+ *	If previously compressed data block is not guaranteed
+ *	to remain available at its memory location,
+ *	save it into a safer place (char *safeBuffer).
+ *	Note : you don't need to call LZ4_loadDict() afterwards,
+ *         dictionary is immediately usable, you can therefore call
+ *         LZ4_compress_fast_continue().
+ *	Return : saved dictionary size in bytes (necessarily <= dictSize),
+ *           or 0 if error.
+ */
+int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
+
+/*
+ * LZ4_setStreamDecode() :
+ *	Use this function to instruct where to find the dictionary.
+ *	Setting a size of 0 is allowed (same effect as reset).
+ *	@return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize);
+
+/*
+ * LZ4_decompress_*_continue() :
+ *	These decoding functions allow decompression of multiple blocks
+ *	in "streaming" mode.
+ *	Previously decoded blocks *must* remain available at the memory position
+ *	where they were decoded (up to 64 KB)
+ *	In the case of a ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize);
+
+/*
+ * LZ4_decompress_*_usingDict() :
+ *	These decoding functions work the same as
+ *	a combination of LZ4_setStreamDecode() followed by
+ *	LZ4_decompress_*_continue()
+ *	They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize, const char *dictStart,
+	int dictSize);
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize);
+
 #endif
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 28321d8..295c71b 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -1,19 +1,16 @@
 /*
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *		* Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *		* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,417 +22,875 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lz4.h>
 #include <asm/unaligned.h>
-#include "lz4defs.h"
 
-/*
- * LZ4_compressCtx :
- * -----------------
- * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
- * maximum size 'maxOutputSize'.  * If it cannot achieve it, compression
- * will stop, and result of the function will be zero.
- * return : the number of bytes written in buffer 'dest', or 0 if the
- * compression fails
- */
-static inline int lz4_compressctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+/*-******************************
+ *	Compression functions
+ ********************************/
+static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
 {
-	HTYPE *hashtable = (HTYPE *)ctx;
-	const u8 *ip = (u8 *)source;
+	if (tableType == byU16)
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH*8) - (LZ4_HASHLOG + 1)));
+	else
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH*8) - LZ4_HASHLOG));
+}
+
 #if LZ4_ARCH64
-	const BYTE * const base = ip;
+static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+{
+	const U32 hashLog = (tableType == byU16)
+		? LZ4_HASHLOG + 1
+		: LZ4_HASHLOG;
+
+#ifdef __LITTLE_ENDIAN__
+	static const U64 prime5bytes = 889523592379ULL;
+
+	return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
 #else
-	const int base = 0;
+	static const U64 prime8bytes = 11400714785074694791ULL;
+
+	return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
 #endif
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+}
+#endif
+
+static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
+{
+#if LZ4_ARCH64
+	if (tableType == byU32)
+		return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+#endif
+
+	return LZ4_hash4(LZ4_read32(p), tableType);
+}
+
+static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
+	tableType_t const tableType, const BYTE *srcBase)
+{
+	switch (tableType) {
+	case byPtr:
+	{
+		const BYTE **hashTable = (const BYTE **)tableBase;
+
+		hashTable[h] = p;
+		return;
+	}
+	case byU32:
+	{
+		U32 *hashTable = (U32 *) tableBase;
+
+		hashTable[h] = (U32)(p - srcBase);
+		return;
+	}
+	case byU16:
+	{
+		U16 *hashTable = (U16 *) tableBase;
+
+		hashTable[h] = (U16)(p - srcBase);
+		return;
+	}
+	}
+}
+
+static inline void LZ4_putPosition(const BYTE *p, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	if (tableType == byPtr) {
+		const BYTE **hashTable = (const BYTE **) tableBase;
+
+		return hashTable[h];
+	}
+
+	if (tableType == byU32) {
+		const U32 * const hashTable = (U32 *) tableBase;
+
+		return hashTable[h] + srcBase;
+	}
+
+	{
+		/* default, to ensure a return */
+		const U16 * const hashTable = (U16 *) tableBase;
+		return hashTable[h] + srcBase;
+	}
+}
+
+static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+
+/*
+ * LZ4_compress_generic() :
+ * inlined, to ensure branches are decided at compilation time
+ */
+static inline int LZ4_compress_generic(
+	LZ4_stream_t_internal * const dictPtr,
+	const char * const source,
+	char * const dest,
+	const int inputSize,
+	const int maxOutputSize,
+	const limitedOutput_directive outputLimited,
+	const tableType_t tableType,
+	const dict_directive dict,
+	const dictIssue_directive dictIssue,
+	const U32 acceleration)
+{
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *base;
+	const BYTE *lowLimit;
+	const BYTE * const lowRefLimit = ip - dictPtr->dictSize;
+	const BYTE * const dictionary = dictPtr->dictionary;
+	const BYTE * const dictEnd = dictionary + dictPtr->dictSize;
+	const size_t dictDelta = dictEnd - (const BYTE *)source;
+	const BYTE *anchor = (const BYTE *) source;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dest;
+	BYTE * const olimit = op + maxOutputSize;
+
+	U32 forwardH;
+	size_t refDelta = 0;
+
+	/* Init conditions */
+	if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) {
+		/* Unsupported inputSize, too large (or negative) */
+		return 0;
+	}
+	switch (dict) {
+	case noDict:
+	default:
+		base = (const BYTE *)source;
+		lowLimit = (const BYTE *)source;
+		break;
+	case withPrefix64k:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source - dictPtr->dictSize;
+		break;
+	case usingExtDict:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source;
+		break;
+	}
+
+	if ((tableType == byU16)
+		&& (inputSize >= LZ4_64Klimit)) {
+		/* Size too large (not within 64K limit) */
+		return 0;
+	}
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+	if (inputSize < LZ4_minLength) {
+		/* Input too small, no compression (all literals) */
+		goto _last_literals;
+	}
 
 	/* First Byte */
-	hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-	ip++;
-	forwardh = LZ4_HASH_VALUE(ip);
+	LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (unlikely(forwardip > mflimit))
-				goto _last_literals;
-
-			forwardh = LZ4_HASH_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = ip - base;
-		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+		{ const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = acceleration
+				<< LZ4_skipTrigger;
+
+			do {
+				U32 const h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h,
+					dictPtr->hashTable,
+					tableType, base);
+				if (dict == usingExtDict) {
+					if (match < (const BYTE *)source) {
+						refDelta = dictDelta;
+						lowLimit = dictionary;
+					} else {
+						refDelta = 0;
+						lowLimit = (const BYTE *)source;
+				}	 }
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
+					tableType, base);
+
+			} while (((dictIssue == dictSmall)
+					? (match < lowRefLimit)
+					: 0)
+				|| ((tableType == byU16)
+					? 0
+					: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match + refDelta)
+					!= LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source) &&
-			unlikely(ip[-1] == ref[-1])) {
+		while (((ip > anchor) & (match + refDelta > lowLimit))
+			&& (unlikely(ip[-1] == match[refDelta - 1]))) {
 			ip--;
-			ref--;
-		}
+			match--;
+			}
 
-		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
-			(length >> 8) > oend))
-			return 0;
+		/* Encode Literals */
+		{ unsigned const int litLength = (unsigned int)(ip - anchor);
 
-		if (length >= (int)RUN_MASK) {
-			int len;
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+			token = op++;
+			if ((outputLimited) &&
+				/* Check output buffer overflow */
+				(unlikely(op + litLength +
+					(2 + 1 + LASTLITERALS) +
+					(litLength/255) > olimit)))
+				return 0;
+			if (litLength >= RUN_MASK) {
+				int len = (int)litLength - RUN_MASK;
+
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255 ; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength<<ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
-		while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
-			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 		/* Encode MatchLength */
-		length = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
-			return 0;
-		if (length >= (int)ML_MASK) {
-			*token += ML_MASK;
-			length -= ML_MASK;
-			for (; length > 509 ; length -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (length > 254) {
-				length -= 255;
-				*op++ = 255;
+		{	 unsigned int matchCode;
+
+			if ((dict == usingExtDict)
+				&& (lowLimit == dictionary)) {
+				const BYTE *limit;
+
+				match += refDelta;
+				limit = ip + (dictEnd - match);
+				if (limit > matchlimit)
+					limit = matchlimit;
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, limit);
+				ip += MINMATCH + matchCode;
+				if (ip == limit) {
+					unsigned const int more = LZ4_count(ip,
+						(const BYTE *)source,
+						matchlimit);
+
+					matchCode += more;
+					ip += more;
+				}
+			} else {
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, matchlimit);
+				ip += MINMATCH + matchCode;
 			}
-			*op++ = (u8)length;
-		} else
-			*token += length;
+
+			if (outputLimited &&
+				/* Check output buffer overflow */
+				(unlikely(op +
+					(1 + LASTLITERALS) +
+					(matchCode>>8) > olimit)))
+				return 0;
+			if (matchCode >= ML_MASK) {
+				*token += ML_MASK;
+				matchCode -= ML_MASK;
+				LZ4_write32(op, 0xFFFFFFFF);
+				while (matchCode >= 4*255) {
+					op += 4;
+					LZ4_write32(op, 0xFFFFFFFF);
+					matchCode -= 4*255;
+				}
+				op += matchCode / 255;
+				*op++ = (BYTE)(matchCode % 255);
+			} else
+				*token += (BYTE)(matchCode);
+		}
+
+		anchor = ip;
 
 		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		if (ip > mflimit)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
+		LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH_VALUE(ip)];
-		hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
+		match = LZ4_getPosition(ip, dictPtr->hashTable,
+			tableType, base);
+		if (dict == usingExtDict) {
+			if (match < (const BYTE *)source) {
+				refDelta = dictDelta;
+				lowLimit = dictionary;
+			} else {
+				refDelta = 0;
+				lowLimit = (const BYTE *)source;
+			}
+		}
+		LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+		if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
+			&& (match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
 			token = op++;
 			*token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (((char *)op - dest) + lastrun + 1
-		+ ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
-		return 0;
-
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{	 size_t const lastRun = (size_t)(iend - anchor);
+		if ((outputLimited) &&
+			/* Check output buffer overflow */
+			((op - (BYTE *)dest) + lastRun + 1 +
+			((lastRun + 255 - RUN_MASK)/255) > (U32)maxOutputSize))
+			return 0;
+		if (lastRun >= RUN_MASK) {
+			size_t accumulator = lastRun - RUN_MASK;
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255 ; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRun);
+		op += lastRun;
+	}
 
 	/* End */
-	return (int)(((char *)op) - dest);
+	return (int) (((char *)op) - dest);
 }
 
-static inline int lz4_compress64kctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
+	int inputSize, int maxOutputSize, int acceleration)
 {
-	u16 *hashtable = (u16 *)ctx;
-	const u8 *ip = (u8 *) source;
-	const u8 *anchor = ip;
-	const u8 *const base = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int len, length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+	#if LZ4_ARCH64
+	tableType_t tableType = byU32;
+	#else
+	tableType_t tableType = byPtr;
+	#endif
+
+	LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
+
+	LZ4_resetStream((LZ4_stream_t *)state);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
+
+	if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, tableType, noDict,
+				noDictIssue, acceleration);
+	} else {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, tableType, noDict,
+				noDictIssue, acceleration);
+	}
+}
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem, int acceleration)
+{
+	return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize,
+		maxOutputSize, acceleration);
+}
+EXPORT_SYMBOL(LZ4_compress_fast);
+
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem)
+{
+	return LZ4_compress_fast(source, dest, inputSize,
+		maxOutputSize, wrkmem, 1);
+}
+EXPORT_SYMBOL(LZ4_compress_default);
+
+/*-******************************
+ *	*_destSize() variant
+ ********************************/
+
+static int LZ4_compress_destSize_generic(
+	LZ4_stream_t_internal * const ctx,
+	const char * const src,
+	char * const dst,
+	int * const srcSizePtr,
+	const int targetDstSize,
+	const tableType_t tableType)
+{
+	const BYTE *ip = (const BYTE *) src;
+	const BYTE *base = (const BYTE *) src;
+	const BYTE *lowLimit = (const BYTE *) src;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + *srcSizePtr;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dst;
+	BYTE * const oend = op + targetDstSize;
+	BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */
+		- 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */;
+	BYTE * const oMaxMatch = op + targetDstSize
+		- (LASTLITERALS + 1 /* token */);
+	BYTE * const oMaxSeq = oMaxLit - 1 /* token */;
+
+	U32 forwardH;
+
+	/* Init conditions */
+	/* Impossible to store anything */
+	if (targetDstSize < 1)
+		return 0;
+	/* Unsupported input size, too large (or negative) */
+	if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE)
+		return 0;
+	/* Size too large (not within 64K limit) */
+	if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit))
+		return 0;
+	/* Input too small, no compression (all literals) */
+	if (*srcSizePtr < LZ4_minLength)
+		goto _last_literals;
 
 	/* First Byte */
-	ip++;
-	forwardh = LZ4_HASH64K_VALUE(ip);
+	*srcSizePtr = 0;
+	LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (forwardip > mflimit)
-				goto _last_literals;
-
-			forwardh = LZ4_HASH64K_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = (u16)(ip - base);
-		} while (A32(ref) != A32(ip));
+		{	 const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = 1 << LZ4_skipTrigger;
+
+			do {
+				U32 h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h, ctx->hashTable,
+					tableType, base);
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h,
+					ctx->hashTable, tableType,
+					base);
+
+			} while (((tableType == byU16)
+				? 0
+				: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match) != LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source)
-			&& (ip[-1] == ref[-1])) {
-			ip--;
-			ref--;
-		}
+		while ((ip > anchor)
+			&& (match > lowLimit)
+			&& (unlikely(ip[-1] == match[-1]))) {
+			ip--; match--;
+			}
 
 		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* Check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS)
-			+ (length >> 8) > oend))
-			return 0;
-		if (length >= (int)RUN_MASK) {
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+		{	 unsigned int litLength = (unsigned int)(ip - anchor);
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
+			token = op++;
+			if (op + ((litLength + 240)/255)
+				+ litLength > oMaxLit) {
+				/* Not enough space for a last match */
+				op--;
+				goto _last_literals;
+			}
+			if (litLength >= RUN_MASK) {
+				unsigned int len = litLength - RUN_MASK;
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255 ; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength<<ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
+		/* Encode MatchLength */
+		{	 size_t matchLength = LZ4_count(ip + MINMATCH,
+			match + MINMATCH, matchlimit);
 
-		while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
+			if (op + ((matchLength + 240)/255) > oMaxMatch) {
+				/* Match description too long : reduce it */
+				matchLength = (15 - 1) + (oMaxMatch - op) * 255;
 			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
+			ip += MINMATCH + matchLength;
+
+			if (matchLength >= ML_MASK) {
+				*token += ML_MASK;
+				matchLength -= ML_MASK;
+				while (matchLength >= 255) {
+					matchLength -= 255; *op++ = 255;
+				}
+				*op++ = (BYTE)matchLength;
+			} else
+				*token += (BYTE)(matchLength);
 		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 
-		/* Encode MatchLength */
-		len = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
-			return 0;
-		if (len >= (int)ML_MASK) {
-			*token += ML_MASK;
-			len -= ML_MASK;
-			for (; len > 509 ; len -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (len > 254) {
-				len -= 255;
-				*op++ = 255;
-			}
-			*op++ = (u8)len;
-		} else
-			*token += len;
+		anchor = ip;
 
-		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		/* Test end of block */
+		if (ip > mflimit)
+			break;
+		if (op > oMaxSeq)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
+		LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
-		hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
-		if (A32(ref) == A32(ip)) {
-			token = op++;
-			*token = 0;
+		match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
+		LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+
+		if ((match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match) == LZ4_read32(ip))) {
+			token = op++; *token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH64K_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
-		return 0;
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{	 size_t lastRunSize = (size_t)(iend - anchor);
+
+		if (op + 1 /* token */
+			+ ((lastRunSize + 240)/255) /* litLength */
+			+ lastRunSize /* literals */ > oend) {
+			/* adapt lastRunSize to fill 'dst' */
+			lastRunSize	= (oend - op) - 1;
+			lastRunSize -= (lastRunSize + 240)/255;
+		}
+		ip = anchor + lastRunSize;
+
+		if (lastRunSize >= RUN_MASK) {
+			size_t accumulator = lastRunSize - RUN_MASK;
+
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255 ; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRunSize<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRunSize);
+		op += lastRunSize;
+	}
+
 	/* End */
-	return (int)(((char *)op) - dest);
+	*srcSizePtr = (int) (((const char *)ip) - src);
+	return (int) (((char *)op) - dst);
 }
 
-int lz4_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_destSize_extState(LZ4_stream_t *state, const char *src,
+	char *dst, int *srcSizePtr, int targetDstSize)
 {
-	int ret = -1;
-	int out_len = 0;
+	#if LZ4_ARCH64
+	tableType_t tableType = byU32;
+	#else
+	tableType_t tableType = byPtr;
+	#endif
+
+	LZ4_resetStream(state);
+
+	if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {
+		/* compression success is guaranteed */
+		return LZ4_compress_fast_extState(
+			state, src, dst, *srcSizePtr,
+			targetDstSize, 1);
+	} else {
+		if (*srcSizePtr < LZ4_64Klimit)
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, byU16);
+		else
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, tableType);
+	}
+}
 
-	if (src_len < LZ4_64KLIMIT)
-		out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
-	else
-		out_len = lz4_compressctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
 
-	if (out_len < 0)
-		goto exit;
+int LZ4_compress_destSize(const char *src, char *dst, int *srcSizePtr,
+	int targetDstSize, void *wrkmem)
+{
+	return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr,
+		targetDstSize);
+}
+EXPORT_SYMBOL(LZ4_compress_destSize);
+
+/*-******************************
+ *	Streaming functions
+ ********************************/
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream)
+{
+	memset(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+#define HASH_UNIT sizeof(size_t)
+int LZ4_loadDict(LZ4_stream_t *LZ4_dict,
+	const char *dictionary, int dictSize)
+{
+	LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse;
+	const BYTE *p = (const BYTE *)dictionary;
+	const BYTE * const dictEnd = p + dictSize;
+	const BYTE *base;
+
+	if ((dict->initCheck)
+		|| (dict->currentOffset > 1 * GB)) {
+		/* Uninitialized structure, or reuse overflow */
+		LZ4_resetStream(LZ4_dict);
+	}
+
+	if (dictSize < (int)HASH_UNIT) {
+		dict->dictionary = NULL;
+		dict->dictSize = 0;
+		return 0;
+	}
+
+	if ((dictEnd - p) > 64 * KB)
+		p = dictEnd - 64 * KB;
+	dict->currentOffset += 64 * KB;
+	base = p - dict->currentOffset;
+	dict->dictionary = p;
+	dict->dictSize = (U32)(dictEnd - p);
+	dict->currentOffset += dict->dictSize;
+
+	while (p <= dictEnd - HASH_UNIT) {
+		LZ4_putPosition(p, dict->hashTable, byU32, base);
+		p += 3;
+	}
 
-	*dst_len = out_len;
+	return dict->dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDict);
+
+static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict,
+	const BYTE *src)
+{
+	if ((LZ4_dict->currentOffset > 0x80000000) ||
+		((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {
+		/* address space overflow */
+		/* rescale hash table */
+		U32 const delta = LZ4_dict->currentOffset - 64 * KB;
+		const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+		int i;
+
+		for (i = 0; i < LZ4_HASH_SIZE_U32; i++) {
+			if (LZ4_dict->hashTable[i] < delta)
+				LZ4_dict->hashTable[i] = 0;
+			else
+				LZ4_dict->hashTable[i] -= delta;
+		}
+		LZ4_dict->currentOffset = 64 * KB;
+		if (LZ4_dict->dictSize > 64 * KB)
+			LZ4_dict->dictSize = 64 * KB;
+		LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+	}
+}
+
+int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
+{
+	LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse;
+	const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize;
+
+	if ((U32)dictSize > 64 * KB) {
+		/* useless to define a dictionary > 64 * KB */
+		dictSize = 64 * KB;
+	}
+	if ((U32)dictSize > dict->dictSize)
+		dictSize = dict->dictSize;
 
-	return 0;
-exit:
-	return ret;
+	memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+	dict->dictionary = (const BYTE *)safeBuffer;
+	dict->dictSize = (U32)dictSize;
+
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDict);
+
+int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
+	char *dest, int inputSize, int maxOutputSize, int acceleration)
+{
+	LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse;
+	const BYTE * const dictEnd = streamPtr->dictionary
+		+ streamPtr->dictSize;
+
+	const BYTE *smallest = (const BYTE *) source;
+
+	if (streamPtr->initCheck) {
+		/* Uninitialized structure detected */
+		return 0;
+	}
+
+	if ((streamPtr->dictSize > 0) && (smallest > dictEnd))
+		smallest = dictEnd;
+
+	LZ4_renormDictT(streamPtr, smallest);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
+
+	/* Check overlapping input/dictionary space */
+	{	 const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+
+		if ((sourceEnd > streamPtr->dictionary)
+			&& (sourceEnd < dictEnd)) {
+			streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+			if (streamPtr->dictSize > 64 * KB)
+				streamPtr->dictSize = 64 * KB;
+			if (streamPtr->dictSize < 4)
+				streamPtr->dictSize = 0;
+			streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+		}
+	}
+
+	/* prefix mode : source data follows dictionary */
+	if (dictEnd == (const BYTE *)source) {
+		int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, dictSmall,	acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, noDictIssue, acceleration);
+		}
+		streamPtr->dictSize += (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+
+	/* external dictionary mode */
+	{ int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, dictSmall, acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, noDictIssue, acceleration);
+		}
+		streamPtr->dictionary = (const BYTE *)source;
+		streamPtr->dictSize = (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+}
+EXPORT_SYMBOL(LZ4_compress_fast_continue);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem) {
+	*dst_len = LZ4_compress_default(src, dst, (int)src_len,
+		(int)((size_t)dst_len), wrkmem);
+
+	/*
+	 * Prior lz4_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_fast/default
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4_compress);
 
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 6d940c7..322ea3a 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -1,25 +1,16 @@
 /*
- * LZ4 Decompressor for Linux kernel
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * Based on LZ4 implementation by Yann Collet.
- *
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *    * Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *    * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -31,313 +22,510 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  You can contact the author at :
- *  - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- *  - LZ4 source repository : http://code.google.com/p/lz4/
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#ifndef STATIC
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#endif
-#include <linux/lz4.h>
-
 #include <asm/unaligned.h>
 
-#include "lz4defs.h"
-
-static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-static int lz4_uncompress(const char *source, char *dest, int osize)
+/*-*****************************
+ *	Decompression functions
+ *******************************/
+/* LZ4_decompress_generic() :
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is important this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+static inline int LZ4_decompress_generic(
+	 const char *const source,
+	 char * const dest,
+	 int inputSize,
+		/*
+		 * If endOnInput == endOnInputSize,
+		 * this value is the max size of Output Buffer.
+		 */
+	 int outputSize,
+	 /* endOnOutputSize, endOnInputSize */
+	 int endOnInput,
+	 /* full, partial */
+	 int partialDecoding,
+	 /* only used if partialDecoding == partial */
+	 int targetOutputSize,
+	 /* noDict, withPrefix64k, usingExtDict */
+	 int dict,
+	 /* == dest when no prefix */
+	 const BYTE * const lowPrefix,
+	 /* only if dict == usingExtDict */
+	 const BYTE * const dictStart,
+	 /* note : = 0 if noDict */
+	 const size_t dictSize
+	 )
 {
+	/* Local Variables */
 	const BYTE *ip = (const BYTE *) source;
-	const BYTE *ref;
+	const BYTE * const iend = ip + inputSize;
+
 	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + osize;
+	BYTE * const oend = op + outputSize;
 	BYTE *cpy;
-	unsigned token;
-	size_t length;
+	BYTE *oexit = op + targetOutputSize;
+	const BYTE * const lowLimit = lowPrefix - dictSize;
+
+	const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
+	const unsigned int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
+	const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+	const int safeDecode = (endOnInput == endOnInputSize);
+	const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
 
+	/* Special cases */
+	/* targetOutputSize too high => decode everything */
+	if ((partialDecoding) && (oexit > oend - MFLIMIT))
+		oexit = oend - MFLIMIT;
+
+	/* Empty output buffer */
+	if ((endOnInput) && (unlikely(outputSize == 0)))
+		return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
+
+	if ((!endOnInput) && (unlikely(outputSize == 0)))
+		return (*ip == 0 ? 1 : -1);
+
+	/* Main Loop : decode sequences */
 	while (1) {
+		size_t length;
+		const BYTE *match;
+		size_t offset;
+
+		/* get literal length */
+		unsigned int const token = *ip++;
+
+		length = token>>ML_BITS;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
 		if (length == RUN_MASK) {
-			size_t len;
+			unsigned int s;
 
-			len = *ip++;
-			for (; len == 255; length += 255)
-				len = *ip++;
-			if (unlikely(length > (size_t)(length + len)))
+			do {
+				s = *ip++;
+				length += s;
+			} while (likely(endOnInput
+				? ip < iend - RUN_MASK
+				: 1) & (s == 255));
+
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)(op))) {
+				/* overflow detection */
 				goto _output_error;
-			length += len;
+			}
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(ip + length) < (size_t)(ip))) {
+				/* overflow detection */
+				goto _output_error;
+			}
 		}
 
 		/* copy literals */
 		cpy = op + length;
-		if (unlikely(cpy > oend - COPYLENGTH)) {
-			/*
-			 * Error: not enough place for another match
-			 * (min 4) + 5 literals
-			 */
-			if (cpy != oend)
-				goto _output_error;
-
+		if (((endOnInput)
+			&& ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
+			|| (ip + length > iend - (2 + 1 + LASTLITERALS))))
+			|| ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
+			if (partialDecoding) {
+				if (cpy > oend) {
+					/*
+					 * Error :
+					 * write attempt
+					 * beyond end of output buffer
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& (ip + length > iend)) {
+					/*
+					 * Error :
+					 * read attempt beyond
+					 * end of input buffer
+					 */
+					goto _output_error;
+				}
+			} else {
+				if ((!endOnInput)
+					&& (cpy != oend)) {
+					/*
+					 * Error :
+					 * block decoding must
+					 * stop exactly there
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& ((ip + length != iend)
+					|| (cpy > oend))) {
+					/*
+					 * Error :
+					 * input must be consumed
+					 */
+					goto _output_error;
+				}
+			}
 			memcpy(op, ip, length);
 			ip += length;
-			break; /* EOF */
+			op += length;
+			/* Necessarily EOF, due to parsing restrictions */
+			break;
 		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+		LZ4_wildCopy(op, ip, cpy);
+		ip += length; op = cpy;
 
 		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-
-		/* Error: offset create reference outside destination buffer */
-		if (unlikely(ref < (BYTE *const) dest))
+		offset = LZ4_readLE16(ip); ip += 2;
+		match = op - offset;
+		if ((checkOffset) && (unlikely(match < lowLimit))) {
+			/* Error : offset outside buffers */
 			goto _output_error;
+		}
+		/* costs ~1%; silence an msan warning when offset == 0 */
+		LZ4_write32(op, (U32)offset);
 
 		/* get matchlength */
 		length = token & ML_MASK;
 		if (length == ML_MASK) {
-			for (; *ip == 255; length += 255)
-				ip++;
-			if (unlikely(length > (size_t)(length + *ip)))
+			unsigned int s;
+
+			do {
+			s = *ip++;
+			if ((endOnInput) && (ip > iend - LASTLITERALS))
+				goto _output_error;
+			length += s;
+			} while (s == 255);
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)op)) {
+				/* overflow detection */
 				goto _output_error;
-			length += *ip++;
+			}
 		}
+		length += MINMATCH;
+
+		/* check external dictionary */
+		if ((dict == usingExtDict) && (match < lowPrefix)) {
+			if (unlikely(op + length > oend - LASTLITERALS)) {
+				/* doesn't respect parsing restriction */
+				goto _output_error;
+			}
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-			op[0] = ref[0];
-			op[1] = ref[1];
-			op[2] = ref[2];
-			op[3] = ref[3];
-			op += 4;
-			ref += 4;
-			ref -= dec32table[op-ref];
-			PUT4(ref, op);
-			op += STEPSIZE - 4;
-			ref -= dec64;
+			if (length <= (size_t)(lowPrefix - match)) {
+			/*
+			 * match can be copied as a single segment
+			 * from external dictionary
+			 */
+			memmove(op, dictEnd - (lowPrefix - match), length);
+			op += length;
+			} else {
+				/*
+				 * match encompass external
+				 * dictionary and current block
+				 */
+				size_t const copySize = (size_t)(lowPrefix - match);
+				size_t const restSize = length - copySize;
+
+				memcpy(op, dictEnd - copySize, copySize);
+				op += copySize;
+
+				if (restSize > (size_t)(op - lowPrefix)) {
+					/* overlap copy */
+					BYTE * const endOfMatch = op + restSize;
+					const BYTE *copyFrom = lowPrefix;
+
+					while (op < endOfMatch)
+						*op++ = *copyFrom++;
+				} else {
+					memcpy(op, lowPrefix, restSize);
+					op += restSize;
+				}
+			}
+			continue;
+		}
+
+		/* copy match within block */
+		cpy = op + length;
+		if (unlikely(offset < 8)) {
+			const int dec64 = dec64table[offset];
+
+			op[0] = match[0];
+			op[1] = match[1];
+			op[2] = match[2];
+			op[3] = match[3];
+			match += dec32table[offset];
+			memcpy(op + 4, match, 4);
+			match -= dec64;
 		} else {
-			LZ4_COPYSTEP(ref, op);
+			LZ4_copy8(op, match); match += 8;
 		}
-		cpy = op + length - (STEPSIZE - 4);
-		if (cpy > (oend - COPYLENGTH)) {
 
-			/* Error: request to write beyond destination buffer */
-			if (cpy > oend)
-				goto _output_error;
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
+		op += 8;
+
+		if (unlikely(cpy > oend - 12)) {
+			BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
+
+			if (cpy > oend - LASTLITERALS) {
+				/*
+				 * Error : last LASTLITERALS bytes
+				 * must be literals (uncompressed)
+				 */
 				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
+			}
+			if (op < oCopyLimit) {
+				LZ4_wildCopy(op, match, oCopyLimit);
+				match += oCopyLimit - op;
+				op = oCopyLimit;
+			}
 			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
+				*op++ = *match++;
+		} else {
+			LZ4_copy8(op, match);
+			if (length > 16)
+				LZ4_wildCopy(op + 8, match + 8, cpy);
 		}
-		LZ4_SECURECOPY(ref, op, cpy);
 		op = cpy; /* correction */
 	}
+
 	/* end of decoding */
-	return (int) (((char *)ip) - source);
+	if (endOnInput) {
+		/* Nb of output bytes decoded */
+		return (int) (((char *)op) - dest);
+	} else {
+		/* Nb of input bytes read */
+		return (int) (((const char *)ip) - source);
+	}
 
-	/* write overflow error detected */
+	/* Overflow error detected */
 _output_error:
 	return -1;
 }
 
-static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
-				int isize, size_t maxoutputsize)
+int LZ4_decompress_safe(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize)
 {
-	const BYTE *ip = (const BYTE *) source;
-	const BYTE *const iend = ip + isize;
-	const BYTE *ref;
-
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, full, 0,
+		noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe);
 
-	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + maxoutputsize;
-	BYTE *cpy;
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize)
+{
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, partial,
+		targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_partial);
 
-	/* Main Loop */
-	while (ip < iend) {
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
+{
+	return LZ4_decompress_generic(source, dest, 0, originalSize,
+		endOnOutputSize, full, 0, withPrefix64k,
+		(BYTE *)(dest - 64 * KB), NULL, 64 * KB);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast);
 
-		unsigned token;
-		size_t length;
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
-		if (length == RUN_MASK) {
-			int s = 255;
-			while ((ip < iend) && (s == 255)) {
-				s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-			}
-		}
-		/* copy literals */
-		cpy = op + length;
-		if ((cpy > oend - COPYLENGTH) ||
-			(ip + length > iend - COPYLENGTH)) {
-
-			if (cpy > oend)
-				goto _output_error;/* writes beyond buffer */
-
-			if (ip + length != iend)
-				goto _output_error;/*
-						    * Error: LZ4 format requires
-						    * to consume all input
-						    * at this stage
-						    */
-			memcpy(op, ip, length);
-			op += length;
-			break;/* Necessarily EOF, due to parsing restrictions */
-		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+	lz4sd->prefixSize = (size_t) dictSize;
+	lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize;
+	lz4sd->externalDict = NULL;
+	lz4sd->extDictSize	= 0;
+	return 1;
+}
+EXPORT_SYMBOL(LZ4_setStreamDecode);
 
-		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-		if (ref < (BYTE * const) dest)
-			goto _output_error;
-			/*
-			 * Error : offset creates reference
-			 * outside of destination buffer
-			 */
+/*
+ * *_continue() :
+ * These decoding functions allow decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks must still be available at the memory
+ * position where they were decoded.
+ * If it's not possible, save the relevant part of
+ * decoded data into a safe buffer,
+ * and indicate where it stands using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize, int maxOutputSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize,
+			maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict,
+			lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += result;
+		lz4sd->prefixEnd	+= result;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = result;
+		lz4sd->prefixEnd	= (BYTE *)dest + result;
+	}
 
-		/* get matchlength */
-		length = (token & ML_MASK);
-		if (length == ML_MASK) {
-			while (ip < iend) {
-				int s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-				if (s == 255)
-					continue;
-				break;
-			}
-		}
+	return result;
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_continue);
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-				op[0] = ref[0];
-				op[1] = ref[1];
-				op[2] = ref[2];
-				op[3] = ref[3];
-				op += 4;
-				ref += 4;
-				ref -= dec32table[op - ref];
-				PUT4(ref, op);
-				op += STEPSIZE - 4;
-				ref -= dec64;
-		} else {
-			LZ4_COPYSTEP(ref, op);
-		}
-		cpy = op + length - (STEPSIZE-4);
-		if (cpy > oend - COPYLENGTH) {
-			if (cpy > oend)
-				goto _output_error; /* write outside of buf */
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
-				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
-		}
-		LZ4_SECURECOPY(ref, op, cpy);
-		op = cpy; /* correction */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict,
+			lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict, lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += originalSize;
+		lz4sd->prefixEnd	+= originalSize;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = originalSize;
+		lz4sd->prefixEnd	= (BYTE *)dest + originalSize;
 	}
-	/* end of decoding */
-	return (int) (((char *) op) - dest);
 
-	/* write overflow error detected */
-_output_error:
-	return -1;
+	return result;
 }
+EXPORT_SYMBOL(LZ4_decompress_fast_continue);
 
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len)
+/*
+ * Advanced decoding functions :
+ * *_usingDict() :
+ * These decoding functions work the same as "_continue" ones,
+ * the dictionary must be explicitly provided within parameters
+ */
+static inline int LZ4_decompress_usingDict_generic(const char *source,
+	char *dest, int compressedSize, int maxOutputSize, int safe,
+	const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int input_len = 0;
-
-	input_len = lz4_uncompress(src, dest, actual_dest_len);
-	if (input_len < 0)
-		goto exit_0;
-	*src_len = input_len;
+	if (dictSize == 0)
+		return LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize, safe, full, 0,
+			noDict, (BYTE *)dest, NULL, 0);
+	if (dictStart + dictSize == dest) {
+		if (dictSize >= (int)(64 * KB - 1))
+			return LZ4_decompress_generic(source, dest,
+				compressedSize, maxOutputSize, safe, full, 0,
+				withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
+		return LZ4_decompress_generic(source, dest, compressedSize,
+			maxOutputSize, safe, full, 0, noDict,
+			(BYTE *)dest - dictSize, NULL, 0);
+	}
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxOutputSize, safe, full, 0, usingExtDict,
+		(BYTE *)dest, (const BYTE *)dictStart, dictSize);
+}
 
-	return 0;
-exit_0:
-	return ret;
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxOutputSize,
+	const char *dictStart, int dictSize)
+{
+	return LZ4_decompress_usingDict_generic(source, dest,
+		compressedSize, maxOutputSize, 1, dictStart, dictSize);
 }
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress);
-#endif
+EXPORT_SYMBOL(LZ4_decompress_safe_usingDict);
 
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len)
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int out_len = 0;
-
-	out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
-					*dest_len);
-	if (out_len < 0)
-		goto exit_0;
-	*dest_len = out_len;
-
-	return 0;
-exit_0:
-	return ret;
+	return LZ4_decompress_usingDict_generic(source, dest, 0,
+		originalSize, 0, dictStart, dictSize);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_decompress_unknownoutputsize(const unsigned char *src,
+	size_t src_len, unsigned char *dest, size_t *dest_len) {
+	 *dest_len = LZ4_decompress_safe(src, dest,
+		 (int)src_len, (int)((size_t)dest_len));
+
+		/*
+		 * Prior lz4_decompress_unknownoutputsize will return
+		 * 0 for success and a negative result for error
+		 * new LZ4_decompress_safe returns
+		 * - the length of data read on success
+		 * - and also a negative result on error
+		 * meaning when result > 0, we just return 0 here
+		 */
+		if (src_len > 0)
+			return 0;
+		else
+			return -1;
 }
-#ifndef STATIC
 EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
 
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len) {
+	*src_len = LZ4_decompress_fast(src, dest, (int)actual_dest_len);
+
+	/*
+	 * Prior lz4_decompress will return
+	 * 0 for success and a negative result for error
+	 * new LZ4_decompress_fast returns
+	 * - the length of data read on success
+	 * - and also a negative result on error
+	 * meaning when result > 0, we just return 0 here
+	 */
+	if ((int)((size_t)src_len) > 0)
+		return 0;
+	else
+		return -1;
+}
+EXPORT_SYMBOL(lz4_decompress);
+
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4 Decompressor");
-#endif
+MODULE_DESCRIPTION("LZ4 decompressor");
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index c79d7ea..56977bc 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -1,157 +1,229 @@
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
 /*
- * lz4defs.h -- architecture specific defines
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * lz4defs.h -- common and architecture specific defines for the kernel usage
+
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *    * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+#include <asm/unaligned.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
 /*
  * Detects 64 bits mode
- */
+*/
 #if defined(CONFIG_64BIT)
 #define LZ4_ARCH64 1
 #else
 #define LZ4_ARCH64 0
 #endif
 
-/*
- * Architecture-specific macros
- */
-#define BYTE	u8
-typedef struct _U16_S { u16 v; } U16_S;
-typedef struct _U32_S { u32 v; } U32_S;
-typedef struct _U64_S { u64 v; } U64_S;
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-
-#define A16(x) (((U16_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A64(x) (((U64_S *)(x))->v)
-
-#define PUT4(s, d) (A32(d) = A32(s))
-#define PUT8(s, d) (A64(d) = A64(s))
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - A16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
-	do {	\
-		A16(p) = v; \
-		p += 2; \
-	} while (0)
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
-#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
-#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
-
-#define PUT4(s, d) \
-	put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
-#define PUT8(s, d) \
-	put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - get_unaligned_le16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)			\
-	do {						\
-		put_unaligned_le16(v, (u16 *)(p));	\
-		p += 2;					\
-	} while (0)
-#endif
+/*-************************************
+ *	Basic Types
+ **************************************/
+#include <linux/types.h>
 
-#define COPYLENGTH 8
-#define ML_BITS  4
-#define ML_MASK  ((1U << ML_BITS) - 1)
-#define RUN_BITS (8 - ML_BITS)
-#define RUN_MASK ((1U << RUN_BITS) - 1)
-#define MEMORY_USAGE	14
-#define MINMATCH	4
-#define SKIPSTRENGTH	6
-#define LASTLITERALS	5
-#define MFLIMIT		(COPYLENGTH + MINMATCH)
-#define MINLENGTH	(MFLIMIT + 1)
-#define MAXD_LOG	16
-#define MAXD		(1 << MAXD_LOG)
-#define MAXD_MASK	(u32)(MAXD - 1)
-#define MAX_DISTANCE	(MAXD - 1)
-#define HASH_LOG	(MAXD_LOG - 1)
-#define HASHTABLESIZE	(1 << HASH_LOG)
-#define MAX_NB_ATTEMPTS	256
-#define OPTIMAL_ML	(int)((ML_MASK-1)+MINMATCH)
-#define LZ4_64KLIMIT	((1<<16) + (MFLIMIT - 1))
-#define HASHLOG64K	((MEMORY_USAGE - 2) + 1)
-#define HASH64KTABLESIZE	(1U << HASHLOG64K)
-#define LZ4_HASH_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - (MEMORY_USAGE-2)))
-#define LZ4_HASH64K_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASHLOG64K))
-#define HASH_VALUE(p)		(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASH_LOG))
-
-#if LZ4_ARCH64/* 64-bit */
-#define STEPSIZE 8
-
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT8(s, d);	\
-		d += 8;		\
-		s += 8;		\
-	} while (0)
-
-#define LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
-
-#define LZ4_SECURECOPY(s, d, e)			\
-	do {					\
-		if (d < e) {			\
-			LZ4_WILDCOPY(s, d, e);	\
-		}				\
-	} while (0)
-#define HTYPE u32
-
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+typedef	uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef	int32_t S32;
+typedef uint64_t U64;
+typedef uintptr_t uptrval;
+
+/*-************************************
+ *	Constants
+ **************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB (1<<10)
+#define MB (1<<20)
+#define GB (1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1<<MAXD_LOG) - 1)
+#define STEPSIZE sizeof(size_t)
+
+#define ML_BITS	4
+#define ML_MASK	((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;
+
+/*-************************************
+ *	Reading and writing into memory
+ **************************************/
+
+static inline U16 LZ4_read16(const void *memPtr)
+{
+	U16 val;
+
+	memcpy(&val, memPtr, sizeof(val));
+
+	return val;
+}
+
+static inline U32 LZ4_read32(const void *memPtr)
+{
+	U32 val;
+
+	memcpy(&val, memPtr, sizeof(val));
+
+	return val;
+}
+
+static inline size_t LZ4_read_ARCH(const void *memPtr)
+{
+	size_t val;
+
+	memcpy(&val, memPtr, sizeof(val));
+
+	return val;
+}
+
+static inline void LZ4_write16(void *memPtr, U16 value)
+{
+	memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline void LZ4_write32(void *memPtr, U32 value)
+{
+	memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline U16 LZ4_readLE16(const void *memPtr)
+{
+#ifdef __LITTLE_ENDIAN__
+	return LZ4_read16(memPtr);
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
-#endif
+	const BYTE *p = (const BYTE *)memPtr;
 
-#else	/* 32-bit */
-#define STEPSIZE 4
+	return (U16)((U16)p[0] + (p[1] << 8));
+#endif
+}
 
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT4(s, d);	\
-		d += 4;		\
-		s += 4;		\
-	} while (0)
+static inline void LZ4_writeLE16(void *memPtr, U16 value)
+{
+#ifdef __LITTLE_ENDIAN__
+	LZ4_write16(memPtr, value);
+#else
+	BYTE *p = (BYTE *)memPtr;
 
-#define LZ4_COPYPACKET(s, d)		\
-	do {				\
-		LZ4_COPYSTEP(s, d);	\
-		LZ4_COPYSTEP(s, d);	\
-	} while (0)
+	p[0] = (BYTE) value;
+	p[1] = (BYTE)(value>>8);
+#endif
+}
 
-#define LZ4_SECURECOPY	LZ4_WILDCOPY
-#define HTYPE const u8*
+static inline void LZ4_copy8(void *dst, const void *src)
+{
+	memcpy(dst, src, 8);
+}
 
-#ifdef __BIG_ENDIAN
+/*
+ * customized variant of memcpy,
+ * which can overwrite up to 7 bytes beyond dstEnd
+ */
+static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
+{
+	BYTE *d = (BYTE *)dstPtr;
+	const BYTE *s = (const BYTE *)srcPtr;
+	BYTE *const e = (BYTE *)dstEnd;
+
+	do {
+		LZ4_copy8(d, s);
+		d += 8;
+		s += 8;
+	} while (d < e);
+}
+
+#if LZ4_ARCH64
+#ifdef __BIG_ENDIAN__
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
+#endif
+#else
+#ifdef __BIG_ENDIAN__
 #define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
 #else
 #define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
 #endif
+#endif
 
+static inline unsigned int LZ4_count(const BYTE *pIn, const BYTE *pMatch,
+	const BYTE *pInLimit)
+{
+	const BYTE *const pStart = pIn;
+
+	while (likely(pIn < pInLimit-(STEPSIZE-1))) {
+		size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+
+		if (!diff) {
+			pIn += STEPSIZE;
+			pMatch += STEPSIZE;
+			continue;
+		}
+		pIn += LZ4_NBCOMMONBYTES(diff);
+		return (unsigned int)(pIn - pStart);
+	}
+
+#ifdef LZ4_ARCH64
+	if ((pIn < (pInLimit-3))
+		&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+		pIn += 4; pMatch += 4;
+	}
 #endif
+	if ((pIn < (pInLimit-1))
+		&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+		pIn += 2; pMatch += 2;
+	}
+	if ((pIn < pInLimit) && (*pMatch == *pIn))
+		pIn++;
+	return (unsigned int)(pIn - pStart);
+}
+
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
+
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
 
-#define LZ4_WILDCOPY(s, d, e)		\
-	do {				\
-		LZ4_COPYPACKET(s, d);	\
-	} while (d < e)
-
-#define LZ4_BLINDCOPY(s, d, l)	\
-	do {	\
-		u8 *e = (d) + l;	\
-		LZ4_WILDCOPY(s, d, e);	\
-		d = e;	\
-	} while (0)
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+#endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index f344f76..da8063c 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -1,19 +1,17 @@
 /*
  * LZ4 HC - High Compression Mode of LZ4
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Copyright (C) 2011-2015, Yann Collet.
  *
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *		* Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *		* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,323 +23,353 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+/*-************************************
+ *	Dependencies
+ **************************************/
 #include <linux/lz4.h>
-#include <asm/unaligned.h>
 #include "lz4defs.h"
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h> /* memset */
 
-struct lz4hc_data {
-	const u8 *base;
-	HTYPE hashtable[HASHTABLESIZE];
-	u16 chaintable[MAXD];
-	const u8 *nexttoupdate;
-} __attribute__((__packed__));
+/* *************************************
+ *	Local Constants and types
+ ***************************************/
 
-static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
+#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH)
+
+#define HASH_FUNCTION(i)	(((i) * 2654435761U) \
+	>> ((MINMATCH*8) - LZ4HC_HASH_LOG))
+#define DELTANEXTU16(p)	chainTable[(U16)(p)] /* faster */
+
+static U32 LZ4HC_hashPtr(const void *ptr)
 {
-	memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
-	memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
-
-#if LZ4_ARCH64
-	hc4->nexttoupdate = base + 1;
-#else
-	hc4->nexttoupdate = base;
-#endif
-	hc4->base = base;
-	return 1;
+	return HASH_FUNCTION(LZ4_read32(ptr));
+}
+
+/**************************************
+ *	HC Compression
+ **************************************/
+static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
+{
+	memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable));
+	memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+	hc4->nextToUpdate = 64 * KB;
+	hc4->base = start - 64 * KB;
+	hc4->end = start;
+	hc4->dictBase = start - 64 * KB;
+	hc4->dictLimit = 64 * KB;
+	hc4->lowLimit = 64 * KB;
 }
 
 /* Update chains up to ip (excluded) */
-static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
+static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+	const BYTE *ip)
 {
-	u16 *chaintable = hc4->chaintable;
-	HTYPE *hashtable  = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const hashTable	= hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
+	U32 const target = (U32)(ip - base);
+	U32 idx = hc4->nextToUpdate;
+
+	while (idx < target) {
+		U32 const h = LZ4HC_hashPtr(base + idx);
+		size_t delta = idx - hashTable[h];
 
-	while (hc4->nexttoupdate < ip) {
-		const u8 *p = hc4->nexttoupdate;
-		size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
 		if (delta > MAX_DISTANCE)
 			delta = MAX_DISTANCE;
-		chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
-		hashtable[HASH_VALUE(p)] = (p) - base;
-		hc4->nexttoupdate++;
-	}
-}
 
-static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
-		const u8 *const matchlimit)
-{
-	const u8 *p1t = p1;
-
-	while (p1t < matchlimit - (STEPSIZE - 1)) {
-#if LZ4_ARCH64
-		u64 diff = A64(p2) ^ A64(p1t);
-#else
-		u32 diff = A32(p2) ^ A32(p1t);
-#endif
-		if (!diff) {
-			p1t += STEPSIZE;
-			p2 += STEPSIZE;
-			continue;
-		}
-		p1t += LZ4_NBCOMMONBYTES(diff);
-		return p1t - p1;
-	}
-#if LZ4_ARCH64
-	if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
-		p1t += 4;
-		p2 += 4;
-	}
-#endif
+		DELTANEXTU16(idx) = (U16)delta;
 
-	if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
-		p1t += 2;
-		p2 += 2;
+		hashTable[h] = idx;
+		idx++;
 	}
-	if ((p1t < matchlimit) && (*p2 == *p1t))
-		p1t++;
-	return p1t - p1;
+
+	hc4->nextToUpdate = target;
 }
 
-static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
-		const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
+static inline int LZ4HC_InsertAndFindBestMatch(
+	LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
+	const BYTE *ip,
+	const BYTE * const iLimit,
+	const BYTE **matchpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-	const u8 *ref;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	int nbattempts = MAX_NB_ATTEMPTS;
-	size_t repl = 0, ml = 0;
-	u16 delta;
+	const BYTE * const dictBase = hc4->dictBase;
+	const U32 dictLimit = hc4->dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	U32 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	size_t ml = 0;
 
 	/* HC4 match finder */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	/* potential repetition */
-	if (ref >= ip-4) {
-		/* confirmed */
-		if (A32(ref) == A32(ip)) {
-			delta = (u16)(ip-ref);
-			repl = ml  = lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
-			*matchpos = ref;
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE * const match = base + matchIndex;
+
+			if (*(match + ml) == *(ip + ml)
+				&& (LZ4_read32(match) == LZ4_read32(ip))) {
+				size_t const mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, iLimit) + MINMATCH;
 
-	while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
-		nbattempts--;
-		if (*(ref + ml) == *(ip + ml)) {
-			if (A32(ref) == A32(ip)) {
-				size_t mlt =
-					lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
 				if (mlt > ml) {
 					ml = mlt;
-					*matchpos = ref;
+					*matchpos = match;
+				}
+			}
+		} else {
+			const BYTE * const match = dictBase + matchIndex;
+
+			if (LZ4_read32(match) == LZ4_read32(ip)) {
+				size_t mlt;
+				const BYTE *vLimit = ip
+					+ (dictLimit - matchIndex);
+
+				if (vLimit > iLimit)
+					vLimit = iLimit;
+				mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, vLimit) + MINMATCH;
+				if ((ip + mlt == vLimit)
+					&& (vLimit < iLimit))
+					mlt += LZ4_count(ip + mlt,
+						base + dictLimit,
+						iLimit);
+				if (mlt > ml) {
+					/* virtual matchpos */
+					ml = mlt;
+					*matchpos = base + matchIndex;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-
-	/* Complete table */
-	if (repl) {
-		const BYTE *ptr = ip;
-		const BYTE *end;
-		end = ip + repl - (MINMATCH-1);
-		/* Pre-Load */
-		while (ptr < end - delta) {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			ptr++;
-		}
-		do {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			/* Head of chain */
-			hashtable[HASH_VALUE(ptr)] = (ptr) - base;
-			ptr++;
-		} while (ptr < end);
-		hc4->nexttoupdate = end;
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
 
 	return (int)ml;
 }
 
-static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
-	const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
-	const u8 **matchpos, const u8 **startpos)
+static inline int LZ4HC_InsertAndGetWiderMatch(
+	LZ4HC_CCtx_internal *hc4,
+	const BYTE * const ip,
+	const BYTE * const iLowLimit,
+	const BYTE * const iHighLimit,
+	int longest,
+	const BYTE **matchpos,
+	const BYTE **startpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	const u8 *ref;
-	int nbattempts = MAX_NB_ATTEMPTS;
-	int delta = (int)(ip - startlimit);
+	const U32 dictLimit = hc4->dictLimit;
+	const BYTE * const lowPrefixPtr = base + dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	const BYTE * const dictBase = hc4->dictBase;
+	U32	 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	int delta = (int)(ip - iLowLimit);
 
 	/* First Match */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
-		&& (nbattempts)) {
-		nbattempts--;
-		if (*(startlimit + longest) == *(ref - delta + longest)) {
-			if (A32(ref) == A32(ip)) {
-				const u8 *reft = ref + MINMATCH;
-				const u8 *ipt = ip + MINMATCH;
-				const u8 *startt = ip;
-
-				while (ipt < matchlimit-(STEPSIZE - 1)) {
-					#if LZ4_ARCH64
-					u64 diff = A64(reft) ^ A64(ipt);
-					#else
-					u32 diff = A32(reft) ^ A32(ipt);
-					#endif
-
-					if (!diff) {
-						ipt += STEPSIZE;
-						reft += STEPSIZE;
-						continue;
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE *matchPtr = base + matchIndex;
+
+			if (*(iLowLimit + longest)
+				== *(matchPtr - delta + longest)) {
+				if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+					int mlt = MINMATCH +
+						LZ4_count(ip + MINMATCH,
+							matchPtr + MINMATCH,
+							iHighLimit);
+					int back = 0;
+
+					while ((ip + back > iLowLimit)
+							 && (matchPtr + back > lowPrefixPtr)
+							 && (ip[back - 1] == matchPtr[back - 1]))
+						back--;
+
+					mlt -= back;
+
+					if (mlt > longest) {
+						longest = (int)mlt;
+						*matchpos = matchPtr + back;
+						*startpos = ip + back;
 					}
-					ipt += LZ4_NBCOMMONBYTES(diff);
-					goto _endcount;
-				}
-				#if LZ4_ARCH64
-				if ((ipt < (matchlimit - 3))
-					&& (A32(reft) == A32(ipt))) {
-					ipt += 4;
-					reft += 4;
-				}
-				ipt += 2;
-				#endif
-				if ((ipt < (matchlimit - 1))
-					&& (A16(reft) == A16(ipt))) {
-					reft += 2;
 				}
-				if ((ipt < matchlimit) && (*reft == *ipt))
-					ipt++;
-_endcount:
-				reft = ref;
-
-				while ((startt > startlimit)
-					&& (reft > hc4->base)
-					&& (startt[-1] == reft[-1])) {
-					startt--;
-					reft--;
-				}
-
-				if ((ipt - startt) > longest) {
-					longest = (int)(ipt - startt);
-					*matchpos = reft;
-					*startpos = startt;
+			}
+		} else {
+			const BYTE * const matchPtr = dictBase + matchIndex;
+
+			if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+				size_t mlt;
+				int back = 0;
+				const BYTE *vLimit = ip + (dictLimit - matchIndex);
+
+				if (vLimit > iHighLimit)
+					vLimit = iHighLimit;
+
+				mlt = LZ4_count(ip + MINMATCH,
+					matchPtr + MINMATCH, vLimit) + MINMATCH;
+
+				if ((ip + mlt == vLimit) && (vLimit < iHighLimit))
+					mlt += LZ4_count(ip + mlt, base + dictLimit,
+						iHighLimit);
+				while ((ip + back > iLowLimit)
+					&& (matchIndex + back > lowLimit)
+					&& (ip[back - 1] == matchPtr[back - 1]))
+					back--;
+				mlt -= back;
+				if ((int)mlt > longest) {
+					longest = (int)mlt;
+					*matchpos = base + matchIndex + back;
+					*startpos = ip + back;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
+
 	return longest;
 }
 
-static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
-		int ml, const u8 *ref)
+static inline int LZ4HC_encodeSequence(
+	const BYTE **ip,
+	BYTE **op,
+	const BYTE **anchor,
+	int matchLength,
+	const BYTE * const match,
+	limitedOutput_directive limitedOutputBuffer,
+	BYTE *oend)
 {
-	int length, len;
-	u8 *token;
+	int length;
+	BYTE *token;
 
 	/* Encode Literal length */
 	length = (int)(*ip - *anchor);
 	token = (*op)++;
+
+	if ((limitedOutputBuffer)
+		&& ((*op + (length>>8)
+			+ length + (2 + 1 + LASTLITERALS)) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
 	if (length >= (int)RUN_MASK) {
-		*token = (RUN_MASK << ML_BITS);
+		int len;
+
+		*token = (RUN_MASK<<ML_BITS);
 		len = length - RUN_MASK;
-		for (; len > 254 ; len -= 255)
+		for (; len > 254 ; len -= 255) {
 			*(*op)++ = 255;
-		*(*op)++ = (u8)len;
+			*(*op)++ = (BYTE)len;
+		}
 	} else
-		*token = (length << ML_BITS);
+		*token = (BYTE)(length<<ML_BITS);
 
 	/* Copy Literals */
-	LZ4_BLINDCOPY(*anchor, *op, length);
+	LZ4_wildCopy(*op, *anchor, (*op) + length);
+	*op += length;
 
 	/* Encode Offset */
-	LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
+	LZ4_writeLE16(*op, (U16)(*ip - match)); *op += 2;
 
 	/* Encode MatchLength */
-	len = (int)(ml - MINMATCH);
-	if (len >= (int)ML_MASK) {
+	length = (int)(matchLength - MINMATCH);
+	if ((limitedOutputBuffer)
+		&& (*op + (length>>8)
+			+ (1 + LASTLITERALS) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
+	if (length >= (int)ML_MASK) {
 		*token += ML_MASK;
-		len -= ML_MASK;
-		for (; len > 509 ; len -= 510) {
+		length -= ML_MASK;
+		for (; length > 509 ; length -= 510) {
 			*(*op)++ = 255;
 			*(*op)++ = 255;
 		}
-		if (len > 254) {
-			len -= 255;
+		if (length > 254) {
+			length -= 255;
 			*(*op)++ = 255;
 		}
-		*(*op)++ = (u8)len;
+		*(*op)++ = (BYTE)length;
 	} else
-		*token += len;
+		*token += (BYTE)(length);
 
 	/* Prepare next loop */
-	*ip += ml;
+	*ip += matchLength;
 	*anchor = *ip;
 
 	return 0;
 }
 
-static int lz4_compresshcctx(struct lz4hc_data *ctx,
-		const char *source,
-		char *dest,
-		int isize)
+static int LZ4HC_compress_generic(
+	LZ4HC_CCtx_internal *const ctx,
+	const char * const source,
+	char * const dest,
+	int const inputSize,
+	int const maxOutputSize,
+	int compressionLevel,
+	limitedOutput_directive limit
+	)
 {
-	const u8 *ip = (const u8 *)source;
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	const u8 *const matchlimit = (iend - LASTLITERALS);
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = (iend - LASTLITERALS);
 
-	u8 *op = (u8 *)dest;
+	BYTE *op = (BYTE *) dest;
+	BYTE * const oend = op + maxOutputSize;
 
+	unsigned int maxNbAttempts;
 	int ml, ml2, ml3, ml0;
-	const u8 *ref = NULL;
-	const u8 *start2 = NULL;
-	const u8 *ref2 = NULL;
-	const u8 *start3 = NULL;
-	const u8 *ref3 = NULL;
-	const u8 *start0;
-	const u8 *ref0;
-	int lastrun;
+	const BYTE *ref = NULL;
+	const BYTE *start2 = NULL;
+	const BYTE *ref2 = NULL;
+	const BYTE *start3 = NULL;
+	const BYTE *ref3 = NULL;
+	const BYTE *start0;
+	const BYTE *ref0;
+
+	/* init */
+	if (compressionLevel > LZ4HC_MAX_CLEVEL)
+		compressionLevel = LZ4HC_MAX_CLEVEL;
+	if (compressionLevel < 1)
+		compressionLevel = LZ4HC_DEFAULT_CLEVEL;
+	maxNbAttempts = 1 << (compressionLevel - 1);
+	ctx->end += inputSize;
 
 	ip++;
 
 	/* Main Loop */
 	while (ip < mflimit) {
-		ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
+		ml = LZ4HC_InsertAndFindBestMatch(ctx, ip,
+			matchlimit, (&ref), maxNbAttempts);
 		if (!ml) {
 			ip++;
 			continue;
@@ -351,46 +379,52 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		start0 = ip;
 		ref0 = ref;
 		ml0 = ml;
-_search2:
-		if (ip+ml < mflimit)
-			ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
-				ip + 1, matchlimit, ml, &ref2, &start2);
+
+_Search2:
+		if (ip + ml < mflimit)
+			ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				ip + ml - 2, ip + 0,
+				matchlimit, ml, &ref2,
+				&start2, maxNbAttempts);
 		else
 			ml2 = ml;
-		/* No better match */
+
 		if (ml2 == ml) {
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			/* No better match */
+			if (LZ4HC_encodeSequence(&ip, &op,
+				&anchor, ml, ref, limit, oend))
+				return 0;
 			continue;
 		}
 
 		if (start0 < ip) {
-			/* empirical */
 			if (start2 < ip + ml0) {
+				/* empirical */
 				ip = start0;
 				ref = ref0;
 				ml = ml0;
 			}
 		}
-		/*
-		 * Here, start0==ip
-		 * First Match too small : removed
-		 */
+
+		/* Here, start0 == ip */
 		if ((start2 - ip) < 3) {
+			/* First Match too small : removed */
 			ml = ml2;
 			ip = start2;
 			ref = ref2;
-			goto _search2;
+			goto _Search2;
 		}
 
-_search3:
+_Search3:
 		/*
-		 * Currently we have :
-		 * ml2 > ml1, and
-		 * ip1+3 <= ip2 (usually < ip1+ml1)
-		 */
+		* Currently we have :
+		* ml2 > ml1, and
+		* ip1 + 3 <= ip2 (usually < ip1 + ml1)
+		*/
 		if ((start2 - ip) < OPTIMAL_ML) {
 			int correction;
 			int new_ml = ml;
+
 			if (new_ml > OPTIMAL_ML)
 				new_ml = OPTIMAL_ML;
 			if (ip + new_ml > start2 + ml2 - MINMATCH)
@@ -403,39 +437,44 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			}
 		}
 		/*
-		 * Now, we have start2 = ip+new_ml,
-		 * with new_ml=min(ml, OPTIMAL_ML=18)
+		 * Now, we have start2 = ip + new_ml,
+		 * with new_ml = min(ml, OPTIMAL_ML = 18)
 		 */
+
 		if (start2 + ml2 < mflimit)
-			ml3 = lz4hc_insertandgetwidermatch(ctx,
-				start2 + ml2 - 3, start2, matchlimit,
-				ml2, &ref3, &start3);
+			ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				start2 + ml2 - 3, start2,
+				matchlimit, ml2, &ref3, &start3,
+				maxNbAttempts);
 		else
 			ml3 = ml2;
 
-		/* No better match : 2 sequences to encode */
 		if (ml3 == ml2) {
+			/* No better match : 2 sequences to encode */
 			/* ip & ref are known; Now for ml */
-			if (start2 < ip+ml)
+			if (start2 < ip + ml)
 				ml = (int)(start2 - ip);
-
 			/* Now, encode 2 sequences */
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml, ref, limit, oend))
+				return 0;
 			ip = start2;
-			lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml2, ref2, limit, oend))
+				return 0;
 			continue;
 		}
 
-		/* Not enough space for match 2 : remove it */
 		if (start3 < ip + ml + 3) {
-			/*
-			 * can write Seq1 immediately ==> Seq2 is removed,
-			 * so Seq3 becomes Seq1
-			 */
+			/* Not enough space for match 2 : remove it */
 			if (start3 >= (ip + ml)) {
+				/* can write Seq1 immediately
+				 * ==> Seq2 is removed,
+				 * so Seq3 becomes Seq1
+				 */
 				if (start2 < ip + ml) {
-					int correction =
-						(int)(ip + ml - start2);
+					int correction = (int)(ip + ml - start2);
+
 					start2 += correction;
 					ref2 += correction;
 					ml2 -= correction;
@@ -446,35 +485,38 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 					}
 				}
 
-				lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-				ip  = start3;
+				if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+					ml, ref, limit, oend))
+					return 0;
+				ip	= start3;
 				ref = ref3;
-				ml  = ml3;
+				ml	= ml3;
 
 				start0 = start2;
 				ref0 = ref2;
 				ml0 = ml2;
-				goto _search2;
+				goto _Search2;
 			}
 
 			start2 = start3;
 			ref2 = ref3;
 			ml2 = ml3;
-			goto _search3;
+			goto _Search3;
 		}
 
 		/*
-		 * OK, now we have 3 ascending matches; let's write at least
-		 * the first one ip & ref are known; Now for ml
-		 */
+		* OK, now we have 3 ascending matches;
+		* let's write at least the first one
+		* ip & ref are known; Now for ml
+		*/
 		if (start2 < ip + ml) {
 			if ((start2 - ip) < (int)ML_MASK) {
 				int correction;
+
 				if (ml > OPTIMAL_ML)
 					ml = OPTIMAL_ML;
 				if (ip + ml > start2 + ml2 - MINMATCH)
-					ml = (int)(start2 - ip) + ml2
-						- MINMATCH;
+					ml = (int)(start2 - ip) + ml2 - MINMATCH;
 				correction = ml - (int)(start2 - ip);
 				if (correction > 0) {
 					start2 += correction;
@@ -484,7 +526,9 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			} else
 				ml = (int)(start2 - ip);
 		}
-		lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+		if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml,
+			ref, limit, oend))
+			return 0;
 
 		ip = start2;
 		ref = ref2;
@@ -494,46 +538,243 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		ref2 = ref3;
 		ml2 = ml3;
 
-		goto _search3;
+		goto _Search3;
 	}
 
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8) lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{ int lastRun = (int)(iend - anchor);
+
+		if ((limit)
+			&& (((char *)op - dest) + lastRun + 1
+				+ ((lastRun + 255 - RUN_MASK)/255)
+					> (U32)maxOutputSize)) {
+			/* Check output limit */
+			return 0;
+		}
+		if (lastRun >= (int)RUN_MASK) {
+			*op++ = (RUN_MASK<<ML_BITS);
+			lastRun -= RUN_MASK;
+			for (; lastRun > 254 ; lastRun -= 255) {
+				*op++ = 255;
+				*op++ = (BYTE) lastRun;
+			}
+		} else
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		memcpy(op, anchor, iend - anchor);
+		op += iend - anchor;
+	}
+
 	/* End */
 	return (int) (((char *)op) - dest);
 }
 
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_HC_extStateHC(
+	void *state,
+	const char *src,
+	char *dst,
+	int srcSize,
+	int maxDstSize,
+	int compressionLevel)
 {
-	int ret = -1;
-	int out_len = 0;
+	LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t *)state)->internal_donotuse;
 
-	struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
-	lz4hc_init(hc4, (const u8 *)src);
-	out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
-		(char *)dst, (int)src_len);
+	if (((size_t)(state)&(sizeof(void *) - 1)) != 0) {
+		/* Error : state is not aligned
+		 * for pointers (32 or 64 bits)
+		 */
+		return 0;
+	}
 
-	if (out_len < 0)
-		goto exit;
+	LZ4HC_init(ctx, (const BYTE *)src);
 
-	*dst_len = out_len;
-	return 0;
+	if (maxDstSize < LZ4_compressBound(srcSize))
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, limitedOutput);
+	else
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, noLimit);
+}
+
+int LZ4_compress_HC(const char *src, char *dst, int srcSize,
+	int maxDstSize, int compressionLevel, void *wrkmem)
+{
+	return LZ4_compress_HC_extStateHC(wrkmem, src, dst,
+		srcSize, maxDstSize, compressionLevel);
+}
+EXPORT_SYMBOL(LZ4_compress_HC);
 
-exit:
-	return ret;
+/**************************************
+ *	Streaming Functions
+ **************************************/
+void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel)
+{
+	LZ4_streamHCPtr->internal_donotuse.base = NULL;
+	LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned int)compressionLevel;
+}
+
+int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *dictionary,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	if (dictSize > 64 * KB) {
+		dictionary += dictSize - 64 * KB;
+		dictSize = 64 * KB;
+	}
+	LZ4HC_init(ctxPtr, (const BYTE *)dictionary);
+	if (dictSize >= 4)
+		LZ4HC_Insert(ctxPtr, (const BYTE *)dictionary + (dictSize - 3));
+	ctxPtr->end = (const BYTE *)dictionary + dictSize;
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDictHC);
+
+/* compression */
+
+static void LZ4HC_setExternalDict(
+	LZ4HC_CCtx_internal *ctxPtr,
+	const BYTE *newBlock)
+{
+	if (ctxPtr->end >= ctxPtr->base + 4) {
+		/* Referencing remaining dictionary content */
+		LZ4HC_Insert(ctxPtr, ctxPtr->end - 3);
+	}
+
+	/*
+	 * Only one memory segment for extDict,
+	 * so any previous extDict is lost at this stage
+	 */
+	ctxPtr->lowLimit	= ctxPtr->dictLimit;
+	ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+	ctxPtr->dictBase	= ctxPtr->base;
+	ctxPtr->base = newBlock - ctxPtr->dictLimit;
+	ctxPtr->end	= newBlock;
+	/* match referencing will resume from there */
+	ctxPtr->nextToUpdate = ctxPtr->dictLimit;
+}
+EXPORT_SYMBOL(LZ4HC_setExternalDict);
+
+static int LZ4_compressHC_continue_generic(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize,
+	limitedOutput_directive limit)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	/* auto - init if forgotten */
+	if (ctxPtr->base == NULL)
+		LZ4HC_init(ctxPtr, (const BYTE *) source);
+
+	/* Check overflow */
+	if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) {
+		size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base)
+			- ctxPtr->dictLimit;
+		if (dictSize > 64 * KB)
+			dictSize = 64 * KB;
+		LZ4_loadDictHC(LZ4_streamHCPtr,
+			(const char *)(ctxPtr->end) - dictSize, (int)dictSize);
+	}
+
+	/* Check if blocks follow each other */
+	if ((const BYTE *)source != ctxPtr->end)
+		LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source);
+
+	/* Check overlapping input/dictionary space */
+	{ const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+		const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+		const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
+
+		if ((sourceEnd > dictBegin)
+			&& ((const BYTE *)source < dictEnd)) {
+			if (sourceEnd > dictEnd)
+				sourceEnd = dictEnd;
+			ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+
+			if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4)
+				ctxPtr->lowLimit = ctxPtr->dictLimit;
+		}
+	}
+
+	return LZ4HC_compress_generic(ctxPtr, source, dest,
+		inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize)
+{
+	if (maxOutputSize < LZ4_compressBound(inputSize))
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, limitedOutput);
+	else
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, noLimit);
+}
+EXPORT_SYMBOL(LZ4_compress_HC_continue);
+
+/* dictionary saving */
+
+int LZ4_saveDictHC(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	char *safeBuffer,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+	int const prefixSize = (int)(streamPtr->end
+		- (streamPtr->base + streamPtr->dictLimit));
+
+	if (dictSize > 64 * KB)
+		dictSize = 64 * KB;
+	if (dictSize < 4)
+		dictSize = 0;
+	if (dictSize > prefixSize)
+		dictSize = prefixSize;
+
+	memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+
+	{ U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
+
+		streamPtr->end = (const BYTE *)safeBuffer + dictSize;
+		streamPtr->base = streamPtr->end - endIndex;
+		streamPtr->dictLimit = endIndex - dictSize;
+		streamPtr->lowLimit = endIndex - dictSize;
+
+		if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+			streamPtr->nextToUpdate = streamPtr->dictLimit;
+	}
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDictHC);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4hc_compress(const unsigned char *src, size_t src_len,
+	unsigned char *dst, size_t *dst_len, void *wrkmem)
+{
+	*dst_len = LZ4_compress_HC(src, dst, (int)src_len,
+		(int)((size_t)dst_len), LZ4HC_DEFAULT_CLEVEL, wrkmem);
+
+	/*
+	 * Prior lz4hc_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_HC
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4hc_compress);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4HC compressor");
+MODULE_DESCRIPTION("LZ4 HC compressor");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v6 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
  2017-01-27 22:01 ` [PATCH v6 " Sven Schmidt
  2017-01-27 22:02   ` [PATCH v6 1/5] lib: " Sven Schmidt
@ 2017-01-27 22:02   ` Sven Schmidt
  2017-01-27 22:02   ` [PATCH v6 3/5] crypto: Change LZ4 modules " Sven Schmidt
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-27 22:02 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch updates the unlz4 wrapper to work with the
updated LZ4 kernel module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 lib/decompress_unlz4.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 036fc88..1b0baf3 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 		error("NULL input pointer and missing fill function");
 		goto exit_1;
 	} else {
-		inp = large_malloc(lz4_compressbound(uncomp_chunksize));
+		inp = large_malloc(LZ4_compressBound(uncomp_chunksize));
 		if (!inp) {
 			error("Could not allocate input buffer");
 			goto exit_1;
@@ -136,7 +136,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			inp += 4;
 			size -= 4;
 		} else {
-			if (chunksize > lz4_compressbound(uncomp_chunksize)) {
+			if (chunksize > LZ4_compressBound(uncomp_chunksize)) {
 				error("chunk length is longer than allocated");
 				goto exit_2;
 			}
@@ -152,11 +152,14 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			out_len -= dest_len;
 		} else
 			dest_len = out_len;
-		ret = lz4_decompress(inp, &chunksize, outp, dest_len);
+
+		ret = LZ4_decompress_fast(inp, outp, dest_len);
+		chunksize = ret;
 #else
 		dest_len = uncomp_chunksize;
-		ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
-				&dest_len);
+
+		ret = LZ4_decompress_safe(inp, outp, chunksize, dest_len);
+		dest_len = ret;
 #endif
 		if (ret < 0) {
 			error("Decoding failed");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v6 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
  2017-01-27 22:01 ` [PATCH v6 " Sven Schmidt
  2017-01-27 22:02   ` [PATCH v6 1/5] lib: " Sven Schmidt
  2017-01-27 22:02   ` [PATCH v6 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
@ 2017-01-27 22:02   ` Sven Schmidt
  2017-01-27 22:02   ` [PATCH v6 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
  2017-01-27 22:02   ` [PATCH v6 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
  4 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-27 22:02 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch updates the crypto modules using LZ4 compression
to work with the new LZ4 module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 crypto/lz4.c   | 21 ++++++++-------------
 crypto/lz4hc.c | 21 ++++++++-------------
 2 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/crypto/lz4.c b/crypto/lz4.c
index 99c1b2c..40fd2c2 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -66,15 +66,13 @@ static void lz4_exit(struct crypto_tfm *tfm)
 static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
 				 u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_default(src, dst,
+		slen, (int)((size_t)dlen), ctx);
 
-	err = lz4_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (!out_len)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -96,16 +94,13 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen));
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
+	if (out_len < 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return out_len;
 }
 
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 75ffc4a..6f16f96 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -65,15 +65,13 @@ static void lz4hc_exit(struct crypto_tfm *tfm)
 static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_HC(src, dst, slen,
+		(int)((size_t)dlen), LZ4HC_DEFAULT_CLEVEL, ctx);
 
-	err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (out_len == 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -97,16 +95,13 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
 				     u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen));
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
+	if (out_len < 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return out_len;
 }
 
 static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v6 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
  2017-01-27 22:01 ` [PATCH v6 " Sven Schmidt
                     ` (2 preceding siblings ...)
  2017-01-27 22:02   ` [PATCH v6 3/5] crypto: Change LZ4 modules " Sven Schmidt
@ 2017-01-27 22:02   ` Sven Schmidt
  2017-01-27 22:02   ` [PATCH v6 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
  4 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-27 22:02 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch updates fs/pstore and fs/squashfs to use the updated
functions from the new LZ4 module.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 fs/pstore/platform.c      | 22 +++++++++++++---------
 fs/squashfs/lz4_wrapper.c | 12 ++++++------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 729677e..efab7b6 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -342,31 +342,35 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_compress(in, inlen, out, &outlen, workspace);
-	if (ret) {
-		pr_err("lz4_compress error, ret = %d!\n", ret);
+	ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
+	if (!ret) {
+		pr_err("LZ4_compress_default error; compression failed!\n");
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
-	if (ret) {
-		pr_err("lz4_decompress error, ret = %d!\n", ret);
+	ret = LZ4_decompress_safe(in, out, inlen, outlen);
+	if (ret < 0) {
+		/*
+		 * LZ4_decompress_safe will return an error code
+		 * (< 0) if decompression failed
+		 */
+		pr_err("LZ4_decompress_safe error, ret = %d!\n", ret);
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static void allocate_lz4(void)
 {
-	big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
+	big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
 	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
 	if (big_oops_buf) {
 		workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index ff4468b..95da653 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	struct squashfs_lz4 *stream = strm;
 	void *buff = stream->input, *data;
 	int avail, i, bytes = length, res;
-	size_t dest_len = output->length;
 
 	for (i = 0; i < b; i++) {
 		avail = min(bytes, msblk->devblksize - offset);
@@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 		put_bh(bh[i]);
 	}
 
-	res = lz4_decompress_unknownoutputsize(stream->input, length,
-					stream->output, &dest_len);
-	if (res)
+	res = LZ4_decompress_safe(stream->input, stream->output,
+		length, output->length);
+
+	if (res < 0)
 		return -EIO;
 
-	bytes = dest_len;
+	bytes = res;
 	data = squashfs_first_page(output);
 	buff = stream->output;
 	while (data) {
@@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	}
 	squashfs_finish_page(output);
 
-	return dest_len;
+	return res;
 }
 
 const struct squashfs_decompressor squashfs_lz4_comp_ops = {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v6 5/5] lib/lz4: Remove back-compat wrappers
  2017-01-27 22:01 ` [PATCH v6 " Sven Schmidt
                     ` (3 preceding siblings ...)
  2017-01-27 22:02   ` [PATCH v6 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
@ 2017-01-27 22:02   ` Sven Schmidt
  4 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-01-27 22:02 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch removes the functions introduced as wrappers for providing
backwards compatibility to the prior LZ4 version.
They're not needed anymore since there's no callers left.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      | 73 ------------------------------------------------
 lib/lz4/lz4_compress.c   | 22 ---------------
 lib/lz4/lz4_decompress.c | 42 ----------------------------
 lib/lz4/lz4hc_compress.c | 23 ---------------
 4 files changed, 160 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index ed59cb9..2844e6f 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -173,14 +173,6 @@ static inline int LZ4_compressBound(size_t isize)
 }
 
 /*
- * For backward compatibility
- */
-static inline int lz4_compressbound(size_t isize)
-{
-	return LZ4_COMPRESSBOUND(isize);
-}
-
-/*
  * LZ4_compress_default()
  *	Compresses 'sourceSize' bytes from buffer 'source'
  *	into already allocated 'dest' buffer of size 'maxOutputSize'.
@@ -249,23 +241,6 @@ int LZ4_compress_fast(const char *source, char *dest, int inputSize,
 int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
 	int targetDestSize, void *wrkmem);
 
-/*
- * lz4_compress()
- *	src    : source address of the original data
- *	src_len: size of the original data
- *	dst    : output buffer address of the compressed data
- *           This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len: is the output size, which is returned after compress done
- *	workmem: address of the working memory.
- *           This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *	return	: Success if return 0
- *            Error if return (< 0)
- *	note :	Destination buffer and workmem must be already allocated with
- *		the defined size.
- */
-int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem);
-
 /*-************************************************************************
  *	Decompression Functions
  **************************************************************************/
@@ -340,37 +315,6 @@ int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
 int LZ4_decompress_safe_partial(const char *source, char *dest,
 	int compressedSize, int targetOutputSize, int maxDecompressedSize);
 
-
-/*
- * lz4_decompress_unknownoutputsize() :
- *	src     : source address of the compressed data
- *	src_len : is the input size, therefore the compressed size
- *	dest    : output buffer address of the decompressed data
- *	dest_len: is the max size of the destination buffer, which is
- *            returned with actual size of decompressed data after
- *            decompress done
- * return: Success if return 0
- *         Error if return (< 0)
- * note:   Destination buffer must be already allocated.
- */
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-	unsigned char *dest, size_t *dest_len);
-
-/*
- * lz4_decompress() :
- *	src            : source address of the compressed data
- *	src_len        : is the input size,
- *                   which is returned after decompress done
- *	dest           : output buffer address of the decompressed data
- *	actual_dest_len: is the size of uncompressed data, supposing it's known
- *	return: Success if return 0
- *          Error if return (< 0)
- *	note  : Destination buffer must be already allocated.
- *          slightly faster than lz4_decompress_unknownoutputsize()
- */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-	unsigned char *dest, size_t actual_dest_len);
-
 /*-************************************************************************
  *	LZ4 HC Compression
  **************************************************************************/
@@ -399,23 +343,6 @@ int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
 	int compressionLevel, void *wrkmem);
 
 /*
- * lz4hc_compress()
- *	src    : source address of the original data
- *	src_len: size of the original data
- *	dst    : output buffer address of the compressed data
- *           This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len: is the output size, which is returned after compress done
- *	workmem: address of the working memory.
- *           This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
- * return	: Success if return 0
- *          Error if return (< 0)
- * note   : Destination buffer and workmem must be already allocated with
- *          the defined size.
- */
-int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem);
-
-/*
  *	These functions compress data in successive blocks of any size,
  *	using previous blocks as dictionary. One key assumption is that previous
  *	blocks (up to 64 KB) remain read-accessible while
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 295c71b..e78dbf3 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -872,27 +872,5 @@ int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
 }
 EXPORT_SYMBOL(LZ4_compress_fast_continue);
 
-/*-******************************
- *	For backwards compatibility
- ********************************/
-int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem) {
-	*dst_len = LZ4_compress_default(src, dst, (int)src_len,
-		(int)((size_t)dst_len), wrkmem);
-
-	/*
-	 * Prior lz4_compress will return -1 in case of error
-	 * and 0 on success
-	 * while new LZ4_compress_fast/default
-	 * returns 0 in case of error
-	 * and the output length on success
-	 */
-	if (!dst_len)
-		return -1;
-	else
-		return 0;
-}
-EXPORT_SYMBOL(lz4_compress);
-
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 compressor");
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 322ea3a..f97ee6d 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -485,47 +485,5 @@ int LZ4_decompress_fast_usingDict(const char *source, char *dest,
 }
 EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
 
-/*-******************************
- *	For backwards compatibility
- ********************************/
-int lz4_decompress_unknownoutputsize(const unsigned char *src,
-	size_t src_len, unsigned char *dest, size_t *dest_len) {
-	 *dest_len = LZ4_decompress_safe(src, dest,
-		 (int)src_len, (int)((size_t)dest_len));
-
-		/*
-		 * Prior lz4_decompress_unknownoutputsize will return
-		 * 0 for success and a negative result for error
-		 * new LZ4_decompress_safe returns
-		 * - the length of data read on success
-		 * - and also a negative result on error
-		 * meaning when result > 0, we just return 0 here
-		 */
-		if (src_len > 0)
-			return 0;
-		else
-			return -1;
-}
-EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
-
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-	unsigned char *dest, size_t actual_dest_len) {
-	*src_len = LZ4_decompress_fast(src, dest, (int)actual_dest_len);
-
-	/*
-	 * Prior lz4_decompress will return
-	 * 0 for success and a negative result for error
-	 * new LZ4_decompress_fast returns
-	 * - the length of data read on success
-	 * - and also a negative result on error
-	 * meaning when result > 0, we just return 0 here
-	 */
-	if ((int)((size_t)src_len) > 0)
-		return 0;
-	else
-		return -1;
-}
-EXPORT_SYMBOL(lz4_decompress);
-
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 decompressor");
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index da8063c..a29dad0 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -753,28 +753,5 @@ int LZ4_saveDictHC(
 }
 EXPORT_SYMBOL(LZ4_saveDictHC);
 
-/*-******************************
- *	For backwards compatibility
- ********************************/
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-	unsigned char *dst, size_t *dst_len, void *wrkmem)
-{
-	*dst_len = LZ4_compress_HC(src, dst, (int)src_len,
-		(int)((size_t)dst_len), LZ4HC_DEFAULT_CLEVEL, wrkmem);
-
-	/*
-	 * Prior lz4hc_compress will return -1 in case of error
-	 * and 0 on success
-	 * while new LZ4_compress_HC
-	 * returns 0 in case of error
-	 * and the output length on success
-	 */
-	if (!dst_len)
-		return -1;
-	else
-		return 0;
-}
-EXPORT_SYMBOL(lz4hc_compress);
-
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 HC compressor");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* Re: [PATCH v6 1/5] lib: Update LZ4 compressor module
  2017-01-27 22:02   ` [PATCH v6 1/5] lib: " Sven Schmidt
@ 2017-01-31 22:27     ` Jonathan Corbet
  2017-02-01 20:18       ` Sven Schmidt
  0 siblings, 1 reply; 104+ messages in thread
From: Jonathan Corbet @ 2017-01-31 22:27 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck

On Fri, 27 Jan 2017 23:02:00 +0100
Sven Schmidt <4sschmid@informatik.uni-hamburg.de> wrote:

I have one quick question...

>  /*
> + * LZ4_compress_default()
> + *	Compresses 'sourceSize' bytes from buffer 'source'
> + *	into already allocated 'dest' buffer of size 'maxOutputSize'.
> + *	Compression is guaranteed to succeed if
> + *	'maxOutputSize' >= LZ4_compressBound(inputSize).
> + *	It also runs faster, so it's a recommended setting.
> + *	If the function cannot compress 'source'
> + *	into a more limited 'dest' budget,
> + *	compression stops *immediately*,
> + *	and the function result is zero.
> + *	As a consequence, 'dest' content is not valid.
> + *
> + *	source       : source address of the original data
> + *	dest         : output buffer address
> + *                 of the compressed data
> + *	inputSize    : Max supported value is
> + *                 LZ4_MAX_INPUT_SIZE
> + *	maxOutputSize: full or partial size of buffer 'dest'
> + *                 (which must be already allocated)
> + *	workmem      : address of the working memory.
> + *                 This requires 'workmem' of size LZ4_MEM_COMPRESS.
> + *	return       : the number of bytes written into buffer 'dest'
> + *   (necessarily <= maxOutputSize) or 0 if compression fails
> + */
> +int LZ4_compress_default(const char *source, char *dest, int inputSize,
> +	int maxOutputSize, void *wrkmem);

Is there any chance you could format these as kerneldoc comments?  You're
not too far from it now, and that would allow the LZ4 interface to be
pulled into the documentation.

Thanks,

jon

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v6 1/5] lib: Update LZ4 compressor module
  2017-01-31 22:27     ` Jonathan Corbet
@ 2017-02-01 20:18       ` Sven Schmidt
  0 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-01 20:18 UTC (permalink / raw)
  To: Jonathan Corbet
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, 4sschmid

On Tue, Jan 31, 2017 at 03:27:44PM -0700, Jonathan Corbet wrote:
> On Fri, 27 Jan 2017 23:02:00 +0100
> Sven Schmidt <4sschmid@informatik.uni-hamburg.de> wrote:
> 
> I have one quick question...
> 
> >  /*
> > + * LZ4_compress_default()
> > + *	Compresses 'sourceSize' bytes from buffer 'source'
> > + *	into already allocated 'dest' buffer of size 'maxOutputSize'.
> > + *	Compression is guaranteed to succeed if
> > + *	'maxOutputSize' >= LZ4_compressBound(inputSize).
> > + *	It also runs faster, so it's a recommended setting.
> > + *	If the function cannot compress 'source'
> > + *	into a more limited 'dest' budget,
> > + *	compression stops *immediately*,
> > + *	and the function result is zero.
> > + *	As a consequence, 'dest' content is not valid.
> > + *
> > + *	source       : source address of the original data
> > + *	dest         : output buffer address
> > + *                 of the compressed data
> > + *	inputSize    : Max supported value is
> > + *                 LZ4_MAX_INPUT_SIZE
> > + *	maxOutputSize: full or partial size of buffer 'dest'
> > + *                 (which must be already allocated)
> > + *	workmem      : address of the working memory.
> > + *                 This requires 'workmem' of size LZ4_MEM_COMPRESS.
> > + *	return       : the number of bytes written into buffer 'dest'
> > + *   (necessarily <= maxOutputSize) or 0 if compression fails
> > + */
> > +int LZ4_compress_default(const char *source, char *dest, int inputSize,
> > +	int maxOutputSize, void *wrkmem);
> 
> Is there any chance you could format these as kerneldoc comments?  You're
> not too far from it now, and that would allow the LZ4 interface to be
> pulled into the documentation.
> 
> Thanks,
> 
> jon

Hi Jon,

of course, that makes sense. I already checked the documentation and you're right, I'm not that far from it.
Will do the necessary changes.

Thanks,

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v7 0/5] Update LZ4 compressor module
  2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
                   ` (8 preceding siblings ...)
  2017-01-27 22:01 ` [PATCH v6 " Sven Schmidt
@ 2017-02-05 19:09 ` Sven Schmidt
  2017-02-05 19:09   ` [PATCH v7 1/5] lib: " Sven Schmidt
                     ` (5 more replies)
  2017-02-15 18:16 ` [PATCH v8 " Sven Schmidt
  10 siblings, 6 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-05 19:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck


This patchset is for updating the LZ4 compression module to a version based
on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
which provides an "acceleration" parameter as a tradeoff between
high compression ratio and high compression speed.

We want to use LZ4 fast in order to support compression in lustre
and (mostly, based on that) investigate data reduction techniques in behalf of
storage systems.

Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
it is possible to enable applications to use fast and/or high compression
depending on the usecase.
For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
LZ4 in the kernel.

LZ4 homepage: http://www.lz4.org/
LZ4 source repository: https://github.com/lz4/lz4
Source version: 1.7.3

Benchmark (taken from [1], Core i5-4300U @1.9GHz):
----------------|--------------|----------------|----------
Compressor      | Compression  | Decompression  | Ratio
----------------|--------------|----------------|----------
memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101

[1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html

[PATCH 1/5] lib: Update LZ4 compressor module
[PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
[PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
[PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
[PATCH 5/5] lib/lz4: Remove back-compat wrappers

Changes:
v7:
- Fixed errors reported by the Smatch tool
- Changed function documentation comments in lz4.h to match kernel-doc style
- Fixed a misbehaviour of LZ4HC caused by the wrong level of indentation
  concerning two for loops introduced after I refactored the code style using
  checkpatch.pl (upstream LZ4 put dozens of stuff in just one line, gnah)
- Updated the crypto tests for LZ4 since they did fail for the new code
  and hence zram did fail to allocate memory for LZ4

v6:
- Fixed LZ4_NBCOMMONBYTES() for 64-bit little endian
- Reset LZ4_MEMORY_USAGE to 14 (which is the value used in
  upstream LZ4 as well as the previous kernel module)
- Fixed that weird double-indentation in lz4defs.h and lz4.h
- Adjusted general styling issues in lz4defs.h
  (e.g. lines consisting of more than one instruction)
- Removed the architecture-dependent typedef to reg_t
  since upstream LZ4 is just using size_t and that works fine
- Changed error messages in pstore/platform.c:
  * LZ4_compress_default always returns 0 in case of an error
    (no need to print the return value)
  * LZ4_decompress_safe returns a negative error message
    (return value _does_ matter)

v5:
- Added a fifth patch to remove the back-compat wrappers introduced
  to ensure bisectibility between the patches (the functions are no longer
  needed since there's no callers left)

v4:
- Fixed kbuild errors
- Re-added lz4_compressbound as alias for LZ4_compressBound
  to ensure backwards compatibility
- Wrapped LZ4_hash5 with check for LZ4_ARCH64 since it is only used there
  and triggers an unused function warning when false

v3:
- Adjusted the code to satisfy kernel coding style (checkpatch.pl)
- Made sure the changes to LZ4 in Kernel (overflow checks etc.)
  are included in the new module (they are)
- Removed the second LZ4_compressBound function with related name but
  different return type
- Corrected version number (was LZ4 1.7.3)
- Added missing LZ4 streaming functions

v2:
- Changed order of the patches since in the initial patchset the lz4.h was in the
  last patch but was referenced by the other ones
- Split lib/decompress_unlz4.c in an own patch
- Fixed errors reported by the buildbot
- Further refactorings
- Added more appropriate copyright note to include/linux/lz4.h

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v7 1/5] lib: Update LZ4 compressor module
  2017-02-05 19:09 ` [PATCH v7 0/5] Update LZ4 compressor module Sven Schmidt
@ 2017-02-05 19:09   ` Sven Schmidt
  2017-02-05 19:09   ` [PATCH v7 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-05 19:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch updates LZ4 kernel module to LZ4 v1.7.3 by Yann Collet.
The kernel module is inspired by the previous work by Chanho Min.
The updated LZ4 module will not break existing code since the patchset
contains appropriate changes.

API changes:

New method LZ4_compress_fast which differs from the variant available
in kernel by the new acceleration parameter,
allowing to trade compression ratio for more compression speed
and vice versa.

LZ4_decompress_fast is the respective decompression method,
featuring a very fast decoder (multiple GB/s per core),
able to reach RAM speed in multi-core systems.
The decompressor allows to decompress data compressed with
LZ4 fast as well as the LZ4 HC (high compression) algorithm.

Also the useful functions LZ4_decompress_safe_partial and
LZ4_compress_destsize were added.
The latter reverses the logic by trying to compress as much
data as possible from source to dest while the former aims
to decompress partial blocks of data.

A bunch of streaming functions were also added
which allow compressig/decompressing data in multiple steps
(so called "streaming mode").

The methods lz4_compress and lz4_decompress_unknownoutputsize
are now known as LZ4_compress_default respectivley
LZ4_decompress_safe.
The old methods will be removed since there's
no callers left in the code.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      |  762 ++++++++++++++++++++++++++++---
 lib/lz4/lz4_compress.c   | 1114 ++++++++++++++++++++++++++++++++--------------
 lib/lz4/lz4_decompress.c |  696 ++++++++++++++++++-----------
 lib/lz4/lz4defs.h        |  334 ++++++++------
 lib/lz4/lz4hc_compress.c |  867 +++++++++++++++++++++++-------------
 5 files changed, 2690 insertions(+), 1083 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 6b784c5..1b7ab2a 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -1,87 +1,717 @@
-#ifndef __LZ4_H__
-#define __LZ4_H__
-/*
- * LZ4 Kernel Interface
+/* LZ4 Kernel Interface
  *
  * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ *
+ * This file is based on the original header file
+ * for LZ4 - Fast LZ compression algorithm.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *	* Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  */
-#define LZ4_MEM_COMPRESS	(16384)
-#define LZ4HC_MEM_COMPRESS	(262144 + (2 * sizeof(unsigned char *)))
 
+#ifndef __LZ4_H__
+#define __LZ4_H__
+
+#include <linux/types.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
+/*-************************************************************************
+ *	CONSTANTS
+ **************************************************************************/
 /*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes
+ * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
  */
-static inline size_t lz4_compressbound(size_t isize)
+#define LZ4_MEMORY_USAGE 14
+
+#define LZ4_MAX_INPUT_SIZE	0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)	(\
+	(unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
+	? 0 \
+	: (isize) + ((isize)/255) + 16)
+
+#define LZ4_ACCELERATION_DEFAULT 1
+#define LZ4_HASHLOG	 (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+#define LZ4HC_MIN_CLEVEL			3
+#define LZ4HC_DEFAULT_CLEVEL			9
+#define LZ4HC_MAX_CLEVEL			16
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE - 1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+/*-************************************************************************
+ *	STREAMING CONSTANTS AND STRUCTURES
+ **************************************************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE	(LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
+
+#define LZ4_STREAMDECODESIZE_U64	4
+#define LZ4_STREAMDECODESIZE		 (LZ4_STREAMDECODESIZE_U64 * \
+	sizeof(unsigned long long))
+
+/*
+ * LZ4_stream_t - information structure to track an LZ4 stream.
+ */
+typedef struct {
+	uint32_t hashTable[LZ4_HASH_SIZE_U32];
+	uint32_t currentOffset;
+	uint32_t initCheck;
+	const uint8_t *dictionary;
+	uint8_t *bufferStart;
+	uint32_t dictSize;
+} LZ4_stream_t_internal;
+typedef union {
+	unsigned long long table[LZ4_STREAMSIZE_U64];
+	LZ4_stream_t_internal internal_donotuse;
+} LZ4_stream_t;
+
+/*
+ * LZ4_streamHC_t - information structure to track an LZ4HC stream.
+ */
+typedef struct {
+	unsigned int	 hashTable[LZ4HC_HASHTABLESIZE];
+	unsigned short	 chainTable[LZ4HC_MAXD];
+	/* next block to continue on current prefix */
+	const unsigned char *end;
+	/* All index relative to this position */
+	const unsigned char *base;
+	/* alternate base for extDict */
+	const unsigned char *dictBase;
+	/* below that point, need extDict */
+	unsigned int	 dictLimit;
+	/* below that point, no more dict */
+	unsigned int	 lowLimit;
+	/* index from which to continue dict update */
+	unsigned int	 nextToUpdate;
+	unsigned int	 compressionLevel;
+} LZ4HC_CCtx_internal;
+typedef union {
+	size_t table[LZ4_STREAMHCSIZE_SIZET];
+	LZ4HC_CCtx_internal internal_donotuse;
+} LZ4_streamHC_t;
+
+/*
+ * LZ4_streamDecode_t - information structure to track an
+ *	LZ4 stream during decompression.
+ *
+ * init this structure using LZ4_setStreamDecode (or memset()) before first use
+ */
+typedef struct {
+	const uint8_t *externalDict;
+	size_t extDictSize;
+	const uint8_t *prefixEnd;
+	size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+typedef union {
+	unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+	LZ4_streamDecode_t_internal internal_donotuse;
+} LZ4_streamDecode_t;
+
+/*-************************************************************************
+ *	SIZE OF STATE
+ **************************************************************************/
+#define LZ4_MEM_COMPRESS	LZ4_STREAMSIZE
+#define LZ4HC_MEM_COMPRESS	LZ4_STREAMHCSIZE
+
+/*-************************************************************************
+ *	Compression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_compressBound() - Max. output size in worst case szenarios
+ * @isize: Size of the input data
+ *
+ * Return: Max. size LZ4 may output in a "worst case" szenario
+ * (data not compressible)
+ */
+static inline int LZ4_compressBound(size_t isize)
 {
-	return isize + (isize / 255) + 16;
+	return LZ4_COMPRESSBOUND(isize);
 }
 
-/*
- * lz4_compress()
- *	src     : source address of the original data
- *	src_len : size of the original data
- *	dst	: output buffer address of the compressed data
- *		This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len : is the output size, which is returned after compress done
- *	workmem : address of the working memory.
- *		This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer and workmem must be already allocated with
- *		the defined size.
- */
-int lz4_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
-  * lz4hc_compress()
-  *	 src	 : source address of the original data
-  *	 src_len : size of the original data
-  *	 dst	 : output buffer address of the compressed data
-  *		This requires 'dst' of size LZ4_COMPRESSBOUND.
-  *	 dst_len : is the output size, which is returned after compress done
-  *	 workmem : address of the working memory.
-  *		This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
-  *	 return  : Success if return 0
-  *		   Error if return (< 0)
-  *	 note :  Destination buffer and workmem must be already allocated with
-  *		 the defined size.
-  */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
+/**
+ * lz4_compressbound() - For backwards compatibility; see LZ4_compressBound
+ * @isize: Size of the input data
+ *
+ * Return: Max. size LZ4 may output in a "worst case" szenario
+ *	(data not compressible)
+ */
+static inline int lz4_compressbound(size_t isize)
+{
+	return LZ4_COMPRESSBOUND(isize);
+}
+
+/**
+ * LZ4_compress_default() - Compress data from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ *	which must be already allocated
+ * @wrkmem: address of the working memory.
+ *	This requires 'workmem' of LZ4_MEM_COMPRESS.
+ *
+ * Compresses 'sourceSize' bytes from buffer 'source'
+ * into already allocated 'dest' buffer of size 'maxOutputSize'.
+ * Compression is guaranteed to succeed if
+ * 'maxOutputSize' >= LZ4_compressBound(inputSize).
+ * It also runs faster, so it's a recommended setting.
+ * If the function cannot compress 'source' into a more limited 'dest' budget,
+ * compression stops *immediately*, and the function result is zero.
+ * As a consequence, 'dest' content is not valid.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ *	(necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem);
+
+/**
+ * LZ4_compress_fast() - As LZ4_compress_default providing an acceleration param
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ *	which must be already allocated
+ * @acceleration: acceleration factor
+ * @wrkmem: address of the working memory.
+ *	This requires 'workmem' of LZ4_MEM_COMPRESS.
+ *
+ * Same as LZ4_compress_default(), but allows to select an "acceleration"
+ * factor. The larger the acceleration value, the faster the algorithm,
+ * but also the lesser the compression. It's a trade-off. It can be fine tuned,
+ * with each successive value providing roughly +~3% to speed.
+ * An acceleration value of "1" is the same as regular LZ4_compress_default()
+ * Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT, which is 1.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ *	(necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, int acceleration, void *wrkmem);
+
+/**
+ * LZ4_compress_destSize() - Compress as much data as possible
+ *	from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @sourceSizePtr: will be modified to indicate how many bytes where read
+ *	from 'source' to fill 'dest'. New value is necessarily <= old value.
+ * @targetDestSize: Size of buffer 'dest' which must be already allocated
+ * @wrkmem: address of the working memory.
+ *	This requires 'workmem' of LZ4_MEM_COMPRESS.
+ *
+ * Reverse the logic, by compressing as much data as possible
+ * from 'source' buffer into already allocated buffer 'dest'
+ * of size 'targetDestSize'.
+ * This function either compresses the entire 'source' content into 'dest'
+ * if it's large enough, or fill 'dest' buffer completely with as much data as
+ * possible from 'source'.
+ *
+ * Return: Number of bytes written into 'dest' (necessarily <= targetDestSize)
+ *	or 0 if compression fails
+ */
+int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
+	int targetDestSize, void *wrkmem);
 
 /*
- * lz4_decompress()
- *	src     : source address of the compressed data
- *	src_len : is the input size, whcih is returned after decompress done
- *	dest	: output buffer address of the decompressed data
- *	actual_dest_len: is the size of uncompressed data, supposing it's known
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
- *		slightly faster than lz4_decompress_unknownoutputsize()
+ * lz4_compress() - For backward compatibility, see LZ4_compress_default
+ * @src: source address of the original data
+ * @src_len: size of the original data
+ * @dst: output buffer address of the compressed data. This requires 'dst'
+ *	of size LZ4_COMPRESSBOUND
+ * @dst_len: is the output size, which is returned after compress done
+ * @workmem: address of the working memory.
+ *
+ * Return: Success if return 0, Error if return < 0
  */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len);
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*-************************************************************************
+ *	Decompression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_decompress_fast() - Decompresses data from 'source' into 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * Decompresses data from 'source' into 'dest'.
+ * This function fully respect memory boundaries for properly formed
+ * compressed data.
+ * It is a bit faster than LZ4_decompress_safe().
+ * However, it does not provide any protection against intentionally
+ * modified data stream (malicious input).
+ * Use this function in trusted environment only
+ * (data to decode comes from a trusted source).
+ *
+ * Return: number of bytes read from the source buffer
+ *	or a negative result if decompression fails.
+ */
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe() - Decompression protected against buffer overflow
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * Decompresses data fom 'source' into 'dest'.
+ * If the source stream is detected malformed, the function will
+ * stop decoding and return a negative result.
+ * This function is protected against buffer overflow exploits,
+ * including malicious data packets. It never writes outside output buffer,
+ * nor reads outside input buffer.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+
+/**
+ * LZ4_decompress_safe_partial() - Decompress a block of size 'compressedSize'
+ *	at position 'source' into buffer 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the decompressed data which must be
+ *	already allocated
+ * @compressedSize: is the precise full size of the compressed block.
+ * @targetOutputSize: the decompression operation will try
+ *	to stop as soon as 'targetOutputSize' has been reached
+ * @maxDecompressedSize: is the size of destination buffer
+ *
+ * This function decompresses a compressed block of size 'compressedSize'
+ * at position 'source' into destination buffer 'dest'
+ * of size 'maxDecompressedSize'.
+ * The function tries to stop decompressing operation as soon as
+ * 'targetOutputSize' has been reached, reducing decompression time.
+ * This function never writes outside of output buffer,
+ * and never reads outside of input buffer.
+ * It is therefore protected against malicious data packets.
+ *
+ * Return: the number of bytes decoded in the destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ *
+ */
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize);
 
 /*
- * lz4_decompress_unknownoutputsize()
- *	src     : source address of the compressed data
- *	src_len : is the input size, therefore the compressed size
- *	dest	: output buffer address of the decompressed data
- *	dest_len: is the max size of the destination buffer, which is
- *			returned with actual size of decompressed data after
- *			decompress done
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
+ * lz4_decompress_unknownoutputsize() - For backwards compatibility,
+ *	see LZ4_decompress_safe
+ * @src: source address of the compressed data
+ * @src_len: is the input size, therefore the compressed size
+ * @dest: output buffer address of the decompressed data
+ *	which must be already allocated
+ * @dest_len: is the max size of the destination buffer, which is
+ *	returned with actual size of decompressed data after decompress done
+ *
+ * Return: Success if return 0, Error if return (< 0)
  */
 int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len);
+	unsigned char *dest, size_t *dest_len);
+
+/**
+ * lz4_decompress() - For backwards cocmpatibility, see LZ4_decompress_fast
+ * @src: source address of the compressed data
+ * @src_len: is the input size, which is returned after decompress done
+ * @dest: output buffer address of the decompressed data,
+ *	which must be already allocated
+ * @actual_dest_len: is the size of uncompressed data, supposing it's known
+ *
+ * Return: Success if return 0, Error if return (< 0)
+ */
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len);
+
+/*-************************************************************************
+ *	LZ4 HC Compression
+ **************************************************************************/
+
+/**
+ * LZ4_compress_HC() - Compress data from `src` into `dst`, using HC algorithm
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @dstCapacity: full or partial size of buffer 'dst',
+ *	which must be already allocated
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ *	value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *	Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ * @wrkmem: address of the working memory.
+ *	This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+ *
+ * Compress data from 'src' into 'dst', using the more powerful
+ * but slower "HC" algorithm. Compression is guaranteed to succeed if
+ * `dstCapacity >= LZ4_compressBound(srcSize)
+ *
+ * Return : the number of bytes written into 'dst' or 0 if compression fails.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
+	int compressionLevel, void *wrkmem);
+
+/**
+ * lz4hc_compress() - For backwards compatibility, see LZ4_compress_HC
+ * @src: source address of the original data
+ * @src_len: size of the original data
+ * @dst: output buffer address of the compressed data. This requires 'dst'
+ *	of size LZ4_COMPRESSBOUND.
+ * @dst_len: is the output size, which is returned after compress done
+ * @wrkmem: address of the working memory.
+ *	This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
+ *
+ * Return	: Success if return 0, Error if return (< 0)
+ */
+int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/**
+ * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure
+ * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ *	value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *	Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ *
+ * An LZ4_streamHC_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_streamHC_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
+
+/**
+ * LZ4_loadDictHC() - Load a static dictionary into LZ4_streamHC
+ * @streamHCPtr: pointer to the LZ4HC_stream_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4HC_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int	LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
+	int dictSize);
+
+/**
+ * LZ4_compress_HC_continue() - Compress 'src' using data from previously
+ *	compressed blocks as a dictionary using the HC algorithm
+ * @streamHCPtr: Pointer to the previous 'LZ4_streamHC_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ *	which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dest'
+ *	which must be already allocated
+ *
+ * These functions compress data in successive blocks of any size, using
+ * previous blocks as dictionary. One key assumption is that previous
+ * blocks (up to 64 KB) remain read-accessible while
+ * compressing next blocks. There is an exception for ring buffers,
+ * which can be smaller than 64 KB.
+ * Ring buffers scenario is automatically detected and handled by
+ * LZ4_compress_HC_continue().
+ * Before starting compression, state must be properly initialized,
+ * using LZ4_resetStreamHC().
+ * A first "fictional block" can then be designated as
+ * initial dictionary, using LZ4_loadDictHC() (Optional).
+ * Then, use LZ4_compress_HC_continue()
+ * to compress each successive block. Previous memory blocks
+ * (including initial dictionary when present) must remain accessible
+ * and unmodified during compression.
+ * 'dst' buffer should be sized to handle worst case scenarios, using
+ *  LZ4_compressBound(), to ensure operation success.
+ *  If, for any reason, previous data blocks can't be preserved unmodified
+ *  in memory during next compression block,
+ *  you must save it to a safer memory space, using LZ4_saveDictHC().
+ * Return value of LZ4_saveDictHC() is the size of dictionary
+ * effectively saved into 'safeBuffer'.
+ *
+ * Return: Number of bytes written into buffer 'dst'  or 0 if compression fails
+ */
+int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize);
+
+/**
+ * LZ4_saveDictHC() - Save static dictionary from LZ4HC_stream
+ * @streamHCPtr: pointer to the 'LZ4HC_stream_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @maxDictSize: size of 'safeBuffer'
+ *
+ * If previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note : you don't need to call LZ4_loadDictHC() afterwards,
+ * dictionary is immediately usable, you can therefore call
+ * LZ4_compress_HC_continue().
+ *
+ * Return : saved dictionary size in bytes (necessarily <= maxDictSize),
+ *	or 0 if error.
+ */
+int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
+	int maxDictSize);
+
+/*-*********************************************
+ *	Streaming Compression Functions
+ ***********************************************/
+
+/**
+ * LZ4_resetStream() - Init an allocated 'LZ4_stream_t' structure
+ * @LZ4_stream: pointer to the 'LZ4_stream_t' structure
+ *
+ * An LZ4_stream_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_stream_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
+
+/**
+ * LZ4_loadDict() - Load a static dictionary into LZ4_stream
+ * @streamPtr: pointer to the LZ4_stream_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
+	int dictSize);
+
+/**
+ * LZ4_saveDict() - Save static dictionary from LZ4_stream
+ * @streamPtr: pointer to the 'LZ4_stream_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @dictSize: size of 'safeBuffer'
+ *
+ * If previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ * dictionary is immediately usable, you can therefore call
+ * LZ4_compress_fast_continue().
+ *
+ * Return : saved dictionary size in bytes (necessarily <= dictSize),
+ *	or 0 if error.
+ */
+int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
+
+/**
+ * LZ4_compress_fast_continue() - Compress 'src' using data from previously
+ *	compressed blocks as a dictionary
+ * @streamPtr: Pointer to the previous 'LZ4_stream_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ *	which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dest'
+ *	which must be already allocated
+ * @acceleration: acceleration factor
+ *
+ * Compress buffer content 'src', using data from previously compressed blocks
+ * as dictionary to improve compression ratio.
+ * Important : Previous data blocks are assumed to still
+ * be present and unmodified !
+ * If maxDstSize >= LZ4_compressBound(srcSize),
+ * compression is guaranteed to succeed, and runs faster.
+ *
+ * Return: Number of bytes written into buffer 'dst'  or 0 if compression fails
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize, int acceleration);
+
+/**
+ * LZ4_setStreamDecode() - Instruct where to find dictionary
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @dictionary: dictionary to use
+ * @dictSize: size of dictionary
+ *
+ * Use this function to instruct where to find the dictionary.
+ *	Setting a size of 0 is allowed (same effect as reset).
+ *
+ * Return: 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize);
+
+/**
+ * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * These decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB)
+ * In the case of a ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+
+/**
+ * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * These decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB)
+ * In the case of a ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe_usingDict() - Same as LZ4_setStreamDecode()
+ *	followed by LZ4_decompress_safe_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * These decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_safe_continue()
+ * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize, const char *dictStart,
+	int dictSize);
+
+/**
+ * LZ4_decompress_fast_usingDict() - Same as LZ4_setStreamDecode()
+ *	followed by LZ4_decompress_fast_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * These decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_safe_continue()
+ * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize);
+
 #endif
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 28321d8..6aa7ac3 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -1,19 +1,16 @@
 /*
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *	* Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,417 +22,886 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lz4.h>
 #include <asm/unaligned.h>
-#include "lz4defs.h"
 
-/*
- * LZ4_compressCtx :
- * -----------------
- * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
- * maximum size 'maxOutputSize'.  * If it cannot achieve it, compression
- * will stop, and result of the function will be zero.
- * return : the number of bytes written in buffer 'dest', or 0 if the
- * compression fails
- */
-static inline int lz4_compressctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+/*-******************************
+ *	Compression functions
+ ********************************/
+static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
 {
-	HTYPE *hashtable = (HTYPE *)ctx;
-	const u8 *ip = (u8 *)source;
+	if (tableType == byU16)
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH*8) - (LZ4_HASHLOG + 1)));
+	else
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH*8) - LZ4_HASHLOG));
+}
+
 #if LZ4_ARCH64
-	const BYTE * const base = ip;
+static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+{
+	const U32 hashLog = (tableType == byU16)
+		? LZ4_HASHLOG + 1
+		: LZ4_HASHLOG;
+
+#ifdef __LITTLE_ENDIAN__
+	static const U64 prime5bytes = 889523592379ULL;
+
+	return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
 #else
-	const int base = 0;
+	static const U64 prime8bytes = 11400714785074694791ULL;
+
+	return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
 #endif
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+}
+#endif
+
+static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
+{
+#if LZ4_ARCH64
+	if (tableType == byU32)
+		return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+#endif
+
+	return LZ4_hash4(LZ4_read32(p), tableType);
+}
+
+static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
+	tableType_t const tableType, const BYTE *srcBase)
+{
+	switch (tableType) {
+	case byPtr:
+	{
+		const BYTE **hashTable = (const BYTE **)tableBase;
+
+		hashTable[h] = p;
+		return;
+	}
+	case byU32:
+	{
+		U32 *hashTable = (U32 *) tableBase;
+
+		hashTable[h] = (U32)(p - srcBase);
+		return;
+	}
+	case byU16:
+	{
+		U16 *hashTable = (U16 *) tableBase;
+
+		hashTable[h] = (U16)(p - srcBase);
+		return;
+	}
+	}
+}
+
+static inline void LZ4_putPosition(const BYTE *p, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	if (tableType == byPtr) {
+		const BYTE **hashTable = (const BYTE **) tableBase;
+
+		return hashTable[h];
+	}
+
+	if (tableType == byU32) {
+		const U32 * const hashTable = (U32 *) tableBase;
+
+		return hashTable[h] + srcBase;
+	}
+
+	{
+		/* default, to ensure a return */
+		const U16 * const hashTable = (U16 *) tableBase;
+		return hashTable[h] + srcBase;
+	}
+}
+
+static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+
+/*
+ * LZ4_compress_generic() :
+ * inlined, to ensure branches are decided at compilation time
+ */
+static inline int LZ4_compress_generic(
+	LZ4_stream_t_internal * const dictPtr,
+	const char * const source,
+	char * const dest,
+	const int inputSize,
+	const int maxOutputSize,
+	const limitedOutput_directive outputLimited,
+	const tableType_t tableType,
+	const dict_directive dict,
+	const dictIssue_directive dictIssue,
+	const U32 acceleration)
+{
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *base;
+	const BYTE *lowLimit;
+	const BYTE * const lowRefLimit = ip - dictPtr->dictSize;
+	const BYTE * const dictionary = dictPtr->dictionary;
+	const BYTE * const dictEnd = dictionary + dictPtr->dictSize;
+	const size_t dictDelta = dictEnd - (const BYTE *)source;
+	const BYTE *anchor = (const BYTE *) source;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dest;
+	BYTE * const olimit = op + maxOutputSize;
+
+	U32 forwardH;
+	size_t refDelta = 0;
+
+	/* Init conditions */
+	if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) {
+		/* Unsupported inputSize, too large (or negative) */
+		return 0;
+	}
+	switch (dict) {
+	case noDict:
+	default:
+		base = (const BYTE *)source;
+		lowLimit = (const BYTE *)source;
+		break;
+	case withPrefix64k:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source - dictPtr->dictSize;
+		break;
+	case usingExtDict:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source;
+		break;
+	}
+
+	if ((tableType == byU16)
+		&& (inputSize >= LZ4_64Klimit)) {
+		/* Size too large (not within 64K limit) */
+		return 0;
+	}
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+	if (inputSize < LZ4_minLength) {
+		/* Input too small, no compression (all literals) */
+		goto _last_literals;
+	}
 
 	/* First Byte */
-	hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-	ip++;
-	forwardh = LZ4_HASH_VALUE(ip);
+	LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (unlikely(forwardip > mflimit))
-				goto _last_literals;
-
-			forwardh = LZ4_HASH_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = ip - base;
-		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+		{
+			const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = acceleration
+				<< LZ4_skipTrigger;
+
+			do {
+				U32 const h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h,
+					dictPtr->hashTable,
+					tableType, base);
+				if (dict == usingExtDict) {
+					if (match < (const BYTE *)source) {
+						refDelta = dictDelta;
+						lowLimit = dictionary;
+					} else {
+						refDelta = 0;
+						lowLimit = (const BYTE *)source;
+				}	 }
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
+					tableType, base);
+
+			} while (((dictIssue == dictSmall)
+					? (match < lowRefLimit)
+					: 0)
+				|| ((tableType == byU16)
+					? 0
+					: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match + refDelta)
+					!= LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source) &&
-			unlikely(ip[-1] == ref[-1])) {
+		while (((ip > anchor) & (match + refDelta > lowLimit))
+			&& (unlikely(ip[-1] == match[refDelta - 1]))) {
 			ip--;
-			ref--;
-		}
+			match--;
+			}
 
-		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
-			(length >> 8) > oend))
-			return 0;
+		/* Encode Literals */
+		{
+			unsigned const int litLength = (unsigned int)(ip - anchor);
 
-		if (length >= (int)RUN_MASK) {
-			int len;
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+			token = op++;
+			if ((outputLimited) &&
+				/* Check output buffer overflow */
+				(unlikely(op + litLength +
+					(2 + 1 + LASTLITERALS) +
+					(litLength/255) > olimit)))
+				return 0;
+			if (litLength >= RUN_MASK) {
+				int len = (int)litLength - RUN_MASK;
+
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255 ; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength<<ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
-		while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
-			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 		/* Encode MatchLength */
-		length = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
-			return 0;
-		if (length >= (int)ML_MASK) {
-			*token += ML_MASK;
-			length -= ML_MASK;
-			for (; length > 509 ; length -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (length > 254) {
-				length -= 255;
-				*op++ = 255;
+		{
+			unsigned int matchCode;
+
+			if ((dict == usingExtDict)
+				&& (lowLimit == dictionary)) {
+				const BYTE *limit;
+
+				match += refDelta;
+				limit = ip + (dictEnd - match);
+				if (limit > matchlimit)
+					limit = matchlimit;
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, limit);
+				ip += MINMATCH + matchCode;
+				if (ip == limit) {
+					unsigned const int more = LZ4_count(ip,
+						(const BYTE *)source,
+						matchlimit);
+
+					matchCode += more;
+					ip += more;
+				}
+			} else {
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, matchlimit);
+				ip += MINMATCH + matchCode;
 			}
-			*op++ = (u8)length;
-		} else
-			*token += length;
+
+			if (outputLimited &&
+				/* Check output buffer overflow */
+				(unlikely(op +
+					(1 + LASTLITERALS) +
+					(matchCode>>8) > olimit)))
+				return 0;
+			if (matchCode >= ML_MASK) {
+				*token += ML_MASK;
+				matchCode -= ML_MASK;
+				LZ4_write32(op, 0xFFFFFFFF);
+				while (matchCode >= 4*255) {
+					op += 4;
+					LZ4_write32(op, 0xFFFFFFFF);
+					matchCode -= 4*255;
+				}
+				op += matchCode / 255;
+				*op++ = (BYTE)(matchCode % 255);
+			} else
+				*token += (BYTE)(matchCode);
+		}
+
+		anchor = ip;
 
 		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		if (ip > mflimit)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
+		LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH_VALUE(ip)];
-		hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
+		match = LZ4_getPosition(ip, dictPtr->hashTable,
+			tableType, base);
+		if (dict == usingExtDict) {
+			if (match < (const BYTE *)source) {
+				refDelta = dictDelta;
+				lowLimit = dictionary;
+			} else {
+				refDelta = 0;
+				lowLimit = (const BYTE *)source;
+			}
+		}
+		LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+		if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
+			&& (match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
 			token = op++;
 			*token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (((char *)op - dest) + lastrun + 1
-		+ ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
-		return 0;
+	{
+		size_t const lastRun = (size_t)(iend - anchor);
 
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+		if ((outputLimited) &&
+			/* Check output buffer overflow */
+			((op - (BYTE *)dest) + lastRun + 1 +
+			((lastRun + 255 - RUN_MASK)/255) > (U32)maxOutputSize))
+			return 0;
+		if (lastRun >= RUN_MASK) {
+			size_t accumulator = lastRun - RUN_MASK;
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255 ; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRun);
+		op += lastRun;
+	}
 
 	/* End */
-	return (int)(((char *)op) - dest);
+	return (int) (((char *)op) - dest);
 }
 
-static inline int lz4_compress64kctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+static int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
+	int inputSize, int maxOutputSize, int acceleration)
 {
-	u16 *hashtable = (u16 *)ctx;
-	const u8 *ip = (u8 *) source;
-	const u8 *anchor = ip;
-	const u8 *const base = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int len, length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+	#if LZ4_ARCH64
+	tableType_t tableType = byU32;
+	#else
+	tableType_t tableType = byPtr;
+	#endif
+
+	LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
+
+	LZ4_resetStream((LZ4_stream_t *)state);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
+
+	if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, tableType, noDict,
+				noDictIssue, acceleration);
+	} else {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, tableType, noDict,
+				noDictIssue, acceleration);
+	}
+}
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, int acceleration, void *wrkmem)
+{
+	return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize,
+		maxOutputSize, acceleration);
+}
+EXPORT_SYMBOL(LZ4_compress_fast);
+
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem)
+{
+	return LZ4_compress_fast(source, dest, inputSize,
+		maxOutputSize, LZ4_ACCELERATION_DEFAULT, wrkmem);
+}
+EXPORT_SYMBOL(LZ4_compress_default);
+
+/*-******************************
+ *	*_destSize() variant
+ ********************************/
+
+static int LZ4_compress_destSize_generic(
+	LZ4_stream_t_internal * const ctx,
+	const char * const src,
+	char * const dst,
+	int * const srcSizePtr,
+	const int targetDstSize,
+	const tableType_t tableType)
+{
+	const BYTE *ip = (const BYTE *) src;
+	const BYTE *base = (const BYTE *) src;
+	const BYTE *lowLimit = (const BYTE *) src;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + *srcSizePtr;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dst;
+	BYTE * const oend = op + targetDstSize;
+	BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */
+		- 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */;
+	BYTE * const oMaxMatch = op + targetDstSize
+		- (LASTLITERALS + 1 /* token */);
+	BYTE * const oMaxSeq = oMaxLit - 1 /* token */;
+
+	U32 forwardH;
+
+	/* Init conditions */
+	/* Impossible to store anything */
+	if (targetDstSize < 1)
+		return 0;
+	/* Unsupported input size, too large (or negative) */
+	if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE)
+		return 0;
+	/* Size too large (not within 64K limit) */
+	if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit))
+		return 0;
+	/* Input too small, no compression (all literals) */
+	if (*srcSizePtr < LZ4_minLength)
+		goto _last_literals;
 
 	/* First Byte */
-	ip++;
-	forwardh = LZ4_HASH64K_VALUE(ip);
+	*srcSizePtr = 0;
+	LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (forwardip > mflimit)
-				goto _last_literals;
-
-			forwardh = LZ4_HASH64K_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = (u16)(ip - base);
-		} while (A32(ref) != A32(ip));
+		{
+			const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = 1 << LZ4_skipTrigger;
+
+			do {
+				U32 h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h, ctx->hashTable,
+					tableType, base);
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h,
+					ctx->hashTable, tableType,
+					base);
+
+			} while (((tableType == byU16)
+				? 0
+				: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match) != LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source)
-			&& (ip[-1] == ref[-1])) {
-			ip--;
-			ref--;
-		}
+		while ((ip > anchor)
+			&& (match > lowLimit)
+			&& (unlikely(ip[-1] == match[-1]))) {
+			ip--; match--;
+			}
 
 		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* Check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS)
-			+ (length >> 8) > oend))
-			return 0;
-		if (length >= (int)RUN_MASK) {
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+		{
+			unsigned int litLength = (unsigned int)(ip - anchor);
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
+			token = op++;
+			if (op + ((litLength + 240)/255)
+				+ litLength > oMaxLit) {
+				/* Not enough space for a last match */
+				op--;
+				goto _last_literals;
+			}
+			if (litLength >= RUN_MASK) {
+				unsigned int len = litLength - RUN_MASK;
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255 ; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength<<ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
+		/* Encode MatchLength */
+		{
+			size_t matchLength = LZ4_count(ip + MINMATCH,
+			match + MINMATCH, matchlimit);
 
-		while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
+			if (op + ((matchLength + 240)/255) > oMaxMatch) {
+				/* Match description too long : reduce it */
+				matchLength = (15 - 1) + (oMaxMatch - op) * 255;
 			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
+			ip += MINMATCH + matchLength;
+
+			if (matchLength >= ML_MASK) {
+				*token += ML_MASK;
+				matchLength -= ML_MASK;
+				while (matchLength >= 255) {
+					matchLength -= 255; *op++ = 255;
+				}
+				*op++ = (BYTE)matchLength;
+			} else
+				*token += (BYTE)(matchLength);
 		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 
-		/* Encode MatchLength */
-		len = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
-			return 0;
-		if (len >= (int)ML_MASK) {
-			*token += ML_MASK;
-			len -= ML_MASK;
-			for (; len > 509 ; len -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (len > 254) {
-				len -= 255;
-				*op++ = 255;
-			}
-			*op++ = (u8)len;
-		} else
-			*token += len;
+		anchor = ip;
 
-		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		/* Test end of block */
+		if (ip > mflimit)
+			break;
+		if (op > oMaxSeq)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
+		LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
-		hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
-		if (A32(ref) == A32(ip)) {
-			token = op++;
-			*token = 0;
+		match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
+		LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+
+		if ((match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match) == LZ4_read32(ip))) {
+			token = op++; *token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH64K_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
-		return 0;
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{
+		size_t lastRunSize = (size_t)(iend - anchor);
+
+		if (op + 1 /* token */
+			+ ((lastRunSize + 240)/255) /* litLength */
+			+ lastRunSize /* literals */ > oend) {
+			/* adapt lastRunSize to fill 'dst' */
+			lastRunSize	= (oend - op) - 1;
+			lastRunSize -= (lastRunSize + 240)/255;
+		}
+		ip = anchor + lastRunSize;
+
+		if (lastRunSize >= RUN_MASK) {
+			size_t accumulator = lastRunSize - RUN_MASK;
+
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255 ; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRunSize<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRunSize);
+		op += lastRunSize;
+	}
+
 	/* End */
-	return (int)(((char *)op) - dest);
+	*srcSizePtr = (int) (((const char *)ip) - src);
+	return (int) (((char *)op) - dst);
 }
 
-int lz4_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_destSize_extState(LZ4_stream_t *state, const char *src,
+	char *dst, int *srcSizePtr, int targetDstSize)
 {
-	int ret = -1;
-	int out_len = 0;
+	#if LZ4_ARCH64
+	tableType_t tableType = byU32;
+	#else
+	tableType_t tableType = byPtr;
+	#endif
+
+	LZ4_resetStream(state);
+
+	if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {
+		/* compression success is guaranteed */
+		return LZ4_compress_fast_extState(
+			state, src, dst, *srcSizePtr,
+			targetDstSize, 1);
+	} else {
+		if (*srcSizePtr < LZ4_64Klimit)
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, byU16);
+		else
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, tableType);
+	}
+}
 
-	if (src_len < LZ4_64KLIMIT)
-		out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
-	else
-		out_len = lz4_compressctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
 
-	if (out_len < 0)
-		goto exit;
+int LZ4_compress_destSize(const char *src, char *dst, int *srcSizePtr,
+	int targetDstSize, void *wrkmem)
+{
+	return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr,
+		targetDstSize);
+}
+EXPORT_SYMBOL(LZ4_compress_destSize);
+
+/*-******************************
+ *	Streaming functions
+ ********************************/
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream)
+{
+	memset(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+#define HASH_UNIT sizeof(size_t)
+int LZ4_loadDict(LZ4_stream_t *LZ4_dict,
+	const char *dictionary, int dictSize)
+{
+	LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse;
+	const BYTE *p = (const BYTE *)dictionary;
+	const BYTE * const dictEnd = p + dictSize;
+	const BYTE *base;
+
+	if ((dict->initCheck)
+		|| (dict->currentOffset > 1 * GB)) {
+		/* Uninitialized structure, or reuse overflow */
+		LZ4_resetStream(LZ4_dict);
+	}
+
+	if (dictSize < (int)HASH_UNIT) {
+		dict->dictionary = NULL;
+		dict->dictSize = 0;
+		return 0;
+	}
+
+	if ((dictEnd - p) > 64 * KB)
+		p = dictEnd - 64 * KB;
+	dict->currentOffset += 64 * KB;
+	base = p - dict->currentOffset;
+	dict->dictionary = p;
+	dict->dictSize = (U32)(dictEnd - p);
+	dict->currentOffset += dict->dictSize;
+
+	while (p <= dictEnd - HASH_UNIT) {
+		LZ4_putPosition(p, dict->hashTable, byU32, base);
+		p += 3;
+	}
+
+	return dict->dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDict);
+
+static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict,
+	const BYTE *src)
+{
+	if ((LZ4_dict->currentOffset > 0x80000000) ||
+		((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {
+		/* address space overflow */
+		/* rescale hash table */
+		U32 const delta = LZ4_dict->currentOffset - 64 * KB;
+		const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+		int i;
+
+		for (i = 0; i < LZ4_HASH_SIZE_U32; i++) {
+			if (LZ4_dict->hashTable[i] < delta)
+				LZ4_dict->hashTable[i] = 0;
+			else
+				LZ4_dict->hashTable[i] -= delta;
+		}
+		LZ4_dict->currentOffset = 64 * KB;
+		if (LZ4_dict->dictSize > 64 * KB)
+			LZ4_dict->dictSize = 64 * KB;
+		LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+	}
+}
+
+int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
+{
+	LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse;
+	const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize;
+
+	if ((U32)dictSize > 64 * KB) {
+		/* useless to define a dictionary > 64 * KB */
+		dictSize = 64 * KB;
+	}
+	if ((U32)dictSize > dict->dictSize)
+		dictSize = dict->dictSize;
+
+	memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+	dict->dictionary = (const BYTE *)safeBuffer;
+	dict->dictSize = (U32)dictSize;
+
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDict);
+
+int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
+	char *dest, int inputSize, int maxOutputSize, int acceleration)
+{
+	LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse;
+	const BYTE * const dictEnd = streamPtr->dictionary
+		+ streamPtr->dictSize;
+
+	const BYTE *smallest = (const BYTE *) source;
+
+	if (streamPtr->initCheck) {
+		/* Uninitialized structure detected */
+		return 0;
+	}
+
+	if ((streamPtr->dictSize > 0) && (smallest > dictEnd))
+		smallest = dictEnd;
+
+	LZ4_renormDictT(streamPtr, smallest);
 
-	*dst_len = out_len;
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
 
-	return 0;
-exit:
-	return ret;
+	/* Check overlapping input/dictionary space */
+	{
+		const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+
+		if ((sourceEnd > streamPtr->dictionary)
+			&& (sourceEnd < dictEnd)) {
+			streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+			if (streamPtr->dictSize > 64 * KB)
+				streamPtr->dictSize = 64 * KB;
+			if (streamPtr->dictSize < 4)
+				streamPtr->dictSize = 0;
+			streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+		}
+	}
+
+	/* prefix mode : source data follows dictionary */
+	if (dictEnd == (const BYTE *)source) {
+		int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, dictSmall,	acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, noDictIssue, acceleration);
+		}
+		streamPtr->dictSize += (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+
+	/* external dictionary mode */
+	{
+		int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, dictSmall, acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, noDictIssue, acceleration);
+		}
+		streamPtr->dictionary = (const BYTE *)source;
+		streamPtr->dictSize = (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+}
+EXPORT_SYMBOL(LZ4_compress_fast_continue);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem) {
+	*dst_len = LZ4_compress_default(src, dst, src_len,
+		*dst_len, wrkmem);
+
+	/*
+	 * Prior lz4_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_fast/default
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!*dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4_compress);
 
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 6d940c7..fd665ca 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -1,25 +1,16 @@
 /*
- * LZ4 Decompressor for Linux kernel
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * Based on LZ4 implementation by Yann Collet.
- *
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *	* Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -31,313 +22,508 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  You can contact the author at :
- *  - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- *  - LZ4 source repository : http://code.google.com/p/lz4/
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#ifndef STATIC
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#endif
-#include <linux/lz4.h>
-
 #include <asm/unaligned.h>
 
-#include "lz4defs.h"
-
-static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-static int lz4_uncompress(const char *source, char *dest, int osize)
+/*-*****************************
+ *	Decompression functions
+ *******************************/
+/* LZ4_decompress_generic() :
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is important this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+static inline int LZ4_decompress_generic(
+	 const char *const source,
+	 char * const dest,
+	 int inputSize,
+		/*
+		 * If endOnInput == endOnInputSize,
+		 * this value is the max size of Output Buffer.
+		 */
+	 int outputSize,
+	 /* endOnOutputSize, endOnInputSize */
+	 int endOnInput,
+	 /* full, partial */
+	 int partialDecoding,
+	 /* only used if partialDecoding == partial */
+	 int targetOutputSize,
+	 /* noDict, withPrefix64k, usingExtDict */
+	 int dict,
+	 /* == dest when no prefix */
+	 const BYTE * const lowPrefix,
+	 /* only if dict == usingExtDict */
+	 const BYTE * const dictStart,
+	 /* note : = 0 if noDict */
+	 const size_t dictSize
+	 )
 {
+	/* Local Variables */
 	const BYTE *ip = (const BYTE *) source;
-	const BYTE *ref;
+	const BYTE * const iend = ip + inputSize;
+
 	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + osize;
+	BYTE * const oend = op + outputSize;
 	BYTE *cpy;
-	unsigned token;
-	size_t length;
+	BYTE *oexit = op + targetOutputSize;
+	const BYTE * const lowLimit = lowPrefix - dictSize;
+
+	const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
+	const unsigned int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
+	const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+	const int safeDecode = (endOnInput == endOnInputSize);
+	const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
 
+	/* Special cases */
+	/* targetOutputSize too high => decode everything */
+	if ((partialDecoding) && (oexit > oend - MFLIMIT))
+		oexit = oend - MFLIMIT;
+
+	/* Empty output buffer */
+	if ((endOnInput) && (unlikely(outputSize == 0)))
+		return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
+
+	if ((!endOnInput) && (unlikely(outputSize == 0)))
+		return (*ip == 0 ? 1 : -1);
+
+	/* Main Loop : decode sequences */
 	while (1) {
+		size_t length;
+		const BYTE *match;
+		size_t offset;
+
+		/* get literal length */
+		unsigned int const token = *ip++;
+
+		length = token>>ML_BITS;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
 		if (length == RUN_MASK) {
-			size_t len;
+			unsigned int s;
 
-			len = *ip++;
-			for (; len == 255; length += 255)
-				len = *ip++;
-			if (unlikely(length > (size_t)(length + len)))
+			do {
+				s = *ip++;
+				length += s;
+			} while (likely(endOnInput
+				? ip < iend - RUN_MASK
+				: 1) & (s == 255));
+
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)(op))) {
+				/* overflow detection */
 				goto _output_error;
-			length += len;
+			}
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(ip + length) < (size_t)(ip))) {
+				/* overflow detection */
+				goto _output_error;
+			}
 		}
 
 		/* copy literals */
 		cpy = op + length;
-		if (unlikely(cpy > oend - COPYLENGTH)) {
-			/*
-			 * Error: not enough place for another match
-			 * (min 4) + 5 literals
-			 */
-			if (cpy != oend)
-				goto _output_error;
-
+		if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
+			|| (ip + length > iend - (2 + 1 + LASTLITERALS))))
+			|| ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
+			if (partialDecoding) {
+				if (cpy > oend) {
+					/*
+					 * Error :
+					 * write attempt beyond end of output buffer
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& (ip + length > iend)) {
+					/*
+					 * Error :
+					 * read attempt beyond
+					 * end of input buffer
+					 */
+					goto _output_error;
+				}
+			} else {
+				if ((!endOnInput)
+					&& (cpy != oend)) {
+					/*
+					 * Error :
+					 * block decoding must
+					 * stop exactly there
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& ((ip + length != iend)
+					|| (cpy > oend))) {
+					/*
+					 * Error :
+					 * input must be consumed
+					 */
+					goto _output_error;
+				}
+			}
 			memcpy(op, ip, length);
 			ip += length;
-			break; /* EOF */
+			op += length;
+			/* Necessarily EOF, due to parsing restrictions */
+			break;
 		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+		LZ4_wildCopy(op, ip, cpy);
+		ip += length; op = cpy;
 
 		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-
-		/* Error: offset create reference outside destination buffer */
-		if (unlikely(ref < (BYTE *const) dest))
+		offset = LZ4_readLE16(ip); ip += 2;
+		match = op - offset;
+		if ((checkOffset) && (unlikely(match < lowLimit))) {
+			/* Error : offset outside buffers */
 			goto _output_error;
+		}
+		/* costs ~1%; silence an msan warning when offset == 0 */
+		LZ4_write32(op, (U32)offset);
 
 		/* get matchlength */
 		length = token & ML_MASK;
 		if (length == ML_MASK) {
-			for (; *ip == 255; length += 255)
-				ip++;
-			if (unlikely(length > (size_t)(length + *ip)))
+			unsigned int s;
+
+			do {
+			s = *ip++;
+			if ((endOnInput) && (ip > iend - LASTLITERALS))
+				goto _output_error;
+			length += s;
+			} while (s == 255);
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)op)) {
+				/* overflow detection */
 				goto _output_error;
-			length += *ip++;
+			}
 		}
+		length += MINMATCH;
+
+		/* check external dictionary */
+		if ((dict == usingExtDict) && (match < lowPrefix)) {
+			if (unlikely(op + length > oend - LASTLITERALS)) {
+				/* doesn't respect parsing restriction */
+				goto _output_error;
+			}
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-			op[0] = ref[0];
-			op[1] = ref[1];
-			op[2] = ref[2];
-			op[3] = ref[3];
-			op += 4;
-			ref += 4;
-			ref -= dec32table[op-ref];
-			PUT4(ref, op);
-			op += STEPSIZE - 4;
-			ref -= dec64;
+			if (length <= (size_t)(lowPrefix - match)) {
+			/*
+			 * match can be copied as a single segment
+			 * from external dictionary
+			 */
+			memmove(op, dictEnd - (lowPrefix - match), length);
+			op += length;
+			} else {
+				/*
+				 * match encompass external
+				 * dictionary and current block
+				 */
+				size_t const copySize = (size_t)(lowPrefix - match);
+				size_t const restSize = length - copySize;
+
+				memcpy(op, dictEnd - copySize, copySize);
+				op += copySize;
+
+				if (restSize > (size_t)(op - lowPrefix)) {
+					/* overlap copy */
+					BYTE * const endOfMatch = op + restSize;
+					const BYTE *copyFrom = lowPrefix;
+
+					while (op < endOfMatch)
+						*op++ = *copyFrom++;
+				} else {
+					memcpy(op, lowPrefix, restSize);
+					op += restSize;
+				}
+			}
+			continue;
+		}
+
+		/* copy match within block */
+		cpy = op + length;
+		if (unlikely(offset < 8)) {
+			const int dec64 = dec64table[offset];
+
+			op[0] = match[0];
+			op[1] = match[1];
+			op[2] = match[2];
+			op[3] = match[3];
+			match += dec32table[offset];
+			memcpy(op + 4, match, 4);
+			match -= dec64;
 		} else {
-			LZ4_COPYSTEP(ref, op);
+			LZ4_copy8(op, match); match += 8;
 		}
-		cpy = op + length - (STEPSIZE - 4);
-		if (cpy > (oend - COPYLENGTH)) {
 
-			/* Error: request to write beyond destination buffer */
-			if (cpy > oend)
-				goto _output_error;
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
+		op += 8;
+
+		if (unlikely(cpy > oend - 12)) {
+			BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
+
+			if (cpy > oend - LASTLITERALS) {
+				/*
+				 * Error : last LASTLITERALS bytes
+				 * must be literals (uncompressed)
+				 */
 				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
+			}
+			if (op < oCopyLimit) {
+				LZ4_wildCopy(op, match, oCopyLimit);
+				match += oCopyLimit - op;
+				op = oCopyLimit;
+			}
 			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
+				*op++ = *match++;
+		} else {
+			LZ4_copy8(op, match);
+			if (length > 16)
+				LZ4_wildCopy(op + 8, match + 8, cpy);
 		}
-		LZ4_SECURECOPY(ref, op, cpy);
 		op = cpy; /* correction */
 	}
+
 	/* end of decoding */
-	return (int) (((char *)ip) - source);
+	if (endOnInput) {
+		/* Nb of output bytes decoded */
+		return (int) (((char *)op) - dest);
+	} else {
+		/* Nb of input bytes read */
+		return (int) (((const char *)ip) - source);
+	}
 
-	/* write overflow error detected */
+	/* Overflow error detected */
 _output_error:
 	return -1;
 }
 
-static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
-				int isize, size_t maxoutputsize)
+int LZ4_decompress_safe(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize)
 {
-	const BYTE *ip = (const BYTE *) source;
-	const BYTE *const iend = ip + isize;
-	const BYTE *ref;
-
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, full, 0,
+		noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe);
 
-	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + maxoutputsize;
-	BYTE *cpy;
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize)
+{
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, partial,
+		targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_partial);
 
-	/* Main Loop */
-	while (ip < iend) {
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
+{
+	return LZ4_decompress_generic(source, dest, 0, originalSize,
+		endOnOutputSize, full, 0, withPrefix64k,
+		(BYTE *)(dest - 64 * KB), NULL, 64 * KB);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast);
 
-		unsigned token;
-		size_t length;
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
-		if (length == RUN_MASK) {
-			int s = 255;
-			while ((ip < iend) && (s == 255)) {
-				s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-			}
-		}
-		/* copy literals */
-		cpy = op + length;
-		if ((cpy > oend - COPYLENGTH) ||
-			(ip + length > iend - COPYLENGTH)) {
-
-			if (cpy > oend)
-				goto _output_error;/* writes beyond buffer */
-
-			if (ip + length != iend)
-				goto _output_error;/*
-						    * Error: LZ4 format requires
-						    * to consume all input
-						    * at this stage
-						    */
-			memcpy(op, ip, length);
-			op += length;
-			break;/* Necessarily EOF, due to parsing restrictions */
-		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+	lz4sd->prefixSize = (size_t) dictSize;
+	lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize;
+	lz4sd->externalDict = NULL;
+	lz4sd->extDictSize	= 0;
+	return 1;
+}
+EXPORT_SYMBOL(LZ4_setStreamDecode);
 
-		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-		if (ref < (BYTE * const) dest)
-			goto _output_error;
-			/*
-			 * Error : offset creates reference
-			 * outside of destination buffer
-			 */
+/*
+ * *_continue() :
+ * These decoding functions allow decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks must still be available at the memory
+ * position where they were decoded.
+ * If it's not possible, save the relevant part of
+ * decoded data into a safe buffer,
+ * and indicate where it stands using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize, int maxOutputSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize,
+			maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict,
+			lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += result;
+		lz4sd->prefixEnd	+= result;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = result;
+		lz4sd->prefixEnd	= (BYTE *)dest + result;
+	}
 
-		/* get matchlength */
-		length = (token & ML_MASK);
-		if (length == ML_MASK) {
-			while (ip < iend) {
-				int s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-				if (s == 255)
-					continue;
-				break;
-			}
-		}
+	return result;
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_continue);
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-				op[0] = ref[0];
-				op[1] = ref[1];
-				op[2] = ref[2];
-				op[3] = ref[3];
-				op += 4;
-				ref += 4;
-				ref -= dec32table[op - ref];
-				PUT4(ref, op);
-				op += STEPSIZE - 4;
-				ref -= dec64;
-		} else {
-			LZ4_COPYSTEP(ref, op);
-		}
-		cpy = op + length - (STEPSIZE-4);
-		if (cpy > oend - COPYLENGTH) {
-			if (cpy > oend)
-				goto _output_error; /* write outside of buf */
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
-				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
-		}
-		LZ4_SECURECOPY(ref, op, cpy);
-		op = cpy; /* correction */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict,
+			lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict, lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += originalSize;
+		lz4sd->prefixEnd	+= originalSize;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = originalSize;
+		lz4sd->prefixEnd	= (BYTE *)dest + originalSize;
 	}
-	/* end of decoding */
-	return (int) (((char *) op) - dest);
 
-	/* write overflow error detected */
-_output_error:
-	return -1;
+	return result;
 }
+EXPORT_SYMBOL(LZ4_decompress_fast_continue);
 
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len)
+/*
+ * Advanced decoding functions :
+ * *_usingDict() :
+ * These decoding functions work the same as "_continue" ones,
+ * the dictionary must be explicitly provided within parameters
+ */
+static inline int LZ4_decompress_usingDict_generic(const char *source,
+	char *dest, int compressedSize, int maxOutputSize, int safe,
+	const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int input_len = 0;
-
-	input_len = lz4_uncompress(src, dest, actual_dest_len);
-	if (input_len < 0)
-		goto exit_0;
-	*src_len = input_len;
+	if (dictSize == 0)
+		return LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize, safe, full, 0,
+			noDict, (BYTE *)dest, NULL, 0);
+	if (dictStart + dictSize == dest) {
+		if (dictSize >= (int)(64 * KB - 1))
+			return LZ4_decompress_generic(source, dest,
+				compressedSize, maxOutputSize, safe, full, 0,
+				withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
+		return LZ4_decompress_generic(source, dest, compressedSize,
+			maxOutputSize, safe, full, 0, noDict,
+			(BYTE *)dest - dictSize, NULL, 0);
+	}
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxOutputSize, safe, full, 0, usingExtDict,
+		(BYTE *)dest, (const BYTE *)dictStart, dictSize);
+}
 
-	return 0;
-exit_0:
-	return ret;
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxOutputSize,
+	const char *dictStart, int dictSize)
+{
+	return LZ4_decompress_usingDict_generic(source, dest,
+		compressedSize, maxOutputSize, 1, dictStart, dictSize);
 }
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress);
-#endif
+EXPORT_SYMBOL(LZ4_decompress_safe_usingDict);
 
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len)
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int out_len = 0;
-
-	out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
-					*dest_len);
-	if (out_len < 0)
-		goto exit_0;
-	*dest_len = out_len;
-
-	return 0;
-exit_0:
-	return ret;
+	return LZ4_decompress_usingDict_generic(source, dest, 0,
+		originalSize, 0, dictStart, dictSize);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_decompress_unknownoutputsize(const unsigned char *src,
+	size_t src_len, unsigned char *dest, size_t *dest_len) {
+	*dest_len = LZ4_decompress_safe(src, dest,
+		src_len, *dest_len);
+
+	/*
+	 * Prior lz4_decompress_unknownoutputsize will return
+	 * 0 for success and a negative result for error
+	 * new LZ4_decompress_safe returns
+	 * - the length of data read on success
+	 * - and also a negative result on error
+	 * meaning when result > 0, we just return 0 here
+	 */
+	if (src_len > 0)
+		return 0;
+	else
+		return -1;
 }
-#ifndef STATIC
 EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
 
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len) {
+	*src_len = LZ4_decompress_fast(src, dest, actual_dest_len);
+
+	/*
+	 * Prior lz4_decompress will return
+	 * 0 for success and a negative result for error
+	 * new LZ4_decompress_fast returns
+	 * - the length of data read on success
+	 * - and also a negative result on error
+	 * meaning when result > 0, we just return 0 here
+	 */
+	if (*src_len > 0)
+		return 0;
+	else
+		return -1;
+}
+EXPORT_SYMBOL(lz4_decompress);
+
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4 Decompressor");
-#endif
+MODULE_DESCRIPTION("LZ4 decompressor");
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index c79d7ea..23e1a1b 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -1,157 +1,229 @@
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
 /*
- * lz4defs.h -- architecture specific defines
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * lz4defs.h -- common and architecture specific defines for the kernel usage
+
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *	* Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+#include <asm/unaligned.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
 /*
  * Detects 64 bits mode
- */
+*/
 #if defined(CONFIG_64BIT)
 #define LZ4_ARCH64 1
 #else
 #define LZ4_ARCH64 0
 #endif
 
-/*
- * Architecture-specific macros
- */
-#define BYTE	u8
-typedef struct _U16_S { u16 v; } U16_S;
-typedef struct _U32_S { u32 v; } U32_S;
-typedef struct _U64_S { u64 v; } U64_S;
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-
-#define A16(x) (((U16_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A64(x) (((U64_S *)(x))->v)
-
-#define PUT4(s, d) (A32(d) = A32(s))
-#define PUT8(s, d) (A64(d) = A64(s))
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - A16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
-	do {	\
-		A16(p) = v; \
-		p += 2; \
-	} while (0)
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
-#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
-#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
-
-#define PUT4(s, d) \
-	put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
-#define PUT8(s, d) \
-	put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - get_unaligned_le16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)			\
-	do {						\
-		put_unaligned_le16(v, (u16 *)(p));	\
-		p += 2;					\
-	} while (0)
-#endif
+/*-************************************
+ *	Basic Types
+ **************************************/
+#include <linux/types.h>
 
-#define COPYLENGTH 8
-#define ML_BITS  4
-#define ML_MASK  ((1U << ML_BITS) - 1)
-#define RUN_BITS (8 - ML_BITS)
-#define RUN_MASK ((1U << RUN_BITS) - 1)
-#define MEMORY_USAGE	14
-#define MINMATCH	4
-#define SKIPSTRENGTH	6
-#define LASTLITERALS	5
-#define MFLIMIT		(COPYLENGTH + MINMATCH)
-#define MINLENGTH	(MFLIMIT + 1)
-#define MAXD_LOG	16
-#define MAXD		(1 << MAXD_LOG)
-#define MAXD_MASK	(u32)(MAXD - 1)
-#define MAX_DISTANCE	(MAXD - 1)
-#define HASH_LOG	(MAXD_LOG - 1)
-#define HASHTABLESIZE	(1 << HASH_LOG)
-#define MAX_NB_ATTEMPTS	256
-#define OPTIMAL_ML	(int)((ML_MASK-1)+MINMATCH)
-#define LZ4_64KLIMIT	((1<<16) + (MFLIMIT - 1))
-#define HASHLOG64K	((MEMORY_USAGE - 2) + 1)
-#define HASH64KTABLESIZE	(1U << HASHLOG64K)
-#define LZ4_HASH_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - (MEMORY_USAGE-2)))
-#define LZ4_HASH64K_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASHLOG64K))
-#define HASH_VALUE(p)		(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASH_LOG))
-
-#if LZ4_ARCH64/* 64-bit */
-#define STEPSIZE 8
-
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT8(s, d);	\
-		d += 8;		\
-		s += 8;		\
-	} while (0)
-
-#define LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
-
-#define LZ4_SECURECOPY(s, d, e)			\
-	do {					\
-		if (d < e) {			\
-			LZ4_WILDCOPY(s, d, e);	\
-		}				\
-	} while (0)
-#define HTYPE u32
-
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+typedef	uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef	int32_t S32;
+typedef uint64_t U64;
+typedef uintptr_t uptrval;
+
+/*-************************************
+ *	Constants
+ **************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB (1<<10)
+#define MB (1<<20)
+#define GB (1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1<<MAXD_LOG) - 1)
+#define STEPSIZE sizeof(size_t)
+
+#define ML_BITS	4
+#define ML_MASK	((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;
+
+/*-************************************
+ *	Reading and writing into memory
+ **************************************/
+
+static inline U16 LZ4_read16(const void *memPtr)
+{
+	U16 val;
+
+	memcpy(&val, memPtr, sizeof(val));
+
+	return val;
+}
+
+static inline U32 LZ4_read32(const void *memPtr)
+{
+	U32 val;
+
+	memcpy(&val, memPtr, sizeof(val));
+
+	return val;
+}
+
+static inline size_t LZ4_read_ARCH(const void *memPtr)
+{
+	size_t val;
+
+	memcpy(&val, memPtr, sizeof(val));
+
+	return val;
+}
+
+static inline void LZ4_write16(void *memPtr, U16 value)
+{
+	memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline void LZ4_write32(void *memPtr, U32 value)
+{
+	memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline U16 LZ4_readLE16(const void *memPtr)
+{
+#ifdef __LITTLE_ENDIAN__
+	return LZ4_read16(memPtr);
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
-#endif
+	const BYTE *p = (const BYTE *)memPtr;
 
-#else	/* 32-bit */
-#define STEPSIZE 4
+	return (U16)((U16)p[0] + (p[1] << 8));
+#endif
+}
 
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT4(s, d);	\
-		d += 4;		\
-		s += 4;		\
-	} while (0)
+static inline void LZ4_writeLE16(void *memPtr, U16 value)
+{
+#ifdef __LITTLE_ENDIAN__
+	LZ4_write16(memPtr, value);
+#else
+	BYTE *p = (BYTE *)memPtr;
 
-#define LZ4_COPYPACKET(s, d)		\
-	do {				\
-		LZ4_COPYSTEP(s, d);	\
-		LZ4_COPYSTEP(s, d);	\
-	} while (0)
+	p[0] = (BYTE) value;
+	p[1] = (BYTE)(value>>8);
+#endif
+}
 
-#define LZ4_SECURECOPY	LZ4_WILDCOPY
-#define HTYPE const u8*
+static inline void LZ4_copy8(void *dst, const void *src)
+{
+	memcpy(dst, src, 8);
+}
 
-#ifdef __BIG_ENDIAN
+/*
+ * customized variant of memcpy,
+ * which can overwrite up to 7 bytes beyond dstEnd
+ */
+static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
+{
+	BYTE *d = (BYTE *)dstPtr;
+	const BYTE *s = (const BYTE *)srcPtr;
+	BYTE *const e = (BYTE *)dstEnd;
+
+	do {
+		LZ4_copy8(d, s);
+		d += 8;
+		s += 8;
+	} while (d < e);
+}
+
+#if LZ4_ARCH64
+#ifdef __BIG_ENDIAN__
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
+#endif
+#else
+#ifdef __BIG_ENDIAN__
 #define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
 #else
 #define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
 #endif
+#endif
 
+static inline unsigned int LZ4_count(const BYTE *pIn, const BYTE *pMatch,
+	const BYTE *pInLimit)
+{
+	const BYTE *const pStart = pIn;
+
+	while (likely(pIn < pInLimit-(STEPSIZE-1))) {
+		size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+
+		if (!diff) {
+			pIn += STEPSIZE;
+			pMatch += STEPSIZE;
+			continue;
+		}
+		pIn += LZ4_NBCOMMONBYTES(diff);
+		return (unsigned int)(pIn - pStart);
+	}
+
+#ifdef LZ4_ARCH64
+	if ((pIn < (pInLimit-3))
+		&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+		pIn += 4; pMatch += 4;
+	}
 #endif
+	if ((pIn < (pInLimit-1))
+		&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+		pIn += 2; pMatch += 2;
+	}
+	if ((pIn < pInLimit) && (*pMatch == *pIn))
+		pIn++;
+	return (unsigned int)(pIn - pStart);
+}
+
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
+
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
 
-#define LZ4_WILDCOPY(s, d, e)		\
-	do {				\
-		LZ4_COPYPACKET(s, d);	\
-	} while (d < e)
-
-#define LZ4_BLINDCOPY(s, d, l)	\
-	do {	\
-		u8 *e = (d) + l;	\
-		LZ4_WILDCOPY(s, d, e);	\
-		d = e;	\
-	} while (0)
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+#endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index f344f76..f1d4662 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -1,19 +1,17 @@
 /*
  * LZ4 HC - High Compression Mode of LZ4
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Copyright (C) 2011-2015, Yann Collet.
  *
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *	* Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,323 +23,361 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+/*-************************************
+ *	Dependencies
+ **************************************/
 #include <linux/lz4.h>
-#include <asm/unaligned.h>
 #include "lz4defs.h"
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h> /* memset */
+
+/* *************************************
+ *	Local Constants and types
+ ***************************************/
 
-struct lz4hc_data {
-	const u8 *base;
-	HTYPE hashtable[HASHTABLESIZE];
-	u16 chaintable[MAXD];
-	const u8 *nexttoupdate;
-} __attribute__((__packed__));
+#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH)
 
-static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
+#define HASH_FUNCTION(i)	(((i) * 2654435761U) \
+	>> ((MINMATCH*8) - LZ4HC_HASH_LOG))
+#define DELTANEXTU16(p)	chainTable[(U16)(p)] /* faster */
+
+static U32 LZ4HC_hashPtr(const void *ptr)
+{
+	return HASH_FUNCTION(LZ4_read32(ptr));
+}
+
+/**************************************
+ *	HC Compression
+ **************************************/
+static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
 {
-	memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
-	memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
-
-#if LZ4_ARCH64
-	hc4->nexttoupdate = base + 1;
-#else
-	hc4->nexttoupdate = base;
-#endif
-	hc4->base = base;
-	return 1;
+	memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable));
+	memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+	hc4->nextToUpdate = 64 * KB;
+	hc4->base = start - 64 * KB;
+	hc4->end = start;
+	hc4->dictBase = start - 64 * KB;
+	hc4->dictLimit = 64 * KB;
+	hc4->lowLimit = 64 * KB;
 }
 
 /* Update chains up to ip (excluded) */
-static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
+static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+	const BYTE *ip)
 {
-	u16 *chaintable = hc4->chaintable;
-	HTYPE *hashtable  = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const hashTable	= hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
+	U32 const target = (U32)(ip - base);
+	U32 idx = hc4->nextToUpdate;
+
+	while (idx < target) {
+		U32 const h = LZ4HC_hashPtr(base + idx);
+		size_t delta = idx - hashTable[h];
 
-	while (hc4->nexttoupdate < ip) {
-		const u8 *p = hc4->nexttoupdate;
-		size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
 		if (delta > MAX_DISTANCE)
 			delta = MAX_DISTANCE;
-		chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
-		hashtable[HASH_VALUE(p)] = (p) - base;
-		hc4->nexttoupdate++;
-	}
-}
 
-static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
-		const u8 *const matchlimit)
-{
-	const u8 *p1t = p1;
-
-	while (p1t < matchlimit - (STEPSIZE - 1)) {
-#if LZ4_ARCH64
-		u64 diff = A64(p2) ^ A64(p1t);
-#else
-		u32 diff = A32(p2) ^ A32(p1t);
-#endif
-		if (!diff) {
-			p1t += STEPSIZE;
-			p2 += STEPSIZE;
-			continue;
-		}
-		p1t += LZ4_NBCOMMONBYTES(diff);
-		return p1t - p1;
-	}
-#if LZ4_ARCH64
-	if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
-		p1t += 4;
-		p2 += 4;
-	}
-#endif
+		DELTANEXTU16(idx) = (U16)delta;
 
-	if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
-		p1t += 2;
-		p2 += 2;
+		hashTable[h] = idx;
+		idx++;
 	}
-	if ((p1t < matchlimit) && (*p2 == *p1t))
-		p1t++;
-	return p1t - p1;
+
+	hc4->nextToUpdate = target;
 }
 
-static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
-		const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
+static inline int LZ4HC_InsertAndFindBestMatch(
+	LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
+	const BYTE *ip,
+	const BYTE * const iLimit,
+	const BYTE **matchpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-	const u8 *ref;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	int nbattempts = MAX_NB_ATTEMPTS;
-	size_t repl = 0, ml = 0;
-	u16 delta;
+	const BYTE * const dictBase = hc4->dictBase;
+	const U32 dictLimit = hc4->dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	U32 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	size_t ml = 0;
 
 	/* HC4 match finder */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	/* potential repetition */
-	if (ref >= ip-4) {
-		/* confirmed */
-		if (A32(ref) == A32(ip)) {
-			delta = (u16)(ip-ref);
-			repl = ml  = lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
-			*matchpos = ref;
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE * const match = base + matchIndex;
+
+			if (*(match + ml) == *(ip + ml)
+				&& (LZ4_read32(match) == LZ4_read32(ip))) {
+				size_t const mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, iLimit) + MINMATCH;
 
-	while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
-		nbattempts--;
-		if (*(ref + ml) == *(ip + ml)) {
-			if (A32(ref) == A32(ip)) {
-				size_t mlt =
-					lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
 				if (mlt > ml) {
 					ml = mlt;
-					*matchpos = ref;
+					*matchpos = match;
+				}
+			}
+		} else {
+			const BYTE * const match = dictBase + matchIndex;
+
+			if (LZ4_read32(match) == LZ4_read32(ip)) {
+				size_t mlt;
+				const BYTE *vLimit = ip
+					+ (dictLimit - matchIndex);
+
+				if (vLimit > iLimit)
+					vLimit = iLimit;
+				mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, vLimit) + MINMATCH;
+				if ((ip + mlt == vLimit)
+					&& (vLimit < iLimit))
+					mlt += LZ4_count(ip + mlt,
+						base + dictLimit,
+						iLimit);
+				if (mlt > ml) {
+					/* virtual matchpos */
+					ml = mlt;
+					*matchpos = base + matchIndex;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-
-	/* Complete table */
-	if (repl) {
-		const BYTE *ptr = ip;
-		const BYTE *end;
-		end = ip + repl - (MINMATCH-1);
-		/* Pre-Load */
-		while (ptr < end - delta) {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			ptr++;
-		}
-		do {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			/* Head of chain */
-			hashtable[HASH_VALUE(ptr)] = (ptr) - base;
-			ptr++;
-		} while (ptr < end);
-		hc4->nexttoupdate = end;
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
 
 	return (int)ml;
 }
 
-static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
-	const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
-	const u8 **matchpos, const u8 **startpos)
+static inline int LZ4HC_InsertAndGetWiderMatch(
+	LZ4HC_CCtx_internal *hc4,
+	const BYTE * const ip,
+	const BYTE * const iLowLimit,
+	const BYTE * const iHighLimit,
+	int longest,
+	const BYTE **matchpos,
+	const BYTE **startpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	const u8 *ref;
-	int nbattempts = MAX_NB_ATTEMPTS;
-	int delta = (int)(ip - startlimit);
+	const U32 dictLimit = hc4->dictLimit;
+	const BYTE * const lowPrefixPtr = base + dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	const BYTE * const dictBase = hc4->dictBase;
+	U32	 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	int delta = (int)(ip - iLowLimit);
 
 	/* First Match */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
-		&& (nbattempts)) {
-		nbattempts--;
-		if (*(startlimit + longest) == *(ref - delta + longest)) {
-			if (A32(ref) == A32(ip)) {
-				const u8 *reft = ref + MINMATCH;
-				const u8 *ipt = ip + MINMATCH;
-				const u8 *startt = ip;
-
-				while (ipt < matchlimit-(STEPSIZE - 1)) {
-					#if LZ4_ARCH64
-					u64 diff = A64(reft) ^ A64(ipt);
-					#else
-					u32 diff = A32(reft) ^ A32(ipt);
-					#endif
-
-					if (!diff) {
-						ipt += STEPSIZE;
-						reft += STEPSIZE;
-						continue;
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE *matchPtr = base + matchIndex;
+
+			if (*(iLowLimit + longest)
+				== *(matchPtr - delta + longest)) {
+				if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+					int mlt = MINMATCH + LZ4_count(
+						ip + MINMATCH,
+						matchPtr + MINMATCH,
+						iHighLimit);
+					int back = 0;
+
+					while ((ip + back > iLowLimit)
+						&& (matchPtr + back > lowPrefixPtr)
+						&& (ip[back - 1] == matchPtr[back - 1]))
+						back--;
+
+					mlt -= back;
+
+					if (mlt > longest) {
+						longest = (int)mlt;
+						*matchpos = matchPtr + back;
+						*startpos = ip + back;
 					}
-					ipt += LZ4_NBCOMMONBYTES(diff);
-					goto _endcount;
-				}
-				#if LZ4_ARCH64
-				if ((ipt < (matchlimit - 3))
-					&& (A32(reft) == A32(ipt))) {
-					ipt += 4;
-					reft += 4;
 				}
-				ipt += 2;
-				#endif
-				if ((ipt < (matchlimit - 1))
-					&& (A16(reft) == A16(ipt))) {
-					reft += 2;
-				}
-				if ((ipt < matchlimit) && (*reft == *ipt))
-					ipt++;
-_endcount:
-				reft = ref;
-
-				while ((startt > startlimit)
-					&& (reft > hc4->base)
-					&& (startt[-1] == reft[-1])) {
-					startt--;
-					reft--;
-				}
-
-				if ((ipt - startt) > longest) {
-					longest = (int)(ipt - startt);
-					*matchpos = reft;
-					*startpos = startt;
+			}
+		} else {
+			const BYTE * const matchPtr = dictBase + matchIndex;
+
+			if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+				size_t mlt;
+				int back = 0;
+				const BYTE *vLimit = ip + (dictLimit - matchIndex);
+
+				if (vLimit > iHighLimit)
+					vLimit = iHighLimit;
+
+				mlt = LZ4_count(ip + MINMATCH,
+					matchPtr + MINMATCH, vLimit) + MINMATCH;
+
+				if ((ip + mlt == vLimit) && (vLimit < iHighLimit))
+					mlt += LZ4_count(ip + mlt, base + dictLimit,
+						iHighLimit);
+				while ((ip + back > iLowLimit)
+					&& (matchIndex + back > lowLimit)
+					&& (ip[back - 1] == matchPtr[back - 1]))
+					back--;
+
+				mlt -= back;
+
+				if ((int)mlt > longest) {
+					longest = (int)mlt;
+					*matchpos = base + matchIndex + back;
+					*startpos = ip + back;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
+
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
+
 	return longest;
 }
 
-static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
-		int ml, const u8 *ref)
+static inline int LZ4HC_encodeSequence(
+	const BYTE **ip,
+	BYTE **op,
+	const BYTE **anchor,
+	int matchLength,
+	const BYTE * const match,
+	limitedOutput_directive limitedOutputBuffer,
+	BYTE *oend)
 {
-	int length, len;
-	u8 *token;
+	int length;
+	BYTE *token;
 
 	/* Encode Literal length */
 	length = (int)(*ip - *anchor);
 	token = (*op)++;
+
+	if ((limitedOutputBuffer)
+		&& ((*op + (length>>8)
+			+ length + (2 + 1 + LASTLITERALS)) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
 	if (length >= (int)RUN_MASK) {
-		*token = (RUN_MASK << ML_BITS);
+		int len;
+
+		*token = (RUN_MASK<<ML_BITS);
 		len = length - RUN_MASK;
 		for (; len > 254 ; len -= 255)
 			*(*op)++ = 255;
-		*(*op)++ = (u8)len;
+		*(*op)++ = (BYTE)len;
 	} else
-		*token = (length << ML_BITS);
+		*token = (BYTE)(length<<ML_BITS);
 
 	/* Copy Literals */
-	LZ4_BLINDCOPY(*anchor, *op, length);
+	LZ4_wildCopy(*op, *anchor, (*op) + length);
+	*op += length;
 
 	/* Encode Offset */
-	LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
+	LZ4_writeLE16(*op, (U16)(*ip - match));
+	*op += 2;
 
 	/* Encode MatchLength */
-	len = (int)(ml - MINMATCH);
-	if (len >= (int)ML_MASK) {
+	length = (int)(matchLength - MINMATCH);
+
+	if ((limitedOutputBuffer)
+		&& (*op + (length>>8)
+			+ (1 + LASTLITERALS) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
+
+	if (length >= (int)ML_MASK) {
 		*token += ML_MASK;
-		len -= ML_MASK;
-		for (; len > 509 ; len -= 510) {
+		length -= ML_MASK;
+
+		for (; length > 509 ; length -= 510) {
 			*(*op)++ = 255;
 			*(*op)++ = 255;
 		}
-		if (len > 254) {
-			len -= 255;
+
+		if (length > 254) {
+			length -= 255;
 			*(*op)++ = 255;
 		}
-		*(*op)++ = (u8)len;
+
+		*(*op)++ = (BYTE)length;
 	} else
-		*token += len;
+		*token += (BYTE)(length);
 
 	/* Prepare next loop */
-	*ip += ml;
+	*ip += matchLength;
 	*anchor = *ip;
 
 	return 0;
 }
 
-static int lz4_compresshcctx(struct lz4hc_data *ctx,
-		const char *source,
-		char *dest,
-		int isize)
+static int LZ4HC_compress_generic(
+	LZ4HC_CCtx_internal *const ctx,
+	const char * const source,
+	char * const dest,
+	int const inputSize,
+	int const maxOutputSize,
+	int compressionLevel,
+	limitedOutput_directive limit
+	)
 {
-	const u8 *ip = (const u8 *)source;
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	const u8 *const matchlimit = (iend - LASTLITERALS);
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = (iend - LASTLITERALS);
 
-	u8 *op = (u8 *)dest;
+	BYTE *op = (BYTE *) dest;
+	BYTE * const oend = op + maxOutputSize;
 
+	unsigned int maxNbAttempts;
 	int ml, ml2, ml3, ml0;
-	const u8 *ref = NULL;
-	const u8 *start2 = NULL;
-	const u8 *ref2 = NULL;
-	const u8 *start3 = NULL;
-	const u8 *ref3 = NULL;
-	const u8 *start0;
-	const u8 *ref0;
-	int lastrun;
+	const BYTE *ref = NULL;
+	const BYTE *start2 = NULL;
+	const BYTE *ref2 = NULL;
+	const BYTE *start3 = NULL;
+	const BYTE *ref3 = NULL;
+	const BYTE *start0;
+	const BYTE *ref0;
+
+	/* init */
+	if (compressionLevel > LZ4HC_MAX_CLEVEL)
+		compressionLevel = LZ4HC_MAX_CLEVEL;
+	if (compressionLevel < 1)
+		compressionLevel = LZ4HC_DEFAULT_CLEVEL;
+	maxNbAttempts = 1 << (compressionLevel - 1);
+	ctx->end += inputSize;
 
 	ip++;
 
 	/* Main Loop */
 	while (ip < mflimit) {
-		ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
+		ml = LZ4HC_InsertAndFindBestMatch(ctx, ip,
+			matchlimit, (&ref), maxNbAttempts);
 		if (!ml) {
 			ip++;
 			continue;
@@ -351,51 +387,59 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		start0 = ip;
 		ref0 = ref;
 		ml0 = ml;
-_search2:
-		if (ip+ml < mflimit)
-			ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
-				ip + 1, matchlimit, ml, &ref2, &start2);
+
+_Search2:
+		if (ip + ml < mflimit)
+			ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				ip + ml - 2, ip + 0,
+				matchlimit, ml, &ref2,
+				&start2, maxNbAttempts);
 		else
 			ml2 = ml;
-		/* No better match */
+
 		if (ml2 == ml) {
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			/* No better match */
+			if (LZ4HC_encodeSequence(&ip, &op,
+				&anchor, ml, ref, limit, oend))
+				return 0;
 			continue;
 		}
 
 		if (start0 < ip) {
-			/* empirical */
 			if (start2 < ip + ml0) {
+				/* empirical */
 				ip = start0;
 				ref = ref0;
 				ml = ml0;
 			}
 		}
-		/*
-		 * Here, start0==ip
-		 * First Match too small : removed
-		 */
+
+		/* Here, start0 == ip */
 		if ((start2 - ip) < 3) {
+			/* First Match too small : removed */
 			ml = ml2;
 			ip = start2;
 			ref = ref2;
-			goto _search2;
+			goto _Search2;
 		}
 
-_search3:
+_Search3:
 		/*
-		 * Currently we have :
-		 * ml2 > ml1, and
-		 * ip1+3 <= ip2 (usually < ip1+ml1)
-		 */
+		* Currently we have :
+		* ml2 > ml1, and
+		* ip1 + 3 <= ip2 (usually < ip1 + ml1)
+		*/
 		if ((start2 - ip) < OPTIMAL_ML) {
 			int correction;
 			int new_ml = ml;
+
 			if (new_ml > OPTIMAL_ML)
 				new_ml = OPTIMAL_ML;
 			if (ip + new_ml > start2 + ml2 - MINMATCH)
 				new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+
 			correction = new_ml - (int)(start2 - ip);
+
 			if (correction > 0) {
 				start2 += correction;
 				ref2 += correction;
@@ -403,39 +447,44 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			}
 		}
 		/*
-		 * Now, we have start2 = ip+new_ml,
-		 * with new_ml=min(ml, OPTIMAL_ML=18)
+		 * Now, we have start2 = ip + new_ml,
+		 * with new_ml = min(ml, OPTIMAL_ML = 18)
 		 */
+
 		if (start2 + ml2 < mflimit)
-			ml3 = lz4hc_insertandgetwidermatch(ctx,
-				start2 + ml2 - 3, start2, matchlimit,
-				ml2, &ref3, &start3);
+			ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				start2 + ml2 - 3, start2,
+				matchlimit, ml2, &ref3, &start3,
+				maxNbAttempts);
 		else
 			ml3 = ml2;
 
-		/* No better match : 2 sequences to encode */
 		if (ml3 == ml2) {
+			/* No better match : 2 sequences to encode */
 			/* ip & ref are known; Now for ml */
-			if (start2 < ip+ml)
+			if (start2 < ip + ml)
 				ml = (int)(start2 - ip);
-
 			/* Now, encode 2 sequences */
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml, ref, limit, oend))
+				return 0;
 			ip = start2;
-			lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml2, ref2, limit, oend))
+				return 0;
 			continue;
 		}
 
-		/* Not enough space for match 2 : remove it */
 		if (start3 < ip + ml + 3) {
-			/*
-			 * can write Seq1 immediately ==> Seq2 is removed,
-			 * so Seq3 becomes Seq1
-			 */
+			/* Not enough space for match 2 : remove it */
 			if (start3 >= (ip + ml)) {
+				/* can write Seq1 immediately
+				 * ==> Seq2 is removed,
+				 * so Seq3 becomes Seq1
+				 */
 				if (start2 < ip + ml) {
-					int correction =
-						(int)(ip + ml - start2);
+					int correction = (int)(ip + ml - start2);
+
 					start2 += correction;
 					ref2 += correction;
 					ml2 -= correction;
@@ -446,35 +495,38 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 					}
 				}
 
-				lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-				ip  = start3;
+				if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+					ml, ref, limit, oend))
+					return 0;
+				ip = start3;
 				ref = ref3;
-				ml  = ml3;
+				ml = ml3;
 
 				start0 = start2;
 				ref0 = ref2;
 				ml0 = ml2;
-				goto _search2;
+				goto _Search2;
 			}
 
 			start2 = start3;
 			ref2 = ref3;
 			ml2 = ml3;
-			goto _search3;
+			goto _Search3;
 		}
 
 		/*
-		 * OK, now we have 3 ascending matches; let's write at least
-		 * the first one ip & ref are known; Now for ml
-		 */
+		* OK, now we have 3 ascending matches;
+		* let's write at least the first one
+		* ip & ref are known; Now for ml
+		*/
 		if (start2 < ip + ml) {
 			if ((start2 - ip) < (int)ML_MASK) {
 				int correction;
+
 				if (ml > OPTIMAL_ML)
 					ml = OPTIMAL_ML;
 				if (ip + ml > start2 + ml2 - MINMATCH)
-					ml = (int)(start2 - ip) + ml2
-						- MINMATCH;
+					ml = (int)(start2 - ip) + ml2 - MINMATCH;
 				correction = ml - (int)(start2 - ip);
 				if (correction > 0) {
 					start2 += correction;
@@ -484,7 +536,9 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			} else
 				ml = (int)(start2 - ip);
 		}
-		lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+		if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml,
+			ref, limit, oend))
+			return 0;
 
 		ip = start2;
 		ref = ref2;
@@ -494,46 +548,245 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		ref2 = ref3;
 		ml2 = ml3;
 
-		goto _search3;
+		goto _Search3;
 	}
 
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8) lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{
+		int lastRun = (int)(iend - anchor);
+
+		if ((limit)
+			&& (((char *)op - dest) + lastRun + 1
+				+ ((lastRun + 255 - RUN_MASK)/255)
+					> (U32)maxOutputSize)) {
+			/* Check output limit */
+			return 0;
+		}
+		if (lastRun >= (int)RUN_MASK) {
+			*op++ = (RUN_MASK<<ML_BITS);
+			lastRun -= RUN_MASK;
+			for (; lastRun > 254 ; lastRun -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) lastRun;
+		} else
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		memcpy(op, anchor, iend - anchor);
+		op += iend - anchor;
+	}
+
 	/* End */
 	return (int) (((char *)op) - dest);
 }
 
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_HC_extStateHC(
+	void *state,
+	const char *src,
+	char *dst,
+	int srcSize,
+	int maxDstSize,
+	int compressionLevel)
 {
-	int ret = -1;
-	int out_len = 0;
+	LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t *)state)->internal_donotuse;
 
-	struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
-	lz4hc_init(hc4, (const u8 *)src);
-	out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
-		(char *)dst, (int)src_len);
+	if (((size_t)(state)&(sizeof(void *) - 1)) != 0) {
+		/* Error : state is not aligned
+		 * for pointers (32 or 64 bits)
+		 */
+		return 0;
+	}
 
-	if (out_len < 0)
-		goto exit;
+	LZ4HC_init(ctx, (const BYTE *)src);
 
-	*dst_len = out_len;
-	return 0;
+	if (maxDstSize < LZ4_compressBound(srcSize))
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, limitedOutput);
+	else
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, noLimit);
+}
+
+int LZ4_compress_HC(const char *src, char *dst, int srcSize,
+	int maxDstSize, int compressionLevel, void *wrkmem)
+{
+	return LZ4_compress_HC_extStateHC(wrkmem, src, dst,
+		srcSize, maxDstSize, compressionLevel);
+}
+EXPORT_SYMBOL(LZ4_compress_HC);
+
+/**************************************
+ *	Streaming Functions
+ **************************************/
+void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel)
+{
+	LZ4_streamHCPtr->internal_donotuse.base = NULL;
+	LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned int)compressionLevel;
+}
+
+int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *dictionary,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	if (dictSize > 64 * KB) {
+		dictionary += dictSize - 64 * KB;
+		dictSize = 64 * KB;
+	}
+	LZ4HC_init(ctxPtr, (const BYTE *)dictionary);
+	if (dictSize >= 4)
+		LZ4HC_Insert(ctxPtr, (const BYTE *)dictionary + (dictSize - 3));
+	ctxPtr->end = (const BYTE *)dictionary + dictSize;
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDictHC);
+
+/* compression */
+
+static void LZ4HC_setExternalDict(
+	LZ4HC_CCtx_internal *ctxPtr,
+	const BYTE *newBlock)
+{
+	if (ctxPtr->end >= ctxPtr->base + 4) {
+		/* Referencing remaining dictionary content */
+		LZ4HC_Insert(ctxPtr, ctxPtr->end - 3);
+	}
+
+	/*
+	 * Only one memory segment for extDict,
+	 * so any previous extDict is lost at this stage
+	 */
+	ctxPtr->lowLimit	= ctxPtr->dictLimit;
+	ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+	ctxPtr->dictBase	= ctxPtr->base;
+	ctxPtr->base = newBlock - ctxPtr->dictLimit;
+	ctxPtr->end	= newBlock;
+	/* match referencing will resume from there */
+	ctxPtr->nextToUpdate = ctxPtr->dictLimit;
+}
+EXPORT_SYMBOL(LZ4HC_setExternalDict);
+
+static int LZ4_compressHC_continue_generic(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize,
+	limitedOutput_directive limit)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	/* auto - init if forgotten */
+	if (ctxPtr->base == NULL)
+		LZ4HC_init(ctxPtr, (const BYTE *) source);
+
+	/* Check overflow */
+	if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) {
+		size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base)
+			- ctxPtr->dictLimit;
+		if (dictSize > 64 * KB)
+			dictSize = 64 * KB;
+		LZ4_loadDictHC(LZ4_streamHCPtr,
+			(const char *)(ctxPtr->end) - dictSize, (int)dictSize);
+	}
+
+	/* Check if blocks follow each other */
+	if ((const BYTE *)source != ctxPtr->end)
+		LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source);
+
+	/* Check overlapping input/dictionary space */
+	{
+		const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+		const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+		const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
+
+		if ((sourceEnd > dictBegin)
+			&& ((const BYTE *)source < dictEnd)) {
+			if (sourceEnd > dictEnd)
+				sourceEnd = dictEnd;
+			ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+
+			if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4)
+				ctxPtr->lowLimit = ctxPtr->dictLimit;
+		}
+	}
+
+	return LZ4HC_compress_generic(ctxPtr, source, dest,
+		inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize)
+{
+	if (maxOutputSize < LZ4_compressBound(inputSize))
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, limitedOutput);
+	else
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, noLimit);
+}
+EXPORT_SYMBOL(LZ4_compress_HC_continue);
+
+/* dictionary saving */
+
+int LZ4_saveDictHC(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	char *safeBuffer,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+	int const prefixSize = (int)(streamPtr->end
+		- (streamPtr->base + streamPtr->dictLimit));
+
+	if (dictSize > 64 * KB)
+		dictSize = 64 * KB;
+	if (dictSize < 4)
+		dictSize = 0;
+	if (dictSize > prefixSize)
+		dictSize = prefixSize;
+
+	memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
 
-exit:
-	return ret;
+	{
+		U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
+
+		streamPtr->end = (const BYTE *)safeBuffer + dictSize;
+		streamPtr->base = streamPtr->end - endIndex;
+		streamPtr->dictLimit = endIndex - dictSize;
+		streamPtr->lowLimit = endIndex - dictSize;
+
+		if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+			streamPtr->nextToUpdate = streamPtr->dictLimit;
+	}
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDictHC);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4hc_compress(const unsigned char *src, size_t src_len,
+	unsigned char *dst, size_t *dst_len, void *wrkmem)
+{
+	*dst_len = LZ4_compress_HC(src, dst, src_len,
+		*dst_len, LZ4HC_DEFAULT_CLEVEL, wrkmem);
+
+	/*
+	 * Prior lz4hc_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_HC
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!*dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4hc_compress);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4HC compressor");
+MODULE_DESCRIPTION("LZ4 HC compressor");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v7 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
  2017-02-05 19:09 ` [PATCH v7 0/5] Update LZ4 compressor module Sven Schmidt
  2017-02-05 19:09   ` [PATCH v7 1/5] lib: " Sven Schmidt
@ 2017-02-05 19:09   ` Sven Schmidt
  2017-02-05 19:09   ` [PATCH v7 3/5] crypto: Change LZ4 modules " Sven Schmidt
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-05 19:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch updates the unlz4 wrapper to work with the
updated LZ4 kernel module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 lib/decompress_unlz4.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 036fc88..1b0baf3 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 		error("NULL input pointer and missing fill function");
 		goto exit_1;
 	} else {
-		inp = large_malloc(lz4_compressbound(uncomp_chunksize));
+		inp = large_malloc(LZ4_compressBound(uncomp_chunksize));
 		if (!inp) {
 			error("Could not allocate input buffer");
 			goto exit_1;
@@ -136,7 +136,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			inp += 4;
 			size -= 4;
 		} else {
-			if (chunksize > lz4_compressbound(uncomp_chunksize)) {
+			if (chunksize > LZ4_compressBound(uncomp_chunksize)) {
 				error("chunk length is longer than allocated");
 				goto exit_2;
 			}
@@ -152,11 +152,14 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			out_len -= dest_len;
 		} else
 			dest_len = out_len;
-		ret = lz4_decompress(inp, &chunksize, outp, dest_len);
+
+		ret = LZ4_decompress_fast(inp, outp, dest_len);
+		chunksize = ret;
 #else
 		dest_len = uncomp_chunksize;
-		ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
-				&dest_len);
+
+		ret = LZ4_decompress_safe(inp, outp, chunksize, dest_len);
+		dest_len = ret;
 #endif
 		if (ret < 0) {
 			error("Decoding failed");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v7 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
  2017-02-05 19:09 ` [PATCH v7 0/5] Update LZ4 compressor module Sven Schmidt
  2017-02-05 19:09   ` [PATCH v7 1/5] lib: " Sven Schmidt
  2017-02-05 19:09   ` [PATCH v7 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
@ 2017-02-05 19:09   ` Sven Schmidt
  2017-02-05 19:09   ` [PATCH v7 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-05 19:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch updates the crypto modules using LZ4 compression as well as the
test cases in testmgr.h to work with the new LZ4 module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 crypto/lz4.c     |  23 ++++-----
 crypto/lz4hc.c   |  23 ++++-----
 crypto/testmgr.h | 142 +++++++++++++++++++++++++++++++++++++++----------------
 3 files changed, 120 insertions(+), 68 deletions(-)

diff --git a/crypto/lz4.c b/crypto/lz4.c
index 99c1b2c..71eff9b 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -66,15 +66,13 @@ static void lz4_exit(struct crypto_tfm *tfm)
 static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
 				 u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_default(src, dst,
+		slen, *dlen, ctx);
 
-	err = lz4_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (!out_len)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -96,16 +94,13 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, *dlen);
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
-		return -EINVAL;
+	if (out_len < 0)
+		return out_len;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return 0;
 }
 
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 75ffc4a..03a34a8 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -65,15 +65,13 @@ static void lz4hc_exit(struct crypto_tfm *tfm)
 static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_HC(src, dst, slen,
+		*dlen, LZ4HC_DEFAULT_CLEVEL, ctx);
 
-	err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (!out_len)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -97,16 +95,13 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
 				     u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, *dlen);
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
-		return -EINVAL;
+	if (out_len < 0)
+		return out_len;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return 0;
 }
 
 static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 9b656be..98d4be0 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -34498,31 +34498,62 @@ static struct hash_testvec bfin_crc_tv_template[] = {
 
 static struct comp_testvec lz4_comp_tv_template[] = {
 	{
-		.inlen	= 70,
-		.outlen	= 45,
-		.input	= "Join us now and share the software "
-			  "Join us now and share the software ",
-		.output = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
-			  "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
-			  "\x64\x20\x73\x68\x61\x72\x65\x20"
-			  "\x74\x68\x65\x20\x73\x6f\x66\x74"
-			  "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
-			  "\x77\x61\x72\x65\x20",
+		.inlen	= 255,
+		.outlen	= 218,
+		.input	= "LZ4 is lossless compression algorithm, providing"
+			 " compression speed at 400 MB/s per core, scalable "
+			 "with multi-cores CPU. It features an extremely fast "
+			 "decoder, with speed in multiple GB/s per core, "
+			 "typically reaching RAM speed limits on multi-core "
+			 "systems.",
+		.output	= "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+			  "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+			  "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+			  "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+			  "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+			  "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+			  "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+			  "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+			  "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+			  "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+			  "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+			  "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+			  "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+			  "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+			  "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x83"
+			  "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x3f\x00\x01\x85\x00"
+			  "\x90\x20\x73\x79\x73\x74\x65\x6d\x73\x2e",
+
 	},
 };
 
 static struct comp_testvec lz4_decomp_tv_template[] = {
 	{
-		.inlen	= 45,
-		.outlen	= 70,
-		.input  = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
-			  "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
-			  "\x64\x20\x73\x68\x61\x72\x65\x20"
-			  "\x74\x68\x65\x20\x73\x6f\x66\x74"
-			  "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
-			  "\x77\x61\x72\x65\x20",
-		.output	= "Join us now and share the software "
-			  "Join us now and share the software ",
+		.inlen	= 218,
+		.outlen	= 255,
+		.input	= "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+			  "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+			  "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+			  "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+			  "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+			  "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+			  "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+			  "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+			  "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+			  "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+			  "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+			  "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+			  "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+			  "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+			  "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x83"
+			  "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x3f\x00\x01\x85\x00"
+			  "\x90\x20\x73\x79\x73\x74\x65\x6d\x73\x2e",
+		.output	= "LZ4 is lossless compression algorithm, providing"
+			 " compression speed at 400 MB/s per core, scalable "
+			 "with multi-cores CPU. It features an extremely fast "
+			 "decoder, with speed in multiple GB/s per core, "
+			 "typically reaching RAM speed limits on multi-core "
+			 "systems.",
 	},
 };
 
@@ -34531,31 +34562,62 @@ static struct comp_testvec lz4_decomp_tv_template[] = {
 
 static struct comp_testvec lz4hc_comp_tv_template[] = {
 	{
-		.inlen	= 70,
-		.outlen	= 45,
-		.input	= "Join us now and share the software "
-			  "Join us now and share the software ",
-		.output = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
-			  "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
-			  "\x64\x20\x73\x68\x61\x72\x65\x20"
-			  "\x74\x68\x65\x20\x73\x6f\x66\x74"
-			  "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
-			  "\x77\x61\x72\x65\x20",
+		.inlen	= 255,
+		.outlen	= 216,
+		.input	= "LZ4 is lossless compression algorithm, providing"
+			 " compression speed at 400 MB/s per core, scalable "
+			 "with multi-cores CPU. It features an extremely fast "
+			 "decoder, with speed in multiple GB/s per core, "
+			 "typically reaching RAM speed limits on multi-core "
+			 "systems.",
+		.output = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+			  "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+			  "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+			  "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+			  "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+			  "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+			  "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+			  "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+			  "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+			  "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+			  "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+			  "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+			  "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+			  "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+			  "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x97"
+			  "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x6e\x85\x00\x90\x20"
+			  "\x73\x79\x73\x74\x65\x6d\x73\x2e",
+
 	},
 };
 
 static struct comp_testvec lz4hc_decomp_tv_template[] = {
 	{
-		.inlen	= 45,
-		.outlen	= 70,
-		.input  = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
-			  "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
-			  "\x64\x20\x73\x68\x61\x72\x65\x20"
-			  "\x74\x68\x65\x20\x73\x6f\x66\x74"
-			  "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
-			  "\x77\x61\x72\x65\x20",
-		.output	= "Join us now and share the software "
-			  "Join us now and share the software ",
+		.inlen	= 216,
+		.outlen	= 255,
+		.input	= "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+			  "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+			  "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+			  "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+			  "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+			  "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+			  "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+			  "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+			  "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+			  "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+			  "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+			  "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+			  "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+			  "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+			  "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x97"
+			  "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x6e\x85\x00\x90\x20"
+			  "\x73\x79\x73\x74\x65\x6d\x73\x2e",
+		.output	= "LZ4 is lossless compression algorithm, providing"
+			 " compression speed at 400 MB/s per core, scalable "
+			 "with multi-cores CPU. It features an extremely fast "
+			 "decoder, with speed in multiple GB/s per core, "
+			 "typically reaching RAM speed limits on multi-core "
+			 "systems.",
 	},
 };
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v7 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
  2017-02-05 19:09 ` [PATCH v7 0/5] Update LZ4 compressor module Sven Schmidt
                     ` (2 preceding siblings ...)
  2017-02-05 19:09   ` [PATCH v7 3/5] crypto: Change LZ4 modules " Sven Schmidt
@ 2017-02-05 19:09   ` Sven Schmidt
  2017-02-05 19:09   ` [PATCH v7 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
  2017-02-08 23:31   ` [PATCH v7 0/5] Update LZ4 compressor module Minchan Kim
  5 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-05 19:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch updates fs/pstore and fs/squashfs to use the updated
functions from the new LZ4 module.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 fs/pstore/platform.c      | 22 +++++++++++++---------
 fs/squashfs/lz4_wrapper.c | 12 ++++++------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 729677e..efab7b6 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -342,31 +342,35 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_compress(in, inlen, out, &outlen, workspace);
-	if (ret) {
-		pr_err("lz4_compress error, ret = %d!\n", ret);
+	ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
+	if (!ret) {
+		pr_err("LZ4_compress_default error; compression failed!\n");
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
-	if (ret) {
-		pr_err("lz4_decompress error, ret = %d!\n", ret);
+	ret = LZ4_decompress_safe(in, out, inlen, outlen);
+	if (ret < 0) {
+		/*
+		 * LZ4_decompress_safe will return an error code
+		 * (< 0) if decompression failed
+		 */
+		pr_err("LZ4_decompress_safe error, ret = %d!\n", ret);
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static void allocate_lz4(void)
 {
-	big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
+	big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
 	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
 	if (big_oops_buf) {
 		workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index ff4468b..95da653 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	struct squashfs_lz4 *stream = strm;
 	void *buff = stream->input, *data;
 	int avail, i, bytes = length, res;
-	size_t dest_len = output->length;
 
 	for (i = 0; i < b; i++) {
 		avail = min(bytes, msblk->devblksize - offset);
@@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 		put_bh(bh[i]);
 	}
 
-	res = lz4_decompress_unknownoutputsize(stream->input, length,
-					stream->output, &dest_len);
-	if (res)
+	res = LZ4_decompress_safe(stream->input, stream->output,
+		length, output->length);
+
+	if (res < 0)
 		return -EIO;
 
-	bytes = dest_len;
+	bytes = res;
 	data = squashfs_first_page(output);
 	buff = stream->output;
 	while (data) {
@@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	}
 	squashfs_finish_page(output);
 
-	return dest_len;
+	return res;
 }
 
 const struct squashfs_decompressor squashfs_lz4_comp_ops = {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v7 5/5] lib/lz4: Remove back-compat wrappers
  2017-02-05 19:09 ` [PATCH v7 0/5] Update LZ4 compressor module Sven Schmidt
                     ` (3 preceding siblings ...)
  2017-02-05 19:09   ` [PATCH v7 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
@ 2017-02-05 19:09   ` Sven Schmidt
  2017-02-08 23:31   ` [PATCH v7 0/5] Update LZ4 compressor module Minchan Kim
  5 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-05 19:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

This patch removes the functions introduced as wrappers for providing
backwards compatibility to the prior LZ4 version.
They're not needed anymore since there's no callers left.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      | 69 ------------------------------------------------
 lib/lz4/lz4_compress.c   | 22 ---------------
 lib/lz4/lz4_decompress.c | 42 -----------------------------
 lib/lz4/lz4hc_compress.c | 23 ----------------
 4 files changed, 156 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 1b7ab2a..a3912d7 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -173,18 +173,6 @@ static inline int LZ4_compressBound(size_t isize)
 }
 
 /**
- * lz4_compressbound() - For backwards compatibility; see LZ4_compressBound
- * @isize: Size of the input data
- *
- * Return: Max. size LZ4 may output in a "worst case" szenario
- *	(data not compressible)
- */
-static inline int lz4_compressbound(size_t isize)
-{
-	return LZ4_COMPRESSBOUND(isize);
-}
-
-/**
  * LZ4_compress_default() - Compress data from source to dest
  * @source: source address of the original data
  * @dest: output buffer address of the compressed data
@@ -257,20 +245,6 @@ int LZ4_compress_fast(const char *source, char *dest, int inputSize,
 int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
 	int targetDestSize, void *wrkmem);
 
-/*
- * lz4_compress() - For backward compatibility, see LZ4_compress_default
- * @src: source address of the original data
- * @src_len: size of the original data
- * @dst: output buffer address of the compressed data. This requires 'dst'
- *	of size LZ4_COMPRESSBOUND
- * @dst_len: is the output size, which is returned after compress done
- * @workmem: address of the working memory.
- *
- * Return: Success if return 0, Error if return < 0
- */
-int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem);
-
 /*-************************************************************************
  *	Decompression Functions
  **************************************************************************/
@@ -346,34 +320,6 @@ int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
 int LZ4_decompress_safe_partial(const char *source, char *dest,
 	int compressedSize, int targetOutputSize, int maxDecompressedSize);
 
-/*
- * lz4_decompress_unknownoutputsize() - For backwards compatibility,
- *	see LZ4_decompress_safe
- * @src: source address of the compressed data
- * @src_len: is the input size, therefore the compressed size
- * @dest: output buffer address of the decompressed data
- *	which must be already allocated
- * @dest_len: is the max size of the destination buffer, which is
- *	returned with actual size of decompressed data after decompress done
- *
- * Return: Success if return 0, Error if return (< 0)
- */
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-	unsigned char *dest, size_t *dest_len);
-
-/**
- * lz4_decompress() - For backwards cocmpatibility, see LZ4_decompress_fast
- * @src: source address of the compressed data
- * @src_len: is the input size, which is returned after decompress done
- * @dest: output buffer address of the decompressed data,
- *	which must be already allocated
- * @actual_dest_len: is the size of uncompressed data, supposing it's known
- *
- * Return: Success if return 0, Error if return (< 0)
- */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-	unsigned char *dest, size_t actual_dest_len);
-
 /*-************************************************************************
  *	LZ4 HC Compression
  **************************************************************************/
@@ -401,21 +347,6 @@ int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
 	int compressionLevel, void *wrkmem);
 
 /**
- * lz4hc_compress() - For backwards compatibility, see LZ4_compress_HC
- * @src: source address of the original data
- * @src_len: size of the original data
- * @dst: output buffer address of the compressed data. This requires 'dst'
- *	of size LZ4_COMPRESSBOUND.
- * @dst_len: is the output size, which is returned after compress done
- * @wrkmem: address of the working memory.
- *	This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
- *
- * Return	: Success if return 0, Error if return (< 0)
- */
-int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem);
-
-/**
  * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure
  * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
  * @compressionLevel: Recommended values are between 4 and 9, although any
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 6aa7ac3..697dbda 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -883,27 +883,5 @@ int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
 }
 EXPORT_SYMBOL(LZ4_compress_fast_continue);
 
-/*-******************************
- *	For backwards compatibility
- ********************************/
-int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem) {
-	*dst_len = LZ4_compress_default(src, dst, src_len,
-		*dst_len, wrkmem);
-
-	/*
-	 * Prior lz4_compress will return -1 in case of error
-	 * and 0 on success
-	 * while new LZ4_compress_fast/default
-	 * returns 0 in case of error
-	 * and the output length on success
-	 */
-	if (!*dst_len)
-		return -1;
-	else
-		return 0;
-}
-EXPORT_SYMBOL(lz4_compress);
-
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 compressor");
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index fd665ca..9bf9182 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -483,47 +483,5 @@ int LZ4_decompress_fast_usingDict(const char *source, char *dest,
 }
 EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
 
-/*-******************************
- *	For backwards compatibility
- ********************************/
-int lz4_decompress_unknownoutputsize(const unsigned char *src,
-	size_t src_len, unsigned char *dest, size_t *dest_len) {
-	*dest_len = LZ4_decompress_safe(src, dest,
-		src_len, *dest_len);
-
-	/*
-	 * Prior lz4_decompress_unknownoutputsize will return
-	 * 0 for success and a negative result for error
-	 * new LZ4_decompress_safe returns
-	 * - the length of data read on success
-	 * - and also a negative result on error
-	 * meaning when result > 0, we just return 0 here
-	 */
-	if (src_len > 0)
-		return 0;
-	else
-		return -1;
-}
-EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
-
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-	unsigned char *dest, size_t actual_dest_len) {
-	*src_len = LZ4_decompress_fast(src, dest, actual_dest_len);
-
-	/*
-	 * Prior lz4_decompress will return
-	 * 0 for success and a negative result for error
-	 * new LZ4_decompress_fast returns
-	 * - the length of data read on success
-	 * - and also a negative result on error
-	 * meaning when result > 0, we just return 0 here
-	 */
-	if (*src_len > 0)
-		return 0;
-	else
-		return -1;
-}
-EXPORT_SYMBOL(lz4_decompress);
-
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 decompressor");
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index f1d4662..8363292 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -765,28 +765,5 @@ int LZ4_saveDictHC(
 }
 EXPORT_SYMBOL(LZ4_saveDictHC);
 
-/*-******************************
- *	For backwards compatibility
- ********************************/
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-	unsigned char *dst, size_t *dst_len, void *wrkmem)
-{
-	*dst_len = LZ4_compress_HC(src, dst, src_len,
-		*dst_len, LZ4HC_DEFAULT_CLEVEL, wrkmem);
-
-	/*
-	 * Prior lz4hc_compress will return -1 in case of error
-	 * and 0 on success
-	 * while new LZ4_compress_HC
-	 * returns 0 in case of error
-	 * and the output length on success
-	 */
-	if (!*dst_len)
-		return -1;
-	else
-		return 0;
-}
-EXPORT_SYMBOL(lz4hc_compress);
-
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 HC compressor");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-05 19:09 ` [PATCH v7 0/5] Update LZ4 compressor module Sven Schmidt
                     ` (4 preceding siblings ...)
  2017-02-05 19:09   ` [PATCH v7 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
@ 2017-02-08 23:31   ` Minchan Kim
  2017-02-09  0:24     ` Eric Biggers
  2017-02-09 10:56     ` Sven Schmidt
  5 siblings, 2 replies; 104+ messages in thread
From: Minchan Kim @ 2017-02-08 23:31 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck

Hello Sven,

On Sun, Feb 05, 2017 at 08:09:03PM +0100, Sven Schmidt wrote:
> 
> This patchset is for updating the LZ4 compression module to a version based
> on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
> which provides an "acceleration" parameter as a tradeoff between
> high compression ratio and high compression speed.
> 
> We want to use LZ4 fast in order to support compression in lustre
> and (mostly, based on that) investigate data reduction techniques in behalf of
> storage systems.
> 
> Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
> it is possible to enable applications to use fast and/or high compression
> depending on the usecase.
> For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
> LZ4 in the kernel.
> 
> LZ4 homepage: http://www.lz4.org/
> LZ4 source repository: https://github.com/lz4/lz4
> Source version: 1.7.3
> 
> Benchmark (taken from [1], Core i5-4300U @1.9GHz):
> ----------------|--------------|----------------|----------
> Compressor      | Compression  | Decompression  | Ratio
> ----------------|--------------|----------------|----------
> memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
> LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
> LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
> LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
> LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101
> 
> [1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html
> 
> [PATCH 1/5] lib: Update LZ4 compressor module
> [PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
> [PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
> [PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
> [PATCH 5/5] lib/lz4: Remove back-compat wrappers

Today, I did zram-lz4 performance test with fio in current mmotm and
found it makes regression about 20%.

"lz4-update" means current mmots(git://git.cmpxchg.org/linux-mmots.git) so
applied your 5 patches. (But now sure current mmots has recent uptodate
patches)
"revert" means I reverted your 5 patches in current mmots.

                     revert    lz4-update

      seq-write       1547       1339      86.55%
     rand-write      22775      19381      85.10%
       seq-read       7035       5589      79.45%
      rand-read      78556      68479      87.17%
   mixed-seq(R)       1305       1066      81.69%
   mixed-seq(W)       1205        984      81.66%
  mixed-rand(R)      17421      14993      86.06%
  mixed-rand(W)      17391      14968      86.07%

My fio description file

[global]
bs=4k
ioengine=sync
size=100m
numjobs=1
group_reporting
buffer_compress_percentage=30
scramble_buffers=0
filename=/dev/zram0
loops=10
fsync_on_close=1

[seq-write]
bs=64k
rw=write
stonewall

[rand-write]
rw=randwrite
stonewall

[seq-read]
bs=64k
rw=read
stonewall

[rand-read]
rw=randread
stonewall

[mixed-seq]
bs=64k
rw=rw
stonewall

[mixed-rand]
rw=randrw
stonewall

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-08 23:31   ` [PATCH v7 0/5] Update LZ4 compressor module Minchan Kim
@ 2017-02-09  0:24     ` Eric Biggers
  2017-02-09  5:24       ` Eric Biggers
  2017-02-09 11:02       ` Sven Schmidt
  2017-02-09 10:56     ` Sven Schmidt
  1 sibling, 2 replies; 104+ messages in thread
From: Eric Biggers @ 2017-02-09  0:24 UTC (permalink / raw)
  To: Minchan Kim
  Cc: Sven Schmidt, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck

On Thu, Feb 09, 2017 at 08:31:21AM +0900, Minchan Kim wrote:
> 
> Today, I did zram-lz4 performance test with fio in current mmotm and
> found it makes regression about 20%.
> 

This may or may not be the cause of the specific regression you're observing,
but I just noticed that the proposed patch drops a lot of FORCEINLINE
annotations from upstream LZ4.  The FORCEINLINE's are there for a reason,
especially for the main decompression and compression functions which are
basically "templates" that take in different sets of constant parameters, and
should be left in.  We should #define FORCEINLINE to __always_inline somewhere,
or just do a s/FORCEINLINE/__always_inline/g.

Note that the upstream LZ4 code is very carefully optimized, so we should not,
in general, be changing things like when functions are force-inlined, what the
hash table size is, etc.

[Also, for some reason linux-crypto is apparently still not receiving patch 1/5
in the series.  It's missing from the linux-crypto archive at
http://www.spinics.net/lists/linux-crypto/, so it's not just me.]

Thanks!

Eric

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-09  0:24     ` Eric Biggers
@ 2017-02-09  5:24       ` Eric Biggers
  2017-02-09 11:05         ` Sven Schmidt
  2017-02-10  0:14         ` Minchan Kim
  2017-02-09 11:02       ` Sven Schmidt
  1 sibling, 2 replies; 104+ messages in thread
From: Eric Biggers @ 2017-02-09  5:24 UTC (permalink / raw)
  To: Minchan Kim
  Cc: Sven Schmidt, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck

Also I noticed another bug, this time in LZ4_count():

> #if defined(CONFIG_64BIT)
> #define LZ4_ARCH64 1
> #else
> #define LZ4_ARCH64 0
> #endif
...
> #ifdef LZ4_ARCH64
>        if ((pIn < (pInLimit-3))
>                && (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
>                pIn += 4; pMatch += 4;
>        }
> #endif

Because of how LZ4_ARCH64 is defined, it needs to be '#if LZ4_ARCH64'.

But I also think the way upstream LZ4 does 64-bit detection could have just been
left as-is; it has a function which gets inlined:

	static unsigned LZ4_64bits(void) { return sizeof(void*)==8; }

Eric

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-08 23:31   ` [PATCH v7 0/5] Update LZ4 compressor module Minchan Kim
  2017-02-09  0:24     ` Eric Biggers
@ 2017-02-09 10:56     ` Sven Schmidt
  2017-02-10  0:13       ` Minchan Kim
  1 sibling, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2017-02-09 10:56 UTC (permalink / raw)
  To: Minchan Kim
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck

Hey Minchan,

On Thu, Feb 09, 2017 at 08:31:21AM +0900, Minchan Kim wrote:
> Hello Sven,
> 
> On Sun, Feb 05, 2017 at 08:09:03PM +0100, Sven Schmidt wrote:
> > 
> > This patchset is for updating the LZ4 compression module to a version based
> > on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
> > which provides an "acceleration" parameter as a tradeoff between
> > high compression ratio and high compression speed.
> > 
> > We want to use LZ4 fast in order to support compression in lustre
> > and (mostly, based on that) investigate data reduction techniques in behalf of
> > storage systems.
> > 
> > Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
> > it is possible to enable applications to use fast and/or high compression
> > depending on the usecase.
> > For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
> > LZ4 in the kernel.
> > 
> > LZ4 homepage: http://www.lz4.org/
> > LZ4 source repository: https://github.com/lz4/lz4
> > Source version: 1.7.3
> > 
> > Benchmark (taken from [1], Core i5-4300U @1.9GHz):
> > ----------------|--------------|----------------|----------
> > Compressor      | Compression  | Decompression  | Ratio
> > ----------------|--------------|----------------|----------
> > memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
> > LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
> > LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
> > LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
> > LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101
> > 
> > [1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html
> > 
> > [PATCH 1/5] lib: Update LZ4 compressor module
> > [PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
> > [PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
> > [PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
> > [PATCH 5/5] lib/lz4: Remove back-compat wrappers
> 
> Today, I did zram-lz4 performance test with fio in current mmotm and
> found it makes regression about 20%.
> 
> "lz4-update" means current mmots(git://git.cmpxchg.org/linux-mmots.git) so
> applied your 5 patches. (But now sure current mmots has recent uptodate
> patches)
> "revert" means I reverted your 5 patches in current mmots.
> 
>                      revert    lz4-update
> 
>       seq-write       1547       1339      86.55%
>      rand-write      22775      19381      85.10%
>        seq-read       7035       5589      79.45%
>       rand-read      78556      68479      87.17%
>    mixed-seq(R)       1305       1066      81.69%
>    mixed-seq(W)       1205        984      81.66%
>   mixed-rand(R)      17421      14993      86.06%
>   mixed-rand(W)      17391      14968      86.07%

which parts of the output (as well as units) are these values exactly?
I did not work with fio until now, so I think I might ask before misinterpreting my results.
 
> My fio description file
> 
> [global]
> bs=4k
> ioengine=sync
> size=100m
> numjobs=1
> group_reporting
> buffer_compress_percentage=30
> scramble_buffers=0
> filename=/dev/zram0
> loops=10
> fsync_on_close=1
> 
> [seq-write]
> bs=64k
> rw=write
> stonewall
> 
> [rand-write]
> rw=randwrite
> stonewall
> 
> [seq-read]
> bs=64k
> rw=read
> stonewall
> 
> [rand-read]
> rw=randread
> stonewall
> 
> [mixed-seq]
> bs=64k
> rw=rw
> stonewall
> 
> [mixed-rand]
> rw=randrw
> stonewall
> 

Great, this makes it easy for me to reproduce your test.

Thanks,

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-09  0:24     ` Eric Biggers
  2017-02-09  5:24       ` Eric Biggers
@ 2017-02-09 11:02       ` Sven Schmidt
  2017-02-09 18:29         ` Eric Biggers
  1 sibling, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2017-02-09 11:02 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Minchan Kim, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck

Hey Eric,

On Wed, Feb 08, 2017 at 04:24:36PM -0800, Eric Biggers wrote:
> On Thu, Feb 09, 2017 at 08:31:21AM +0900, Minchan Kim wrote:
> > 
> > Today, I did zram-lz4 performance test with fio in current mmotm and
> > found it makes regression about 20%.
> > 
> 
> This may or may not be the cause of the specific regression you're observing,
> but I just noticed that the proposed patch drops a lot of FORCEINLINE
> annotations from upstream LZ4.  The FORCEINLINE's are there for a reason,
> especially for the main decompression and compression functions which are
> basically "templates" that take in different sets of constant parameters, and
> should be left in.  We should #define FORCEINLINE to __always_inline somewhere,
> or just do a s/FORCEINLINE/__always_inline/g.
>

I generally just replaced "FORCE_INLINE" by "static inline". At least I thought so.
I rechecked and realised, I missed at least two of them (why did I not just use "search+replace"?).
So I think it's maybe safer and easier to eventually just use "FORCE_INLINE"
with the definition you suggested. Will try that.
 
> Note that the upstream LZ4 code is very carefully optimized, so we should not,
> in general, be changing things like when functions are force-inlined, what the
> hash table size is, etc.
> 
> [Also, for some reason linux-crypto is apparently still not receiving patch 1/5
> in the series.  It's missing from the linux-crypto archive at
> http://www.spinics.net/lists/linux-crypto/, so it's not just me.]
> 

I don't really know what to do about this. I think the matter is the size of the E-Mail.
Are there filters or something like that? Since in linux-kernel the patch seems to get delivered.
I could otherwise CC you if you wish.

Thanks,

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-09  5:24       ` Eric Biggers
@ 2017-02-09 11:05         ` Sven Schmidt
  2017-02-09 18:20           ` Eric Biggers
  2017-02-10  0:14         ` Minchan Kim
  1 sibling, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2017-02-09 11:05 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Minchan Kim, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck

Hey Eric,

On Wed, Feb 08, 2017 at 09:24:25PM -0800, Eric Biggers wrote:
> Also I noticed another bug, this time in LZ4_count():
> 
> > #if defined(CONFIG_64BIT)
> > #define LZ4_ARCH64 1
> > #else
> > #define LZ4_ARCH64 0
> > #endif
> ...
> > #ifdef LZ4_ARCH64
> >        if ((pIn < (pInLimit-3))
> >                && (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
> >                pIn += 4; pMatch += 4;
> >        }
> > #endif
> 
> Because of how LZ4_ARCH64 is defined, it needs to be '#if LZ4_ARCH64'.
> 
> But I also think the way upstream LZ4 does 64-bit detection could have just been
> left as-is; it has a function which gets inlined:
> 
> 	static unsigned LZ4_64bits(void) { return sizeof(void*)==8; }
> 
> Eric

does this apply for LZ4_isLittleEndian() as well? As a reminder:
	
	static unsigned LZ4_isLittleEndian(void)
	{
		/* don't use static : performance detrimental */
		const union { U32 u; BYTE c[4]; } one = { 1 };

		return one.c[0];
	}

It is surely easier to read and understand using these functions in favor of the macros.

Thanks,

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-09 11:05         ` Sven Schmidt
@ 2017-02-09 18:20           ` Eric Biggers
  0 siblings, 0 replies; 104+ messages in thread
From: Eric Biggers @ 2017-02-09 18:20 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: Minchan Kim, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck

On Thu, Feb 09, 2017 at 12:05:40PM +0100, Sven Schmidt wrote:
> > Because of how LZ4_ARCH64 is defined, it needs to be '#if LZ4_ARCH64'.
> > 
> > But I also think the way upstream LZ4 does 64-bit detection could have just been
> > left as-is; it has a function which gets inlined:
> > 
> > 	static unsigned LZ4_64bits(void) { return sizeof(void*)==8; }
> > 
> > Eric
> 
> does this apply for LZ4_isLittleEndian() as well? As a reminder:
> 	
> 	static unsigned LZ4_isLittleEndian(void)
> 	{
> 		/* don't use static : performance detrimental */
> 		const union { U32 u; BYTE c[4]; } one = { 1 };
> 
> 		return one.c[0];
> 	}
> 
> It is surely easier to read and understand using these functions in favor of the macros.

Yes, it makes sense to retain LZ4_isLittleEndian() too, mainly so that the call
sites don't need to be changed and we have less chance of introducing bugs.

I also believe the *implementation* of LZ4_isLittleEndian() using the union
"hack" to detecting endianness works fine and will get optimized correctly,
though we could replace it with an #ifdef __LITTLE_ENDIAN__ if we wanted to.  (I
am sure that upstream LZ4 would do that if it was guaranteed to work, but
upstream LZ4 needs to detect endianness reliably across many more different
compilers, compiler versions, and platforms than the Linux kernel does, and
there is no standard preprocessor macro for doing so.)

Eric

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-09 11:02       ` Sven Schmidt
@ 2017-02-09 18:29         ` Eric Biggers
  2017-02-10  3:57           ` David Miller
  0 siblings, 1 reply; 104+ messages in thread
From: Eric Biggers @ 2017-02-09 18:29 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: Minchan Kim, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck

On Thu, Feb 09, 2017 at 12:02:11PM +0100, Sven Schmidt wrote:
> > 
> > [Also, for some reason linux-crypto is apparently still not receiving patch 1/5
> > in the series.  It's missing from the linux-crypto archive at
> > http://www.spinics.net/lists/linux-crypto/, so it's not just me.]
> > 
> 
> I don't really know what to do about this. I think the matter is the size of the E-Mail.
> Are there filters or something like that? Since in linux-kernel the patch seems to get delivered.
> I could otherwise CC you if you wish.
> 

If I'm not mistaken, David Miller is the admin of the mail server on
vger.kernel.org, and he already happens to be Cc'ed on this thread, so maybe he
can answer as to why linux-crypto may be configured differently?

Anyway, since the patch did make it to linux-kernel anyone can still download it
from patchwork if they know where to look:
https://patchwork.kernel.org/patch/9556271/

Eric

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-09 10:56     ` Sven Schmidt
@ 2017-02-10  0:13       ` Minchan Kim
  2017-02-12 11:16         ` Sven Schmidt
  0 siblings, 1 reply; 104+ messages in thread
From: Minchan Kim @ 2017-02-10  0:13 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck

Hello Sven,

On Thu, Feb 09, 2017 at 11:56:17AM +0100, Sven Schmidt wrote:
> Hey Minchan,
> 
> On Thu, Feb 09, 2017 at 08:31:21AM +0900, Minchan Kim wrote:
> > Hello Sven,
> > 
> > On Sun, Feb 05, 2017 at 08:09:03PM +0100, Sven Schmidt wrote:
> > > 
> > > This patchset is for updating the LZ4 compression module to a version based
> > > on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
> > > which provides an "acceleration" parameter as a tradeoff between
> > > high compression ratio and high compression speed.
> > > 
> > > We want to use LZ4 fast in order to support compression in lustre
> > > and (mostly, based on that) investigate data reduction techniques in behalf of
> > > storage systems.
> > > 
> > > Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
> > > it is possible to enable applications to use fast and/or high compression
> > > depending on the usecase.
> > > For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
> > > LZ4 in the kernel.
> > > 
> > > LZ4 homepage: http://www.lz4.org/
> > > LZ4 source repository: https://github.com/lz4/lz4
> > > Source version: 1.7.3
> > > 
> > > Benchmark (taken from [1], Core i5-4300U @1.9GHz):
> > > ----------------|--------------|----------------|----------
> > > Compressor      | Compression  | Decompression  | Ratio
> > > ----------------|--------------|----------------|----------
> > > memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
> > > LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
> > > LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
> > > LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
> > > LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101
> > > 
> > > [1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html
> > > 
> > > [PATCH 1/5] lib: Update LZ4 compressor module
> > > [PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
> > > [PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
> > > [PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
> > > [PATCH 5/5] lib/lz4: Remove back-compat wrappers
> > 
> > Today, I did zram-lz4 performance test with fio in current mmotm and
> > found it makes regression about 20%.
> > 
> > "lz4-update" means current mmots(git://git.cmpxchg.org/linux-mmots.git) so
> > applied your 5 patches. (But now sure current mmots has recent uptodate
> > patches)
> > "revert" means I reverted your 5 patches in current mmots.
> > 
> >                      revert    lz4-update
> > 
> >       seq-write       1547       1339      86.55%
> >      rand-write      22775      19381      85.10%
> >        seq-read       7035       5589      79.45%
> >       rand-read      78556      68479      87.17%
> >    mixed-seq(R)       1305       1066      81.69%
> >    mixed-seq(W)       1205        984      81.66%
> >   mixed-rand(R)      17421      14993      86.06%
> >   mixed-rand(W)      17391      14968      86.07%
> 
> which parts of the output (as well as units) are these values exactly?
> I did not work with fio until now, so I think I might ask before misinterpreting my results.

It is IOPS.

>  
> > My fio description file
> > 
> > [global]
> > bs=4k
> > ioengine=sync
> > size=100m
> > numjobs=1
> > group_reporting
> > buffer_compress_percentage=30
> > scramble_buffers=0
> > filename=/dev/zram0
> > loops=10
> > fsync_on_close=1
> > 
> > [seq-write]
> > bs=64k
> > rw=write
> > stonewall
> > 
> > [rand-write]
> > rw=randwrite
> > stonewall
> > 
> > [seq-read]
> > bs=64k
> > rw=read
> > stonewall
> > 
> > [rand-read]
> > rw=randread
> > stonewall
> > 
> > [mixed-seq]
> > bs=64k
> > rw=rw
> > stonewall
> > 
> > [mixed-rand]
> > rw=randrw
> > stonewall
> > 
> 
> Great, this makes it easy for me to reproduce your test.

If you have trouble to reproduce, feel free to ask me. I'm happy to test it. :)

Thanks!

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-09  5:24       ` Eric Biggers
  2017-02-09 11:05         ` Sven Schmidt
@ 2017-02-10  0:14         ` Minchan Kim
  1 sibling, 0 replies; 104+ messages in thread
From: Minchan Kim @ 2017-02-10  0:14 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Sven Schmidt, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck

Hello Eric,

On Wed, Feb 08, 2017 at 09:24:25PM -0800, Eric Biggers wrote:
> Also I noticed another bug, this time in LZ4_count():
> 
> > #if defined(CONFIG_64BIT)
> > #define LZ4_ARCH64 1
> > #else
> > #define LZ4_ARCH64 0
> > #endif
> ...
> > #ifdef LZ4_ARCH64
> >        if ((pIn < (pInLimit-3))
> >                && (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
> >                pIn += 4; pMatch += 4;
> >        }
> > #endif
> 
> Because of how LZ4_ARCH64 is defined, it needs to be '#if LZ4_ARCH64'.
> 
> But I also think the way upstream LZ4 does 64-bit detection could have just been
> left as-is; it has a function which gets inlined:
> 
> 	static unsigned LZ4_64bits(void) { return sizeof(void*)==8; }
> 

Thanks for looking this.

If you have a fix, could you send a patch? I'm happy to test it.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-09 18:29         ` Eric Biggers
@ 2017-02-10  3:57           ` David Miller
  0 siblings, 0 replies; 104+ messages in thread
From: David Miller @ 2017-02-10  3:57 UTC (permalink / raw)
  To: ebiggers3
  Cc: 4sschmid, minchan, akpm, bongkyu.kim, rsalvaterra,
	sergey.senozhatsky, gregkh, linux-kernel, herbert, linux-crypto,
	anton, ccross, keescook, tony.luck

From: Eric Biggers <ebiggers3@gmail.com>
Date: Thu, 9 Feb 2017 10:29:14 -0800

> On Thu, Feb 09, 2017 at 12:02:11PM +0100, Sven Schmidt wrote:
>> > 
>> > [Also, for some reason linux-crypto is apparently still not receiving patch 1/5
>> > in the series.  It's missing from the linux-crypto archive at
>> > http://www.spinics.net/lists/linux-crypto/, so it's not just me.]
>> > 
>> 
>> I don't really know what to do about this. I think the matter is the size of the E-Mail.
>> Are there filters or something like that? Since in linux-kernel the patch seems to get delivered.
>> I could otherwise CC you if you wish.
>> 
> 
> If I'm not mistaken, David Miller is the admin of the mail server on
> vger.kernel.org, and he already happens to be Cc'ed on this thread, so maybe he
> can answer as to why linux-crypto may be configured differently?
> 
> Anyway, since the patch did make it to linux-kernel anyone can still download it
> from patchwork if they know where to look:
> https://patchwork.kernel.org/patch/9556271/

I've increased the maxlength parameter for linux-cryto so this doesn't
happen again.

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-10  0:13       ` Minchan Kim
@ 2017-02-12 11:16         ` Sven Schmidt
  2017-02-12 11:16             ` Sven Schmidt
  2017-02-13  0:03           ` [PATCH v7 0/5] Update LZ4 compressor module Minchan Kim
  0 siblings, 2 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-12 11:16 UTC (permalink / raw)
  To: minchan
  Cc: ebiggers3, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck




On 02/10/2017 01:13 AM, Minchan Kim wrote:
> Hello Sven,
>
> On Thu, Feb 09, 2017 at 11:56:17AM +0100, Sven Schmidt wrote:
>> Hey Minchan,
>>
>> On Thu, Feb 09, 2017 at 08:31:21AM +0900, Minchan Kim wrote:
>>> Hello Sven,
>>>
>>> On Sun, Feb 05, 2017 at 08:09:03PM +0100, Sven Schmidt wrote:
>>>>
>>>> This patchset is for updating the LZ4 compression module to a version based
>>>> on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
>>>> which provides an "acceleration" parameter as a tradeoff between
>>>> high compression ratio and high compression speed.
>>>>
>>>> We want to use LZ4 fast in order to support compression in lustre
>>>> and (mostly, based on that) investigate data reduction techniques in behalf of
>>>> storage systems.
>>>>
>>>> Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
>>>> it is possible to enable applications to use fast and/or high compression
>>>> depending on the usecase.
>>>> For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
>>>> LZ4 in the kernel.
>>>>
>>>> LZ4 homepage: http://www.lz4.org/
>>>> LZ4 source repository: https://github.com/lz4/lz4
>>>> Source version: 1.7.3
>>>>
>>>> Benchmark (taken from [1], Core i5-4300U @1.9GHz):
>>>> ----------------|--------------|----------------|----------
>>>> Compressor      | Compression  | Decompression  | Ratio
>>>> ----------------|--------------|----------------|----------
>>>> memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
>>>> LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
>>>> LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
>>>> LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
>>>> LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101
>>>>
>>>> [1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html
>>>>
>>>> [PATCH 1/5] lib: Update LZ4 compressor module
>>>> [PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
>>>> [PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
>>>> [PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
>>>> [PATCH 5/5] lib/lz4: Remove back-compat wrappers
>>>
>>> Today, I did zram-lz4 performance test with fio in current mmotm and
>>> found it makes regression about 20%.
>>>
>>> "lz4-update" means current mmots(git://git.cmpxchg.org/linux-mmots.git) so
>>> applied your 5 patches. (But now sure current mmots has recent uptodate
>>> patches)
>>> "revert" means I reverted your 5 patches in current mmots.
>>>
>>>                      revert    lz4-update
>>>
>>>       seq-write       1547       1339      86.55%
>>>      rand-write      22775      19381      85.10%
>>>        seq-read       7035       5589      79.45%
>>>       rand-read      78556      68479      87.17%
>>>    mixed-seq(R)       1305       1066      81.69%
>>>    mixed-seq(W)       1205        984      81.66%
>>>   mixed-rand(R)      17421      14993      86.06%
>>>   mixed-rand(W)      17391      14968      86.07%
>>
>> which parts of the output (as well as units) are these values exactly?
>> I did not work with fio until now, so I think I might ask before misinterpreting my results.
>
> It is IOPS.
>
>>  
>>> My fio description file
>>>
>>> [global]
>>> bs=4k
>>> ioengine=sync
>>> size=100m
>>> numjobs=1
>>> group_reporting
>>> buffer_compress_percentage=30
>>> scramble_buffers=0
>>> filename=/dev/zram0
>>> loops=10
>>> fsync_on_close=1
>>>
>>> [seq-write]
>>> bs=64k
>>> rw=write
>>> stonewall
>>>
>>> [rand-write]
>>> rw=randwrite
>>> stonewall
>>>
>>> [seq-read]
>>> bs=64k
>>> rw=read
>>> stonewall
>>>
>>> [rand-read]
>>> rw=randread
>>> stonewall
>>>
>>> [mixed-seq]
>>> bs=64k
>>> rw=rw
>>> stonewall
>>>
>>> [mixed-rand]
>>> rw=randrw
>>> stonewall
>>>
>>
>> Great, this makes it easy for me to reproduce your test.
>
> If you have trouble to reproduce, feel free to ask me. I'm happy to test it. :)
>
> Thanks!
>

Hi Minchan,

I will send an updated patch as a reply to this E-Mail. Would be really grateful If you'd test it and provide feedback!
The patch should be applied to the current mmots tree.

In fact, the updated LZ4 _is_ slower than the current one in kernel. But I was not able to reproduce such large regressions
as you did. I now tried to define FORCE_INLINE as Eric suggested. I also inlined some functions which weren't in upstream LZ4,
but are defined as macros in the current kernel LZ4. The approach to replace LZ4_ARCH64 with the function call _seemed_ to behave
worse than the macro, so I withdrew the change.

The main difference is, that I replaced the read32/read16/write... etc. functions using memcpy with the other ones defined 
in upstream LZ4 (which can be switched using a macro). 
The comment of the author stated, that they're as fast as the memcpy variants (or faster), but not as portable
(which does not matter since we're not dependent for multiple compilers).

In my tests, this version is mostly as fast as the current kernel LZ4.

Thank you!

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH] lz4: fix performance regressions
  2017-02-12 11:16         ` Sven Schmidt
@ 2017-02-12 11:16             ` Sven Schmidt
  2017-02-13  0:03           ` [PATCH v7 0/5] Update LZ4 compressor module Minchan Kim
  1 sibling, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-12 11:16 UTC (permalink / raw)
  To: minchan
  Cc: ebiggers3, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck, Sven Schmidt

Fix performance regressions compared to current kernel LZ4

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      |   2 +-
 lib/lz4/lz4_compress.c   | 157 +++++++++++++++++++++++-------------
 lib/lz4/lz4_decompress.c |  50 ++++++++----
 lib/lz4/lz4defs.h        | 203 ++++++++++++++++++++++++++++++++---------------
 lib/lz4/lz4hc_compress.c |   8 +-
 5 files changed, 281 insertions(+), 139 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index a3912d7..394e3d9 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -82,7 +82,7 @@
 /*-************************************************************************
  *	STREAMING CONSTANTS AND STRUCTURES
  **************************************************************************/
-#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE - 3)) + 4)
 #define LZ4_STREAMSIZE	(LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))

 #define LZ4_STREAMHCSIZE        262192
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 697dbda..2cbbf99 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -39,27 +39,33 @@
 #include <linux/kernel.h>
 #include <asm/unaligned.h>

+static const int LZ4_minLength = (MFLIMIT + 1);
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT - 1));
+
 /*-******************************
  *	Compression functions
  ********************************/
-static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
+static FORCE_INLINE U32 LZ4_hash4(
+	U32 sequence,
+	tableType_t const tableType)
 {
 	if (tableType == byU16)
 		return ((sequence * 2654435761U)
-			>> ((MINMATCH*8) - (LZ4_HASHLOG + 1)));
+			>> ((MINMATCH * 8) - (LZ4_HASHLOG + 1)));
 	else
 		return ((sequence * 2654435761U)
-			>> ((MINMATCH*8) - LZ4_HASHLOG));
+			>> ((MINMATCH * 8) - LZ4_HASHLOG));
 }

-#if LZ4_ARCH64
-static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+static FORCE_INLINE __maybe_unused U32 LZ4_hash5(
+	U64 sequence,
+	tableType_t const tableType)
 {
 	const U32 hashLog = (tableType == byU16)
 		? LZ4_HASHLOG + 1
 		: LZ4_HASHLOG;

-#ifdef __LITTLE_ENDIAN__
+#if LZ4_LITTLE_ENDIAN
 	static const U64 prime5bytes = 889523592379ULL;

 	return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
@@ -69,9 +75,10 @@ static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
 	return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
 #endif
 }
-#endif

-static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
+static FORCE_INLINE U32 LZ4_hashPosition(
+	const void *p,
+	tableType_t const tableType)
 {
 #if LZ4_ARCH64
 	if (tableType == byU32)
@@ -81,8 +88,12 @@ static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
 	return LZ4_hash4(LZ4_read32(p), tableType);
 }

-static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
-	tableType_t const tableType, const BYTE *srcBase)
+static void LZ4_putPositionOnHash(
+	const BYTE *p,
+	U32 h,
+	void *tableBase,
+	tableType_t const tableType,
+	const BYTE *srcBase)
 {
 	switch (tableType) {
 	case byPtr:
@@ -109,16 +120,22 @@ static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
 	}
 }

-static inline void LZ4_putPosition(const BYTE *p, void *tableBase,
-	tableType_t tableType, const BYTE *srcBase)
+static FORCE_INLINE void LZ4_putPosition(
+	const BYTE *p,
+	void *tableBase,
+	tableType_t tableType,
+	const BYTE *srcBase)
 {
 	U32 const h = LZ4_hashPosition(p, tableType);

 	LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
 }

-static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
-	tableType_t tableType, const BYTE *srcBase)
+static const BYTE *LZ4_getPositionOnHash(
+	U32 h,
+	void *tableBase,
+	tableType_t tableType,
+	const BYTE *srcBase)
 {
 	if (tableType == byPtr) {
 		const BYTE **hashTable = (const BYTE **) tableBase;
@@ -135,12 +152,16 @@ static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
 	{
 		/* default, to ensure a return */
 		const U16 * const hashTable = (U16 *) tableBase;
+
 		return hashTable[h] + srcBase;
 	}
 }

-static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
-	tableType_t tableType, const BYTE *srcBase)
+static FORCE_INLINE const BYTE *LZ4_getPosition(
+	const BYTE *p,
+	void *tableBase,
+	tableType_t tableType,
+	const BYTE *srcBase)
 {
 	U32 const h = LZ4_hashPosition(p, tableType);

@@ -152,7 +173,7 @@ static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
  * LZ4_compress_generic() :
  * inlined, to ensure branches are decided at compilation time
  */
-static inline int LZ4_compress_generic(
+static FORCE_INLINE int LZ4_compress_generic(
 	LZ4_stream_t_internal * const dictPtr,
 	const char * const source,
 	char * const dest,
@@ -187,6 +208,7 @@ static inline int LZ4_compress_generic(
 		/* Unsupported inputSize, too large (or negative) */
 		return 0;
 	}
+
 	switch (dict) {
 	case noDict:
 	default:
@@ -216,7 +238,8 @@ static inline int LZ4_compress_generic(

 	/* First Byte */
 	LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
-	ip++; forwardH = LZ4_hashPosition(ip, tableType);
+	ip++;
+	forwardH = LZ4_hashPosition(ip, tableType);

 	/* Main Loop */
 	for ( ; ; ) {
@@ -227,15 +250,14 @@ static inline int LZ4_compress_generic(
 		{
 			const BYTE *forwardIp = ip;
 			unsigned int step = 1;
-			unsigned int searchMatchNb = acceleration
-				<< LZ4_skipTrigger;
+			unsigned int searchMatchNb = acceleration << LZ4_SKIPTRIGGER;

 			do {
 				U32 const h = forwardH;

 				ip = forwardIp;
 				forwardIp += step;
-				step = (searchMatchNb++ >> LZ4_skipTrigger);
+				step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);

 				if (unlikely(forwardIp > mflimit))
 					goto _last_literals;
@@ -243,6 +265,7 @@ static inline int LZ4_compress_generic(
 				match = LZ4_getPositionOnHash(h,
 					dictPtr->hashTable,
 					tableType, base);
+
 				if (dict == usingExtDict) {
 					if (match < (const BYTE *)source) {
 						refDelta = dictDelta;
@@ -251,11 +274,12 @@ static inline int LZ4_compress_generic(
 						refDelta = 0;
 						lowLimit = (const BYTE *)source;
 				}	 }
+
 				forwardH = LZ4_hashPosition(forwardIp,
 					tableType);
+
 				LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
 					tableType, base);
-
 			} while (((dictIssue == dictSmall)
 					? (match < lowRefLimit)
 					: 0)
@@ -268,31 +292,34 @@ static inline int LZ4_compress_generic(

 		/* Catch up */
 		while (((ip > anchor) & (match + refDelta > lowLimit))
-			&& (unlikely(ip[-1] == match[refDelta - 1]))) {
+				&& (unlikely(ip[-1] == match[refDelta - 1]))) {
 			ip--;
 			match--;
-			}
+		}

 		/* Encode Literals */
 		{
 			unsigned const int litLength = (unsigned int)(ip - anchor);

 			token = op++;
+
 			if ((outputLimited) &&
 				/* Check output buffer overflow */
 				(unlikely(op + litLength +
 					(2 + 1 + LASTLITERALS) +
-					(litLength/255) > olimit)))
+					(litLength / 255) > olimit)))
 				return 0;
+
 			if (litLength >= RUN_MASK) {
 				int len = (int)litLength - RUN_MASK;

-				*token = (RUN_MASK<<ML_BITS);
-				for (; len >= 255 ; len -= 255)
+				*token = (RUN_MASK << ML_BITS);
+
+				for (; len >= 255; len -= 255)
 					*op++ = 255;
 				*op++ = (BYTE)len;
 			} else
-				*token = (BYTE)(litLength<<ML_BITS);
+				*token = (BYTE)(litLength << ML_BITS);

 			/* Copy Literals */
 			LZ4_wildCopy(op, anchor, op + litLength);
@@ -301,7 +328,8 @@ static inline int LZ4_compress_generic(

 _next_match:
 		/* Encode Offset */
-		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
+		LZ4_writeLE16(op, (U16)(ip - match));
+		op += 2;

 		/* Encode MatchLength */
 		{
@@ -313,11 +341,15 @@ static inline int LZ4_compress_generic(

 				match += refDelta;
 				limit = ip + (dictEnd - match);
+
 				if (limit > matchlimit)
 					limit = matchlimit;
+
 				matchCode = LZ4_count(ip + MINMATCH,
 					match + MINMATCH, limit);
+
 				ip += MINMATCH + matchCode;
+
 				if (ip == limit) {
 					unsigned const int more = LZ4_count(ip,
 						(const BYTE *)source,
@@ -336,17 +368,20 @@ static inline int LZ4_compress_generic(
 				/* Check output buffer overflow */
 				(unlikely(op +
 					(1 + LASTLITERALS) +
-					(matchCode>>8) > olimit)))
+					(matchCode >> 8) > olimit)))
 				return 0;
+
 			if (matchCode >= ML_MASK) {
 				*token += ML_MASK;
 				matchCode -= ML_MASK;
 				LZ4_write32(op, 0xFFFFFFFF);
-				while (matchCode >= 4*255) {
+
+				while (matchCode >= 4 * 255) {
 					op += 4;
 					LZ4_write32(op, 0xFFFFFFFF);
-					matchCode -= 4*255;
+					matchCode -= 4 * 255;
 				}
+
 				op += matchCode / 255;
 				*op++ = (BYTE)(matchCode % 255);
 			} else
@@ -365,6 +400,7 @@ static inline int LZ4_compress_generic(
 		/* Test next position */
 		match = LZ4_getPosition(ip, dictPtr->hashTable,
 			tableType, base);
+
 		if (dict == usingExtDict) {
 			if (match < (const BYTE *)source) {
 				refDelta = dictDelta;
@@ -374,7 +410,9 @@ static inline int LZ4_compress_generic(
 				lowLimit = (const BYTE *)source;
 			}
 		}
+
 		LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+
 		if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
 			&& (match + MAX_DISTANCE >= ip)
 			&& (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
@@ -395,18 +433,21 @@ static inline int LZ4_compress_generic(
 		if ((outputLimited) &&
 			/* Check output buffer overflow */
 			((op - (BYTE *)dest) + lastRun + 1 +
-			((lastRun + 255 - RUN_MASK)/255) > (U32)maxOutputSize))
+			((lastRun + 255 - RUN_MASK) / 255) > (U32)maxOutputSize))
 			return 0;
+
 		if (lastRun >= RUN_MASK) {
 			size_t accumulator = lastRun - RUN_MASK;
 			*op++ = RUN_MASK << ML_BITS;
-			for (; accumulator >= 255 ; accumulator -= 255)
+			for (; accumulator >= 255; accumulator -= 255)
 				*op++ = 255;
 			*op++ = (BYTE) accumulator;
 		} else {
-			*op++ = (BYTE)(lastRun<<ML_BITS);
+			*op++ = (BYTE)(lastRun << ML_BITS);
 		}
+
 		memcpy(op, anchor, lastRun);
+
 		op += lastRun;
 	}

@@ -414,23 +455,27 @@ static inline int LZ4_compress_generic(
 	return (int) (((char *)op) - dest);
 }

-static int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
-	int inputSize, int maxOutputSize, int acceleration)
+static int LZ4_compress_fast_extState(
+	void *state,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize,
+	int acceleration)
 {
-	#if LZ4_ARCH64
-	tableType_t tableType = byU32;
-	#else
-	tableType_t tableType = byPtr;
-	#endif
-
 	LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
+#if LZ4_ARCH64
+	const tableType_t tableType = byU32;
+#else
+	const tableType_t tableType = byPtr;
+#endif

 	LZ4_resetStream((LZ4_stream_t *)state);

 	if (acceleration < 1)
 		acceleration = LZ4_ACCELERATION_DEFAULT;

-	if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+	if (maxOutputSize >= LZ4_COMPRESSBOUND(inputSize)) {
 		if (inputSize < LZ4_64Klimit)
 			return LZ4_compress_generic(ctx, source,
 				dest, inputSize, 0,
@@ -474,7 +519,6 @@ EXPORT_SYMBOL(LZ4_compress_default);
 /*-******************************
  *	*_destSize() variant
  ********************************/
-
 static int LZ4_compress_destSize_generic(
 	LZ4_stream_t_internal * const ctx,
 	const char * const src,
@@ -529,14 +573,14 @@ static int LZ4_compress_destSize_generic(
 		{
 			const BYTE *forwardIp = ip;
 			unsigned int step = 1;
-			unsigned int searchMatchNb = 1 << LZ4_skipTrigger;
+			unsigned int searchMatchNb = 1 << LZ4_SKIPTRIGGER;

 			do {
 				U32 h = forwardH;

 				ip = forwardIp;
 				forwardIp += step;
-				step = (searchMatchNb++ >> LZ4_skipTrigger);
+				step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);

 				if (unlikely(forwardIp > mflimit))
 					goto _last_literals;
@@ -559,8 +603,9 @@ static int LZ4_compress_destSize_generic(
 		while ((ip > anchor)
 			&& (match > lowLimit)
 			&& (unlikely(ip[-1] == match[-1]))) {
-			ip--; match--;
-			}
+			ip--;
+			match--;
+		}

 		/* Encode Literal length */
 		{
@@ -644,11 +689,11 @@ static int LZ4_compress_destSize_generic(
 		size_t lastRunSize = (size_t)(iend - anchor);

 		if (op + 1 /* token */
-			+ ((lastRunSize + 240)/255) /* litLength */
+			+ ((lastRunSize + 240) / 255) /* litLength */
 			+ lastRunSize /* literals */ > oend) {
 			/* adapt lastRunSize to fill 'dst' */
 			lastRunSize	= (oend - op) - 1;
-			lastRunSize -= (lastRunSize + 240)/255;
+			lastRunSize -= (lastRunSize + 240) / 255;
 		}
 		ip = anchor + lastRunSize;

@@ -656,7 +701,7 @@ static int LZ4_compress_destSize_generic(
 			size_t accumulator = lastRunSize - RUN_MASK;

 			*op++ = RUN_MASK << ML_BITS;
-			for (; accumulator >= 255 ; accumulator -= 255)
+			for (; accumulator >= 255; accumulator -= 255)
 				*op++ = 255;
 			*op++ = (BYTE) accumulator;
 		} else {
@@ -675,14 +720,14 @@ static int LZ4_compress_destSize_extState(LZ4_stream_t *state, const char *src,
 	char *dst, int *srcSizePtr, int targetDstSize)
 {
 	#if LZ4_ARCH64
-	tableType_t tableType = byU32;
+		const tableType_t tableType = byU32;
 	#else
-	tableType_t tableType = byPtr;
+		const tableType_t tableType = byPtr;
 	#endif

 	LZ4_resetStream(state);

-	if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {
+	if (targetDstSize >= LZ4_COMPRESSBOUND(*srcSizePtr)) {
 		/* compression success is guaranteed */
 		return LZ4_compress_fast_extState(
 			state, src, dst, *srcSizePtr,
@@ -847,7 +892,7 @@ int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
 			result = LZ4_compress_generic(
 				streamPtr, source, dest, inputSize,
 				maxOutputSize, limitedOutput, byU32,
-				withPrefix64k, dictSmall,	acceleration);
+				withPrefix64k, dictSmall, acceleration);
 		} else {
 			result = LZ4_compress_generic(
 				streamPtr, source, dest, inputSize,
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index a7731ba..3bfc2f6 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -49,8 +49,8 @@
  * Note that it is important this generic function is really inlined,
  * in order to remove useless branches during compilation optimization.
  */
-static inline int LZ4_decompress_generic(
-	 const char *const source,
+static FORCE_INLINE int LZ4_decompress_generic(
+	 const char * const source,
 	 char * const dest,
 	 int inputSize,
 		/*
@@ -180,22 +180,28 @@ static inline int LZ4_decompress_generic(
 					goto _output_error;
 				}
 			}
+
 			memcpy(op, ip, length);
 			ip += length;
 			op += length;
 			/* Necessarily EOF, due to parsing restrictions */
 			break;
 		}
+
 		LZ4_wildCopy(op, ip, cpy);
-		ip += length; op = cpy;
+		ip += length;
+		op = cpy;

 		/* get offset */
-		offset = LZ4_readLE16(ip); ip += 2;
+		offset = LZ4_readLE16(ip);
+		ip += 2;
 		match = op - offset;
+
 		if ((checkOffset) && (unlikely(match < lowLimit))) {
 			/* Error : offset outside buffers */
 			goto _output_error;
 		}
+
 		/* costs ~1%; silence an msan warning when offset == 0 */
 		LZ4_write32(op, (U32)offset);

@@ -205,11 +211,14 @@ static inline int LZ4_decompress_generic(
 			unsigned int s;

 			do {
-			s = *ip++;
-			if ((endOnInput) && (ip > iend - LASTLITERALS))
-				goto _output_error;
-			length += s;
+				s = *ip++;
+
+				if ((endOnInput) && (ip > iend - LASTLITERALS))
+					goto _output_error;
+
+				length += s;
 			} while (s == 255);
+
 			if ((safeDecode)
 				&& unlikely(
 					(size_t)(op + length) < (size_t)op)) {
@@ -217,6 +226,7 @@ static inline int LZ4_decompress_generic(
 				goto _output_error;
 			}
 		}
+
 		length += MINMATCH;

 		/* check external dictionary */
@@ -227,12 +237,13 @@ static inline int LZ4_decompress_generic(
 			}

 			if (length <= (size_t)(lowPrefix - match)) {
-			/*
-			 * match can be copied as a single segment
-			 * from external dictionary
-			 */
-			memmove(op, dictEnd - (lowPrefix - match), length);
-			op += length;
+				/*
+				 * match can be copied as a single segment
+				 * from external dictionary
+				 */
+				memmove(op, dictEnd - (lowPrefix - match),
+					length);
+				op += length;
 			} else {
 				/*
 				 * match encompass external
@@ -256,11 +267,13 @@ static inline int LZ4_decompress_generic(
 					op += restSize;
 				}
 			}
+
 			continue;
 		}

 		/* copy match within block */
 		cpy = op + length;
+
 		if (unlikely(offset < 8)) {
 			const int dec64 = dec64table[offset];

@@ -272,7 +285,8 @@ static inline int LZ4_decompress_generic(
 			memcpy(op + 4, match, 4);
 			match -= dec64;
 		} else {
-			LZ4_copy8(op, match); match += 8;
+			LZ4_copy8(op, match);
+			match += 8;
 		}

 		op += 8;
@@ -287,18 +301,22 @@ static inline int LZ4_decompress_generic(
 				 */
 				goto _output_error;
 			}
+
 			if (op < oCopyLimit) {
 				LZ4_wildCopy(op, match, oCopyLimit);
 				match += oCopyLimit - op;
 				op = oCopyLimit;
 			}
+
 			while (op < cpy)
 				*op++ = *match++;
 		} else {
 			LZ4_copy8(op, match);
+
 			if (length > 16)
 				LZ4_wildCopy(op + 8, match + 8, cpy);
 		}
+
 		op = cpy; /* correction */
 	}

@@ -438,7 +456,7 @@ int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
  * These decoding functions work the same as "_continue" ones,
  * the dictionary must be explicitly provided within parameters
  */
-static inline int LZ4_decompress_usingDict_generic(const char *source,
+static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source,
 	char *dest, int compressedSize, int maxOutputSize, int safe,
 	const char *dictStart, int dictSize)
 {
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index 23e1a1b..47ef42b 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -38,14 +38,7 @@
 #include <asm/unaligned.h>
 #include <linux/string.h>	 /* memset, memcpy */

-/*
- * Detects 64 bits mode
-*/
-#if defined(CONFIG_64BIT)
-#define LZ4_ARCH64 1
-#else
-#define LZ4_ARCH64 0
-#endif
+#define FORCE_INLINE __always_inline

 /*-************************************
  *	Basic Types
@@ -60,14 +53,38 @@ typedef uint64_t U64;
 typedef uintptr_t uptrval;

 /*-************************************
+ *	Architecture specifics
+ **************************************/
+#if defined(CONFIG_64BIT)
+#define LZ4_ARCH64 1
+#else
+#define LZ4_ARCH64 0
+#endif
+
+#if defined(__LITTLE_ENDIAN)
+#define LZ4_LITTLE_ENDIAN 1
+#else
+#define LZ4_LITTLE_ENDIAN 0
+#endif
+
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system
+ * does not support hardware bit count
+ */
+/* #define LZ4_FORCE_SW_BITCOUNT */
+
+/*-************************************
  *	Constants
  **************************************/
 #define MINMATCH 4

 #define WILDCOPYLENGTH 8
 #define LASTLITERALS 5
-#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
-static const int LZ4_minLength = (MFLIMIT+1);
+#define MFLIMIT (WILDCOPYLENGTH + MINMATCH)
+
+/* Increase this value ==> compression run slower on incompressible data */
+#define LZ4_SKIPTRIGGER 6

 #define KB (1<<10)
 #define MB (1<<20)
@@ -82,53 +99,42 @@ static const int LZ4_minLength = (MFLIMIT+1);
 #define RUN_BITS (8-ML_BITS)
 #define RUN_MASK ((1U<<RUN_BITS)-1)

-static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT-1));
-static const U32 LZ4_skipTrigger = 6;
-
 /*-************************************
  *	Reading and writing into memory
  **************************************/
+typedef union {
+	U16 u16;
+	U32 u32;
+	size_t uArch;
+} __packed unalign;

-static inline U16 LZ4_read16(const void *memPtr)
+static FORCE_INLINE __maybe_unused U16 LZ4_read16(const void *ptr)
 {
-	U16 val;
-
-	memcpy(&val, memPtr, sizeof(val));
-
-	return val;
+	return ((const unalign *)ptr)->u16;
 }

-static inline U32 LZ4_read32(const void *memPtr)
+static FORCE_INLINE __maybe_unused U32 LZ4_read32(const void *ptr)
 {
-	U32 val;
-
-	memcpy(&val, memPtr, sizeof(val));
-
-	return val;
+	return ((const unalign *)ptr)->u32;
 }

-static inline size_t LZ4_read_ARCH(const void *memPtr)
+static FORCE_INLINE __maybe_unused size_t LZ4_read_ARCH(const void *ptr)
 {
-	size_t val;
-
-	memcpy(&val, memPtr, sizeof(val));
-
-	return val;
+	return ((const unalign *)ptr)->uArch;
 }

-static inline void LZ4_write16(void *memPtr, U16 value)
+static FORCE_INLINE __maybe_unused void LZ4_write16(void *memPtr, U16 value)
 {
-	memcpy(memPtr, &value, sizeof(value));
+	((unalign *)memPtr)->u16 = value;
 }

-static inline void LZ4_write32(void *memPtr, U32 value)
-{
-	memcpy(memPtr, &value, sizeof(value));
+static FORCE_INLINE __maybe_unused void LZ4_write32(void *memPtr, U32 value) {
+	((unalign *)memPtr)->u32 = value;
 }

-static inline U16 LZ4_readLE16(const void *memPtr)
+static FORCE_INLINE __maybe_unused U16 LZ4_readLE16(const void *memPtr)
 {
-#ifdef __LITTLE_ENDIAN__
+#if LZ4_LITTLE_ENDIAN
 	return LZ4_read16(memPtr);
 #else
 	const BYTE *p = (const BYTE *)memPtr;
@@ -137,19 +143,19 @@ static inline U16 LZ4_readLE16(const void *memPtr)
 #endif
 }

-static inline void LZ4_writeLE16(void *memPtr, U16 value)
+static FORCE_INLINE __maybe_unused void LZ4_writeLE16(void *memPtr, U16 value)
 {
-#ifdef __LITTLE_ENDIAN__
+#if LZ4_LITTLE_ENDIAN
 	LZ4_write16(memPtr, value);
 #else
 	BYTE *p = (BYTE *)memPtr;

 	p[0] = (BYTE) value;
-	p[1] = (BYTE)(value>>8);
+	p[1] = (BYTE)(value >> 8);
 #endif
 }

-static inline void LZ4_copy8(void *dst, const void *src)
+static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
 {
 	memcpy(dst, src, 8);
 }
@@ -158,7 +164,8 @@ static inline void LZ4_copy8(void *dst, const void *src)
  * customized variant of memcpy,
  * which can overwrite up to 7 bytes beyond dstEnd
  */
-static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
+static FORCE_INLINE void LZ4_wildCopy(void *dstPtr,
+	const void *srcPtr, void *dstEnd)
 {
 	BYTE *d = (BYTE *)dstPtr;
 	const BYTE *s = (const BYTE *)srcPtr;
@@ -171,49 +178,121 @@ static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
 	} while (d < e);
 }

-#if LZ4_ARCH64
-#ifdef __BIG_ENDIAN__
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
+{
+#if LZ4_LITTLE_ENDIAN
+#if LZ4_ARCH64 /* 64 Bits Little Endian */
+#if defined(LZ4_FORCE_SW_BITCOUNT)
+	static const int DeBruijnBytePos[64] = {
+		0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7,
+		0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7,
+		7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6,
+		7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
+	};
+
+	return DeBruijnBytePos[((U64)((val & -(long long)val)
+		* 0x0218A392CDABBD3FULL)) >> 58];
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
-#endif
+	return (__builtin_ctzll((U64)val) >> 3);
+#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
+#else /* 32 Bits Little Endian */
+#if defined(LZ4_FORCE_SW_BITCOUNT)
+	static const int DeBruijnBytePos[32] = {
+		0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1,
+		3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1
+	};
+
+	return DeBruijnBytePos[((U32)((val & -(S32)val)
+		* 0x077CB531U)) >> 27];
 #else
-#ifdef __BIG_ENDIAN__
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
+	return (__builtin_ctz((U32)val) >> 3);
+#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
+#endif /* LZ4_ARCH64 */
+#else /* Big Endian */
+#if LZ4_ARCH64 /* 64 Bits Big Endian */
+#if defined(LZ4_FORCE_SW_BITCOUNT)
+	unsigned int r;
+
+	if (!(val >> 32)) {
+		r = 4;
+	} else {
+		r = 0;
+		val >>= 32;
+	}
+
+	if (!(val >> 16)) {
+		r += 2;
+		val >>= 8;
+	} else {
+		val >>= 24;
+	}
+
+	r += (!val);
+
+	return r;
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
-#endif
-#endif
+	return (__builtin_clzll((U64)val) >> 3);
+#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
+#else /* 32 Bits Big Endian */
+#if defined(LZ4_FORCE_SW_BITCOUNT)
+	unsigned int r;
+
+	if (!(val >> 16)) {
+		r = 2;
+		val >>= 8;
+	} else {
+		r = 0;
+		val >>= 24;
+	}
+
+	r += (!val);
+
+	return r;
+#else
+	return (__builtin_clz((U32)val) >> 3);
+#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
+#endif /* LZ4_ARCH64 */
+#endif /* LZ4_LITTLE_ENDIAN */
+}

-static inline unsigned int LZ4_count(const BYTE *pIn, const BYTE *pMatch,
+static FORCE_INLINE __maybe_unused unsigned int LZ4_count(
+	const BYTE *pIn,
+	const BYTE *pMatch,
 	const BYTE *pInLimit)
 {
 	const BYTE *const pStart = pIn;

-	while (likely(pIn < pInLimit-(STEPSIZE-1))) {
-		size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+	while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
+		size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);

 		if (!diff) {
 			pIn += STEPSIZE;
 			pMatch += STEPSIZE;
 			continue;
 		}
-		pIn += LZ4_NBCOMMONBYTES(diff);
+
+		pIn += LZ4_NbCommonBytes(diff);
+
 		return (unsigned int)(pIn - pStart);
 	}

-#ifdef LZ4_ARCH64
-	if ((pIn < (pInLimit-3))
+#if LZ4_ARCH64
+	if ((pIn < (pInLimit - 3))
 		&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
-		pIn += 4; pMatch += 4;
+		pIn += 4;
+		pMatch += 4;
 	}
 #endif
-	if ((pIn < (pInLimit-1))
+
+	if ((pIn < (pInLimit - 1))
 		&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
-		pIn += 2; pMatch += 2;
+		pIn += 2;
+		pMatch += 2;
 	}
+
 	if ((pIn < pInLimit) && (*pMatch == *pIn))
 		pIn++;
+
 	return (unsigned int)(pIn - pStart);
 }

diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index 8363292..c7271a1 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -71,7 +71,7 @@ static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
 }

 /* Update chains up to ip (excluded) */
-static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+static FORCE_INLINE void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
 	const BYTE *ip)
 {
 	U16 * const chainTable = hc4->chainTable;
@@ -96,7 +96,7 @@ static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
 	hc4->nextToUpdate = target;
 }

-static inline int LZ4HC_InsertAndFindBestMatch(
+static FORCE_INLINE int LZ4HC_InsertAndFindBestMatch(
 	LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
 	const BYTE *ip,
 	const BYTE * const iLimit,
@@ -165,7 +165,7 @@ static inline int LZ4HC_InsertAndFindBestMatch(
 	return (int)ml;
 }

-static inline int LZ4HC_InsertAndGetWiderMatch(
+static FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch(
 	LZ4HC_CCtx_internal *hc4,
 	const BYTE * const ip,
 	const BYTE * const iLowLimit,
@@ -259,7 +259,7 @@ static inline int LZ4HC_InsertAndGetWiderMatch(
 	return longest;
 }

-static inline int LZ4HC_encodeSequence(
+static FORCE_INLINE int LZ4HC_encodeSequence(
 	const BYTE **ip,
 	BYTE **op,
 	const BYTE **anchor,

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH] lz4: fix performance regressions
@ 2017-02-12 11:16             ` Sven Schmidt
  0 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-12 11:16 UTC (permalink / raw)
  To: minchan
  Cc: ebiggers3, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck, Sven Schmidt

Fix performance regressions compared to current kernel LZ4

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      |   2 +-
 lib/lz4/lz4_compress.c   | 157 +++++++++++++++++++++++-------------
 lib/lz4/lz4_decompress.c |  50 ++++++++----
 lib/lz4/lz4defs.h        | 203 ++++++++++++++++++++++++++++++++---------------
 lib/lz4/lz4hc_compress.c |   8 +-
 5 files changed, 281 insertions(+), 139 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index a3912d7..394e3d9 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -82,7 +82,7 @@
 /*-************************************************************************
  *	STREAMING CONSTANTS AND STRUCTURES
  **************************************************************************/
-#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE - 3)) + 4)
 #define LZ4_STREAMSIZE	(LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))

 #define LZ4_STREAMHCSIZE        262192
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 697dbda..2cbbf99 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -39,27 +39,33 @@
 #include <linux/kernel.h>
 #include <asm/unaligned.h>

+static const int LZ4_minLength = (MFLIMIT + 1);
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT - 1));
+
 /*-******************************
  *	Compression functions
  ********************************/
-static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
+static FORCE_INLINE U32 LZ4_hash4(
+	U32 sequence,
+	tableType_t const tableType)
 {
 	if (tableType == byU16)
 		return ((sequence * 2654435761U)
-			>> ((MINMATCH*8) - (LZ4_HASHLOG + 1)));
+			>> ((MINMATCH * 8) - (LZ4_HASHLOG + 1)));
 	else
 		return ((sequence * 2654435761U)
-			>> ((MINMATCH*8) - LZ4_HASHLOG));
+			>> ((MINMATCH * 8) - LZ4_HASHLOG));
 }

-#if LZ4_ARCH64
-static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+static FORCE_INLINE __maybe_unused U32 LZ4_hash5(
+	U64 sequence,
+	tableType_t const tableType)
 {
 	const U32 hashLog = (tableType == byU16)
 		? LZ4_HASHLOG + 1
 		: LZ4_HASHLOG;

-#ifdef __LITTLE_ENDIAN__
+#if LZ4_LITTLE_ENDIAN
 	static const U64 prime5bytes = 889523592379ULL;

 	return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
@@ -69,9 +75,10 @@ static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
 	return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
 #endif
 }
-#endif

-static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
+static FORCE_INLINE U32 LZ4_hashPosition(
+	const void *p,
+	tableType_t const tableType)
 {
 #if LZ4_ARCH64
 	if (tableType == byU32)
@@ -81,8 +88,12 @@ static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
 	return LZ4_hash4(LZ4_read32(p), tableType);
 }

-static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
-	tableType_t const tableType, const BYTE *srcBase)
+static void LZ4_putPositionOnHash(
+	const BYTE *p,
+	U32 h,
+	void *tableBase,
+	tableType_t const tableType,
+	const BYTE *srcBase)
 {
 	switch (tableType) {
 	case byPtr:
@@ -109,16 +120,22 @@ static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
 	}
 }

-static inline void LZ4_putPosition(const BYTE *p, void *tableBase,
-	tableType_t tableType, const BYTE *srcBase)
+static FORCE_INLINE void LZ4_putPosition(
+	const BYTE *p,
+	void *tableBase,
+	tableType_t tableType,
+	const BYTE *srcBase)
 {
 	U32 const h = LZ4_hashPosition(p, tableType);

 	LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
 }

-static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
-	tableType_t tableType, const BYTE *srcBase)
+static const BYTE *LZ4_getPositionOnHash(
+	U32 h,
+	void *tableBase,
+	tableType_t tableType,
+	const BYTE *srcBase)
 {
 	if (tableType == byPtr) {
 		const BYTE **hashTable = (const BYTE **) tableBase;
@@ -135,12 +152,16 @@ static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
 	{
 		/* default, to ensure a return */
 		const U16 * const hashTable = (U16 *) tableBase;
+
 		return hashTable[h] + srcBase;
 	}
 }

-static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
-	tableType_t tableType, const BYTE *srcBase)
+static FORCE_INLINE const BYTE *LZ4_getPosition(
+	const BYTE *p,
+	void *tableBase,
+	tableType_t tableType,
+	const BYTE *srcBase)
 {
 	U32 const h = LZ4_hashPosition(p, tableType);

@@ -152,7 +173,7 @@ static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
  * LZ4_compress_generic() :
  * inlined, to ensure branches are decided at compilation time
  */
-static inline int LZ4_compress_generic(
+static FORCE_INLINE int LZ4_compress_generic(
 	LZ4_stream_t_internal * const dictPtr,
 	const char * const source,
 	char * const dest,
@@ -187,6 +208,7 @@ static inline int LZ4_compress_generic(
 		/* Unsupported inputSize, too large (or negative) */
 		return 0;
 	}
+
 	switch (dict) {
 	case noDict:
 	default:
@@ -216,7 +238,8 @@ static inline int LZ4_compress_generic(

 	/* First Byte */
 	LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
-	ip++; forwardH = LZ4_hashPosition(ip, tableType);
+	ip++;
+	forwardH = LZ4_hashPosition(ip, tableType);

 	/* Main Loop */
 	for ( ; ; ) {
@@ -227,15 +250,14 @@ static inline int LZ4_compress_generic(
 		{
 			const BYTE *forwardIp = ip;
 			unsigned int step = 1;
-			unsigned int searchMatchNb = acceleration
-				<< LZ4_skipTrigger;
+			unsigned int searchMatchNb = acceleration << LZ4_SKIPTRIGGER;

 			do {
 				U32 const h = forwardH;

 				ip = forwardIp;
 				forwardIp += step;
-				step = (searchMatchNb++ >> LZ4_skipTrigger);
+				step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);

 				if (unlikely(forwardIp > mflimit))
 					goto _last_literals;
@@ -243,6 +265,7 @@ static inline int LZ4_compress_generic(
 				match = LZ4_getPositionOnHash(h,
 					dictPtr->hashTable,
 					tableType, base);
+
 				if (dict == usingExtDict) {
 					if (match < (const BYTE *)source) {
 						refDelta = dictDelta;
@@ -251,11 +274,12 @@ static inline int LZ4_compress_generic(
 						refDelta = 0;
 						lowLimit = (const BYTE *)source;
 				}	 }
+
 				forwardH = LZ4_hashPosition(forwardIp,
 					tableType);
+
 				LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
 					tableType, base);
-
 			} while (((dictIssue == dictSmall)
 					? (match < lowRefLimit)
 					: 0)
@@ -268,31 +292,34 @@ static inline int LZ4_compress_generic(

 		/* Catch up */
 		while (((ip > anchor) & (match + refDelta > lowLimit))
-			&& (unlikely(ip[-1] == match[refDelta - 1]))) {
+				&& (unlikely(ip[-1] == match[refDelta - 1]))) {
 			ip--;
 			match--;
-			}
+		}

 		/* Encode Literals */
 		{
 			unsigned const int litLength = (unsigned int)(ip - anchor);

 			token = op++;
+
 			if ((outputLimited) &&
 				/* Check output buffer overflow */
 				(unlikely(op + litLength +
 					(2 + 1 + LASTLITERALS) +
-					(litLength/255) > olimit)))
+					(litLength / 255) > olimit)))
 				return 0;
+
 			if (litLength >= RUN_MASK) {
 				int len = (int)litLength - RUN_MASK;

-				*token = (RUN_MASK<<ML_BITS);
-				for (; len >= 255 ; len -= 255)
+				*token = (RUN_MASK << ML_BITS);
+
+				for (; len >= 255; len -= 255)
 					*op++ = 255;
 				*op++ = (BYTE)len;
 			} else
-				*token = (BYTE)(litLength<<ML_BITS);
+				*token = (BYTE)(litLength << ML_BITS);

 			/* Copy Literals */
 			LZ4_wildCopy(op, anchor, op + litLength);
@@ -301,7 +328,8 @@ static inline int LZ4_compress_generic(

 _next_match:
 		/* Encode Offset */
-		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
+		LZ4_writeLE16(op, (U16)(ip - match));
+		op += 2;

 		/* Encode MatchLength */
 		{
@@ -313,11 +341,15 @@ static inline int LZ4_compress_generic(

 				match += refDelta;
 				limit = ip + (dictEnd - match);
+
 				if (limit > matchlimit)
 					limit = matchlimit;
+
 				matchCode = LZ4_count(ip + MINMATCH,
 					match + MINMATCH, limit);
+
 				ip += MINMATCH + matchCode;
+
 				if (ip == limit) {
 					unsigned const int more = LZ4_count(ip,
 						(const BYTE *)source,
@@ -336,17 +368,20 @@ static inline int LZ4_compress_generic(
 				/* Check output buffer overflow */
 				(unlikely(op +
 					(1 + LASTLITERALS) +
-					(matchCode>>8) > olimit)))
+					(matchCode >> 8) > olimit)))
 				return 0;
+
 			if (matchCode >= ML_MASK) {
 				*token += ML_MASK;
 				matchCode -= ML_MASK;
 				LZ4_write32(op, 0xFFFFFFFF);
-				while (matchCode >= 4*255) {
+
+				while (matchCode >= 4 * 255) {
 					op += 4;
 					LZ4_write32(op, 0xFFFFFFFF);
-					matchCode -= 4*255;
+					matchCode -= 4 * 255;
 				}
+
 				op += matchCode / 255;
 				*op++ = (BYTE)(matchCode % 255);
 			} else
@@ -365,6 +400,7 @@ static inline int LZ4_compress_generic(
 		/* Test next position */
 		match = LZ4_getPosition(ip, dictPtr->hashTable,
 			tableType, base);
+
 		if (dict == usingExtDict) {
 			if (match < (const BYTE *)source) {
 				refDelta = dictDelta;
@@ -374,7 +410,9 @@ static inline int LZ4_compress_generic(
 				lowLimit = (const BYTE *)source;
 			}
 		}
+
 		LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+
 		if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
 			&& (match + MAX_DISTANCE >= ip)
 			&& (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
@@ -395,18 +433,21 @@ static inline int LZ4_compress_generic(
 		if ((outputLimited) &&
 			/* Check output buffer overflow */
 			((op - (BYTE *)dest) + lastRun + 1 +
-			((lastRun + 255 - RUN_MASK)/255) > (U32)maxOutputSize))
+			((lastRun + 255 - RUN_MASK) / 255) > (U32)maxOutputSize))
 			return 0;
+
 		if (lastRun >= RUN_MASK) {
 			size_t accumulator = lastRun - RUN_MASK;
 			*op++ = RUN_MASK << ML_BITS;
-			for (; accumulator >= 255 ; accumulator -= 255)
+			for (; accumulator >= 255; accumulator -= 255)
 				*op++ = 255;
 			*op++ = (BYTE) accumulator;
 		} else {
-			*op++ = (BYTE)(lastRun<<ML_BITS);
+			*op++ = (BYTE)(lastRun << ML_BITS);
 		}
+
 		memcpy(op, anchor, lastRun);
+
 		op += lastRun;
 	}

@@ -414,23 +455,27 @@ static inline int LZ4_compress_generic(
 	return (int) (((char *)op) - dest);
 }

-static int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
-	int inputSize, int maxOutputSize, int acceleration)
+static int LZ4_compress_fast_extState(
+	void *state,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize,
+	int acceleration)
 {
-	#if LZ4_ARCH64
-	tableType_t tableType = byU32;
-	#else
-	tableType_t tableType = byPtr;
-	#endif
-
 	LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
+#if LZ4_ARCH64
+	const tableType_t tableType = byU32;
+#else
+	const tableType_t tableType = byPtr;
+#endif

 	LZ4_resetStream((LZ4_stream_t *)state);

 	if (acceleration < 1)
 		acceleration = LZ4_ACCELERATION_DEFAULT;

-	if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+	if (maxOutputSize >= LZ4_COMPRESSBOUND(inputSize)) {
 		if (inputSize < LZ4_64Klimit)
 			return LZ4_compress_generic(ctx, source,
 				dest, inputSize, 0,
@@ -474,7 +519,6 @@ EXPORT_SYMBOL(LZ4_compress_default);
 /*-******************************
  *	*_destSize() variant
  ********************************/
-
 static int LZ4_compress_destSize_generic(
 	LZ4_stream_t_internal * const ctx,
 	const char * const src,
@@ -529,14 +573,14 @@ static int LZ4_compress_destSize_generic(
 		{
 			const BYTE *forwardIp = ip;
 			unsigned int step = 1;
-			unsigned int searchMatchNb = 1 << LZ4_skipTrigger;
+			unsigned int searchMatchNb = 1 << LZ4_SKIPTRIGGER;

 			do {
 				U32 h = forwardH;

 				ip = forwardIp;
 				forwardIp += step;
-				step = (searchMatchNb++ >> LZ4_skipTrigger);
+				step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);

 				if (unlikely(forwardIp > mflimit))
 					goto _last_literals;
@@ -559,8 +603,9 @@ static int LZ4_compress_destSize_generic(
 		while ((ip > anchor)
 			&& (match > lowLimit)
 			&& (unlikely(ip[-1] == match[-1]))) {
-			ip--; match--;
-			}
+			ip--;
+			match--;
+		}

 		/* Encode Literal length */
 		{
@@ -644,11 +689,11 @@ static int LZ4_compress_destSize_generic(
 		size_t lastRunSize = (size_t)(iend - anchor);

 		if (op + 1 /* token */
-			+ ((lastRunSize + 240)/255) /* litLength */
+			+ ((lastRunSize + 240) / 255) /* litLength */
 			+ lastRunSize /* literals */ > oend) {
 			/* adapt lastRunSize to fill 'dst' */
 			lastRunSize	= (oend - op) - 1;
-			lastRunSize -= (lastRunSize + 240)/255;
+			lastRunSize -= (lastRunSize + 240) / 255;
 		}
 		ip = anchor + lastRunSize;

@@ -656,7 +701,7 @@ static int LZ4_compress_destSize_generic(
 			size_t accumulator = lastRunSize - RUN_MASK;

 			*op++ = RUN_MASK << ML_BITS;
-			for (; accumulator >= 255 ; accumulator -= 255)
+			for (; accumulator >= 255; accumulator -= 255)
 				*op++ = 255;
 			*op++ = (BYTE) accumulator;
 		} else {
@@ -675,14 +720,14 @@ static int LZ4_compress_destSize_extState(LZ4_stream_t *state, const char *src,
 	char *dst, int *srcSizePtr, int targetDstSize)
 {
 	#if LZ4_ARCH64
-	tableType_t tableType = byU32;
+		const tableType_t tableType = byU32;
 	#else
-	tableType_t tableType = byPtr;
+		const tableType_t tableType = byPtr;
 	#endif

 	LZ4_resetStream(state);

-	if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {
+	if (targetDstSize >= LZ4_COMPRESSBOUND(*srcSizePtr)) {
 		/* compression success is guaranteed */
 		return LZ4_compress_fast_extState(
 			state, src, dst, *srcSizePtr,
@@ -847,7 +892,7 @@ int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
 			result = LZ4_compress_generic(
 				streamPtr, source, dest, inputSize,
 				maxOutputSize, limitedOutput, byU32,
-				withPrefix64k, dictSmall,	acceleration);
+				withPrefix64k, dictSmall, acceleration);
 		} else {
 			result = LZ4_compress_generic(
 				streamPtr, source, dest, inputSize,
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index a7731ba..3bfc2f6 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -49,8 +49,8 @@
  * Note that it is important this generic function is really inlined,
  * in order to remove useless branches during compilation optimization.
  */
-static inline int LZ4_decompress_generic(
-	 const char *const source,
+static FORCE_INLINE int LZ4_decompress_generic(
+	 const char * const source,
 	 char * const dest,
 	 int inputSize,
 		/*
@@ -180,22 +180,28 @@ static inline int LZ4_decompress_generic(
 					goto _output_error;
 				}
 			}
+
 			memcpy(op, ip, length);
 			ip += length;
 			op += length;
 			/* Necessarily EOF, due to parsing restrictions */
 			break;
 		}
+
 		LZ4_wildCopy(op, ip, cpy);
-		ip += length; op = cpy;
+		ip += length;
+		op = cpy;

 		/* get offset */
-		offset = LZ4_readLE16(ip); ip += 2;
+		offset = LZ4_readLE16(ip);
+		ip += 2;
 		match = op - offset;
+
 		if ((checkOffset) && (unlikely(match < lowLimit))) {
 			/* Error : offset outside buffers */
 			goto _output_error;
 		}
+
 		/* costs ~1%; silence an msan warning when offset == 0 */
 		LZ4_write32(op, (U32)offset);

@@ -205,11 +211,14 @@ static inline int LZ4_decompress_generic(
 			unsigned int s;

 			do {
-			s = *ip++;
-			if ((endOnInput) && (ip > iend - LASTLITERALS))
-				goto _output_error;
-			length += s;
+				s = *ip++;
+
+				if ((endOnInput) && (ip > iend - LASTLITERALS))
+					goto _output_error;
+
+				length += s;
 			} while (s == 255);
+
 			if ((safeDecode)
 				&& unlikely(
 					(size_t)(op + length) < (size_t)op)) {
@@ -217,6 +226,7 @@ static inline int LZ4_decompress_generic(
 				goto _output_error;
 			}
 		}
+
 		length += MINMATCH;

 		/* check external dictionary */
@@ -227,12 +237,13 @@ static inline int LZ4_decompress_generic(
 			}

 			if (length <= (size_t)(lowPrefix - match)) {
-			/*
-			 * match can be copied as a single segment
-			 * from external dictionary
-			 */
-			memmove(op, dictEnd - (lowPrefix - match), length);
-			op += length;
+				/*
+				 * match can be copied as a single segment
+				 * from external dictionary
+				 */
+				memmove(op, dictEnd - (lowPrefix - match),
+					length);
+				op += length;
 			} else {
 				/*
 				 * match encompass external
@@ -256,11 +267,13 @@ static inline int LZ4_decompress_generic(
 					op += restSize;
 				}
 			}
+
 			continue;
 		}

 		/* copy match within block */
 		cpy = op + length;
+
 		if (unlikely(offset < 8)) {
 			const int dec64 = dec64table[offset];

@@ -272,7 +285,8 @@ static inline int LZ4_decompress_generic(
 			memcpy(op + 4, match, 4);
 			match -= dec64;
 		} else {
-			LZ4_copy8(op, match); match += 8;
+			LZ4_copy8(op, match);
+			match += 8;
 		}

 		op += 8;
@@ -287,18 +301,22 @@ static inline int LZ4_decompress_generic(
 				 */
 				goto _output_error;
 			}
+
 			if (op < oCopyLimit) {
 				LZ4_wildCopy(op, match, oCopyLimit);
 				match += oCopyLimit - op;
 				op = oCopyLimit;
 			}
+
 			while (op < cpy)
 				*op++ = *match++;
 		} else {
 			LZ4_copy8(op, match);
+
 			if (length > 16)
 				LZ4_wildCopy(op + 8, match + 8, cpy);
 		}
+
 		op = cpy; /* correction */
 	}

@@ -438,7 +456,7 @@ int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
  * These decoding functions work the same as "_continue" ones,
  * the dictionary must be explicitly provided within parameters
  */
-static inline int LZ4_decompress_usingDict_generic(const char *source,
+static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source,
 	char *dest, int compressedSize, int maxOutputSize, int safe,
 	const char *dictStart, int dictSize)
 {
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index 23e1a1b..47ef42b 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -38,14 +38,7 @@
 #include <asm/unaligned.h>
 #include <linux/string.h>	 /* memset, memcpy */

-/*
- * Detects 64 bits mode
-*/
-#if defined(CONFIG_64BIT)
-#define LZ4_ARCH64 1
-#else
-#define LZ4_ARCH64 0
-#endif
+#define FORCE_INLINE __always_inline

 /*-************************************
  *	Basic Types
@@ -60,14 +53,38 @@ typedef uint64_t U64;
 typedef uintptr_t uptrval;

 /*-************************************
+ *	Architecture specifics
+ **************************************/
+#if defined(CONFIG_64BIT)
+#define LZ4_ARCH64 1
+#else
+#define LZ4_ARCH64 0
+#endif
+
+#if defined(__LITTLE_ENDIAN)
+#define LZ4_LITTLE_ENDIAN 1
+#else
+#define LZ4_LITTLE_ENDIAN 0
+#endif
+
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system
+ * does not support hardware bit count
+ */
+/* #define LZ4_FORCE_SW_BITCOUNT */
+
+/*-************************************
  *	Constants
  **************************************/
 #define MINMATCH 4

 #define WILDCOPYLENGTH 8
 #define LASTLITERALS 5
-#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
-static const int LZ4_minLength = (MFLIMIT+1);
+#define MFLIMIT (WILDCOPYLENGTH + MINMATCH)
+
+/* Increase this value ==> compression run slower on incompressible data */
+#define LZ4_SKIPTRIGGER 6

 #define KB (1<<10)
 #define MB (1<<20)
@@ -82,53 +99,42 @@ static const int LZ4_minLength = (MFLIMIT+1);
 #define RUN_BITS (8-ML_BITS)
 #define RUN_MASK ((1U<<RUN_BITS)-1)

-static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT-1));
-static const U32 LZ4_skipTrigger = 6;
-
 /*-************************************
  *	Reading and writing into memory
  **************************************/
+typedef union {
+	U16 u16;
+	U32 u32;
+	size_t uArch;
+} __packed unalign;

-static inline U16 LZ4_read16(const void *memPtr)
+static FORCE_INLINE __maybe_unused U16 LZ4_read16(const void *ptr)
 {
-	U16 val;
-
-	memcpy(&val, memPtr, sizeof(val));
-
-	return val;
+	return ((const unalign *)ptr)->u16;
 }

-static inline U32 LZ4_read32(const void *memPtr)
+static FORCE_INLINE __maybe_unused U32 LZ4_read32(const void *ptr)
 {
-	U32 val;
-
-	memcpy(&val, memPtr, sizeof(val));
-
-	return val;
+	return ((const unalign *)ptr)->u32;
 }

-static inline size_t LZ4_read_ARCH(const void *memPtr)
+static FORCE_INLINE __maybe_unused size_t LZ4_read_ARCH(const void *ptr)
 {
-	size_t val;
-
-	memcpy(&val, memPtr, sizeof(val));
-
-	return val;
+	return ((const unalign *)ptr)->uArch;
 }

-static inline void LZ4_write16(void *memPtr, U16 value)
+static FORCE_INLINE __maybe_unused void LZ4_write16(void *memPtr, U16 value)
 {
-	memcpy(memPtr, &value, sizeof(value));
+	((unalign *)memPtr)->u16 = value;
 }

-static inline void LZ4_write32(void *memPtr, U32 value)
-{
-	memcpy(memPtr, &value, sizeof(value));
+static FORCE_INLINE __maybe_unused void LZ4_write32(void *memPtr, U32 value) {
+	((unalign *)memPtr)->u32 = value;
 }

-static inline U16 LZ4_readLE16(const void *memPtr)
+static FORCE_INLINE __maybe_unused U16 LZ4_readLE16(const void *memPtr)
 {
-#ifdef __LITTLE_ENDIAN__
+#if LZ4_LITTLE_ENDIAN
 	return LZ4_read16(memPtr);
 #else
 	const BYTE *p = (const BYTE *)memPtr;
@@ -137,19 +143,19 @@ static inline U16 LZ4_readLE16(const void *memPtr)
 #endif
 }

-static inline void LZ4_writeLE16(void *memPtr, U16 value)
+static FORCE_INLINE __maybe_unused void LZ4_writeLE16(void *memPtr, U16 value)
 {
-#ifdef __LITTLE_ENDIAN__
+#if LZ4_LITTLE_ENDIAN
 	LZ4_write16(memPtr, value);
 #else
 	BYTE *p = (BYTE *)memPtr;

 	p[0] = (BYTE) value;
-	p[1] = (BYTE)(value>>8);
+	p[1] = (BYTE)(value >> 8);
 #endif
 }

-static inline void LZ4_copy8(void *dst, const void *src)
+static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
 {
 	memcpy(dst, src, 8);
 }
@@ -158,7 +164,8 @@ static inline void LZ4_copy8(void *dst, const void *src)
  * customized variant of memcpy,
  * which can overwrite up to 7 bytes beyond dstEnd
  */
-static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
+static FORCE_INLINE void LZ4_wildCopy(void *dstPtr,
+	const void *srcPtr, void *dstEnd)
 {
 	BYTE *d = (BYTE *)dstPtr;
 	const BYTE *s = (const BYTE *)srcPtr;
@@ -171,49 +178,121 @@ static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
 	} while (d < e);
 }

-#if LZ4_ARCH64
-#ifdef __BIG_ENDIAN__
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
+{
+#if LZ4_LITTLE_ENDIAN
+#if LZ4_ARCH64 /* 64 Bits Little Endian */
+#if defined(LZ4_FORCE_SW_BITCOUNT)
+	static const int DeBruijnBytePos[64] = {
+		0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7,
+		0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7,
+		7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6,
+		7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
+	};
+
+	return DeBruijnBytePos[((U64)((val & -(long long)val)
+		* 0x0218A392CDABBD3FULL)) >> 58];
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
-#endif
+	return (__builtin_ctzll((U64)val) >> 3);
+#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
+#else /* 32 Bits Little Endian */
+#if defined(LZ4_FORCE_SW_BITCOUNT)
+	static const int DeBruijnBytePos[32] = {
+		0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1,
+		3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1
+	};
+
+	return DeBruijnBytePos[((U32)((val & -(S32)val)
+		* 0x077CB531U)) >> 27];
 #else
-#ifdef __BIG_ENDIAN__
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
+	return (__builtin_ctz((U32)val) >> 3);
+#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
+#endif /* LZ4_ARCH64 */
+#else /* Big Endian */
+#if LZ4_ARCH64 /* 64 Bits Big Endian */
+#if defined(LZ4_FORCE_SW_BITCOUNT)
+	unsigned int r;
+
+	if (!(val >> 32)) {
+		r = 4;
+	} else {
+		r = 0;
+		val >>= 32;
+	}
+
+	if (!(val >> 16)) {
+		r += 2;
+		val >>= 8;
+	} else {
+		val >>= 24;
+	}
+
+	r += (!val);
+
+	return r;
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
-#endif
-#endif
+	return (__builtin_clzll((U64)val) >> 3);
+#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
+#else /* 32 Bits Big Endian */
+#if defined(LZ4_FORCE_SW_BITCOUNT)
+	unsigned int r;
+
+	if (!(val >> 16)) {
+		r = 2;
+		val >>= 8;
+	} else {
+		r = 0;
+		val >>= 24;
+	}
+
+	r += (!val);
+
+	return r;
+#else
+	return (__builtin_clz((U32)val) >> 3);
+#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
+#endif /* LZ4_ARCH64 */
+#endif /* LZ4_LITTLE_ENDIAN */
+}

-static inline unsigned int LZ4_count(const BYTE *pIn, const BYTE *pMatch,
+static FORCE_INLINE __maybe_unused unsigned int LZ4_count(
+	const BYTE *pIn,
+	const BYTE *pMatch,
 	const BYTE *pInLimit)
 {
 	const BYTE *const pStart = pIn;

-	while (likely(pIn < pInLimit-(STEPSIZE-1))) {
-		size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+	while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
+		size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);

 		if (!diff) {
 			pIn += STEPSIZE;
 			pMatch += STEPSIZE;
 			continue;
 		}
-		pIn += LZ4_NBCOMMONBYTES(diff);
+
+		pIn += LZ4_NbCommonBytes(diff);
+
 		return (unsigned int)(pIn - pStart);
 	}

-#ifdef LZ4_ARCH64
-	if ((pIn < (pInLimit-3))
+#if LZ4_ARCH64
+	if ((pIn < (pInLimit - 3))
 		&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
-		pIn += 4; pMatch += 4;
+		pIn += 4;
+		pMatch += 4;
 	}
 #endif
-	if ((pIn < (pInLimit-1))
+
+	if ((pIn < (pInLimit - 1))
 		&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
-		pIn += 2; pMatch += 2;
+		pIn += 2;
+		pMatch += 2;
 	}
+
 	if ((pIn < pInLimit) && (*pMatch == *pIn))
 		pIn++;
+
 	return (unsigned int)(pIn - pStart);
 }

diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index 8363292..c7271a1 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -71,7 +71,7 @@ static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
 }

 /* Update chains up to ip (excluded) */
-static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+static FORCE_INLINE void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
 	const BYTE *ip)
 {
 	U16 * const chainTable = hc4->chainTable;
@@ -96,7 +96,7 @@ static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
 	hc4->nextToUpdate = target;
 }

-static inline int LZ4HC_InsertAndFindBestMatch(
+static FORCE_INLINE int LZ4HC_InsertAndFindBestMatch(
 	LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
 	const BYTE *ip,
 	const BYTE * const iLimit,
@@ -165,7 +165,7 @@ static inline int LZ4HC_InsertAndFindBestMatch(
 	return (int)ml;
 }

-static inline int LZ4HC_InsertAndGetWiderMatch(
+static FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch(
 	LZ4HC_CCtx_internal *hc4,
 	const BYTE * const ip,
 	const BYTE * const iLowLimit,
@@ -259,7 +259,7 @@ static inline int LZ4HC_InsertAndGetWiderMatch(
 	return longest;
 }

-static inline int LZ4HC_encodeSequence(
+static FORCE_INLINE int LZ4HC_encodeSequence(
 	const BYTE **ip,
 	BYTE **op,
 	const BYTE **anchor,
--
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* Re: [PATCH] lz4: fix performance regressions
  2017-02-12 11:16             ` Sven Schmidt
  (?)
@ 2017-02-12 13:05             ` Willy Tarreau
  2017-02-12 15:20               ` Sven Schmidt
  -1 siblings, 1 reply; 104+ messages in thread
From: Willy Tarreau @ 2017-02-12 13:05 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: minchan, ebiggers3, akpm, bongkyu.kim, rsalvaterra,
	sergey.senozhatsky, gregkh, linux-kernel, herbert, davem,
	linux-crypto, anton, ccross, keescook, tony.luck

Hi Sven,

On Sun, Feb 12, 2017 at 12:16:18PM +0100, Sven Schmidt wrote:
> Fix performance regressions compared to current kernel LZ4

Your patch contains mostly style cleanups which certainly are welcome
but make the whole patch hard to review. These cleanups would have been
better into a separate, preliminary patch IMHO.

Regards,
Willy

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH] lz4: fix performance regressions
  2017-02-12 13:05             ` Willy Tarreau
@ 2017-02-12 15:20               ` Sven Schmidt
  2017-02-12 21:41                 ` Willy Tarreau
  0 siblings, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2017-02-12 15:20 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: minchan, ebiggers3, akpm, bongkyu.kim, rsalvaterra,
	sergey.senozhatsky, gregkh, linux-kernel, herbert, davem,
	linux-crypto, anton, ccross, keescook, tony.luck

On Sun, Feb 12, 2017 at 02:05:08PM +0100, Willy Tarreau wrote:
> Hi Sven,
> 
> On Sun, Feb 12, 2017 at 12:16:18PM +0100, Sven Schmidt wrote:
> > Fix performance regressions compared to current kernel LZ4
> 
> Your patch contains mostly style cleanups which certainly are welcome
> but make the whole patch hard to review. These cleanups would have been
> better into a separate, preliminary patch IMHO.
> 
> Regards,
> Willy

Hi Willy,

the problem was, I wanted to compare my version to the upstream LZ4 to find bugs (as with my last patch version: wrong indentation in LZ4HC 
in two for loops). But since the LZ4 code is a pain to read, I made additional style cleanups "on the way".
Hope you can manage to review the patch though, because it is difficult to separate the cleanups now.
Please feel free to ask if you stumble upon something.

Greetings,

Sven 

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH] lz4: fix performance regressions
  2017-02-12 15:20               ` Sven Schmidt
@ 2017-02-12 21:41                 ` Willy Tarreau
  2017-02-13 11:53                   ` Sven Schmidt
  0 siblings, 1 reply; 104+ messages in thread
From: Willy Tarreau @ 2017-02-12 21:41 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: minchan, ebiggers3, akpm, bongkyu.kim, rsalvaterra,
	sergey.senozhatsky, gregkh, linux-kernel, herbert, davem,
	linux-crypto, anton, ccross, keescook, tony.luck

On Sun, Feb 12, 2017 at 04:20:00PM +0100, Sven Schmidt wrote:
> On Sun, Feb 12, 2017 at 02:05:08PM +0100, Willy Tarreau wrote:
> > Hi Sven,
> > 
> > On Sun, Feb 12, 2017 at 12:16:18PM +0100, Sven Schmidt wrote:
> > > Fix performance regressions compared to current kernel LZ4
> > 
> > Your patch contains mostly style cleanups which certainly are welcome
> > but make the whole patch hard to review. These cleanups would have been
> > better into a separate, preliminary patch IMHO.
> > 
> > Regards,
> > Willy
> 
> Hi Willy,
> 
> the problem was, I wanted to compare my version to the upstream LZ4 to find bugs (as with my last patch version: wrong indentation in LZ4HC 
> in two for loops). But since the LZ4 code is a pain to read, I made additional style cleanups "on the way".

Oh I can easily understand!

> Hope you can manage to review the patch though, because it is difficult to separate the cleanups now.

When I need to split a patch into pieces, usually what I do is that I
revert it, re-apply it without committing, then "git add -p", validate
all the hunks to be taken as the first patch (ie here the cleanups),
commit, then commit the rest as a separate one. It seems to me that the
fix is in the last few hunks though I'm not sure yet.

Thanks,
Willy

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH] lz4: fix performance regressions
  2017-02-12 11:16             ` Sven Schmidt
  (?)
  (?)
@ 2017-02-12 23:38             ` Eric Biggers
  2017-02-14 10:33               ` Sven Schmidt
  -1 siblings, 1 reply; 104+ messages in thread
From: Eric Biggers @ 2017-02-12 23:38 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: minchan, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck

Hi Sven,

On Sun, Feb 12, 2017 at 12:16:18PM +0100, Sven Schmidt wrote:
>  /*-************************************
>   *	Reading and writing into memory
>   **************************************/
> +typedef union {
> +	U16 u16;
> +	U32 u32;
> +	size_t uArch;
> +} __packed unalign;
> 
> -static inline U16 LZ4_read16(const void *memPtr)
> +static FORCE_INLINE __maybe_unused U16 LZ4_read16(const void *ptr)
>  {
> -	U16 val;
> -
> -	memcpy(&val, memPtr, sizeof(val));
> -
> -	return val;
> +	return ((const unalign *)ptr)->u16;
>  }
> 
> -static inline U32 LZ4_read32(const void *memPtr)
> +static FORCE_INLINE __maybe_unused U32 LZ4_read32(const void *ptr)
>  {
> -	U32 val;
> -
> -	memcpy(&val, memPtr, sizeof(val));
> -
> -	return val;
> +	return ((const unalign *)ptr)->u32;
>  }
> 
> -static inline size_t LZ4_read_ARCH(const void *memPtr)
> +static FORCE_INLINE __maybe_unused size_t LZ4_read_ARCH(const void *ptr)
>  {
> -	size_t val;
> -
> -	memcpy(&val, memPtr, sizeof(val));
> -
> -	return val;
> +	return ((const unalign *)ptr)->uArch;
>  }
> 
> -static inline void LZ4_write16(void *memPtr, U16 value)
> +static FORCE_INLINE __maybe_unused void LZ4_write16(void *memPtr, U16 value)
>  {
> -	memcpy(memPtr, &value, sizeof(value));
> +	((unalign *)memPtr)->u16 = value;
>  }
> 
> -static inline void LZ4_write32(void *memPtr, U32 value)
> -{
> -	memcpy(memPtr, &value, sizeof(value));
> +static FORCE_INLINE __maybe_unused void LZ4_write32(void *memPtr, U32 value) {
> +	((unalign *)memPtr)->u32 = value;
>  }
> 
> -static inline U16 LZ4_readLE16(const void *memPtr)
> +static FORCE_INLINE __maybe_unused U16 LZ4_readLE16(const void *memPtr)
>  {
> -#ifdef __LITTLE_ENDIAN__
> +#if LZ4_LITTLE_ENDIAN
>  	return LZ4_read16(memPtr);
>  #else
>  	const BYTE *p = (const BYTE *)memPtr;
> @@ -137,19 +143,19 @@ static inline U16 LZ4_readLE16(const void *memPtr)
>  #endif
>  }

Since upstream LZ4 is intended to be compiled at -O3, this may allow it to get
away with using memcpy() for unaligned memory accesses.  The reason it uses
memcpy() is that, other than a byte-by-byte copy, it is the only portable way to
express unaligned memory accesses.  But the Linux kernel is sometimes compiled
optimized for size (-Os), and I wouldn't be *too* surprised if some of the
memcpy()'s don't always get inlined then, which could be causing the performance
regression being observed.  (Of course, this could be verified by checking
whether CONFIG_CC_OPTIMIZE_FOR_SIZE=y is set, then reading the assembly.)

But I don't think accessing a __packed structure directly is the right
alternative.  Instead, Linux already includes macros for unaligned memory
accesses which have been optimized for every supported architecture.  Those
should just be used instead, e.g. like this:

static FORCE_INLINE U16 LZ4_read16(const void *ptr)
{
	return get_unaligned((const u16 *)ptr);
}

static FORCE_INLINE U32 LZ4_read32(const void *ptr)
{
	return get_unaligned((const u32 *)ptr);
}

static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr)
{
	return get_unaligned((const size_t *)ptr);
}

static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value)
{
	put_unaligned(value, (u16 *)memPtr);
}

static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value)
{
	put_unaligned(value, (u32 *)memPtr);
}

static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr)
{
	return get_unaligned_le16(memPtr);
}

static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value)
{
	return put_unaligned_le16(value, memPtr);
}

static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
{
	if (LZ4_64bits()) {
		u64 a = get_unaligned((const u64 *)src);
		put_unaligned(a, (u64 *)dst);
	} else {
		u32 a = get_unaligned((const u32 *)src);
		u32 b = get_unaligned((const u32 *)src + 1);
		put_unaligned(a, (u32 *)dst);
		put_unaligned(b, (u32 *)dst + 1);
	}
}


Note that I dropped __maybe_unused as it's not needed on inline functions.
That should be done everywhere else the patch proposes to add it too.

> -#if LZ4_ARCH64
> -#ifdef __BIG_ENDIAN__
> -#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
> +static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
> +{
> +#if LZ4_LITTLE_ENDIAN
> +#if LZ4_ARCH64 /* 64 Bits Little Endian */
> +#if defined(LZ4_FORCE_SW_BITCOUNT)
> +	static const int DeBruijnBytePos[64] = {
> +		0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7,
> +		0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7,
> +		7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6,
> +		7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
> +	};
> +
> +	return DeBruijnBytePos[((U64)((val & -(long long)val)
> +		* 0x0218A392CDABBD3FULL)) >> 58];
>  #else
> -#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
> -#endif
> +	return (__builtin_ctzll((U64)val) >> 3);
> +#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
> +#else /* 32 Bits Little Endian */
> +#if defined(LZ4_FORCE_SW_BITCOUNT)
> +	static const int DeBruijnBytePos[32] = {
> +		0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1,
> +		3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1
> +	};
> +
> +	return DeBruijnBytePos[((U32)((val & -(S32)val)
> +		* 0x077CB531U)) >> 27];
>  #else
> -#ifdef __BIG_ENDIAN__
> -#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
> +	return (__builtin_ctz((U32)val) >> 3);
> +#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
> +#endif /* LZ4_ARCH64 */
> +#else /* Big Endian */
> +#if LZ4_ARCH64 /* 64 Bits Big Endian */
> +#if defined(LZ4_FORCE_SW_BITCOUNT)
> +	unsigned int r;
> +
> +	if (!(val >> 32)) {
> +		r = 4;
> +	} else {
> +		r = 0;
> +		val >>= 32;
> +	}
> +
> +	if (!(val >> 16)) {
> +		r += 2;
> +		val >>= 8;
> +	} else {
> +		val >>= 24;
> +	}
> +
> +	r += (!val);
> +
> +	return r;
>  #else
> -#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
> -#endif
> -#endif
> +	return (__builtin_clzll((U64)val) >> 3);
> +#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
> +#else /* 32 Bits Big Endian */
> +#if defined(LZ4_FORCE_SW_BITCOUNT)
> +	unsigned int r;
> +
> +	if (!(val >> 16)) {
> +		r = 2;
> +		val >>= 8;
> +	} else {
> +		r = 0;
> +		val >>= 24;
> +	}
> +
> +	r += (!val);
> +
> +	return r;
> +#else
> +	return (__builtin_clz((U32)val) >> 3);
> +#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
> +#endif /* LZ4_ARCH64 */
> +#endif /* LZ4_LITTLE_ENDIAN */
> +}

The reason LZ4_NbCommonBytes() in upstream LZ4 is so complicated is that it
needs to provide portable fallbacks that work on *any* platform and compiler.
This isn't needed in the Linux kernel, and it should just call the functions
already defined that do the right thing:

static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
{
        if (LZ4_isLittleEndian())
                return __ffs(val) >> 3;
        else
                return (BITS_PER_LONG - 1 - __fls(val)) >> 3;
}                      

To be clear, when I said that upstream LZ4 shouldn't generally be changed, I'm
primarily talking about the core code, not the platform-specific parts.  What we
need to do is define platform-specific stuff, like LZ4_read*(), LZ4_write*(),
LZ4_NbCommonBytes(), LZ4_64bits(), and FORCE_INLINE, in a way that makes sense
for the Linux kernel and the environment it's compiled in.  Also I think it's
fine, and maybe even necessary for performance, to *add* inline or FORCE_INLINE
in some places too, given that LZ4 in Linux may get compiled with a lower
optimization level than that intended to be used for upstream LZ4 --- though it
may be worth considering updating the Makefile to just always compile the LZ4
files with -O3 instead.  What should be avoided is making unnecessary changes to
the *users* of the platform-specific code or to the core (de)compression
parameters or templates.

Eric

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-12 11:16         ` Sven Schmidt
  2017-02-12 11:16             ` Sven Schmidt
@ 2017-02-13  0:03           ` Minchan Kim
  2017-02-13 12:08             ` Sven Schmidt
  1 sibling, 1 reply; 104+ messages in thread
From: Minchan Kim @ 2017-02-13  0:03 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: ebiggers3, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck

Hi Sven,

On Sun, Feb 12, 2017 at 12:16:17PM +0100, Sven Schmidt wrote:
> 
> 
> 
> On 02/10/2017 01:13 AM, Minchan Kim wrote:
> > Hello Sven,
> >
> > On Thu, Feb 09, 2017 at 11:56:17AM +0100, Sven Schmidt wrote:
> >> Hey Minchan,
> >>
> >> On Thu, Feb 09, 2017 at 08:31:21AM +0900, Minchan Kim wrote:
> >>> Hello Sven,
> >>>
> >>> On Sun, Feb 05, 2017 at 08:09:03PM +0100, Sven Schmidt wrote:
> >>>>
> >>>> This patchset is for updating the LZ4 compression module to a version based
> >>>> on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
> >>>> which provides an "acceleration" parameter as a tradeoff between
> >>>> high compression ratio and high compression speed.
> >>>>
> >>>> We want to use LZ4 fast in order to support compression in lustre
> >>>> and (mostly, based on that) investigate data reduction techniques in behalf of
> >>>> storage systems.
> >>>>
> >>>> Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
> >>>> it is possible to enable applications to use fast and/or high compression
> >>>> depending on the usecase.
> >>>> For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
> >>>> LZ4 in the kernel.
> >>>>
> >>>> LZ4 homepage: http://www.lz4.org/
> >>>> LZ4 source repository: https://github.com/lz4/lz4
> >>>> Source version: 1.7.3
> >>>>
> >>>> Benchmark (taken from [1], Core i5-4300U @1.9GHz):
> >>>> ----------------|--------------|----------------|----------
> >>>> Compressor      | Compression  | Decompression  | Ratio
> >>>> ----------------|--------------|----------------|----------
> >>>> memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
> >>>> LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
> >>>> LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
> >>>> LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
> >>>> LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101
> >>>>
> >>>> [1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html
> >>>>
> >>>> [PATCH 1/5] lib: Update LZ4 compressor module
> >>>> [PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
> >>>> [PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
> >>>> [PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
> >>>> [PATCH 5/5] lib/lz4: Remove back-compat wrappers
> >>>
> >>> Today, I did zram-lz4 performance test with fio in current mmotm and
> >>> found it makes regression about 20%.
> >>>
> >>> "lz4-update" means current mmots(git://git.cmpxchg.org/linux-mmots.git) so
> >>> applied your 5 patches. (But now sure current mmots has recent uptodate
> >>> patches)
> >>> "revert" means I reverted your 5 patches in current mmots.
> >>>
> >>>                      revert    lz4-update
> >>>
> >>>       seq-write       1547       1339      86.55%
> >>>      rand-write      22775      19381      85.10%
> >>>        seq-read       7035       5589      79.45%
> >>>       rand-read      78556      68479      87.17%
> >>>    mixed-seq(R)       1305       1066      81.69%
> >>>    mixed-seq(W)       1205        984      81.66%
> >>>   mixed-rand(R)      17421      14993      86.06%
> >>>   mixed-rand(W)      17391      14968      86.07%
> >>
> >> which parts of the output (as well as units) are these values exactly?
> >> I did not work with fio until now, so I think I might ask before misinterpreting my results.
> >
> > It is IOPS.
> >
> >>  
> >>> My fio description file
> >>>
> >>> [global]
> >>> bs=4k
> >>> ioengine=sync
> >>> size=100m
> >>> numjobs=1
> >>> group_reporting
> >>> buffer_compress_percentage=30
> >>> scramble_buffers=0
> >>> filename=/dev/zram0
> >>> loops=10
> >>> fsync_on_close=1
> >>>
> >>> [seq-write]
> >>> bs=64k
> >>> rw=write
> >>> stonewall
> >>>
> >>> [rand-write]
> >>> rw=randwrite
> >>> stonewall
> >>>
> >>> [seq-read]
> >>> bs=64k
> >>> rw=read
> >>> stonewall
> >>>
> >>> [rand-read]
> >>> rw=randread
> >>> stonewall
> >>>
> >>> [mixed-seq]
> >>> bs=64k
> >>> rw=rw
> >>> stonewall
> >>>
> >>> [mixed-rand]
> >>> rw=randrw
> >>> stonewall
> >>>
> >>
> >> Great, this makes it easy for me to reproduce your test.
> >
> > If you have trouble to reproduce, feel free to ask me. I'm happy to test it. :)
> >
> > Thanks!
> >
> 
> Hi Minchan,
> 
> I will send an updated patch as a reply to this E-Mail. Would be really grateful If you'd test it and provide feedback!
> The patch should be applied to the current mmots tree.
> 
> In fact, the updated LZ4 _is_ slower than the current one in kernel. But I was not able to reproduce such large regressions
> as you did. I now tried to define FORCE_INLINE as Eric suggested. I also inlined some functions which weren't in upstream LZ4,
> but are defined as macros in the current kernel LZ4. The approach to replace LZ4_ARCH64 with the function call _seemed_ to behave
> worse than the macro, so I withdrew the change.
> 
> The main difference is, that I replaced the read32/read16/write... etc. functions using memcpy with the other ones defined 
> in upstream LZ4 (which can be switched using a macro). 
> The comment of the author stated, that they're as fast as the memcpy variants (or faster), but not as portable
> (which does not matter since we're not dependent for multiple compilers).
> 
> In my tests, this version is mostly as fast as the current kernel LZ4.

With a patch you sent, I cannot see enhancement so I wanted to dig in and
found how I was really careless.

I have tested both test with CONFIG_KASAN. OMG. With disabling it, I don't
see any regression any more. So, I'm really really *sorry* about noise and
wasting your time. However, I am curious why KASAN makes such difference.

The reason I tested new updated lz4 is description says lz4 fast and
want to use it in zram. How can I do that? and How faster it is compared
to old?

Thanks for you work!

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH] lz4: fix performance regressions
  2017-02-12 21:41                 ` Willy Tarreau
@ 2017-02-13 11:53                   ` Sven Schmidt
  2017-02-13 13:37                     ` Willy Tarreau
  0 siblings, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2017-02-13 11:53 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: minchan, ebiggers3, akpm, bongkyu.kim, rsalvaterra,
	sergey.senozhatsky, gregkh, linux-kernel, herbert, davem,
	linux-crypto, anton, ccross, keescook, tony.luck

On Sun, Feb 12, 2017 at 10:41:17PM +0100, Willy Tarreau wrote:
> On Sun, Feb 12, 2017 at 04:20:00PM +0100, Sven Schmidt wrote:
> > On Sun, Feb 12, 2017 at 02:05:08PM +0100, Willy Tarreau wrote:
> > > Hi Sven,
> > > 
> > > On Sun, Feb 12, 2017 at 12:16:18PM +0100, Sven Schmidt wrote:
> > > > Fix performance regressions compared to current kernel LZ4
> > > 
> > > Your patch contains mostly style cleanups which certainly are welcome
> > > but make the whole patch hard to review. These cleanups would have been
> > > better into a separate, preliminary patch IMHO.
> > > 
> > > Regards,
> > > Willy
> > 
> > Hi Willy,
> > 
> > the problem was, I wanted to compare my version to the upstream LZ4 to find bugs (as with my last patch version: wrong indentation in LZ4HC 
> > in two for loops). But since the LZ4 code is a pain to read, I made additional style cleanups "on the way".
> 
> Oh I can easily understand!
> 
> > Hope you can manage to review the patch though, because it is difficult to separate the cleanups now.
> 
> When I need to split a patch into pieces, usually what I do is that I
> revert it, re-apply it without committing, then "git add -p", validate
> all the hunks to be taken as the first patch (ie here the cleanups),
> commit, then commit the rest as a separate one. It seems to me that the
> fix is in the last few hunks though I'm not sure yet.
> 
> Thanks,
> Willy

Hi Willy,

I didn't know about this 'trick' until now. Thanks for sharing it! I gave it a short try recently, that's really cool!

Since the problem discussed in this branch of this thread seems to be solved (see Minchans E-Mail), I won't split the patches, though.
Or is there an actual need for doing so? I will send an updated patchset (containing these patches + the other ones suggested by Eric) later.

Regards,

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-13  0:03           ` [PATCH v7 0/5] Update LZ4 compressor module Minchan Kim
@ 2017-02-13 12:08             ` Sven Schmidt
  2017-02-15  7:29               ` Minchan Kim
  0 siblings, 1 reply; 104+ messages in thread
From: Sven Schmidt @ 2017-02-13 12:08 UTC (permalink / raw)
  To: Minchan Kim
  Cc: ebiggers3, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck

On Mon, Feb 13, 2017 at 09:03:24AM +0900, Minchan Kim wrote:
> Hi Sven,
> 
> On Sun, Feb 12, 2017 at 12:16:17PM +0100, Sven Schmidt wrote:
> > 
> > 
> > 
> > On 02/10/2017 01:13 AM, Minchan Kim wrote:
> > > Hello Sven,
> > >
> > > On Thu, Feb 09, 2017 at 11:56:17AM +0100, Sven Schmidt wrote:
> > >> Hey Minchan,
> > >>
> > >> On Thu, Feb 09, 2017 at 08:31:21AM +0900, Minchan Kim wrote:
> > >>> Hello Sven,
> > >>>
> > >>> On Sun, Feb 05, 2017 at 08:09:03PM +0100, Sven Schmidt wrote:
> > >>>>
> > >>>> This patchset is for updating the LZ4 compression module to a version based
> > >>>> on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
> > >>>> which provides an "acceleration" parameter as a tradeoff between
> > >>>> high compression ratio and high compression speed.
> > >>>>
> > >>>> We want to use LZ4 fast in order to support compression in lustre
> > >>>> and (mostly, based on that) investigate data reduction techniques in behalf of
> > >>>> storage systems.
> > >>>>
> > >>>> Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
> > >>>> it is possible to enable applications to use fast and/or high compression
> > >>>> depending on the usecase.
> > >>>> For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
> > >>>> LZ4 in the kernel.
> > >>>>
> > >>>> LZ4 homepage: http://www.lz4.org/
> > >>>> LZ4 source repository: https://github.com/lz4/lz4
> > >>>> Source version: 1.7.3
> > >>>>
> > >>>> Benchmark (taken from [1], Core i5-4300U @1.9GHz):
> > >>>> ----------------|--------------|----------------|----------
> > >>>> Compressor      | Compression  | Decompression  | Ratio
> > >>>> ----------------|--------------|----------------|----------
> > >>>> memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
> > >>>> LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
> > >>>> LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
> > >>>> LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
> > >>>> LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101
> > >>>>
> > >>>> [1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html
> > >>>>
> > >>>> [PATCH 1/5] lib: Update LZ4 compressor module
> > >>>> [PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
> > >>>> [PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
> > >>>> [PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
> > >>>> [PATCH 5/5] lib/lz4: Remove back-compat wrappers
> > >>>
> > >>> Today, I did zram-lz4 performance test with fio in current mmotm and
> > >>> found it makes regression about 20%.
> > >>>
> > >>> "lz4-update" means current mmots(git://git.cmpxchg.org/linux-mmots.git) so
> > >>> applied your 5 patches. (But now sure current mmots has recent uptodate
> > >>> patches)
> > >>> "revert" means I reverted your 5 patches in current mmots.
> > >>>
> > >>>                      revert    lz4-update
> > >>>
> > >>>       seq-write       1547       1339      86.55%
> > >>>      rand-write      22775      19381      85.10%
> > >>>        seq-read       7035       5589      79.45%
> > >>>       rand-read      78556      68479      87.17%
> > >>>    mixed-seq(R)       1305       1066      81.69%
> > >>>    mixed-seq(W)       1205        984      81.66%
> > >>>   mixed-rand(R)      17421      14993      86.06%
> > >>>   mixed-rand(W)      17391      14968      86.07%
> > >>
> > >> which parts of the output (as well as units) are these values exactly?
> > >> I did not work with fio until now, so I think I might ask before misinterpreting my results.
> > >
> > > It is IOPS.
> > >
> > >>  
> > >>> My fio description file
> > >>>
> > >>> [global]
> > >>> bs=4k
> > >>> ioengine=sync
> > >>> size=100m
> > >>> numjobs=1
> > >>> group_reporting
> > >>> buffer_compress_percentage=30
> > >>> scramble_buffers=0
> > >>> filename=/dev/zram0
> > >>> loops=10
> > >>> fsync_on_close=1
> > >>>
> > >>> [seq-write]
> > >>> bs=64k
> > >>> rw=write
> > >>> stonewall
> > >>>
> > >>> [rand-write]
> > >>> rw=randwrite
> > >>> stonewall
> > >>>
> > >>> [seq-read]
> > >>> bs=64k
> > >>> rw=read
> > >>> stonewall
> > >>>
> > >>> [rand-read]
> > >>> rw=randread
> > >>> stonewall
> > >>>
> > >>> [mixed-seq]
> > >>> bs=64k
> > >>> rw=rw
> > >>> stonewall
> > >>>
> > >>> [mixed-rand]
> > >>> rw=randrw
> > >>> stonewall
> > >>>
> > >>
> > >> Great, this makes it easy for me to reproduce your test.
> > >
> > > If you have trouble to reproduce, feel free to ask me. I'm happy to test it. :)
> > >
> > > Thanks!
> > >
> > 
> > Hi Minchan,
> > 
> > I will send an updated patch as a reply to this E-Mail. Would be really grateful If you'd test it and provide feedback!
> > The patch should be applied to the current mmots tree.
> > 
> > In fact, the updated LZ4 _is_ slower than the current one in kernel. But I was not able to reproduce such large regressions
> > as you did. I now tried to define FORCE_INLINE as Eric suggested. I also inlined some functions which weren't in upstream LZ4,
> > but are defined as macros in the current kernel LZ4. The approach to replace LZ4_ARCH64 with the function call _seemed_ to behave
> > worse than the macro, so I withdrew the change.
> > 
> > The main difference is, that I replaced the read32/read16/write... etc. functions using memcpy with the other ones defined 
> > in upstream LZ4 (which can be switched using a macro). 
> > The comment of the author stated, that they're as fast as the memcpy variants (or faster), but not as portable
> > (which does not matter since we're not dependent for multiple compilers).
> > 
> > In my tests, this version is mostly as fast as the current kernel LZ4.
> 
> With a patch you sent, I cannot see enhancement so I wanted to dig in and
> found how I was really careless.
> 
> I have tested both test with CONFIG_KASAN. OMG. With disabling it, I don't
> see any regression any more. So, I'm really really *sorry* about noise and
> wasting your time. However, I am curious why KASAN makes such difference.
> 

Hey Minchan,

I'm glad to hear that! Nevertheless, the changes discussed here made some differences in my own tests (I believe it got a bit
faster now) and we have the functions properly inlined, where this makes sense. Also, I added the '-O3' C-flag as Eric suggested.
So, this was not really a waste of time, I think.

> The reason I tested new updated lz4 is description says lz4 fast and
> want to use it in zram. How can I do that? and How faster it is compared
> to old?
>

Unfortunately, in the current implementation (in crypto/lz4.c, which is used by zram) I'm setting the acceleration parameter 
(which is the paramer making the compression 'fast', see LZ4_compress_fast) to 1 (which is the default) since I did not know how this
patchset is accepted and this equals the behaviour currently available in kernel.

Basically, the logic is 'higher acceleration = faster compression = lower compression ratio' and vice versa. 
I included some benchmarks in my patch 0/5 E-Mail taken from the official LZ4:

> > >>>> ----------------|--------------|----------------|----------
> > >>>> Compressor      | Compression  | Decompression  | Ratio
> > >>>> ----------------|--------------|----------------|----------
> > >>>> memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
> > >>>> LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
> > >>>> LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
> > >>>> LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
> > >>>> LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101
> > >>>>

fast 50 means: acceleration=50, default: acceleration=1.

Besides the proposed patchset, I tried to implement a module parameter in crypto/lz4.c to set the acceleration factor.
In my tests, the module parameter works out great.
But I think this is subject to a future, separate patch. Especially since I had to 'work around' the crypto/testmgr.c, 
which only tests acceleration=1 and there's no limit for acceleration.  

Thanks for your help,

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH] lz4: fix performance regressions
  2017-02-13 11:53                   ` Sven Schmidt
@ 2017-02-13 13:37                     ` Willy Tarreau
  0 siblings, 0 replies; 104+ messages in thread
From: Willy Tarreau @ 2017-02-13 13:37 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: minchan, ebiggers3, akpm, bongkyu.kim, rsalvaterra,
	sergey.senozhatsky, gregkh, linux-kernel, herbert, davem,
	linux-crypto, anton, ccross, keescook, tony.luck

On Mon, Feb 13, 2017 at 12:53:49PM +0100, Sven Schmidt wrote:
> On Sun, Feb 12, 2017 at 10:41:17PM +0100, Willy Tarreau wrote:
> > On Sun, Feb 12, 2017 at 04:20:00PM +0100, Sven Schmidt wrote:
> > > On Sun, Feb 12, 2017 at 02:05:08PM +0100, Willy Tarreau wrote:
> > > > Hi Sven,
> > > > 
> > > > On Sun, Feb 12, 2017 at 12:16:18PM +0100, Sven Schmidt wrote:
> > > > > Fix performance regressions compared to current kernel LZ4
> > > > 
> > > > Your patch contains mostly style cleanups which certainly are welcome
> > > > but make the whole patch hard to review. These cleanups would have been
> > > > better into a separate, preliminary patch IMHO.
> > > > 
> > > > Regards,
> > > > Willy
> > > 
> > > Hi Willy,
> > > 
> > > the problem was, I wanted to compare my version to the upstream LZ4 to find bugs (as with my last patch version: wrong indentation in LZ4HC 
> > > in two for loops). But since the LZ4 code is a pain to read, I made additional style cleanups "on the way".
> > 
> > Oh I can easily understand!
> > 
> > > Hope you can manage to review the patch though, because it is difficult to separate the cleanups now.
> > 
> > When I need to split a patch into pieces, usually what I do is that I
> > revert it, re-apply it without committing, then "git add -p", validate
> > all the hunks to be taken as the first patch (ie here the cleanups),
> > commit, then commit the rest as a separate one. It seems to me that the
> > fix is in the last few hunks though I'm not sure yet.
> > 
> > Thanks,
> > Willy
> 
> Hi Willy,
> 
> I didn't know about this 'trick' until now. Thanks for sharing it! I gave it a short try recently, that's really cool!
> 
> Since the problem discussed in this branch of this thread seems to be solved (see Minchans E-Mail), I won't split the patches, though.
> Or is there an actual need for doing so? I will send an updated patchset (containing these patches + the other ones suggested by Eric) later.

It's probably too late for this time, but keep it in mind for next time :-)

willy

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH] lz4: fix performance regressions
  2017-02-12 23:38             ` Eric Biggers
@ 2017-02-14 10:33               ` Sven Schmidt
  0 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-14 10:33 UTC (permalink / raw)
  To: Eric Biggers
  Cc: minchan, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck

Hey Eric,

On Sun, Feb 12, 2017 at 03:38:02PM -0800, Eric Biggers wrote:
> Hi Sven,
> 
> On Sun, Feb 12, 2017 at 12:16:18PM +0100, Sven Schmidt wrote:
> >  /*-************************************
> >   *	Reading and writing into memory
> >   **************************************/
> > +typedef union {
> > +	U16 u16;
> > +	U32 u32;
> > +	size_t uArch;
> > +} __packed unalign;
> > 
> > -static inline U16 LZ4_read16(const void *memPtr)
> > +static FORCE_INLINE __maybe_unused U16 LZ4_read16(const void *ptr)
> >  {
> > -	U16 val;
> > -
> > -	memcpy(&val, memPtr, sizeof(val));
> > -
> > -	return val;
> > +	return ((const unalign *)ptr)->u16;
> >  }
> > 
> > -static inline U32 LZ4_read32(const void *memPtr)
> > +static FORCE_INLINE __maybe_unused U32 LZ4_read32(const void *ptr)
> >  {
> > -	U32 val;
> > -
> > -	memcpy(&val, memPtr, sizeof(val));
> > -
> > -	return val;
> > +	return ((const unalign *)ptr)->u32;
> >  }
> > 
> > -static inline size_t LZ4_read_ARCH(const void *memPtr)
> > +static FORCE_INLINE __maybe_unused size_t LZ4_read_ARCH(const void *ptr)
> >  {
> > -	size_t val;
> > -
> > -	memcpy(&val, memPtr, sizeof(val));
> > -
> > -	return val;
> > +	return ((const unalign *)ptr)->uArch;
> >  }
> > 
> > -static inline void LZ4_write16(void *memPtr, U16 value)
> > +static FORCE_INLINE __maybe_unused void LZ4_write16(void *memPtr, U16 value)
> >  {
> > -	memcpy(memPtr, &value, sizeof(value));
> > +	((unalign *)memPtr)->u16 = value;
> >  }
> > 
> > -static inline void LZ4_write32(void *memPtr, U32 value)
> > -{
> > -	memcpy(memPtr, &value, sizeof(value));
> > +static FORCE_INLINE __maybe_unused void LZ4_write32(void *memPtr, U32 value) {
> > +	((unalign *)memPtr)->u32 = value;
> >  }
> > 
> > -static inline U16 LZ4_readLE16(const void *memPtr)
> > +static FORCE_INLINE __maybe_unused U16 LZ4_readLE16(const void *memPtr)
> >  {
> > -#ifdef __LITTLE_ENDIAN__
> > +#if LZ4_LITTLE_ENDIAN
> >  	return LZ4_read16(memPtr);
> >  #else
> >  	const BYTE *p = (const BYTE *)memPtr;
> > @@ -137,19 +143,19 @@ static inline U16 LZ4_readLE16(const void *memPtr)
> >  #endif
> >  }
> 
> Since upstream LZ4 is intended to be compiled at -O3, this may allow it to get
> away with using memcpy() for unaligned memory accesses.  The reason it uses
> memcpy() is that, other than a byte-by-byte copy, it is the only portable way to
> express unaligned memory accesses.  But the Linux kernel is sometimes compiled
> optimized for size (-Os), and I wouldn't be *too* surprised if some of the
> memcpy()'s don't always get inlined then, which could be causing the performance
> regression being observed.  (Of course, this could be verified by checking
> whether CONFIG_CC_OPTIMIZE_FOR_SIZE=y is set, then reading the assembly.)
> 
> But I don't think accessing a __packed structure directly is the right
> alternative.  Instead, Linux already includes macros for unaligned memory
> accesses which have been optimized for every supported architecture.  Those
> should just be used instead, e.g. like this:
> 
> static FORCE_INLINE U16 LZ4_read16(const void *ptr)
> {
> 	return get_unaligned((const u16 *)ptr);
> }
> 
> static FORCE_INLINE U32 LZ4_read32(const void *ptr)
> {
> 	return get_unaligned((const u32 *)ptr);
> }
> 
> static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr)
> {
> 	return get_unaligned((const size_t *)ptr);
> }
> 
> static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value)
> {
> 	put_unaligned(value, (u16 *)memPtr);
> }
> 
> static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value)
> {
> 	put_unaligned(value, (u32 *)memPtr);
> }
> 
> static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr)
> {
> 	return get_unaligned_le16(memPtr);
> }
> 
> static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value)
> {
> 	return put_unaligned_le16(value, memPtr);
> }
> 
> static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
> {
> 	if (LZ4_64bits()) {
> 		u64 a = get_unaligned((const u64 *)src);
> 		put_unaligned(a, (u64 *)dst);
> 	} else {
> 		u32 a = get_unaligned((const u32 *)src);
> 		u32 b = get_unaligned((const u32 *)src + 1);
> 		put_unaligned(a, (u32 *)dst);
> 		put_unaligned(b, (u32 *)dst + 1);
> 	}
> }
> 
> 
> Note that I dropped __maybe_unused as it's not needed on inline functions.
> That should be done everywhere else the patch proposes to add it too.
> 
> > -#if LZ4_ARCH64
> > -#ifdef __BIG_ENDIAN__
> > -#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
> > +static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
> > +{
> > +#if LZ4_LITTLE_ENDIAN
> > +#if LZ4_ARCH64 /* 64 Bits Little Endian */
> > +#if defined(LZ4_FORCE_SW_BITCOUNT)
> > +	static const int DeBruijnBytePos[64] = {
> > +		0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7,
> > +		0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7,
> > +		7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6,
> > +		7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
> > +	};
> > +
> > +	return DeBruijnBytePos[((U64)((val & -(long long)val)
> > +		* 0x0218A392CDABBD3FULL)) >> 58];
> >  #else
> > -#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
> > -#endif
> > +	return (__builtin_ctzll((U64)val) >> 3);
> > +#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
> > +#else /* 32 Bits Little Endian */
> > +#if defined(LZ4_FORCE_SW_BITCOUNT)
> > +	static const int DeBruijnBytePos[32] = {
> > +		0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1,
> > +		3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1
> > +	};
> > +
> > +	return DeBruijnBytePos[((U32)((val & -(S32)val)
> > +		* 0x077CB531U)) >> 27];
> >  #else
> > -#ifdef __BIG_ENDIAN__
> > -#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
> > +	return (__builtin_ctz((U32)val) >> 3);
> > +#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
> > +#endif /* LZ4_ARCH64 */
> > +#else /* Big Endian */
> > +#if LZ4_ARCH64 /* 64 Bits Big Endian */
> > +#if defined(LZ4_FORCE_SW_BITCOUNT)
> > +	unsigned int r;
> > +
> > +	if (!(val >> 32)) {
> > +		r = 4;
> > +	} else {
> > +		r = 0;
> > +		val >>= 32;
> > +	}
> > +
> > +	if (!(val >> 16)) {
> > +		r += 2;
> > +		val >>= 8;
> > +	} else {
> > +		val >>= 24;
> > +	}
> > +
> > +	r += (!val);
> > +
> > +	return r;
> >  #else
> > -#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
> > -#endif
> > -#endif
> > +	return (__builtin_clzll((U64)val) >> 3);
> > +#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
> > +#else /* 32 Bits Big Endian */
> > +#if defined(LZ4_FORCE_SW_BITCOUNT)
> > +	unsigned int r;
> > +
> > +	if (!(val >> 16)) {
> > +		r = 2;
> > +		val >>= 8;
> > +	} else {
> > +		r = 0;
> > +		val >>= 24;
> > +	}
> > +
> > +	r += (!val);
> > +
> > +	return r;
> > +#else
> > +	return (__builtin_clz((U32)val) >> 3);
> > +#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
> > +#endif /* LZ4_ARCH64 */
> > +#endif /* LZ4_LITTLE_ENDIAN */
> > +}
> 
> The reason LZ4_NbCommonBytes() in upstream LZ4 is so complicated is that it
> needs to provide portable fallbacks that work on *any* platform and compiler.
> This isn't needed in the Linux kernel, and it should just call the functions
> already defined that do the right thing:
> 
> static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
> {
>         if (LZ4_isLittleEndian())
>                 return __ffs(val) >> 3;
>         else
>                 return (BITS_PER_LONG - 1 - __fls(val)) >> 3;
> }                      
> 
> To be clear, when I said that upstream LZ4 shouldn't generally be changed, I'm
> primarily talking about the core code, not the platform-specific parts.  What we
> need to do is define platform-specific stuff, like LZ4_read*(), LZ4_write*(),
> LZ4_NbCommonBytes(), LZ4_64bits(), and FORCE_INLINE, in a way that makes sense
> for the Linux kernel and the environment it's compiled in.  Also I think it's
> fine, and maybe even necessary for performance, to *add* inline or FORCE_INLINE
> in some places too, given that LZ4 in Linux may get compiled with a lower
> optimization level than that intended to be used for upstream LZ4 --- though it
> may be worth considering updating the Makefile to just always compile the LZ4
> files with -O3 instead.  What should be avoided is making unnecessary changes to
> the *users* of the platform-specific code or to the core (de)compression
> parameters or templates.
> 
> Eric

I'm very thankful for your effort here! I included all your proposed changes (as well as -O3 as compiler flag in the Makefile)
and noticed further improvements concerning the performance in ZRAM (compared to current LZ4 in 4.10 RC8 as well as the previous changes discussed
here). I would never have thought that the NBcommonBytes function can be re-written in such an easy way, since the current LZ4 in the kernel
uses similar approach checking for 64/32 bit and endianess.

Also, thanks for your very detailed remarks. That's really helpful to me.

Regards,

Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* Re: [PATCH v7 0/5] Update LZ4 compressor module
  2017-02-13 12:08             ` Sven Schmidt
@ 2017-02-15  7:29               ` Minchan Kim
  0 siblings, 0 replies; 104+ messages in thread
From: Minchan Kim @ 2017-02-15  7:29 UTC (permalink / raw)
  To: Sven Schmidt
  Cc: ebiggers3, akpm, bongkyu.kim, rsalvaterra, sergey.senozhatsky,
	gregkh, linux-kernel, herbert, davem, linux-crypto, anton,
	ccross, keescook, tony.luck

Hi Sven,

On Mon, Feb 13, 2017 at 01:08:41PM +0100, Sven Schmidt wrote:
> On Mon, Feb 13, 2017 at 09:03:24AM +0900, Minchan Kim wrote:
> > Hi Sven,
> > 
> > On Sun, Feb 12, 2017 at 12:16:17PM +0100, Sven Schmidt wrote:
> > > 
> > > 
> > > 
> > > On 02/10/2017 01:13 AM, Minchan Kim wrote:
> > > > Hello Sven,
> > > >
> > > > On Thu, Feb 09, 2017 at 11:56:17AM +0100, Sven Schmidt wrote:
> > > >> Hey Minchan,
> > > >>
> > > >> On Thu, Feb 09, 2017 at 08:31:21AM +0900, Minchan Kim wrote:
> > > >>> Hello Sven,
> > > >>>
> > > >>> On Sun, Feb 05, 2017 at 08:09:03PM +0100, Sven Schmidt wrote:
> > > >>>>
> > > >>>> This patchset is for updating the LZ4 compression module to a version based
> > > >>>> on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
> > > >>>> which provides an "acceleration" parameter as a tradeoff between
> > > >>>> high compression ratio and high compression speed.
> > > >>>>
> > > >>>> We want to use LZ4 fast in order to support compression in lustre
> > > >>>> and (mostly, based on that) investigate data reduction techniques in behalf of
> > > >>>> storage systems.
> > > >>>>
> > > >>>> Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
> > > >>>> it is possible to enable applications to use fast and/or high compression
> > > >>>> depending on the usecase.
> > > >>>> For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
> > > >>>> LZ4 in the kernel.
> > > >>>>
> > > >>>> LZ4 homepage: http://www.lz4.org/
> > > >>>> LZ4 source repository: https://github.com/lz4/lz4
> > > >>>> Source version: 1.7.3
> > > >>>>
> > > >>>> Benchmark (taken from [1], Core i5-4300U @1.9GHz):
> > > >>>> ----------------|--------------|----------------|----------
> > > >>>> Compressor      | Compression  | Decompression  | Ratio
> > > >>>> ----------------|--------------|----------------|----------
> > > >>>> memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
> > > >>>> LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
> > > >>>> LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
> > > >>>> LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
> > > >>>> LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101
> > > >>>>
> > > >>>> [1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html
> > > >>>>
> > > >>>> [PATCH 1/5] lib: Update LZ4 compressor module
> > > >>>> [PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
> > > >>>> [PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
> > > >>>> [PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
> > > >>>> [PATCH 5/5] lib/lz4: Remove back-compat wrappers
> > > >>>
> > > >>> Today, I did zram-lz4 performance test with fio in current mmotm and
> > > >>> found it makes regression about 20%.
> > > >>>
> > > >>> "lz4-update" means current mmots(git://git.cmpxchg.org/linux-mmots.git) so
> > > >>> applied your 5 patches. (But now sure current mmots has recent uptodate
> > > >>> patches)
> > > >>> "revert" means I reverted your 5 patches in current mmots.
> > > >>>
> > > >>>                      revert    lz4-update
> > > >>>
> > > >>>       seq-write       1547       1339      86.55%
> > > >>>      rand-write      22775      19381      85.10%
> > > >>>        seq-read       7035       5589      79.45%
> > > >>>       rand-read      78556      68479      87.17%
> > > >>>    mixed-seq(R)       1305       1066      81.69%
> > > >>>    mixed-seq(W)       1205        984      81.66%
> > > >>>   mixed-rand(R)      17421      14993      86.06%
> > > >>>   mixed-rand(W)      17391      14968      86.07%
> > > >>
> > > >> which parts of the output (as well as units) are these values exactly?
> > > >> I did not work with fio until now, so I think I might ask before misinterpreting my results.
> > > >
> > > > It is IOPS.
> > > >
> > > >>  
> > > >>> My fio description file
> > > >>>
> > > >>> [global]
> > > >>> bs=4k
> > > >>> ioengine=sync
> > > >>> size=100m
> > > >>> numjobs=1
> > > >>> group_reporting
> > > >>> buffer_compress_percentage=30
> > > >>> scramble_buffers=0
> > > >>> filename=/dev/zram0
> > > >>> loops=10
> > > >>> fsync_on_close=1
> > > >>>
> > > >>> [seq-write]
> > > >>> bs=64k
> > > >>> rw=write
> > > >>> stonewall
> > > >>>
> > > >>> [rand-write]
> > > >>> rw=randwrite
> > > >>> stonewall
> > > >>>
> > > >>> [seq-read]
> > > >>> bs=64k
> > > >>> rw=read
> > > >>> stonewall
> > > >>>
> > > >>> [rand-read]
> > > >>> rw=randread
> > > >>> stonewall
> > > >>>
> > > >>> [mixed-seq]
> > > >>> bs=64k
> > > >>> rw=rw
> > > >>> stonewall
> > > >>>
> > > >>> [mixed-rand]
> > > >>> rw=randrw
> > > >>> stonewall
> > > >>>
> > > >>
> > > >> Great, this makes it easy for me to reproduce your test.
> > > >
> > > > If you have trouble to reproduce, feel free to ask me. I'm happy to test it. :)
> > > >
> > > > Thanks!
> > > >
> > > 
> > > Hi Minchan,
> > > 
> > > I will send an updated patch as a reply to this E-Mail. Would be really grateful If you'd test it and provide feedback!
> > > The patch should be applied to the current mmots tree.
> > > 
> > > In fact, the updated LZ4 _is_ slower than the current one in kernel. But I was not able to reproduce such large regressions
> > > as you did. I now tried to define FORCE_INLINE as Eric suggested. I also inlined some functions which weren't in upstream LZ4,
> > > but are defined as macros in the current kernel LZ4. The approach to replace LZ4_ARCH64 with the function call _seemed_ to behave
> > > worse than the macro, so I withdrew the change.
> > > 
> > > The main difference is, that I replaced the read32/read16/write... etc. functions using memcpy with the other ones defined 
> > > in upstream LZ4 (which can be switched using a macro). 
> > > The comment of the author stated, that they're as fast as the memcpy variants (or faster), but not as portable
> > > (which does not matter since we're not dependent for multiple compilers).
> > > 
> > > In my tests, this version is mostly as fast as the current kernel LZ4.
> > 
> > With a patch you sent, I cannot see enhancement so I wanted to dig in and
> > found how I was really careless.
> > 
> > I have tested both test with CONFIG_KASAN. OMG. With disabling it, I don't
> > see any regression any more. So, I'm really really *sorry* about noise and
> > wasting your time. However, I am curious why KASAN makes such difference.
> > 
> 
> Hey Minchan,
> 
> I'm glad to hear that! Nevertheless, the changes discussed here made some differences in my own tests (I believe it got a bit
> faster now) and we have the functions properly inlined, where this makes sense. Also, I added the '-O3' C-flag as Eric suggested.
> So, this was not really a waste of time, I think.
> 
> > The reason I tested new updated lz4 is description says lz4 fast and
> > want to use it in zram. How can I do that? and How faster it is compared
> > to old?
> >
> 
> Unfortunately, in the current implementation (in crypto/lz4.c, which is used by zram) I'm setting the acceleration parameter 
> (which is the paramer making the compression 'fast', see LZ4_compress_fast) to 1 (which is the default) since I did not know how this
> patchset is accepted and this equals the behaviour currently available in kernel.

Fair enough.

> 
> Basically, the logic is 'higher acceleration = faster compression = lower compression ratio' and vice versa. 
> I included some benchmarks in my patch 0/5 E-Mail taken from the official LZ4:
> 
> > > >>>> ----------------|--------------|----------------|----------
> > > >>>> Compressor      | Compression  | Decompression  | Ratio
> > > >>>> ----------------|--------------|----------------|----------
> > > >>>> memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
> > > >>>> LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
> > > >>>> LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
> > > >>>> LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
> > > >>>> LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101
> > > >>>>
> 
> fast 50 means: acceleration=50, default: acceleration=1.

I understood now. Thanks for the explanation!

> 
> Besides the proposed patchset, I tried to implement a module parameter in crypto/lz4.c to set the acceleration factor.
> In my tests, the module parameter works out great.

If it works with module parameter, it means every guest of lz4 via crypto
should use same acceleration level? Hmm, some system might want different
acceleration level among different subsystems.

Anyway, if it works, it would be great for user of zram to test/select
right choice for their system workload.

Thanks for the great work!

> But I think this is subject to a future, separate patch. Especially since I had to 'work around' the crypto/testmgr.c, 
> which only tests acceleration=1 and there's no limit for acceleration.  
> 
> Thanks for your help,
> 
> Sven

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v8 0/5] Update LZ4 compressor module
  2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
                   ` (9 preceding siblings ...)
  2017-02-05 19:09 ` [PATCH v7 0/5] Update LZ4 compressor module Sven Schmidt
@ 2017-02-15 18:16 ` Sven Schmidt
  2017-02-15 18:16   ` [PATCH v8 1/5] lib: " Sven Schmidt
                     ` (4 more replies)
  10 siblings, 5 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-15 18:16 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck


This patchset is for updating the LZ4 compression module to a version based
on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
which provides an "acceleration" parameter as a tradeoff between
high compression ratio and high compression speed.

We want to use LZ4 fast in order to support compression in lustre
and (mostly, based on that) investigate data reduction techniques in behalf of
storage systems.

Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
it is possible to enable applications to use fast and/or high compression
depending on the usecase.
For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
LZ4 in the kernel.

LZ4 homepage: http://www.lz4.org/
LZ4 source repository: https://github.com/lz4/lz4
Source version: 1.7.3

Benchmark (taken from [1], Core i5-4300U @1.9GHz):
----------------|--------------|----------------|----------
Compressor      | Compression  | Decompression  | Ratio
----------------|--------------|----------------|----------
memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101

[1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html

[PATCH 1/5] lib: Update LZ4 compressor module
[PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
[PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
[PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
[PATCH 5/5] lib/lz4: Remove back-compat wrappers

Changes:
v8:
- Rewrote the architecture-dependent definitions in lz4defs.h, such as LZ4_read*,
  LZ4_write* and LZ4_NbCommonBytes as proposed by Eric Biggers
- Added -O3 compiler flag to Makefile, also suggested by Eric
- Defined FORCE_INLINE, as used in upstream LZ4, as __always_inline and
  force-inlined most of the small functions in lz4defs.h
- lz4_decompress: Wrapped the EXPORT_SYMBOL and MODULE_* macros in a
  #ifdef STATIC the way suggested by Andrew Morton, fixing the breakage
  of CONFIG_KERNEL_LZ4 in x86 as reported by Arnd Bergman

v7:
- Fixed errors reported by the Smatch tool
- Changed function documentation comments in lz4.h to match kernel-doc style
- Fixed a misbehaviour of LZ4HC caused by the wrong level of indentation
  concerning two for loops introduced after I refactored the code style using
  checkpatch.pl (upstream LZ4 put dozens of stuff in just one line, gnah)
- Updated the crypto tests for LZ4 since they did fail for the new code
  and hence zram did fail to allocate memory for LZ4

v6:
- Fixed LZ4_NBCOMMONBYTES() for 64-bit little endian
- Reset LZ4_MEMORY_USAGE to 14 (which is the value used in
  upstream LZ4 as well as the previous kernel module)
- Fixed that weird double-indentation in lz4defs.h and lz4.h
- Adjusted general styling issues in lz4defs.h
  (e.g. lines consisting of more than one instruction)
- Removed the architecture-dependent typedef to reg_t
  since upstream LZ4 is just using size_t and that works fine
- Changed error messages in pstore/platform.c:
  * LZ4_compress_default always returns 0 in case of an error
    (no need to print the return value)
  * LZ4_decompress_safe returns a negative error message
    (return value _does_ matter)

v5:
- Added a fifth patch to remove the back-compat wrappers introduced
  to ensure bisectibility between the patches (the functions are no longer
  needed since there's no callers left)

v4:
- Fixed kbuild errors
- Re-added lz4_compressbound as alias for LZ4_compressBound
  to ensure backwards compatibility
- Wrapped LZ4_hash5 with check for LZ4_ARCH64 since it is only used there
  and triggers an unused function warning when false

v3:
- Adjusted the code to satisfy kernel coding style (checkpatch.pl)
- Made sure the changes to LZ4 in Kernel (overflow checks etc.)
  are included in the new module (they are)
- Removed the second LZ4_compressBound function with related name but
  different return type
- Corrected version number (was LZ4 1.7.3)
- Added missing LZ4 streaming functions

v2:
- Changed order of the patches since in the initial patchset the lz4.h was in the
  last patch but was referenced by the other ones
- Split lib/decompress_unlz4.c in an own patch
- Fixed errors reported by the buildbot
- Further refactorings
- Added more appropriate copyright note to include/linux/lz4.h

^ permalink raw reply	[flat|nested] 104+ messages in thread

* [PATCH v8 1/5] lib: Update LZ4 compressor module
  2017-02-15 18:16 ` [PATCH v8 " Sven Schmidt
@ 2017-02-15 18:16   ` Sven Schmidt
  2017-02-15 18:16   ` [PATCH v8 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-15 18:16 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

Update the LZ4 kernel module to LZ4 v1.7.3 by Yann Collet.  The kernel
module is inspired by the previous work by Chanho Min.  The updated LZ4
module will not break existing code since the patchset contains
appropriate changes.

API changes:

New method LZ4_compress_fast which differs from the variant available in
kernel by the new acceleration parameter, allowing to trade compression
ratio for more compression speed and vice versa.

LZ4_decompress_fast is the respective decompression method, featuring a
very fast decoder (multiple GB/s per core), able to reach RAM speed in
multi-core systems.  The decompressor allows to decompress data compressed
with LZ4 fast as well as the LZ4 HC (high compression) algorithm.

Also the useful functions LZ4_decompress_safe_partial and
LZ4_compress_destsize were added.  The latter reverses the logic by trying
to compress as much data as possible from source to dest while the former
aims to decompress partial blocks of data.

A bunch of streaming functions were also added which allow
compressig/decompressing data in multiple steps (so called "streaming
mode").

The methods lz4_compress and lz4_decompress_unknownoutputsize are now
known as LZ4_compress_default respectivley LZ4_decompress_safe. The old
methods will be removed since there's no callers left in the code.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      |  762 +++++++++++++++++++++++++++---
 lib/lz4/Makefile         |    2 +
 lib/lz4/lz4_compress.c   | 1161 +++++++++++++++++++++++++++++++++-------------
 lib/lz4/lz4_decompress.c |  705 ++++++++++++++++++----------
 lib/lz4/lz4defs.h        |  338 ++++++++------
 lib/lz4/lz4hc_compress.c |  867 ++++++++++++++++++++++------------
 6 files changed, 2758 insertions(+), 1077 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 6b784c5..1b0f8ca 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -1,87 +1,717 @@
-#ifndef __LZ4_H__
-#define __LZ4_H__
-/*
- * LZ4 Kernel Interface
+/* LZ4 Kernel Interface
  *
  * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ *
+ * This file is based on the original header file
+ * for LZ4 - Fast LZ compression algorithm.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *	* Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  */
-#define LZ4_MEM_COMPRESS	(16384)
-#define LZ4HC_MEM_COMPRESS	(262144 + (2 * sizeof(unsigned char *)))
 
+#ifndef __LZ4_H__
+#define __LZ4_H__
+
+#include <linux/types.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
+/*-************************************************************************
+ *	CONSTANTS
+ **************************************************************************/
 /*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes
+ * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
  */
-static inline size_t lz4_compressbound(size_t isize)
+#define LZ4_MEMORY_USAGE 14
+
+#define LZ4_MAX_INPUT_SIZE	0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)	(\
+	(unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
+	? 0 \
+	: (isize) + ((isize)/255) + 16)
+
+#define LZ4_ACCELERATION_DEFAULT 1
+#define LZ4_HASHLOG	 (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+#define LZ4HC_MIN_CLEVEL			3
+#define LZ4HC_DEFAULT_CLEVEL			9
+#define LZ4HC_MAX_CLEVEL			16
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE - 1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+/*-************************************************************************
+ *	STREAMING CONSTANTS AND STRUCTURES
+ **************************************************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE - 3)) + 4)
+#define LZ4_STREAMSIZE	(LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
+
+#define LZ4_STREAMDECODESIZE_U64	4
+#define LZ4_STREAMDECODESIZE		 (LZ4_STREAMDECODESIZE_U64 * \
+	sizeof(unsigned long long))
+
+/*
+ * LZ4_stream_t - information structure to track an LZ4 stream.
+ */
+typedef struct {
+	uint32_t hashTable[LZ4_HASH_SIZE_U32];
+	uint32_t currentOffset;
+	uint32_t initCheck;
+	const uint8_t *dictionary;
+	uint8_t *bufferStart;
+	uint32_t dictSize;
+} LZ4_stream_t_internal;
+typedef union {
+	unsigned long long table[LZ4_STREAMSIZE_U64];
+	LZ4_stream_t_internal internal_donotuse;
+} LZ4_stream_t;
+
+/*
+ * LZ4_streamHC_t - information structure to track an LZ4HC stream.
+ */
+typedef struct {
+	unsigned int	 hashTable[LZ4HC_HASHTABLESIZE];
+	unsigned short	 chainTable[LZ4HC_MAXD];
+	/* next block to continue on current prefix */
+	const unsigned char *end;
+	/* All index relative to this position */
+	const unsigned char *base;
+	/* alternate base for extDict */
+	const unsigned char *dictBase;
+	/* below that point, need extDict */
+	unsigned int	 dictLimit;
+	/* below that point, no more dict */
+	unsigned int	 lowLimit;
+	/* index from which to continue dict update */
+	unsigned int	 nextToUpdate;
+	unsigned int	 compressionLevel;
+} LZ4HC_CCtx_internal;
+typedef union {
+	size_t table[LZ4_STREAMHCSIZE_SIZET];
+	LZ4HC_CCtx_internal internal_donotuse;
+} LZ4_streamHC_t;
+
+/*
+ * LZ4_streamDecode_t - information structure to track an
+ *	LZ4 stream during decompression.
+ *
+ * init this structure using LZ4_setStreamDecode (or memset()) before first use
+ */
+typedef struct {
+	const uint8_t *externalDict;
+	size_t extDictSize;
+	const uint8_t *prefixEnd;
+	size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+typedef union {
+	unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+	LZ4_streamDecode_t_internal internal_donotuse;
+} LZ4_streamDecode_t;
+
+/*-************************************************************************
+ *	SIZE OF STATE
+ **************************************************************************/
+#define LZ4_MEM_COMPRESS	LZ4_STREAMSIZE
+#define LZ4HC_MEM_COMPRESS	LZ4_STREAMHCSIZE
+
+/*-************************************************************************
+ *	Compression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_compressBound() - Max. output size in worst case szenarios
+ * @isize: Size of the input data
+ *
+ * Return: Max. size LZ4 may output in a "worst case" szenario
+ * (data not compressible)
+ */
+static inline int LZ4_compressBound(size_t isize)
 {
-	return isize + (isize / 255) + 16;
+	return LZ4_COMPRESSBOUND(isize);
 }
 
-/*
- * lz4_compress()
- *	src     : source address of the original data
- *	src_len : size of the original data
- *	dst	: output buffer address of the compressed data
- *		This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len : is the output size, which is returned after compress done
- *	workmem : address of the working memory.
- *		This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer and workmem must be already allocated with
- *		the defined size.
- */
-int lz4_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
-  * lz4hc_compress()
-  *	 src	 : source address of the original data
-  *	 src_len : size of the original data
-  *	 dst	 : output buffer address of the compressed data
-  *		This requires 'dst' of size LZ4_COMPRESSBOUND.
-  *	 dst_len : is the output size, which is returned after compress done
-  *	 workmem : address of the working memory.
-  *		This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
-  *	 return  : Success if return 0
-  *		   Error if return (< 0)
-  *	 note :  Destination buffer and workmem must be already allocated with
-  *		 the defined size.
-  */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
+/**
+ * lz4_compressbound() - For backwards compatibility; see LZ4_compressBound
+ * @isize: Size of the input data
+ *
+ * Return: Max. size LZ4 may output in a "worst case" szenario
+ *	(data not compressible)
+ */
+static inline int lz4_compressbound(size_t isize)
+{
+	return LZ4_COMPRESSBOUND(isize);
+}
+
+/**
+ * LZ4_compress_default() - Compress data from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ *	which must be already allocated
+ * @wrkmem: address of the working memory.
+ *	This requires 'workmem' of LZ4_MEM_COMPRESS.
+ *
+ * Compresses 'sourceSize' bytes from buffer 'source'
+ * into already allocated 'dest' buffer of size 'maxOutputSize'.
+ * Compression is guaranteed to succeed if
+ * 'maxOutputSize' >= LZ4_compressBound(inputSize).
+ * It also runs faster, so it's a recommended setting.
+ * If the function cannot compress 'source' into a more limited 'dest' budget,
+ * compression stops *immediately*, and the function result is zero.
+ * As a consequence, 'dest' content is not valid.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ *	(necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem);
+
+/**
+ * LZ4_compress_fast() - As LZ4_compress_default providing an acceleration param
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxOutputSize: full or partial size of buffer 'dest'
+ *	which must be already allocated
+ * @acceleration: acceleration factor
+ * @wrkmem: address of the working memory.
+ *	This requires 'workmem' of LZ4_MEM_COMPRESS.
+ *
+ * Same as LZ4_compress_default(), but allows to select an "acceleration"
+ * factor. The larger the acceleration value, the faster the algorithm,
+ * but also the lesser the compression. It's a trade-off. It can be fine tuned,
+ * with each successive value providing roughly +~3% to speed.
+ * An acceleration value of "1" is the same as regular LZ4_compress_default()
+ * Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT, which is 1.
+ *
+ * Return: Number of bytes written into buffer 'dest'
+ *	(necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, int acceleration, void *wrkmem);
+
+/**
+ * LZ4_compress_destSize() - Compress as much data as possible
+ *	from source to dest
+ * @source: source address of the original data
+ * @dest: output buffer address of the compressed data
+ * @sourceSizePtr: will be modified to indicate how many bytes where read
+ *	from 'source' to fill 'dest'. New value is necessarily <= old value.
+ * @targetDestSize: Size of buffer 'dest' which must be already allocated
+ * @wrkmem: address of the working memory.
+ *	This requires 'workmem' of LZ4_MEM_COMPRESS.
+ *
+ * Reverse the logic, by compressing as much data as possible
+ * from 'source' buffer into already allocated buffer 'dest'
+ * of size 'targetDestSize'.
+ * This function either compresses the entire 'source' content into 'dest'
+ * if it's large enough, or fill 'dest' buffer completely with as much data as
+ * possible from 'source'.
+ *
+ * Return: Number of bytes written into 'dest' (necessarily <= targetDestSize)
+ *	or 0 if compression fails
+ */
+int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
+	int targetDestSize, void *wrkmem);
 
 /*
- * lz4_decompress()
- *	src     : source address of the compressed data
- *	src_len : is the input size, whcih is returned after decompress done
- *	dest	: output buffer address of the decompressed data
- *	actual_dest_len: is the size of uncompressed data, supposing it's known
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
- *		slightly faster than lz4_decompress_unknownoutputsize()
+ * lz4_compress() - For backward compatibility, see LZ4_compress_default
+ * @src: source address of the original data
+ * @src_len: size of the original data
+ * @dst: output buffer address of the compressed data. This requires 'dst'
+ *	of size LZ4_COMPRESSBOUND
+ * @dst_len: is the output size, which is returned after compress done
+ * @workmem: address of the working memory.
+ *
+ * Return: Success if return 0, Error if return < 0
  */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len);
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*-************************************************************************
+ *	Decompression Functions
+ **************************************************************************/
+
+/**
+ * LZ4_decompress_fast() - Decompresses data from 'source' into 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * Decompresses data from 'source' into 'dest'.
+ * This function fully respect memory boundaries for properly formed
+ * compressed data.
+ * It is a bit faster than LZ4_decompress_safe().
+ * However, it does not provide any protection against intentionally
+ * modified data stream (malicious input).
+ * Use this function in trusted environment only
+ * (data to decode comes from a trusted source).
+ *
+ * Return: number of bytes read from the source buffer
+ *	or a negative result if decompression fails.
+ */
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe() - Decompression protected against buffer overflow
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * Decompresses data fom 'source' into 'dest'.
+ * If the source stream is detected malformed, the function will
+ * stop decoding and return a negative result.
+ * This function is protected against buffer overflow exploits,
+ * including malicious data packets. It never writes outside output buffer,
+ * nor reads outside input buffer.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+
+/**
+ * LZ4_decompress_safe_partial() - Decompress a block of size 'compressedSize'
+ *	at position 'source' into buffer 'dest'
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the decompressed data which must be
+ *	already allocated
+ * @compressedSize: is the precise full size of the compressed block.
+ * @targetOutputSize: the decompression operation will try
+ *	to stop as soon as 'targetOutputSize' has been reached
+ * @maxDecompressedSize: is the size of destination buffer
+ *
+ * This function decompresses a compressed block of size 'compressedSize'
+ * at position 'source' into destination buffer 'dest'
+ * of size 'maxDecompressedSize'.
+ * The function tries to stop decompressing operation as soon as
+ * 'targetOutputSize' has been reached, reducing decompression time.
+ * This function never writes outside of output buffer,
+ * and never reads outside of input buffer.
+ * It is therefore protected against malicious data packets.
+ *
+ * Return: the number of bytes decoded in the destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ *
+ */
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize);
 
 /*
- * lz4_decompress_unknownoutputsize()
- *	src     : source address of the compressed data
- *	src_len : is the input size, therefore the compressed size
- *	dest	: output buffer address of the decompressed data
- *	dest_len: is the max size of the destination buffer, which is
- *			returned with actual size of decompressed data after
- *			decompress done
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
+ * lz4_decompress_unknownoutputsize() - For backwards compatibility,
+ *	see LZ4_decompress_safe
+ * @src: source address of the compressed data
+ * @src_len: is the input size, therefore the compressed size
+ * @dest: output buffer address of the decompressed data
+ *	which must be already allocated
+ * @dest_len: is the max size of the destination buffer, which is
+ *	returned with actual size of decompressed data after decompress done
+ *
+ * Return: Success if return 0, Error if return (< 0)
  */
 int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len);
+	unsigned char *dest, size_t *dest_len);
+
+/**
+ * lz4_decompress() - For backwards cocmpatibility, see LZ4_decompress_fast
+ * @src: source address of the compressed data
+ * @src_len: is the input size, which is returned after decompress done
+ * @dest: output buffer address of the decompressed data,
+ *	which must be already allocated
+ * @actual_dest_len: is the size of uncompressed data, supposing it's known
+ *
+ * Return: Success if return 0, Error if return (< 0)
+ */
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len);
+
+/*-************************************************************************
+ *	LZ4 HC Compression
+ **************************************************************************/
+
+/**
+ * LZ4_compress_HC() - Compress data from `src` into `dst`, using HC algorithm
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @dstCapacity: full or partial size of buffer 'dst',
+ *	which must be already allocated
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ *	value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *	Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ * @wrkmem: address of the working memory.
+ *	This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+ *
+ * Compress data from 'src' into 'dst', using the more powerful
+ * but slower "HC" algorithm. Compression is guaranteed to succeed if
+ * `dstCapacity >= LZ4_compressBound(srcSize)
+ *
+ * Return : the number of bytes written into 'dst' or 0 if compression fails.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
+	int compressionLevel, void *wrkmem);
+
+/**
+ * lz4hc_compress() - For backwards compatibility, see LZ4_compress_HC
+ * @src: source address of the original data
+ * @src_len: size of the original data
+ * @dst: output buffer address of the compressed data. This requires 'dst'
+ *	of size LZ4_COMPRESSBOUND.
+ * @dst_len: is the output size, which is returned after compress done
+ * @wrkmem: address of the working memory.
+ *	This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
+ *
+ * Return	: Success if return 0, Error if return (< 0)
+ */
+int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/**
+ * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure
+ * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
+ * @compressionLevel: Recommended values are between 4 and 9, although any
+ *	value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *	Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ *
+ * An LZ4_streamHC_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_streamHC_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
+
+/**
+ * LZ4_loadDictHC() - Load a static dictionary into LZ4_streamHC
+ * @streamHCPtr: pointer to the LZ4HC_stream_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4HC_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int	LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
+	int dictSize);
+
+/**
+ * LZ4_compress_HC_continue() - Compress 'src' using data from previously
+ *	compressed blocks as a dictionary using the HC algorithm
+ * @streamHCPtr: Pointer to the previous 'LZ4_streamHC_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ *	which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dest'
+ *	which must be already allocated
+ *
+ * These functions compress data in successive blocks of any size, using
+ * previous blocks as dictionary. One key assumption is that previous
+ * blocks (up to 64 KB) remain read-accessible while
+ * compressing next blocks. There is an exception for ring buffers,
+ * which can be smaller than 64 KB.
+ * Ring buffers scenario is automatically detected and handled by
+ * LZ4_compress_HC_continue().
+ * Before starting compression, state must be properly initialized,
+ * using LZ4_resetStreamHC().
+ * A first "fictional block" can then be designated as
+ * initial dictionary, using LZ4_loadDictHC() (Optional).
+ * Then, use LZ4_compress_HC_continue()
+ * to compress each successive block. Previous memory blocks
+ * (including initial dictionary when present) must remain accessible
+ * and unmodified during compression.
+ * 'dst' buffer should be sized to handle worst case scenarios, using
+ *  LZ4_compressBound(), to ensure operation success.
+ *  If, for any reason, previous data blocks can't be preserved unmodified
+ *  in memory during next compression block,
+ *  you must save it to a safer memory space, using LZ4_saveDictHC().
+ * Return value of LZ4_saveDictHC() is the size of dictionary
+ * effectively saved into 'safeBuffer'.
+ *
+ * Return: Number of bytes written into buffer 'dst'  or 0 if compression fails
+ */
+int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize);
+
+/**
+ * LZ4_saveDictHC() - Save static dictionary from LZ4HC_stream
+ * @streamHCPtr: pointer to the 'LZ4HC_stream_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @maxDictSize: size of 'safeBuffer'
+ *
+ * If previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note : you don't need to call LZ4_loadDictHC() afterwards,
+ * dictionary is immediately usable, you can therefore call
+ * LZ4_compress_HC_continue().
+ *
+ * Return : saved dictionary size in bytes (necessarily <= maxDictSize),
+ *	or 0 if error.
+ */
+int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
+	int maxDictSize);
+
+/*-*********************************************
+ *	Streaming Compression Functions
+ ***********************************************/
+
+/**
+ * LZ4_resetStream() - Init an allocated 'LZ4_stream_t' structure
+ * @LZ4_stream: pointer to the 'LZ4_stream_t' structure
+ *
+ * An LZ4_stream_t structure can be allocated once
+ * and re-used multiple times.
+ * Use this function to init an allocated `LZ4_stream_t` structure
+ * and start a new compression.
+ */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
+
+/**
+ * LZ4_loadDict() - Load a static dictionary into LZ4_stream
+ * @streamPtr: pointer to the LZ4_stream_t
+ * @dictionary: dictionary to load
+ * @dictSize: size of dictionary
+ *
+ * Use this function to load a static dictionary into LZ4_stream.
+ * Any previous data will be forgotten, only 'dictionary'
+ * will remain in memory.
+ * Loading a size of 0 is allowed.
+ *
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
+	int dictSize);
+
+/**
+ * LZ4_saveDict() - Save static dictionary from LZ4_stream
+ * @streamPtr: pointer to the 'LZ4_stream_t' structure
+ * @safeBuffer: buffer to save dictionary to, must be already allocated
+ * @dictSize: size of 'safeBuffer'
+ *
+ * If previously compressed data block is not guaranteed
+ * to remain available at its memory location,
+ * save it into a safer place (char *safeBuffer).
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ * dictionary is immediately usable, you can therefore call
+ * LZ4_compress_fast_continue().
+ *
+ * Return : saved dictionary size in bytes (necessarily <= dictSize),
+ *	or 0 if error.
+ */
+int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
+
+/**
+ * LZ4_compress_fast_continue() - Compress 'src' using data from previously
+ *	compressed blocks as a dictionary
+ * @streamPtr: Pointer to the previous 'LZ4_stream_t' structure
+ * @src: source address of the original data
+ * @dst: output buffer address of the compressed data,
+ *	which must be already allocated
+ * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
+ * @maxDstSize: full or partial size of buffer 'dest'
+ *	which must be already allocated
+ * @acceleration: acceleration factor
+ *
+ * Compress buffer content 'src', using data from previously compressed blocks
+ * as dictionary to improve compression ratio.
+ * Important : Previous data blocks are assumed to still
+ * be present and unmodified !
+ * If maxDstSize >= LZ4_compressBound(srcSize),
+ * compression is guaranteed to succeed, and runs faster.
+ *
+ * Return: Number of bytes written into buffer 'dst'  or 0 if compression fails
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize, int acceleration);
+
+/**
+ * LZ4_setStreamDecode() - Instruct where to find dictionary
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @dictionary: dictionary to use
+ * @dictSize: size of dictionary
+ *
+ * Use this function to instruct where to find the dictionary.
+ *	Setting a size of 0 is allowed (same effect as reset).
+ *
+ * Return: 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize);
+
+/**
+ * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ *
+ * These decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB)
+ * In the case of a ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+
+/**
+ * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
+ * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ *
+ * These decoding function allows decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks *must* remain available at the memory position
+ * where they were decoded (up to 64 KB)
+ * In the case of a ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize);
+
+/**
+ * LZ4_decompress_safe_usingDict() - Same as LZ4_setStreamDecode()
+ *	followed by LZ4_decompress_safe_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated
+ * @compressedSize: is the precise full size of the compressed block
+ * @maxDecompressedSize: is the size of 'dest' buffer
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * These decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_safe_continue()
+ * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize, const char *dictStart,
+	int dictSize);
+
+/**
+ * LZ4_decompress_fast_usingDict() - Same as LZ4_setStreamDecode()
+ *	followed by LZ4_decompress_fast_continue()
+ * @source: source address of the compressed data
+ * @dest: output buffer address of the uncompressed data
+ *	which must be already allocated with 'originalSize' bytes
+ * @originalSize: is the original and therefore uncompressed size
+ * @dictStart: pointer to the start of the dictionary in memory
+ * @dictSize: size of dictionary
+ *
+ * These decoding function works the same as
+ * a combination of LZ4_setStreamDecode() followed by
+ * LZ4_decompress_safe_continue()
+ * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure.
+ *
+ * Return: number of bytes decompressed into destination buffer
+ *	(necessarily <= maxDecompressedSize)
+ *	or a negative result in case of error
+ */
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize);
+
 #endif
diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile
index 8085d04..f7b11327 100644
--- a/lib/lz4/Makefile
+++ b/lib/lz4/Makefile
@@ -1,3 +1,5 @@
+ccflags-y += -O3
+
 obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o
 obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o
 obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 28321d8..53f313f 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -1,19 +1,16 @@
 /*
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *	* Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,417 +22,939 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lz4.h>
 #include <asm/unaligned.h>
-#include "lz4defs.h"
 
-/*
- * LZ4_compressCtx :
- * -----------------
- * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
- * maximum size 'maxOutputSize'.  * If it cannot achieve it, compression
- * will stop, and result of the function will be zero.
- * return : the number of bytes written in buffer 'dest', or 0 if the
- * compression fails
- */
-static inline int lz4_compressctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+static const int LZ4_minLength = (MFLIMIT + 1);
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT - 1));
+
+/*-******************************
+ *	Compression functions
+ ********************************/
+static FORCE_INLINE U32 LZ4_hash4(
+	U32 sequence,
+	tableType_t const tableType)
 {
-	HTYPE *hashtable = (HTYPE *)ctx;
-	const u8 *ip = (u8 *)source;
-#if LZ4_ARCH64
-	const BYTE * const base = ip;
+	if (tableType == byU16)
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH * 8) - (LZ4_HASHLOG + 1)));
+	else
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH * 8) - LZ4_HASHLOG));
+}
+
+static FORCE_INLINE U32 LZ4_hash5(
+	U64 sequence,
+	tableType_t const tableType)
+{
+	const U32 hashLog = (tableType == byU16)
+		? LZ4_HASHLOG + 1
+		: LZ4_HASHLOG;
+
+#if LZ4_LITTLE_ENDIAN
+	static const U64 prime5bytes = 889523592379ULL;
+
+	return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
 #else
-	const int base = 0;
+	static const U64 prime8bytes = 11400714785074694791ULL;
+
+	return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
 #endif
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+}
+
+static FORCE_INLINE U32 LZ4_hashPosition(
+	const void *p,
+	tableType_t const tableType)
+{
+#if LZ4_ARCH64
+	if (tableType == byU32)
+		return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+#endif
+
+	return LZ4_hash4(LZ4_read32(p), tableType);
+}
+
+static void LZ4_putPositionOnHash(
+	const BYTE *p,
+	U32 h,
+	void *tableBase,
+	tableType_t const tableType,
+	const BYTE *srcBase)
+{
+	switch (tableType) {
+	case byPtr:
+	{
+		const BYTE **hashTable = (const BYTE **)tableBase;
+
+		hashTable[h] = p;
+		return;
+	}
+	case byU32:
+	{
+		U32 *hashTable = (U32 *) tableBase;
+
+		hashTable[h] = (U32)(p - srcBase);
+		return;
+	}
+	case byU16:
+	{
+		U16 *hashTable = (U16 *) tableBase;
+
+		hashTable[h] = (U16)(p - srcBase);
+		return;
+	}
+	}
+}
+
+static FORCE_INLINE void LZ4_putPosition(
+	const BYTE *p,
+	void *tableBase,
+	tableType_t tableType,
+	const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE *LZ4_getPositionOnHash(
+	U32 h,
+	void *tableBase,
+	tableType_t tableType,
+	const BYTE *srcBase)
+{
+	if (tableType == byPtr) {
+		const BYTE **hashTable = (const BYTE **) tableBase;
+
+		return hashTable[h];
+	}
+
+	if (tableType == byU32) {
+		const U32 * const hashTable = (U32 *) tableBase;
+
+		return hashTable[h] + srcBase;
+	}
+
+	{
+		/* default, to ensure a return */
+		const U16 * const hashTable = (U16 *) tableBase;
+
+		return hashTable[h] + srcBase;
+	}
+}
+
+static FORCE_INLINE const BYTE *LZ4_getPosition(
+	const BYTE *p,
+	void *tableBase,
+	tableType_t tableType,
+	const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+/*
+ * LZ4_compress_generic() :
+ * inlined, to ensure branches are decided at compilation time
+ */
+static FORCE_INLINE int LZ4_compress_generic(
+	LZ4_stream_t_internal * const dictPtr,
+	const char * const source,
+	char * const dest,
+	const int inputSize,
+	const int maxOutputSize,
+	const limitedOutput_directive outputLimited,
+	const tableType_t tableType,
+	const dict_directive dict,
+	const dictIssue_directive dictIssue,
+	const U32 acceleration)
+{
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *base;
+	const BYTE *lowLimit;
+	const BYTE * const lowRefLimit = ip - dictPtr->dictSize;
+	const BYTE * const dictionary = dictPtr->dictionary;
+	const BYTE * const dictEnd = dictionary + dictPtr->dictSize;
+	const size_t dictDelta = dictEnd - (const BYTE *)source;
+	const BYTE *anchor = (const BYTE *) source;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dest;
+	BYTE * const olimit = op + maxOutputSize;
+
+	U32 forwardH;
+	size_t refDelta = 0;
+
+	/* Init conditions */
+	if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) {
+		/* Unsupported inputSize, too large (or negative) */
+		return 0;
+	}
+
+	switch (dict) {
+	case noDict:
+	default:
+		base = (const BYTE *)source;
+		lowLimit = (const BYTE *)source;
+		break;
+	case withPrefix64k:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source - dictPtr->dictSize;
+		break;
+	case usingExtDict:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source;
+		break;
+	}
+
+	if ((tableType == byU16)
+		&& (inputSize >= LZ4_64Klimit)) {
+		/* Size too large (not within 64K limit) */
+		return 0;
+	}
+
+	if (inputSize < LZ4_minLength) {
+		/* Input too small, no compression (all literals) */
+		goto _last_literals;
+	}
 
 	/* First Byte */
-	hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
+	LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
 	ip++;
-	forwardh = LZ4_HASH_VALUE(ip);
+	forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (unlikely(forwardip > mflimit))
-				goto _last_literals;
-
-			forwardh = LZ4_HASH_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = ip - base;
-		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+		{
+			const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = acceleration << LZ4_SKIPTRIGGER;
+
+			do {
+				U32 const h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h,
+					dictPtr->hashTable,
+					tableType, base);
+
+				if (dict == usingExtDict) {
+					if (match < (const BYTE *)source) {
+						refDelta = dictDelta;
+						lowLimit = dictionary;
+					} else {
+						refDelta = 0;
+						lowLimit = (const BYTE *)source;
+				}	 }
+
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+
+				LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
+					tableType, base);
+			} while (((dictIssue == dictSmall)
+					? (match < lowRefLimit)
+					: 0)
+				|| ((tableType == byU16)
+					? 0
+					: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match + refDelta)
+					!= LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source) &&
-			unlikely(ip[-1] == ref[-1])) {
+		while (((ip > anchor) & (match + refDelta > lowLimit))
+				&& (unlikely(ip[-1] == match[refDelta - 1]))) {
 			ip--;
-			ref--;
+			match--;
 		}
 
-		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
-			(length >> 8) > oend))
-			return 0;
+		/* Encode Literals */
+		{
+			unsigned const int litLength = (unsigned int)(ip - anchor);
 
-		if (length >= (int)RUN_MASK) {
-			int len;
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+			token = op++;
+
+			if ((outputLimited) &&
+				/* Check output buffer overflow */
+				(unlikely(op + litLength +
+					(2 + 1 + LASTLITERALS) +
+					(litLength / 255) > olimit)))
+				return 0;
+
+			if (litLength >= RUN_MASK) {
+				int len = (int)litLength - RUN_MASK;
+
+				*token = (RUN_MASK << ML_BITS);
+
+				for (; len >= 255; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength << ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match));
+		op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
-		while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
-			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 		/* Encode MatchLength */
-		length = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
-			return 0;
-		if (length >= (int)ML_MASK) {
-			*token += ML_MASK;
-			length -= ML_MASK;
-			for (; length > 509 ; length -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (length > 254) {
-				length -= 255;
-				*op++ = 255;
+		{
+			unsigned int matchCode;
+
+			if ((dict == usingExtDict)
+				&& (lowLimit == dictionary)) {
+				const BYTE *limit;
+
+				match += refDelta;
+				limit = ip + (dictEnd - match);
+
+				if (limit > matchlimit)
+					limit = matchlimit;
+
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, limit);
+
+				ip += MINMATCH + matchCode;
+
+				if (ip == limit) {
+					unsigned const int more = LZ4_count(ip,
+						(const BYTE *)source,
+						matchlimit);
+
+					matchCode += more;
+					ip += more;
+				}
+			} else {
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, matchlimit);
+				ip += MINMATCH + matchCode;
 			}
-			*op++ = (u8)length;
-		} else
-			*token += length;
+
+			if (outputLimited &&
+				/* Check output buffer overflow */
+				(unlikely(op +
+					(1 + LASTLITERALS) +
+					(matchCode >> 8) > olimit)))
+				return 0;
+
+			if (matchCode >= ML_MASK) {
+				*token += ML_MASK;
+				matchCode -= ML_MASK;
+				LZ4_write32(op, 0xFFFFFFFF);
+
+				while (matchCode >= 4 * 255) {
+					op += 4;
+					LZ4_write32(op, 0xFFFFFFFF);
+					matchCode -= 4 * 255;
+				}
+
+				op += matchCode / 255;
+				*op++ = (BYTE)(matchCode % 255);
+			} else
+				*token += (BYTE)(matchCode);
+		}
+
+		anchor = ip;
 
 		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		if (ip > mflimit)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
+		LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH_VALUE(ip)];
-		hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
+		match = LZ4_getPosition(ip, dictPtr->hashTable,
+			tableType, base);
+
+		if (dict == usingExtDict) {
+			if (match < (const BYTE *)source) {
+				refDelta = dictDelta;
+				lowLimit = dictionary;
+			} else {
+				refDelta = 0;
+				lowLimit = (const BYTE *)source;
+			}
+		}
+
+		LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+
+		if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
+			&& (match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
 			token = op++;
 			*token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (((char *)op - dest) + lastrun + 1
-		+ ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
-		return 0;
+	{
+		size_t const lastRun = (size_t)(iend - anchor);
 
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+		if ((outputLimited) &&
+			/* Check output buffer overflow */
+			((op - (BYTE *)dest) + lastRun + 1 +
+			((lastRun + 255 - RUN_MASK) / 255) > (U32)maxOutputSize))
+			return 0;
+
+		if (lastRun >= RUN_MASK) {
+			size_t accumulator = lastRun - RUN_MASK;
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRun << ML_BITS);
+		}
+
+		memcpy(op, anchor, lastRun);
+
+		op += lastRun;
+	}
 
 	/* End */
-	return (int)(((char *)op) - dest);
+	return (int) (((char *)op) - dest);
 }
 
-static inline int lz4_compress64kctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+static int LZ4_compress_fast_extState(
+	void *state,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize,
+	int acceleration)
 {
-	u16 *hashtable = (u16 *)ctx;
-	const u8 *ip = (u8 *) source;
-	const u8 *anchor = ip;
-	const u8 *const base = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int len, length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+	LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
+#if LZ4_ARCH64
+	const tableType_t tableType = byU32;
+#else
+	const tableType_t tableType = byPtr;
+#endif
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+	LZ4_resetStream((LZ4_stream_t *)state);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
+
+	if (maxOutputSize >= LZ4_COMPRESSBOUND(inputSize)) {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, tableType, noDict,
+				noDictIssue, acceleration);
+	} else {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, tableType, noDict,
+				noDictIssue, acceleration);
+	}
+}
+
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, int acceleration, void *wrkmem)
+{
+	return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize,
+		maxOutputSize, acceleration);
+}
+EXPORT_SYMBOL(LZ4_compress_fast);
+
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem)
+{
+	return LZ4_compress_fast(source, dest, inputSize,
+		maxOutputSize, LZ4_ACCELERATION_DEFAULT, wrkmem);
+}
+EXPORT_SYMBOL(LZ4_compress_default);
+
+/*-******************************
+ *	*_destSize() variant
+ ********************************/
+static int LZ4_compress_destSize_generic(
+	LZ4_stream_t_internal * const ctx,
+	const char * const src,
+	char * const dst,
+	int * const srcSizePtr,
+	const int targetDstSize,
+	const tableType_t tableType)
+{
+	const BYTE *ip = (const BYTE *) src;
+	const BYTE *base = (const BYTE *) src;
+	const BYTE *lowLimit = (const BYTE *) src;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + *srcSizePtr;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dst;
+	BYTE * const oend = op + targetDstSize;
+	BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */
+		- 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */;
+	BYTE * const oMaxMatch = op + targetDstSize
+		- (LASTLITERALS + 1 /* token */);
+	BYTE * const oMaxSeq = oMaxLit - 1 /* token */;
+
+	U32 forwardH;
+
+	/* Init conditions */
+	/* Impossible to store anything */
+	if (targetDstSize < 1)
+		return 0;
+	/* Unsupported input size, too large (or negative) */
+	if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE)
+		return 0;
+	/* Size too large (not within 64K limit) */
+	if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit))
+		return 0;
+	/* Input too small, no compression (all literals) */
+	if (*srcSizePtr < LZ4_minLength)
+		goto _last_literals;
 
 	/* First Byte */
-	ip++;
-	forwardh = LZ4_HASH64K_VALUE(ip);
+	*srcSizePtr = 0;
+	LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (forwardip > mflimit)
-				goto _last_literals;
-
-			forwardh = LZ4_HASH64K_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = (u16)(ip - base);
-		} while (A32(ref) != A32(ip));
+		{
+			const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = 1 << LZ4_SKIPTRIGGER;
+
+			do {
+				U32 h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h, ctx->hashTable,
+					tableType, base);
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h,
+					ctx->hashTable, tableType,
+					base);
+
+			} while (((tableType == byU16)
+				? 0
+				: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match) != LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source)
-			&& (ip[-1] == ref[-1])) {
+		while ((ip > anchor)
+			&& (match > lowLimit)
+			&& (unlikely(ip[-1] == match[-1]))) {
 			ip--;
-			ref--;
+			match--;
 		}
 
 		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* Check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS)
-			+ (length >> 8) > oend))
-			return 0;
-		if (length >= (int)RUN_MASK) {
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+		{
+			unsigned int litLength = (unsigned int)(ip - anchor);
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
+			token = op++;
+			if (op + ((litLength + 240) / 255)
+				+ litLength > oMaxLit) {
+				/* Not enough space for a last match */
+				op--;
+				goto _last_literals;
+			}
+			if (litLength >= RUN_MASK) {
+				unsigned int len = litLength - RUN_MASK;
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength << ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
+		/* Encode MatchLength */
+		{
+			size_t matchLength = LZ4_count(ip + MINMATCH,
+			match + MINMATCH, matchlimit);
 
-		while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
+			if (op + ((matchLength + 240)/255) > oMaxMatch) {
+				/* Match description too long : reduce it */
+				matchLength = (15 - 1) + (oMaxMatch - op) * 255;
 			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
+			ip += MINMATCH + matchLength;
+
+			if (matchLength >= ML_MASK) {
+				*token += ML_MASK;
+				matchLength -= ML_MASK;
+				while (matchLength >= 255) {
+					matchLength -= 255;
+					*op++ = 255;
+				}
+				*op++ = (BYTE)matchLength;
+			} else
+				*token += (BYTE)(matchLength);
 		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 
-		/* Encode MatchLength */
-		len = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
-			return 0;
-		if (len >= (int)ML_MASK) {
-			*token += ML_MASK;
-			len -= ML_MASK;
-			for (; len > 509 ; len -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (len > 254) {
-				len -= 255;
-				*op++ = 255;
-			}
-			*op++ = (u8)len;
-		} else
-			*token += len;
+		anchor = ip;
 
-		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		/* Test end of block */
+		if (ip > mflimit)
+			break;
+		if (op > oMaxSeq)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
+		LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
-		hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
-		if (A32(ref) == A32(ip)) {
-			token = op++;
-			*token = 0;
+		match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
+		LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+
+		if ((match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match) == LZ4_read32(ip))) {
+			token = op++; *token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH64K_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
-		return 0;
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{
+		size_t lastRunSize = (size_t)(iend - anchor);
+
+		if (op + 1 /* token */
+			+ ((lastRunSize + 240) / 255) /* litLength */
+			+ lastRunSize /* literals */ > oend) {
+			/* adapt lastRunSize to fill 'dst' */
+			lastRunSize	= (oend - op) - 1;
+			lastRunSize -= (lastRunSize + 240) / 255;
+		}
+		ip = anchor + lastRunSize;
+
+		if (lastRunSize >= RUN_MASK) {
+			size_t accumulator = lastRunSize - RUN_MASK;
+
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRunSize<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRunSize);
+		op += lastRunSize;
+	}
+
 	/* End */
-	return (int)(((char *)op) - dest);
+	*srcSizePtr = (int) (((const char *)ip) - src);
+	return (int) (((char *)op) - dst);
 }
 
-int lz4_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_destSize_extState(
+	LZ4_stream_t *state,
+	const char *src,
+	char *dst,
+	int *srcSizePtr,
+	int targetDstSize)
 {
-	int ret = -1;
-	int out_len = 0;
+#if LZ4_ARCH64
+	const tableType_t tableType = byU32;
+#else
+	const tableType_t tableType = byPtr;
+#endif
 
-	if (src_len < LZ4_64KLIMIT)
-		out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
-	else
-		out_len = lz4_compressctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
+	LZ4_resetStream(state);
+
+	if (targetDstSize >= LZ4_COMPRESSBOUND(*srcSizePtr)) {
+		/* compression success is guaranteed */
+		return LZ4_compress_fast_extState(
+			state, src, dst, *srcSizePtr,
+			targetDstSize, 1);
+	} else {
+		if (*srcSizePtr < LZ4_64Klimit)
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, byU16);
+		else
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, tableType);
+	}
+}
+
+
+int LZ4_compress_destSize(
+	const char *src,
+	char *dst,
+	int *srcSizePtr,
+	int targetDstSize,
+	void *wrkmem)
+{
+	return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr,
+		targetDstSize);
+}
+EXPORT_SYMBOL(LZ4_compress_destSize);
+
+/*-******************************
+ *	Streaming functions
+ ********************************/
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream)
+{
+	memset(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+int LZ4_loadDict(LZ4_stream_t *LZ4_dict,
+	const char *dictionary, int dictSize)
+{
+	LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse;
+	const BYTE *p = (const BYTE *)dictionary;
+	const BYTE * const dictEnd = p + dictSize;
+	const BYTE *base;
+
+	if ((dict->initCheck)
+		|| (dict->currentOffset > 1 * GB)) {
+		/* Uninitialized structure, or reuse overflow */
+		LZ4_resetStream(LZ4_dict);
+	}
+
+	if (dictSize < (int)HASH_UNIT) {
+		dict->dictionary = NULL;
+		dict->dictSize = 0;
+		return 0;
+	}
+
+	if ((dictEnd - p) > 64 * KB)
+		p = dictEnd - 64 * KB;
+	dict->currentOffset += 64 * KB;
+	base = p - dict->currentOffset;
+	dict->dictionary = p;
+	dict->dictSize = (U32)(dictEnd - p);
+	dict->currentOffset += dict->dictSize;
+
+	while (p <= dictEnd - HASH_UNIT) {
+		LZ4_putPosition(p, dict->hashTable, byU32, base);
+		p += 3;
+	}
+
+	return dict->dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDict);
+
+static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict,
+	const BYTE *src)
+{
+	if ((LZ4_dict->currentOffset > 0x80000000) ||
+		((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {
+		/* address space overflow */
+		/* rescale hash table */
+		U32 const delta = LZ4_dict->currentOffset - 64 * KB;
+		const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+		int i;
+
+		for (i = 0; i < LZ4_HASH_SIZE_U32; i++) {
+			if (LZ4_dict->hashTable[i] < delta)
+				LZ4_dict->hashTable[i] = 0;
+			else
+				LZ4_dict->hashTable[i] -= delta;
+		}
+		LZ4_dict->currentOffset = 64 * KB;
+		if (LZ4_dict->dictSize > 64 * KB)
+			LZ4_dict->dictSize = 64 * KB;
+		LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+	}
+}
+
+int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
+{
+	LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse;
+	const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize;
+
+	if ((U32)dictSize > 64 * KB) {
+		/* useless to define a dictionary > 64 * KB */
+		dictSize = 64 * KB;
+	}
+	if ((U32)dictSize > dict->dictSize)
+		dictSize = dict->dictSize;
+
+	memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+	dict->dictionary = (const BYTE *)safeBuffer;
+	dict->dictSize = (U32)dictSize;
+
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDict);
+
+int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
+	char *dest, int inputSize, int maxOutputSize, int acceleration)
+{
+	LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse;
+	const BYTE * const dictEnd = streamPtr->dictionary
+		+ streamPtr->dictSize;
+
+	const BYTE *smallest = (const BYTE *) source;
+
+	if (streamPtr->initCheck) {
+		/* Uninitialized structure detected */
+		return 0;
+	}
+
+	if ((streamPtr->dictSize > 0) && (smallest > dictEnd))
+		smallest = dictEnd;
+
+	LZ4_renormDictT(streamPtr, smallest);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
 
-	if (out_len < 0)
-		goto exit;
+	/* Check overlapping input/dictionary space */
+	{
+		const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+
+		if ((sourceEnd > streamPtr->dictionary)
+			&& (sourceEnd < dictEnd)) {
+			streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+			if (streamPtr->dictSize > 64 * KB)
+				streamPtr->dictSize = 64 * KB;
+			if (streamPtr->dictSize < 4)
+				streamPtr->dictSize = 0;
+			streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+		}
+	}
 
-	*dst_len = out_len;
+	/* prefix mode : source data follows dictionary */
+	if (dictEnd == (const BYTE *)source) {
+		int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, dictSmall, acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, noDictIssue, acceleration);
+		}
+		streamPtr->dictSize += (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
 
-	return 0;
-exit:
-	return ret;
+	/* external dictionary mode */
+	{
+		int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, dictSmall, acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, noDictIssue, acceleration);
+		}
+		streamPtr->dictionary = (const BYTE *)source;
+		streamPtr->dictSize = (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+}
+EXPORT_SYMBOL(LZ4_compress_fast_continue);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem) {
+	*dst_len = LZ4_compress_default(src, dst, src_len,
+		*dst_len, wrkmem);
+
+	/*
+	 * Prior lz4_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_fast/default
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!*dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4_compress);
 
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 6d940c7..b5e00a1 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -1,25 +1,16 @@
 /*
- * LZ4 Decompressor for Linux kernel
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * Based on LZ4 implementation by Yann Collet.
- *
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *	* Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -31,313 +22,529 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  You can contact the author at :
- *  - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- *  - LZ4 source repository : http://code.google.com/p/lz4/
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#ifndef STATIC
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#endif
-#include <linux/lz4.h>
-
 #include <asm/unaligned.h>
 
-#include "lz4defs.h"
-
-static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-static int lz4_uncompress(const char *source, char *dest, int osize)
+/*-*****************************
+ *	Decompression functions
+ *******************************/
+/* LZ4_decompress_generic() :
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is important this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+static FORCE_INLINE int LZ4_decompress_generic(
+	 const char * const source,
+	 char * const dest,
+	 int inputSize,
+		/*
+		 * If endOnInput == endOnInputSize,
+		 * this value is the max size of Output Buffer.
+		 */
+	 int outputSize,
+	 /* endOnOutputSize, endOnInputSize */
+	 int endOnInput,
+	 /* full, partial */
+	 int partialDecoding,
+	 /* only used if partialDecoding == partial */
+	 int targetOutputSize,
+	 /* noDict, withPrefix64k, usingExtDict */
+	 int dict,
+	 /* == dest when no prefix */
+	 const BYTE * const lowPrefix,
+	 /* only if dict == usingExtDict */
+	 const BYTE * const dictStart,
+	 /* note : = 0 if noDict */
+	 const size_t dictSize
+	 )
 {
+	/* Local Variables */
 	const BYTE *ip = (const BYTE *) source;
-	const BYTE *ref;
+	const BYTE * const iend = ip + inputSize;
+
 	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + osize;
+	BYTE * const oend = op + outputSize;
 	BYTE *cpy;
-	unsigned token;
-	size_t length;
+	BYTE *oexit = op + targetOutputSize;
+	const BYTE * const lowLimit = lowPrefix - dictSize;
+
+	const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
+	const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };
+	const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 };
 
+	const int safeDecode = (endOnInput == endOnInputSize);
+	const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
+
+	/* Special cases */
+	/* targetOutputSize too high => decode everything */
+	if ((partialDecoding) && (oexit > oend - MFLIMIT))
+		oexit = oend - MFLIMIT;
+
+	/* Empty output buffer */
+	if ((endOnInput) && (unlikely(outputSize == 0)))
+		return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
+
+	if ((!endOnInput) && (unlikely(outputSize == 0)))
+		return (*ip == 0 ? 1 : -1);
+
+	/* Main Loop : decode sequences */
 	while (1) {
+		size_t length;
+		const BYTE *match;
+		size_t offset;
+
+		/* get literal length */
+		unsigned int const token = *ip++;
+
+		length = token>>ML_BITS;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
 		if (length == RUN_MASK) {
-			size_t len;
+			unsigned int s;
 
-			len = *ip++;
-			for (; len == 255; length += 255)
-				len = *ip++;
-			if (unlikely(length > (size_t)(length + len)))
+			do {
+				s = *ip++;
+				length += s;
+			} while (likely(endOnInput
+				? ip < iend - RUN_MASK
+				: 1) & (s == 255));
+
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)(op))) {
+				/* overflow detection */
 				goto _output_error;
-			length += len;
+			}
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(ip + length) < (size_t)(ip))) {
+				/* overflow detection */
+				goto _output_error;
+			}
 		}
 
 		/* copy literals */
 		cpy = op + length;
-		if (unlikely(cpy > oend - COPYLENGTH)) {
-			/*
-			 * Error: not enough place for another match
-			 * (min 4) + 5 literals
-			 */
-			if (cpy != oend)
-				goto _output_error;
+		if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
+			|| (ip + length > iend - (2 + 1 + LASTLITERALS))))
+			|| ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
+			if (partialDecoding) {
+				if (cpy > oend) {
+					/*
+					 * Error :
+					 * write attempt beyond end of output buffer
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& (ip + length > iend)) {
+					/*
+					 * Error :
+					 * read attempt beyond
+					 * end of input buffer
+					 */
+					goto _output_error;
+				}
+			} else {
+				if ((!endOnInput)
+					&& (cpy != oend)) {
+					/*
+					 * Error :
+					 * block decoding must
+					 * stop exactly there
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& ((ip + length != iend)
+					|| (cpy > oend))) {
+					/*
+					 * Error :
+					 * input must be consumed
+					 */
+					goto _output_error;
+				}
+			}
 
 			memcpy(op, ip, length);
 			ip += length;
-			break; /* EOF */
+			op += length;
+			/* Necessarily EOF, due to parsing restrictions */
+			break;
 		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
+
+		LZ4_wildCopy(op, ip, cpy);
+		ip += length;
 		op = cpy;
 
 		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
+		offset = LZ4_readLE16(ip);
 		ip += 2;
+		match = op - offset;
 
-		/* Error: offset create reference outside destination buffer */
-		if (unlikely(ref < (BYTE *const) dest))
+		if ((checkOffset) && (unlikely(match < lowLimit))) {
+			/* Error : offset outside buffers */
 			goto _output_error;
+		}
+
+		/* costs ~1%; silence an msan warning when offset == 0 */
+		LZ4_write32(op, (U32)offset);
 
 		/* get matchlength */
 		length = token & ML_MASK;
 		if (length == ML_MASK) {
-			for (; *ip == 255; length += 255)
-				ip++;
-			if (unlikely(length > (size_t)(length + *ip)))
+			unsigned int s;
+
+			do {
+				s = *ip++;
+
+				if ((endOnInput) && (ip > iend - LASTLITERALS))
+					goto _output_error;
+
+				length += s;
+			} while (s == 255);
+
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)op)) {
+				/* overflow detection */
 				goto _output_error;
-			length += *ip++;
+			}
 		}
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-			op[0] = ref[0];
-			op[1] = ref[1];
-			op[2] = ref[2];
-			op[3] = ref[3];
-			op += 4;
-			ref += 4;
-			ref -= dec32table[op-ref];
-			PUT4(ref, op);
-			op += STEPSIZE - 4;
-			ref -= dec64;
+		length += MINMATCH;
+
+		/* check external dictionary */
+		if ((dict == usingExtDict) && (match < lowPrefix)) {
+			if (unlikely(op + length > oend - LASTLITERALS)) {
+				/* doesn't respect parsing restriction */
+				goto _output_error;
+			}
+
+			if (length <= (size_t)(lowPrefix - match)) {
+				/*
+				 * match can be copied as a single segment
+				 * from external dictionary
+				 */
+				memmove(op, dictEnd - (lowPrefix - match),
+					length);
+				op += length;
+			} else {
+				/*
+				 * match encompass external
+				 * dictionary and current block
+				 */
+				size_t const copySize = (size_t)(lowPrefix - match);
+				size_t const restSize = length - copySize;
+
+				memcpy(op, dictEnd - copySize, copySize);
+				op += copySize;
+
+				if (restSize > (size_t)(op - lowPrefix)) {
+					/* overlap copy */
+					BYTE * const endOfMatch = op + restSize;
+					const BYTE *copyFrom = lowPrefix;
+
+					while (op < endOfMatch)
+						*op++ = *copyFrom++;
+				} else {
+					memcpy(op, lowPrefix, restSize);
+					op += restSize;
+				}
+			}
+
+			continue;
+		}
+
+		/* copy match within block */
+		cpy = op + length;
+
+		if (unlikely(offset < 8)) {
+			const int dec64 = dec64table[offset];
+
+			op[0] = match[0];
+			op[1] = match[1];
+			op[2] = match[2];
+			op[3] = match[3];
+			match += dec32table[offset];
+			memcpy(op + 4, match, 4);
+			match -= dec64;
 		} else {
-			LZ4_COPYSTEP(ref, op);
+			LZ4_copy8(op, match);
+			match += 8;
 		}
-		cpy = op + length - (STEPSIZE - 4);
-		if (cpy > (oend - COPYLENGTH)) {
 
-			/* Error: request to write beyond destination buffer */
-			if (cpy > oend)
-				goto _output_error;
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
+		op += 8;
+
+		if (unlikely(cpy > oend - 12)) {
+			BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
+
+			if (cpy > oend - LASTLITERALS) {
+				/*
+				 * Error : last LASTLITERALS bytes
+				 * must be literals (uncompressed)
+				 */
 				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
+			}
+
+			if (op < oCopyLimit) {
+				LZ4_wildCopy(op, match, oCopyLimit);
+				match += oCopyLimit - op;
+				op = oCopyLimit;
+			}
+
 			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
+				*op++ = *match++;
+		} else {
+			LZ4_copy8(op, match);
+
+			if (length > 16)
+				LZ4_wildCopy(op + 8, match + 8, cpy);
 		}
-		LZ4_SECURECOPY(ref, op, cpy);
+
 		op = cpy; /* correction */
 	}
+
 	/* end of decoding */
-	return (int) (((char *)ip) - source);
+	if (endOnInput) {
+		/* Nb of output bytes decoded */
+		return (int) (((char *)op) - dest);
+	} else {
+		/* Nb of input bytes read */
+		return (int) (((const char *)ip) - source);
+	}
 
-	/* write overflow error detected */
+	/* Overflow error detected */
 _output_error:
 	return -1;
 }
 
-static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
-				int isize, size_t maxoutputsize)
+int LZ4_decompress_safe(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize)
 {
-	const BYTE *ip = (const BYTE *) source;
-	const BYTE *const iend = ip + isize;
-	const BYTE *ref;
-
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, full, 0,
+		noDict, (BYTE *)dest, NULL, 0);
+}
 
-	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + maxoutputsize;
-	BYTE *cpy;
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize)
+{
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, partial,
+		targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
+}
 
-	/* Main Loop */
-	while (ip < iend) {
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
+{
+	return LZ4_decompress_generic(source, dest, 0, originalSize,
+		endOnOutputSize, full, 0, withPrefix64k,
+		(BYTE *)(dest - 64 * KB), NULL, 64 * KB);
+}
 
-		unsigned token;
-		size_t length;
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
-		if (length == RUN_MASK) {
-			int s = 255;
-			while ((ip < iend) && (s == 255)) {
-				s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-			}
-		}
-		/* copy literals */
-		cpy = op + length;
-		if ((cpy > oend - COPYLENGTH) ||
-			(ip + length > iend - COPYLENGTH)) {
-
-			if (cpy > oend)
-				goto _output_error;/* writes beyond buffer */
-
-			if (ip + length != iend)
-				goto _output_error;/*
-						    * Error: LZ4 format requires
-						    * to consume all input
-						    * at this stage
-						    */
-			memcpy(op, ip, length);
-			op += length;
-			break;/* Necessarily EOF, due to parsing restrictions */
-		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+	lz4sd->prefixSize = (size_t) dictSize;
+	lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize;
+	lz4sd->externalDict = NULL;
+	lz4sd->extDictSize	= 0;
+	return 1;
+}
 
-		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-		if (ref < (BYTE * const) dest)
-			goto _output_error;
-			/*
-			 * Error : offset creates reference
-			 * outside of destination buffer
-			 */
+/*
+ * *_continue() :
+ * These decoding functions allow decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks must still be available at the memory
+ * position where they were decoded.
+ * If it's not possible, save the relevant part of
+ * decoded data into a safe buffer,
+ * and indicate where it stands using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize, int maxOutputSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize,
+			maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict,
+			lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += result;
+		lz4sd->prefixEnd	+= result;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = result;
+		lz4sd->prefixEnd	= (BYTE *)dest + result;
+	}
 
-		/* get matchlength */
-		length = (token & ML_MASK);
-		if (length == ML_MASK) {
-			while (ip < iend) {
-				int s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-				if (s == 255)
-					continue;
-				break;
-			}
-		}
+	return result;
+}
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-				op[0] = ref[0];
-				op[1] = ref[1];
-				op[2] = ref[2];
-				op[3] = ref[3];
-				op += 4;
-				ref += 4;
-				ref -= dec32table[op - ref];
-				PUT4(ref, op);
-				op += STEPSIZE - 4;
-				ref -= dec64;
-		} else {
-			LZ4_COPYSTEP(ref, op);
-		}
-		cpy = op + length - (STEPSIZE-4);
-		if (cpy > oend - COPYLENGTH) {
-			if (cpy > oend)
-				goto _output_error; /* write outside of buf */
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
-				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
-		}
-		LZ4_SECURECOPY(ref, op, cpy);
-		op = cpy; /* correction */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict,
+			lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict, lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += originalSize;
+		lz4sd->prefixEnd	+= originalSize;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = originalSize;
+		lz4sd->prefixEnd	= (BYTE *)dest + originalSize;
 	}
-	/* end of decoding */
-	return (int) (((char *) op) - dest);
 
-	/* write overflow error detected */
-_output_error:
-	return -1;
+	return result;
 }
 
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len)
+/*
+ * Advanced decoding functions :
+ * *_usingDict() :
+ * These decoding functions work the same as "_continue" ones,
+ * the dictionary must be explicitly provided within parameters
+ */
+static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source,
+	char *dest, int compressedSize, int maxOutputSize, int safe,
+	const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int input_len = 0;
-
-	input_len = lz4_uncompress(src, dest, actual_dest_len);
-	if (input_len < 0)
-		goto exit_0;
-	*src_len = input_len;
+	if (dictSize == 0)
+		return LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize, safe, full, 0,
+			noDict, (BYTE *)dest, NULL, 0);
+	if (dictStart + dictSize == dest) {
+		if (dictSize >= (int)(64 * KB - 1))
+			return LZ4_decompress_generic(source, dest,
+				compressedSize, maxOutputSize, safe, full, 0,
+				withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
+		return LZ4_decompress_generic(source, dest, compressedSize,
+			maxOutputSize, safe, full, 0, noDict,
+			(BYTE *)dest - dictSize, NULL, 0);
+	}
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxOutputSize, safe, full, 0, usingExtDict,
+		(BYTE *)dest, (const BYTE *)dictStart, dictSize);
+}
 
-	return 0;
-exit_0:
-	return ret;
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxOutputSize,
+	const char *dictStart, int dictSize)
+{
+	return LZ4_decompress_usingDict_generic(source, dest,
+		compressedSize, maxOutputSize, 1, dictStart, dictSize);
 }
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress);
-#endif
 
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len)
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int out_len = 0;
-
-	out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
-					*dest_len);
-	if (out_len < 0)
-		goto exit_0;
-	*dest_len = out_len;
-
-	return 0;
-exit_0:
-	return ret;
+	return LZ4_decompress_usingDict_generic(source, dest, 0,
+		originalSize, 0, dictStart, dictSize);
 }
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_decompress_unknownoutputsize(const unsigned char *src,
+	size_t src_len, unsigned char *dest, size_t *dest_len) {
+	*dest_len = LZ4_decompress_safe(src, dest,
+		src_len, *dest_len);
+
+	/*
+	 * Prior lz4_decompress_unknownoutputsize will return
+	 * 0 for success and a negative result for error
+	 * new LZ4_decompress_safe returns
+	 * - the length of data read on success
+	 * - and also a negative result on error
+	 * meaning when result > 0, we just return 0 here
+	 */
+	if (src_len > 0)
+		return 0;
+	else
+		return -1;
+}
+
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len) {
+	*src_len = LZ4_decompress_fast(src, dest, actual_dest_len);
+
+	/*
+	 * Prior lz4_decompress will return
+	 * 0 for success and a negative result for error
+	 * new LZ4_decompress_fast returns
+	 * - the length of data read on success
+	 * - and also a negative result on error
+	 * meaning when result > 0, we just return 0 here
+	 */
+	if (*src_len > 0)
+		return 0;
+	else
+		return -1;
+}
+
 #ifndef STATIC
+EXPORT_SYMBOL(LZ4_decompress_safe);
+EXPORT_SYMBOL(LZ4_decompress_safe_partial);
+EXPORT_SYMBOL(LZ4_decompress_fast);
+EXPORT_SYMBOL(LZ4_setStreamDecode);
+EXPORT_SYMBOL(LZ4_decompress_safe_continue);
+EXPORT_SYMBOL(LZ4_decompress_fast_continue);
+EXPORT_SYMBOL(LZ4_decompress_safe_usingDict);
+EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
 EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
+EXPORT_SYMBOL(lz4_decompress);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4 Decompressor");
+MODULE_DESCRIPTION("LZ4 decompressor");
 #endif
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index c79d7ea..00a0b58 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -1,157 +1,227 @@
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
 /*
- * lz4defs.h -- architecture specific defines
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * lz4defs.h -- common and architecture specific defines for the kernel usage
+
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *	* Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-/*
- * Detects 64 bits mode
- */
+#include <asm/unaligned.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
+#define FORCE_INLINE __always_inline
+
+/*-************************************
+ *	Basic Types
+ **************************************/
+#include <linux/types.h>
+
+typedef	uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef	int32_t S32;
+typedef uint64_t U64;
+typedef uintptr_t uptrval;
+
+/*-************************************
+ *	Architecture specifics
+ **************************************/
 #if defined(CONFIG_64BIT)
 #define LZ4_ARCH64 1
 #else
 #define LZ4_ARCH64 0
 #endif
 
-/*
- * Architecture-specific macros
- */
-#define BYTE	u8
-typedef struct _U16_S { u16 v; } U16_S;
-typedef struct _U32_S { u32 v; } U32_S;
-typedef struct _U64_S { u64 v; } U64_S;
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-
-#define A16(x) (((U16_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A64(x) (((U64_S *)(x))->v)
-
-#define PUT4(s, d) (A32(d) = A32(s))
-#define PUT8(s, d) (A64(d) = A64(s))
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - A16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
-	do {	\
-		A16(p) = v; \
-		p += 2; \
-	} while (0)
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
-#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
-#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
-
-#define PUT4(s, d) \
-	put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
-#define PUT8(s, d) \
-	put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - get_unaligned_le16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)			\
-	do {						\
-		put_unaligned_le16(v, (u16 *)(p));	\
-		p += 2;					\
-	} while (0)
+#if defined(__LITTLE_ENDIAN)
+#define LZ4_LITTLE_ENDIAN 1
+#else
+#define LZ4_LITTLE_ENDIAN 0
 #endif
 
-#define COPYLENGTH 8
-#define ML_BITS  4
-#define ML_MASK  ((1U << ML_BITS) - 1)
+/*-************************************
+ *	Constants
+ **************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH + MINMATCH)
+
+/* Increase this value ==> compression run slower on incompressible data */
+#define LZ4_SKIPTRIGGER 6
+
+#define HASH_UNIT sizeof(size_t)
+
+#define KB (1 << 10)
+#define MB (1 << 20)
+#define GB (1U << 30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+#define STEPSIZE sizeof(size_t)
+
+#define ML_BITS	4
+#define ML_MASK	((1U << ML_BITS) - 1)
 #define RUN_BITS (8 - ML_BITS)
 #define RUN_MASK ((1U << RUN_BITS) - 1)
-#define MEMORY_USAGE	14
-#define MINMATCH	4
-#define SKIPSTRENGTH	6
-#define LASTLITERALS	5
-#define MFLIMIT		(COPYLENGTH + MINMATCH)
-#define MINLENGTH	(MFLIMIT + 1)
-#define MAXD_LOG	16
-#define MAXD		(1 << MAXD_LOG)
-#define MAXD_MASK	(u32)(MAXD - 1)
-#define MAX_DISTANCE	(MAXD - 1)
-#define HASH_LOG	(MAXD_LOG - 1)
-#define HASHTABLESIZE	(1 << HASH_LOG)
-#define MAX_NB_ATTEMPTS	256
-#define OPTIMAL_ML	(int)((ML_MASK-1)+MINMATCH)
-#define LZ4_64KLIMIT	((1<<16) + (MFLIMIT - 1))
-#define HASHLOG64K	((MEMORY_USAGE - 2) + 1)
-#define HASH64KTABLESIZE	(1U << HASHLOG64K)
-#define LZ4_HASH_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - (MEMORY_USAGE-2)))
-#define LZ4_HASH64K_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASHLOG64K))
-#define HASH_VALUE(p)		(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASH_LOG))
-
-#if LZ4_ARCH64/* 64-bit */
-#define STEPSIZE 8
-
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT8(s, d);	\
-		d += 8;		\
-		s += 8;		\
-	} while (0)
-
-#define LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
-
-#define LZ4_SECURECOPY(s, d, e)			\
-	do {					\
-		if (d < e) {			\
-			LZ4_WILDCOPY(s, d, e);	\
-		}				\
-	} while (0)
-#define HTYPE u32
-
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+
+/*-************************************
+ *	Reading and writing into memory
+ **************************************/
+static FORCE_INLINE U16 LZ4_read16(const void *ptr)
+{
+	return get_unaligned((const U16 *)ptr);
+}
+
+static FORCE_INLINE U32 LZ4_read32(const void *ptr)
+{
+	return get_unaligned((const U32 *)ptr);
+}
+
+static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr)
+{
+	return get_unaligned((const size_t *)ptr);
+}
+
+static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value)
+{
+	put_unaligned(value, (U16 *)memPtr);
+}
+
+static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value)
+{
+	put_unaligned(value, (U32 *)memPtr);
+}
+
+static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr)
+{
+	return get_unaligned_le16(memPtr);
+}
+
+static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value)
+{
+	return put_unaligned_le16(value, memPtr);
+}
+
+static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
+{
+#if LZ4_ARCH64
+	U64 a = get_unaligned((const U64 *)src);
+
+	put_unaligned(a, (U64 *)dst);
+#else
+	U32 a = get_unaligned((const U32 *)src);
+	U32 b = get_unaligned((const U32 *)src + 1);
+
+	put_unaligned(a, (U32 *)dst);
+	put_unaligned(b, (U32 *)dst + 1);
+#endif
+}
+
+/*
+ * customized variant of memcpy,
+ * which can overwrite up to 7 bytes beyond dstEnd
+ */
+static FORCE_INLINE void LZ4_wildCopy(void *dstPtr,
+	const void *srcPtr, void *dstEnd)
+{
+	BYTE *d = (BYTE *)dstPtr;
+	const BYTE *s = (const BYTE *)srcPtr;
+	BYTE *const e = (BYTE *)dstEnd;
+
+	do {
+		LZ4_copy8(d, s);
+		d += 8;
+		s += 8;
+	} while (d < e);
+}
+
+static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
+{
+#if LZ4_LITTLE_ENDIAN
+	return __ffs(val) >> 3;
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
+	return (BITS_PER_LONG - 1 - __fls(val)) >> 3;
+#endif
+}
+
+static FORCE_INLINE unsigned int LZ4_count(
+	const BYTE *pIn,
+	const BYTE *pMatch,
+	const BYTE *pInLimit)
+{
+	const BYTE *const pStart = pIn;
+
+	while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
+		size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+
+		if (!diff) {
+			pIn += STEPSIZE;
+			pMatch += STEPSIZE;
+			continue;
+		}
+
+		pIn += LZ4_NbCommonBytes(diff);
+
+		return (unsigned int)(pIn - pStart);
+	}
+
+#if LZ4_ARCH64
+	if ((pIn < (pInLimit - 3))
+		&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+		pIn += 4;
+		pMatch += 4;
+	}
 #endif
 
-#else	/* 32-bit */
-#define STEPSIZE 4
+	if ((pIn < (pInLimit - 1))
+		&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+		pIn += 2;
+		pMatch += 2;
+	}
 
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT4(s, d);	\
-		d += 4;		\
-		s += 4;		\
-	} while (0)
+	if ((pIn < pInLimit) && (*pMatch == *pIn))
+		pIn++;
 
-#define LZ4_COPYPACKET(s, d)		\
-	do {				\
-		LZ4_COPYSTEP(s, d);	\
-		LZ4_COPYSTEP(s, d);	\
-	} while (0)
+	return (unsigned int)(pIn - pStart);
+}
 
-#define LZ4_SECURECOPY	LZ4_WILDCOPY
-#define HTYPE const u8*
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
 
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
-#else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
-#endif
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
 
-#endif
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
 
-#define LZ4_WILDCOPY(s, d, e)		\
-	do {				\
-		LZ4_COPYPACKET(s, d);	\
-	} while (d < e)
-
-#define LZ4_BLINDCOPY(s, d, l)	\
-	do {	\
-		u8 *e = (d) + l;	\
-		LZ4_WILDCOPY(s, d, e);	\
-		d = e;	\
-	} while (0)
+#endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index f344f76..e1fbf15 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -1,19 +1,17 @@
 /*
  * LZ4 HC - High Compression Mode of LZ4
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Copyright (C) 2011-2015, Yann Collet.
  *
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *	* Redistributions of source code must retain the above copyright
+ *	  notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,323 +23,361 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+/*-************************************
+ *	Dependencies
+ **************************************/
 #include <linux/lz4.h>
-#include <asm/unaligned.h>
 #include "lz4defs.h"
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h> /* memset */
+
+/* *************************************
+ *	Local Constants and types
+ ***************************************/
 
-struct lz4hc_data {
-	const u8 *base;
-	HTYPE hashtable[HASHTABLESIZE];
-	u16 chaintable[MAXD];
-	const u8 *nexttoupdate;
-} __attribute__((__packed__));
+#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH)
 
-static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
+#define HASH_FUNCTION(i)	(((i) * 2654435761U) \
+	>> ((MINMATCH*8) - LZ4HC_HASH_LOG))
+#define DELTANEXTU16(p)	chainTable[(U16)(p)] /* faster */
+
+static U32 LZ4HC_hashPtr(const void *ptr)
+{
+	return HASH_FUNCTION(LZ4_read32(ptr));
+}
+
+/**************************************
+ *	HC Compression
+ **************************************/
+static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
 {
-	memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
-	memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
-
-#if LZ4_ARCH64
-	hc4->nexttoupdate = base + 1;
-#else
-	hc4->nexttoupdate = base;
-#endif
-	hc4->base = base;
-	return 1;
+	memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable));
+	memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+	hc4->nextToUpdate = 64 * KB;
+	hc4->base = start - 64 * KB;
+	hc4->end = start;
+	hc4->dictBase = start - 64 * KB;
+	hc4->dictLimit = 64 * KB;
+	hc4->lowLimit = 64 * KB;
 }
 
 /* Update chains up to ip (excluded) */
-static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
+static FORCE_INLINE void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+	const BYTE *ip)
 {
-	u16 *chaintable = hc4->chaintable;
-	HTYPE *hashtable  = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const hashTable	= hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
+	U32 const target = (U32)(ip - base);
+	U32 idx = hc4->nextToUpdate;
+
+	while (idx < target) {
+		U32 const h = LZ4HC_hashPtr(base + idx);
+		size_t delta = idx - hashTable[h];
 
-	while (hc4->nexttoupdate < ip) {
-		const u8 *p = hc4->nexttoupdate;
-		size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
 		if (delta > MAX_DISTANCE)
 			delta = MAX_DISTANCE;
-		chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
-		hashtable[HASH_VALUE(p)] = (p) - base;
-		hc4->nexttoupdate++;
-	}
-}
 
-static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
-		const u8 *const matchlimit)
-{
-	const u8 *p1t = p1;
-
-	while (p1t < matchlimit - (STEPSIZE - 1)) {
-#if LZ4_ARCH64
-		u64 diff = A64(p2) ^ A64(p1t);
-#else
-		u32 diff = A32(p2) ^ A32(p1t);
-#endif
-		if (!diff) {
-			p1t += STEPSIZE;
-			p2 += STEPSIZE;
-			continue;
-		}
-		p1t += LZ4_NBCOMMONBYTES(diff);
-		return p1t - p1;
-	}
-#if LZ4_ARCH64
-	if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
-		p1t += 4;
-		p2 += 4;
-	}
-#endif
+		DELTANEXTU16(idx) = (U16)delta;
 
-	if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
-		p1t += 2;
-		p2 += 2;
+		hashTable[h] = idx;
+		idx++;
 	}
-	if ((p1t < matchlimit) && (*p2 == *p1t))
-		p1t++;
-	return p1t - p1;
+
+	hc4->nextToUpdate = target;
 }
 
-static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
-		const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
+static FORCE_INLINE int LZ4HC_InsertAndFindBestMatch(
+	LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
+	const BYTE *ip,
+	const BYTE * const iLimit,
+	const BYTE **matchpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-	const u8 *ref;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	int nbattempts = MAX_NB_ATTEMPTS;
-	size_t repl = 0, ml = 0;
-	u16 delta;
+	const BYTE * const dictBase = hc4->dictBase;
+	const U32 dictLimit = hc4->dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	U32 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	size_t ml = 0;
 
 	/* HC4 match finder */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	/* potential repetition */
-	if (ref >= ip-4) {
-		/* confirmed */
-		if (A32(ref) == A32(ip)) {
-			delta = (u16)(ip-ref);
-			repl = ml  = lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
-			*matchpos = ref;
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE * const match = base + matchIndex;
+
+			if (*(match + ml) == *(ip + ml)
+				&& (LZ4_read32(match) == LZ4_read32(ip))) {
+				size_t const mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, iLimit) + MINMATCH;
 
-	while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
-		nbattempts--;
-		if (*(ref + ml) == *(ip + ml)) {
-			if (A32(ref) == A32(ip)) {
-				size_t mlt =
-					lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
 				if (mlt > ml) {
 					ml = mlt;
-					*matchpos = ref;
+					*matchpos = match;
+				}
+			}
+		} else {
+			const BYTE * const match = dictBase + matchIndex;
+
+			if (LZ4_read32(match) == LZ4_read32(ip)) {
+				size_t mlt;
+				const BYTE *vLimit = ip
+					+ (dictLimit - matchIndex);
+
+				if (vLimit > iLimit)
+					vLimit = iLimit;
+				mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, vLimit) + MINMATCH;
+				if ((ip + mlt == vLimit)
+					&& (vLimit < iLimit))
+					mlt += LZ4_count(ip + mlt,
+						base + dictLimit,
+						iLimit);
+				if (mlt > ml) {
+					/* virtual matchpos */
+					ml = mlt;
+					*matchpos = base + matchIndex;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-
-	/* Complete table */
-	if (repl) {
-		const BYTE *ptr = ip;
-		const BYTE *end;
-		end = ip + repl - (MINMATCH-1);
-		/* Pre-Load */
-		while (ptr < end - delta) {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			ptr++;
-		}
-		do {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			/* Head of chain */
-			hashtable[HASH_VALUE(ptr)] = (ptr) - base;
-			ptr++;
-		} while (ptr < end);
-		hc4->nexttoupdate = end;
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
 
 	return (int)ml;
 }
 
-static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
-	const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
-	const u8 **matchpos, const u8 **startpos)
+static FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch(
+	LZ4HC_CCtx_internal *hc4,
+	const BYTE * const ip,
+	const BYTE * const iLowLimit,
+	const BYTE * const iHighLimit,
+	int longest,
+	const BYTE **matchpos,
+	const BYTE **startpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	const u8 *ref;
-	int nbattempts = MAX_NB_ATTEMPTS;
-	int delta = (int)(ip - startlimit);
+	const U32 dictLimit = hc4->dictLimit;
+	const BYTE * const lowPrefixPtr = base + dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	const BYTE * const dictBase = hc4->dictBase;
+	U32 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	int delta = (int)(ip - iLowLimit);
 
 	/* First Match */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
-		&& (nbattempts)) {
-		nbattempts--;
-		if (*(startlimit + longest) == *(ref - delta + longest)) {
-			if (A32(ref) == A32(ip)) {
-				const u8 *reft = ref + MINMATCH;
-				const u8 *ipt = ip + MINMATCH;
-				const u8 *startt = ip;
-
-				while (ipt < matchlimit-(STEPSIZE - 1)) {
-					#if LZ4_ARCH64
-					u64 diff = A64(reft) ^ A64(ipt);
-					#else
-					u32 diff = A32(reft) ^ A32(ipt);
-					#endif
-
-					if (!diff) {
-						ipt += STEPSIZE;
-						reft += STEPSIZE;
-						continue;
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE *matchPtr = base + matchIndex;
+
+			if (*(iLowLimit + longest)
+				== *(matchPtr - delta + longest)) {
+				if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+					int mlt = MINMATCH + LZ4_count(
+						ip + MINMATCH,
+						matchPtr + MINMATCH,
+						iHighLimit);
+					int back = 0;
+
+					while ((ip + back > iLowLimit)
+						&& (matchPtr + back > lowPrefixPtr)
+						&& (ip[back - 1] == matchPtr[back - 1]))
+						back--;
+
+					mlt -= back;
+
+					if (mlt > longest) {
+						longest = (int)mlt;
+						*matchpos = matchPtr + back;
+						*startpos = ip + back;
 					}
-					ipt += LZ4_NBCOMMONBYTES(diff);
-					goto _endcount;
-				}
-				#if LZ4_ARCH64
-				if ((ipt < (matchlimit - 3))
-					&& (A32(reft) == A32(ipt))) {
-					ipt += 4;
-					reft += 4;
 				}
-				ipt += 2;
-				#endif
-				if ((ipt < (matchlimit - 1))
-					&& (A16(reft) == A16(ipt))) {
-					reft += 2;
-				}
-				if ((ipt < matchlimit) && (*reft == *ipt))
-					ipt++;
-_endcount:
-				reft = ref;
-
-				while ((startt > startlimit)
-					&& (reft > hc4->base)
-					&& (startt[-1] == reft[-1])) {
-					startt--;
-					reft--;
-				}
-
-				if ((ipt - startt) > longest) {
-					longest = (int)(ipt - startt);
-					*matchpos = reft;
-					*startpos = startt;
+			}
+		} else {
+			const BYTE * const matchPtr = dictBase + matchIndex;
+
+			if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+				size_t mlt;
+				int back = 0;
+				const BYTE *vLimit = ip + (dictLimit - matchIndex);
+
+				if (vLimit > iHighLimit)
+					vLimit = iHighLimit;
+
+				mlt = LZ4_count(ip + MINMATCH,
+					matchPtr + MINMATCH, vLimit) + MINMATCH;
+
+				if ((ip + mlt == vLimit) && (vLimit < iHighLimit))
+					mlt += LZ4_count(ip + mlt, base + dictLimit,
+						iHighLimit);
+				while ((ip + back > iLowLimit)
+					&& (matchIndex + back > lowLimit)
+					&& (ip[back - 1] == matchPtr[back - 1]))
+					back--;
+
+				mlt -= back;
+
+				if ((int)mlt > longest) {
+					longest = (int)mlt;
+					*matchpos = base + matchIndex + back;
+					*startpos = ip + back;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
+
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
+
 	return longest;
 }
 
-static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
-		int ml, const u8 *ref)
+static FORCE_INLINE int LZ4HC_encodeSequence(
+	const BYTE **ip,
+	BYTE **op,
+	const BYTE **anchor,
+	int matchLength,
+	const BYTE * const match,
+	limitedOutput_directive limitedOutputBuffer,
+	BYTE *oend)
 {
-	int length, len;
-	u8 *token;
+	int length;
+	BYTE *token;
 
 	/* Encode Literal length */
 	length = (int)(*ip - *anchor);
 	token = (*op)++;
+
+	if ((limitedOutputBuffer)
+		&& ((*op + (length>>8)
+			+ length + (2 + 1 + LASTLITERALS)) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
 	if (length >= (int)RUN_MASK) {
-		*token = (RUN_MASK << ML_BITS);
+		int len;
+
+		*token = (RUN_MASK<<ML_BITS);
 		len = length - RUN_MASK;
 		for (; len > 254 ; len -= 255)
 			*(*op)++ = 255;
-		*(*op)++ = (u8)len;
+		*(*op)++ = (BYTE)len;
 	} else
-		*token = (length << ML_BITS);
+		*token = (BYTE)(length<<ML_BITS);
 
 	/* Copy Literals */
-	LZ4_BLINDCOPY(*anchor, *op, length);
+	LZ4_wildCopy(*op, *anchor, (*op) + length);
+	*op += length;
 
 	/* Encode Offset */
-	LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
+	LZ4_writeLE16(*op, (U16)(*ip - match));
+	*op += 2;
 
 	/* Encode MatchLength */
-	len = (int)(ml - MINMATCH);
-	if (len >= (int)ML_MASK) {
+	length = (int)(matchLength - MINMATCH);
+
+	if ((limitedOutputBuffer)
+		&& (*op + (length>>8)
+			+ (1 + LASTLITERALS) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
+
+	if (length >= (int)ML_MASK) {
 		*token += ML_MASK;
-		len -= ML_MASK;
-		for (; len > 509 ; len -= 510) {
+		length -= ML_MASK;
+
+		for (; length > 509 ; length -= 510) {
 			*(*op)++ = 255;
 			*(*op)++ = 255;
 		}
-		if (len > 254) {
-			len -= 255;
+
+		if (length > 254) {
+			length -= 255;
 			*(*op)++ = 255;
 		}
-		*(*op)++ = (u8)len;
+
+		*(*op)++ = (BYTE)length;
 	} else
-		*token += len;
+		*token += (BYTE)(length);
 
 	/* Prepare next loop */
-	*ip += ml;
+	*ip += matchLength;
 	*anchor = *ip;
 
 	return 0;
 }
 
-static int lz4_compresshcctx(struct lz4hc_data *ctx,
-		const char *source,
-		char *dest,
-		int isize)
+static int LZ4HC_compress_generic(
+	LZ4HC_CCtx_internal *const ctx,
+	const char * const source,
+	char * const dest,
+	int const inputSize,
+	int const maxOutputSize,
+	int compressionLevel,
+	limitedOutput_directive limit
+	)
 {
-	const u8 *ip = (const u8 *)source;
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	const u8 *const matchlimit = (iend - LASTLITERALS);
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = (iend - LASTLITERALS);
 
-	u8 *op = (u8 *)dest;
+	BYTE *op = (BYTE *) dest;
+	BYTE * const oend = op + maxOutputSize;
 
+	unsigned int maxNbAttempts;
 	int ml, ml2, ml3, ml0;
-	const u8 *ref = NULL;
-	const u8 *start2 = NULL;
-	const u8 *ref2 = NULL;
-	const u8 *start3 = NULL;
-	const u8 *ref3 = NULL;
-	const u8 *start0;
-	const u8 *ref0;
-	int lastrun;
+	const BYTE *ref = NULL;
+	const BYTE *start2 = NULL;
+	const BYTE *ref2 = NULL;
+	const BYTE *start3 = NULL;
+	const BYTE *ref3 = NULL;
+	const BYTE *start0;
+	const BYTE *ref0;
+
+	/* init */
+	if (compressionLevel > LZ4HC_MAX_CLEVEL)
+		compressionLevel = LZ4HC_MAX_CLEVEL;
+	if (compressionLevel < 1)
+		compressionLevel = LZ4HC_DEFAULT_CLEVEL;
+	maxNbAttempts = 1 << (compressionLevel - 1);
+	ctx->end += inputSize;
 
 	ip++;
 
 	/* Main Loop */
 	while (ip < mflimit) {
-		ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
+		ml = LZ4HC_InsertAndFindBestMatch(ctx, ip,
+			matchlimit, (&ref), maxNbAttempts);
 		if (!ml) {
 			ip++;
 			continue;
@@ -351,51 +387,59 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		start0 = ip;
 		ref0 = ref;
 		ml0 = ml;
-_search2:
-		if (ip+ml < mflimit)
-			ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
-				ip + 1, matchlimit, ml, &ref2, &start2);
+
+_Search2:
+		if (ip + ml < mflimit)
+			ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				ip + ml - 2, ip + 0,
+				matchlimit, ml, &ref2,
+				&start2, maxNbAttempts);
 		else
 			ml2 = ml;
-		/* No better match */
+
 		if (ml2 == ml) {
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			/* No better match */
+			if (LZ4HC_encodeSequence(&ip, &op,
+				&anchor, ml, ref, limit, oend))
+				return 0;
 			continue;
 		}
 
 		if (start0 < ip) {
-			/* empirical */
 			if (start2 < ip + ml0) {
+				/* empirical */
 				ip = start0;
 				ref = ref0;
 				ml = ml0;
 			}
 		}
-		/*
-		 * Here, start0==ip
-		 * First Match too small : removed
-		 */
+
+		/* Here, start0 == ip */
 		if ((start2 - ip) < 3) {
+			/* First Match too small : removed */
 			ml = ml2;
 			ip = start2;
 			ref = ref2;
-			goto _search2;
+			goto _Search2;
 		}
 
-_search3:
+_Search3:
 		/*
-		 * Currently we have :
-		 * ml2 > ml1, and
-		 * ip1+3 <= ip2 (usually < ip1+ml1)
-		 */
+		* Currently we have :
+		* ml2 > ml1, and
+		* ip1 + 3 <= ip2 (usually < ip1 + ml1)
+		*/
 		if ((start2 - ip) < OPTIMAL_ML) {
 			int correction;
 			int new_ml = ml;
+
 			if (new_ml > OPTIMAL_ML)
 				new_ml = OPTIMAL_ML;
 			if (ip + new_ml > start2 + ml2 - MINMATCH)
 				new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+
 			correction = new_ml - (int)(start2 - ip);
+
 			if (correction > 0) {
 				start2 += correction;
 				ref2 += correction;
@@ -403,39 +447,44 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			}
 		}
 		/*
-		 * Now, we have start2 = ip+new_ml,
-		 * with new_ml=min(ml, OPTIMAL_ML=18)
+		 * Now, we have start2 = ip + new_ml,
+		 * with new_ml = min(ml, OPTIMAL_ML = 18)
 		 */
+
 		if (start2 + ml2 < mflimit)
-			ml3 = lz4hc_insertandgetwidermatch(ctx,
-				start2 + ml2 - 3, start2, matchlimit,
-				ml2, &ref3, &start3);
+			ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				start2 + ml2 - 3, start2,
+				matchlimit, ml2, &ref3, &start3,
+				maxNbAttempts);
 		else
 			ml3 = ml2;
 
-		/* No better match : 2 sequences to encode */
 		if (ml3 == ml2) {
+			/* No better match : 2 sequences to encode */
 			/* ip & ref are known; Now for ml */
-			if (start2 < ip+ml)
+			if (start2 < ip + ml)
 				ml = (int)(start2 - ip);
-
 			/* Now, encode 2 sequences */
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml, ref, limit, oend))
+				return 0;
 			ip = start2;
-			lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml2, ref2, limit, oend))
+				return 0;
 			continue;
 		}
 
-		/* Not enough space for match 2 : remove it */
 		if (start3 < ip + ml + 3) {
-			/*
-			 * can write Seq1 immediately ==> Seq2 is removed,
-			 * so Seq3 becomes Seq1
-			 */
+			/* Not enough space for match 2 : remove it */
 			if (start3 >= (ip + ml)) {
+				/* can write Seq1 immediately
+				 * ==> Seq2 is removed,
+				 * so Seq3 becomes Seq1
+				 */
 				if (start2 < ip + ml) {
-					int correction =
-						(int)(ip + ml - start2);
+					int correction = (int)(ip + ml - start2);
+
 					start2 += correction;
 					ref2 += correction;
 					ml2 -= correction;
@@ -446,35 +495,38 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 					}
 				}
 
-				lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-				ip  = start3;
+				if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+					ml, ref, limit, oend))
+					return 0;
+				ip = start3;
 				ref = ref3;
-				ml  = ml3;
+				ml = ml3;
 
 				start0 = start2;
 				ref0 = ref2;
 				ml0 = ml2;
-				goto _search2;
+				goto _Search2;
 			}
 
 			start2 = start3;
 			ref2 = ref3;
 			ml2 = ml3;
-			goto _search3;
+			goto _Search3;
 		}
 
 		/*
-		 * OK, now we have 3 ascending matches; let's write at least
-		 * the first one ip & ref are known; Now for ml
-		 */
+		* OK, now we have 3 ascending matches;
+		* let's write at least the first one
+		* ip & ref are known; Now for ml
+		*/
 		if (start2 < ip + ml) {
 			if ((start2 - ip) < (int)ML_MASK) {
 				int correction;
+
 				if (ml > OPTIMAL_ML)
 					ml = OPTIMAL_ML;
 				if (ip + ml > start2 + ml2 - MINMATCH)
-					ml = (int)(start2 - ip) + ml2
-						- MINMATCH;
+					ml = (int)(start2 - ip) + ml2 - MINMATCH;
 				correction = ml - (int)(start2 - ip);
 				if (correction > 0) {
 					start2 += correction;
@@ -484,7 +536,9 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			} else
 				ml = (int)(start2 - ip);
 		}
-		lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+		if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml,
+			ref, limit, oend))
+			return 0;
 
 		ip = start2;
 		ref = ref2;
@@ -494,46 +548,245 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		ref2 = ref3;
 		ml2 = ml3;
 
-		goto _search3;
+		goto _Search3;
 	}
 
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8) lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{
+		int lastRun = (int)(iend - anchor);
+
+		if ((limit)
+			&& (((char *)op - dest) + lastRun + 1
+				+ ((lastRun + 255 - RUN_MASK)/255)
+					> (U32)maxOutputSize)) {
+			/* Check output limit */
+			return 0;
+		}
+		if (lastRun >= (int)RUN_MASK) {
+			*op++ = (RUN_MASK<<ML_BITS);
+			lastRun -= RUN_MASK;
+			for (; lastRun > 254 ; lastRun -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) lastRun;
+		} else
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		memcpy(op, anchor, iend - anchor);
+		op += iend - anchor;
+	}
+
 	/* End */
 	return (int) (((char *)op) - dest);
 }
 
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_HC_extStateHC(
+	void *state,
+	const char *src,
+	char *dst,
+	int srcSize,
+	int maxDstSize,
+	int compressionLevel)
 {
-	int ret = -1;
-	int out_len = 0;
+	LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t *)state)->internal_donotuse;
 
-	struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
-	lz4hc_init(hc4, (const u8 *)src);
-	out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
-		(char *)dst, (int)src_len);
+	if (((size_t)(state)&(sizeof(void *) - 1)) != 0) {
+		/* Error : state is not aligned
+		 * for pointers (32 or 64 bits)
+		 */
+		return 0;
+	}
 
-	if (out_len < 0)
-		goto exit;
+	LZ4HC_init(ctx, (const BYTE *)src);
 
-	*dst_len = out_len;
-	return 0;
+	if (maxDstSize < LZ4_compressBound(srcSize))
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, limitedOutput);
+	else
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, noLimit);
+}
+
+int LZ4_compress_HC(const char *src, char *dst, int srcSize,
+	int maxDstSize, int compressionLevel, void *wrkmem)
+{
+	return LZ4_compress_HC_extStateHC(wrkmem, src, dst,
+		srcSize, maxDstSize, compressionLevel);
+}
+EXPORT_SYMBOL(LZ4_compress_HC);
+
+/**************************************
+ *	Streaming Functions
+ **************************************/
+void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel)
+{
+	LZ4_streamHCPtr->internal_donotuse.base = NULL;
+	LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned int)compressionLevel;
+}
+
+int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *dictionary,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	if (dictSize > 64 * KB) {
+		dictionary += dictSize - 64 * KB;
+		dictSize = 64 * KB;
+	}
+	LZ4HC_init(ctxPtr, (const BYTE *)dictionary);
+	if (dictSize >= 4)
+		LZ4HC_Insert(ctxPtr, (const BYTE *)dictionary + (dictSize - 3));
+	ctxPtr->end = (const BYTE *)dictionary + dictSize;
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDictHC);
+
+/* compression */
+
+static void LZ4HC_setExternalDict(
+	LZ4HC_CCtx_internal *ctxPtr,
+	const BYTE *newBlock)
+{
+	if (ctxPtr->end >= ctxPtr->base + 4) {
+		/* Referencing remaining dictionary content */
+		LZ4HC_Insert(ctxPtr, ctxPtr->end - 3);
+	}
+
+	/*
+	 * Only one memory segment for extDict,
+	 * so any previous extDict is lost at this stage
+	 */
+	ctxPtr->lowLimit	= ctxPtr->dictLimit;
+	ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+	ctxPtr->dictBase	= ctxPtr->base;
+	ctxPtr->base = newBlock - ctxPtr->dictLimit;
+	ctxPtr->end	= newBlock;
+	/* match referencing will resume from there */
+	ctxPtr->nextToUpdate = ctxPtr->dictLimit;
+}
+EXPORT_SYMBOL(LZ4HC_setExternalDict);
+
+static int LZ4_compressHC_continue_generic(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize,
+	limitedOutput_directive limit)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	/* auto - init if forgotten */
+	if (ctxPtr->base == NULL)
+		LZ4HC_init(ctxPtr, (const BYTE *) source);
+
+	/* Check overflow */
+	if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) {
+		size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base)
+			- ctxPtr->dictLimit;
+		if (dictSize > 64 * KB)
+			dictSize = 64 * KB;
+		LZ4_loadDictHC(LZ4_streamHCPtr,
+			(const char *)(ctxPtr->end) - dictSize, (int)dictSize);
+	}
+
+	/* Check if blocks follow each other */
+	if ((const BYTE *)source != ctxPtr->end)
+		LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source);
+
+	/* Check overlapping input/dictionary space */
+	{
+		const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+		const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+		const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
+
+		if ((sourceEnd > dictBegin)
+			&& ((const BYTE *)source < dictEnd)) {
+			if (sourceEnd > dictEnd)
+				sourceEnd = dictEnd;
+			ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+
+			if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4)
+				ctxPtr->lowLimit = ctxPtr->dictLimit;
+		}
+	}
+
+	return LZ4HC_compress_generic(ctxPtr, source, dest,
+		inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize)
+{
+	if (maxOutputSize < LZ4_compressBound(inputSize))
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, limitedOutput);
+	else
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, noLimit);
+}
+EXPORT_SYMBOL(LZ4_compress_HC_continue);
+
+/* dictionary saving */
+
+int LZ4_saveDictHC(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	char *safeBuffer,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+	int const prefixSize = (int)(streamPtr->end
+		- (streamPtr->base + streamPtr->dictLimit));
+
+	if (dictSize > 64 * KB)
+		dictSize = 64 * KB;
+	if (dictSize < 4)
+		dictSize = 0;
+	if (dictSize > prefixSize)
+		dictSize = prefixSize;
+
+	memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
 
-exit:
-	return ret;
+	{
+		U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
+
+		streamPtr->end = (const BYTE *)safeBuffer + dictSize;
+		streamPtr->base = streamPtr->end - endIndex;
+		streamPtr->dictLimit = endIndex - dictSize;
+		streamPtr->lowLimit = endIndex - dictSize;
+
+		if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+			streamPtr->nextToUpdate = streamPtr->dictLimit;
+	}
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDictHC);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4hc_compress(const unsigned char *src, size_t src_len,
+	unsigned char *dst, size_t *dst_len, void *wrkmem)
+{
+	*dst_len = LZ4_compress_HC(src, dst, src_len,
+		*dst_len, LZ4HC_DEFAULT_CLEVEL, wrkmem);
+
+	/*
+	 * Prior lz4hc_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_HC
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!*dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4hc_compress);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4HC compressor");
+MODULE_DESCRIPTION("LZ4 HC compressor");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v8 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version
  2017-02-15 18:16 ` [PATCH v8 " Sven Schmidt
  2017-02-15 18:16   ` [PATCH v8 1/5] lib: " Sven Schmidt
@ 2017-02-15 18:16   ` Sven Schmidt
  2017-02-15 18:16   ` [PATCH v8 3/5] crypto: Change LZ4 modules " Sven Schmidt
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-15 18:16 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

Update the unlz4 wrapper to work with the updated LZ4 kernel module
version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 lib/decompress_unlz4.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 036fc88..1b0baf3 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 		error("NULL input pointer and missing fill function");
 		goto exit_1;
 	} else {
-		inp = large_malloc(lz4_compressbound(uncomp_chunksize));
+		inp = large_malloc(LZ4_compressBound(uncomp_chunksize));
 		if (!inp) {
 			error("Could not allocate input buffer");
 			goto exit_1;
@@ -136,7 +136,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			inp += 4;
 			size -= 4;
 		} else {
-			if (chunksize > lz4_compressbound(uncomp_chunksize)) {
+			if (chunksize > LZ4_compressBound(uncomp_chunksize)) {
 				error("chunk length is longer than allocated");
 				goto exit_2;
 			}
@@ -152,11 +152,14 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			out_len -= dest_len;
 		} else
 			dest_len = out_len;
-		ret = lz4_decompress(inp, &chunksize, outp, dest_len);
+
+		ret = LZ4_decompress_fast(inp, outp, dest_len);
+		chunksize = ret;
 #else
 		dest_len = uncomp_chunksize;
-		ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
-				&dest_len);
+
+		ret = LZ4_decompress_safe(inp, outp, chunksize, dest_len);
+		dest_len = ret;
 #endif
 		if (ret < 0) {
 			error("Decoding failed");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v8 3/5] crypto: Change LZ4 modules to work with new LZ4 module version
  2017-02-15 18:16 ` [PATCH v8 " Sven Schmidt
  2017-02-15 18:16   ` [PATCH v8 1/5] lib: " Sven Schmidt
  2017-02-15 18:16   ` [PATCH v8 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
@ 2017-02-15 18:16   ` Sven Schmidt
  2017-02-15 18:16   ` [PATCH v8 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
  2017-02-15 18:16   ` [PATCH v8 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
  4 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-15 18:16 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

Update the crypto modules using LZ4 compression as well as the test cases
in testmgr.h to work with the new LZ4 module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 crypto/lz4.c     |  23 ++++-----
 crypto/lz4hc.c   |  23 ++++-----
 crypto/testmgr.h | 142 +++++++++++++++++++++++++++++++++++++++----------------
 3 files changed, 120 insertions(+), 68 deletions(-)

diff --git a/crypto/lz4.c b/crypto/lz4.c
index 99c1b2c..71eff9b 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -66,15 +66,13 @@ static void lz4_exit(struct crypto_tfm *tfm)
 static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
 				 u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_default(src, dst,
+		slen, *dlen, ctx);
 
-	err = lz4_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (!out_len)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -96,16 +94,13 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, *dlen);
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
-		return -EINVAL;
+	if (out_len < 0)
+		return out_len;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return 0;
 }
 
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 75ffc4a..03a34a8 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -65,15 +65,13 @@ static void lz4hc_exit(struct crypto_tfm *tfm)
 static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_HC(src, dst, slen,
+		*dlen, LZ4HC_DEFAULT_CLEVEL, ctx);
 
-	err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (!out_len)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -97,16 +95,13 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
 				     u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, *dlen);
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
-		return -EINVAL;
+	if (out_len < 0)
+		return out_len;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return 0;
 }
 
 static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 9b656be..98d4be0 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -34498,31 +34498,62 @@ static struct hash_testvec bfin_crc_tv_template[] = {
 
 static struct comp_testvec lz4_comp_tv_template[] = {
 	{
-		.inlen	= 70,
-		.outlen	= 45,
-		.input	= "Join us now and share the software "
-			  "Join us now and share the software ",
-		.output = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
-			  "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
-			  "\x64\x20\x73\x68\x61\x72\x65\x20"
-			  "\x74\x68\x65\x20\x73\x6f\x66\x74"
-			  "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
-			  "\x77\x61\x72\x65\x20",
+		.inlen	= 255,
+		.outlen	= 218,
+		.input	= "LZ4 is lossless compression algorithm, providing"
+			 " compression speed at 400 MB/s per core, scalable "
+			 "with multi-cores CPU. It features an extremely fast "
+			 "decoder, with speed in multiple GB/s per core, "
+			 "typically reaching RAM speed limits on multi-core "
+			 "systems.",
+		.output	= "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+			  "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+			  "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+			  "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+			  "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+			  "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+			  "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+			  "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+			  "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+			  "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+			  "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+			  "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+			  "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+			  "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+			  "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x83"
+			  "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x3f\x00\x01\x85\x00"
+			  "\x90\x20\x73\x79\x73\x74\x65\x6d\x73\x2e",
+
 	},
 };
 
 static struct comp_testvec lz4_decomp_tv_template[] = {
 	{
-		.inlen	= 45,
-		.outlen	= 70,
-		.input  = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
-			  "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
-			  "\x64\x20\x73\x68\x61\x72\x65\x20"
-			  "\x74\x68\x65\x20\x73\x6f\x66\x74"
-			  "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
-			  "\x77\x61\x72\x65\x20",
-		.output	= "Join us now and share the software "
-			  "Join us now and share the software ",
+		.inlen	= 218,
+		.outlen	= 255,
+		.input	= "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+			  "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+			  "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+			  "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+			  "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+			  "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+			  "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+			  "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+			  "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+			  "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+			  "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+			  "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+			  "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+			  "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+			  "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x83"
+			  "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x3f\x00\x01\x85\x00"
+			  "\x90\x20\x73\x79\x73\x74\x65\x6d\x73\x2e",
+		.output	= "LZ4 is lossless compression algorithm, providing"
+			 " compression speed at 400 MB/s per core, scalable "
+			 "with multi-cores CPU. It features an extremely fast "
+			 "decoder, with speed in multiple GB/s per core, "
+			 "typically reaching RAM speed limits on multi-core "
+			 "systems.",
 	},
 };
 
@@ -34531,31 +34562,62 @@ static struct comp_testvec lz4_decomp_tv_template[] = {
 
 static struct comp_testvec lz4hc_comp_tv_template[] = {
 	{
-		.inlen	= 70,
-		.outlen	= 45,
-		.input	= "Join us now and share the software "
-			  "Join us now and share the software ",
-		.output = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
-			  "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
-			  "\x64\x20\x73\x68\x61\x72\x65\x20"
-			  "\x74\x68\x65\x20\x73\x6f\x66\x74"
-			  "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
-			  "\x77\x61\x72\x65\x20",
+		.inlen	= 255,
+		.outlen	= 216,
+		.input	= "LZ4 is lossless compression algorithm, providing"
+			 " compression speed at 400 MB/s per core, scalable "
+			 "with multi-cores CPU. It features an extremely fast "
+			 "decoder, with speed in multiple GB/s per core, "
+			 "typically reaching RAM speed limits on multi-core "
+			 "systems.",
+		.output = "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+			  "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+			  "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+			  "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+			  "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+			  "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+			  "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+			  "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+			  "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+			  "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+			  "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+			  "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+			  "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+			  "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+			  "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x97"
+			  "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x6e\x85\x00\x90\x20"
+			  "\x73\x79\x73\x74\x65\x6d\x73\x2e",
+
 	},
 };
 
 static struct comp_testvec lz4hc_decomp_tv_template[] = {
 	{
-		.inlen	= 45,
-		.outlen	= 70,
-		.input  = "\xf0\x10\x4a\x6f\x69\x6e\x20\x75"
-			  "\x73\x20\x6e\x6f\x77\x20\x61\x6e"
-			  "\x64\x20\x73\x68\x61\x72\x65\x20"
-			  "\x74\x68\x65\x20\x73\x6f\x66\x74"
-			  "\x77\x0d\x00\x0f\x23\x00\x0b\x50"
-			  "\x77\x61\x72\x65\x20",
-		.output	= "Join us now and share the software "
-			  "Join us now and share the software ",
+		.inlen	= 216,
+		.outlen	= 255,
+		.input	= "\xf9\x21\x4c\x5a\x34\x20\x69\x73\x20\x6c\x6f\x73\x73"
+			  "\x6c\x65\x73\x73\x20\x63\x6f\x6d\x70\x72\x65\x73\x73"
+			  "\x69\x6f\x6e\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d"
+			  "\x2c\x20\x70\x72\x6f\x76\x69\x64\x69\x6e\x67\x21\x00"
+			  "\xf0\x21\x73\x70\x65\x65\x64\x20\x61\x74\x20\x34\x30"
+			  "\x30\x20\x4d\x42\x2f\x73\x20\x70\x65\x72\x20\x63\x6f"
+			  "\x72\x65\x2c\x20\x73\x63\x61\x6c\x61\x62\x6c\x65\x20"
+			  "\x77\x69\x74\x68\x20\x6d\x75\x6c\x74\x69\x2d\x1a\x00"
+			  "\xf0\x00\x73\x20\x43\x50\x55\x2e\x20\x49\x74\x20\x66"
+			  "\x65\x61\x74\x75\x11\x00\xf2\x0b\x61\x6e\x20\x65\x78"
+			  "\x74\x72\x65\x6d\x65\x6c\x79\x20\x66\x61\x73\x74\x20"
+			  "\x64\x65\x63\x6f\x64\x65\x72\x2c\x3d\x00\x02\x67\x00"
+			  "\x22\x69\x6e\x46\x00\x5a\x70\x6c\x65\x20\x47\x6c\x00"
+			  "\xf0\x00\x74\x79\x70\x69\x63\x61\x6c\x6c\x79\x20\x72"
+			  "\x65\x61\x63\x68\xa7\x00\x33\x52\x41\x4d\x38\x00\x97"
+			  "\x6c\x69\x6d\x69\x74\x73\x20\x6f\x6e\x85\x00\x90\x20"
+			  "\x73\x79\x73\x74\x65\x6d\x73\x2e",
+		.output	= "LZ4 is lossless compression algorithm, providing"
+			 " compression speed at 400 MB/s per core, scalable "
+			 "with multi-cores CPU. It features an extremely fast "
+			 "decoder, with speed in multiple GB/s per core, "
+			 "typically reaching RAM speed limits on multi-core "
+			 "systems.",
 	},
 };
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v8 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version
  2017-02-15 18:16 ` [PATCH v8 " Sven Schmidt
                     ` (2 preceding siblings ...)
  2017-02-15 18:16   ` [PATCH v8 3/5] crypto: Change LZ4 modules " Sven Schmidt
@ 2017-02-15 18:16   ` Sven Schmidt
  2017-02-15 18:16   ` [PATCH v8 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
  4 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-15 18:16 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

Update fs/pstore and fs/squashfs to use the updated functions from the new
LZ4 module.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 fs/pstore/platform.c      | 22 +++++++++++++---------
 fs/squashfs/lz4_wrapper.c | 12 ++++++------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 729677e..efab7b6 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -342,31 +342,35 @@ static int compress_lz4(const void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_compress(in, inlen, out, &outlen, workspace);
-	if (ret) {
-		pr_err("lz4_compress error, ret = %d!\n", ret);
+	ret = LZ4_compress_default(in, out, inlen, outlen, workspace);
+	if (!ret) {
+		pr_err("LZ4_compress_default error; compression failed!\n");
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static int decompress_lz4(void *in, void *out, size_t inlen, size_t outlen)
 {
 	int ret;
 
-	ret = lz4_decompress_unknownoutputsize(in, inlen, out, &outlen);
-	if (ret) {
-		pr_err("lz4_decompress error, ret = %d!\n", ret);
+	ret = LZ4_decompress_safe(in, out, inlen, outlen);
+	if (ret < 0) {
+		/*
+		 * LZ4_decompress_safe will return an error code
+		 * (< 0) if decompression failed
+		 */
+		pr_err("LZ4_decompress_safe error, ret = %d!\n", ret);
 		return -EIO;
 	}
 
-	return outlen;
+	return ret;
 }
 
 static void allocate_lz4(void)
 {
-	big_oops_buf_sz = lz4_compressbound(psinfo->bufsize);
+	big_oops_buf_sz = LZ4_compressBound(psinfo->bufsize);
 	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
 	if (big_oops_buf) {
 		workspace = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index ff4468b..95da653 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -97,7 +97,6 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	struct squashfs_lz4 *stream = strm;
 	void *buff = stream->input, *data;
 	int avail, i, bytes = length, res;
-	size_t dest_len = output->length;
 
 	for (i = 0; i < b; i++) {
 		avail = min(bytes, msblk->devblksize - offset);
@@ -108,12 +107,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 		put_bh(bh[i]);
 	}
 
-	res = lz4_decompress_unknownoutputsize(stream->input, length,
-					stream->output, &dest_len);
-	if (res)
+	res = LZ4_decompress_safe(stream->input, stream->output,
+		length, output->length);
+
+	if (res < 0)
 		return -EIO;
 
-	bytes = dest_len;
+	bytes = res;
 	data = squashfs_first_page(output);
 	buff = stream->output;
 	while (data) {
@@ -128,7 +128,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	}
 	squashfs_finish_page(output);
 
-	return dest_len;
+	return res;
 }
 
 const struct squashfs_decompressor squashfs_lz4_comp_ops = {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

* [PATCH v8 5/5] lib/lz4: Remove back-compat wrappers
  2017-02-15 18:16 ` [PATCH v8 " Sven Schmidt
                     ` (3 preceding siblings ...)
  2017-02-15 18:16   ` [PATCH v8 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
@ 2017-02-15 18:16   ` Sven Schmidt
  4 siblings, 0 replies; 104+ messages in thread
From: Sven Schmidt @ 2017-02-15 18:16 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, Sven Schmidt

Remove the functions introduced as wrappers for providing backwards
compatibility to the prior LZ4 version.  They're not needed anymore since
there's no callers left.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      | 69 ------------------------------------------------
 lib/lz4/lz4_compress.c   | 22 ---------------
 lib/lz4/lz4_decompress.c | 42 -----------------------------
 lib/lz4/lz4hc_compress.c | 23 ----------------
 4 files changed, 156 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 1b0f8ca..394e3d9 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -173,18 +173,6 @@ static inline int LZ4_compressBound(size_t isize)
 }
 
 /**
- * lz4_compressbound() - For backwards compatibility; see LZ4_compressBound
- * @isize: Size of the input data
- *
- * Return: Max. size LZ4 may output in a "worst case" szenario
- *	(data not compressible)
- */
-static inline int lz4_compressbound(size_t isize)
-{
-	return LZ4_COMPRESSBOUND(isize);
-}
-
-/**
  * LZ4_compress_default() - Compress data from source to dest
  * @source: source address of the original data
  * @dest: output buffer address of the compressed data
@@ -257,20 +245,6 @@ int LZ4_compress_fast(const char *source, char *dest, int inputSize,
 int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
 	int targetDestSize, void *wrkmem);
 
-/*
- * lz4_compress() - For backward compatibility, see LZ4_compress_default
- * @src: source address of the original data
- * @src_len: size of the original data
- * @dst: output buffer address of the compressed data. This requires 'dst'
- *	of size LZ4_COMPRESSBOUND
- * @dst_len: is the output size, which is returned after compress done
- * @workmem: address of the working memory.
- *
- * Return: Success if return 0, Error if return < 0
- */
-int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem);
-
 /*-************************************************************************
  *	Decompression Functions
  **************************************************************************/
@@ -346,34 +320,6 @@ int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
 int LZ4_decompress_safe_partial(const char *source, char *dest,
 	int compressedSize, int targetOutputSize, int maxDecompressedSize);
 
-/*
- * lz4_decompress_unknownoutputsize() - For backwards compatibility,
- *	see LZ4_decompress_safe
- * @src: source address of the compressed data
- * @src_len: is the input size, therefore the compressed size
- * @dest: output buffer address of the decompressed data
- *	which must be already allocated
- * @dest_len: is the max size of the destination buffer, which is
- *	returned with actual size of decompressed data after decompress done
- *
- * Return: Success if return 0, Error if return (< 0)
- */
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-	unsigned char *dest, size_t *dest_len);
-
-/**
- * lz4_decompress() - For backwards cocmpatibility, see LZ4_decompress_fast
- * @src: source address of the compressed data
- * @src_len: is the input size, which is returned after decompress done
- * @dest: output buffer address of the decompressed data,
- *	which must be already allocated
- * @actual_dest_len: is the size of uncompressed data, supposing it's known
- *
- * Return: Success if return 0, Error if return (< 0)
- */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-	unsigned char *dest, size_t actual_dest_len);
-
 /*-************************************************************************
  *	LZ4 HC Compression
  **************************************************************************/
@@ -401,21 +347,6 @@ int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
 	int compressionLevel, void *wrkmem);
 
 /**
- * lz4hc_compress() - For backwards compatibility, see LZ4_compress_HC
- * @src: source address of the original data
- * @src_len: size of the original data
- * @dst: output buffer address of the compressed data. This requires 'dst'
- *	of size LZ4_COMPRESSBOUND.
- * @dst_len: is the output size, which is returned after compress done
- * @wrkmem: address of the working memory.
- *	This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
- *
- * Return	: Success if return 0, Error if return (< 0)
- */
-int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem);
-
-/**
  * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure
  * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
  * @compressionLevel: Recommended values are between 4 and 9, although any
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 53f313f..cc7b6d4 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -936,27 +936,5 @@ int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
 }
 EXPORT_SYMBOL(LZ4_compress_fast_continue);
 
-/*-******************************
- *	For backwards compatibility
- ********************************/
-int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
-	size_t *dst_len, void *wrkmem) {
-	*dst_len = LZ4_compress_default(src, dst, src_len,
-		*dst_len, wrkmem);
-
-	/*
-	 * Prior lz4_compress will return -1 in case of error
-	 * and 0 on success
-	 * while new LZ4_compress_fast/default
-	 * returns 0 in case of error
-	 * and the output length on success
-	 */
-	if (!*dst_len)
-		return -1;
-	else
-		return 0;
-}
-EXPORT_SYMBOL(lz4_compress);
-
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 compressor");
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index b5e00a1..bd35743 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -493,46 +493,6 @@ int LZ4_decompress_fast_usingDict(const char *source, char *dest,
 		originalSize, 0, dictStart, dictSize);
 }
 
-/*-******************************
- *	For backwards compatibility
- ********************************/
-int lz4_decompress_unknownoutputsize(const unsigned char *src,
-	size_t src_len, unsigned char *dest, size_t *dest_len) {
-	*dest_len = LZ4_decompress_safe(src, dest,
-		src_len, *dest_len);
-
-	/*
-	 * Prior lz4_decompress_unknownoutputsize will return
-	 * 0 for success and a negative result for error
-	 * new LZ4_decompress_safe returns
-	 * - the length of data read on success
-	 * - and also a negative result on error
-	 * meaning when result > 0, we just return 0 here
-	 */
-	if (src_len > 0)
-		return 0;
-	else
-		return -1;
-}
-
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-	unsigned char *dest, size_t actual_dest_len) {
-	*src_len = LZ4_decompress_fast(src, dest, actual_dest_len);
-
-	/*
-	 * Prior lz4_decompress will return
-	 * 0 for success and a negative result for error
-	 * new LZ4_decompress_fast returns
-	 * - the length of data read on success
-	 * - and also a negative result on error
-	 * meaning when result > 0, we just return 0 here
-	 */
-	if (*src_len > 0)
-		return 0;
-	else
-		return -1;
-}
-
 #ifndef STATIC
 EXPORT_SYMBOL(LZ4_decompress_safe);
 EXPORT_SYMBOL(LZ4_decompress_safe_partial);
@@ -542,8 +502,6 @@ EXPORT_SYMBOL(LZ4_decompress_safe_continue);
 EXPORT_SYMBOL(LZ4_decompress_fast_continue);
 EXPORT_SYMBOL(LZ4_decompress_safe_usingDict);
 EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
-EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
-EXPORT_SYMBOL(lz4_decompress);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 decompressor");
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index e1fbf15..176f03b 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -765,28 +765,5 @@ int LZ4_saveDictHC(
 }
 EXPORT_SYMBOL(LZ4_saveDictHC);
 
-/*-******************************
- *	For backwards compatibility
- ********************************/
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-	unsigned char *dst, size_t *dst_len, void *wrkmem)
-{
-	*dst_len = LZ4_compress_HC(src, dst, src_len,
-		*dst_len, LZ4HC_DEFAULT_CLEVEL, wrkmem);
-
-	/*
-	 * Prior lz4hc_compress will return -1 in case of error
-	 * and 0 on success
-	 * while new LZ4_compress_HC
-	 * returns 0 in case of error
-	 * and the output length on success
-	 */
-	if (!*dst_len)
-		return -1;
-	else
-		return 0;
-}
-EXPORT_SYMBOL(lz4hc_compress);
-
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 HC compressor");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 104+ messages in thread

end of thread, other threads:[~2017-02-15 18:17 UTC | newest]

Thread overview: 104+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
2016-12-20 18:53 ` [PATCH 1/3] crypto: Change lz4 modules to work with new lz4 compressor module version Sven Schmidt
2016-12-21  5:25   ` kbuild test robot
2016-12-22 13:25   ` Sergey Senozhatsky
2016-12-20 18:53 ` [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version Sven Schmidt
2016-12-21  5:16   ` kbuild test robot
2016-12-21  5:18   ` kbuild test robot
2016-12-22 13:21   ` Sergey Senozhatsky
2016-12-22 15:31     ` Sven Schmidt
2016-12-22 15:36       ` Sergey Senozhatsky
2016-12-22 13:27   ` Sergey Senozhatsky
2016-12-20 18:53 ` [PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
2016-12-20 19:52   ` Joe Perches
2016-12-21 20:04     ` Sven Schmidt
2016-12-22 17:31       ` Greg KH
2016-12-22 18:39         ` Sven Schmidt
2016-12-21  7:09   ` kbuild test robot
2016-12-22 17:29 ` [PATCH 0/3] Update LZ4 compressor module Greg KH
2016-12-22 18:35   ` Sven Schmidt
2016-12-23 20:53     ` Greg KH
2017-01-07 16:55 ` [PATCH v2 0/4] " Sven Schmidt
2017-01-07 16:55   ` [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
2017-01-08 11:22     ` Greg KH
2017-01-10  9:32       ` Sven Schmidt
2017-01-10  9:59         ` Greg KH
2017-01-08 11:25     ` Greg KH
2017-01-08 11:33       ` Rui Salvaterra
2017-01-10  9:21       ` Sven Schmidt
2017-01-10 10:00         ` Greg KH
2017-01-10 10:50           ` Willy Tarreau
2017-01-07 16:55   ` [PATCH v2 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-01-08 11:23     ` Greg KH
2017-01-07 16:55   ` [PATCH v2 3/4] crypto: Change LZ4 modules " Sven Schmidt
2017-01-07 16:55   ` [PATCH v2 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to comply " Sven Schmidt
2017-01-07 21:33     ` Kees Cook
2017-01-10  9:45       ` Sven Schmidt
2017-01-21 15:09 ` [PATCH v3 0/4] Update LZ4 compressor module Sven Schmidt
2017-01-21 15:09   ` [PATCH 1/4] lib: " Sven Schmidt
2017-01-21 15:56     ` kbuild test robot
2017-01-21 16:16     ` kbuild test robot
2017-01-21 17:38     ` kbuild test robot
2017-01-22 11:05     ` Greg KH
2017-01-21 15:09   ` [PATCH 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-01-21 15:09   ` [PATCH 3/4] crypto: Change LZ4 modules " Sven Schmidt
2017-01-21 15:09   ` [PATCH 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
2017-01-22 19:35 ` [PATCH v4 0/4] Update LZ4 compressor module Sven Schmidt
2017-01-22 19:35   ` [PATCH v4 1/4] lib: " Sven Schmidt
2017-01-24  0:23     ` Andrew Morton
2017-01-24 16:48       ` Sven Schmidt
2017-01-22 19:35   ` [PATCH v4 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-01-22 19:35   ` [PATCH v4 3/4] crypto: Change LZ4 modules " Sven Schmidt
2017-01-22 19:35   ` [PATCH v4 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
2017-01-26  7:57 ` [PATCH v5 0/5] Update LZ4 compressor module Sven Schmidt
2017-01-26  7:57   ` [PATCH v5 1/5] lib: " Sven Schmidt
2017-01-26  7:57   ` [PATCH v5 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-01-26  7:57   ` [PATCH v5 3/5] crypto: Change LZ4 modules " Sven Schmidt
2017-01-26  7:57   ` [PATCH v5 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
2017-01-26  7:57   ` [PATCH v5 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
2017-01-26  9:19   ` [PATCH v5 0/5] Update LZ4 compressor module Eric Biggers
2017-01-26 14:15     ` Sven Schmidt
2017-01-27 22:01 ` [PATCH v6 " Sven Schmidt
2017-01-27 22:02   ` [PATCH v6 1/5] lib: " Sven Schmidt
2017-01-31 22:27     ` Jonathan Corbet
2017-02-01 20:18       ` Sven Schmidt
2017-01-27 22:02   ` [PATCH v6 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-01-27 22:02   ` [PATCH v6 3/5] crypto: Change LZ4 modules " Sven Schmidt
2017-01-27 22:02   ` [PATCH v6 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
2017-01-27 22:02   ` [PATCH v6 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
2017-02-05 19:09 ` [PATCH v7 0/5] Update LZ4 compressor module Sven Schmidt
2017-02-05 19:09   ` [PATCH v7 1/5] lib: " Sven Schmidt
2017-02-05 19:09   ` [PATCH v7 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-02-05 19:09   ` [PATCH v7 3/5] crypto: Change LZ4 modules " Sven Schmidt
2017-02-05 19:09   ` [PATCH v7 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
2017-02-05 19:09   ` [PATCH v7 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
2017-02-08 23:31   ` [PATCH v7 0/5] Update LZ4 compressor module Minchan Kim
2017-02-09  0:24     ` Eric Biggers
2017-02-09  5:24       ` Eric Biggers
2017-02-09 11:05         ` Sven Schmidt
2017-02-09 18:20           ` Eric Biggers
2017-02-10  0:14         ` Minchan Kim
2017-02-09 11:02       ` Sven Schmidt
2017-02-09 18:29         ` Eric Biggers
2017-02-10  3:57           ` David Miller
2017-02-09 10:56     ` Sven Schmidt
2017-02-10  0:13       ` Minchan Kim
2017-02-12 11:16         ` Sven Schmidt
2017-02-12 11:16           ` [PATCH] lz4: fix performance regressions Sven Schmidt
2017-02-12 11:16             ` Sven Schmidt
2017-02-12 13:05             ` Willy Tarreau
2017-02-12 15:20               ` Sven Schmidt
2017-02-12 21:41                 ` Willy Tarreau
2017-02-13 11:53                   ` Sven Schmidt
2017-02-13 13:37                     ` Willy Tarreau
2017-02-12 23:38             ` Eric Biggers
2017-02-14 10:33               ` Sven Schmidt
2017-02-13  0:03           ` [PATCH v7 0/5] Update LZ4 compressor module Minchan Kim
2017-02-13 12:08             ` Sven Schmidt
2017-02-15  7:29               ` Minchan Kim
2017-02-15 18:16 ` [PATCH v8 " Sven Schmidt
2017-02-15 18:16   ` [PATCH v8 1/5] lib: " Sven Schmidt
2017-02-15 18:16   ` [PATCH v8 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-02-15 18:16   ` [PATCH v8 3/5] crypto: Change LZ4 modules " Sven Schmidt
2017-02-15 18:16   ` [PATCH v8 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
2017-02-15 18:16   ` [PATCH v8 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.