From: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
To: minchan@kernel.org
Cc: ebiggers3@gmail.com, akpm@linux-foundation.org,
	bongkyu.kim@lge.com, rsalvaterra@gmail.com,
	sergey.senozhatsky@gmail.com, gregkh@linuxfoundation.org,
	linux-kernel@vger.kernel.org, herbert@gondor.apana.org.au,
	davem@davemloft.net, linux-crypto@vger.kernel.org,
	anton@enomsg.org, ccross@android.com, keescook@chromium.org,
	tony.luck@intel.com,
	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
Subject: [PATCH] lz4: fix performance regressions
Date: Sun, 12 Feb 2017 12:16:18 +0100
Message-ID: <1486898178-17125-2-git-send-email-4sschmid@informatik.uni-hamburg.de>
In-Reply-To: <1486898178-17125-1-git-send-email-4sschmid@informatik.uni-hamburg.de>

Fix performance regressions compared to the current kernel LZ4: force
inlining of the hot compression and decompression helpers, access
potentially unaligned memory through a __packed union instead of
memcpy(), and use compiler builtins for bit counting unless
LZ4_FORCE_SW_BITCOUNT is defined.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
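
A note on the lz4defs.h changes: reads and writes of potentially
unaligned memory now go through a __packed union instead of memcpy().
A minimal standalone sketch of the technique, using the plain GCC/Clang
attribute spelling (the kernel's __packed):

	#include <stdint.h>
	#include <stdio.h>

	typedef union {
		uint16_t u16;
		uint32_t u32;
		size_t uArch;
	} __attribute__((packed)) unalign;

	/* Read a 32-bit value from a possibly unaligned address. */
	static inline uint32_t read32(const void *ptr)
	{
		return ((const unalign *)ptr)->u32;
	}

	int main(void)
	{
		unsigned char buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

		/* buf + 1 is not 4-byte aligned */
		printf("0x%08x\n", read32(buf + 1));
		return 0;
	}

The packed union tells the compiler the access may be unaligned, so it
can emit a single unaligned load where the architecture supports it
instead of relying on memcpy() being optimized away.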
 include/linux/lz4.h      |   2 +-
 lib/lz4/lz4_compress.c   | 157 +++++++++++++++++++++++-------------
 lib/lz4/lz4_decompress.c |  50 ++++++++----
 lib/lz4/lz4defs.h        | 203 ++++++++++++++++++++++++++++++++---------------
 lib/lz4/lz4hc_compress.c |   8 +-
 5 files changed, 281 insertions(+), 139 deletions(-)
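
The LZ4_NbCommonBytes() rewrite prefers hardware bit counting and only
falls back to De Bruijn/shift sequences when LZ4_FORCE_SW_BITCOUNT is
defined. A sketch of the 64-bit little-endian fast path, assuming
__builtin_ctzll() is available:

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Given the (non-zero) XOR of two 64-bit words, return how many
	 * of their low-address bytes are equal. On little endian the
	 * low-address bytes are the low-order bits, so count trailing
	 * zero bits and divide by eight.
	 */
	static inline unsigned int nb_common_bytes_le64(uint64_t diff)
	{
		return (unsigned int)(__builtin_ctzll(diff) >> 3);
	}

	int main(void)
	{
		uint64_t a = 0x1122334455667788ULL;
		uint64_t b = 0x11AA334455667788ULL;

		/* In little-endian memory order a and b first differ at
		 * byte index 6, so this prints 6.
		 */
		printf("%u\n", nb_common_bytes_le64(a ^ b));
		return 0;
	}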

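On the FORCE_INLINE changes: LZ4_compress_generic() and the other
generic workers take their mode flags (tableType, dict, outputLimited,
...) as parameters, and every caller passes compile-time constants.
Converting them from plain inline to FORCE_INLINE (__always_inline)
ensures the compiler really inlines them, so those branches are decided
at compilation time. A toy version of the pattern:

	#define FORCE_INLINE static inline __attribute__((always_inline))

	typedef enum { noLimit, limitedOutput } limitedOutput_t;

	/* The "generic" worker branches on a constant parameter. */
	FORCE_INLINE int encode_generic(int value,
		const limitedOutput_t mode)
	{
		if (mode == limitedOutput)	/* folded away per caller */
			return value > 255 ? -1 : value;

		return value;
	}

	/* Each wrapper compiles to a branch-free specialization. */
	int encode_fast(int value)
	{
		return encode_generic(value, noLimit);
	}

	int encode_limited(int value)
	{
		return encode_generic(value, limitedOutput);
	}
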
diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index a3912d7..394e3d9 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -82,7 +82,7 @@
 /*-************************************************************************
  *	STREAMING CONSTANTS AND STRUCTURES
  **************************************************************************/
-#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE - 3)) + 4)
 #define LZ4_STREAMSIZE	(LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))

 #define LZ4_STREAMHCSIZE        262192
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 697dbda..2cbbf99 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -39,27 +39,33 @@
 #include <linux/kernel.h>
 #include <asm/unaligned.h>

+static const int LZ4_minLength = (MFLIMIT + 1);
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT - 1));
+
 /*-******************************
  *	Compression functions
  ********************************/
-static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
+static FORCE_INLINE U32 LZ4_hash4(
+	U32 sequence,
+	tableType_t const tableType)
 {
 	if (tableType == byU16)
 		return ((sequence * 2654435761U)
-			>> ((MINMATCH*8) - (LZ4_HASHLOG + 1)));
+			>> ((MINMATCH * 8) - (LZ4_HASHLOG + 1)));
 	else
 		return ((sequence * 2654435761U)
-			>> ((MINMATCH*8) - LZ4_HASHLOG));
+			>> ((MINMATCH * 8) - LZ4_HASHLOG));
 }

-#if LZ4_ARCH64
-static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+static FORCE_INLINE __maybe_unused U32 LZ4_hash5(
+	U64 sequence,
+	tableType_t const tableType)
 {
 	const U32 hashLog = (tableType == byU16)
 		? LZ4_HASHLOG + 1
 		: LZ4_HASHLOG;

-#ifdef __LITTLE_ENDIAN__
+#if LZ4_LITTLE_ENDIAN
 	static const U64 prime5bytes = 889523592379ULL;

 	return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
@@ -69,9 +75,10 @@ static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
 	return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
 #endif
 }
-#endif

-static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
+static FORCE_INLINE U32 LZ4_hashPosition(
+	const void *p,
+	tableType_t const tableType)
 {
 #if LZ4_ARCH64
 	if (tableType == byU32)
@@ -81,8 +88,12 @@ static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
 	return LZ4_hash4(LZ4_read32(p), tableType);
 }

-static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
-	tableType_t const tableType, const BYTE *srcBase)
+static void LZ4_putPositionOnHash(
+	const BYTE *p,
+	U32 h,
+	void *tableBase,
+	tableType_t const tableType,
+	const BYTE *srcBase)
 {
 	switch (tableType) {
 	case byPtr:
@@ -109,16 +120,22 @@ static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
 	}
 }

-static inline void LZ4_putPosition(const BYTE *p, void *tableBase,
-	tableType_t tableType, const BYTE *srcBase)
+static FORCE_INLINE void LZ4_putPosition(
+	const BYTE *p,
+	void *tableBase,
+	tableType_t tableType,
+	const BYTE *srcBase)
 {
 	U32 const h = LZ4_hashPosition(p, tableType);

 	LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
 }

-static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
-	tableType_t tableType, const BYTE *srcBase)
+static const BYTE *LZ4_getPositionOnHash(
+	U32 h,
+	void *tableBase,
+	tableType_t tableType,
+	const BYTE *srcBase)
 {
 	if (tableType == byPtr) {
 		const BYTE **hashTable = (const BYTE **) tableBase;
@@ -135,12 +152,16 @@ static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
 	{
 		/* default, to ensure a return */
 		const U16 * const hashTable = (U16 *) tableBase;
+
 		return hashTable[h] + srcBase;
 	}
 }

-static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
-	tableType_t tableType, const BYTE *srcBase)
+static FORCE_INLINE const BYTE *LZ4_getPosition(
+	const BYTE *p,
+	void *tableBase,
+	tableType_t tableType,
+	const BYTE *srcBase)
 {
 	U32 const h = LZ4_hashPosition(p, tableType);

@@ -152,7 +173,7 @@ static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
  * LZ4_compress_generic() :
  * inlined, to ensure branches are decided at compilation time
  */
-static inline int LZ4_compress_generic(
+static FORCE_INLINE int LZ4_compress_generic(
 	LZ4_stream_t_internal * const dictPtr,
 	const char * const source,
 	char * const dest,
@@ -187,6 +208,7 @@ static inline int LZ4_compress_generic(
 		/* Unsupported inputSize, too large (or negative) */
 		return 0;
 	}
+
 	switch (dict) {
 	case noDict:
 	default:
@@ -216,7 +238,8 @@ static inline int LZ4_compress_generic(

 	/* First Byte */
 	LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
-	ip++; forwardH = LZ4_hashPosition(ip, tableType);
+	ip++;
+	forwardH = LZ4_hashPosition(ip, tableType);

 	/* Main Loop */
 	for ( ; ; ) {
@@ -227,15 +250,14 @@ static inline int LZ4_compress_generic(
 		{
 			const BYTE *forwardIp = ip;
 			unsigned int step = 1;
-			unsigned int searchMatchNb = acceleration
-				<< LZ4_skipTrigger;
+			unsigned int searchMatchNb = acceleration << LZ4_SKIPTRIGGER;

 			do {
 				U32 const h = forwardH;

 				ip = forwardIp;
 				forwardIp += step;
-				step = (searchMatchNb++ >> LZ4_skipTrigger);
+				step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);

 				if (unlikely(forwardIp > mflimit))
 					goto _last_literals;
@@ -243,6 +265,7 @@ static inline int LZ4_compress_generic(
 				match = LZ4_getPositionOnHash(h,
 					dictPtr->hashTable,
 					tableType, base);
+
 				if (dict == usingExtDict) {
 					if (match < (const BYTE *)source) {
 						refDelta = dictDelta;
@@ -251,11 +274,12 @@ static inline int LZ4_compress_generic(
 						refDelta = 0;
 						lowLimit = (const BYTE *)source;
 				}	 }
+
 				forwardH = LZ4_hashPosition(forwardIp,
 					tableType);
+
 				LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
 					tableType, base);
-
 			} while (((dictIssue == dictSmall)
 					? (match < lowRefLimit)
 					: 0)
@@ -268,31 +292,34 @@ static inline int LZ4_compress_generic(

 		/* Catch up */
 		while (((ip > anchor) & (match + refDelta > lowLimit))
-			&& (unlikely(ip[-1] == match[refDelta - 1]))) {
+				&& (unlikely(ip[-1] == match[refDelta - 1]))) {
 			ip--;
 			match--;
-			}
+		}

 		/* Encode Literals */
 		{
 			unsigned const int litLength = (unsigned int)(ip - anchor);

 			token = op++;
+
 			if ((outputLimited) &&
 				/* Check output buffer overflow */
 				(unlikely(op + litLength +
 					(2 + 1 + LASTLITERALS) +
-					(litLength/255) > olimit)))
+					(litLength / 255) > olimit)))
 				return 0;
+
 			if (litLength >= RUN_MASK) {
 				int len = (int)litLength - RUN_MASK;

-				*token = (RUN_MASK<<ML_BITS);
-				for (; len >= 255 ; len -= 255)
+				*token = (RUN_MASK << ML_BITS);
+
+				for (; len >= 255; len -= 255)
 					*op++ = 255;
 				*op++ = (BYTE)len;
 			} else
-				*token = (BYTE)(litLength<<ML_BITS);
+				*token = (BYTE)(litLength << ML_BITS);

 			/* Copy Literals */
 			LZ4_wildCopy(op, anchor, op + litLength);
@@ -301,7 +328,8 @@ static inline int LZ4_compress_generic(

 _next_match:
 		/* Encode Offset */
-		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
+		LZ4_writeLE16(op, (U16)(ip - match));
+		op += 2;

 		/* Encode MatchLength */
 		{
@@ -313,11 +341,15 @@ static inline int LZ4_compress_generic(

 				match += refDelta;
 				limit = ip + (dictEnd - match);
+
 				if (limit > matchlimit)
 					limit = matchlimit;
+
 				matchCode = LZ4_count(ip + MINMATCH,
 					match + MINMATCH, limit);
+
 				ip += MINMATCH + matchCode;
+
 				if (ip == limit) {
 					unsigned const int more = LZ4_count(ip,
 						(const BYTE *)source,
@@ -336,17 +368,20 @@ static inline int LZ4_compress_generic(
 				/* Check output buffer overflow */
 				(unlikely(op +
 					(1 + LASTLITERALS) +
-					(matchCode>>8) > olimit)))
+					(matchCode >> 8) > olimit)))
 				return 0;
+
 			if (matchCode >= ML_MASK) {
 				*token += ML_MASK;
 				matchCode -= ML_MASK;
 				LZ4_write32(op, 0xFFFFFFFF);
-				while (matchCode >= 4*255) {
+
+				while (matchCode >= 4 * 255) {
 					op += 4;
 					LZ4_write32(op, 0xFFFFFFFF);
-					matchCode -= 4*255;
+					matchCode -= 4 * 255;
 				}
+
 				op += matchCode / 255;
 				*op++ = (BYTE)(matchCode % 255);
 			} else
@@ -365,6 +400,7 @@ static inline int LZ4_compress_generic(
 		/* Test next position */
 		match = LZ4_getPosition(ip, dictPtr->hashTable,
 			tableType, base);
+
 		if (dict == usingExtDict) {
 			if (match < (const BYTE *)source) {
 				refDelta = dictDelta;
@@ -374,7 +410,9 @@ static inline int LZ4_compress_generic(
 				lowLimit = (const BYTE *)source;
 			}
 		}
+
 		LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+
 		if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
 			&& (match + MAX_DISTANCE >= ip)
 			&& (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
@@ -395,18 +433,21 @@ static inline int LZ4_compress_generic(
 		if ((outputLimited) &&
 			/* Check output buffer overflow */
 			((op - (BYTE *)dest) + lastRun + 1 +
-			((lastRun + 255 - RUN_MASK)/255) > (U32)maxOutputSize))
+			((lastRun + 255 - RUN_MASK) / 255) > (U32)maxOutputSize))
 			return 0;
+
 		if (lastRun >= RUN_MASK) {
 			size_t accumulator = lastRun - RUN_MASK;
 			*op++ = RUN_MASK << ML_BITS;
-			for (; accumulator >= 255 ; accumulator -= 255)
+			for (; accumulator >= 255; accumulator -= 255)
 				*op++ = 255;
 			*op++ = (BYTE) accumulator;
 		} else {
-			*op++ = (BYTE)(lastRun<<ML_BITS);
+			*op++ = (BYTE)(lastRun << ML_BITS);
 		}
+
 		memcpy(op, anchor, lastRun);
+
 		op += lastRun;
 	}

@@ -414,23 +455,27 @@ static inline int LZ4_compress_generic(
 	return (int) (((char *)op) - dest);
 }

-static int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
-	int inputSize, int maxOutputSize, int acceleration)
+static int LZ4_compress_fast_extState(
+	void *state,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize,
+	int acceleration)
 {
-	#if LZ4_ARCH64
-	tableType_t tableType = byU32;
-	#else
-	tableType_t tableType = byPtr;
-	#endif
-
 	LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
+#if LZ4_ARCH64
+	const tableType_t tableType = byU32;
+#else
+	const tableType_t tableType = byPtr;
+#endif

 	LZ4_resetStream((LZ4_stream_t *)state);

 	if (acceleration < 1)
 		acceleration = LZ4_ACCELERATION_DEFAULT;

-	if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+	if (maxOutputSize >= LZ4_COMPRESSBOUND(inputSize)) {
 		if (inputSize < LZ4_64Klimit)
 			return LZ4_compress_generic(ctx, source,
 				dest, inputSize, 0,
@@ -474,7 +519,6 @@ EXPORT_SYMBOL(LZ4_compress_default);
 /*-******************************
  *	*_destSize() variant
  ********************************/
-
 static int LZ4_compress_destSize_generic(
 	LZ4_stream_t_internal * const ctx,
 	const char * const src,
@@ -529,14 +573,14 @@ static int LZ4_compress_destSize_generic(
 		{
 			const BYTE *forwardIp = ip;
 			unsigned int step = 1;
-			unsigned int searchMatchNb = 1 << LZ4_skipTrigger;
+			unsigned int searchMatchNb = 1 << LZ4_SKIPTRIGGER;

 			do {
 				U32 h = forwardH;

 				ip = forwardIp;
 				forwardIp += step;
-				step = (searchMatchNb++ >> LZ4_skipTrigger);
+				step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);

 				if (unlikely(forwardIp > mflimit))
 					goto _last_literals;
@@ -559,8 +603,9 @@ static int LZ4_compress_destSize_generic(
 		while ((ip > anchor)
 			&& (match > lowLimit)
 			&& (unlikely(ip[-1] == match[-1]))) {
-			ip--; match--;
-			}
+			ip--;
+			match--;
+		}

 		/* Encode Literal length */
 		{
@@ -644,11 +689,11 @@ static int LZ4_compress_destSize_generic(
 		size_t lastRunSize = (size_t)(iend - anchor);

 		if (op + 1 /* token */
-			+ ((lastRunSize + 240)/255) /* litLength */
+			+ ((lastRunSize + 240) / 255) /* litLength */
 			+ lastRunSize /* literals */ > oend) {
 			/* adapt lastRunSize to fill 'dst' */
 			lastRunSize	= (oend - op) - 1;
-			lastRunSize -= (lastRunSize + 240)/255;
+			lastRunSize -= (lastRunSize + 240) / 255;
 		}
 		ip = anchor + lastRunSize;

@@ -656,7 +701,7 @@ static int LZ4_compress_destSize_generic(
 			size_t accumulator = lastRunSize - RUN_MASK;

 			*op++ = RUN_MASK << ML_BITS;
-			for (; accumulator >= 255 ; accumulator -= 255)
+			for (; accumulator >= 255; accumulator -= 255)
 				*op++ = 255;
 			*op++ = (BYTE) accumulator;
 		} else {
@@ -675,14 +720,14 @@ static int LZ4_compress_destSize_extState(LZ4_stream_t *state, const char *src,
 	char *dst, int *srcSizePtr, int targetDstSize)
 {
 	#if LZ4_ARCH64
-	tableType_t tableType = byU32;
+		const tableType_t tableType = byU32;
 	#else
-	tableType_t tableType = byPtr;
+		const tableType_t tableType = byPtr;
 	#endif

 	LZ4_resetStream(state);

-	if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {
+	if (targetDstSize >= LZ4_COMPRESSBOUND(*srcSizePtr)) {
 		/* compression success is guaranteed */
 		return LZ4_compress_fast_extState(
 			state, src, dst, *srcSizePtr,
@@ -847,7 +892,7 @@ int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
 			result = LZ4_compress_generic(
 				streamPtr, source, dest, inputSize,
 				maxOutputSize, limitedOutput, byU32,
-				withPrefix64k, dictSmall,	acceleration);
+				withPrefix64k, dictSmall, acceleration);
 		} else {
 			result = LZ4_compress_generic(
 				streamPtr, source, dest, inputSize,
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index a7731ba..3bfc2f6 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -49,8 +49,8 @@
  * Note that it is important this generic function is really inlined,
  * in order to remove useless branches during compilation optimization.
  */
-static inline int LZ4_decompress_generic(
-	 const char *const source,
+static FORCE_INLINE int LZ4_decompress_generic(
+	 const char * const source,
 	 char * const dest,
 	 int inputSize,
 		/*
@@ -180,22 +180,28 @@ static inline int LZ4_decompress_generic(
 					goto _output_error;
 				}
 			}
+
 			memcpy(op, ip, length);
 			ip += length;
 			op += length;
 			/* Necessarily EOF, due to parsing restrictions */
 			break;
 		}
+
 		LZ4_wildCopy(op, ip, cpy);
-		ip += length; op = cpy;
+		ip += length;
+		op = cpy;

 		/* get offset */
-		offset = LZ4_readLE16(ip); ip += 2;
+		offset = LZ4_readLE16(ip);
+		ip += 2;
 		match = op - offset;
+
 		if ((checkOffset) && (unlikely(match < lowLimit))) {
 			/* Error : offset outside buffers */
 			goto _output_error;
 		}
+
 		/* costs ~1%; silence an msan warning when offset == 0 */
 		LZ4_write32(op, (U32)offset);

@@ -205,11 +211,14 @@ static inline int LZ4_decompress_generic(
 			unsigned int s;

 			do {
-			s = *ip++;
-			if ((endOnInput) && (ip > iend - LASTLITERALS))
-				goto _output_error;
-			length += s;
+				s = *ip++;
+
+				if ((endOnInput) && (ip > iend - LASTLITERALS))
+					goto _output_error;
+
+				length += s;
 			} while (s == 255);
+
 			if ((safeDecode)
 				&& unlikely(
 					(size_t)(op + length) < (size_t)op)) {
@@ -217,6 +226,7 @@ static inline int LZ4_decompress_generic(
 				goto _output_error;
 			}
 		}
+
 		length += MINMATCH;

 		/* check external dictionary */
@@ -227,12 +237,13 @@ static inline int LZ4_decompress_generic(
 			}

 			if (length <= (size_t)(lowPrefix - match)) {
-			/*
-			 * match can be copied as a single segment
-			 * from external dictionary
-			 */
-			memmove(op, dictEnd - (lowPrefix - match), length);
-			op += length;
+				/*
+				 * match can be copied as a single segment
+				 * from external dictionary
+				 */
+				memmove(op, dictEnd - (lowPrefix - match),
+					length);
+				op += length;
 			} else {
 				/*
 				 * match encompass external
@@ -256,11 +267,13 @@ static inline int LZ4_decompress_generic(
 					op += restSize;
 				}
 			}
+
 			continue;
 		}

 		/* copy match within block */
 		cpy = op + length;
+
 		if (unlikely(offset < 8)) {
 			const int dec64 = dec64table[offset];

@@ -272,7 +285,8 @@ static inline int LZ4_decompress_generic(
 			memcpy(op + 4, match, 4);
 			match -= dec64;
 		} else {
-			LZ4_copy8(op, match); match += 8;
+			LZ4_copy8(op, match);
+			match += 8;
 		}

 		op += 8;
@@ -287,18 +301,22 @@ static inline int LZ4_decompress_generic(
 				 */
 				goto _output_error;
 			}
+
 			if (op < oCopyLimit) {
 				LZ4_wildCopy(op, match, oCopyLimit);
 				match += oCopyLimit - op;
 				op = oCopyLimit;
 			}
+
 			while (op < cpy)
 				*op++ = *match++;
 		} else {
 			LZ4_copy8(op, match);
+
 			if (length > 16)
 				LZ4_wildCopy(op + 8, match + 8, cpy);
 		}
+
 		op = cpy; /* correction */
 	}

@@ -438,7 +456,7 @@ int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
  * These decoding functions work the same as "_continue" ones,
  * the dictionary must be explicitly provided within parameters
  */
-static inline int LZ4_decompress_usingDict_generic(const char *source,
+static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source,
 	char *dest, int compressedSize, int maxOutputSize, int safe,
 	const char *dictStart, int dictSize)
 {
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index 23e1a1b..47ef42b 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -38,14 +38,7 @@
 #include <asm/unaligned.h>
 #include <linux/string.h>	 /* memset, memcpy */

-/*
- * Detects 64 bits mode
-*/
-#if defined(CONFIG_64BIT)
-#define LZ4_ARCH64 1
-#else
-#define LZ4_ARCH64 0
-#endif
+#define FORCE_INLINE __always_inline

 /*-************************************
  *	Basic Types
@@ -60,14 +53,38 @@ typedef uint64_t U64;
 typedef uintptr_t uptrval;

 /*-************************************
+ *	Architecture specifics
+ **************************************/
+#if defined(CONFIG_64BIT)
+#define LZ4_ARCH64 1
+#else
+#define LZ4_ARCH64 0
+#endif
+
+#if defined(__LITTLE_ENDIAN)
+#define LZ4_LITTLE_ENDIAN 1
+#else
+#define LZ4_LITTLE_ENDIAN 0
+#endif
+
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system
+ * does not support hardware bit count
+ */
+/* #define LZ4_FORCE_SW_BITCOUNT */
+
+/*-************************************
  *	Constants
  **************************************/
 #define MINMATCH 4

 #define WILDCOPYLENGTH 8
 #define LASTLITERALS 5
-#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
-static const int LZ4_minLength = (MFLIMIT+1);
+#define MFLIMIT (WILDCOPYLENGTH + MINMATCH)
+
+/* Increase this value ==> compression runs slower on incompressible data */
+#define LZ4_SKIPTRIGGER 6

 #define KB (1<<10)
 #define MB (1<<20)
@@ -82,53 +99,42 @@ static const int LZ4_minLength = (MFLIMIT+1);
 #define RUN_BITS (8-ML_BITS)
 #define RUN_MASK ((1U<<RUN_BITS)-1)

-static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT-1));
-static const U32 LZ4_skipTrigger = 6;
-
 /*-************************************
  *	Reading and writing into memory
  **************************************/
+typedef union {
+	U16 u16;
+	U32 u32;
+	size_t uArch;
+} __packed unalign;

-static inline U16 LZ4_read16(const void *memPtr)
+static FORCE_INLINE __maybe_unused U16 LZ4_read16(const void *ptr)
 {
-	U16 val;
-
-	memcpy(&val, memPtr, sizeof(val));
-
-	return val;
+	return ((const unalign *)ptr)->u16;
 }

-static inline U32 LZ4_read32(const void *memPtr)
+static FORCE_INLINE __maybe_unused U32 LZ4_read32(const void *ptr)
 {
-	U32 val;
-
-	memcpy(&val, memPtr, sizeof(val));
-
-	return val;
+	return ((const unalign *)ptr)->u32;
 }

-static inline size_t LZ4_read_ARCH(const void *memPtr)
+static FORCE_INLINE __maybe_unused size_t LZ4_read_ARCH(const void *ptr)
 {
-	size_t val;
-
-	memcpy(&val, memPtr, sizeof(val));
-
-	return val;
+	return ((const unalign *)ptr)->uArch;
 }

-static inline void LZ4_write16(void *memPtr, U16 value)
+static FORCE_INLINE __maybe_unused void LZ4_write16(void *memPtr, U16 value)
 {
-	memcpy(memPtr, &value, sizeof(value));
+	((unalign *)memPtr)->u16 = value;
 }

-static inline void LZ4_write32(void *memPtr, U32 value)
-{
-	memcpy(memPtr, &value, sizeof(value));
+static FORCE_INLINE __maybe_unused void LZ4_write32(void *memPtr, U32 value)
+{
+	((unalign *)memPtr)->u32 = value;
 }

-static inline U16 LZ4_readLE16(const void *memPtr)
+static FORCE_INLINE __maybe_unused U16 LZ4_readLE16(const void *memPtr)
 {
-#ifdef __LITTLE_ENDIAN__
+#if LZ4_LITTLE_ENDIAN
 	return LZ4_read16(memPtr);
 #else
 	const BYTE *p = (const BYTE *)memPtr;
@@ -137,19 +143,19 @@ static inline U16 LZ4_readLE16(const void *memPtr)
 #endif
 }

-static inline void LZ4_writeLE16(void *memPtr, U16 value)
+static FORCE_INLINE __maybe_unused void LZ4_writeLE16(void *memPtr, U16 value)
 {
-#ifdef __LITTLE_ENDIAN__
+#if LZ4_LITTLE_ENDIAN
 	LZ4_write16(memPtr, value);
 #else
 	BYTE *p = (BYTE *)memPtr;

 	p[0] = (BYTE) value;
-	p[1] = (BYTE)(value>>8);
+	p[1] = (BYTE)(value >> 8);
 #endif
 }

-static inline void LZ4_copy8(void *dst, const void *src)
+static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
 {
 	memcpy(dst, src, 8);
 }
@@ -158,7 +164,8 @@ static inline void LZ4_copy8(void *dst, const void *src)
  * customized variant of memcpy,
  * which can overwrite up to 7 bytes beyond dstEnd
  */
-static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
+static FORCE_INLINE void LZ4_wildCopy(void *dstPtr,
+	const void *srcPtr, void *dstEnd)
 {
 	BYTE *d = (BYTE *)dstPtr;
 	const BYTE *s = (const BYTE *)srcPtr;
@@ -171,49 +178,121 @@ static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
 	} while (d < e);
 }

-#if LZ4_ARCH64
-#ifdef __BIG_ENDIAN__
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
+{
+#if LZ4_LITTLE_ENDIAN
+#if LZ4_ARCH64 /* 64 Bits Little Endian */
+#if defined(LZ4_FORCE_SW_BITCOUNT)
+	static const int DeBruijnBytePos[64] = {
+		0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7,
+		0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7,
+		7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6,
+		7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
+	};
+
+	return DeBruijnBytePos[((U64)((val & -(long long)val)
+		* 0x0218A392CDABBD3FULL)) >> 58];
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
-#endif
+	return (__builtin_ctzll((U64)val) >> 3);
+#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
+#else /* 32 Bits Little Endian */
+#if defined(LZ4_FORCE_SW_BITCOUNT)
+	static const int DeBruijnBytePos[32] = {
+		0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1,
+		3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1
+	};
+
+	return DeBruijnBytePos[((U32)((val & -(S32)val)
+		* 0x077CB531U)) >> 27];
 #else
-#ifdef __BIG_ENDIAN__
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
+	return (__builtin_ctz((U32)val) >> 3);
+#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
+#endif /* LZ4_ARCH64 */
+#else /* Big Endian */
+#if LZ4_ARCH64 /* 64 Bits Big Endian */
+#if defined(LZ4_FORCE_SW_BITCOUNT)
+	unsigned int r;
+
+	if (!(val >> 32)) {
+		r = 4;
+	} else {
+		r = 0;
+		val >>= 32;
+	}
+
+	if (!(val >> 16)) {
+		r += 2;
+		val >>= 8;
+	} else {
+		val >>= 24;
+	}
+
+	r += (!val);
+
+	return r;
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
-#endif
-#endif
+	return (__builtin_clzll((U64)val) >> 3);
+#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
+#else /* 32 Bits Big Endian */
+#if defined(LZ4_FORCE_SW_BITCOUNT)
+	unsigned int r;
+
+	if (!(val >> 16)) {
+		r = 2;
+		val >>= 8;
+	} else {
+		r = 0;
+		val >>= 24;
+	}
+
+	r += (!val);
+
+	return r;
+#else
+	return (__builtin_clz((U32)val) >> 3);
+#endif /* defined(LZ4_FORCE_SW_BITCOUNT) */
+#endif /* LZ4_ARCH64 */
+#endif /* LZ4_LITTLE_ENDIAN */
+}

-static inline unsigned int LZ4_count(const BYTE *pIn, const BYTE *pMatch,
+static FORCE_INLINE __maybe_unused unsigned int LZ4_count(
+	const BYTE *pIn,
+	const BYTE *pMatch,
 	const BYTE *pInLimit)
 {
 	const BYTE *const pStart = pIn;

-	while (likely(pIn < pInLimit-(STEPSIZE-1))) {
-		size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+	while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
+		size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);

 		if (!diff) {
 			pIn += STEPSIZE;
 			pMatch += STEPSIZE;
 			continue;
 		}
-		pIn += LZ4_NBCOMMONBYTES(diff);
+
+		pIn += LZ4_NbCommonBytes(diff);
+
 		return (unsigned int)(pIn - pStart);
 	}

-#ifdef LZ4_ARCH64
-	if ((pIn < (pInLimit-3))
+#if LZ4_ARCH64
+	if ((pIn < (pInLimit - 3))
 		&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
-		pIn += 4; pMatch += 4;
+		pIn += 4;
+		pMatch += 4;
 	}
 #endif
-	if ((pIn < (pInLimit-1))
+
+	if ((pIn < (pInLimit - 1))
 		&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
-		pIn += 2; pMatch += 2;
+		pIn += 2;
+		pMatch += 2;
 	}
+
 	if ((pIn < pInLimit) && (*pMatch == *pIn))
 		pIn++;
+
 	return (unsigned int)(pIn - pStart);
 }

diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index 8363292..c7271a1 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -71,7 +71,7 @@ static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
 }

 /* Update chains up to ip (excluded) */
-static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+static FORCE_INLINE void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
 	const BYTE *ip)
 {
 	U16 * const chainTable = hc4->chainTable;
@@ -96,7 +96,7 @@ static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
 	hc4->nextToUpdate = target;
 }

-static inline int LZ4HC_InsertAndFindBestMatch(
+static FORCE_INLINE int LZ4HC_InsertAndFindBestMatch(
 	LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
 	const BYTE *ip,
 	const BYTE * const iLimit,
@@ -165,7 +165,7 @@ static inline int LZ4HC_InsertAndFindBestMatch(
 	return (int)ml;
 }

-static inline int LZ4HC_InsertAndGetWiderMatch(
+static FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch(
 	LZ4HC_CCtx_internal *hc4,
 	const BYTE * const ip,
 	const BYTE * const iLowLimit,
@@ -259,7 +259,7 @@ static inline int LZ4HC_InsertAndGetWiderMatch(
 	return longest;
 }

-static inline int LZ4HC_encodeSequence(
+static FORCE_INLINE int LZ4HC_encodeSequence(
 	const BYTE **ip,
 	BYTE **op,
 	const BYTE **anchor,

--
2.1.4

Thread overview: 104+ messages
2016-12-20 18:53 [PATCH 0/3] Update LZ4 compressor module Sven Schmidt
2016-12-20 18:53 ` [PATCH 1/3] crypto: Change lz4 modules to work with new lz4 compressor module version Sven Schmidt
2016-12-21  5:25   ` kbuild test robot
2016-12-22 13:25   ` Sergey Senozhatsky
2016-12-20 18:53 ` [PATCH 2/3] fs/pstore: fs/squashfs: Change lz4 compressor functions to work with new version Sven Schmidt
2016-12-21  5:16   ` kbuild test robot
2016-12-21  5:18   ` kbuild test robot
2016-12-22 13:21   ` Sergey Senozhatsky
2016-12-22 15:31     ` Sven Schmidt
2016-12-22 15:36       ` Sergey Senozhatsky
2016-12-22 13:27   ` Sergey Senozhatsky
2016-12-20 18:53 ` [PATCH 3/3] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
2016-12-20 19:52   ` Joe Perches
2016-12-21 20:04     ` Sven Schmidt
2016-12-22 17:31       ` Greg KH
2016-12-22 18:39         ` Sven Schmidt
2016-12-21  7:09   ` kbuild test robot
2016-12-22 17:29 ` [PATCH 0/3] Update LZ4 compressor module Greg KH
2016-12-22 18:35   ` Sven Schmidt
2016-12-23 20:53     ` Greg KH
2017-01-07 16:55 ` [PATCH v2 0/4] " Sven Schmidt
2017-01-07 16:55   ` [PATCH v2 1/4] lib: Update LZ4 compressor module based on LZ4 v1.7.2 Sven Schmidt
2017-01-08 11:22     ` Greg KH
2017-01-10  9:32       ` Sven Schmidt
2017-01-10  9:59         ` Greg KH
2017-01-08 11:25     ` Greg KH
2017-01-08 11:33       ` Rui Salvaterra
2017-01-10  9:21       ` Sven Schmidt
2017-01-10 10:00         ` Greg KH
2017-01-10 10:50           ` Willy Tarreau
2017-01-07 16:55   ` [PATCH v2 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-01-08 11:23     ` Greg KH
2017-01-07 16:55   ` [PATCH v2 3/4] crypto: Change LZ4 modules " Sven Schmidt
2017-01-07 16:55   ` [PATCH v2 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to comply " Sven Schmidt
2017-01-07 21:33     ` Kees Cook
2017-01-10  9:45       ` Sven Schmidt
2017-01-21 15:09 ` [PATCH v3 0/4] Update LZ4 compressor module Sven Schmidt
2017-01-21 15:09   ` [PATCH 1/4] lib: " Sven Schmidt
2017-01-21 15:56     ` kbuild test robot
2017-01-21 16:16     ` kbuild test robot
2017-01-21 17:38     ` kbuild test robot
2017-01-22 11:05     ` Greg KH
2017-01-21 15:09   ` [PATCH 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-01-21 15:09   ` [PATCH 3/4] crypto: Change LZ4 modules " Sven Schmidt
2017-01-21 15:09   ` [PATCH 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
2017-01-22 19:35 ` [PATCH v4 0/4] Update LZ4 compressor module Sven Schmidt
2017-01-22 19:35   ` [PATCH v4 1/4] lib: " Sven Schmidt
2017-01-24  0:23     ` Andrew Morton
2017-01-24 16:48       ` Sven Schmidt
2017-01-22 19:35   ` [PATCH v4 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-01-22 19:35   ` [PATCH v4 3/4] crypto: Change LZ4 modules " Sven Schmidt
2017-01-22 19:35   ` [PATCH v4 4/4] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
2017-01-26  7:57 ` [PATCH v5 0/5] Update LZ4 compressor module Sven Schmidt
2017-01-26  7:57   ` [PATCH v5 1/5] lib: " Sven Schmidt
2017-01-26  7:57   ` [PATCH v5 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-01-26  7:57   ` [PATCH v5 3/5] crypto: Change LZ4 modules " Sven Schmidt
2017-01-26  7:57   ` [PATCH v5 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
2017-01-26  7:57   ` [PATCH v5 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
2017-01-26  9:19   ` [PATCH v5 0/5] Update LZ4 compressor module Eric Biggers
2017-01-26 14:15     ` Sven Schmidt
2017-01-27 22:01 ` [PATCH v6 " Sven Schmidt
2017-01-27 22:02   ` [PATCH v6 1/5] lib: " Sven Schmidt
2017-01-31 22:27     ` Jonathan Corbet
2017-02-01 20:18       ` Sven Schmidt
2017-01-27 22:02   ` [PATCH v6 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-01-27 22:02   ` [PATCH v6 3/5] crypto: Change LZ4 modules " Sven Schmidt
2017-01-27 22:02   ` [PATCH v6 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
2017-01-27 22:02   ` [PATCH v6 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
2017-02-05 19:09 ` [PATCH v7 0/5] Update LZ4 compressor module Sven Schmidt
2017-02-05 19:09   ` [PATCH v7 1/5] lib: " Sven Schmidt
2017-02-05 19:09   ` [PATCH v7 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-02-05 19:09   ` [PATCH v7 3/5] crypto: Change LZ4 modules " Sven Schmidt
2017-02-05 19:09   ` [PATCH v7 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
2017-02-05 19:09   ` [PATCH v7 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
2017-02-08 23:31   ` [PATCH v7 0/5] Update LZ4 compressor module Minchan Kim
2017-02-09  0:24     ` Eric Biggers
2017-02-09  5:24       ` Eric Biggers
2017-02-09 11:05         ` Sven Schmidt
2017-02-09 18:20           ` Eric Biggers
2017-02-10  0:14         ` Minchan Kim
2017-02-09 11:02       ` Sven Schmidt
2017-02-09 18:29         ` Eric Biggers
2017-02-10  3:57           ` David Miller
2017-02-09 10:56     ` Sven Schmidt
2017-02-10  0:13       ` Minchan Kim
2017-02-12 11:16         ` Sven Schmidt
2017-02-12 11:16           ` Sven Schmidt [this message]
2017-02-12 11:16             ` [PATCH] lz4: fix performance regressions Sven Schmidt
2017-02-12 13:05             ` Willy Tarreau
2017-02-12 15:20               ` Sven Schmidt
2017-02-12 21:41                 ` Willy Tarreau
2017-02-13 11:53                   ` Sven Schmidt
2017-02-13 13:37                     ` Willy Tarreau
2017-02-12 23:38             ` Eric Biggers
2017-02-14 10:33               ` Sven Schmidt
2017-02-13  0:03           ` [PATCH v7 0/5] Update LZ4 compressor module Minchan Kim
2017-02-13 12:08             ` Sven Schmidt
2017-02-15  7:29               ` Minchan Kim
2017-02-15 18:16 ` [PATCH v8 " Sven Schmidt
2017-02-15 18:16   ` [PATCH v8 1/5] lib: " Sven Schmidt
2017-02-15 18:16   ` [PATCH v8 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version Sven Schmidt
2017-02-15 18:16   ` [PATCH v8 3/5] crypto: Change LZ4 modules " Sven Schmidt
2017-02-15 18:16   ` [PATCH v8 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version Sven Schmidt
2017-02-15 18:16   ` [PATCH v8 5/5] lib/lz4: Remove back-compat wrappers Sven Schmidt
