All of lore.kernel.org
 help / color / mirror / Atom feed
* Reiser4 und LZO compression
@ 2006-08-27  0:34 Alexey Dobriyan
  2006-08-27  8:04 ` Andrew Morton
  2006-08-28 17:06 ` Hans Reiser
  0 siblings, 2 replies; 47+ messages in thread
From: Alexey Dobriyan @ 2006-08-27  0:34 UTC (permalink / raw)
  To: reiserfs-list; +Cc: linux-kernel, Andrew Morton

Reiser4 developers, Andrew,

The patch below is the so-called reiser4 LZO compression plugin, as extracted
from 2.6.18-rc4-mm3.

I think it is an unauditable piece of shit and thus should not enter
mainline.

--- /dev/null	2006-06-20 03:34:40.298038750 -0700
+++ devel/fs/reiser4/plugin/compress/lzoconf.h	2006-08-26 15:39:12.000000000 -0700
@@ -0,0 +1,420 @@
+/* lzoconf.h -- configuration for the LZO real-time data compression library
+   adapted for the reiser4 compression transform plugin.
+
+   This file is part of the LZO real-time data compression library
+   and not included in any proprietary licenses of reiser4.
+
+   Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
+   All Rights Reserved.
+
+   The LZO library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of
+   the License, or (at your option) any later version.
+
+   The LZO library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with the LZO library; see the file COPYING.
+   If not, write to the Free Software Foundation, Inc.,
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+   Markus F.X.J. Oberhumer
+   <markus@oberhumer.com>
+   http://www.oberhumer.com/opensource/lzo/
+ */
+
+#include <linux/kernel.h>	/* for UINT_MAX, ULONG_MAX - edward */
+
+#ifndef __LZOCONF_H
+#define __LZOCONF_H
+
+#define LZO_VERSION             0x1080
+#define LZO_VERSION_STRING      "1.08"
+#define LZO_VERSION_DATE        "Jul 12 2002"
+
+/* internal Autoconf configuration file - only used when building LZO */
+#if defined(LZO_HAVE_CONFIG_H)
+#  include <config.h>
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/***********************************************************************
+// LZO requires a conforming <limits.h>
+************************************************************************/
+
+#define CHAR_BIT  8
+#define USHRT_MAX 0xffff
+
+/* workaround a cpp bug under hpux 10.20 */
+#define LZO_0xffffffffL         4294967295ul
+
+/***********************************************************************
+// architecture defines
+************************************************************************/
+
+#if !defined(__LZO_WIN) && !defined(__LZO_DOS) && !defined(__LZO_OS2)
+#  if defined(__WINDOWS__) || defined(_WINDOWS) || defined(_Windows)
+#    define __LZO_WIN
+#  elif defined(__WIN32__) || defined(_WIN32) || defined(WIN32)
+#    define __LZO_WIN
+#  elif defined(__NT__) || defined(__NT_DLL__) || defined(__WINDOWS_386__)
+#    define __LZO_WIN
+#  elif defined(__DOS__) || defined(__MSDOS__) || defined(MSDOS)
+#    define __LZO_DOS
+#  elif defined(__OS2__) || defined(__OS2V2__) || defined(OS2)
+#    define __LZO_OS2
+#  elif defined(__palmos__)
+#    define __LZO_PALMOS
+#  elif defined(__TOS__) || defined(__atarist__)
+#    define __LZO_TOS
+#  endif
+#endif
+
+#if (UINT_MAX < LZO_0xffffffffL)
+#  if defined(__LZO_WIN)
+#    define __LZO_WIN16
+#  elif defined(__LZO_DOS)
+#    define __LZO_DOS16
+#  elif defined(__LZO_PALMOS)
+#    define __LZO_PALMOS16
+#  elif defined(__LZO_TOS)
+#    define __LZO_TOS16
+#  elif defined(__C166__)
+#  else
+	/* porting hint: for pure 16-bit architectures try compiling
+	 * everything with -D__LZO_STRICT_16BIT */
+#    error "16-bit target not supported - contact me for porting hints"
+#  endif
+#endif
+
+#if !defined(__LZO_i386)
+#  if defined(__LZO_DOS) || defined(__LZO_WIN16)
+#    define __LZO_i386
+#  elif defined(__i386__) || defined(__386__) || defined(_M_IX86)
+#    define __LZO_i386
+#  endif
+#endif
+
+#if defined(__LZO_STRICT_16BIT)
+#  if (UINT_MAX < LZO_0xffffffffL)
+#    include <lzo16bit.h>
+#  endif
+#endif
+
+/* memory checkers */
+#if !defined(__LZO_CHECKER)
+#  if defined(__BOUNDS_CHECKING_ON)
+#    define __LZO_CHECKER
+#  elif defined(__CHECKER__)
+#    define __LZO_CHECKER
+#  elif defined(__INSURE__)
+#    define __LZO_CHECKER
+#  elif defined(__PURIFY__)
+#    define __LZO_CHECKER
+#  endif
+#endif
+
+/***********************************************************************
+// integral and pointer types
+************************************************************************/
+
+/* Integral types with 32 bits or more */
+#if !defined(LZO_UINT32_MAX)
+#  if (UINT_MAX >= LZO_0xffffffffL)
+	typedef unsigned int lzo_uint32;
+	typedef int lzo_int32;
+#    define LZO_UINT32_MAX      UINT_MAX
+#    define LZO_INT32_MAX       INT_MAX
+#    define LZO_INT32_MIN       INT_MIN
+#  elif (ULONG_MAX >= LZO_0xffffffffL)
+	typedef unsigned long lzo_uint32;
+	typedef long lzo_int32;
+#    define LZO_UINT32_MAX      ULONG_MAX
+#    define LZO_INT32_MAX       LONG_MAX
+#    define LZO_INT32_MIN       LONG_MIN
+#  else
+#    error "lzo_uint32"
+#  endif
+#endif
+
+/* lzo_uint is used like size_t */
+#if !defined(LZO_UINT_MAX)
+#  if (UINT_MAX >= LZO_0xffffffffL)
+	typedef unsigned int lzo_uint;
+	typedef int lzo_int;
+#    define LZO_UINT_MAX        UINT_MAX
+#    define LZO_INT_MAX         INT_MAX
+#    define LZO_INT_MIN         INT_MIN
+#  elif (ULONG_MAX >= LZO_0xffffffffL)
+	typedef unsigned long lzo_uint;
+	typedef long lzo_int;
+#    define LZO_UINT_MAX        ULONG_MAX
+#    define LZO_INT_MAX         LONG_MAX
+#    define LZO_INT_MIN         LONG_MIN
+#  else
+#    error "lzo_uint"
+#  endif
+#endif
+
+	typedef int lzo_bool;
+
+/***********************************************************************
+// memory models
+************************************************************************/
+
+/* Memory model for the public code segment. */
+#if !defined(__LZO_CMODEL)
+#  if defined(__LZO_DOS16) || defined(__LZO_WIN16)
+#    define __LZO_CMODEL        __far
+#  elif defined(__LZO_i386) && defined(__WATCOMC__)
+#    define __LZO_CMODEL        __near
+#  else
+#    define __LZO_CMODEL
+#  endif
+#endif
+
+/* Memory model for the public data segment. */
+#if !defined(__LZO_DMODEL)
+#  if defined(__LZO_DOS16) || defined(__LZO_WIN16)
+#    define __LZO_DMODEL        __far
+#  elif defined(__LZO_i386) && defined(__WATCOMC__)
+#    define __LZO_DMODEL        __near
+#  else
+#    define __LZO_DMODEL
+#  endif
+#endif
+
+/* Memory model that allows accessing memory at offsets of lzo_uint. */
+#if !defined(__LZO_MMODEL)
+#  if (LZO_UINT_MAX <= UINT_MAX)
+#    define __LZO_MMODEL
+#  elif defined(__LZO_DOS16) || defined(__LZO_WIN16)
+#    define __LZO_MMODEL        __huge
+#    define LZO_999_UNSUPPORTED
+#  elif defined(__LZO_PALMOS16) || defined(__LZO_TOS16)
+#    define __LZO_MMODEL
+#  else
+#    error "__LZO_MMODEL"
+#  endif
+#endif
+
+/* no typedef here because of const-pointer issues */
+#define lzo_byte                unsigned char __LZO_MMODEL
+#define lzo_bytep               unsigned char __LZO_MMODEL *
+#define lzo_charp               char __LZO_MMODEL *
+#define lzo_voidp               void __LZO_MMODEL *
+#define lzo_shortp              short __LZO_MMODEL *
+#define lzo_ushortp             unsigned short __LZO_MMODEL *
+#define lzo_uint32p             lzo_uint32 __LZO_MMODEL *
+#define lzo_int32p              lzo_int32 __LZO_MMODEL *
+#define lzo_uintp               lzo_uint __LZO_MMODEL *
+#define lzo_intp                lzo_int __LZO_MMODEL *
+#define lzo_voidpp              lzo_voidp __LZO_MMODEL *
+#define lzo_bytepp              lzo_bytep __LZO_MMODEL *
+
+#ifndef lzo_sizeof_dict_t
+#  define lzo_sizeof_dict_t     sizeof(lzo_bytep)
+#endif
+
+/***********************************************************************
+// calling conventions and function types
+************************************************************************/
+
+/* linkage */
+#if !defined(__LZO_EXTERN_C)
+#  ifdef __cplusplus
+#    define __LZO_EXTERN_C      extern "C"
+#  else
+#    define __LZO_EXTERN_C      extern
+#  endif
+#endif
+
+/* calling convention */
+#if !defined(__LZO_CDECL)
+#  if defined(__LZO_DOS16) || defined(__LZO_WIN16)
+#    define __LZO_CDECL         __LZO_CMODEL __cdecl
+#  elif defined(__LZO_i386) && defined(_MSC_VER)
+#    define __LZO_CDECL         __LZO_CMODEL __cdecl
+#  elif defined(__LZO_i386) && defined(__WATCOMC__)
+#    define __LZO_CDECL         __LZO_CMODEL __cdecl
+#  else
+#    define __LZO_CDECL         __LZO_CMODEL
+#  endif
+#endif
+#if !defined(__LZO_ENTRY)
+#  define __LZO_ENTRY           __LZO_CDECL
+#endif
+
+/* C++ exception specification for extern "C" function types */
+#if !defined(__cplusplus)
+#  undef LZO_NOTHROW
+#  define LZO_NOTHROW
+#elif !defined(LZO_NOTHROW)
+#  define LZO_NOTHROW
+#endif
+
+	typedef int
+	 (__LZO_ENTRY * lzo_compress_t) (const lzo_byte * src, lzo_uint src_len,
+					 lzo_byte * dst, lzo_uintp dst_len,
+					 lzo_voidp wrkmem);
+
+	typedef int
+	 (__LZO_ENTRY * lzo_decompress_t) (const lzo_byte * src,
+					   lzo_uint src_len, lzo_byte * dst,
+					   lzo_uintp dst_len, lzo_voidp wrkmem);
+
+	typedef int
+	 (__LZO_ENTRY * lzo_optimize_t) (lzo_byte * src, lzo_uint src_len,
+					 lzo_byte * dst, lzo_uintp dst_len,
+					 lzo_voidp wrkmem);
+
+	typedef int
+	 (__LZO_ENTRY * lzo_compress_dict_t) (const lzo_byte * src,
+					      lzo_uint src_len, lzo_byte * dst,
+					      lzo_uintp dst_len,
+					      lzo_voidp wrkmem,
+					      const lzo_byte * dict,
+					      lzo_uint dict_len);
+
+	typedef int
+	 (__LZO_ENTRY * lzo_decompress_dict_t) (const lzo_byte * src,
+						lzo_uint src_len,
+						lzo_byte * dst,
+						lzo_uintp dst_len,
+						lzo_voidp wrkmem,
+						const lzo_byte * dict,
+						lzo_uint dict_len);
+
+/* assembler versions always use __cdecl */
+	typedef int
+	 (__LZO_CDECL * lzo_compress_asm_t) (const lzo_byte * src,
+					     lzo_uint src_len, lzo_byte * dst,
+					     lzo_uintp dst_len,
+					     lzo_voidp wrkmem);
+
+	typedef int
+	 (__LZO_CDECL * lzo_decompress_asm_t) (const lzo_byte * src,
+					       lzo_uint src_len, lzo_byte * dst,
+					       lzo_uintp dst_len,
+					       lzo_voidp wrkmem);
+
+/* a progress indicator callback function */
+	typedef void (__LZO_ENTRY * lzo_progress_callback_t) (lzo_uint,
+							      lzo_uint);
+
+/***********************************************************************
+// export information
+************************************************************************/
+
+/* DLL export information */
+#if !defined(__LZO_EXPORT1)
+#  define __LZO_EXPORT1
+#endif
+#if !defined(__LZO_EXPORT2)
+#  define __LZO_EXPORT2
+#endif
+
+/* exported calling convention for C functions */
+#if !defined(LZO_PUBLIC)
+#  define LZO_PUBLIC(_rettype) \
+                __LZO_EXPORT1 _rettype __LZO_EXPORT2 __LZO_ENTRY
+#endif
+#if !defined(LZO_EXTERN)
+#  define LZO_EXTERN(_rettype)          __LZO_EXTERN_C LZO_PUBLIC(_rettype)
+#endif
+#if !defined(LZO_PRIVATE)
+#  define LZO_PRIVATE(_rettype)         static _rettype __LZO_ENTRY
+#endif
+
+/* exported __cdecl calling convention for assembler functions */
+#if !defined(LZO_PUBLIC_CDECL)
+#  define LZO_PUBLIC_CDECL(_rettype) \
+                __LZO_EXPORT1 _rettype __LZO_EXPORT2 __LZO_CDECL
+#endif
+#if !defined(LZO_EXTERN_CDECL)
+#  define LZO_EXTERN_CDECL(_rettype)    __LZO_EXTERN_C LZO_PUBLIC_CDECL(_rettype)
+#endif
+
+/* exported global variables (LZO currently uses no static variables and
+ * is fully thread safe) */
+#if !defined(LZO_PUBLIC_VAR)
+#  define LZO_PUBLIC_VAR(_type) \
+                __LZO_EXPORT1 _type __LZO_EXPORT2 __LZO_DMODEL
+#endif
+#if !defined(LZO_EXTERN_VAR)
+#  define LZO_EXTERN_VAR(_type)         extern LZO_PUBLIC_VAR(_type)
+#endif
+
+/***********************************************************************
+// error codes and prototypes
+************************************************************************/
+
+/* Error codes for the compression/decompression functions. Negative
+ * values are errors, positive values will be used for special but
+ * normal events.
+ */
+#define LZO_E_OK                    0
+#define LZO_E_ERROR                 (-1)
+#define LZO_E_OUT_OF_MEMORY         (-2)	/* not used right now */
+#define LZO_E_NOT_COMPRESSIBLE      (-3)	/* not used right now */
+#define LZO_E_INPUT_OVERRUN         (-4)
+#define LZO_E_OUTPUT_OVERRUN        (-5)
+#define LZO_E_LOOKBEHIND_OVERRUN    (-6)
+#define LZO_E_EOF_NOT_FOUND         (-7)
+#define LZO_E_INPUT_NOT_CONSUMED    (-8)
+
+/* lzo_init() should be the first function you call.
+ * Check the return code !
+ *
+ * lzo_init() is a macro to allow checking that the library and the
+ * compiler's view of various types are consistent.
+ */
+#define lzo_init() __lzo_init2(LZO_VERSION,(int)sizeof(short),(int)sizeof(int),\
+    (int)sizeof(long),(int)sizeof(lzo_uint32),(int)sizeof(lzo_uint),\
+    (int)lzo_sizeof_dict_t,(int)sizeof(char *),(int)sizeof(lzo_voidp),\
+    (int)sizeof(lzo_compress_t))
+	 LZO_EXTERN(int) __lzo_init2(unsigned, int, int, int, int, int, int,
+				     int, int, int);
+
+/* checksum functions */
+	 LZO_EXTERN(lzo_uint32)
+	 lzo_crc32(lzo_uint32 _c, const lzo_byte * _buf, lzo_uint _len);
+
+/* misc. */
+	typedef union {
+		lzo_bytep p;
+		lzo_uint u;
+	} __lzo_pu_u;
+	typedef union {
+		lzo_bytep p;
+		lzo_uint32 u32;
+	} __lzo_pu32_u;
+	typedef union {
+		void *vp;
+		lzo_bytep bp;
+		lzo_uint32 u32;
+		long l;
+	} lzo_align_t;
+/* Yields _ptr advanced by the gap __lzo_align_gap() reports for _size; note that _ptr is expanded twice. */
+#define LZO_PTR_ALIGN_UP(_ptr,_size) \
+    ((_ptr) + (lzo_uint) __lzo_align_gap((const lzo_voidp)(_ptr),(lzo_uint)(_size)))
+
+/* deprecated - only for backward compatibility */
+#define LZO_ALIGN(_ptr,_size) LZO_PTR_ALIGN_UP(_ptr,_size)
+
+#ifdef __cplusplus
+}				/* extern "C" */
+#endif
+#endif				/* already included */
--- /dev/null	2006-06-20 03:34:40.298038750 -0700
+++ devel/fs/reiser4/plugin/compress/minilzo.c	2006-08-26 15:39:12.000000000 -0700
@@ -0,0 +1,2155 @@
+/* minilzo.c -- mini subset of the LZO real-time data compression library
+   adapted for the reiser4 compression transform plugin.
+
+   This file is part of the LZO real-time data compression library
+   and not included in any proprietary licenses of reiser4.
+
+   Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
+   Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
+   All Rights Reserved.
+
+   The LZO library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of
+   the License, or (at your option) any later version.
+
+   The LZO library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with the LZO library; see the file COPYING.
+   If not, write to the Free Software Foundation, Inc.,
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+   Markus F.X.J. Oberhumer
+   <markus@oberhumer.com>
+   http://www.oberhumer.com/opensource/lzo/
+ */
+
+/*
+ * NOTE:
+ *   the full LZO package can be found at
+ *   http://www.oberhumer.com/opensource/lzo/
+ */
+
+#include "../../debug.h"	/* for reiser4 assert macro -edward */
+
+#define __LZO_IN_MINILZO
+#define LZO_BUILD
+
+#ifdef MINILZO_HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#undef LZO_HAVE_CONFIG_H
+#include "minilzo.h"
+
+#if !defined(MINILZO_VERSION) || (MINILZO_VERSION != 0x1080)
+#  error "version mismatch in miniLZO source files"
+#endif
+
+#ifdef MINILZO_HAVE_CONFIG_H
+#  define LZO_HAVE_CONFIG_H
+#endif
+
+
+#ifndef __LZO_CONF_H
+#define __LZO_CONF_H
+
+#if !defined(__LZO_IN_MINILZO)
+#  ifndef __LZOCONF_H
+#    include <lzoconf.h>
+#  endif
+#endif
+
+#if defined(__BOUNDS_CHECKING_ON)
+#  include <unchecked.h>
+#else
+#  define BOUNDS_CHECKING_OFF_DURING(stmt)      stmt
+#  define BOUNDS_CHECKING_OFF_IN_EXPR(expr)     (expr)
+#endif
+
+#  define HAVE_MEMCMP
+#  define HAVE_MEMCPY
+#  define HAVE_MEMMOVE
+#  define HAVE_MEMSET
+
+#if defined(__LZO_DOS16) || defined(__LZO_WIN16)
+#  define HAVE_MALLOC_H
+#  define HAVE_HALLOC
+#endif
+
+#undef NDEBUG
+#if !defined(LZO_DEBUG)
+#  define NDEBUG
+#endif
+#if defined(LZO_DEBUG) || !defined(NDEBUG)
+#  if !defined(NO_STDIO_H)
+#    include <stdio.h>
+#  endif
+#endif
+# if 0				/* edward */
+#include <assert.h>
+#endif				/* edward */
+/* Compile-time check: a false expr makes the typedef's array size 1 - 2 = -1, which does not compile; a true expr gives size 1. */
+#if !defined(LZO_COMPILE_TIME_ASSERT)
+#  define LZO_COMPILE_TIME_ASSERT(expr) \
+	{ typedef int __lzo_compile_time_assert_fail[1 - 2 * !(expr)]; }
+#endif
+
+#if !defined(LZO_UNUSED)
+#  if 1
+#    define LZO_UNUSED(var)     ((void)&var)
+#  elif 0
+#    define LZO_UNUSED(var)     { typedef int __lzo_unused[sizeof(var) ? 2 : 1]; }
+#  else
+#    define LZO_UNUSED(parm)    (parm = parm)
+#  endif
+#endif
+
+#if !defined(__inline__) && !defined(__GNUC__)
+#  if defined(__cplusplus)
+#    define __inline__      inline
+#  else
+#    define __inline__
+#  endif
+#endif
+
+#if defined(NO_MEMCMP)
+#  undef HAVE_MEMCMP
+#endif
+
+#if !defined(HAVE_MEMSET)
+#  undef memset
+#  define memset    lzo_memset
+#endif
+
+#  define LZO_BYTE(x)       ((unsigned char) ((x) & 0xff))
+
+#define LZO_MAX(a,b)        ((a) >= (b) ? (a) : (b))
+#define LZO_MIN(a,b)        ((a) <= (b) ? (a) : (b))
+#define LZO_MAX3(a,b,c)     ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c))
+#define LZO_MIN3(a,b,c)     ((a) <= (b) ? LZO_MIN(a,c) : LZO_MIN(b,c))
+
+#define lzo_sizeof(type)    ((lzo_uint) (sizeof(type)))
+
+#define LZO_HIGH(array)     ((lzo_uint) (sizeof(array)/sizeof(*(array))))
+
+#define LZO_SIZE(bits)      (1u << (bits))
+#define LZO_MASK(bits)      (LZO_SIZE(bits) - 1)
+
+#define LZO_LSIZE(bits)     (1ul << (bits))
+#define LZO_LMASK(bits)     (LZO_LSIZE(bits) - 1)
+
+#define LZO_USIZE(bits)     ((lzo_uint) 1 << (bits))
+#define LZO_UMASK(bits)     (LZO_USIZE(bits) - 1)
+
+#define LZO_STYPE_MAX(b)    (((1l  << (8*(b)-2)) - 1l)  + (1l  << (8*(b)-2)))
+#define LZO_UTYPE_MAX(b)    (((1ul << (8*(b)-1)) - 1ul) + (1ul << (8*(b)-1)))
+
+#if !defined(SIZEOF_UNSIGNED)
+#  if (UINT_MAX == 0xffff)
+#    define SIZEOF_UNSIGNED         2
+#  elif (UINT_MAX == LZO_0xffffffffL)
+#    define SIZEOF_UNSIGNED         4
+#  elif (UINT_MAX >= LZO_0xffffffffL)
+#    define SIZEOF_UNSIGNED         8
+#  else
+#    error "SIZEOF_UNSIGNED"
+#  endif
+#endif
+
+#if !defined(SIZEOF_UNSIGNED_LONG)
+#  if (ULONG_MAX == LZO_0xffffffffL)
+#    define SIZEOF_UNSIGNED_LONG    4
+#  elif (ULONG_MAX >= LZO_0xffffffffL)
+#    define SIZEOF_UNSIGNED_LONG    8
+#  else
+#    error "SIZEOF_UNSIGNED_LONG"
+#  endif
+#endif
+
+#if !defined(SIZEOF_SIZE_T)
+#  define SIZEOF_SIZE_T             SIZEOF_UNSIGNED
+#endif
+#if !defined(SIZE_T_MAX)
+#  define SIZE_T_MAX                LZO_UTYPE_MAX(SIZEOF_SIZE_T)
+#endif
+
+#if 1 && defined(__LZO_i386) && (UINT_MAX == LZO_0xffffffffL)
+#  if !defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX == 0xffff)
+#    define LZO_UNALIGNED_OK_2
+#  endif
+#  if !defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX == LZO_0xffffffffL)
+#    define LZO_UNALIGNED_OK_4
+#  endif
+#endif
+
+#if defined(LZO_UNALIGNED_OK_2) || defined(LZO_UNALIGNED_OK_4)
+#  if !defined(LZO_UNALIGNED_OK)
+#    define LZO_UNALIGNED_OK
+#  endif
+#endif
+
+#if defined(__LZO_NO_UNALIGNED)
+#  undef LZO_UNALIGNED_OK
+#  undef LZO_UNALIGNED_OK_2
+#  undef LZO_UNALIGNED_OK_4
+#endif
+
+#if defined(LZO_UNALIGNED_OK_2) && (USHRT_MAX != 0xffff)
+#  error "LZO_UNALIGNED_OK_2 must not be defined on this system"
+#endif
+#if defined(LZO_UNALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL)
+#  error "LZO_UNALIGNED_OK_4 must not be defined on this system"
+#endif
+
+#if defined(__LZO_NO_ALIGNED)
+#  undef LZO_ALIGNED_OK_4
+#endif
+
+#if defined(LZO_ALIGNED_OK_4) && (LZO_UINT32_MAX != LZO_0xffffffffL)
+#  error "LZO_ALIGNED_OK_4 must not be defined on this system"
+#endif
+
+#define LZO_LITTLE_ENDIAN       1234
+#define LZO_BIG_ENDIAN          4321
+#define LZO_PDP_ENDIAN          3412
+
+#if !defined(LZO_BYTE_ORDER)
+#  if defined(MFX_BYTE_ORDER)
+#    define LZO_BYTE_ORDER      MFX_BYTE_ORDER
+#  elif defined(__LZO_i386)
+#    define LZO_BYTE_ORDER      LZO_LITTLE_ENDIAN
+#  elif defined(BYTE_ORDER)
+#    define LZO_BYTE_ORDER      BYTE_ORDER
+#  elif defined(__BYTE_ORDER)
+#    define LZO_BYTE_ORDER      __BYTE_ORDER
+#  endif
+#endif
+
+#if defined(LZO_BYTE_ORDER)
+#  if (LZO_BYTE_ORDER != LZO_LITTLE_ENDIAN) && \
+      (LZO_BYTE_ORDER != LZO_BIG_ENDIAN)
+#    error "invalid LZO_BYTE_ORDER"
+#  endif
+#endif
+
+#if defined(LZO_UNALIGNED_OK) && !defined(LZO_BYTE_ORDER)
+#  error "LZO_BYTE_ORDER is not defined"
+#endif
+
+#define LZO_OPTIMIZE_GNUC_i386_IS_BUGGY
+
+#if defined(NDEBUG) && !defined(LZO_DEBUG) && !defined(__LZO_CHECKER)
+#  if defined(__GNUC__) && defined(__i386__)
+#    if !defined(LZO_OPTIMIZE_GNUC_i386_IS_BUGGY)
+#      define LZO_OPTIMIZE_GNUC_i386
+#    endif
+#  endif
+#endif
+
+__LZO_EXTERN_C const lzo_uint32 _lzo_crc32_table[256];
+
+#define _LZO_STRINGIZE(x)           #x
+#define _LZO_MEXPAND(x)             _LZO_STRINGIZE(x)
+
+#define _LZO_CONCAT2(a,b)           a ## b
+#define _LZO_CONCAT3(a,b,c)         a ## b ## c
+#define _LZO_CONCAT4(a,b,c,d)       a ## b ## c ## d
+#define _LZO_CONCAT5(a,b,c,d,e)     a ## b ## c ## d ## e
+
+#define _LZO_ECONCAT2(a,b)          _LZO_CONCAT2(a,b)
+#define _LZO_ECONCAT3(a,b,c)        _LZO_CONCAT3(a,b,c)
+#define _LZO_ECONCAT4(a,b,c,d)      _LZO_CONCAT4(a,b,c,d)
+#define _LZO_ECONCAT5(a,b,c,d,e)    _LZO_CONCAT5(a,b,c,d,e)
+
+#ifndef __LZO_PTR_H
+#define __LZO_PTR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__LZO_DOS16) || defined(__LZO_WIN16)
+#  include <dos.h>
+#  if 1 && defined(__WATCOMC__)
+#    include <i86.h>
+	__LZO_EXTERN_C unsigned char _HShift;
+#    define __LZO_HShift    _HShift
+#  elif 1 && defined(_MSC_VER)
+	__LZO_EXTERN_C unsigned short __near _AHSHIFT;
+#    define __LZO_HShift    ((unsigned) &_AHSHIFT)
+#  elif defined(__LZO_WIN16)
+#    define __LZO_HShift    3
+#  else
+#    define __LZO_HShift    12
+#  endif
+#  if !defined(_FP_SEG) && defined(FP_SEG)
+#    define _FP_SEG         FP_SEG
+#  endif
+#  if !defined(_FP_OFF) && defined(FP_OFF)
+#    define _FP_OFF         FP_OFF
+#  endif
+#endif
+
+#if !defined(lzo_ptrdiff_t)
+#  if (UINT_MAX >= LZO_0xffffffffL)
+	typedef ptrdiff_t lzo_ptrdiff_t;
+#  else
+	typedef long lzo_ptrdiff_t;
+#  endif
+#endif
+
+#if !defined(__LZO_HAVE_PTR_T)
+#  if defined(lzo_ptr_t)
+#    define __LZO_HAVE_PTR_T
+#  endif
+#endif
+#if !defined(__LZO_HAVE_PTR_T)
+#  if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_LONG)
+#    if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_LONG)
+	typedef unsigned long lzo_ptr_t;
+	typedef long lzo_sptr_t;
+#      define __LZO_HAVE_PTR_T
+#    endif
+#  endif
+#endif
+#if !defined(__LZO_HAVE_PTR_T)
+#  if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED)
+#    if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED)
+	typedef unsigned int lzo_ptr_t;
+	typedef int lzo_sptr_t;
+#      define __LZO_HAVE_PTR_T
+#    endif
+#  endif
+#endif
+#if !defined(__LZO_HAVE_PTR_T)
+#  if defined(SIZEOF_CHAR_P) && defined(SIZEOF_UNSIGNED_SHORT)
+#    if (SIZEOF_CHAR_P == SIZEOF_UNSIGNED_SHORT)
+	typedef unsigned short lzo_ptr_t;
+	typedef short lzo_sptr_t;
+#      define __LZO_HAVE_PTR_T
+#    endif
+#  endif
+#endif
+#if !defined(__LZO_HAVE_PTR_T)
+#  if defined(LZO_HAVE_CONFIG_H) || defined(SIZEOF_CHAR_P)
+#    error "no suitable type for lzo_ptr_t"
+#  else
+	typedef unsigned long lzo_ptr_t;
+	typedef long lzo_sptr_t;
+#    define __LZO_HAVE_PTR_T
+#  endif
+#endif
+
+#if defined(__LZO_DOS16) || defined(__LZO_WIN16)
+#define PTR(a)              ((lzo_bytep) (a))
+#define PTR_ALIGNED_4(a)    ((_FP_OFF(a) & 3) == 0)
+#define PTR_ALIGNED2_4(a,b) (((_FP_OFF(a) | _FP_OFF(b)) & 3) == 0)
+#else
+#define PTR(a)              ((lzo_ptr_t) (a))
+#define PTR_LINEAR(a)       PTR(a)
+#define PTR_ALIGNED_4(a)    ((PTR_LINEAR(a) & 3) == 0)
+#define PTR_ALIGNED_8(a)    ((PTR_LINEAR(a) & 7) == 0)
+#define PTR_ALIGNED2_4(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 3) == 0)
+#define PTR_ALIGNED2_8(a,b) (((PTR_LINEAR(a) | PTR_LINEAR(b)) & 7) == 0)
+#endif
+
+#define PTR_LT(a,b)         (PTR(a) < PTR(b))
+#define PTR_GE(a,b)         (PTR(a) >= PTR(b))
+#define PTR_DIFF(a,b)       ((lzo_ptrdiff_t) (PTR(a) - PTR(b)))
+#define pd(a,b)             ((lzo_uint) ((a)-(b)))
+
+	typedef union {
+		char a_char;
+		unsigned char a_uchar;
+		short a_short;
+		unsigned short a_ushort;
+		int a_int;
+		unsigned int a_uint;
+		long a_long;
+		unsigned long a_ulong;
+		lzo_int a_lzo_int;
+		lzo_uint a_lzo_uint;
+		lzo_int32 a_lzo_int32;
+		lzo_uint32 a_lzo_uint32;
+		ptrdiff_t a_ptrdiff_t;
+		lzo_ptrdiff_t a_lzo_ptrdiff_t;
+		lzo_ptr_t a_lzo_ptr_t;
+		lzo_voidp a_lzo_voidp;
+		void *a_void_p;
+		lzo_bytep a_lzo_bytep;
+		lzo_bytepp a_lzo_bytepp;
+		lzo_uintp a_lzo_uintp;
+		lzo_uint *a_lzo_uint_p;
+		lzo_uint32p a_lzo_uint32p;
+		lzo_uint32 *a_lzo_uint32_p;
+		unsigned char *a_uchar_p;
+		char *a_char_p;
+	} lzo_full_align_t;
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+#define LZO_DETERMINISTIC
+#define LZO_DICT_USE_PTR
+#if defined(__LZO_DOS16) || defined(__LZO_WIN16) || defined(__LZO_STRICT_16BIT)
+#  undef LZO_DICT_USE_PTR
+#endif
+#if defined(LZO_DICT_USE_PTR)
+#  define lzo_dict_t    const lzo_bytep
+#  define lzo_dict_p    lzo_dict_t __LZO_MMODEL *
+#else
+#  define lzo_dict_t    lzo_uint
+#  define lzo_dict_p    lzo_dict_t __LZO_MMODEL *
+#endif
+#if !defined(lzo_moff_t)
+#define lzo_moff_t      lzo_uint
+#endif
+#endif
+static lzo_ptr_t __lzo_ptr_linear(const lzo_voidp ptr)
+{
+	lzo_ptr_t p;
+	/* Turn ptr into an integer of type lzo_ptr_t so callers can do arithmetic on it. */
+#if defined(__LZO_DOS16) || defined(__LZO_WIN16)	/* 16-bit DOS/Windows: combine the shifted segment with the offset */
+	p = (((lzo_ptr_t) (_FP_SEG(ptr))) << (16 - __LZO_HShift)) +
+	    (_FP_OFF(ptr));
+#else	/* all other targets: PTR_LINEAR is a plain cast to lzo_ptr_t */
+	p = PTR_LINEAR(ptr);
+#endif
+
+	return p;
+}
+/* Return the number of bytes to add to ptr to reach the next multiple of size (0 if ptr already is one). */
+static unsigned __lzo_align_gap(const lzo_voidp ptr, lzo_uint size)
+{
+	lzo_ptr_t p, s, n;
+	/* Two-argument assert (label, condition) comes from the reiser4 debug header included by this file. */
+	assert("lzo-01", size > 0);
+
+	p = __lzo_ptr_linear(ptr);	/* pointer value as an integer */
+	s = (lzo_ptr_t) (size - 1);	/* largest gap that can ever be needed */
+	n = (((p + s) / size) * size) - p;	/* p rounded up to a multiple of size, minus p */
+
+	assert("lzo-02", (long)n >= 0);
+	assert("lzo-03", n <= s);
+
+	return (unsigned)n;
+}
+
+#ifndef __LZO_UTIL_H
+#define __LZO_UTIL_H
+
+#ifndef __LZO_CONF_H
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if 1 && defined(HAVE_MEMCPY)
+#if !defined(__LZO_DOS16) && !defined(__LZO_WIN16)
+/* memcpy() len bytes, then advance dest and src by len. NOTE(review): expands to three bare statements with no braces or do/while, so an unbraced if/else around a use would guard only the memcpy(). */
+#define MEMCPY8_DS(dest,src,len) \
+    memcpy(dest,src,len); \
+    dest += len; \
+    src += len
+
+#endif
+#endif
+
+#if !defined(MEMCPY8_DS)
+
+#define MEMCPY8_DS(dest,src,len) \
+    { register lzo_uint __l = (len) / 8; \
+    do { \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+	*dest++ = *src++; \
+    } while (--__l > 0); }
+
+#endif
+/* Forward element-by-element copy that advances dest and src and decrements len in place; the body runs before the test, so at least one element is always copied. */
+#define MEMCPY_DS(dest,src,len) \
+    do *dest++ = *src++; \
+    while (--len > 0)
+/* Same expansion as MEMCPY_DS: a forward copy, one element at a time. */
+#define MEMMOVE_DS(dest,src,len) \
+    do *dest++ = *src++; \
+    while (--len > 0)
+
+
+#if (LZO_UINT_MAX <= SIZE_T_MAX) && defined(HAVE_MEMSET)
+
+#define BZERO8_PTR(s,l,n)   memset((s),0,(lzo_uint)(l)*(n))
+
+#else
+
+#define BZERO8_PTR(s,l,n) \
+    lzo_memset((lzo_voidp)(s),0,(lzo_uint)(l)*(n))
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+/* If you use the LZO library in a product, you *must* keep this
+ * copyright string in the executable of your product.
+ */
+/* NOTE(review): the guard below tests __LZO_IN_MINLZO, while this file defines __LZO_IN_MINILZO; unless the shorter name is defined elsewhere, only the LZO_VERSION_STRING branch is compiled - confirm this is intended. */
+static const lzo_byte __lzo_copyright[] =
+#if !defined(__LZO_IN_MINLZO)
+    LZO_VERSION_STRING;
+#else
+    "\n\n\n"
+    "LZO real-time data compression library.\n"
+    "Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 Markus Franz Xaver Johannes Oberhumer\n"
+    "<markus.oberhumer@jk.uni-linz.ac.at>\n"
+    "http://www.oberhumer.com/opensource/lzo/\n"
+    "\n"
+    "LZO version: v" LZO_VERSION_STRING ", " LZO_VERSION_DATE "\n"
+    "LZO build date: " __DATE__ " " __TIME__ "\n\n"
+    "LZO special compilation options:\n"
+#ifdef __cplusplus
+    " __cplusplus\n"
+#endif
+#if defined(__PIC__)
+    " __PIC__\n"
+#elif defined(__pic__)
+    " __pic__\n"
+#endif
+#if (UINT_MAX < LZO_0xffffffffL)
+    " 16BIT\n"
+#endif
+#if defined(__LZO_STRICT_16BIT)
+    " __LZO_STRICT_16BIT\n"
+#endif
+#if (UINT_MAX > LZO_0xffffffffL)
+    " UINT_MAX=" _LZO_MEXPAND(UINT_MAX) "\n"
+#endif
+#if (ULONG_MAX > LZO_0xffffffffL)
+    " ULONG_MAX=" _LZO_MEXPAND(ULONG_MAX) "\n"
+#endif
+#if defined(LZO_BYTE_ORDER)
+    " LZO_BYTE_ORDER=" _LZO_MEXPAND(LZO_BYTE_ORDER) "\n"
+#endif
+#if defined(LZO_UNALIGNED_OK_2)
+    " LZO_UNALIGNED_OK_2\n"
+#endif
+#if defined(LZO_UNALIGNED_OK_4)
+    " LZO_UNALIGNED_OK_4\n"
+#endif
+#if defined(LZO_ALIGNED_OK_4)
+    " LZO_ALIGNED_OK_4\n"
+#endif
+#if defined(LZO_DICT_USE_PTR)
+    " LZO_DICT_USE_PTR\n"
+#endif
+#if defined(__LZO_QUERY_COMPRESS)
+    " __LZO_QUERY_COMPRESS\n"
+#endif
+#if defined(__LZO_QUERY_DECOMPRESS)
+    " __LZO_QUERY_DECOMPRESS\n"
+#endif
+#if defined(__LZO_IN_MINILZO)
+    " __LZO_IN_MINILZO\n"
+#endif
+    "\n\n" "$Id: LZO " LZO_VERSION_STRING " built " __DATE__ " " __TIME__
+#if defined(__GNUC__) && defined(__VERSION__)
+    " by gcc " __VERSION__
+#elif defined(__BORLANDC__)
+    " by Borland C " _LZO_MEXPAND(__BORLANDC__)
+#elif defined(_MSC_VER)
+    " by Microsoft C " _LZO_MEXPAND(_MSC_VER)
+#elif defined(__PUREC__)
+    " by Pure C " _LZO_MEXPAND(__PUREC__)
+#elif defined(__SC__)
+    " by Symantec C " _LZO_MEXPAND(__SC__)
+#elif defined(__TURBOC__)
+    " by Turbo C " _LZO_MEXPAND(__TURBOC__)
+#elif defined(__WATCOMC__)
+    " by Watcom C " _LZO_MEXPAND(__WATCOMC__)
+#endif
+    " $\n"
+    "$Copyright: LZO (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 Markus Franz Xaver Johannes Oberhumer $\n";
+#endif
+/* NOTE(review): 65521 and 5552 match the modulus and block length customarily used for Adler-32 - confirm against the checksum code that uses them. */
+#define LZO_BASE 65521u
+#define LZO_NMAX 5552
+/* LZO_DOn folds the n consecutive bytes buf[i] .. buf[i+n-1] into the running sums s1 and s2, which must exist in the expanding scope. */
+#define LZO_DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
+#define LZO_DO2(buf,i)  LZO_DO1(buf,i); LZO_DO1(buf,i+1);
+#define LZO_DO4(buf,i)  LZO_DO2(buf,i); LZO_DO2(buf,i+2);
+#define LZO_DO8(buf,i)  LZO_DO4(buf,i); LZO_DO4(buf,i+4);
+#define LZO_DO16(buf,i) LZO_DO8(buf,i); LZO_DO8(buf,i+8);
+/* Classify an integer type by casting -1 to it: the result compares below zero for signed types and above zero for unsigned ones. */
+#  define IS_SIGNED(type)       (((type) (-1)) < ((type) 0))
+#  define IS_UNSIGNED(type)     (((type) (-1)) > ((type) 0))
+/* True when x has at most one bit set; note that this also accepts x == 0. */
+#define IS_POWER_OF_2(x)        (((x) & ((x) - 1)) == 0)
+
+static lzo_bool schedule_insns_bug(void);
+static lzo_bool strength_reduce_bug(int *);
+
+#  define __lzo_assert(x)   ((x) ? 1 : 0)
+
+#undef COMPILE_TIME_ASSERT
+
+#  define COMPILE_TIME_ASSERT(expr)     LZO_COMPILE_TIME_ASSERT(expr)
+
+/*
+ * Sanity-check the integral types the library relies on.
+ *
+ * Almost everything here is a compile-time assertion on sizes,
+ * signedness and limit macros; the only run-time test is that
+ * LZO_BYTE(257) yields 1.
+ *
+ * Returns 1 when the run-time test passes, 0 otherwise.
+ */
+static lzo_bool basic_integral_check(void)
+{
+	lzo_bool r = 1;
+
+	/* Minimum sizes and ordering of the standard integer types. */
+	COMPILE_TIME_ASSERT(CHAR_BIT == 8);
+	COMPILE_TIME_ASSERT(sizeof(char) == 1);
+	COMPILE_TIME_ASSERT(sizeof(short) >= 2);
+	COMPILE_TIME_ASSERT(sizeof(long) >= 4);
+	COMPILE_TIME_ASSERT(sizeof(int) >= sizeof(short));
+	COMPILE_TIME_ASSERT(sizeof(long) >= sizeof(int));
+
+	/* Signed and unsigned LZO types must have matching sizes. */
+	COMPILE_TIME_ASSERT(sizeof(lzo_uint) == sizeof(lzo_int));
+	COMPILE_TIME_ASSERT(sizeof(lzo_uint32) == sizeof(lzo_int32));
+
+	COMPILE_TIME_ASSERT(sizeof(lzo_uint32) >= 4);
+	COMPILE_TIME_ASSERT(sizeof(lzo_uint32) >= sizeof(unsigned));
+#if defined(__LZO_STRICT_16BIT)
+	COMPILE_TIME_ASSERT(sizeof(lzo_uint) == 2);
+#else
+	COMPILE_TIME_ASSERT(sizeof(lzo_uint) >= 4);
+	COMPILE_TIME_ASSERT(sizeof(lzo_uint) >= sizeof(unsigned));
+#endif
+
+	/* The limit macro of short must agree with sizeof(short). */
+#if (USHRT_MAX == 65535u)
+	COMPILE_TIME_ASSERT(sizeof(short) == 2);
+#elif (USHRT_MAX == LZO_0xffffffffL)
+	COMPILE_TIME_ASSERT(sizeof(short) == 4);
+#elif (USHRT_MAX >= LZO_0xffffffffL)
+	COMPILE_TIME_ASSERT(sizeof(short) > 4);
+#endif
+	/* The corresponding int/long/SIZEOF_* checks below are compiled
+	   out by the "#if 0". */
+#if 0				/* to make gcc happy -edward */
+#if (UINT_MAX == 65535u)
+	COMPILE_TIME_ASSERT(sizeof(int) == 2);
+#elif (UINT_MAX == LZO_0xffffffffL)
+	COMPILE_TIME_ASSERT(sizeof(int) == 4);
+#elif (UINT_MAX >= LZO_0xffffffffL)
+	COMPILE_TIME_ASSERT(sizeof(int) > 4);
+#endif
+#if (ULONG_MAX == 65535ul)
+	COMPILE_TIME_ASSERT(sizeof(long) == 2);
+#elif (ULONG_MAX == LZO_0xffffffffL)
+	COMPILE_TIME_ASSERT(sizeof(long) == 4);
+#elif (ULONG_MAX >= LZO_0xffffffffL)
+	COMPILE_TIME_ASSERT(sizeof(long) > 4);
+#endif
+#if defined(SIZEOF_UNSIGNED)
+	COMPILE_TIME_ASSERT(SIZEOF_UNSIGNED == sizeof(unsigned));
+#endif
+#if defined(SIZEOF_UNSIGNED_LONG)
+	COMPILE_TIME_ASSERT(SIZEOF_UNSIGNED_LONG == sizeof(unsigned long));
+#endif
+#if defined(SIZEOF_UNSIGNED_SHORT)
+	COMPILE_TIME_ASSERT(SIZEOF_UNSIGNED_SHORT == sizeof(unsigned short));
+#endif
+#if !defined(__LZO_IN_MINILZO)
+#if defined(SIZEOF_SIZE_T)
+	COMPILE_TIME_ASSERT(SIZEOF_SIZE_T == sizeof(size_t));
+#endif
+#endif
+#endif				/* -edward */
+
+	/* Signedness of the standard types ... */
+	COMPILE_TIME_ASSERT(IS_UNSIGNED(unsigned char));
+	COMPILE_TIME_ASSERT(IS_UNSIGNED(unsigned short));
+	COMPILE_TIME_ASSERT(IS_UNSIGNED(unsigned));
+	COMPILE_TIME_ASSERT(IS_UNSIGNED(unsigned long));
+	COMPILE_TIME_ASSERT(IS_SIGNED(short));
+	COMPILE_TIME_ASSERT(IS_SIGNED(int));
+	COMPILE_TIME_ASSERT(IS_SIGNED(long));
+
+	/* ... and of the LZO types. */
+	COMPILE_TIME_ASSERT(IS_UNSIGNED(lzo_uint32));
+	COMPILE_TIME_ASSERT(IS_UNSIGNED(lzo_uint));
+	COMPILE_TIME_ASSERT(IS_SIGNED(lzo_int32));
+	COMPILE_TIME_ASSERT(IS_SIGNED(lzo_int));
+
+	/* The *_MAX macros must match the value LZO derives from the
+	   size of each type. */
+	COMPILE_TIME_ASSERT(INT_MAX == LZO_STYPE_MAX(sizeof(int)));
+	COMPILE_TIME_ASSERT(UINT_MAX == LZO_UTYPE_MAX(sizeof(unsigned)));
+	COMPILE_TIME_ASSERT(LONG_MAX == LZO_STYPE_MAX(sizeof(long)));
+	COMPILE_TIME_ASSERT(ULONG_MAX == LZO_UTYPE_MAX(sizeof(unsigned long)));
+	//    COMPILE_TIME_ASSERT(SHRT_MAX   == LZO_STYPE_MAX(sizeof(short))); /* edward */
+	COMPILE_TIME_ASSERT(USHRT_MAX == LZO_UTYPE_MAX(sizeof(unsigned short)));
+	COMPILE_TIME_ASSERT(LZO_UINT32_MAX ==
+			    LZO_UTYPE_MAX(sizeof(lzo_uint32)));
+	COMPILE_TIME_ASSERT(LZO_UINT_MAX == LZO_UTYPE_MAX(sizeof(lzo_uint)));
+#if !defined(__LZO_IN_MINILZO)
+	COMPILE_TIME_ASSERT(SIZE_T_MAX == LZO_UTYPE_MAX(sizeof(size_t)));
+#endif
+
+	/* The only run-time check in this function. */
+	r &= __lzo_assert(LZO_BYTE(257) == 1);
+
+	return r;
+}
+
+/*
+ * Sanity-check the pointer related types the library relies on.
+ *
+ * All checks are compile-time assertions; r is never modified, so the
+ * function always returns 1 once it compiles.
+ */
+static lzo_bool basic_ptr_check(void)
+{
+	lzo_bool r = 1;
+
+	COMPILE_TIME_ASSERT(sizeof(char *) >= sizeof(int));
+	COMPILE_TIME_ASSERT(sizeof(lzo_byte *) >= sizeof(char *));
+
+	/* All LZO pointer types must have the same size. */
+	COMPILE_TIME_ASSERT(sizeof(lzo_voidp) == sizeof(lzo_byte *));
+	COMPILE_TIME_ASSERT(sizeof(lzo_voidp) == sizeof(lzo_voidpp));
+	COMPILE_TIME_ASSERT(sizeof(lzo_voidp) == sizeof(lzo_bytepp));
+	COMPILE_TIME_ASSERT(sizeof(lzo_voidp) >= sizeof(lzo_uint));
+
+	/* The integer types used to hold pointers must be pointer sized. */
+	COMPILE_TIME_ASSERT(sizeof(lzo_ptr_t) == sizeof(lzo_voidp));
+	COMPILE_TIME_ASSERT(sizeof(lzo_ptr_t) == sizeof(lzo_sptr_t));
+	COMPILE_TIME_ASSERT(sizeof(lzo_ptr_t) >= sizeof(lzo_uint));
+
+	COMPILE_TIME_ASSERT(sizeof(lzo_ptrdiff_t) >= 4);
+	COMPILE_TIME_ASSERT(sizeof(lzo_ptrdiff_t) >= sizeof(ptrdiff_t));
+
+	COMPILE_TIME_ASSERT(sizeof(ptrdiff_t) >= sizeof(size_t));
+	COMPILE_TIME_ASSERT(sizeof(lzo_ptrdiff_t) >= sizeof(lzo_uint));
+
+	/* Optional cross-checks against configure-style SIZEOF_* macros. */
+#if defined(SIZEOF_CHAR_P)
+	COMPILE_TIME_ASSERT(SIZEOF_CHAR_P == sizeof(char *));
+#endif
+#if defined(SIZEOF_PTRDIFF_T)
+	COMPILE_TIME_ASSERT(SIZEOF_PTRDIFF_T == sizeof(ptrdiff_t));
+#endif
+
+	/* Signedness of the pointer-difference and pointer-integer types. */
+	COMPILE_TIME_ASSERT(IS_SIGNED(ptrdiff_t));
+	COMPILE_TIME_ASSERT(IS_UNSIGNED(size_t));
+	COMPILE_TIME_ASSERT(IS_SIGNED(lzo_ptrdiff_t));
+	COMPILE_TIME_ASSERT(IS_SIGNED(lzo_sptr_t));
+	COMPILE_TIME_ASSERT(IS_UNSIGNED(lzo_ptr_t));
+	COMPILE_TIME_ASSERT(IS_UNSIGNED(lzo_moff_t));
+
+	return r;
+}
+
+/*
+ * Run-time self-test of pointer alignment and pointer representation.
+ *
+ * Works on two local buffers: _wrkmem, used as a small table of byte
+ * pointers, and x, a byte array filled with LZO_BYTE(index).  Every
+ * individual check is and-ed into r; the later stages only run while r
+ * is still 1.
+ *
+ * Returns 1 when all checks pass, 0 otherwise.
+ */
+static lzo_bool ptr_check(void)
+{
+	lzo_bool r = 1;
+	int i;
+	char _wrkmem[10 * sizeof(lzo_byte *) + sizeof(lzo_full_align_t)];
+	lzo_bytep wrkmem;
+	lzo_bytepp dict;
+	unsigned char x[4 * sizeof(lzo_full_align_t)];
+	long d;
+	lzo_full_align_t a;
+	lzo_full_align_t u;
+
+	for (i = 0; i < (int)sizeof(x); i++)
+		x[i] = LZO_BYTE(i);
+
+	/* Round the start of _wrkmem up to a lzo_full_align_t boundary. */
+	wrkmem =
+	    LZO_PTR_ALIGN_UP((lzo_byte *) _wrkmem, sizeof(lzo_full_align_t));
+
+	/* Convert the byte pointer to a pointer-to-pointer through the
+	   union instead of a cast. */
+	u.a_lzo_bytep = wrkmem;
+	dict = u.a_lzo_bytepp;
+
+	/* The aligned pointer must lie within one alignment unit of the
+	   start of the buffer. */
+	d = (long)((const lzo_bytep)dict - (const lzo_bytep)_wrkmem);
+	r &= __lzo_assert(d >= 0);
+	r &= __lzo_assert(d < (long)sizeof(lzo_full_align_t));
+
+	/* An all-zero-bytes pointer must compare equal to NULL. */
+	memset(&a, 0, sizeof(a));
+	r &= __lzo_assert(a.a_lzo_voidp == NULL);
+
+	/* All-ones bytes must read back as the maximum of each unsigned
+	   member. */
+	memset(&a, 0xff, sizeof(a));
+	r &= __lzo_assert(a.a_ushort == USHRT_MAX);
+	r &= __lzo_assert(a.a_uint == UINT_MAX);
+	r &= __lzo_assert(a.a_ulong == ULONG_MAX);
+	r &= __lzo_assert(a.a_lzo_uint == LZO_UINT_MAX);
+	r &= __lzo_assert(a.a_lzo_uint32 == LZO_UINT32_MAX);
+
+	/* Indexing dict[] must address the same bytes as stepping wrkmem
+	   by sizeof(lzo_byte *). */
+	if (r == 1) {
+		for (i = 0; i < 8; i++)
+			r &= __lzo_assert((const lzo_voidp)(&dict[i]) ==
+					  (const
+					   lzo_voidp)(&wrkmem[i *
+							      sizeof(lzo_byte
+								     *)]));
+	}
+
+	memset(&a, 0, sizeof(a));
+	r &= __lzo_assert(a.a_char_p == NULL);
+	r &= __lzo_assert(a.a_lzo_bytep == NULL);
+	r &= __lzo_assert(NULL == (void *)0);
+	/* Fill ten entries, clear entries 1..8 with BZERO8_PTR and verify
+	   that exactly those became NULL while 0 and 9 are untouched. */
+	if (r == 1) {
+		for (i = 0; i < 10; i++)
+			dict[i] = wrkmem;
+		BZERO8_PTR(dict + 1, sizeof(dict[0]), 8);
+		r &= __lzo_assert(dict[0] == wrkmem);
+		for (i = 1; i < 9; i++)
+			r &= __lzo_assert(dict[i] == NULL);
+		r &= __lzo_assert(dict[9] == wrkmem);
+	}
+
+	/* Check __lzo_align_gap(): starting at x[1], advance k by the
+	   reported gap and verify that &x[k] is aligned for lzo_uint32 and
+	   that 1 <= k < 1 + n. */
+	if (r == 1) {
+		unsigned k = 1;
+		const unsigned n = (unsigned)sizeof(lzo_uint32);
+		lzo_byte *p0;
+		lzo_byte *p1;
+
+		k += __lzo_align_gap(&x[k], n);
+		p0 = (lzo_bytep) & x[k];
+#if defined(PTR_LINEAR)
+		r &= __lzo_assert((PTR_LINEAR(p0) & (n - 1)) == 0);
+#else
+		r &= __lzo_assert(n == 4);
+		r &= __lzo_assert(PTR_ALIGNED_4(p0));
+#endif
+
+		r &= __lzo_assert(k >= 1);
+		p1 = (lzo_bytep) & x[1];
+		r &= __lzo_assert(PTR_GE(p0, p1));
+
+		r &= __lzo_assert(k < 1 + n);
+		p1 = (lzo_bytep) & x[1 + n];
+		r &= __lzo_assert(PTR_LT(p0, p1));
+
+		/* Read two lzo_uint32 values from the aligned positions
+		   x[k] and x[k + n] through the union pointer members and
+		   require both to be non-zero. */
+		if (r == 1) {
+			lzo_uint32 v0, v1;
+
+			u.a_uchar_p = &x[k];
+			v0 = *u.a_lzo_uint32_p;
+			u.a_uchar_p = &x[k + n];
+			v1 = *u.a_lzo_uint32_p;
+
+			r &= __lzo_assert(v0 > 0);
+			r &= __lzo_assert(v1 > 0);
+		}
+	}
+
+	return r;
+}
+
+/*
+ * Library self-test run from __lzo_init2().
+ *
+ * Runs the integral and pointer type checks, verifies the configured
+ * byte order and (when enabled) unaligned memory access against a byte
+ * pattern, probes for two compiler bugs and finally runs ptr_check().
+ *
+ * Returns LZO_E_OK when every check passes, LZO_E_ERROR otherwise.
+ */
+static int _lzo_config_check(void)
+{
+	lzo_bool r = 1;
+	int i;
+	/* Overlay used to look at the same bytes as different types. */
+	union {
+		lzo_uint32 a;
+		unsigned short b;
+		lzo_uint32 aa[4];
+		unsigned char x[4 * sizeof(lzo_full_align_t)];
+	}
+	u;
+
+	/* Converting (signed char)-1 to unsigned char must give 255, and
+	   moving 128 into the top byte of an int must give a negative
+	   value. */
+	COMPILE_TIME_ASSERT((int)((unsigned char)((signed char)-1)) == 255);
+	COMPILE_TIME_ASSERT((((unsigned char)128) << (int)(8 * sizeof(int) - 8))
+			    < 0);
+
+	r &= basic_integral_check();
+	r &= basic_ptr_check();
+	if (r != 1)
+		return LZO_E_ERROR;
+
+	/* Fill the overlay with the byte pattern 0, 1, 2, ... */
+	u.a = 0;
+	u.b = 0;
+	for (i = 0; i < (int)sizeof(u.x); i++)
+		u.x[i] = LZO_BYTE(i);
+
+	/* The configured byte order must match what reading the pattern
+	   through the integer members actually yields. */
+#if defined(LZO_BYTE_ORDER)
+	if (r == 1) {
+#  if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
+		lzo_uint32 a = (lzo_uint32) (u.a & LZO_0xffffffffL);
+		unsigned short b = (unsigned short)(u.b & 0xffff);
+		r &= __lzo_assert(a == 0x03020100L);
+		r &= __lzo_assert(b == 0x0100);
+#  elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN)
+		lzo_uint32 a = u.a >> (8 * sizeof(u.a) - 32);
+		unsigned short b = u.b >> (8 * sizeof(u.b) - 16);
+		r &= __lzo_assert(a == 0x00010203L);
+		r &= __lzo_assert(b == 0x0001);
+#  else
+#    error "invalid LZO_BYTE_ORDER"
+#  endif
+	}
+#endif
+
+	/* When unaligned 16-bit access is claimed to be OK, read shorts at
+	   byte offsets 0..3 and compare with the expected pattern. */
+#if defined(LZO_UNALIGNED_OK_2)
+	COMPILE_TIME_ASSERT(sizeof(short) == 2);
+	if (r == 1) {
+		unsigned short b[4];
+
+		for (i = 0; i < 4; i++)
+			b[i] = *(const unsigned short *)&u.x[i];
+
+#  if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
+		r &= __lzo_assert(b[0] == 0x0100);
+		r &= __lzo_assert(b[1] == 0x0201);
+		r &= __lzo_assert(b[2] == 0x0302);
+		r &= __lzo_assert(b[3] == 0x0403);
+#  elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN)
+		r &= __lzo_assert(b[0] == 0x0001);
+		r &= __lzo_assert(b[1] == 0x0102);
+		r &= __lzo_assert(b[2] == 0x0203);
+		r &= __lzo_assert(b[3] == 0x0304);
+#  endif
+	}
+#endif
+
+	/* Same test for unaligned 32-bit access. */
+#if defined(LZO_UNALIGNED_OK_4)
+	COMPILE_TIME_ASSERT(sizeof(lzo_uint32) == 4);
+	if (r == 1) {
+		lzo_uint32 a[4];
+
+		for (i = 0; i < 4; i++)
+			a[i] = *(const lzo_uint32 *)&u.x[i];
+
+#  if (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
+		r &= __lzo_assert(a[0] == 0x03020100L);
+		r &= __lzo_assert(a[1] == 0x04030201L);
+		r &= __lzo_assert(a[2] == 0x05040302L);
+		r &= __lzo_assert(a[3] == 0x06050403L);
+#  elif (LZO_BYTE_ORDER == LZO_BIG_ENDIAN)
+		r &= __lzo_assert(a[0] == 0x00010203L);
+		r &= __lzo_assert(a[1] == 0x01020304L);
+		r &= __lzo_assert(a[2] == 0x02030405L);
+		r &= __lzo_assert(a[3] == 0x03040506L);
+#  endif
+	}
+#endif
+
+#if defined(LZO_ALIGNED_OK_4)
+	COMPILE_TIME_ASSERT(sizeof(lzo_uint32) == 4);
+#endif
+
+	COMPILE_TIME_ASSERT(lzo_sizeof_dict_t == sizeof(lzo_dict_t));
+
+	/* Compiler bug probes. */
+	if (r == 1) {
+		r &= __lzo_assert(!schedule_insns_bug());
+	}
+
+	if (r == 1) {
+		static int x[3];
+		static unsigned xn = 3;
+		register unsigned j;
+
+		/* A correct compiler stores -3, -2, -1 here;
+		   strength_reduce_bug() reports anything else. */
+		for (j = 0; j < xn; j++)
+			x[j] = (int)j - 3;
+		r &= __lzo_assert(!strength_reduce_bug(x));
+	}
+
+	if (r == 1) {
+		r &= ptr_check();
+	}
+
+	return r == 1 ? LZO_E_OK : LZO_E_ERROR;
+}
+
+/*
+ * Compiler bug probe: initialise a small constant array, read its first
+ * element through a pointer and report a bug (return 1) if that element
+ * reads back as zero although it was initialised to 1.  Returns 0 on a
+ * correctly working compiler, and always 0 when __LZO_CHECKER is defined.
+ * The exact statement form is presumably what triggers the bug being
+ * probed for, so it is left unchanged.
+ */
+static lzo_bool schedule_insns_bug(void)
+{
+#if defined(__LZO_CHECKER)
+	return 0;
+#else
+	const int clone[] = { 1, 2, 0 };
+	const int *q;
+	q = clone;
+	return (*q) ? 0 : 1;
+#endif
+}
+
+/*
+ * Second half of a compiler bug probe.  The caller fills x[0..2] with
+ * j - 3 for j = 0, 1, 2; this function returns 1 (bug present) unless
+ * the array holds exactly -3, -2, -1, in which case it returns 0.
+ */
+static lzo_bool strength_reduce_bug(int *x)
+{
+	int idx;
+
+	for (idx = 0; idx < 3; idx++) {
+		if (x[idx] != idx - 3)
+			return 1;
+	}
+	return 0;
+}
+
+#undef COMPILE_TIME_ASSERT
+
+/*
+ * Check the caller's view of the basic type sizes and then run the
+ * library self-test.
+ *
+ * v       - rejected when 0 (NOTE(review): presumably the version the
+ *           caller was built against -- confirm); not examined otherwise.
+ * s1..s9  - sizes, as seen by the caller, of: short, int, long,
+ *           lzo_uint32, lzo_uint, the dictionary entry
+ *           (lzo_sizeof_dict_t), char *, lzo_voidp and lzo_compress_t.
+ *           Passing -1 skips the corresponding check.
+ *
+ * Returns LZO_E_ERROR when v is 0 or any size differs, otherwise the
+ * result of _lzo_config_check().
+ */
+LZO_PUBLIC(int)
+    __lzo_init2(unsigned v, int s1, int s2, int s3, int s4, int s5,
+	    int s6, int s7, int s8, int s9)
+{
+	if (v == 0)
+		return LZO_E_ERROR;
+
+	if (s1 != -1 && s1 != (int)sizeof(short))
+		return LZO_E_ERROR;
+	if (s2 != -1 && s2 != (int)sizeof(int))
+		return LZO_E_ERROR;
+	if (s3 != -1 && s3 != (int)sizeof(long))
+		return LZO_E_ERROR;
+	if (s4 != -1 && s4 != (int)sizeof(lzo_uint32))
+		return LZO_E_ERROR;
+	if (s5 != -1 && s5 != (int)sizeof(lzo_uint))
+		return LZO_E_ERROR;
+	if (s6 != -1 && s6 != (int)lzo_sizeof_dict_t)
+		return LZO_E_ERROR;
+	if (s7 != -1 && s7 != (int)sizeof(char *))
+		return LZO_E_ERROR;
+	if (s8 != -1 && s8 != (int)sizeof(lzo_voidp))
+		return LZO_E_ERROR;
+	if (s9 != -1 && s9 != (int)sizeof(lzo_compress_t))
+		return LZO_E_ERROR;
+
+	/* All sizes agree: the self-test decides the final result. */
+	return _lzo_config_check();
+}
+
+#if !defined(__LZO_IN_MINILZO)
+
+LZO_EXTERN(int)
+    __lzo_init(unsigned v, int s1, int s2, int s3, int s4, int s5, int s6, int s7);
+
+/*
+ * Seven-size variant of __lzo_init2(), only built outside of minilzo.
+ * Accepts v in the range 1..0x1010 and forwards to __lzo_init2() with
+ * s1..s5 unchanged, the dictionary entry and char * checks skipped (-1)
+ * and s6/s7 passed in the lzo_voidp / lzo_compress_t positions.
+ * Returns LZO_E_ERROR for any other v.
+ */
+LZO_PUBLIC(int)
+__lzo_init(unsigned v, int s1, int s2, int s3, int s4, int s5, int s6, int s7)
+{
+	const int v_in_range = (v != 0 && v <= 0x1010);
+
+	return v_in_range
+	    ? __lzo_init2(v, s1, s2, s3, s4, s5, -1, -1, s6, s7)
+	    : LZO_E_ERROR;
+}
+
+#endif
+
+/* Configuration of the compressor that follows. */
+
+/* The worker is written as do_compress() and renamed by this macro. */
+#define do_compress         _lzo1x_1_do_compress
+
+/* Request the dictionary macros below and size the hash index (D_BITS
+   bits wide). */
+#define LZO_NEED_DICT_H
+#define D_BITS          14
+/* D_INDEX1 computes the primary dictionary slot d from the bytes at p;
+   D_INDEX2 derives a secondary slot from the value already in d.  Both
+   assign to their first argument. */
+#define D_INDEX1(d,p)       d = DM((0x21*DX3(p,5,5,6)) >> 5)
+#define D_INDEX2(d,p)       d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f)
+
+/* Format constants shared by the compressor and the decompressor.  The
+   include guard opened here is closed after the dictionary macros. */
+#ifndef __LZO_CONFIG1X_H
+#define __LZO_CONFIG1X_H
+
+/* Default to the LZO1X variant when none was selected. */
+#if !defined(LZO1X) && !defined(LZO1Y) && !defined(LZO1Z)
+#  define LZO1X
+#endif
+
+#if !defined(__LZO_IN_MINILZO)
+#include <lzo1x.h>
+#endif
+
+/* The compressed stream ends with an explicit end-of-stream code, and
+   the non-deterministic match position check is selected. */
+#define LZO_EOF_CODE
+#undef LZO_DETERMINISTIC
+
+/* Largest match offset of each match class M1..M4. */
+#define M1_MAX_OFFSET   0x0400
+#ifndef M2_MAX_OFFSET
+#define M2_MAX_OFFSET   0x0800
+#endif
+#define M3_MAX_OFFSET   0x4000
+#define M4_MAX_OFFSET   0xbfff
+
+#define MX_MAX_OFFSET   (M1_MAX_OFFSET + M2_MAX_OFFSET)
+
+/* Shortest and longest match length of each match class. */
+#define M1_MIN_LEN      2
+#define M1_MAX_LEN      2
+#define M2_MIN_LEN      3
+#ifndef M2_MAX_LEN
+#define M2_MAX_LEN      8
+#endif
+#define M3_MIN_LEN      3
+#define M3_MAX_LEN      33
+#define M4_MIN_LEN      3
+#define M4_MAX_LEN      9
+
+/* Marker value or-ed into the first byte of a match instruction to
+   identify its class. */
+#define M1_MARKER       0
+#define M2_MARKER       64
+#define M3_MARKER       32
+#define M4_MARKER       16
+
+#ifndef MIN_LOOKAHEAD
+#define MIN_LOOKAHEAD       (M2_MAX_LEN + 1)
+#endif
+
+/* Dictionary (hash table) configuration; only compiled when the
+   including code defined LZO_NEED_DICT_H. */
+#if defined(LZO_NEED_DICT_H)
+
+#ifndef LZO_HASH
+#define LZO_HASH            LZO_HASH_LZO_INCREMENTAL_B
+#endif
+#define DL_MIN_LEN          M2_MIN_LEN
+
+#ifndef __LZO_DICT_H
+#define __LZO_DICT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* D_BITS is the total width of a dictionary index; it must be provided
+   (DBITS is accepted as an alternative spelling). */
+#if !defined(D_BITS) && defined(DBITS)
+#  define D_BITS        DBITS
+#endif
+#if !defined(D_BITS)
+#  error "D_BITS is not defined"
+#endif
+/* Size and mask derived from D_BITS; the LZO_SIZE/LZO_MASK and
+   LZO_USIZE/LZO_UMASK helpers are defined elsewhere. */
+#if (D_BITS < 16)
+#  define D_SIZE        LZO_SIZE(D_BITS)
+#  define D_MASK        LZO_MASK(D_BITS)
+#else
+#  define D_SIZE        LZO_USIZE(D_BITS)
+#  define D_MASK        LZO_UMASK(D_BITS)
+#endif
+#define D_HIGH          ((D_MASK >> 1) + 1)
+
+/* DD_BITS is the part of the index used for the per-slot run counter
+   (see the UPDATE_* macros); it defaults to 0. */
+#if !defined(DD_BITS)
+#  define DD_BITS       0
+#endif
+#define DD_SIZE         LZO_SIZE(DD_BITS)
+#define DD_MASK         LZO_MASK(DD_BITS)
+
+/* DL_BITS is the remaining part of the index: D_BITS = DL_BITS + DD_BITS. */
+#if !defined(DL_BITS)
+#  define DL_BITS       (D_BITS - DD_BITS)
+#endif
+#if (DL_BITS < 16)
+#  define DL_SIZE       LZO_SIZE(DL_BITS)
+#  define DL_MASK       LZO_MASK(DL_BITS)
+#else
+#  define DL_SIZE       LZO_USIZE(DL_BITS)
+#  define DL_MASK       LZO_UMASK(DL_BITS)
+#endif
+
+/* Reject inconsistent or out-of-range configurations at compile time. */
+#if (D_BITS != DL_BITS + DD_BITS)
+#  error "D_BITS does not match"
+#endif
+#if (D_BITS < 8 || D_BITS > 18)
+#  error "invalid D_BITS"
+#endif
+#if (DL_BITS < 8 || DL_BITS > 20)
+#  error "invalid DL_BITS"
+#endif
+#if (DD_BITS < 0 || DD_BITS > 6)
+#  error "invalid DD_BITS"
+#endif
+
+#if !defined(DL_MIN_LEN)
+#  define DL_MIN_LEN    3
+#endif
+/* Per-byte shift used by the GZIP style hashes: DL_BITS divided by
+   DL_MIN_LEN, rounded up. */
+#if !defined(DL_SHIFT)
+#  define DL_SHIFT      ((DL_BITS + (DL_MIN_LEN - 1)) / DL_MIN_LEN)
+#endif
+
+/* Identifiers of the available hashing strategies; LZO_HASH must be set
+   to one of them. */
+#define LZO_HASH_GZIP                   1
+#define LZO_HASH_GZIP_INCREMENTAL       2
+#define LZO_HASH_LZO_INCREMENTAL_A      3
+#define LZO_HASH_LZO_INCREMENTAL_B      4
+
+#if !defined(LZO_HASH)
+#  error "choose a hashing strategy"
+#endif
+
+/* Helpers that combine the first DL_MIN_LEN bytes at p into one
+   lzo_uint32 value by shifting and xor-ing.  The _A forms start from
+   p[0], the _B forms start from the last byte. */
+#if (DL_MIN_LEN == 3)
+#  define _DV2_A(p,shift1,shift2) \
+	(((( (lzo_uint32)((p)[0]) << shift1) ^ (p)[1]) << shift2) ^ (p)[2])
+#  define _DV2_B(p,shift1,shift2) \
+	(((( (lzo_uint32)((p)[2]) << shift1) ^ (p)[1]) << shift2) ^ (p)[0])
+#  define _DV3_B(p,shift1,shift2,shift3) \
+	((_DV2_B((p)+1,shift1,shift2) << (shift3)) ^ (p)[0])
+#elif (DL_MIN_LEN == 2)
+#  define _DV2_A(p,shift1,shift2) \
+	(( (lzo_uint32)(p[0]) << shift1) ^ p[1])
+#  define _DV2_B(p,shift1,shift2) \
+	(( (lzo_uint32)(p[1]) << shift1) ^ p[2])
+#else
+#  error "invalid DL_MIN_LEN"
+#endif
+#define _DV_A(p,shift)      _DV2_A(p,shift,shift)
+#define _DV_B(p,shift)      _DV2_B(p,shift,shift)
+/* DA*, DS* and DX* combine three (…2) or four (…3) bytes starting at p,
+   highest index first, using addition, subtraction or xor respectively
+   between the shifted partial result and the next byte. */
+#define DA2(p,s1,s2) \
+	(((((lzo_uint32)((p)[2]) << (s2)) + (p)[1]) << (s1)) + (p)[0])
+#define DS2(p,s1,s2) \
+	(((((lzo_uint32)((p)[2]) << (s2)) - (p)[1]) << (s1)) - (p)[0])
+#define DX2(p,s1,s2) \
+	(((((lzo_uint32)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0])
+#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0])
+#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0])
+#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0])
+/* DMS masks v to the dictionary index width reduced by s bits and
+   shifts it back up by s; DM is the unshifted form. */
+#define DMS(v,s)        ((lzo_uint) (((v) & (D_MASK >> (s))) << (s)))
+#define DM(v)           DMS(v,0)
+/* Select the hash implementation according to LZO_HASH.  Each strategy
+   provides _DINDEX(dv,p); the incremental ones additionally define
+   __LZO_HASH_INCREMENTAL, DVAL_FIRST (compute dv at p), DVAL_NEXT
+   (update dv) and DVAL_LOOKAHEAD. */
+#if (LZO_HASH == LZO_HASH_GZIP)
+#  define _DINDEX(dv,p)     (_DV_A((p),DL_SHIFT))
+
+#elif (LZO_HASH == LZO_HASH_GZIP_INCREMENTAL)
+#  define __LZO_HASH_INCREMENTAL
+#  define DVAL_FIRST(dv,p)  dv = _DV_A((p),DL_SHIFT)
+#  define DVAL_NEXT(dv,p)   dv = (((dv) << DL_SHIFT) ^ p[2])
+#  define _DINDEX(dv,p)     (dv)
+#  define DVAL_LOOKAHEAD    DL_MIN_LEN
+
+#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_A)
+#  define __LZO_HASH_INCREMENTAL
+#  define DVAL_FIRST(dv,p)  dv = _DV_A((p),5)
+/* Note: DVAL_NEXT expands to two statements here. */
+#  define DVAL_NEXT(dv,p) \
+		dv ^= (lzo_uint32)(p[-1]) << (2*5); dv = (((dv) << 5) ^ p[2])
+#  define _DINDEX(dv,p)     ((0x9f5f * (dv)) >> 5)
+#  define DVAL_LOOKAHEAD    DL_MIN_LEN
+
+#elif (LZO_HASH == LZO_HASH_LZO_INCREMENTAL_B)
+#  define __LZO_HASH_INCREMENTAL
+#  define DVAL_FIRST(dv,p)  dv = _DV_B((p),5)
+/* Note: DVAL_NEXT expands to two statements here. */
+#  define DVAL_NEXT(dv,p) \
+		dv ^= p[-1]; dv = (((dv) >> 5) ^ ((lzo_uint32)(p[2]) << (2*5)))
+#  define _DINDEX(dv,p)     ((0x9f5f * (dv)) >> 5)
+#  define DVAL_LOOKAHEAD    DL_MIN_LEN
+
+#else
+#  error "choose a hashing strategy"
+#endif
+
+/* DINDEX reduces the hash to DL_BITS bits and makes room for the
+   DD_BITS run counter.  DINDEX1/DINDEX2 default to the D_INDEX1/D_INDEX2
+   macros supplied by the including code, if any. */
+#ifndef DINDEX
+#define DINDEX(dv,p)        ((lzo_uint)((_DINDEX(dv,p)) & DL_MASK) << DD_BITS)
+#endif
+#if !defined(DINDEX1) && defined(D_INDEX1)
+#define DINDEX1             D_INDEX1
+#endif
+#if !defined(DINDEX2) && defined(D_INDEX2)
+#define DINDEX2             D_INDEX2
+#endif
+
+/* Non-incremental hashes need no running value. */
+#if !defined(__LZO_HASH_INCREMENTAL)
+#  define DVAL_FIRST(dv,p)  ((void) 0)
+#  define DVAL_NEXT(dv,p)   ((void) 0)
+#  define DVAL_LOOKAHEAD    0
+#endif
+
+/* DVAL_ASSERT(dv,p): debugging aid for incremental hashes.  Recomputes
+   the hash value at p from scratch and asserts that it maps to the same
+   dictionary index as the incrementally maintained dv.  Compiles to
+   nothing when the hash is not incremental or NDEBUG is defined.
+   NOTE(review): assert() is called with one argument here, while the
+   compressor code in this file calls assert("label", cond) with two --
+   confirm which assert macro is in effect when this branch is built. */
+#if !defined(DVAL_ASSERT)
+#if defined(__LZO_HASH_INCREMENTAL) && !defined(NDEBUG)
+	static void DVAL_ASSERT(lzo_uint32 dv, const lzo_byte * p) {
+		lzo_uint32 df;
+		 DVAL_FIRST(df, (p));
+		 assert(DINDEX(dv, p) == DINDEX(df, p));
+	}
+#else
+#  define DVAL_ASSERT(dv,p) ((void) 0)
+#endif
+#endif
+
+/* Dictionary entries are either pointers into the input
+   (LZO_DICT_USE_PTR) or offsets relative to the start of the input.
+   DENTRY builds an entry for position p; GINDEX loads entry dindex
+   into m_pos (pointer form) or m_off (offset form). */
+#if defined(LZO_DICT_USE_PTR)
+#  define DENTRY(p,in)                          (p)
+#  define GINDEX(m_pos,m_off,dict,dindex,in)    m_pos = dict[dindex]
+#else
+#  define DENTRY(p,in)                          ((lzo_uint) ((p)-(in)))
+#  define GINDEX(m_pos,m_off,dict,dindex,in)    m_off = dict[dindex]
+#endif
+
+/* UPDATE_D/UPDATE_I/UPDATE_P store the entry for p by hash value, by
+   precomputed index or through a slot pointer.  With DD_BITS == 0 the
+   drun argument is ignored; otherwise drun selects the sub-slot and is
+   advanced and wrapped with DD_MASK (these forms expand to two
+   statements). */
+#if (DD_BITS == 0)
+
+#  define UPDATE_D(dict,drun,dv,p,in)       dict[ DINDEX(dv,p) ] = DENTRY(p,in)
+#  define UPDATE_I(dict,drun,index,p,in)    dict[index] = DENTRY(p,in)
+#  define UPDATE_P(ptr,drun,p,in)           (ptr)[0] = DENTRY(p,in)
+
+#else
+
+#  define UPDATE_D(dict,drun,dv,p,in)   \
+	dict[ DINDEX(dv,p) + drun++ ] = DENTRY(p,in); drun &= DD_MASK
+#  define UPDATE_I(dict,drun,index,p,in)    \
+	dict[ (index) + drun++ ] = DENTRY(p,in); drun &= DD_MASK
+#  define UPDATE_P(ptr,drun,p,in)   \
+	(ptr) [ drun++ ] = DENTRY(p,in); drun &= DD_MASK
+
+#endif
+
+/* LZO_CHECK_MPOS_*(m_pos,m_off,in,ip,max_offset) evaluate to true when
+   the dictionary entry just loaded is NOT usable as a match candidate
+   for ip.  As a side effect they compute the match offset in m_off and,
+   in the offset-based variants, set m_pos = ip - m_off.
+   The _DET variants treat an empty entry (NULL / 0) as unusable; the
+   _NON_DET variants instead reject entries that do not lie before ip
+   within the input. */
+#if defined(LZO_DICT_USE_PTR)
+
+#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \
+	(m_pos == NULL || (m_off = (lzo_moff_t) (ip - m_pos)) > max_offset)
+
+#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \
+    (BOUNDS_CHECKING_OFF_IN_EXPR( \
+	(PTR_LT(m_pos,in) || \
+	 (m_off = (lzo_moff_t) PTR_DIFF(ip,m_pos)) <= 0 || \
+	  m_off > max_offset) ))
+
+#else
+
+#define LZO_CHECK_MPOS_DET(m_pos,m_off,in,ip,max_offset) \
+	(m_off == 0 || \
+	 ((m_off = (lzo_moff_t) ((ip)-(in)) - m_off) > max_offset) || \
+	 (m_pos = (ip) - (m_off), 0) )
+
+#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \
+	((lzo_moff_t) ((ip)-(in)) <= m_off || \
+	 ((m_off = (lzo_moff_t) ((ip)-(in)) - m_off) > max_offset) || \
+	 (m_pos = (ip) - (m_off), 0) )
+
+#endif
+
+/* LZO_CHECK_MPOS is the variant selected by LZO_DETERMINISTIC. */
+#if defined(LZO_DETERMINISTIC)
+#  define LZO_CHECK_MPOS    LZO_CHECK_MPOS_DET
+#else
+#  define LZO_CHECK_MPOS    LZO_CHECK_MPOS_NON_DET
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+/* The three #endif lines below close __LZO_DICT_H, LZO_NEED_DICT_H and
+   __LZO_CONFIG1X_H, in that order. */
+#endif
+#endif
+#endif
+#define DO_COMPRESS     lzo1x_1_compress
+/*
+ * Core of the compressor: scans the input for matches using the
+ * dictionary in wrkmem and emits literal runs and match instructions.
+ *
+ * in, in_len - input buffer.  The only caller in this file invokes this
+ *              function for in_len > M2_MAX_LEN + 5 only.
+ * out        - output buffer.
+ * out_len    - receives the number of bytes written to out.
+ * wrkmem     - used as the dictionary (cast to lzo_dict_p); it is read
+ *              before being written, and this function does not
+ *              initialise it.
+ *
+ * Returns the number of input bytes at the end of the buffer that have
+ * not been emitted yet; the caller stores them as a final literal run.
+ *
+ * ip is the current scan position, ii the start of the pending
+ * (not yet emitted) literal run and op the output position.
+ */
+static
+lzo_uint do_compress(const lzo_byte * in, lzo_uint in_len,
+		     lzo_byte * out, lzo_uintp out_len, lzo_voidp wrkmem)
+{
+	register const lzo_byte *ip;
+	lzo_byte *op;
+	const lzo_byte *const in_end = in + in_len;
+	/* Matches are only searched for while ip is below ip_end. */
+	const lzo_byte *const ip_end = in + in_len - M2_MAX_LEN - 5;
+	const lzo_byte *ii;
+	lzo_dict_p const dict = (lzo_dict_p) wrkmem;
+
+	op = out;
+	ip = in;
+	ii = ip;
+
+	/* The first four bytes are never the start of a match search. */
+	ip += 4;
+	for (;;) {
+		register const lzo_byte *m_pos;
+
+		lzo_moff_t m_off;
+		lzo_uint m_len;
+		lzo_uint dindex;
+
+		/* Look up the primary dictionary slot for the bytes at ip;
+		   a true result of the check means "no usable candidate". */
+		DINDEX1(dindex, ip);
+		GINDEX(m_pos, m_off, dict, dindex, in);
+		if (LZO_CHECK_MPOS_NON_DET(m_pos, m_off, in, ip, M4_MAX_OFFSET))
+			goto literal;
+#if 1
+		/* Accept near candidates directly; far ones only when the
+		   fourth byte matches as well.  Otherwise retry with the
+		   secondary slot. */
+		if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
+			goto try_match;
+		DINDEX2(dindex, ip);
+#endif
+		GINDEX(m_pos, m_off, dict, dindex, in);
+		if (LZO_CHECK_MPOS_NON_DET(m_pos, m_off, in, ip, M4_MAX_OFFSET))
+			goto literal;
+		if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
+			goto try_match;
+		goto literal;
+
+	      try_match:
+		/* A match needs at least three equal bytes. */
+#if 1 && defined(LZO_UNALIGNED_OK_2)
+		if (*(const lzo_ushortp)m_pos != *(const lzo_ushortp)ip) {
+#else
+		if (m_pos[0] != ip[0] || m_pos[1] != ip[1]) {
+#endif
+			;
+		} else {
+			if (m_pos[2] == ip[2]) {
+				goto match;
+			} else {
+				;
+			}
+		}
+
+	      literal:
+		/* No match: remember this position in the slot that was
+		   examined last and advance by one byte. */
+		UPDATE_I(dict, 0, dindex, ip, in);
+		++ip;
+		if (ip >= ip_end)
+			break;
+		continue;
+
+	      match:
+		UPDATE_I(dict, 0, dindex, ip, in);
+		/* Flush the pending literal run [ii, ip) first. */
+		if (pd(ip, ii) > 0) {
+			register lzo_uint t = pd(ip, ii);
+
+			if (t <= 3) {
+				/* Runs of 1..3 literals are recorded in the
+				   low two bits of op[-2], which every match
+				   encoding below leaves clear. */
+				assert("lzo-04", op - 2 > out);
+				op[-2] |= LZO_BYTE(t);
+			} else if (t <= 18)
+				*op++ = LZO_BYTE(t - 3);
+			else {
+				/* Long run: a zero byte, one further zero
+				   byte per 255 of the remaining count, then
+				   the remainder. */
+				register lzo_uint tt = t - 18;
+
+				*op++ = 0;
+				while (tt > 255) {
+					tt -= 255;
+					*op++ = 0;
+				}
+				assert("lzo-05", tt > 0);
+				*op++ = LZO_BYTE(tt);
+			}
+			do
+				*op++ = *ii++;
+			while (--t > 0);
+		}
+
+		assert("lzo-06", ii == ip);
+		/* Three bytes are known to match; extend the match byte by
+		   byte up to M2_MAX_LEN.  The condition becomes true at the
+		   first mismatch, with ip one past the mismatching byte. */
+		ip += 3;
+		if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++
+		    || m_pos[6] != *ip++ || m_pos[7] != *ip++
+		    || m_pos[8] != *ip++
+#ifdef LZO1Y
+		    || m_pos[9] != *ip++ || m_pos[10] != *ip++
+		    || m_pos[11] != *ip++ || m_pos[12] != *ip++
+		    || m_pos[13] != *ip++ || m_pos[14] != *ip++
+#endif
+		    ) {
+			/* Short match (length 3..M2_MAX_LEN). */
+			--ip;
+			m_len = ip - ii;
+			assert("lzo-07", m_len >= 3);
+			assert("lzo-08", m_len <= M2_MAX_LEN);
+
+			if (m_off <= M2_MAX_OFFSET) {
+				/* M2: length and low offset bits in the
+				   first byte, remaining offset bits in the
+				   second. */
+				m_off -= 1;
+#if defined(LZO1X)
+				*op++ =
+				    LZO_BYTE(((m_len -
+					       1) << 5) | ((m_off & 7) << 2));
+				*op++ = LZO_BYTE(m_off >> 3);
+#elif defined(LZO1Y)
+				*op++ =
+				    LZO_BYTE(((m_len +
+					       1) << 4) | ((m_off & 3) << 2));
+				*op++ = LZO_BYTE(m_off >> 2);
+#endif
+			} else if (m_off <= M3_MAX_OFFSET) {
+				/* M3: marker plus length, then two offset
+				   bytes. */
+				m_off -= 1;
+				*op++ = LZO_BYTE(M3_MARKER | (m_len - 2));
+				goto m3_m4_offset;
+			} else
+#if defined(LZO1X)
+			{
+				/* M4: offset is stored relative to 0x4000;
+				   its bit 14 goes into the marker byte. */
+				m_off -= 0x4000;
+				assert("lzo-09", m_off > 0);
+				assert("lzo-10", m_off <= 0x7fff);
+				*op++ = LZO_BYTE(M4_MARKER |
+						 ((m_off & 0x4000) >> 11) |
+						 (m_len - 2));
+				goto m3_m4_offset;
+			}
+#elif defined(LZO1Y)
+				goto m4_match;
+#endif
+		} else {
+			/* All M2_MAX_LEN + 1 bytes matched: keep extending
+			   the match until a mismatch or the end of input. */
+			{
+				const lzo_byte *end = in_end;
+				const lzo_byte *m = m_pos + M2_MAX_LEN + 1;
+				while (ip < end && *m == *ip)
+					m++, ip++;
+				m_len = (ip - ii);
+			}
+			assert("lzo-11", m_len > M2_MAX_LEN);
+
+			if (m_off <= M3_MAX_OFFSET) {
+				m_off -= 1;
+				if (m_len <= 33)
+					*op++ =
+					    LZO_BYTE(M3_MARKER | (m_len - 2));
+				else {
+					/* Length does not fit in the marker
+					   byte: store 0 there and the rest
+					   as extension bytes. */
+					m_len -= 33;
+					*op++ = M3_MARKER | 0;
+					goto m3_m4_len;
+				}
+			} else {
+#if defined(LZO1Y)
+			      m4_match:
+#endif
+				m_off -= 0x4000;
+				assert("lzo-12", m_off > 0);
+				assert("lzo-13", m_off <= 0x7fff);
+				if (m_len <= M4_MAX_LEN)
+					*op++ = LZO_BYTE(M4_MARKER |
+							 ((m_off & 0x4000) >>
+							  11) | (m_len - 2));
+				else {
+					m_len -= M4_MAX_LEN;
+					*op++ =
+					    LZO_BYTE(M4_MARKER |
+						     ((m_off & 0x4000) >> 11));
+				      m3_m4_len:
+					/* Extension bytes: one zero byte per
+					   255, then the remainder. */
+					while (m_len > 255) {
+						m_len -= 255;
+						*op++ = 0;
+					}
+					assert("lzo-14", m_len > 0);
+					*op++ = LZO_BYTE(m_len);
+				}
+			}
+
+		      m3_m4_offset:
+			/* Two offset bytes shared by M3 and M4; the low two
+			   bits of the first stay clear for a following
+			   short literal run. */
+			*op++ = LZO_BYTE((m_off & 63) << 2);
+			*op++ = LZO_BYTE(m_off >> 6);
+		}
+
+		/* The match is emitted; the next literal run starts here. */
+		ii = ip;
+		if (ip >= ip_end)
+			break;
+	}
+
+	*out_len = op - out;
+	return pd(in_end, ii);
+}
+
+/*
+ * Public compression entry point (DO_COMPRESS is defined as
+ * lzo1x_1_compress above).
+ *
+ * Inputs of up to M2_MAX_LEN + 5 bytes are stored as one literal run;
+ * longer inputs go through do_compress(), and whatever it leaves
+ * unprocessed at the end is appended as a literal run.  The stream is
+ * always terminated with the three byte end marker.
+ *
+ * in, in_len - data to compress.
+ * out        - destination buffer; no bound on the output size is
+ *              checked here.
+ * out_len    - receives the total number of bytes written to out.
+ * wrkmem     - work memory handed to do_compress().
+ *
+ * Returns LZO_E_OK (or the query result when built with
+ * __LZO_QUERY_COMPRESS and the call is a query).
+ */
+LZO_PUBLIC(int)
+    DO_COMPRESS(const lzo_byte * in, lzo_uint in_len,
+	    lzo_byte * out, lzo_uintp out_len, lzo_voidp wrkmem)
+{
+	lzo_byte *dst = out;
+	lzo_uint tail;
+
+#if defined(__LZO_QUERY_COMPRESS)
+	if (__LZO_IS_COMPRESS_QUERY(in, in_len, out, out_len, wrkmem))
+		return __LZO_QUERY_COMPRESS(in, in_len, out, out_len, wrkmem,
+					    D_SIZE, lzo_sizeof(lzo_dict_t));
+#endif
+
+	/* tail = number of trailing input bytes still to be stored. */
+	tail = in_len;
+	if (in_len > M2_MAX_LEN + 5) {
+		tail = do_compress(in, in_len, dst, out_len, wrkmem);
+		dst += *out_len;
+	}
+
+	if (tail > 0) {
+		const lzo_byte *src = in + in_len - tail;
+
+		/* Emit the length of the final literal run. */
+		if (dst == out && tail <= 238) {
+			/* Nothing written so far: first-byte form. */
+			*dst++ = LZO_BYTE(17 + tail);
+		} else if (tail <= 3) {
+			/* Goes into the low bits of the previous
+			   instruction. */
+			dst[-2] |= LZO_BYTE(tail);
+		} else if (tail <= 18) {
+			*dst++ = LZO_BYTE(tail - 3);
+		} else {
+			lzo_uint rest;
+
+			*dst++ = 0;
+			for (rest = tail - 18; rest > 255; rest -= 255)
+				*dst++ = 0;
+			assert("lzo-15", rest > 0);
+			*dst++ = LZO_BYTE(rest);
+		}
+
+		/* Copy the literal bytes themselves. */
+		for (; tail > 0; tail--)
+			*dst++ = *src++;
+	}
+
+	/* End-of-stream marker. */
+	*dst++ = M4_MARKER | 1;
+	*dst++ = 0;
+	*dst++ = 0;
+
+	*out_len = dst - out;
+	return LZO_E_OK;
+}
+
+/* Done with the compressor: drop its private macro names. */
+#undef do_compress
+#undef DO_COMPRESS
+#undef LZO_HASH
+
+/* Configure the decompressor that follows as lzo1x_decompress.  All
+   LZO_TEST_DECOMPRESS_OVERRUN* switches are undefined first, so this
+   instance is built without input, output or look-behind overrun
+   checks (see the TEST_* and NEED_* macros below). */
+#undef LZO_TEST_DECOMPRESS_OVERRUN
+#undef LZO_TEST_DECOMPRESS_OVERRUN_INPUT
+#undef LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT
+#undef LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND
+#undef DO_DECOMPRESS
+#define DO_DECOMPRESS       lzo1x_decompress
+
+/* LZO_TEST_DECOMPRESS_OVERRUN is a master switch: it turns on every
+   individual check that was not configured explicitly. */
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN)
+#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
+#    define LZO_TEST_DECOMPRESS_OVERRUN_INPUT       2
+#  endif
+#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
+#    define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT      2
+#  endif
+#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
+#    define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND
+#  endif
+#endif
+
+/* Start from a clean slate so the macros can be redefined for this
+   decompressor instance. */
+#undef TEST_IP
+#undef TEST_OP
+#undef TEST_LOOKBEHIND
+#undef NEED_IP
+#undef NEED_OP
+#undef HAVE_TEST_IP
+#undef HAVE_TEST_OP
+#undef HAVE_NEED_IP
+#undef HAVE_NEED_OP
+#undef HAVE_ANY_IP
+#undef HAVE_ANY_OP
+
+/* Input overrun checks.  Level >= 1: TEST_IP is a loop condition on the
+   input pointer.  Level >= 2: NEED_IP(x) jumps to input_overrun when
+   fewer than x input bytes remain. */
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1)
+#    define TEST_IP             (ip < ip_end)
+#  endif
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2)
+#    define NEED_IP(x) \
+	    if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x))  goto input_overrun
+#  endif
+#endif
+
+/* Output overrun checks.  Level 1: TEST_OP is a loop condition on the
+   output pointer.  Level >= 2: TEST_OP is dropped again and NEED_OP(x)
+   jumps to output_overrun when fewer than x output bytes remain. */
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1)
+#    define TEST_OP             (op <= op_end)
+#  endif
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2)
+#    undef TEST_OP
+#    define NEED_OP(x) \
+	    if ((lzo_uint)(op_end - op) < (lzo_uint)(x))  goto output_overrun
+#  endif
+#endif
+
+/* Look-behind check: a match source before the start of the output
+   buffer jumps to lookbehind_overrun. */
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
+#  define TEST_LOOKBEHIND(m_pos,out)    if (m_pos < out) goto lookbehind_overrun
+#else
+#  define TEST_LOOKBEHIND(m_pos,op)     ((void) 0)
+#endif
+
+/* Without an explicit end-of-stream code the input pointer has to be
+   tested to find the end of the data. */
+#if !defined(LZO_EOF_CODE) && !defined(TEST_IP)
+#  define TEST_IP               (ip < ip_end)
+#endif
+
+/* Provide no-op defaults for everything that was not enabled above and
+   record in HAVE_* which checks are real. */
+#if defined(TEST_IP)
+#  define HAVE_TEST_IP
+#else
+#  define TEST_IP               1
+#endif
+#if defined(TEST_OP)
+#  define HAVE_TEST_OP
+#else
+#  define TEST_OP               1
+#endif
+
+#if defined(NEED_IP)
+#  define HAVE_NEED_IP
+#else
+#  define NEED_IP(x)            ((void) 0)
+#endif
+#if defined(NEED_OP)
+#  define HAVE_NEED_OP
+#else
+#  define NEED_OP(x)            ((void) 0)
+#endif
+
+#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP)
+#  define HAVE_ANY_IP
+#endif
+#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP)
+#  define HAVE_ANY_OP
+#endif
+
+/* __COPY4 copies four bytes with a single lzo_uint32 load and store.
+   COPY4 is only defined when such accesses are permitted: directly for
+   LZO_UNALIGNED_OK_4, and via an intermediate lzo_ptr_t conversion for
+   LZO_ALIGNED_OK_4.  It stays undefined otherwise. */
+#undef __COPY4
+#define __COPY4(dst,src)    * (lzo_uint32p)(dst) = * (const lzo_uint32p)(src)
+
+#undef COPY4
+#if defined(LZO_UNALIGNED_OK_4)
+#  define COPY4(dst,src)    __COPY4(dst,src)
+#elif defined(LZO_ALIGNED_OK_4)
+#  define COPY4(dst,src)    __COPY4((lzo_ptr_t)(dst),(lzo_ptr_t)(src))
+#endif
+
+#if defined(DO_DECOMPRESS)
+LZO_PUBLIC(int)
+    DO_DECOMPRESS(const lzo_byte * in, lzo_uint in_len,
+	      lzo_byte * out, lzo_uintp out_len, lzo_voidp wrkmem)
+#endif
+{
+	register lzo_byte *op;
+	register const lzo_byte *ip;
+	register lzo_uint t;
+#if defined(COPY_DICT)
+	lzo_uint m_off;
+	const lzo_byte *dict_end;
+#else
+	register const lzo_byte *m_pos;
+#endif
+
+	const lzo_byte *const ip_end = in + in_len;
+#if defined(HAVE_ANY_OP)
+	lzo_byte *const op_end = out + *out_len;
+#endif
+#if defined(LZO1Z)
+	lzo_uint last_m_off = 0;
+#endif
+
+	LZO_UNUSED(wrkmem);
+
+#if defined(__LZO_QUERY_DECOMPRESS)
+	if (__LZO_IS_DECOMPRESS_QUERY(in, in_len, out, out_len, wrkmem))
+		return __LZO_QUERY_DECOMPRESS(in, in_len, out, out_len, wrkmem,
+					      0, 0);
+#endif
+
+#if defined(COPY_DICT)
+	if (dict) {
+		if (dict_len > M4_MAX_OFFSET) {
+			dict += dict_len - M4_MAX_OFFSET;
+			dict_len = M4_MAX_OFFSET;
+		}
+		dict_end = dict + dict_len;
+	} else {
+		dict_len = 0;
+		dict_end = NULL;
+	}
+#endif
+
+	*out_len = 0;
+
+	op = out;
+	ip = in;
+
+	if (*ip > 17) {
+		t = *ip++ - 17;
+		if (t < 4)
+			goto match_next;
+		assert("lzo-16", t > 0);
+		NEED_OP(t);
+		NEED_IP(t + 1);
+		do
+			*op++ = *ip++;
+		while (--t > 0);
+		goto first_literal_run;
+	}
+
+	while (TEST_IP && TEST_OP) {
+		t = *ip++;
+		if (t >= 16)
+			goto match;
+		if (t == 0) {
+			NEED_IP(1);
+			while (*ip == 0) {
+				t += 255;
+				ip++;
+				NEED_IP(1);
+			}
+			t += 15 + *ip++;
+		}
+		assert("lzo-17", t > 0);
+		NEED_OP(t + 3);
+		NEED_IP(t + 4);
+#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
+#if !defined(LZO_UNALIGNED_OK_4)
+		if (PTR_ALIGNED2_4(op, ip)) {
+#endif
+			COPY4(op, ip);
+			op += 4;
+			ip += 4;
+			if (--t > 0) {
+				if (t >= 4) {
+					do {
+						COPY4(op, ip);
+						op += 4;
+						ip += 4;
+						t -= 4;
+					} while (t >= 4);
+					if (t > 0)
+						do
+							*op++ = *ip++;
+						while (--t > 0);
+				} else
+					do
+						*op++ = *ip++;
+					while (--t > 0);
+			}
+#if !defined(LZO_UNALIGNED_OK_4)
+		} else
+#endif
+#endif
+#if !defined(LZO_UNALIGNED_OK_4)
+		{
+			*op++ = *ip++;
+			*op++ = *ip++;
+			*op++ = *ip++;
+			do
+				*op++ = *ip++;
+			while (--t > 0);
+		}
+#endif
+
+	      first_literal_run:
+
+		t = *ip++;
+		if (t >= 16)
+			goto match;
+#if defined(COPY_DICT)
+#if defined(LZO1Z)
+		m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
+		last_m_off = m_off;
+#else
+		m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2);
+#endif
+		NEED_OP(3);
+		t = 3;
+		COPY_DICT(t, m_off)
+#else
+#if defined(LZO1Z)
+		t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2);
+		m_pos = op - t;
+		last_m_off = t;
+#else
+		m_pos = op - (1 + M2_MAX_OFFSET);
+		m_pos -= t >> 2;
+		m_pos -= *ip++ << 2;
+#endif
+		TEST_LOOKBEHIND(m_pos, out);
+		NEED_OP(3);
+		*op++ = *m_pos++;
+		*op++ = *m_pos++;
+		*op++ = *m_pos;
+#endif
+		goto match_done;
+
+		while (TEST_IP && TEST_OP) {
+		      match:
+			if (t >= 64) {
+#if defined(COPY_DICT)
+#if defined(LZO1X)
+				m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3);
+				t = (t >> 5) - 1;
+#elif defined(LZO1Y)
+				m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2);
+				t = (t >> 4) - 3;
+#elif defined(LZO1Z)
+				m_off = t & 0x1f;
+				if (m_off >= 0x1c)
+					m_off = last_m_off;
+				else {
+					m_off = 1 + (m_off << 6) + (*ip++ >> 2);
+					last_m_off = m_off;
+				}
+				t = (t >> 5) - 1;
+#endif
+#else
+#if defined(LZO1X)
+				m_pos = op - 1;
+				m_pos -= (t >> 2) & 7;
+				m_pos -= *ip++ << 3;
+				t = (t >> 5) - 1;
+#elif defined(LZO1Y)
+				m_pos = op - 1;
+				m_pos -= (t >> 2) & 3;
+				m_pos -= *ip++ << 2;
+				t = (t >> 4) - 3;
+#elif defined(LZO1Z)
+				{
+					lzo_uint off = t & 0x1f;
+					m_pos = op;
+					if (off >= 0x1c) {
+						assert(last_m_off > 0);
+						m_pos -= last_m_off;
+					} else {
+						off =
+						    1 + (off << 6) +
+						    (*ip++ >> 2);
+						m_pos -= off;
+						last_m_off = off;
+					}
+				}
+				t = (t >> 5) - 1;
+#endif
+				TEST_LOOKBEHIND(m_pos, out);
+				assert("lzo-18", t > 0);
+				NEED_OP(t + 3 - 1);
+				goto copy_match;
+#endif
+			} else if (t >= 32) {
+				t &= 31;
+				if (t == 0) {
+					NEED_IP(1);
+					while (*ip == 0) {
+						t += 255;
+						ip++;
+						NEED_IP(1);
+					}
+					t += 31 + *ip++;
+				}
+#if defined(COPY_DICT)
+#if defined(LZO1Z)
+				m_off = 1 + (ip[0] << 6) + (ip[1] >> 2);
+				last_m_off = m_off;
+#else
+				m_off = 1 + (ip[0] >> 2) + (ip[1] << 6);
+#endif
+#else
+#if defined(LZO1Z)
+				{
+					lzo_uint off =
+					    1 + (ip[0] << 6) + (ip[1] >> 2);
+					m_pos = op - off;
+					last_m_off = off;
+				}
+#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
+				m_pos = op - 1;
+				m_pos -= (*(const lzo_ushortp)ip) >> 2;
+#else
+				m_pos = op - 1;
+				m_pos -= (ip[0] >> 2) + (ip[1] << 6);
+#endif
+#endif
+				ip += 2;
+			} else if (t >= 16) {
+#if defined(COPY_DICT)
+				m_off = (t & 8) << 11;
+#else
+				m_pos = op;
+				m_pos -= (t & 8) << 11;
+#endif
+				t &= 7;
+				if (t == 0) {
+					NEED_IP(1);
+					while (*ip == 0) {
+						t += 255;
+						ip++;
+						NEED_IP(1);
+					}
+					t += 7 + *ip++;
+				}
+#if defined(COPY_DICT)
+#if defined(LZO1Z)
+				m_off += (ip[0] << 6) + (ip[1] >> 2);
+#else
+				m_off += (ip[0] >> 2) + (ip[1] << 6);
+#endif
+				ip += 2;
+				if (m_off == 0)
+					goto eof_found;
+				m_off += 0x4000;
+#if defined(LZO1Z)
+				last_m_off = m_off;
+#endif
+#else
+#if defined(LZO1Z)
+				m_pos -= (ip[0] << 6) + (ip[1] >> 2);
+#elif defined(LZO_UNALIGNED_OK_2) && (LZO_BYTE_ORDER == LZO_LITTLE_ENDIAN)
+				m_pos -= (*(const lzo_ushortp)ip) >> 2;
+#else
+				m_pos -= (ip[0] >> 2) + (ip[1] << 6);
+#endif
+				ip += 2;
+				if (m_pos == op)
+					goto eof_found;
+				m_pos -= 0x4000;
+#if defined(LZO1Z)
+				last_m_off = op - m_pos;
+#endif
+#endif
+			} else {
+#if defined(COPY_DICT)
+#if defined(LZO1Z)
+				m_off = 1 + (t << 6) + (*ip++ >> 2);
+				last_m_off = m_off;
+#else
+				m_off = 1 + (t >> 2) + (*ip++ << 2);
+#endif
+				NEED_OP(2);
+				t = 2;
+				COPY_DICT(t, m_off)
+#else
+#if defined(LZO1Z)
+				t = 1 + (t << 6) + (*ip++ >> 2);
+				m_pos = op - t;
+				last_m_off = t;
+#else
+				m_pos = op - 1;
+				m_pos -= t >> 2;
+				m_pos -= *ip++ << 2;
+#endif
+				TEST_LOOKBEHIND(m_pos, out);
+				NEED_OP(2);
+				*op++ = *m_pos++;
+				*op++ = *m_pos;
+#endif
+				goto match_done;
+			}
+
+#if defined(COPY_DICT)
+
+			NEED_OP(t + 3 - 1);
+			t += 3 - 1;
+			COPY_DICT(t, m_off)
+#else
+
+			TEST_LOOKBEHIND(m_pos, out);
+			assert("lzo-19", t > 0);
+			NEED_OP(t + 3 - 1);
+#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4)
+#if !defined(LZO_UNALIGNED_OK_4)
+			if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op, m_pos)) {
+				assert((op - m_pos) >= 4);
+#else
+			if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) {
+#endif
+				COPY4(op, m_pos);
+				op += 4;
+				m_pos += 4;
+				t -= 4 - (3 - 1);
+				do {
+					COPY4(op, m_pos);
+					op += 4;
+					m_pos += 4;
+					t -= 4;
+				} while (t >= 4);
+				if (t > 0)
+					do
+						*op++ = *m_pos++;
+					while (--t > 0);
+			} else
+#endif
+			{
+			      copy_match:
+				*op++ = *m_pos++;
+				*op++ = *m_pos++;
+				do
+					*op++ = *m_pos++;
+				while (--t > 0);
+			}
+
+#endif
+
+		      match_done:
+#if defined(LZO1Z)
+			t = ip[-1] & 3;
+#else
+			t = ip[-2] & 3;
+#endif
+			if (t == 0)
+				break;
+
+		      match_next:
+			assert("lzo-20", t > 0);
+			NEED_OP(t);
+			NEED_IP(t + 1);
+			do
+				*op++ = *ip++;
+			while (--t > 0);
+			t = *ip++;
+		}
+	}
+
+#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP)
+	*out_len = op - out;
+	return LZO_E_EOF_NOT_FOUND;
+#endif
+
+      eof_found:
+	assert("lzo-21", t == 1);
+	*out_len = op - out;
+	return (ip == ip_end ? LZO_E_OK :
+		(ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
+
+#if defined(HAVE_NEED_IP)
+      input_overrun:
+	*out_len = op - out;
+	return LZO_E_INPUT_OVERRUN;
+#endif
+
+#if defined(HAVE_NEED_OP)
+      output_overrun:
+	*out_len = op - out;
+	return LZO_E_OUTPUT_OVERRUN;
+#endif
+
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
+      lookbehind_overrun:
+	*out_len = op - out;
+	return LZO_E_LOOKBEHIND_OVERRUN;
+#endif
+}
+
+#define LZO_TEST_DECOMPRESS_OVERRUN /* turn on the overrun checks set up below */
+#undef DO_DECOMPRESS
+#define DO_DECOMPRESS       lzo1x_decompress_safe
+/* Default any unset check: input/output to level 2, lookbehind enabled. */
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN)
+#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
+#    define LZO_TEST_DECOMPRESS_OVERRUN_INPUT       2
+#  endif
+#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
+#    define LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT      2
+#  endif
+#  if !defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
+#    define LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND
+#  endif
+#endif
+/* Drop earlier definitions of the check macros so they can be redefined. */
+#undef TEST_IP
+#undef TEST_OP
+#undef TEST_LOOKBEHIND
+#undef NEED_IP
+#undef NEED_OP
+#undef HAVE_TEST_IP
+#undef HAVE_TEST_OP
+#undef HAVE_NEED_IP
+#undef HAVE_NEED_OP
+#undef HAVE_ANY_IP
+#undef HAVE_ANY_OP
+/* Input checks: TEST_IP at level >= 1, NEED_IP(x) bail-out at level >= 2. */
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_INPUT)
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 1)
+#    define TEST_IP             (ip < ip_end)
+#  endif
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_INPUT >= 2)
+#    define NEED_IP(x) \
+	    if ((lzo_uint)(ip_end - ip) < (lzo_uint)(x))  goto input_overrun
+#  endif /* NEED_IP: goto input_overrun if < x bytes remain before ip_end */
+#endif
+/* Output checks: level >= 2 swaps TEST_OP for the NEED_OP(x) bail-out. */
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT)
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 1)
+#    define TEST_OP             (op <= op_end)
+#  endif
+#  if (LZO_TEST_DECOMPRESS_OVERRUN_OUTPUT >= 2)
+#    undef TEST_OP
+#    define NEED_OP(x) \
+	    if ((lzo_uint)(op_end - op) < (lzo_uint)(x))  goto output_overrun
+#  endif /* NEED_OP: goto output_overrun if < x bytes remain before op_end */
+#endif
+/* Lookbehind check: goto lookbehind_overrun when m_pos < out, else no-op. */
+#if defined(LZO_TEST_DECOMPRESS_OVERRUN_LOOKBEHIND)
+#  define TEST_LOOKBEHIND(m_pos,out)    if (m_pos < out) goto lookbehind_overrun
+#else
+#  define TEST_LOOKBEHIND(m_pos,op)     ((void) 0)
+#endif /* NOTE(review): the check macros are bare if/goto, not do/while(0) */
+/* Without LZO_EOF_CODE, make sure TEST_IP is defined. */
+#if !defined(LZO_EOF_CODE) && !defined(TEST_IP)
+#  define TEST_IP               (ip < ip_end)
+#endif
+/* HAVE_TEST_*: a real test was defined; otherwise the test is constant 1. */
+#if defined(TEST_IP)
+#  define HAVE_TEST_IP
+#else
+#  define TEST_IP               1
+#endif
+#if defined(TEST_OP)
+#  define HAVE_TEST_OP
+#else
+#  define TEST_OP               1
+#endif
+/* HAVE_NEED_*: a real check was defined; otherwise NEED_*(x) is a no-op. */
+#if defined(NEED_IP)
+#  define HAVE_NEED_IP
+#else
+#  define NEED_IP(x)            ((void) 0)
+#endif
+#if defined(NEED_OP)
+#  define HAVE_NEED_OP
+#else
+#  define NEED_OP(x)            ((void) 0)
+#endif
+/* HAVE_ANY_*: set when either kind of check is active for that side. */
+#if defined(HAVE_TEST_IP) || defined(HAVE_NEED_IP)
+#  define HAVE_ANY_IP
+#endif
+#if defined(HAVE_TEST_OP) || defined(HAVE_NEED_OP)
+#  define HAVE_ANY_OP
+#endif
+/* __COPY4 assigns through lzo_uint32p casts (presumably 4 bytes; confirm). */
+#undef __COPY4
+#define __COPY4(dst,src)    * (lzo_uint32p)(dst) = * (const lzo_uint32p)(src)
+/* COPY4 is defined only if LZO_UNALIGNED_OK_4 or LZO_ALIGNED_OK_4 is. */
+#undef COPY4
+#if defined(LZO_UNALIGNED_OK_4)
+#  define COPY4(dst,src)    __COPY4(dst,src)
+#elif defined(LZO_ALIGNED_OK_4)
+#  define COPY4(dst,src)    __COPY4((lzo_ptr_t)(dst),(lzo_ptr_t)(src))
+#endif
+
+/***** End of minilzo.c *****/


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-27  0:34 Reiser4 und LZO compression Alexey Dobriyan
@ 2006-08-27  8:04 ` Andrew Morton
  2006-08-27  8:49   ` Ray Lee
                     ` (3 more replies)
  2006-08-28 17:06 ` Hans Reiser
  1 sibling, 4 replies; 47+ messages in thread
From: Andrew Morton @ 2006-08-27  8:04 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: reiserfs-list, linux-kernel

On Sun, 27 Aug 2006 04:34:26 +0400
Alexey Dobriyan <adobriyan@gmail.com> wrote:

> The patch below is so-called reiser4 LZO compression plugin as extracted
> from 2.6.18-rc4-mm3.
> 
> I think it is an unauditable piece of shit and thus should not enter
> mainline.

Like lib/inflate.c (and this new code should arguably be in lib/).

The problem is that if we clean this up, we've diverged very much from the
upstream implementation.  So taking in fixes and features from upstream
becomes harder and more error-prone.

I'd suspect that the maturity of these utilities is such that we could
afford to turn them into kernel code in the expectation that any future
changes will be small.  But it's not a completely simple call.

(iirc the inflate code had a buffer overrun a while back, which was found
and fixed in the upstream version).



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-27  8:04 ` Andrew Morton
@ 2006-08-27  8:49   ` Ray Lee
  2006-08-27  9:42   ` David Masover
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 47+ messages in thread
From: Ray Lee @ 2006-08-27  8:49 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alexey Dobriyan, reiserfs-list, linux-kernel

On 8/27/06, Andrew Morton <akpm@osdl.org> wrote:
> On Sun, 27 Aug 2006 04:34:26 +0400
> Alexey Dobriyan <adobriyan@gmail.com> wrote:
>
> > The patch below is so-called reiser4 LZO compression plugin as extracted
> > from 2.6.18-rc4-mm3.
> >
> > I think it is an unauditable piece of shit and thus should not enter
> > mainline.

Sheesh.

> Like lib/inflate.c (and this new code should arguably be in lib/).
>
> The problem is that if we clean this up, we've diverged very much from the
> upstream implementation.  So taking in fixes and features from upstream
> becomes harder and more error-prone.

Right. How about putting it in as-is so that everyone can track
divergences, but not using it for a real compile? Rather, consider it
meta-source, and do mechanical, repeatable transformations only,
starting with something like:

mv minilzo.c minilzo._c
cpp 2>/dev/null -w -P -C -nostdinc -dI minilzo._c >minilzo.c
lindent minilzo.c

to generate a version that can be audited. Doing so on a version of
minilzo.c google found on the web generated something that looked much
like any other stream coder source I've read, so it approaches
readability. Of a sorts. Further cleanups could be done with cpp -D to
rename some of the more bizarre symbols.

Downside is that bugs would have to be fixed in the 'meta-source'
(horrible name, but it's late here), but at least they could be found
(potentially) easier than in the original.

Ray

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-27  8:04 ` Andrew Morton
  2006-08-27  8:49   ` Ray Lee
@ 2006-08-27  9:42   ` David Masover
  2006-08-28 17:34     ` Jindrich Makovicka
  2006-08-28 12:42   ` Jörn Engel
  2006-08-29 13:14   ` PFC
  3 siblings, 1 reply; 47+ messages in thread
From: David Masover @ 2006-08-27  9:42 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alexey Dobriyan, reiserfs-list, linux-kernel

Andrew Morton wrote:
> On Sun, 27 Aug 2006 04:34:26 +0400
> Alexey Dobriyan <adobriyan@gmail.com> wrote:
> 
>> The patch below is so-called reiser4 LZO compression plugin as extracted
>> from 2.6.18-rc4-mm3.
>>
>> I think it is an unauditable piece of shit and thus should not enter
>> mainline.
> 
> Like lib/inflate.c (and this new code should arguably be in lib/).
> 
> The problem is that if we clean this up, we've diverged very much from the
> upstream implementation.  So taking in fixes and features from upstream
> becomes harder and more error-prone.

Well, what kinds of changes have to happen?  I doubt upstream would care 
about moving some of it to lib/ -- and anyway, reiserfs-list is on the 
CC.  We are speaking of upstream in the third person in the presence of 
upstream, so...

Maybe just ask upstream?

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-27  8:04 ` Andrew Morton
  2006-08-27  8:49   ` Ray Lee
  2006-08-27  9:42   ` David Masover
@ 2006-08-28 12:42   ` Jörn Engel
  2006-08-29 13:14   ` PFC
  3 siblings, 0 replies; 47+ messages in thread
From: Jörn Engel @ 2006-08-28 12:42 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alexey Dobriyan, reiserfs-list, linux-kernel

On Sun, 27 August 2006 01:04:28 -0700, Andrew Morton wrote:
> 
> Like lib/inflate.c (and this new code should arguably be in lib/).
> 
> The problem is that if we clean this up, we've diverged very much from the
> upstream implementation.  So taking in fixes and features from upstream
> becomes harder and more error-prone.

I've had an identical argument with Linus about lib/zlib_*.  He
decided that he didn't care about diverging, I went ahead and changed
the code.  In the process, I merged a couple of outstanding bugfixes
and reduced memory consumption by 25%.  Looks like Linus was right on
that one.

> I'd suspect that the maturity of these utilities is such that we could
> afford to turn them into kernel code in the expectation that any future
> changes will be small.  But it's not a completely simple call.
> 
> (iirc the inflate code had a buffer overrun a while back, which was found
> and fixed in the upstream version).

Ditto in lib/zlib_*.  lib/inflate.c is only used for the various
in-kernel bootloaders to uncompress a kernel image.  Anyone tampering
with the image to cause a buffer overrun already owns the machine
anyway.

Whether any of our experiences with zlib apply to lzo remains a
question, though.

Jörn

-- 
I've never met a human being who would want to read 17,000 pages of
documentation, and if there was, I'd kill him to get him out of the
gene pool.
-- Joseph Costello

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-27  0:34 Reiser4 und LZO compression Alexey Dobriyan
  2006-08-27  8:04 ` Andrew Morton
@ 2006-08-28 17:06 ` Hans Reiser
  2006-08-28 17:37   ` Stefan Traby
  1 sibling, 1 reply; 47+ messages in thread
From: Hans Reiser @ 2006-08-28 17:06 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: reiserfs-list, linux-kernel, Andrew Morton

Alexey Dobriyan wrote:
> Reiser4 developers, Andrew,
>
> The patch below is so-called reiser4 LZO compression plugin as extracted
> from 2.6.18-rc4-mm3.
>
> I think it is an unauditable piece of shit and thus should not enter
> mainline.
>
>   
Hmm.  LZO is the best compression algorithm for the task as measured by
the objectives of good compression effectiveness while still having very
low CPU usage (the best of those written and GPL'd, there is a slightly
better one which is proprietary and uses more CPU, LZRW if I remember
right.  The gzip code base uses too much CPU, though I think Edward made
an option of it....).  Could you be kind enough to send me a plugin
which is better at those two measures, I'd be quite grateful?

By the way, could you tell me about this "auditing" stuff?  Last I
remember, when I mentioned that the US Defense community had coding
practices worth adopting by the Kernel Community, I was pretty much
disregarded.  So, while I understand that the FSB has serious security
issues what with all these Americans seeking to crack their Linux boxen,
complaining to me about auditability seems a bit graceless.;-) 
Especially if there is no offer of replacement compression code.

Oh, and this LZO code is not written by Namesys.  You can tell by the
utter lack of comments, assertions, etc.  We are just seeking to reuse
well known widely used code.  I have in the past been capable of
demanding that my programmers comment code not written by them before we
use it, but this time I did not.  I have mixed feelings about us adding
our comments to code written by a compression specialist.  If Andrew
wants us to write our own compression code, or comment this code and
fill it with asserts, we will grumble a bit and do it.  It is not a task
I am eager for, as compression code is a highly competitive field which
gives me the surface impression that if you are not gripped by what you
are sure is an inspiration you should stay out of it.  

Jorn wrote:

    I've had an identical argument with Linus about lib/zlib_*.  He
    decided that he didn't care about diverging, I went ahead and changed
    the code.  In the process, I merged a couple of outstanding bugfixes
    and reduced memory consumption by 25%.  Looks like Linus was right on
    that one.

Anyone sends myself or Edward a patch, that's great.  Jorn, sounds like
you did a good job on that one.

Hans

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-27  9:42   ` David Masover
@ 2006-08-28 17:34     ` Jindrich Makovicka
  2006-08-28 18:05       ` Edward Shishkin
  0 siblings, 1 reply; 47+ messages in thread
From: Jindrich Makovicka @ 2006-08-28 17:34 UTC (permalink / raw)
  To: David Masover; +Cc: Andrew Morton, Alexey Dobriyan, reiserfs-list, linux-kernel

On Sun, 27 Aug 2006 04:42:59 -0500
David Masover <ninja@slaphack.com> wrote:

> Andrew Morton wrote:
> > On Sun, 27 Aug 2006 04:34:26 +0400
> > Alexey Dobriyan <adobriyan@gmail.com> wrote:
> > 
> >> The patch below is so-called reiser4 LZO compression plugin as
> >> extracted from 2.6.18-rc4-mm3.
> >>
> >> I think it is an unauditable piece of shit and thus should not
> >> enter mainline.
> > 
> > Like lib/inflate.c (and this new code should arguably be in lib/).
> > 
> > The problem is that if we clean this up, we've diverged very much
> > from the upstream implementation.  So taking in fixes and features
> > from upstream becomes harder and more error-prone.
> 
> Well, what kinds of changes have to happen?  I doubt upstream would
> care about moving some of it to lib/ -- and anyway, reiserfs-list is
> on the CC.  We are speaking of upstream in the third party in the
> presence of upstream, so...

The ifdef jungle is ugly, and especially the WIN / 16-bit DOS stuff is
completely useless here.

> Maybe just ask upstream?

I am not sure if Mr. Oberhumer still cares about LZO 1.x, AFAIK he now
develops a new compressor under a commercial license.

Regards,
-- 
Jindrich Makovicka

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-28 17:06 ` Hans Reiser
@ 2006-08-28 17:37   ` Stefan Traby
  2006-08-28 18:15     ` Edward Shishkin
  0 siblings, 1 reply; 47+ messages in thread
From: Stefan Traby @ 2006-08-28 17:37 UTC (permalink / raw)
  To: Hans Reiser; +Cc: Alexey Dobriyan, reiserfs-list, linux-kernel, Andrew Morton

On Mon, Aug 28, 2006 at 10:06:46AM -0700, Hans Reiser wrote:

> Hmm.  LZO is the best compression algorithm for the task as measured by
> the objectives of good compression effectiveness while still having very
> low CPU usage (the best of those written and GPL'd, there is a slightly
> better one which is proprietary and uses more CPU, LZRW if I remember
> right.  The gzip code base uses too much CPU, though I think Edward made

I don't think that LZO beats LZF in both speed and compression ratio.

LZF is also available under GPL (dual-licensed BSD) and was chosen in favor
of LZO for the next generation suspend-to-disk code of the Linux kernel.

see: http://www.goof.com/pcg/marc/liblzf.html

-- 

  ciao - 
    Stefan

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-28 17:34     ` Jindrich Makovicka
@ 2006-08-28 18:05       ` Edward Shishkin
  0 siblings, 0 replies; 47+ messages in thread
From: Edward Shishkin @ 2006-08-28 18:05 UTC (permalink / raw)
  To: Jindrich Makovicka
  Cc: David Masover, Andrew Morton, Alexey Dobriyan, reiserfs-list,
	linux-kernel

Jindrich Makovicka wrote:
> On Sun, 27 Aug 2006 04:42:59 -0500
> David Masover <ninja@slaphack.com> wrote:
> 
> 
>>Andrew Morton wrote:
>>
>>>On Sun, 27 Aug 2006 04:34:26 +0400
>>>Alexey Dobriyan <adobriyan@gmail.com> wrote:
>>>
>>>
>>>>The patch below is so-called reiser4 LZO compression plugin as
>>>>extracted from 2.6.18-rc4-mm3.
>>>>
>>>>I think it is an unauditable piece of shit and thus should not
>>>>enter mainline.
>>>
>>>Like lib/inflate.c (and this new code should arguably be in lib/).
>>>
>>>The problem is that if we clean this up, we've diverged very much
>>>from the upstream implementation.  So taking in fixes and features
>>>from upstream becomes harder and more error-prone.
>>
>>Well, what kinds of changes have to happen?  I doubt upstream would
>>care about moving some of it to lib/ -- and anyway, reiserfs-list is
>>on the CC.  We are speaking of upstream in the third party in the
>>presence of upstream, so...
> 
> 
> The ifdef jungle is ugly, and especially the WIN / 16-bit DOS stuff is
> completely useless here.
> 

I agree that it needs some brushing,
putting in todo..


> 
>>Maybe just ask upstream?
> 
> 
> I am not sure if Mr. Oberhumer still cares about LZO 1.x, AFAIK he now
> develops a new compressor under a commercial license.
> 
> Regards,


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-28 17:37   ` Stefan Traby
@ 2006-08-28 18:15     ` Edward Shishkin
  2006-08-28 21:48       ` Nigel Cunningham
  0 siblings, 1 reply; 47+ messages in thread
From: Edward Shishkin @ 2006-08-28 18:15 UTC (permalink / raw)
  To: Stefan Traby
  Cc: Hans Reiser, Alexey Dobriyan, reiserfs-list, linux-kernel, Andrew Morton

Stefan Traby wrote:
> On Mon, Aug 28, 2006 at 10:06:46AM -0700, Hans Reiser wrote:
> 
> 
>>Hmm.  LZO is the best compression algorithm for the task as measured by
>>the objectives of good compression effectiveness while still having very
>>low CPU usage (the best of those written and GPL'd, there is a slightly
>>better one which is proprietary and uses more CPU, LZRW if I remember
>>right.  The gzip code base uses too much CPU, though I think Edward made
> 
> 
> I don't think that LZO beats LZF in both speed and compression ratio.
> 
> LZF is also available under GPL (dual-licensed BSD) and was choosen in favor
> of LZO for the next generation suspend-to-disk code of the Linux kernel.
> 
> see: http://www.goof.com/pcg/marc/liblzf.html
> 

thanks for the info, we will compare them

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-28 18:15     ` Edward Shishkin
@ 2006-08-28 21:48       ` Nigel Cunningham
  2006-08-28 23:32         ` Hans Reiser
                           ` (3 more replies)
  0 siblings, 4 replies; 47+ messages in thread
From: Nigel Cunningham @ 2006-08-28 21:48 UTC (permalink / raw)
  To: Edward Shishkin
  Cc: Stefan Traby, Hans Reiser, Alexey Dobriyan, reiserfs-list,
	linux-kernel, Andrew Morton

Hi.

On Mon, 2006-08-28 at 22:15 +0400, Edward Shishkin wrote:
> Stefan Traby wrote:
> > On Mon, Aug 28, 2006 at 10:06:46AM -0700, Hans Reiser wrote:
> > 
> > 
> >>Hmm.  LZO is the best compression algorithm for the task as measured by
> >>the objectives of good compression effectiveness while still having very
> >>low CPU usage (the best of those written and GPL'd, there is a slightly
> >>better one which is proprietary and uses more CPU, LZRW if I remember
> >>right.  The gzip code base uses too much CPU, though I think Edward made
> > 
> > 
> > I don't think that LZO beats LZF in both speed and compression ratio.
> > 
> > LZF is also available under GPL (dual-licensed BSD) and was choosen in favor
> > of LZO for the next generation suspend-to-disk code of the Linux kernel.
> > 
> > see: http://www.goof.com/pcg/marc/liblzf.html
> > 
> 
> thanks for the info, we will compare them

For Suspend2, we ended up converting the LZF support to a cryptoapi
plugin. Is there any chance that you could use cryptoapi modules? We
could then have a hope of sharing the support.

Regards,

Nigel


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-28 21:48       ` Nigel Cunningham
@ 2006-08-28 23:32         ` Hans Reiser
  2006-08-29  4:05         ` Jan Engelhardt
                           ` (2 subsequent siblings)
  3 siblings, 0 replies; 47+ messages in thread
From: Hans Reiser @ 2006-08-28 23:32 UTC (permalink / raw)
  To: Nigel Cunningham
  Cc: Edward Shishkin, Stefan Traby, Alexey Dobriyan, reiserfs-list,
	linux-kernel, Andrew Morton

Nigel Cunningham wrote:
> For Suspend2, we ended up converting the LZF support to a cryptoapi
> plugin. Is there any chance that you could use cryptoapi modules? We
> could then have a hope of sharing the support
It is in principle a good idea, and I hope we will be able to say yes. 
However, I have to see the numbers, as we are more performance sensitive
than you folks probably are, and every 10% is a big deal for us.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-28 21:48       ` Nigel Cunningham
  2006-08-28 23:32         ` Hans Reiser
@ 2006-08-29  4:05         ` Jan Engelhardt
  2006-08-29  5:41           ` Nigel Cunningham
  2006-08-29  4:59         ` Paul Mundt
  2006-08-29  9:29         ` Edward Shishkin
  3 siblings, 1 reply; 47+ messages in thread
From: Jan Engelhardt @ 2006-08-29  4:05 UTC (permalink / raw)
  To: Nigel Cunningham
  Cc: Edward Shishkin, Stefan Traby, Hans Reiser, Alexey Dobriyan,
	reiserfs-list, linux-kernel, Andrew Morton

>> >>Hmm.  LZO is the best compression algorithm for the task as measured by
>> >>the objectives of good compression effectiveness while still having very
>> >>low CPU usage (the best of those written and GPL'd, there is a slightly
>> >>better one which is proprietary and uses more CPU, LZRW if I remember
>> >>right.  The gzip code base uses too much CPU, though I think Edward made
>> > 
>> > I don't think that LZO beats LZF in both speed and compression ratio.
>> > 
>> > LZF is also available under GPL (dual-licensed BSD) and was choosen in favor
>> > of LZO for the next generation suspend-to-disk code of the Linux kernel.
>> > 
>> > see: http://www.goof.com/pcg/marc/liblzf.html
>> 
>> thanks for the info, we will compare them
>
>For Suspend2, we ended up converting the LZF support to a cryptoapi
>plugin. Is there any chance that you could use cryptoapi modules? We
>could then have a hope of sharing the support.

I am throwing in gzip: would it be meaningful to use that instead? The 
decoder (inflate.c) is already there.

06:04 shanghai:~/liblzf-1.6 > l configure*
-rwxr-xr-x  1 jengelh users 154894 Mar  3  2005 configure
-rwxr-xr-x  1 jengelh users  26810 Mar  3  2005 configure.bz2
-rw-r--r--  1 jengelh users  30611 Aug 28 20:32 configure.gz-z9
-rw-r--r--  1 jengelh users  30693 Aug 28 20:32 configure.gz-z6
-rw-r--r--  1 jengelh users  53077 Aug 28 20:32 configure.lzf


Jan Engelhardt
-- 

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-28 21:48       ` Nigel Cunningham
  2006-08-28 23:32         ` Hans Reiser
  2006-08-29  4:05         ` Jan Engelhardt
@ 2006-08-29  4:59         ` Paul Mundt
  2006-08-29  5:47           ` Nigel Cunningham
  2006-08-29 13:45           ` PFC
  2006-08-29  9:29         ` Edward Shishkin
  3 siblings, 2 replies; 47+ messages in thread
From: Paul Mundt @ 2006-08-29  4:59 UTC (permalink / raw)
  To: Nigel Cunningham
  Cc: Edward Shishkin, Stefan Traby, Hans Reiser, Alexey Dobriyan,
	reiserfs-list, linux-kernel, Andrew Morton, nitingupta.mail

On Tue, Aug 29, 2006 at 07:48:25AM +1000, Nigel Cunningham wrote:
> For Suspend2, we ended up converting the LZF support to a cryptoapi
> plugin. Is there any chance that you could use cryptoapi modules? We
> could then have a hope of sharing the support.
> 
Using cryptoapi plugins for the compression methods is an interesting
approach, there's a few other places in the kernel that could probably
benefit from this as well, such as jffs2 (which at the moment rolls its
own compression subsystem), and the out-of-tree page and swap cache
compression work.

Assuming you were wrapping in to LZF directly prior to the cryptoapi
integration, do you happen to have before and after numbers to determine
how heavyweight the rest of the cryptoapi overhead is? It would be
interesting to profile this and consider migrating the in-tree users,
rather than duplicating the compress/decompress routines all over the
place.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29  4:05         ` Jan Engelhardt
@ 2006-08-29  5:41           ` Nigel Cunningham
  2006-08-29  8:23             ` David Masover
  0 siblings, 1 reply; 47+ messages in thread
From: Nigel Cunningham @ 2006-08-29  5:41 UTC (permalink / raw)
  To: Jan Engelhardt
  Cc: Edward Shishkin, Stefan Traby, Hans Reiser, Alexey Dobriyan,
	reiserfs-list, linux-kernel, Andrew Morton

Hi.

On Tue, 2006-08-29 at 06:05 +0200, Jan Engelhardt wrote:
> >> >>Hmm.  LZO is the best compression algorithm for the task as measured by
> >> >>the objectives of good compression effectiveness while still having very
> >> >>low CPU usage (the best of those written and GPL'd, there is a slightly
> >> >>better one which is proprietary and uses more CPU, LZRW if I remember
> >> >>right.  The gzip code base uses too much CPU, though I think Edward made
> >> > 
> >> > I don't think that LZO beats LZF in both speed and compression ratio.
> >> > 
> >> > LZF is also available under GPL (dual-licensed BSD) and was choosen in favor
> >> > of LZO for the next generation suspend-to-disk code of the Linux kernel.
> >> > 
> >> > see: http://www.goof.com/pcg/marc/liblzf.html
> >> 
> >> thanks for the info, we will compare them
> >
> >For Suspend2, we ended up converting the LZF support to a cryptoapi
> >plugin. Is there any chance that you could use cryptoapi modules? We
> >could then have a hope of sharing the support.
> 
> I am throwing in gzip: would it be meaningful to use that instead? The 
> decoder (inflate.c) is already there.
> 
> 06:04 shanghai:~/liblzf-1.6 > l configure*
> -rwxr-xr-x  1 jengelh users 154894 Mar  3  2005 configure
> -rwxr-xr-x  1 jengelh users  26810 Mar  3  2005 configure.bz2
> -rw-r--r--  1 jengelh users  30611 Aug 28 20:32 configure.gz-z9
> -rw-r--r--  1 jengelh users  30693 Aug 28 20:32 configure.gz-z6
> -rw-r--r--  1 jengelh users  53077 Aug 28 20:32 configure.lzf

We used gzip when we first implemented compression support, and found it
to be far too slow. Even with the fastest compression options, we were
only getting a few megabytes per second. Perhaps I did something wrong
in configuring it, but there's not that many things to get wrong!

In contrast, with LZF, we get very high throughput. My current laptop is
a 1.8GHz Turion with a 7200 RPM (PATA) drive. Without LZF compression,
my throughput in writing an image is the maximum the drive & interface
can manage - 38MB/s. With LZF, I get roughly that divided by compression
ratio achieved, so if the compression ratio is ~50%, as it generally is,
I'm reading and writing the image at 75-80MB/s. During this time, all
the computer is doing is compressing pages using LZF and submitting
bios, with the odd message being sent to the userspace interface app via
netlink. I realise this is very different to the workload you'll be
doing, but hopefully the numbers are somewhat useful:

nigel@nigel:~$ cat /sys/power/suspend2/debug_info
Suspend2 debugging info:
- SUSPEND core   : 2.2.7.4
- Kernel Version : 2.6.18-rc4
- Compiler vers. : 4.1
- Attempt number : 1
- Parameters     : 0 32785 0 0 0 0
- Overall expected compression percentage: 0.
- Compressor is 'lzf'.
  Compressed 820006912 bytes into 430426371 (47 percent compression).
- Swapwriter active.
  Swap available for image: 487964 pages.
- Filewriter inactive.
- I/O speed: Write 74 MB/s, Read 70 MB/s.
- Extra pages    : 1913 used/2100.
nigel@nigel:~$

(Modify hibernate.conf to disable compression, suspend again...)

nigel@nigel:~$ cat /sys/power/suspend2/debug_info
Suspend2 debugging info:
- SUSPEND core   : 2.2.7.4
- Kernel Version : 2.6.18-rc4
- Compiler vers. : 4.1
- Attempt number : 2
- Parameters     : 0 32785 0 0 0 0
- Overall expected compression percentage: 0.
- Swapwriter active.
  Swap available for image: 487964 pages.
- Filewriter inactive.
- I/O speed: Write 38 MB/s, Read 39 MB/s.
- Extra pages    : 1907 used/2100.
nigel@nigel:~$

Oh, I also have a debugging mode where I can get Suspend2 to just
compress the pages but not actually write anything. If I do that, it
says it can do 80MB/s on my kernel image, so the disk is still the
bottleneck, it seems.

Hope this all helps (and isn't information overload!)

Nigel


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29  4:59         ` Paul Mundt
@ 2006-08-29  5:47           ` Nigel Cunningham
  2006-08-29 13:45           ` PFC
  1 sibling, 0 replies; 47+ messages in thread
From: Nigel Cunningham @ 2006-08-29  5:47 UTC (permalink / raw)
  To: Paul Mundt
  Cc: Edward Shishkin, Stefan Traby, Hans Reiser, Alexey Dobriyan,
	reiserfs-list, linux-kernel, Andrew Morton, nitingupta.mail

Hi.

On Tue, 2006-08-29 at 13:59 +0900, Paul Mundt wrote:
> On Tue, Aug 29, 2006 at 07:48:25AM +1000, Nigel Cunningham wrote:
> > For Suspend2, we ended up converting the LZF support to a cryptoapi
> > plugin. Is there any chance that you could use cryptoapi modules? We
> > could then have a hope of sharing the support.
> > 
> Using cryptoapi plugins for the compression methods is an interesting
> approach, there's a few other places in the kernel that could probably
> benefit from this as well, such as jffs2 (which at the moment rolls its
> own compression subsystem), and the out-of-tree page and swap cache
> compression work.
> 
> Assuming you were wrapping in to LZF directly prior to the cryptoapi
> integration, do you happen to have before and after numbers to determine
> how heavyweight the rest of the cryptoapi overhead is? It would be
> interesting to profile this and consider migrating the in-tree users,
> rather than duplicating the compress/decompress routines all over the
> place.

I was, but I don't have numbers right now. I'm about to go out, but will
see if I can find them when I get back later. From memory, it wasn't a
huge change in terms of lines of code.

Regards,

Nigel


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29  5:41           ` Nigel Cunningham
@ 2006-08-29  8:23             ` David Masover
  2006-08-29  9:57               ` Nigel Cunningham
  0 siblings, 1 reply; 47+ messages in thread
From: David Masover @ 2006-08-29  8:23 UTC (permalink / raw)
  To: Nigel Cunningham
  Cc: Jan Engelhardt, Edward Shishkin, Stefan Traby, Hans Reiser,
	Alexey Dobriyan, reiserfs-list, linux-kernel, Andrew Morton

Nigel Cunningham wrote:
> Hi.
> 
> On Tue, 2006-08-29 at 06:05 +0200, Jan Engelhardt wrote:
>>>>>> Hmm.  LZO is the best compression algorithm for the task as measured by
>>>>>> the objectives of good compression effectiveness while still having very
>>>>>> low CPU usage (the best of those written and GPL'd, there is a slightly
>>>>>> better one which is proprietary and uses more CPU, LZRW if I remember
>>>>>> right.  The gzip code base uses too much CPU, though I think Edward made
>>>>> I don't think that LZO beats LZF in both speed and compression ratio.
>>>>>
>>>>> LZF is also available under GPL (dual-licensed BSD) and was choosen in favor
>>>>> of LZO for the next generation suspend-to-disk code of the Linux kernel.
>>>>>
>>>>> see: http://www.goof.com/pcg/marc/liblzf.html
>>>> thanks for the info, we will compare them
>>> For Suspend2, we ended up converting the LZF support to a cryptoapi
>>> plugin. Is there any chance that you could use cryptoapi modules? We
>>> could then have a hope of sharing the support.
>> I am throwing in gzip: would it be meaningful to use that instead? The 
>> decoder (inflate.c) is already there.
>>
>> 06:04 shanghai:~/liblzf-1.6 > l configure*
>> -rwxr-xr-x  1 jengelh users 154894 Mar  3  2005 configure
>> -rwxr-xr-x  1 jengelh users  26810 Mar  3  2005 configure.bz2
>> -rw-r--r--  1 jengelh users  30611 Aug 28 20:32 configure.gz-z9
>> -rw-r--r--  1 jengelh users  30693 Aug 28 20:32 configure.gz-z6
>> -rw-r--r--  1 jengelh users  53077 Aug 28 20:32 configure.lzf
> 
> We used gzip when we first implemented compression support, and found it
> to be far too slow. Even with the fastest compression options, we were
> only getting a few megabytes per second. Perhaps I did something wrong
> in configuring it, but there's not that many things to get wrong!

All that comes to mind is the speed/quality setting -- the number from 1 
to 9.  Recently, I backed up someone's hard drive using -1, and I 
believe I was still able to saturate... the _network_.  Definitely try 
again if you haven't changed this, but I can't imagine I'm the first 
person to think of it.

 From what I remember, gzip -1 wasn't faster than the disk.  But at 
least for (very) repetitive data, I was wrong:

eve:~ sanity$ time bash -c 'dd if=/dev/zero of=test bs=10m count=10; sync'
10+0 records in
10+0 records out
104857600 bytes transferred in 3.261990 secs (32145287 bytes/sec)

real    0m3.746s
user    0m0.005s
sys     0m0.627s
eve:~ sanity$ time bash -c 'dd if=/dev/zero bs=10m count=10 | gzip -v1 > 
test; sync'
10+0 records in
10+0 records out
104857600 bytes transferred in 2.404093 secs (43616282 bytes/sec)
  99.5%

real    0m2.558s
user    0m1.554s
sys     0m0.680s
eve:~ sanity$



This was on OS X, but I think it's still valid -- this is a slightly 
older Powerbook, with a 5400 RPM drive, 1.6 ghz G4.

-1 is still worlds better than nothing.  The backup was over 15 gigs, 
down to about 6 -- loads of repetitive data, I'm sure, but that's where 
you win with compression anyway.

Well, you use cryptoapi anyway, so it should be easy to just let the 
user pick a plugin, right?

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-28 21:48       ` Nigel Cunningham
                           ` (2 preceding siblings ...)
  2006-08-29  4:59         ` Paul Mundt
@ 2006-08-29  9:29         ` Edward Shishkin
  3 siblings, 0 replies; 47+ messages in thread
From: Edward Shishkin @ 2006-08-29  9:29 UTC (permalink / raw)
  To: Nigel Cunningham
  Cc: Stefan Traby, Hans Reiser, Alexey Dobriyan, reiserfs-list,
	linux-kernel, Andrew Morton

Nigel Cunningham wrote:
> Hi.
> 
> On Mon, 2006-08-28 at 22:15 +0400, Edward Shishkin wrote:
> 
>>Stefan Traby wrote:
>>
>>>On Mon, Aug 28, 2006 at 10:06:46AM -0700, Hans Reiser wrote:
>>>
>>>
>>>
>>>>Hmm.  LZO is the best compression algorithm for the task as measured by
>>>>the objectives of good compression effectiveness while still having very
>>>>low CPU usage (the best of those written and GPL'd, there is a slightly
>>>>better one which is proprietary and uses more CPU, LZRW if I remember
>>>>right.  The gzip code base uses too much CPU, though I think Edward made
>>>
>>>
>>>I don't think that LZO beats LZF in both speed and compression ratio.
>>>
>>>LZF is also available under GPL (dual-licensed BSD) and was choosen in favor
>>>of LZO for the next generation suspend-to-disk code of the Linux kernel.
>>>
>>>see: http://www.goof.com/pcg/marc/liblzf.html
>>>
>>
>>thanks for the info, we will compare them
> 
> 
> For Suspend2, we ended up converting the LZF support to a cryptoapi
> plugin. Is there any chance that you could use cryptoapi modules? We
> could then have a hope of sharing the support.
> 

No problem with using crypto-api. Reiser4 bypasses it because it
currently supplies only one compression level, which is fairly
bad for compressed file systems.

Edward.


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29  8:23             ` David Masover
@ 2006-08-29  9:57               ` Nigel Cunningham
  2006-08-29 11:09                 ` Ray Lee
  2006-08-29 11:38                 ` Edward Shishkin
  0 siblings, 2 replies; 47+ messages in thread
From: Nigel Cunningham @ 2006-08-29  9:57 UTC (permalink / raw)
  To: David Masover
  Cc: Jan Engelhardt, Edward Shishkin, Stefan Traby, Hans Reiser,
	Alexey Dobriyan, reiserfs-list, linux-kernel, Andrew Morton

Hi.

On Tue, 2006-08-29 at 03:23 -0500, David Masover wrote:
> Nigel Cunningham wrote:
> > Hi.
> > 
> > On Tue, 2006-08-29 at 06:05 +0200, Jan Engelhardt wrote:
> >>>>>> Hmm.  LZO is the best compression algorithm for the task as measured by
> >>>>>> the objectives of good compression effectiveness while still having very
> >>>>>> low CPU usage (the best of those written and GPL'd, there is a slightly
> >>>>>> better one which is proprietary and uses more CPU, LZRW if I remember
> >>>>>> right.  The gzip code base uses too much CPU, though I think Edward made
> >>>>> I don't think that LZO beats LZF in both speed and compression ratio.
> >>>>>
> >>>>> LZF is also available under GPL (dual-licensed BSD) and was choosen in favor
> >>>>> of LZO for the next generation suspend-to-disk code of the Linux kernel.
> >>>>>
> >>>>> see: http://www.goof.com/pcg/marc/liblzf.html
> >>>> thanks for the info, we will compare them
> >>> For Suspend2, we ended up converting the LZF support to a cryptoapi
> >>> plugin. Is there any chance that you could use cryptoapi modules? We
> >>> could then have a hope of sharing the support.
> >> I am throwing in gzip: would it be meaningful to use that instead? The 
> >> decoder (inflate.c) is already there.
> >>
> >> 06:04 shanghai:~/liblzf-1.6 > l configure*
> >> -rwxr-xr-x  1 jengelh users 154894 Mar  3  2005 configure
> >> -rwxr-xr-x  1 jengelh users  26810 Mar  3  2005 configure.bz2
> >> -rw-r--r--  1 jengelh users  30611 Aug 28 20:32 configure.gz-z9
> >> -rw-r--r--  1 jengelh users  30693 Aug 28 20:32 configure.gz-z6
> >> -rw-r--r--  1 jengelh users  53077 Aug 28 20:32 configure.lzf
> > 
> > We used gzip when we first implemented compression support, and found it
> > to be far too slow. Even with the fastest compression options, we were
> > only getting a few megabytes per second. Perhaps I did something wrong
> > in configuring it, but there's not that many things to get wrong!
> 
> All that comes to mind is the speed/quality setting -- the number from 1 
> to 9.  Recently, I backed up someone's hard drive using -1, and I 
> believe I was still able to saturate... the _network_.  Definitely try 
> again if you haven't changed this, but I can't imagine I'm the first 
> persson to think of it.
> 
>  From what I remember, gzip -1 wasn't faster than the disk.  But at 
> least for (very) repetitive data, I was wrong:
> 
> eve:~ sanity$ time bash -c 'dd if=/dev/zero of=test bs=10m count=10; sync'
> 10+0 records in
> 10+0 records out
> 104857600 bytes transferred in 3.261990 secs (32145287 bytes/sec)
> 
> real    0m3.746s
> user    0m0.005s
> sys     0m0.627s
> eve:~ sanity$ time bash -c 'dd if=/dev/zero bs=10m count=10 | gzip -v1 > 
> test; sync'
> 10+0 records in
> 10+0 records out
> 104857600 bytes transferred in 2.404093 secs (43616282 bytes/sec)
>   99.5%
> 
> real    0m2.558s
> user    0m1.554s
> sys     0m0.680s
> eve:~ sanity$
> 
> 
> 
> This was on OS X, but I think it's still valid -- this is a slightly 
> older Powerbook, with a 5400 RPM drive, 1.6 ghz G4.
> 
> -1 is still worlds better than nothing.  The backup was over 15 gigs, 
> down to about 6 -- loads of repetitive data, I'm sure, but that's where 
> you win with compression anyway.

Wow. That's a lot better; I guess I did get something wrong in trying to
tune deflate. That was pre-cryptoapi though; looking at
cryptoapi/deflate.c, I don't see any way of controlling the compression
level. Am I missing anything?

> Well, you use cryptoapi anyway, so it should be easy to just let the 
> user pick a plugin, right?

Right. They can already pick deflate if they want to.

Regards,

Nigel


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29  9:57               ` Nigel Cunningham
@ 2006-08-29 11:09                 ` Ray Lee
  2006-08-29 11:38                 ` Edward Shishkin
  1 sibling, 0 replies; 47+ messages in thread
From: Ray Lee @ 2006-08-29 11:09 UTC (permalink / raw)
  To: Nigel Cunningham
  Cc: David Masover, Jan Engelhardt, Edward Shishkin, Stefan Traby,
	Hans Reiser, Alexey Dobriyan, reiserfs-list, linux-kernel,
	Andrew Morton

On 8/29/06, Nigel Cunningham <ncunningham@linuxmail.org> wrote:
> Hi.
> On Tue, 2006-08-29 at 03:23 -0500, David Masover wrote:
> > Nigel Cunningham wrote:
> > > We used gzip when we first implemented compression support, and found it
> > > to be far too slow. Even with the fastest compression options, we were
> > > only getting a few megabytes per second. Perhaps I did something wrong
> > > in configuring it, but there's not that many things to get wrong!
> >
> > All that comes to mind is the speed/quality setting -- the number from 1
> > to 9.  Recently, I backed up someone's hard drive using -1, and I
> > believe I was still able to saturate... the _network_.  Definitely try
> > again if you haven't changed this, but I can't imagine I'm the first
> > persson to think of it.
> >
> >  From what I remember, gzip -1 wasn't faster than the disk.  But at
> > least for (very) repetitive data, I was wrong:
> >
> > eve:~ sanity$ time bash -c 'dd if=/dev/zero of=test bs=10m count=10; sync'
> > 10+0 records in
> > 10+0 records out
> > 104857600 bytes transferred in 3.261990 secs (32145287 bytes/sec)
> >
> > real    0m3.746s
> > user    0m0.005s
> > sys     0m0.627s
> > eve:~ sanity$ time bash -c 'dd if=/dev/zero bs=10m count=10 | gzip -v1 >
> > test; sync'
> > 10+0 records in
> > 10+0 records out
> > 104857600 bytes transferred in 2.404093 secs (43616282 bytes/sec)
> >   99.5%
> >
> > real    0m2.558s
> > user    0m1.554s
> > sys     0m0.680s
> > eve:~ sanity$
> >
> >
> >
> > This was on OS X, but I think it's still valid -- this is a slightly
> > older Powerbook, with a 5400 RPM drive, 1.6 ghz G4.
> >
> > -1 is still worlds better than nothing.  The backup was over 15 gigs,
> > down to about 6 -- loads of repetitive data, I'm sure, but that's where
> > you win with compression anyway.
>
> Wow. That's a lot better; I guess I did get something wrong in trying to
> tune deflate. That was pre-cryptoapi though; looking at
> cryptoapi/deflate.c, I don't see any way of controlling the compression
> level. Am I missing anything?

Compressing /dev/zero isn't a great test. The timings are really data-dependent:

ray@phoenix:~$ time bash -c 'sudo dd if=/dev/zero bs=8M count=64 |
gzip -v1 >/dev/null'
64+0 records in
64+0 records out
536870912 bytes (537 MB) copied, 7.60817 seconds, 70.6 MB/s
 99.6%

real    0m7.652s
user    0m6.581s
sys     0m0.701s
ray@phoenix:~$ time bash -c 'sudo dd if=/dev/mem bs=8M count=64 | gzip
-v1 >/dev/null'
64+0 records in
64+0 records out
536870912 bytes (537 MB) copied, 21.5863 seconds, 24.9 MB/s
 70.4%

real    0m21.626s
user    0m18.763s
sys     0m1.762s

This is on an AMD64 laptop.

Ray

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29  9:57               ` Nigel Cunningham
  2006-08-29 11:09                 ` Ray Lee
@ 2006-08-29 11:38                 ` Edward Shishkin
  2006-08-29 22:03                   ` Nigel Cunningham
  1 sibling, 1 reply; 47+ messages in thread
From: Edward Shishkin @ 2006-08-29 11:38 UTC (permalink / raw)
  To: Nigel Cunningham
  Cc: David Masover, Jan Engelhardt, Stefan Traby, Hans Reiser,
	Alexey Dobriyan, reiserfs-list, linux-kernel, Andrew Morton

Nigel Cunningham wrote:
> Hi.
> 
> On Tue, 2006-08-29 at 03:23 -0500, David Masover wrote:
> 
>>Nigel Cunningham wrote:
>>
>>>Hi.
>>>
>>>On Tue, 2006-08-29 at 06:05 +0200, Jan Engelhardt wrote:
>>>
>>>>>>>>Hmm.  LZO is the best compression algorithm for the task as measured by
>>>>>>>>the objectives of good compression effectiveness while still having very
>>>>>>>>low CPU usage (the best of those written and GPL'd, there is a slightly
>>>>>>>>better one which is proprietary and uses more CPU, LZRW if I remember
>>>>>>>>right.  The gzip code base uses too much CPU, though I think Edward made
>>>>>>>
>>>>>>>I don't think that LZO beats LZF in both speed and compression ratio.
>>>>>>>
>>>>>>>LZF is also available under GPL (dual-licensed BSD) and was choosen in favor
>>>>>>>of LZO for the next generation suspend-to-disk code of the Linux kernel.
>>>>>>>
>>>>>>>see: http://www.goof.com/pcg/marc/liblzf.html
>>>>>>
>>>>>>thanks for the info, we will compare them
>>>>>
>>>>>For Suspend2, we ended up converting the LZF support to a cryptoapi
>>>>>plugin. Is there any chance that you could use cryptoapi modules? We
>>>>>could then have a hope of sharing the support.
>>>>
>>>>I am throwing in gzip: would it be meaningful to use that instead? The 
>>>>decoder (inflate.c) is already there.
>>>>
>>>>06:04 shanghai:~/liblzf-1.6 > l configure*
>>>>-rwxr-xr-x  1 jengelh users 154894 Mar  3  2005 configure
>>>>-rwxr-xr-x  1 jengelh users  26810 Mar  3  2005 configure.bz2
>>>>-rw-r--r--  1 jengelh users  30611 Aug 28 20:32 configure.gz-z9
>>>>-rw-r--r--  1 jengelh users  30693 Aug 28 20:32 configure.gz-z6
>>>>-rw-r--r--  1 jengelh users  53077 Aug 28 20:32 configure.lzf
>>>
>>>We used gzip when we first implemented compression support, and found it
>>>to be far too slow. Even with the fastest compression options, we were
>>>only getting a few megabytes per second. Perhaps I did something wrong
>>>in configuring it, but there's not that many things to get wrong!
>>
>>All that comes to mind is the speed/quality setting -- the number from 1 
>>to 9.  Recently, I backed up someone's hard drive using -1, and I 
>>believe I was still able to saturate... the _network_.  Definitely try 
>>again if you haven't changed this, but I can't imagine I'm the first 
>>persson to think of it.
>>
>> From what I remember, gzip -1 wasn't faster than the disk.  But at 
>>least for (very) repetitive data, I was wrong:
>>
>>eve:~ sanity$ time bash -c 'dd if=/dev/zero of=test bs=10m count=10; sync'
>>10+0 records in
>>10+0 records out
>>104857600 bytes transferred in 3.261990 secs (32145287 bytes/sec)
>>
>>real    0m3.746s
>>user    0m0.005s
>>sys     0m0.627s
>>eve:~ sanity$ time bash -c 'dd if=/dev/zero bs=10m count=10 | gzip -v1 > 
>>test; sync'
>>10+0 records in
>>10+0 records out
>>104857600 bytes transferred in 2.404093 secs (43616282 bytes/sec)
>>  99.5%
>>
>>real    0m2.558s
>>user    0m1.554s
>>sys     0m0.680s
>>eve:~ sanity$
>>
>>
>>
>>This was on OS X, but I think it's still valid -- this is a slightly 
>>older Powerbook, with a 5400 RPM drive, 1.6 ghz G4.
>>
>>-1 is still worlds better than nothing.  The backup was over 15 gigs, 
>>down to about 6 -- loads of repetitive data, I'm sure, but that's where 
>>you win with compression anyway.
> 
> 
> Wow. That's a lot better; I guess I did get something wrong in trying to
> tune deflate. That was pre-cryptoapi though; looking at
> cryptoapi/deflate.c, I don't see any way of controlling the compression
> level. Am I missing anything?
> 

zlib is tunable, not cryptoapi's deflate.
look at zlib_deflateInit2()

> 
>>Well, you use cryptoapi anyway, so it should be easy to just let the 
>>user pick a plugin, right?
> 
> 
> Right. They can already pick deflate if they want to.
> 
> Regards,
> 
> Nigel
> 
> 
> 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-27  8:04 ` Andrew Morton
                     ` (2 preceding siblings ...)
  2006-08-28 12:42   ` Jörn Engel
@ 2006-08-29 13:14   ` PFC
  2006-08-29 17:38     ` David Masover
  3 siblings, 1 reply; 47+ messages in thread
From: PFC @ 2006-08-29 13:14 UTC (permalink / raw)
  To: Andrew Morton, Alexey Dobriyan; +Cc: reiserfs-list, linux-kernel



	Would it be, by any chance, possible to tweak the thing so that reiserfs  
plugins become kernel modules, so that the reiserfs core can be put in the  
kernel without the plugins slowing down its acceptance ?

	(and updating plugins without rebooting would be a nice extra)

>> The patch below is so-called reiser4 LZO compression plugin as extracted
>> from 2.6.18-rc4-mm3.
>>
>> I think it is an unauditable piece of shit and thus should not enter
>> mainline.
>
> Like lib/inflate.c (and this new code should arguably be in lib/).
>
> The problem is that if we clean this up, we've diverged very much from  
> the
> upstream implementation.  So taking in fixes and features from upstream
> becomes harder and more error-prone.
>
> I'd suspect that the maturity of these utilities is such that we could
> afford to turn them into kernel code in the expectation that any future
> changes will be small.  But it's not a completely simple call.
>
> (iirc the inflate code had a buffer overrun a while back, which was found
> and fixed in the upstream version).
>
>



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29  4:59         ` Paul Mundt
  2006-08-29  5:47           ` Nigel Cunningham
@ 2006-08-29 13:45           ` PFC
  2006-08-29 14:38             ` Stefan Traby
                               ` (2 more replies)
  1 sibling, 3 replies; 47+ messages in thread
From: PFC @ 2006-08-29 13:45 UTC (permalink / raw)
  To: reiserfs-list


	I made a little benchmark on my own PC (Athlon64 3200+ in 64 bit gentoo)

http://peufeu.free.fr/compression.html

	So, gzip could be used on PCs having very fast processors and very slow  
harddrives, like Core Duo laptops.
	However, lzo compresses nearly as much and is still a lot faster. I don't  
see a reason for gzip in a FS application.

	Anyone has a bench for lzf ?

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 13:45           ` PFC
@ 2006-08-29 14:38             ` Stefan Traby
  2006-08-29 15:55               ` PFC
  2006-08-29 15:41             ` Gregory Maxwell
  2006-08-29 17:42             ` Hans Reiser
  2 siblings, 1 reply; 47+ messages in thread
From: Stefan Traby @ 2006-08-29 14:38 UTC (permalink / raw)
  To: PFC; +Cc: reiserfs-list

On Tue, Aug 29, 2006 at 03:45:59PM +0200, PFC wrote:
> 	Anyone has a bench for lzf ?

It's easy, try something like:

wget http://www.goof.com/pcg/marc/data/liblzf-1.6.tar.gz
tar zxvpf liblzf-1.6.tar.gz
cd liblzf-1.6
configure && make

Now you have a small lzf binary that you can use for testing:
cat bigfile|./lzf > bigfile.lzf

use "./lzf -d" for decompression tests

-- 

  ciao - 
    Stefan

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 13:45           ` PFC
  2006-08-29 14:38             ` Stefan Traby
@ 2006-08-29 15:41             ` Gregory Maxwell
  2006-08-29 17:42             ` Hans Reiser
  2 siblings, 0 replies; 47+ messages in thread
From: Gregory Maxwell @ 2006-08-29 15:41 UTC (permalink / raw)
  To: PFC; +Cc: reiserfs-list

On 8/29/06, PFC <lists@peufeu.com> wrote:
>         Anyone has a bench for lzf ?

This is on an Opteron 1.8GHz box. Everything was tested with a hot cache.

Testing on a fairly repetitive but real test case (an SQL dump of one
of the Wikipedia tables):
-rw-rw-r-- 1 gmaxwell gmaxwell 426162134 Jul 20 06:54 ../page.sql

$time lzop -c ../page.sql > page.sql.lzo
real    0m8.618s
user    0m7.800s
sys     0m0.808s

$time lzop -9c ../page.sql > page.sql.lzo-9
real    4m45.299s
user    4m44.474s
sys     0m0.712s

$time gzip -1 -c ../page.sql > page.sql.gz
real    0m19.292s
user    0m18.545s
sys     0m0.748s

$time lzop -d -c ./page.sql.lzo > /dev/null
real    0m3.061s
user    0m2.836s
sys     0m0.224s

$time gzip -dc page.sql.gz >/dev/null
real    0m7.199s
user    0m7.020s
sys     0m0.176s

$time ./lzf -d  < page.sql.lzf > /dev/null
real    0m2.398s
user    0m2.224s
sys     0m0.172s

-rw-rw-r-- 1 gmaxwell gmaxwell 193853815 Aug 29 10:59 page.sql.gz
-rw-rw-r-- 1 gmaxwell gmaxwell 243497298 Aug 29 10:47 page.sql.lzf
-rw-rw-r-- 1 gmaxwell gmaxwell 259986955 Jul 20 06:54 page.sql.lzo
-rw-rw-r-- 1 gmaxwell gmaxwell 204930904 Jul 20 06:54 page.sql.lzo-9

(decompression of the differing lzo levels is the same speed)

None of them really decompress fast enough to keep up with the disks
in this system, lzf or lzo wouldn't be a big loss. (Bonnie scores:
floodlamp,64G,,,246163,52,145536,35,,,365198,42,781.2,2,16,4540,69,+++++,+++,2454,31,4807,76,+++++,+++,2027,36)

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 14:38             ` Stefan Traby
@ 2006-08-29 15:55               ` PFC
  2006-08-29 17:56                 ` Hans Reiser
  0 siblings, 1 reply; 47+ messages in thread
From: PFC @ 2006-08-29 15:55 UTC (permalink / raw)
  To: reiserfs-list


	I have made a little openoffice spreadsheet with the results.
	You can have fun entering stuff and seeing the results.

http://peufeu.free.fr/compression.ods

	Basically, a laptop having the same processor as my PC and a crummy 15  
MB/s drive (like most laptop drives) will get a 2.5x speedup using lzf,  
while using 40% CPU for compression and 15% CPU for decompression. I'd say  
it's a clear, huuuuge win.

	A desktop computer with a modern IDE drive doing 50 MB/s will still get  
nice speedups (1.8x on write, 2.5x on read) but of course, more CPU will  
be used because of the higher throughput. In this case it is CPU limited  
on compression and disk limited on decompression. However soon everyone  
will have dual core monsters so...

	A big ass RAID will not get much benefit unless :
	- the buffer cache stores compressed pages, so compression virtually  
doubles the RAM cache
	- or the CPU is really fast
	- or you put one of these neat FPGA modules in a free Opteron socket and  
upload a soft-hardware LZF in it with a few gigabytes/s throughput

	...

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 13:14   ` PFC
@ 2006-08-29 17:38     ` David Masover
  0 siblings, 0 replies; 47+ messages in thread
From: David Masover @ 2006-08-29 17:38 UTC (permalink / raw)
  To: PFC; +Cc: Andrew Morton, Alexey Dobriyan, reiserfs-list, linux-kernel

PFC wrote:
> 
> 
>     Would it be, by any chance, possible to tweak the thing so that 
> reiserfs plugins become kernel modules, so that the reiserfs core can be 
> put in the kernel without the plugins slowing down its acceptance ?

I don't see what this has to do with cryptoapi plugins -- those are not 
related to Reiser plugins.

As for the plugins slowing down acceptance, it's actually the concept of 
plugins and the plugin API -- in other words, it's the fact that Reiser4 
supports plugins -- that is slowing it down, if anything about plugins 
is still an issue at all.

Making them modules would make it worse.  Last I saw, Linus doesn't 
particularly like the idea of plugins because of a few misconceptions, 
like the possibility of proprietary (possibly GPL-violating) plugins 
distributed as modules -- basically, something like what nVidia and ATI 
do with their video drivers.

As it is, a good argument in favor of plugins is that this kind of thing 
isn't possible -- we often put "plugins" in quotes because really, it's 
just a nice abstraction layer.  They aren't any more plugins than 
iptables modules or cryptoapi plugins are.  If anything, they're less, 
because they must be compiled into Reiser4, which means either one huge 
monolithic Reiser4 module (including all plugins), or everything 
compiled into the kernel image.

>     (and updating plugins without rebooting would be a nice extra)

It probably wouldn't be as nice as you think.  Remember, if you're using 
a certain plugin in your root FS, it's part of the FS, so I don't think 
you'd be able to remove that plugin any more than you're able to remove 
reiser4.ko if that's your root FS.  You'd have to unmount every FS that 
uses that plugin.

At this point, you don't really gain much -- if you unmount every last 
Reiser4 filesystem, you can then remove reiser4.ko, recompile it, and 
load a new one with different plugins enabled.

Also, these things would typically be part of a kernel update anyway, 
meaning a reboot anyway.

But suppose you could remove a plugin, what then?  What would that mean? 
  Suppose half your files are compressed and you remove cryptocompress 
-- are those files uncompressed when the plugin goes away?  Probably 
not.  The only smart way to handle this that I can think of is to make 
those files unavailable, which is probably not what you want -- how do 
you update cryptocompress when the new reiser4_cryptocompress.ko is 
itself compressed?

That may be an acceptable solution for some plugins, but you'd have to 
be extremely careful which ones you remove.  The only safe way I can 
imagine doing this may not be possible, and if it is, it's extremely 
hackish -- load the plugin under another module name, so 
r4_cryptocompress would be r4_cryptocompress_init -- have the module, 
once loaded, do an atomic switch from the old one to the new one, 
effectively in-place.

But that kind of solution is something I've never seen attempted, and 
only really heard of in strange environments like Erlang.  It would 
probably require much more engineering than the Reiser team can handle 
right now, especially with their hands full with inclusion.

>>> The patch below is so-called reiser4 LZO compression plugin as extracted
>>> from 2.6.18-rc4-mm3.
>>>
>>> I think it is an unauditable piece of shit and thus should not enter
>>> mainline.
>>
>> Like lib/inflate.c (and this new code should arguably be in lib/).
>>
>> The problem is that if we clean this up, we've diverged very much from 
>> the
>> upstream implementation.  So taking in fixes and features from upstream
>> becomes harder and more error-prone.
>>
>> I'd suspect that the maturity of these utilities is such that we could
>> afford to turn them into kernel code in the expectation that any future
>> changes will be small.  But it's not a completely simple call.
>>
>> (iirc the inflate code had a buffer overrun a while back, which was found
>> and fixed in the upstream version).
>>
>>
> 
> 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 13:45           ` PFC
  2006-08-29 14:38             ` Stefan Traby
  2006-08-29 15:41             ` Gregory Maxwell
@ 2006-08-29 17:42             ` Hans Reiser
  2 siblings, 0 replies; 47+ messages in thread
From: Hans Reiser @ 2006-08-29 17:42 UTC (permalink / raw)
  To: PFC; +Cc: reiserfs-list

PFC wrote:
>
>     I made a little benchmark on my own PC (Athlon64 3200+ in 64 bit
> gentoo)
>
> http://peufeu.free.fr/compression.html
>
>     So, gzip could be used on PCs having very fast processors and very
> slow harddrives, like Core Duo laptops.
>     However, lzo compresses nearly as much and is still a lot faster.
> I don't see a reason for gzip in a FS application.
>
>     Anyone has a bench for lzf ?
>
>
Yes, Edward did equivalent tests, and thus we selected LZO.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 15:55               ` PFC
@ 2006-08-29 17:56                 ` Hans Reiser
  2006-08-29 18:31                   ` David Masover
  0 siblings, 1 reply; 47+ messages in thread
From: Hans Reiser @ 2006-08-29 17:56 UTC (permalink / raw)
  To: PFC; +Cc: reiserfs-list

PFC, thanks for giving us some real data.  May I post it to the lkml thread?

In essence, LZO wins the benchmarks, and the code is hard to read.  I
guess I have to go with LZO, and encourage people to take a stab at
dethroning it.

Hans

PFC wrote:
>
>     I have made a little openoffice spreadsheet with the results.
>     You can have fun entering stuff and seeing the results.
>
> http://peufeu.free.fr/compression.ods
>
>     Basically, a laptop having the same processor as my PC and a
> crummy 15 MB/s drive (like most laptop drives) will get a 2.5x speedup
> using lzf, while using 40% CPU for compression and 15% CPU for
> decompression. I'd say it's a clear, huuuuge win.
>
>     A desktop computer with a modern IDE drive doing 50 MB/s will
> still get nice speedups (1.8x on write, 2.5x on read) but of course,
> more CPU will be used because of the higher throughput. In this case
> it is CPU limited on compression and disk limited on decompression.
> However soon everyone will have dual core monsters so...
>
>     A big ass RAID will not get much benefit unless :
>     - the buffer cache stores compressed pages, so compression
> virtually doubles the RAM cache
>     - or the CPU is really fast
>     - or you put one of these neat FPGA modules in a free Opteron
> socket and upload a soft-hardware LZF in it with a few gigabytes/s
> throughput
Or you look the sysadmin in the eyes, and say, your file servers have
more out-of-disk-space problems than load problems, yes?
>
>     ...
>
>


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 17:56                 ` Hans Reiser
@ 2006-08-29 18:31                   ` David Masover
  2006-08-29 18:36                     ` Gregory Maxwell
  0 siblings, 1 reply; 47+ messages in thread
From: David Masover @ 2006-08-29 18:31 UTC (permalink / raw)
  To: Hans Reiser; +Cc: PFC, reiserfs-list

Hans Reiser wrote:

> PFC wrote:

>>     A big ass RAID will not get much benefit unless :
>>     - the buffer cache stores compressed pages, so compression
>> virtually doubles the RAM cache
>>     - or the CPU is really fast
>>     - or you put one of these neat FPGA modules in a free Opteron
>> socket and upload a soft-hardware LZF in it with a few gigabytes/s
>> throughput
> Or you look the sysadmin in the eyes, and say, your file servers have
> more out of disk space problems than load problems, yes?

I'd look at the IO-Wait number, also.

Compression makes sense if:
   - You spend a lot of time waiting for the disk.
   - You need disk space, and either:
      - You already have enough spare CPU to do compression
      - It's cheaper to buy enough CPU than to buy the space compression 
would save you.

Conversely, compression does NOT make sense if:
   - You spend a lot of time with the CPU busy and the disk idle.
   - You have more than enough disk space.
   - Disk space is cheaper than buying enough CPU to handle compression.
   - You've tried compression, and the CPU requirements slowed you more 
than you saved in disk access.

After a certain amount of RAID -- really, after the second or third disk 
in a mirrored array, or the third or fourth disk in RAID 5 -- at that 
point, I don't think adding more disks is really doing a huge amount to 
increase reliability, which means you're either trying to increase speed 
or space.  You can increase both of these by using compression, if you 
have the spare CPU, so the question becomes:  Does the CPU power 
necessary to do the compression cost more or less than another drive?

Especially in a big-ass RAID, you'll also want to be thinking about heat 
and power consumption, too.

There are still cases where compression loses, but they seem 
pathological enough that you'd want to benchmark to see if they really 
apply to you.  For instance, if you're dealing with lots of quick, 
read-only access to very tiny amounts of data, compression will likely 
slow you down, whereas adding another disk can speed you up.  If your 
data isn't very compressible, then you're just burning cycles for no 
point.  And, of course, the price/performance ratio (CPUs) and price/gig 
ratio (disk space) changes all the time.

And all of this is ignoring the very real possibility of a dedicated 
hardware compressor -- at which point, we could afford pretty much any 
algorithm you like, as long as the hardware can do it quickly enough. 
This is an advantage to using cryptoapi for the cryptocompress plugin, 
by the way -- it's one place where we could call out to the hardware later.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 18:31                   ` David Masover
@ 2006-08-29 18:36                     ` Gregory Maxwell
  2006-08-29 19:11                       ` David Masover
  0 siblings, 1 reply; 47+ messages in thread
From: Gregory Maxwell @ 2006-08-29 18:36 UTC (permalink / raw)
  To: David Masover; +Cc: Hans Reiser, PFC, reiserfs-list

On 8/29/06, David Masover <ninja@slaphack.com> wrote:
[snip]
> Conversely, compression does NOT make sense if:
>    - You spend a lot of time with the CPU busy and the disk idle.
>    - You have more than enough disk space.
>    - Disk space is cheaper than buying enough CPU to handle compression.
>    - You've tried compression, and the CPU requirements slowed you more
> than you saved in disk access.
[snip]

It's also not always this simple ... if you have a single threaded
workload that doesn't overlap CPU and disk well, (de)compression may
be free even if you're still CPU bound a lot as the compression is
using cpu cycles which would have been otherwise idle..

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 18:36                     ` Gregory Maxwell
@ 2006-08-29 19:11                       ` David Masover
  2006-08-29 19:38                         ` Hans Reiser
  0 siblings, 1 reply; 47+ messages in thread
From: David Masover @ 2006-08-29 19:11 UTC (permalink / raw)
  To: Gregory Maxwell; +Cc: Hans Reiser, PFC, reiserfs-list

Gregory Maxwell wrote:
> On 8/29/06, David Masover <ninja@slaphack.com> wrote:
> [snip]
>> Conversely, compression does NOT make sense if:
>>    - You spend a lot of time with the CPU busy and the disk idle.
>>    - You have more than enough disk space.
>>    - Disk space is cheaper than buying enough CPU to handle compression.
>>    - You've tried compression, and the CPU requirements slowed you more
>> than you saved in disk access.
> [snip]
> 
> It's also not always this simple ... if you have a single threaded
> workload that doesn't overlap CPU and disk well, (de)compression may
> be free even if you're still CPU bound a lot as the compression is
> using cpu cycles which would have been otherwise idle..

Isn't that implied, though -- if the CPU is not busy (run top under a 
2.6 kernel and you'll see an IO-Wait number), then the first condition 
isn't satisfied -- CPU is not busy, disk is not idle.

But speaking of single threadedness, more and more desktops are shipping 
with ridiculously more power than people need.  Even a gamer really 
won't benefit that much from having a dual-core system, because 
multithreading is hard, and games haven't been doing it properly.  John 
Carmack is pretty much the only superstar programmer in video games, and 
his first fairly massive attempt to make Quake 3 have two threads 
(since he'd just gotten a dual-core machine to play with) actually 
resulted in the game running some 30-40% slower than it did with a 
single thread.

So, for the desktop, compression makes perfect sense.  We don't have 
massive amounts of RAID.  If we have newer machines, there's a good 
chance we'll have one CPU sitting mostly idle while playing games. 
Short of gaming, there are few desktop applications that will fully 
utilize even one reasonably fast CPU.  The reason gamers buy dual-core 
systems is they're getting cheap enough to be worth it, and that one 
core sitting idle is a perfect place to do OS/system work not related to 
the game -- antivirus, automatic update checks, the inevitable 
background processes leeching a couple few % off your available CPU.

So for the typical new desktop with about 2 ghz of 64-bit processor 
sitting idle, compression is essentially free.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 19:11                       ` David Masover
@ 2006-08-29 19:38                         ` Hans Reiser
  2006-08-29 20:03                           ` David Masover
  0 siblings, 1 reply; 47+ messages in thread
From: Hans Reiser @ 2006-08-29 19:38 UTC (permalink / raw)
  To: David Masover; +Cc: Gregory Maxwell, PFC, reiserfs-list

David Masover wrote:
>   John Carmack is pretty much the only superstar programmer in video
> games, and after his first fairly massive attempt to make Quake 3 have
> two threads (since he'd just gotten a dual-core machine to play with)
> actually resulted in the game running some 30-40% slower than it did
> with a single thread.
Do the two processors have separate caches, and thus being overly fine
grained makes you memory transfer bound or?

Two processors tends to create a snappier user experience, in that big
CPU processes get throttled nicely.

Hans

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 19:38                         ` Hans Reiser
@ 2006-08-29 20:03                           ` David Masover
  2006-08-29 22:15                             ` Toby Thain
  0 siblings, 1 reply; 47+ messages in thread
From: David Masover @ 2006-08-29 20:03 UTC (permalink / raw)
  To: Hans Reiser; +Cc: Gregory Maxwell, PFC, reiserfs-list

Hans Reiser wrote:
> David Masover wrote:
>>   John Carmack is pretty much the only superstar programmer in video
>> games, and after his first fairly massive attempt to make Quake 3 have
>> two threads (since he'd just gotten a dual-core machine to play with)
>> actually resulted in the game running some 30-40% slower than it did
>> with a single thread.
> Do the two processors have separate caches, and thus being overly fined
> grained makes you memory transfer bound or?

It wasn't anything that intelligent.  Let me see if I can find it...

Taken from
http://techreport.com/etc/2005q3/carmack-quakecon/index.x?pg=1

"Graphics accelerators are a great example of parallelism working well, 
he noted, but game code is not similarly parallelizable. Carmack cited 
his Quake III Arena engine, whose renderer was multithreaded and 
achieved up to 40% performance increases on multiprocessor systems, as a 
good example of where games would have to go. (Q3A's SMP mode was 
notoriously crash-prone and fragile, working only with certain graphics 
driver revisions and the like.) Initial returns on multithreading, he 
projected, will be disappointing."

Basically, it's hard enough to split what we currently do onto even 2 
CPUs, and it definitely seems like we're about to hit a wall in CPU 
frequency just as multicore becomes a practical reality, so future CPUs 
may be measured in how many cores they have, not how fast each core is.

There's also a question of what to use the extra power for.  From the 
same presentation:

"Part of the problem with multithreading, argued Carmack, is knowing how 
to use the power of additional CPU cores to enhance the game experience. 
A.I., can be effective when very simple, as some of the first Doom logic 
was. It was less than a page of code, but players ascribed complex 
behaviors and motivations to the bad guys. However, more complex A.I. 
seems hard to improve to the point where it really changes the game. 
More physics detail, meanwhile, threatens to make games too fragile as 
interactions in the game world become more complex."

So, I humbly predict that Physics cards (so-called PPUs) will fail, and 
be replaced by ever-increasing numbers of cores, which will, for awhile, 
be one step ahead of what we can think of to fill them with.  Thus, 
anything useful (like compression) that can be split off into a separate 
thread is going to be useful for games, and won't hurt performance on 
future mega-multicore monstrosities.

The downside is, most game developers are working on Windows, for which 
FS compression has always sucked.  Thus, they most often implement their 
own compression, often something horrible, like storing the whole game 
in CAB or ZIP files, and loading the entire level into RAM before play 
starts, making load times less relevant for gameplay.  Reiser4's 
cryptocompress would be a marked improvement over that, but it would 
also not be used in many games.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 11:38                 ` Edward Shishkin
@ 2006-08-29 22:03                   ` Nigel Cunningham
  0 siblings, 0 replies; 47+ messages in thread
From: Nigel Cunningham @ 2006-08-29 22:03 UTC (permalink / raw)
  To: Edward Shishkin
  Cc: David Masover, Jan Engelhardt, Stefan Traby, Hans Reiser,
	Alexey Dobriyan, reiserfs-list, linux-kernel, Andrew Morton

Hi.

On Tue, 2006-08-29 at 15:38 +0400, Edward Shishkin wrote:
> Nigel Cunningham wrote:
> > Hi.
> > 
> > On Tue, 2006-08-29 at 03:23 -0500, David Masover wrote:
> > 
> >>Nigel Cunningham wrote:
> >>
> >>>Hi.
> >>>
> >>>On Tue, 2006-08-29 at 06:05 +0200, Jan Engelhardt wrote:
> >>>
> >>>>>>>>Hmm.  LZO is the best compression algorithm for the task as measured by
> >>>>>>>>the objectives of good compression effectiveness while still having very
> >>>>>>>>low CPU usage (the best of those written and GPL'd, there is a slightly
> >>>>>>>>better one which is proprietary and uses more CPU, LZRW if I remember
> >>>>>>>>right.  The gzip code base uses too much CPU, though I think Edward made
> >>>>>>>
> >>>>>>>I don't think that LZO beats LZF in both speed and compression ratio.
> >>>>>>>
> >>>>>>>LZF is also available under GPL (dual-licensed BSD) and was chosen in favor
> >>>>>>>of LZO for the next generation suspend-to-disk code of the Linux kernel.
> >>>>>>>
> >>>>>>>see: http://www.goof.com/pcg/marc/liblzf.html
> >>>>>>
> >>>>>>thanks for the info, we will compare them
> >>>>>
> >>>>>For Suspend2, we ended up converting the LZF support to a cryptoapi
> >>>>>plugin. Is there any chance that you could use cryptoapi modules? We
> >>>>>could then have a hope of sharing the support.
> >>>>
> >>>>I am throwing in gzip: would it be meaningful to use that instead? The 
> >>>>decoder (inflate.c) is already there.
> >>>>
> >>>>06:04 shanghai:~/liblzf-1.6 > l configure*
> >>>>-rwxr-xr-x  1 jengelh users 154894 Mar  3  2005 configure
> >>>>-rwxr-xr-x  1 jengelh users  26810 Mar  3  2005 configure.bz2
> >>>>-rw-r--r--  1 jengelh users  30611 Aug 28 20:32 configure.gz-z9
> >>>>-rw-r--r--  1 jengelh users  30693 Aug 28 20:32 configure.gz-z6
> >>>>-rw-r--r--  1 jengelh users  53077 Aug 28 20:32 configure.lzf
> >>>
> >>>We used gzip when we first implemented compression support, and found it
> >>>to be far too slow. Even with the fastest compression options, we were
> >>>only getting a few megabytes per second. Perhaps I did something wrong
> >>>in configuring it, but there's not that many things to get wrong!
> >>
> >>All that comes to mind is the speed/quality setting -- the number from 1 
> >>to 9.  Recently, I backed up someone's hard drive using -1, and I 
> >>believe I was still able to saturate... the _network_.  Definitely try 
> >>again if you haven't changed this, but I can't imagine I'm the first 
> >>person to think of it.
> >>
> >> From what I remember, gzip -1 wasn't faster than the disk.  But at 
> >>least for (very) repetitive data, I was wrong:
> >>
> >>eve:~ sanity$ time bash -c 'dd if=/dev/zero of=test bs=10m count=10; sync'
> >>10+0 records in
> >>10+0 records out
> >>104857600 bytes transferred in 3.261990 secs (32145287 bytes/sec)
> >>
> >>real    0m3.746s
> >>user    0m0.005s
> >>sys     0m0.627s
> >>eve:~ sanity$ time bash -c 'dd if=/dev/zero bs=10m count=10 | gzip -v1 > 
> >>test; sync'
> >>10+0 records in
> >>10+0 records out
> >>104857600 bytes transferred in 2.404093 secs (43616282 bytes/sec)
> >>  99.5%
> >>
> >>real    0m2.558s
> >>user    0m1.554s
> >>sys     0m0.680s
> >>eve:~ sanity$
> >>
> >>
> >>
> >>This was on OS X, but I think it's still valid -- this is a slightly 
> >>older Powerbook, with a 5400 RPM drive, 1.6 ghz G4.
> >>
> >>-1 is still worlds better than nothing.  The backup was over 15 gigs, 
> >>down to about 6 -- loads of repetitive data, I'm sure, but that's where 
> >>you win with compression anyway.
> > 
> > 
> > Wow. That's a lot better; I guess I did get something wrong in trying to
> > tune deflate. That was pre-cryptoapi though; looking at
> > cryptoapi/deflate.c, I don't see any way of controlling the compression
> > level. Am I missing anything?
> > 
> 
> zlib is tunable, not cryptoapi's deflate.
> look at zlib_deflateInit2()

Ok; thanks. I wasn't mistaken then :)

Regards,

Nigel


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 20:03                           ` David Masover
@ 2006-08-29 22:15                             ` Toby Thain
  2006-08-29 22:42                               ` David Masover
  0 siblings, 1 reply; 47+ messages in thread
From: Toby Thain @ 2006-08-29 22:15 UTC (permalink / raw)
  Cc: ReiserFS List


On 29-Aug-06, at 4:03 PM, David Masover wrote:

> Hans Reiser wrote:
>> David Masover wrote:
>>>   John Carmack is pretty much the only superstar programmer in video
>>> games, and after his first fairly massive attempt to make Quake 3  
>>> have
>>> two threads (since he'd just gotten a dual-core machine to play  
>>> with)
>>> actually resulted in the game running some 30-40% slower than it did
>>> with a single thread.
>> Do the two processors have separate caches, and thus being overly  
>> fined
>> grained makes you memory transfer bound or?
>
> It wasn't anything that intelligent.  Let me see if I can find it...
>
> Taken from
> http://techreport.com/etc/2005q3/carmack-quakecon/index.x?pg=1
>
> "Graphics accelerators are a great example of parallelism working  
> well, he noted, but game code is not similarly parallelizable. ...
>
> The downside is, most game developers are working on Windows, for  
> which FS compression has always sucked.  Thus, they most often  
> implement their own compression, often something horrible, like  
> storing the whole game in CAB or ZIP files, and loading the entire  
> level into RAM before play starts, making load times less relevant  
> for gameplay.  Reiser4's cryptocompress would be a marked  
> improvement over that, but it would also not be used in many games.


Gamer systems, whether from coder's or player's p.o.v., would appear  
fairly irrelevant to reiserfs and this list. I'd trust Carmack's eye  
candy credentials but doubt he has much to say about filesystems or  
server threading...

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 22:15                             ` Toby Thain
@ 2006-08-29 22:42                               ` David Masover
  2006-08-30  9:17                                 ` PFC
  0 siblings, 1 reply; 47+ messages in thread
From: David Masover @ 2006-08-29 22:42 UTC (permalink / raw)
  To: Toby Thain; +Cc: ReiserFS List

Toby Thain wrote:

> Gamer systems, whether from coder's or player's p.o.v., would appear 
> fairly irrelevant to reiserfs and this list. I'd trust Carmack's eye 
> candy credentials but doubt he has much to say about filesystems or 
> server threading...

Maybe, but Reiser4 is supposed to be a general purpose filesystem, so 
talking about its advantages/disadvantages wrt. gaming makes sense, 
especially considering gamers are the most likely to tune their desktop 
for performance.

That was a bit much, though.  I apologize.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-29 22:42                               ` David Masover
@ 2006-08-30  9:17                                 ` PFC
  2006-08-30 10:45                                   ` David Masover
  2006-08-30 16:50                                   ` Edward Shishkin
  0 siblings, 2 replies; 47+ messages in thread
From: PFC @ 2006-08-30  9:17 UTC (permalink / raw)
  To: ReiserFS List; +Cc: ReiserFS List


> Maybe, but Reiser4 is supposed to be a general purpose filesystem
> talking about its advantages/disadvantages wrt. gaming makes sense,

	I don't see a lot of gamers using Linux ;)
	But yes, gaming is what pushes hardware development these days, at least  
on the desktop.

	Also, as you said, gamers (like many others) reinvent filesystems and  
generally use the Big Zip File paradigm, which is not that stupid for a  
read only FS (if you cache all file offsets, reading can be pretty fast).  
However when you start storing ogg-compressed sound and JPEG images inside  
a zip file, it starts to stink.

	***************************

> Does the CPU power necessary to do the compression cost more or less  
> than another drive?

	***************************

	It depends, you have to consider several distinct scenarios.
	For instance, on a big Postgres database server, the rule is to have as  
many spindles as you can.
	- If you are doing a lot of full table scans (like data mining etc), more  
spindles means reads can be parallelized ; of course this will mean more  
data will have to be decompressed.
	- If you are doing a lot of little transactions (web sites), it means  
seeks can be distributed around the various disks. In this case  
compression would be a big win because there is free CPU to use ; besides,  
it would virtually double the RAM cache size.

	You have to ponder cost (in CPU $) of compression versus the cost in  
"virtual RAM" saved for caching and the cost in disks not bought.

	***************************

> Do the two processors have separate caches, and thus being overly fined
> grained makes you memory transfer bound or?

	It depends on which dual core system you use ; future systems (like Core)  
will definitely share cache as this is the best option.

	***************************

	If we analyze the results of my little compression benchmarks, we find  
that :
	- gzip is way too slow.
	- lzo and lzf are pretty close.

	LZF is faster than LZO (especially on decompression) but compresses worse.
	So, when we are disk-bound, LZF will be slower.
	When we are CPU-bound, LZF will be faster.

	The differences are not that huge, though, so it might be worthwhile to  
weigh this against the respective code cleanliness, of which I have no  
idea.

	However my compression benchmarks mean nothing because I'm compressing  
whole files whereas reiser4 will be compressing little blocks of files. We  
must therefore evaluate the performance of compressors on little blocks,  
which is very different from 300 megabytes files.
	For instance, the setup time of the compressor will be important (whether  
some huffman table needs to be constructed etc), and the compression  
ratios will be worse.

	Let's redo a benchmark then.
	For that I need to know if a compression block in reiser4 will be either :
	- a FS block containing several files (ie. a block will contain several  
small files)
	- a part of a file (ie. a small file will be 1 block)

	I think it's the second option, right ?


	

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-30  9:17                                 ` PFC
@ 2006-08-30 10:45                                   ` David Masover
  2006-08-30 16:50                                   ` Edward Shishkin
  1 sibling, 0 replies; 47+ messages in thread
From: David Masover @ 2006-08-30 10:45 UTC (permalink / raw)
  To: PFC; +Cc: ReiserFS List

PFC wrote:
> 
>> Maybe, but Reiser4 is supposed to be a general purpose filesystem
>> talking about its advantages/disadvantages wrt. gaming makes sense,
> 
>     I don't see a lot of gamers using Linux ;)

There have to be some.  Transgaming seems to still be making a 
successful business out of making games work out-of-the-box under Wine. 
  While I don't imagine there are as many who attempt gaming on Linux, 
I'd guess a significant portion of Linux users, if not the majority, are 
at least casual gamers.

Some will have given up on the PC as a gaming platform long ago, tired 
of its upgrade cycle, crashes, game patches, and install times.  These 
people will have a console for games, probably a PS2 so they can watch 
DVDs, and use their computer for real work, with as much free software 
as they can manage.

Others will compromise somewhat.  I compromise by running the binary 
nVidia drivers, keeping a Windows partition around sometimes, and 
enjoying many old games which have released their source recently, and 
now run under Linux -- as well as a few native Linux games, some Cedega 
games, and some under straight Wine.

Basically, I'll play it on Linux if it works well, otherwise I boot 
Windows.  I'm migrating away from that Windows dependency by making sure 
all my new game purchases work on Linux.

Others will use some or all of the above -- stick to old games, use 
exclusively stuff that works on Linux (one way or the other), or give up 
on Linux gaming entirely and use a Windows partition.

Anything Linux can do to become more game-friendly is one less reason 
for gamers to have to compromise.  Not all gamers are willing to do 
that.  I know at least two who ultimately decided that, with dual boot, 
they end up spending most of their time on Windows anyway.  These are 
the people who would use Linux if they didn't have a good reason to use 
something else, but right now, they do.  This is not the fault of the 
filesystem, but taking the attitude of "There aren't many Linux gamers 
anyway" -- that's a self-fulfilling prophecy, gamers WILL leave because 
of it.

>     Also, as you said, gamers (like many others) reinvent filesystems 
> and generally use the Big Zip File paradigm, which is not that stupid 
> for a read only FS (if you cache all file offsets, reading can be pretty 
> fast). However when you start storing ogg-compressed sound and JPEG 
> images inside a zip file, it starts to stink.

I don't like it as a read-only FS, either.  Take an MMO -- while most 
commercial ones load the entire game to disk from install DVDs, there 
are some smaller ones which only cache the data as you explore the 
world.  Also, even with the bigger ones, the world is always changing 
with patches, and I've seen patches take several hours to install -- not 
download, install -- on a 2.4 ghz amd64 with 2 gigs of RAM, on a striped 
RAID.  You can trust me when I say this was mostly disk-bound, which is 
retarded, because it took less than half an hour to install in the first 
place.

Even simple multiplayer games -- hell, even single-player games can get 
fairly massive updates relatively often.  Half-Life 2 is one example -- 
they've now added HDR to the engine.

In these cases, you still need as fast access as possible to the data 
(to cut down on load time), and it would be nice to save on space as 
well, but a zipfile starts to make less sense.  And yet, I still see 
people using _cabinet_ files.

Compression at the FS layer, plus efficient storing of small files, 
makes this much simpler.  While you can make the zipfile-fs transparent 
to a game, even your mapping tools, it's still not efficient, and it's 
not transparent to your modeling package, Photoshop-alike, audio 
software, or gcc.

But everything understands a filesystem.

>     It depends, you have to consider several distinct scenarios.
>     For instance, on a big Postgres database server, the rule is to have 
> as many spindles as you can.
>     - If you are doing a lot of full table scans (like data mining etc), 
> more spindles means reads can be parallelized ; of course this will mean 
> more data will have to be decompressed.

I don't see why more spindles means more data decompressed.  If 
anything, I'd imagine it would be less reads, total, if there's any kind 
of data locality.  But I'll leave this to the database experts, for now.

>     - If you are doing a lot of little transactions (web sites), it 
> means seeks can be distributed around the various disks. In this case 
> compression would be a big win because there is free CPU to use ; 

Dangerous assumption.  Three words:  Ruby on Rails.  There goes your 
free CPU.  Suddenly, compression makes no sense at all.

But then, Ruby makes no sense at all for any serious load, unless you 
really have that much money to spend, or until the Ruby.NET compiler is 
finished -- that should speed things up.

> besides, it would virtually double the RAM cache size.

No it wouldn't, not the way Reiser4 does it.  Currently, 
compression/decompression, as well as encryption/decryption, happens 
where the data hits the disk.  The idea is, at that point, your storage 
medium is likely a bottleneck, and storing the compressed data in RAM is 
going to slow you down a lot, unless you're short on RAM.  It would be 
nice to make this tunable (even be able to choose a % of cache to leave 
compressed and a % to decompress), for machines which have spare CPU, 
but not as much spare RAM.

I don't know if the architecture can be changed that easily, though. 
The place the cryptocompress plugin operates makes perfect sense for 
crypto, because it's 1:1 as far as space goes -- all that caching the 
encrypted version does is make you waste cycles decrypting it every 
time.  But keeping data compressed in RAM, while not generally a great 
idea, was once a valid technique on memory-starved machines -- I 
remember seeing some Mac software that claimed to double your RAM by 
compressing it.

But then, this made sense on a Mac no matter how much performance it 
cost you, because this predated virtual memory on a Mac.  When you ran 
out of physical RAM, you got an "out of memory" dialog, and your program 
crashed.  Some programs couldn't be run at all without a memory upgrade 
-- or this program.

>     However my compression benchmarks mean nothing because I'm 
> compressing whole files whereas reiser4 will be compressing little 
> blocks of files. We must therefore evaluate the performance of 
> compressors on little blocks, which is very different from 300 megabytes 
> files.
>     For instance, the setup time of the compressor will be important 
> (whether some huffman table needs to be constructed etc), and the 
> compression ratios will be worse.

Hmm.  To what extent are modern compressors based on a "dictionary" 
concept?  I believe that's why we compress tarballs, instead of the 
files inside, and why zipfiles are generally worse than compressed 
tarballs for space.

If the dictionary could be shared, that would negate the setup time of 
the compressor and much of the loss of efficiency when compressing small 
blocks instead of huge files.  The obvious disadvantage is potentially 
having to hit both the dictionary and the file.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-30  9:17                                 ` PFC
  2006-08-30 10:45                                   ` David Masover
@ 2006-08-30 16:50                                   ` Edward Shishkin
  2006-08-30 16:55                                     ` Hans Reiser
  1 sibling, 1 reply; 47+ messages in thread
From: Edward Shishkin @ 2006-08-30 16:50 UTC (permalink / raw)
  To: PFC; +Cc: ReiserFS List

PFC wrote:
> 
>> Maybe, but Reiser4 is supposed to be a general purpose filesystem
>> talking about its advantages/disadvantages wrt. gaming makes sense,
> 
> 
>     I don't see a lot of gamers using Linux ;)
>     But yes, gaming is what pushes hardware development these days, at 
> least  on the desktop.
> 
>     Also, as you said, gamers (like many others) reinvent filesystems 
> and  generally use the Big Zip File paradigm, which is not that stupid 
> for a  read only FS (if you cache all file offsets, reading can be 
> pretty fast).  However when you start storing ogg-compressed sound and 
> JPEG images inside  a zip file, it starts to stink.
> 
>     ***************************
> 
>> Does the CPU power necessary to do the compression cost more or less  
>> than another drive?
> 
> 
>     ***************************
> 
>     It depends, you have to consider several distinct scenarios.
>     For instance, on a big Postgres database server, the rule is to have 
> as  many spindles as you can.
>     - If you are doing a lot of full table scans (like data mining etc), 
> more  spindles means reads can be parallelized ; of course this will 
> mean more  data will have to be decompressed.
>     - If you are doing a lot of little transactions (web sites), it 
> means  seeks can be distributed around the various disks. In this case  
> compression would be a big win because there is free CPU to use ; 
> besides,  it would virtually double the RAM cache size.
> 
>     You have to ponder cost (in CPU $) of compression versus the cost 
> in  "virtual RAM" saved for caching and the cost in disks not bought.
> 
>     ***************************
> 
>> Do the two processors have separate caches, and thus being overly fined
>> grained makes you memory transfer bound or?
> 
> 
>     It depends on which dual core system you use ; future systems (like 
> Core)  will definitely share cache as this is the best option.
> 
>     ***************************
> 
>     If we analyze the results of my little compression benchmarks, we 
> find  that :
>     - gzip is way too slow.
>     - lzo and lzf are pretty close.
> 
>     LZF is faster than LZO (especially on decompression) but compresses 
> worse.
>     So, when we are disk-bound, LZF will be slower.
>     When we are CPU-bound, LZF will be faster.
> 
>     The differences are not that huge, though, so it might be worthwhile 
> to  weigh this against the respective code cleanliness, of which I have 
> no  idea.
> 
>     However my compression benchmarks mean nothing because I'm 
> compressing  whole files whereas reiser4 will be compressing little 
> blocks of files. We  must therefore evaluate the performance of 
> compressors on little blocks,  which is very different from 300 
> megabytes files.
>     For instance, the setup time of the compressor will be important 
> (whether  some huffman table needs to be constructed etc), and the 
> compression  ratios will be worse.
> 
>     Let's redo a benchmark then.
>     For that I need to know if a compression block in reiser4 will be 
> either :
>     - a FS block containing several files (ie. a block will contain 
> several  small files)
>     - a part of a file (ie. a small file will be 1 block)
> 
>     I think it's the second option, right ?

(Plain) file is considered as a set of logical clusters (64K by
default). Minimal unit occupied in memory by (plain) file is one
page. Compressed logical cluster is stored on disk in so-called
"disk clusters". Disk cluster is a set of special items (aka "ctails",
or "compressed bodies"), so that one block can contain (compressed)
data of many files and everything is packed tightly on disk.


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-30 16:50                                   ` Edward Shishkin
@ 2006-08-30 16:55                                     ` Hans Reiser
  2006-08-31  9:32                                       ` Clemens Eisserer
  0 siblings, 1 reply; 47+ messages in thread
From: Hans Reiser @ 2006-08-30 16:55 UTC (permalink / raw)
  To: Edward Shishkin; +Cc: PFC, ReiserFS List

Edward Shishkin wrote:
>
> (Plain) file is considered as a set of logical clusters (64K by
> default). Minimal unit occupied in memory by (plain) file is one
> page. Compressed logical cluster is stored on disk in so-called
> "disk clusters". Disk cluster is a set of special items (aka "ctails",
> or "compressed bodies"), so that one block can contain (compressed)
> data of many files and everything is packed tightly on disk.
>
>
>
So the compression unit is 64k for purposes of your benchmarks.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-30 16:55                                     ` Hans Reiser
@ 2006-08-31  9:32                                       ` Clemens Eisserer
  2006-08-31 12:00                                         ` Edward Shishkin
  2006-08-31 19:22                                         ` David Masover
  0 siblings, 2 replies; 47+ messages in thread
From: Clemens Eisserer @ 2006-08-31  9:32 UTC (permalink / raw)
  To: reiserfs-list

> But speaking of single threadedness, more and more desktops are shipping
> with ridiculously more power than people need.  Even a gamer really
Will the LZO compression code in reiser4 be able to use multi-processor systems?
E.g. if I've a Turion-X2 in my laptop will it use 2 threads for
compression/decompression making cpu throughput much better than
what the disk could do?

lg Clemens


2006/8/30, Hans Reiser <reiser@namesys.com>:
> Edward Shishkin wrote:
> >
> > (Plain) file is considered as a set of logical clusters (64K by
> > default). Minimal unit occupied in memory by (plain) file is one
> > page. Compressed logical cluster is stored on disk in so-called
> > "disk clusters". Disk cluster is a set of special items (aka "ctails",
> > or "compressed bodies"), so that one block can contain (compressed)
> > data of many files and everything is packed tightly on disk.
> >
> >
> >
> So the compression unit is 64k for purposes of your benchmarks.
>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-31  9:32                                       ` Clemens Eisserer
@ 2006-08-31 12:00                                         ` Edward Shishkin
  2006-08-31 15:14                                           ` Clemens Eisserer
  2006-08-31 16:55                                           ` Hans Reiser
  2006-08-31 19:22                                         ` David Masover
  1 sibling, 2 replies; 47+ messages in thread
From: Edward Shishkin @ 2006-08-31 12:00 UTC (permalink / raw)
  To: Clemens Eisserer; +Cc: reiserfs-list

Clemens Eisserer wrote:
>> But speaking of single threadedness, more and more desktops are shipping
>> with ridiculously more power than people need.  Even a gamer really
> 
> Will the LZO compression code in reiser4 be able to use multi-processor 
> systems?
> E.g. if I've a Turion-X2 in my laptop will it use 2 threads for
> compression/decompression making cpu throughput much better than
> what the disk could do?
> 

Compression is going in flush time and there can be more than
one flush thread that processes the same transaction atom.
Decompression is going in the context of readpage/readpages.
So if you mean per file, then yes for compression and no for
decompression.

Edward.


> lg Clemens
> 
> 
> 2006/8/30, Hans Reiser <reiser@namesys.com>:
> 
>> Edward Shishkin wrote:
>> >
>> > (Plain) file is considered as a set of logical clusters (64K by
>> > default). Minimal unit occupied in memory by (plain) file is one
>> > page. Compressed logical cluster is stored on disk in so-called
>> > "disk clusters". Disk cluster is a set of special items (aka "ctails",
>> > or "compressed bodies"), so that one block can contain (compressed)
>> > data of many files and everything is packed tightly on disk.
>> >
>> >
>> >
>> So the compression unit is 64k for purposes of your benchmarks.
>>
> 
> 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-31 12:00                                         ` Edward Shishkin
@ 2006-08-31 15:14                                           ` Clemens Eisserer
  2006-08-31 16:55                                           ` Hans Reiser
  1 sibling, 0 replies; 47+ messages in thread
From: Clemens Eisserer @ 2006-08-31 15:14 UTC (permalink / raw)
  To: reiserfs-list

Hi Edward,

Thanks a lot for answering.

> Compression is going in flush time and there can be more than
> one flush thread that processes the same transaction atom.
> Decompression is going in the context of readpage/readpages.
> So if you mean per file, then yes for compression and no for
> decompression.
So the parallelism is not really explicit, more or less a bit accidental.
Are threads in the kernel possible - and if yes how large is the
typical workload of stuff which can be decompressed? I guess for
several hundred kb using more than one thread could speed things up
quite a bit?

lg Clemens

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-31 12:00                                         ` Edward Shishkin
  2006-08-31 15:14                                           ` Clemens Eisserer
@ 2006-08-31 16:55                                           ` Hans Reiser
  2006-08-31 18:08                                             ` Edward Shishkin
  1 sibling, 1 reply; 47+ messages in thread
From: Hans Reiser @ 2006-08-31 16:55 UTC (permalink / raw)
  To: Edward Shishkin; +Cc: Clemens Eisserer, reiserfs-list

Edward Shishkin wrote:
> Clemens Eisserer wrote:
>>> But speaking of single threadedness, more and more desktops are
>>> shipping
>>> with ridiculously more power than people need.  Even a gamer really
>>
>> Will the LZO compression code in reiser4 be able to use
>> multi-processor systems?
>> E.g. if I've a Turion-X2 in my laptop will it use 2 threads for
>> compression/decompression making cpu throughput much better than
>> what the disk could do?
>>
>
> Compression is going in flush time and there can be more than
> one flush thread that processes the same transaction atom.
> Decompression is going in the context of readpage/readpages.
> So if you mean per file, then yes for compression and no for
> decompression.
I don't think your explanation above is a good one.

If there is more than one process reading a file, then you can have
multiple decompressions at one time of the same file, yes?

Just because there can be more than one flush thread per file does not
mean it is likely there will be.

CPU scheduling of compression/decompression is an area that could use
work in the future.    For now, just understand that what we do is
better than doing nothing.;-/
>
> Edward.
>
>
>> lg Clemens
>>
>>
>> 2006/8/30, Hans Reiser <reiser@namesys.com>:
>>
>>> Edward Shishkin wrote:
>>> >
>>> > (Plain) file is considered as a set of logical clusters (64K by
>>> > default). Minimal unit occupied in memory by (plain) file is one
>>> > page. Compressed logical cluster is stored on disk in so-called
>>> > "disk clusters". Disk cluster is a set of special items (aka
>>> "ctails",
>>> > or "compressed bodies"), so that one block can contain (compressed)
>>> > data of many files and everything is packed tightly on disk.
>>> >
>>> >
>>> >
>>> So the compression unit is 64k for purposes of your benchmarks.
>>>
>>
>>
>
>
>


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-31 16:55                                           ` Hans Reiser
@ 2006-08-31 18:08                                             ` Edward Shishkin
  0 siblings, 0 replies; 47+ messages in thread
From: Edward Shishkin @ 2006-08-31 18:08 UTC (permalink / raw)
  To: Hans Reiser; +Cc: Clemens Eisserer, reiserfs-list

Hans Reiser wrote:
> Edward Shishkin wrote:
> 
>>Clemens Eisserer wrote:
>>
>>>>But speaking of single threadedness, more and more desktops are
>>>>shipping
>>>>with ridiculously more power than people need.  Even a gamer really
>>>
>>>Will the LZO compression code in reiser4 be able to use
>>>multi-processor systems?
>>>E.g. if I've a Turion-X2 in my laptop will it use 2 threads for
>>>compression/decompression making cpu throughput much better than
>>>what the disk could do?
>>>
>>
>>Compression is going in flush time and there can be more than
>>one flush thread that processes the same transaction atom.
>>Decompression is going in the context of readpage/readpages.
>>So if you mean per file, then yes for compression and no for
>>decompression.
> 
> I don't think your explanation above is a good one.
> 
> If there is more than one process reading a file, then you can have
> multiple decompressions at one time of the same file, yes?
> 

You are almost right. Unless they read the same logical cluster.


> Just because there can be more than one flush thread per file does not
> mean it is likely there will be.
> 
> CPU scheduling of compression/decompression is an area that could use
> work in the future.    For now, just understand that what we do is
> better than doing nothing.;-/
> 
>>Edward.
>>
>>
>>
>>>lg Clemens
>>>
>>>
>>>2006/8/30, Hans Reiser <reiser@namesys.com>:
>>>
>>>
>>>>Edward Shishkin wrote:
>>>>
>>>>>(Plain) file is considered as a set of logical clusters (64K by
>>>>>default). Minimal unit occupied in memory by (plain) file is one
>>>>>page. Compressed logical cluster is stored on disk in so-called
>>>>>"disk clusters". Disk cluster is a set of special items (aka
>>>>
>>>>"ctails",
>>>>
>>>>>or "compressed bodies"), so that one block can contain (compressed)
>>>>>data of many files and everything is packed tightly on disk.
>>>>>
>>>>>
>>>>>
>>>>
>>>>So the compression unit is 64k for purposes of your benchmarks.
>>>>
>>>
>>>
>>
>>
> 
> 
> 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Reiser4 und LZO compression
  2006-08-31  9:32                                       ` Clemens Eisserer
  2006-08-31 12:00                                         ` Edward Shishkin
@ 2006-08-31 19:22                                         ` David Masover
  1 sibling, 0 replies; 47+ messages in thread
From: David Masover @ 2006-08-31 19:22 UTC (permalink / raw)
  To: Clemens Eisserer; +Cc: reiserfs-list

Clemens Eisserer wrote:
>> But speaking of single threadedness, more and more desktops are shipping
>> with ridiculously more power than people need.  Even a gamer really
> Will the LZO compression code in reiser4 be able to use multi-processor 
> systems?

Good point, but it wasn't what I was talking about.  I was talking about 
the compression happening on one CPU, meaning even if it takes most of 
the CPU to saturate disk throughput, your other CPU is still 100% 
available, meaning the typical desktop user won't notice their apps 
running slower, they'll just notice disk access being faster.


^ permalink raw reply	[flat|nested] 47+ messages in thread

end of thread, other threads:[~2006-08-31 19:22 UTC | newest]

Thread overview: 47+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-08-27  0:34 Reiser4 und LZO compression Alexey Dobriyan
2006-08-27  8:04 ` Andrew Morton
2006-08-27  8:49   ` Ray Lee
2006-08-27  9:42   ` David Masover
2006-08-28 17:34     ` Jindrich Makovicka
2006-08-28 18:05       ` Edward Shishkin
2006-08-28 12:42   ` Jörn Engel
2006-08-29 13:14   ` PFC
2006-08-29 17:38     ` David Masover
2006-08-28 17:06 ` Hans Reiser
2006-08-28 17:37   ` Stefan Traby
2006-08-28 18:15     ` Edward Shishkin
2006-08-28 21:48       ` Nigel Cunningham
2006-08-28 23:32         ` Hans Reiser
2006-08-29  4:05         ` Jan Engelhardt
2006-08-29  5:41           ` Nigel Cunningham
2006-08-29  8:23             ` David Masover
2006-08-29  9:57               ` Nigel Cunningham
2006-08-29 11:09                 ` Ray Lee
2006-08-29 11:38                 ` Edward Shishkin
2006-08-29 22:03                   ` Nigel Cunningham
2006-08-29  4:59         ` Paul Mundt
2006-08-29  5:47           ` Nigel Cunningham
2006-08-29 13:45           ` PFC
2006-08-29 14:38             ` Stefan Traby
2006-08-29 15:55               ` PFC
2006-08-29 17:56                 ` Hans Reiser
2006-08-29 18:31                   ` David Masover
2006-08-29 18:36                     ` Gregory Maxwell
2006-08-29 19:11                       ` David Masover
2006-08-29 19:38                         ` Hans Reiser
2006-08-29 20:03                           ` David Masover
2006-08-29 22:15                             ` Toby Thain
2006-08-29 22:42                               ` David Masover
2006-08-30  9:17                                 ` PFC
2006-08-30 10:45                                   ` David Masover
2006-08-30 16:50                                   ` Edward Shishkin
2006-08-30 16:55                                     ` Hans Reiser
2006-08-31  9:32                                       ` Clemens Eisserer
2006-08-31 12:00                                         ` Edward Shishkin
2006-08-31 15:14                                           ` Clemens Eisserer
2006-08-31 16:55                                           ` Hans Reiser
2006-08-31 18:08                                             ` Edward Shishkin
2006-08-31 19:22                                         ` David Masover
2006-08-29 15:41             ` Gregory Maxwell
2006-08-29 17:42             ` Hans Reiser
2006-08-29  9:29         ` Edward Shishkin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.