linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V2 0/5] selftests/powerpc: Add NX-GZIP engine testcase
@ 2020-03-27 18:16 Raphael Moreira Zinsly
  2020-03-27 18:16 ` [PATCH V2 1/5] selftests/powerpc: Add header files for GZIP engine test Raphael Moreira Zinsly
                   ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: Raphael Moreira Zinsly @ 2020-03-27 18:16 UTC (permalink / raw)
  To: linuxppc-dev, linux-crypto; +Cc: herbert, mpe, haren, abali, dja

This patch series is intended to test the POWER9 Nest
Accelerator (NX) GZIP engine that is being introduced by
https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-March/205659.html
More information about how to access the NX can be found in that patch. A
complete userspace library and more documentation can be found at:
https://github.com/libnxz/power-gzip

Changes in V2:
	- Fixed errors and warnings caught by scripts/checkpatch.pl, including
	  line breaks inside strings.
	- Fixed an infinite loop and an out-of-bounds write found by Daniel
	  Axtens.

Best regards,
Raphael


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH V2 1/5] selftests/powerpc: Add header files for GZIP engine test
  2020-03-27 18:16 [PATCH V2 0/5] selftests/powerpc: Add NX-GZIP engine testcase Raphael Moreira Zinsly
@ 2020-03-27 18:16 ` Raphael Moreira Zinsly
  2020-03-27 18:16 ` [PATCH V2 2/5] selftests/powerpc: Add header files for NX compresion/decompression Raphael Moreira Zinsly
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: Raphael Moreira Zinsly @ 2020-03-27 18:16 UTC (permalink / raw)
  To: linuxppc-dev, linux-crypto
  Cc: herbert, mpe, haren, abali, dja, Raphael Moreira Zinsly

Add files to access the powerpc NX-GZIP engine in user space.

Signed-off-by: Bulent Abali <abali@us.ibm.com>
Signed-off-by: Raphael Moreira Zinsly <rzinsly@linux.ibm.com>
---
 .../selftests/powerpc/nx-gzip/inc/crb.h       | 159 ++++++++++++++++++
 .../selftests/powerpc/nx-gzip/inc/nx-gzip.h   |  27 +++
 .../powerpc/nx-gzip/inc/nx-helpers.h          |  54 ++++++
 .../selftests/powerpc/nx-gzip/inc/nx.h        |  38 +++++
 4 files changed, 278 insertions(+)
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/inc/crb.h
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/inc/nx-gzip.h
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/inc/nx-helpers.h
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/inc/nx.h

diff --git a/tools/testing/selftests/powerpc/nx-gzip/inc/crb.h b/tools/testing/selftests/powerpc/nx-gzip/inc/crb.h
new file mode 100644
index 000000000000..9056e3dc1831
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/inc/crb.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __CRB_H
+#define __CRB_H
+#include <linux/types.h>
+#include "nx.h"
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+/* CCW 842 CI/FC masks
+ * NX P8 workbook, section 4.3.1, figure 4-6
+ * "CI/FC Boundary by NX CT type"
+ */
+#define CCW_CI_842              (0x00003ff8)
+#define CCW_FC_842              (0x00000007)
+
+/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */
+
+#define CCB_VALUE		(0x3fffffffffffffff)
+#define CCB_ADDRESS		(0xfffffffffffffff8)
+#define CCB_CM			(0x0000000000000007)
+#define CCB_CM0			(0x0000000000000004)
+#define CCB_CM12		(0x0000000000000003)
+
+#define CCB_CM0_ALL_COMPLETIONS	(0x0)
+#define CCB_CM0_LAST_IN_CHAIN	(0x4)
+#define CCB_CM12_STORE		(0x0)
+#define CCB_CM12_INTERRUPT	(0x1)
+
+#define CCB_SIZE		(0x10)
+#define CCB_ALIGN		CCB_SIZE
+
+struct coprocessor_completion_block {
+	__be64 value;
+	__be64 address;
+} __aligned(CCB_ALIGN);
+
+
+/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */
+
+#define CSB_V			(0x80)
+#define CSB_F			(0x04)
+#define CSB_CH			(0x03)
+#define CSB_CE_INCOMPLETE	(0x80)
+#define CSB_CE_TERMINATION	(0x40)
+#define CSB_CE_TPBC		(0x20)
+
+#define CSB_CC_SUCCESS		(0)
+#define CSB_CC_INVALID_ALIGN	(1)
+#define CSB_CC_OPERAND_OVERLAP	(2)
+#define CSB_CC_DATA_LENGTH	(3)
+#define CSB_CC_TRANSLATION	(5)
+#define CSB_CC_PROTECTION	(6)
+#define CSB_CC_RD_EXTERNAL	(7)
+#define CSB_CC_INVALID_OPERAND	(8)
+#define CSB_CC_PRIVILEGE	(9)
+#define CSB_CC_INTERNAL		(10)
+#define CSB_CC_WR_EXTERNAL	(12)
+#define CSB_CC_NOSPC		(13)
+#define CSB_CC_EXCESSIVE_DDE	(14)
+#define CSB_CC_WR_TRANSLATION	(15)
+#define CSB_CC_WR_PROTECTION	(16)
+#define CSB_CC_UNKNOWN_CODE	(17)
+#define CSB_CC_ABORT		(18)
+#define CSB_CC_TRANSPORT	(20)
+#define CSB_CC_SEGMENTED_DDL	(31)
+#define CSB_CC_PROGRESS_POINT	(32)
+#define CSB_CC_DDE_OVERFLOW	(33)
+#define CSB_CC_SESSION		(34)
+#define CSB_CC_PROVISION	(36)
+#define CSB_CC_CHAIN		(37)
+#define CSB_CC_SEQUENCE		(38)
+#define CSB_CC_HW		(39)
+
+#define CSB_SIZE		(0x10)
+#define CSB_ALIGN		CSB_SIZE
+
+struct coprocessor_status_block {
+	u8 flags;
+	u8 cs;
+	u8 cc;
+	u8 ce;
+	__be32 count;
+	__be64 address;
+} __aligned(CSB_ALIGN);
+
+
+/* Chapter 6.5.10 Data-Descriptor List (DDL)
+ * each list contains one or more Data-Descriptor Entries (DDE)
+ */
+
+#define DDE_P			(0x8000)
+
+#define DDE_SIZE		(0x10)
+#define DDE_ALIGN		DDE_SIZE
+
+struct data_descriptor_entry {
+	__be16 flags;
+	u8 count;
+	u8 index;
+	__be32 length;
+	__be64 address;
+} __aligned(DDE_ALIGN);
+
+
+/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
+
+#define CRB_SIZE		(0x80)
+#define CRB_ALIGN		(0x100) /* Errata: requires 256 alignment */
+
+
+/* Coprocessor Status Block field
+ *   ADDRESS	address of CSB
+ *   C		CCB is valid
+ *   AT		0 = addrs are virtual, 1 = addrs are phys
+ *   M		enable perf monitor
+ */
+#define CRB_CSB_ADDRESS		(0xfffffffffffffff0)
+#define CRB_CSB_C		(0x0000000000000008)
+#define CRB_CSB_AT		(0x0000000000000002)
+#define CRB_CSB_M		(0x0000000000000001)
+
+struct coprocessor_request_block {
+	__be32 ccw;
+	__be32 flags;
+	__be64 csb_addr;
+
+	struct data_descriptor_entry source;
+	struct data_descriptor_entry target;
+
+	struct coprocessor_completion_block ccb;
+
+	u8 reserved[48];
+
+	struct coprocessor_status_block csb;
+} __aligned(CRB_ALIGN);
+
+/* CRB accessors, c is a pointer. NOTE(review): no 'stamp' member above. */
+#define crb_csb_addr(c)         __be64_to_cpu((c)->csb_addr)
+#define crb_nx_fault_addr(c)    __be64_to_cpu((c)->stamp.nx.fault_storage_addr)
+#define crb_nx_flags(c)         ((c)->stamp.nx.flags)
+#define crb_nx_fault_status(c)  ((c)->stamp.nx.fault_status)
+#define crb_nx_pswid(c)		((c)->stamp.nx.pswid)
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1.1.1 RS
+ * Chapter 8.2.3 Coprocessor Directive
+ * Chapter 8.2.4 Execution
+ *
+ * The CCW must be converted to BE before passing to icswx()
+ */
+
+#define CCW_PS                  (0xff000000)
+#define CCW_CT                  (0x00ff0000)
+#define CCW_CD                  (0x0000ffff)
+#define CCW_CL                  (0x0000c000)
+
+#endif
diff --git a/tools/testing/selftests/powerpc/nx-gzip/inc/nx-gzip.h b/tools/testing/selftests/powerpc/nx-gzip/inc/nx-gzip.h
new file mode 100644
index 000000000000..75482c45574d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/inc/nx-gzip.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ */
+
+#ifndef _UAPI_MISC_VAS_H
+#define _UAPI_MISC_VAS_H
+#include <stdint.h>		/* fixed-width types used by the struct below */
+#include <asm/ioctl.h>
+
+#define VAS_FLAGS_PIN_WINDOW	0x1
+#define VAS_FLAGS_HIGH_PRI	0x2
+/* ioctl request codes; each takes a struct vas_gzip_setup_attr argument */
+#define VAS_FTW_SETUP		_IOW('v', 1, struct vas_gzip_setup_attr)
+#define VAS_842_TX_WIN_OPEN	_IOW('v', 2, struct vas_gzip_setup_attr)
+#define VAS_GZIP_TX_WIN_OPEN	_IOW('v', 0x20, struct vas_gzip_setup_attr)
+
+struct vas_gzip_setup_attr {
+	int32_t		version;
+	int16_t		vas_id;
+	int16_t		reserved1;
+	int64_t		flags;	/* presumably VAS_FLAGS_* bits - TODO confirm */
+	int64_t		reserved2[6];
+};
+
+#endif /* _UAPI_MISC_VAS_H */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/inc/nx-helpers.h b/tools/testing/selftests/powerpc/nx-gzip/inc/nx-helpers.h
new file mode 100644
index 000000000000..e0d68914c941
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/inc/nx-helpers.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <sys/time.h>
+#include <asm/byteorder.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include "crb.h"
+
+#define cpu_to_be32		__cpu_to_be32
+#define cpu_to_be64		__cpu_to_be64
+#define be32_to_cpu		__be32_to_cpu
+#define be64_to_cpu		__be64_to_cpu
+
+/*
+ * Several helpers/macros below were copied from the tree
+ * (kernel.h, nx-842.h, nx-ftw.h, asm-compat.h etc)
+ */
+
+/* from kernel.h; alignment/rounding helpers assume a power-of-two a or y */
+#define IS_ALIGNED(x, a)	(((x) & ((__typeof__(x))(a) - 1)) == 0)
+#define __round_mask(x, y)	((__typeof__(x))((y)-1))
+#define round_up(x, y)		((((x)-1) | __round_mask(x, y))+1)
+#define round_down(x, y)	((x) & ~__round_mask(x, y))
+/* min of x and y after converting both to type t; evaluates each twice */
+#define min_t(t, x, y)	((t)(x) < (t)(y) ? (t)(x) : (t)(y))
+/*
+ * Get/Set bit field m of v; MASK_LSH(m) is its shift. (from nx-842.h)
+ */
+#define GET_FIELD(m, v)         (((v) & (m)) >> MASK_LSH(m))
+#define MASK_LSH(m)             (__builtin_ffsl(m) - 1)
+#define SET_FIELD(m, v, val)    \
+		(((v) & ~(m)) | ((((__typeof__(v))(val)) << MASK_LSH(m)) & (m)))
+
+/* From asm-compat.h */
+#define __stringify_in_c(...)	#__VA_ARGS__
+#define stringify_in_c(...)	__stringify_in_c(__VA_ARGS__) " "
+
+#define	pr_debug(...)		((void)0)	/* debug output compiled out */
+#define	pr_debug_ratelimited	printf
+#define	pr_err			printf
+#define	pr_err_ratelimited	printf
+
+#define WARN_ON_ONCE(x)		do { if (x) /* warns on every hit, not once */ \
+				printf("WARNING: %s:%d\n", __func__, __LINE__); \
+				} while (0)
+
+extern void dump_buffer(char *msg, char *buf, int len);
+extern void *alloc_aligned_mem(int len, int align, char *msg);
+extern void get_payload(char *buf, int len);
+extern void time_add(struct timeval *in, int seconds, struct timeval *out);
+
+extern bool time_after(struct timeval *a, struct timeval *b);
+extern long time_delta(struct timeval *a, struct timeval *b);
+extern void dump_dde(struct data_descriptor_entry *dde, char *msg);
+extern void copy_paste_crb_data(struct coprocessor_request_block *crb);
diff --git a/tools/testing/selftests/powerpc/nx-gzip/inc/nx.h b/tools/testing/selftests/powerpc/nx-gzip/inc/nx.h
new file mode 100644
index 000000000000..1ae8348b59d6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/inc/nx.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ */
+#ifndef _NX_H
+#define _NX_H
+
+#include <stdbool.h>
+
+#define	NX_FUNC_COMP_842	1
+#define NX_FUNC_COMP_GZIP	2
+
+#ifndef __aligned
+#define __aligned(x)	__attribute__((aligned(x)))
+#endif
+
+struct nx842_func_args {
+	bool use_crc;
+	bool decompress;		/* true decompress; false compress */
+	bool move_data;
+	int timeout;			/* seconds */
+};
+
+struct nxbuf_t {
+	int len;
+	char *buf;
+};
+
+/* @function should be EFT (aka 842), GZIP etc */
+extern void *nx_function_begin(int function, int pri);
+
+extern int nx_function(void *handle, struct nxbuf_t *in, struct nxbuf_t *out,
+			void *arg);
+
+extern int nx_function_end(void *handle);
+
+#endif	/* _NX_H */
-- 
2.21.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH V2 2/5] selftests/powerpc: Add header files for NX compresion/decompression
  2020-03-27 18:16 [PATCH V2 0/5] selftests/powerpc: Add NX-GZIP engine testcase Raphael Moreira Zinsly
  2020-03-27 18:16 ` [PATCH V2 1/5] selftests/powerpc: Add header files for GZIP engine test Raphael Moreira Zinsly
@ 2020-03-27 18:16 ` Raphael Moreira Zinsly
  2020-03-27 18:16 ` [PATCH V2 3/5] selftests/powerpc: Add NX-GZIP engine compress testcase Raphael Moreira Zinsly
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: Raphael Moreira Zinsly @ 2020-03-27 18:16 UTC (permalink / raw)
  To: linuxppc-dev, linux-crypto
  Cc: herbert, mpe, haren, abali, dja, Raphael Moreira Zinsly

Add files to be able to compress and decompress files using the
powerpc NX-GZIP engine.

Signed-off-by: Bulent Abali <abali@us.ibm.com>
Signed-off-by: Raphael Moreira Zinsly <rzinsly@linux.ibm.com>
---
 .../powerpc/nx-gzip/inc/copy-paste.h          |  54 ++
 .../selftests/powerpc/nx-gzip/inc/nx_dbg.h    |  95 +++
 .../selftests/powerpc/nx-gzip/inc/nxu.h       | 651 ++++++++++++++++++
 3 files changed, 800 insertions(+)
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/inc/copy-paste.h
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/inc/nx_dbg.h
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/inc/nxu.h

diff --git a/tools/testing/selftests/powerpc/nx-gzip/inc/copy-paste.h b/tools/testing/selftests/powerpc/nx-gzip/inc/copy-paste.h
new file mode 100644
index 000000000000..107139b6c7df
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/inc/copy-paste.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "nx-helpers.h"
+
+/*
+ * Macros taken from arch/powerpc/include/asm/ppc-opcode.h and other
+ * header files.
+ */
+#define ___PPC_RA(a)    (((a) & 0x1f) << 16)
+#define ___PPC_RB(b)    (((b) & 0x1f) << 11)
+
+#define PPC_INST_COPY                   0x7c20060c
+#define PPC_INST_PASTE                  0x7c20070d
+
+#define PPC_COPY(a, b)          stringify_in_c(.long PPC_INST_COPY | \
+						___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_PASTE(a, b)         stringify_in_c(.long PPC_INST_PASTE | \
+						___PPC_RA(a) | ___PPC_RB(b))
+#define CR0_SHIFT	28
+#define CR0_MASK	0xF
+/*
+ * Copy/paste instructions:
+ *
+ *	copy RA,RB
+ *		Copy contents of address (RA) + effective_address(RB)
+ *		to internal copy-buffer.
+ *
+ *	paste RA,RB
+ *		Paste contents of internal copy-buffer to the address
+ *		(RA) + effective_address(RB)
+ */
+static inline int vas_copy(void *crb, int offset)
+{
+	asm volatile(PPC_COPY(%0, %1)";"	/* copy with RA=%0, RB=%1 */
+		:				/* no output operands */
+		: "b" (offset), "b" (crb)	/* %0 = offset, %1 = crb */
+		: "memory");
+
+	return 0;				/* unconditionally 0 */
+}
+
+static inline int vas_paste(void *paste_address, int offset)
+{
+	u32 cr;
+
+	cr = 0;
+	asm volatile(PPC_PASTE(%1, %2)";"	/* paste with RA=%1, RB=%2 */
+		"mfocrf %0, 0x80;"		/* CR field for mask 0x80 -> %0 */
+		: "=r" (cr)			/* %0 = cr */
+		: "b" (offset), "b" (paste_address) /* %1 = offset, %2 = address */
+		: "memory", "cr0");
+
+	return (cr >> CR0_SHIFT) & CR0_MASK;	/* 4-bit field at bits 31:28 */
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/inc/nx_dbg.h b/tools/testing/selftests/powerpc/nx-gzip/inc/nx_dbg.h
new file mode 100644
index 000000000000..f2c0eee2317e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/inc/nx_dbg.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright 2020 IBM Corporation
+ *
+ */
+
+#ifndef _NXU_DBG_H_
+#define _NXU_DBG_H_
+
+#include <sys/file.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+#include <pthread.h>
+
+extern FILE * nx_gzip_log;	/* stream written by prt(), hw_trace(), sw_trace() */
+extern int nx_gzip_trace;	/* bit mask tested by the nx_gzip_*() macros below */
+extern unsigned int nx_gzip_inflate_impl;
+extern unsigned int nx_gzip_deflate_impl;
+extern unsigned int nx_gzip_inflate_flags;
+extern unsigned int nx_gzip_deflate_flags;
+
+extern int nx_dbg;		/* threshold for prt_err/prt_warn/prt_info */
+pthread_mutex_t mutex_log;	/* held by prt(). NOTE(review): not extern, so each includer defines it - confirm */
+
+#define nx_gzip_trace_enabled()       (nx_gzip_trace & 0x1)
+#define nx_gzip_hw_trace_enabled()    (nx_gzip_trace & 0x2)
+#define nx_gzip_sw_trace_enabled()    (nx_gzip_trace & 0x4)
+#define nx_gzip_gather_statistics()   (nx_gzip_trace & 0x8)
+#define nx_gzip_per_stream_stat()     (nx_gzip_trace & 0x10)
+
+#define prt(fmt, ...) do { /* one locked, timestamped, pid-tagged line */ \
+	pthread_mutex_lock(&mutex_log);					\
+	flock(fileno(nx_gzip_log), LOCK_EX);	/* POSIX, not glibc _fileno */ \
+	time_t t; struct tm *m; time(&t); m = localtime(&t);		\
+	fprintf(nx_gzip_log, "[%04d/%02d/%02d %02d:%02d:%02d] "		\
+		"pid %d: " fmt,	\
+		(int)m->tm_year + 1900, (int)m->tm_mon+1, (int)m->tm_mday, \
+		(int)m->tm_hour, (int)m->tm_min, (int)m->tm_sec,	\
+		(int)getpid(), ## __VA_ARGS__);				\
+	fflush(nx_gzip_log);						\
+	flock(fileno(nx_gzip_log), LOCK_UN);				\
+	pthread_mutex_unlock(&mutex_log);				\
+} while (0)
+
+/* Use in case of an error */
+#define prt_err(fmt, ...) do { if (nx_dbg >= 0) {			\
+	prt("%s:%u: Error: "fmt,					\
+		__FILE__, __LINE__, ## __VA_ARGS__);			\
+}} while (0)
+
+/* Use in case of a warning */
+#define prt_warn(fmt, ...) do {	if (nx_dbg >= 1) {			\
+	prt("%s:%u: Warning: "fmt,					\
+		__FILE__, __LINE__, ## __VA_ARGS__);			\
+}} while (0)
+
+/* Informational printouts */
+#define prt_info(fmt, ...) do {	if (nx_dbg >= 2) {			\
+	prt("Info: "fmt, ## __VA_ARGS__);				\
+}} while (0)
+
+/* Trace zlib wrapper code */
+#define prt_trace(fmt, ...) do { if (nx_gzip_trace_enabled()) {		\
+	prt("### "fmt, ## __VA_ARGS__);					\
+}} while (0)
+
+/* Trace statistics */
+#define prt_stat(fmt, ...) do {	if (nx_gzip_gather_statistics()) {	\
+	prt("### "fmt, ## __VA_ARGS__);					\
+}} while (0)
+
+/* Trace zlib hardware implementation */
+#define hw_trace(fmt, ...) do {						\
+		if (nx_gzip_hw_trace_enabled())				\
+			fprintf(nx_gzip_log, "hhh " fmt, ## __VA_ARGS__); \
+	} while (0)
+
+/* Trace zlib software implementation */
+#define sw_trace(fmt, ...) do {						\
+		if (nx_gzip_sw_trace_enabled())				\
+			fprintf(nx_gzip_log, "sss " fmt, ## __VA_ARGS__); \
+	} while (0)
+
+
+/**
+ * str_to_num - Convert string into number and copy with endings like
+ *              KiB for kilobyte
+ *              MiB for megabyte
+ *              GiB for gigabyte
+ */
+uint64_t str_to_num(char *str);
+void nx_lib_debug(int onoff);
+
+#endif	/* _NXU_DBG_H_ */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/inc/nxu.h b/tools/testing/selftests/powerpc/nx-gzip/inc/nxu.h
new file mode 100644
index 000000000000..a5cfe8dd0107
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/inc/nxu.h
@@ -0,0 +1,651 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Hardware interface of the NX-GZIP compression accelerator
+ *
+ * Copyright (C) IBM Corporation, 2020
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ */
+
+#ifndef _NXU_H
+#define _NXU_H
+
+#include <stdint.h>
+#include <endian.h>
+#include "nx.h"
+
+/* deflate */
+#define LLSZ   286
+#define DSZ    30
+
+/* nx */
+#define DHTSZ  18
+#define DHT_MAXSZ 288
+#define MAX_DDE_COUNT 256
+
+/* util */
+#ifdef NXDBG
+#define NXPRT(X)	X
+#else
+#define NXPRT(X)
+#endif
+
+#ifdef NXTIMER
+#include <sys/platform/ppc.h>
+#define NX_CLK(X)	X
+#define nx_get_time()	__ppc_get_timebase()
+#define nx_get_freq()	__ppc_get_timebase_freq()
+#else
+#define NX_CLK(X)
+#define nx_get_time()  (-1)
+#define nx_get_freq()  (-1)
+#endif
+
+/*
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce:       completion extension
+ * cpb:      coprocessor parameter block (metadata)
+ * crb:      coprocessor request block (command)
+ * csb:      coprocessor status block (status)
+ * dht:      dynamic huffman table
+ * dde:      data descriptor element (address, length)
+ * ddl:      list of ddes
+ * dh/fh:    dynamic and fixed huffman types
+ * fc:       coprocessor function code
+ * histlen:  history/dictionary length
+ * history:  sliding window of up to 32KB of data
+ * lzcount:  Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt:     source final block type; last block's type during decomp
+ * spbc:     source processed byte count
+ * subc:     source unprocessed bit count
+ * tebc:     target ending bit count; valid bits in the last byte
+ * tpbc:     target processed byte count
+ * vas:      virtual accelerator switch; the user mode interface
+ */
+
+union nx_qw_t {
+	uint32_t word[4];
+	uint64_t dword[2];
+} __aligned(16);
+
+/*
+ * Note: NX registers with fewer than 32 bits are declared by
+ * convention as uint32_t variables in unions. If *_offset and *_mask
+ * are defined for a variable, then use get_ put_ macros to
+ * conveniently access the register fields for endian conversions.
+ */
+
+struct nx_dde_t {
+	/* Data Descriptor Element, Section 6.4 */
+	union {
+		uint32_t dde_count;
+		/* When dde_count == 0 ddead is a pointer to a data buffer;
+		 * ddebc is the buffer length bytes.
+		 * When dde_count > 0 dde is an indirect dde; ddead is a
+		 * pointer to a contiguous list of direct ddes; ddebc is the
+		 * total length of all data pointed to by the list of direct
+		 * ddes. Note that only one level of indirection is permitted.
+		 * See Section 6.4 of the user manual for additional details.
+		 */
+	};
+	uint32_t ddebc; /* dde byte count */
+	uint64_t ddead; /* dde address */
+} __aligned(16);
+
+struct nx_csb_t {
+	/* Coprocessor Status Block, Section 6.6  */
+	union {
+		uint32_t csb_v;
+		/* Valid bit. v must be set to 0 by the program
+		 * before submitting the coprocessor command.
+		 * Software can poll for the v bit
+		 */
+
+		uint32_t csb_f;
+		/* 16B CSB size. Written to 0 by DMA when it writes the CPB */
+
+		uint32_t csb_cs;
+		/* cs completion sequence; unused */
+
+		uint32_t csb_cc;
+		/* cc completion code; cc != 0 exception occurred */
+
+		uint32_t csb_ce;
+		/* ce completion extension */
+
+	};
+	uint32_t tpbc;
+	/* target processed byte count TPBC */
+
+	uint64_t fsaddr;
+	/* Section 6.12.1 CSB NonZero error summary.  FSA Failing storage
+	 * address.  Address where error occurred. When available, written
+	 * to A field of CSB
+	 */
+} __aligned(16);
+
+struct nx_ccb_t {
+	/* Coprocessor Completion Block, Section 6.7 */
+
+	uint32_t reserved[3];
+	union {
+		/* When crb.c==0 (no ccb defined) it is reserved;
+		 * When crb.c==1 (ccb defined) it is cm
+		 */
+
+		uint32_t ccb_cm;
+		/* Signal interrupt of crb.c==1 and cm==1 */
+
+		uint32_t word;
+		/* generic access to the 32bit word */
+	};
+} __aligned(16);
+
+struct vas_stamped_crb_t {
+	/*
+	 * CRB operand of the paste coprocessor instruction is stamped
+	 * in quadword 4 with the information shown here as it is written
+	 * into the receive FIFO of the coprocessor.
+	 */
+
+	union {
+		uint32_t vas_buf_num;
+		/* Verification only vas buffer number which correlates to
+		 * the low order bits of the atag in the paste command
+		 */
+
+		uint32_t send_wc_id;
+		/* Pointer to Send Window Context that provides for NX address
+		 * translation information, such as MSR and LPCR bits, job
+		 * completion interrupt RA, PSWID, and job utilization counter.
+		 */
+
+	};
+	union {
+		uint32_t recv_wc_id;
+		/* Pointer to Receive Window Context. NX uses this to return
+		 * credits to a Receive FIFO as entries are dequeued.
+		 */
+
+	};
+	uint32_t reserved2;
+	union {
+		uint32_t vas_invalid;
+		/* Invalid bit. If this bit is 1 the CRB is discarded by
+		 * NX upon fetching from the receive FIFO. If this bit is 0
+		 * the CRB is processed normally. The bit is stamped to 0
+		 * by VAS and may be written to 1 by hypervisor while
+		 * the CRB is in the receive FIFO (in memory).
+		 */
+
+	};
+};
+
+struct nx_stamped_fault_crb_t {
+	/*
+	 * A CRB that has a translation fault is stamped by NX in quadword 4
+	 * and pasted to the Fault Send Window in VAS.
+	 */
+	uint64_t fsa;
+	union {
+		uint32_t nxsf_t;
+		uint32_t nxsf_fs;
+	};
+	uint32_t pswid;
+};
+
+union stamped_crb_t {
+	struct vas_stamped_crb_t      vas;
+	struct nx_stamped_fault_crb_t nx;
+};
+
+struct nx_gzip_cpb_t {
+	/*
+	 * Coprocessor Parameter Block In/Out are used to pass metadata
+	 * to/from accelerator.  Tables 6.5 and 6.6 of the user manual.
+	 */
+
+	/* CPBInput */
+
+	struct {
+		union {
+		union nx_qw_t qw0;
+			struct {
+				uint32_t in_adler;            /* bits 0:31  */
+				uint32_t in_crc;              /* bits 32:63 */
+				union {
+					uint32_t in_histlen;  /* bits 64:75 */
+					uint32_t in_subc;     /* bits 93:95 */
+				};
+				union {
+					/* bits 108:111 */
+					uint32_t in_sfbt;
+					/* bits 112:127 */
+					uint32_t in_rembytecnt;
+					/* bits 116:127 */
+					uint32_t in_dhtlen;
+				};
+			};
+		};
+		union {
+			union nx_qw_t  in_dht[DHTSZ];	/* qw[1:18]     */
+			char in_dht_char[DHT_MAXSZ];	/* byte access  */
+		};
+		union nx_qw_t  reserved[5];		/* qw[19:23]    */
+	};
+
+	/* CPBOutput */
+
+	struct {
+		union {
+			union nx_qw_t qw24;
+			struct {
+				uint32_t out_adler;    /* bits 0:31  qw[24] */
+				uint32_t out_crc;      /* bits 32:63 qw[24] */
+				union {
+					/* bits 77:79 qw[24] */
+					uint32_t out_tebc;
+					/* bits 80:95 qw[24] */
+					uint32_t out_subc;
+				};
+				union {
+					/* bits 108:111 qw[24] */
+					uint32_t out_sfbt;
+					/* bits 112:127 qw[24] */
+					uint32_t out_rembytecnt;
+					/* bits 116:127 qw[24] */
+					uint32_t out_dhtlen;
+				};
+			};
+		};
+		union {
+			union nx_qw_t  qw25[79];        /* qw[25:103] */
+			/* qw[25] compress no lzcounts or wrap */
+			uint32_t out_spbc_comp_wrap;
+			uint32_t out_spbc_wrap;         /* qw[25] wrap */
+			/* qw[25] compress no lzcounts */
+			uint32_t out_spbc_comp;
+			 /* 286 LL and 30 D symbol counts */
+			uint32_t out_lzcount[LLSZ+DSZ];
+			struct {
+				union nx_qw_t  out_dht[DHTSZ];  /* qw[25:42] */
+				/* qw[43] decompress */
+				uint32_t out_spbc_decomp;
+			};
+		};
+		/* qw[104] compress with lzcounts */
+		uint32_t out_spbc_comp_with_count;
+	};
+} __aligned(128);
+
+struct nx_gzip_crb_t {
+	union {                   /* byte[0:3]   */
+		uint32_t gzip_fc;     /* bits[24-31] */
+	};
+	uint32_t reserved1;       /* byte[4:7]   */
+	union {
+		uint64_t csb_address; /* byte[8:15]  */
+		struct {
+			uint32_t reserved2;
+			union {
+				uint32_t crb_c;
+				/* c==0 no ccb defined */
+
+				uint32_t crb_at;
+				/* at==0 address type is ignored;
+				 * all addrs effective assumed.
+				 */
+
+			};
+		};
+	};
+	struct nx_dde_t source_dde;           /* byte[16:31] */
+	struct nx_dde_t target_dde;           /* byte[32:47] */
+	struct nx_ccb_t ccb;         /* byte[48:63] */
+	union {
+		/* byte[64:239] shift csb by 128 bytes out of the crb; csb was
+		 * in crb earlier; JReilly says csb written with partial inject
+		 */
+		union nx_qw_t reserved64[11];
+		union stamped_crb_t stamp;       /* byte[64:79] */
+	};
+	struct nx_csb_t csb;
+} __aligned(128);
+
+struct nx_gzip_crb_cpb_t {
+	struct nx_gzip_crb_t crb;
+	struct nx_gzip_cpb_t cpb;
+} __aligned(2048);
+
+
+/*
+ * NX hardware convention numbers the most significant (leftmost) bit 0.
+ * The defines below have *_offset defined as the rightmost bit
+ * position of a field.  The x in size_mask(x) is the field width in bits.
+ */
+
+#define size_mask(x)          ((1U<<(x))-1)
+
+/*
+ * Offsets and Widths within the containing 32 bits of the various NX
+ * gzip hardware registers.  Use the getnn/putnn macros to access
+ * these regs
+ */
+
+#define dde_count_mask        size_mask(8)
+#define dde_count_offset      23
+
+/* CSB */
+
+#define csb_v_mask            size_mask(1)
+#define csb_v_offset          0
+#define csb_f_mask            size_mask(1)
+#define csb_f_offset          6
+#define csb_cs_mask           size_mask(8)
+#define csb_cs_offset         15
+#define csb_cc_mask           size_mask(8)
+#define csb_cc_offset         23
+#define csb_ce_mask           size_mask(8)
+#define csb_ce_offset         31
+
+/* CCB */
+
+#define ccb_cm_mask           size_mask(3)
+#define ccb_cm_offset         31
+
+/* VAS stamped CRB fields */
+
+#define vas_buf_num_mask      size_mask(6)
+#define vas_buf_num_offset    5
+#define send_wc_id_mask       size_mask(16)
+#define send_wc_id_offset     31
+#define recv_wc_id_mask       size_mask(16)
+#define recv_wc_id_offset     31
+#define vas_invalid_mask      size_mask(1)
+#define vas_invalid_offset    31
+
+/* NX stamped fault CRB fields */
+
+#define nxsf_t_mask           size_mask(1)
+#define nxsf_t_offset         23
+#define nxsf_fs_mask          size_mask(8)
+#define nxsf_fs_offset        31
+
+/* CPB input */
+
+#define in_histlen_mask       size_mask(12)
+#define in_histlen_offset     11
+#define in_dhtlen_mask        size_mask(12)
+#define in_dhtlen_offset      31
+#define in_subc_mask          size_mask(3)
+#define in_subc_offset        31
+#define in_sfbt_mask          size_mask(4)
+#define in_sfbt_offset        15
+#define in_rembytecnt_mask    size_mask(16)
+#define in_rembytecnt_offset  31
+
+/* CPB output */
+
+#define out_tebc_mask         size_mask(3)
+#define out_tebc_offset       15
+#define out_subc_mask         size_mask(16)
+#define out_subc_offset       31
+#define out_sfbt_mask         size_mask(4)
+#define out_sfbt_offset       15
+#define out_rembytecnt_mask   size_mask(16)
+#define out_rembytecnt_offset 31
+#define out_dhtlen_mask       size_mask(12)
+#define out_dhtlen_offset     31
+
+/* CRB */
+
+#define gzip_fc_mask          size_mask(8)
+#define gzip_fc_offset        31
+#define crb_c_mask            size_mask(1)
+#define crb_c_offset          28
+#define crb_at_mask           size_mask(1)
+#define crb_at_offset         30
+#define csb_address_mask      ~(15UL) /* mask off bottom 4b */
+
+/*
+ * Access macros for the registers.  Do not access registers directly
+ * because of the endian conversion.  P9 processor may run either as
+ * Little or Big endian. However the NX coprocessor regs are always
+ * big endian.
+ * Use the 32 and 64b macros to access respective
+ * register sizes.
+ * Use nn forms for the register fields shorter than 32 bits.
+ */
+
+#define getnn(ST, REG)      ((be32toh((ST).REG) >> (31-REG##_offset)) \
+				 & REG##_mask)
+#define getpnn(ST, REG)     ((be32toh((ST)->REG) >> (31-REG##_offset)) \
+				 & REG##_mask)
+#define get32(ST, REG)      (be32toh((ST).REG))
+#define getp32(ST, REG)     (be32toh((ST)->REG))
+#define get64(ST, REG)      (be64toh((ST).REG))
+#define getp64(ST, REG)     (be64toh((ST)->REG))
+/* Above and below: plain forms take a struct lvalue, 'p' forms a pointer. */
+#define unget32(ST, REG)    (get32(ST, REG) & ~((REG##_mask) \
+				<< (31-REG##_offset)))
+/* the 32-bit word in host order with the REG field cleared */
+
+#define ungetp32(ST, REG)   (getp32(ST, REG) & ~((REG##_mask) \
+				<< (31-REG##_offset)))
+/* the 32-bit word in host order with the REG field cleared */
+
+#define clear_regs(ST)      memset((void *)(&(ST)), 0, sizeof(ST))
+#define clear_dde(ST)       do { (ST).dde_count = (ST).ddebc = 0; \
+				 (ST).ddead = 0; } while (0)
+#define clearp_dde(ST)      do { (ST)->dde_count = (ST)->ddebc = 0; \
+				 (ST)->ddead = 0; \
+				} while (0)
+#define clear_struct(ST)    memset((void *)(&(ST)), 0, sizeof(ST))
+#define putnn(ST, REG, X)   ((ST).REG = htobe32(unget32(ST, REG) | (((X) \
+				 & REG##_mask) << (31-REG##_offset))))
+#define putpnn(ST, REG, X)  ((ST)->REG = htobe32(ungetp32(ST, REG) \
+				| (((X) & REG##_mask) << (31-REG##_offset))))
+
+#define put32(ST, REG, X)   ((ST).REG = htobe32(X))
+#define putp32(ST, REG, X)  ((ST)->REG = htobe32(X))
+#define put64(ST, REG, X)   ((ST).REG = htobe64(X))
+#define putp64(ST, REG, X)  ((ST)->REG = htobe64(X))
+
+/*
+ * Completion extension ce(0) ce(1) ce(2).  Bits ce(3-7)
+ * unused.  Section 6.6 Figure 6.7.
+ */
+
+#define get_csb_ce(ST) ((uint32_t)getnn(ST, csb_ce))
+#define get_csb_ce_ms3b(ST) (get_csb_ce(ST) >> 5)
+#define put_csb_ce_ms3b(ST, X) putnn(ST, csb_ce, ((uint32_t)(X) << 5))
+
+#define CSB_CE_PARTIAL         0x4
+#define CSB_CE_TERMINATE       0x2
+#define CSB_CE_TPBC_VALID      0x1
+
+#define csb_ce_termination(X)         (!!((X) & CSB_CE_TERMINATE))
+/* termination, output buffers may be modified, SPBC/TPBC invalid Fig.6-7 */
+
+#define csb_ce_check_completion(X)    (!csb_ce_termination(X))
+/* if not terminated then check full or partial completion */
+
+#define csb_ce_partial_completion(X)  (!!((X) & CSB_CE_PARTIAL))
+#define csb_ce_full_completion(X)     (!csb_ce_partial_completion(X))
+#define csb_ce_tpbc_valid(X)          (!!((X) & CSB_CE_TPBC_VALID))
+/* TPBC indicates successfully stored data count */
+
+#define csb_ce_default_err(X)         csb_ce_termination(X)
+/* most error CEs have CE(0)=0 and CE(1)=1 */
+
+#define csb_ce_cc3_partial(X)         csb_ce_partial_completion(X)
+/* some CC=3 are partially completed, Table 6-8 */
+
+#define csb_ce_cc64(X)                (((X) & (CSB_CE_PARTIAL \
+					| CSB_CE_TERMINATE)) == 0)
+/* Compression: when TPBC>SPBC then CC=64 Table 6-8; target didn't
+ * compress smaller than source. True when PARTIAL and TERMINATE are clear.
+ */
+
+/* Decompress SFBT combinations Tables 5-3, 6-4, 6-6 */
+
+#define SFBT_BFINAL 0x1
+#define SFBT_LIT    0x4
+#define SFBT_FHT    0x5
+#define SFBT_DHT    0x6
+#define SFBT_HDR    0x7
+
+/*
+ * NX gzip function codes. Table 6.2.
+ * Bits 0:4 are the FC. Bit 5 is used by the DMA controller to
+ * select one of the two Byte Count Limits.
+ */
+
+#define GZIP_FC_LIMIT_MASK                               0x01
+#define GZIP_FC_COMPRESS_FHT                             0x00
+#define GZIP_FC_COMPRESS_DHT                             0x02
+#define GZIP_FC_COMPRESS_FHT_COUNT                       0x04
+#define GZIP_FC_COMPRESS_DHT_COUNT                       0x06
+#define GZIP_FC_COMPRESS_RESUME_FHT                      0x08
+#define GZIP_FC_COMPRESS_RESUME_DHT                      0x0a
+#define GZIP_FC_COMPRESS_RESUME_FHT_COUNT                0x0c
+#define GZIP_FC_COMPRESS_RESUME_DHT_COUNT                0x0e
+#define GZIP_FC_DECOMPRESS                               0x10
+#define GZIP_FC_DECOMPRESS_SINGLE_BLK_N_SUSPEND          0x12
+#define GZIP_FC_DECOMPRESS_RESUME                        0x14
+#define GZIP_FC_DECOMPRESS_RESUME_SINGLE_BLK_N_SUSPEND   0x16
+#define GZIP_FC_WRAP                                     0x1e
+
+#define fc_is_compress(fc)  (((fc) & 0x10) == 0)
+#define fc_has_count(fc)    (fc_is_compress(fc) && (((fc) & 0x4) != 0))
+
+/* CSB.CC Error codes */
+
+#define ERR_NX_OK             0
+#define ERR_NX_ALIGNMENT      1
+#define ERR_NX_OPOVERLAP      2
+#define ERR_NX_DATA_LENGTH    3
+#define ERR_NX_TRANSLATION    5
+#define ERR_NX_PROTECTION     6
+#define ERR_NX_EXTERNAL_UE7   7
+#define ERR_NX_INVALID_OP     8
+#define ERR_NX_PRIVILEGE      9
+#define ERR_NX_INTERNAL_UE   10
+#define ERR_NX_EXTERN_UE_WR  12
+#define ERR_NX_TARGET_SPACE  13
+#define ERR_NX_EXCESSIVE_DDE 14
+#define ERR_NX_TRANSL_WR     15
+#define ERR_NX_PROTECT_WR    16
+#define ERR_NX_SUBFUNCTION   17
+#define ERR_NX_FUNC_ABORT    18
+#define ERR_NX_BYTE_MAX      19
+#define ERR_NX_CORRUPT_CRB   20
+#define ERR_NX_INVALID_CRB   21
+#define ERR_NX_INVALID_DDE   30
+#define ERR_NX_SEGMENTED_DDL 31
+#define ERR_NX_DDE_OVERFLOW  33
+#define ERR_NX_TPBC_GT_SPBC  64
+#define ERR_NX_MISSING_CODE  66
+#define ERR_NX_INVALID_DIST  67
+#define ERR_NX_INVALID_DHT   68
+#define ERR_NX_EXTERNAL_UE90 90
+#define ERR_NX_WDOG_TIMER   224
+#define ERR_NX_AT_FAULT     250
+#define ERR_NX_INTR_SERVER  252
+#define ERR_NX_UE253        253
+#define ERR_NX_NO_HW        254
+#define ERR_NX_HUNG_OP      255
+#define ERR_NX_END          256
+
+/* initial values for non-resume operations */
+#define INIT_CRC   0  /* crc32(0L, Z_NULL, 0) */
+#define INIT_ADLER 1  /* adler32(0L, Z_NULL, 0)  adler is initialized to 1 */
+
+/* prototypes */
+#ifdef NX_JOB_CALLBACK
+int nxu_run_job(struct nx_gzip_crb_cpb_t *c, void *handle,
+		int (*callback)(const void *));
+#else
+int nxu_run_job(struct nx_gzip_crb_cpb_t *c, void *handle);
+#endif
+
+
+/* caller supplies a print buffer 4*sizeof(crb) */
+
+char *nx_crb_str(struct nx_gzip_crb_t *crb, char *prbuf);
+char *nx_cpb_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
+char *nx_prt_hex(void *cp, int sz, char *prbuf);
+char *nx_lzcount_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
+char *nx_strerror(int e);
+
+#ifdef NX_SIM
+#include <stdio.h>
+int nx_sim_init(void *ctx);
+int nx_sim_end(void *ctx);
+int nxu_run_sim_job(struct nx_gzip_crb_cpb_t *c, void *ctx);
+#endif /* NX_SIM */
+
+/* Deflate stream manipulation */
+
+#define set_final_bit(x)	(x |= (unsigned char)1)
+#define clr_final_bit(x)	(x &= ~(unsigned char)1)
+
+#define append_empty_fh_blk(p, b) do { *(p) = (2 | (1&(b))); *((p)+1) = 0; \
+					} while (0)
+/* append 10 bits 0000001b 00...... ;
+ * assumes appending starts on a byte boundary; b is the final bit.
+ */
+
+
+#ifdef NX_842
+
+/* 842 Engine */
+
+/*
+ * Coprocessor request block for the 842 engine; only compiled when
+ * NX_842 is defined.  The byte ranges in the comments are offsets
+ * from the start of the structure.
+ */
+struct nx_eft_crb_t {
+	union {                   /* byte[0:3]   */
+		uint32_t eft_fc;      /* bits[29-31] */
+	};
+	uint32_t reserved1;       /* byte[4:7]   */
+	union {
+		uint64_t csb_address; /* byte[8:15]  */
+		struct {
+			uint32_t reserved2;
+			union {
+				uint32_t crb_c;
+				/* c==0 no ccb defined */
+
+				uint32_t crb_at;
+				/* at==0 address type is ignored;
+				 * all addrs effective assumed.
+				 */
+
+			};
+		};
+	};
+	struct nx_dde_t source_dde;           /* byte[16:31] */
+	struct nx_dde_t target_dde;           /* byte[32:47] */
+	struct nx_ccb_t ccb;                  /* byte[48:63] */
+	union {
+		/* NOTE(review): three quadwords would end at byte 111,
+		 * not 96, if union nx_qw_t is 16 bytes - confirm.
+		 */
+		union nx_qw_t reserved64[3];     /* byte[64:96] */
+	};
+	struct nx_csb_t csb;
+} __aligned(128);
+
+/* 842 CRB */
+
+#define EFT_FC_MASK                 size_mask(3)
+#define EFT_FC_OFFSET               31
+#define EFT_FC_COMPRESS             0x0
+#define EFT_FC_COMPRESS_WITH_CRC    0x1
+#define EFT_FC_DECOMPRESS           0x2
+#define EFT_FC_DECOMPRESS_WITH_CRC  0x3
+#define EFT_FC_BLK_DATA_MOVE        0x4
+#endif /* NX_842 */
+
+#endif /* _NXU_H */
-- 
2.21.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH V2 3/5] selftests/powerpc: Add NX-GZIP engine compress testcase
  2020-03-27 18:16 [PATCH V2 0/5] selftests/powerpc: Add NX-GZIP engine testcase Raphael Moreira Zinsly
  2020-03-27 18:16 ` [PATCH V2 1/5] selftests/powerpc: Add header files for GZIP engine test Raphael Moreira Zinsly
  2020-03-27 18:16 ` [PATCH V2 2/5] selftests/powerpc: Add header files for NX compresion/decompression Raphael Moreira Zinsly
@ 2020-03-27 18:16 ` Raphael Moreira Zinsly
  2020-04-02  6:17   ` Daniel Axtens
  2020-03-27 18:16 ` [PATCH V2 4/5] selftests/powerpc: Add NX-GZIP engine decompress testcase Raphael Moreira Zinsly
  2020-03-27 18:16 ` [PATCH V2 5/5] selftests/powerpc: Add README for GZIP engine tests Raphael Moreira Zinsly
  4 siblings, 1 reply; 9+ messages in thread
From: Raphael Moreira Zinsly @ 2020-03-27 18:16 UTC (permalink / raw)
  To: linuxppc-dev, linux-crypto
  Cc: herbert, mpe, haren, abali, dja, Raphael Moreira Zinsly

Add a compression testcase for the powerpc NX-GZIP engine.

Signed-off-by: Bulent Abali <abali@us.ibm.com>
Signed-off-by: Raphael Moreira Zinsly <rzinsly@linux.ibm.com>
---
 .../selftests/powerpc/nx-gzip/Makefile        |  21 +
 .../selftests/powerpc/nx-gzip/gzfht_test.c    | 489 ++++++++++++++++++
 .../selftests/powerpc/nx-gzip/gzip_vas.c      | 259 ++++++++++
 3 files changed, 769 insertions(+)
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/Makefile
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c

diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
new file mode 100644
index 000000000000..ab903f63bbbd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -0,0 +1,21 @@
+CC = gcc
+CFLAGS = -O3
+INC = ./inc
+SRC = gzfht_test.c
+OBJ = $(SRC:.c=.o)
+TESTS = gzfht_test
+EXTRA_SOURCES = gzip_vas.c
+
+all:	$(TESTS)
+
+$(OBJ): %.o: %.c
+	$(CC) $(CFLAGS) -I$(INC) -c $<
+
+$(TESTS): $(OBJ)
+	$(CC) $(CFLAGS) -I$(INC) -o $@ $@.o $(EXTRA_SOURCES)
+
+run_tests: $(TESTS)
+	./gzfht_test gzip_vas.c
+
+clean:
+	rm -f $(TESTS) *.o *~ *.gz
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
new file mode 100644
index 000000000000..7a21c25f5611
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gzip sample code for demonstrating the P9 NX hardware interface.
+ * Not intended for production use or for performance or compression
+ * ratio measurements.  For simplicity of demonstration, this sample
+ * code compresses into fixed Huffman blocks only (Deflate btype=1)
+ * and has very simple memory management.  Dynamic Huffman blocks
+ * (Deflate btype=2) are more involved as detailed in the user guide.
+ * Note also that /dev/crypto/nx-gzip, VAS and skiboot support are
+ * required.
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce:       completion extension
+ * cpb:      coprocessor parameter block (metadata)
+ * crb:      coprocessor request block (command)
+ * csb:      coprocessor status block (status)
+ * dht:      dynamic huffman table
+ * dde:      data descriptor element (address, length)
+ * ddl:      list of ddes
+ * dh/fh:    dynamic and fixed huffman types
+ * fc:       coprocessor function code
+ * histlen:  history/dictionary length
+ * history:  sliding window of up to 32KB of data
+ * lzcount:  Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt:     source final block type; last block's type during decomp
+ * spbc:     source processed byte count
+ * subc:     source unprocessed bit count
+ * tebc:     target ending bit count; valid bits in the last byte
+ * tpbc:     target processed byte count
+ * vas:      virtual accelerator switch; the user mode interface
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+void *nx_fault_storage_address;
+
+#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+#define FNAME_MAX 1024
+#define FEXT ".nx.gz"
+
+/*
+ * Compress one chunk of source data into a fixed Huffman (FHT)
+ * Deflate block on the NX accelerator.
+ *
+ * src/srclen:  input data, described to the engine by a direct DDE
+ * dst/dstlen:  output buffer, described to the engine by a direct DDE
+ * with_count:  nonzero selects the function code that also returns
+ *              LZ counts in the user supplied nx_gzip_crb_cpb_t
+ * cmdp:        command block; in_crc is not modified here
+ * handle:      passed through to nxu_run_job()
+ *
+ * Returns the CSB completion code (CC Table 6-8), or the negative
+ * value returned by nxu_run_job() when the job could not be run.
+ */
+static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
+				uint32_t dstlen, int with_count,
+				struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	int rc;
+	uint32_t fc;
+
+	assert(!!cmdp);
+
+	put32(cmdp->crb, gzip_fc, 0);  /* clear */
+	fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
+			    GZIP_FC_COMPRESS_RESUME_FHT;
+	putnn(cmdp->crb, gzip_fc, fc);
+	putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
+	memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+	/* Section 6.6 programming notes; spbc may be in two different
+	 * places depending on FC.
+	 */
+	if (!with_count)
+		put32(cmdp->cpb, out_spbc_comp, 0);
+	else
+		put32(cmdp->cpb, out_spbc_comp_with_count, 0);
+
+	/* Figure 6-3 6-4; CSB location */
+	put64(cmdp->crb, csb_address, 0);
+	put64(cmdp->crb, csb_address,
+	      (uint64_t) &cmdp->crb.csb & csb_address_mask);
+
+	/* Source direct dde (scatter-gather list) */
+	clear_dde(cmdp->crb.source_dde);
+	putnn(cmdp->crb.source_dde, dde_count, 0);
+	put32(cmdp->crb.source_dde, ddebc, srclen);
+	put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
+
+	/* Target direct dde (scatter-gather list) */
+	clear_dde(cmdp->crb.target_dde);
+	putnn(cmdp->crb.target_dde, dde_count, 0);
+	put32(cmdp->crb.target_dde, ddebc, dstlen);
+	put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
+
+	/* Submit the crb, the job descriptor, to the accelerator.  A
+	 * negative return means the CSB will never become valid, so
+	 * polling for it below would spin forever.
+	 */
+	rc = nxu_run_job(cmdp, handle);
+	if (rc < 0)
+		return rc;
+
+	/* Poll for the csb.v bit; you should also consider sleeping
+	 * or interrupts.
+	 */
+	do { ; } while (getnn(cmdp->crb.csb, csb_v) == 0);
+
+	/* CC Table 6-8 */
+	return getnn(cmdp->crb.csb, csb_cc);
+}
+
+/*
+ * Writes a minimal gzip member header (no file name, zero timestamp)
+ * at buf and returns how many bytes were stored.
+ * Gzip specification at https://tools.ietf.org/html/rfc1952
+ */
+int gzip_header_blank(char *buf)
+{
+	static const unsigned char blank_hdr[] = {
+		0x1f,			/* ID1 */
+		0x8b,			/* ID2 */
+		0x08,			/* CM  */
+		0x00,			/* FLG */
+		0x00, 0x00, 0x00, 0x00,	/* MTIME */
+		0x04,			/* XFL 4=fastest */
+		0x03,			/* OS UNIX */
+	};
+
+	memcpy(buf, blank_hdr, sizeof(blank_hdr));
+
+	return (int) sizeof(blank_hdr);
+}
+
+/*
+ * Read the whole file fname into a newly allocated buffer.
+ *
+ * On success stores the buffer in *buf and its length in *bufsize and
+ * returns 0; the caller must free the buffer.  On error prints a
+ * message, releases everything acquired here, leaves *buf and
+ * *bufsize untouched and returns -1.
+ */
+int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
+{
+	struct stat statbuf;
+	FILE *fp;
+	char *p;
+	size_t num_bytes;
+
+	if (stat(fname, &statbuf)) {
+		perror(fname);
+		return -1;
+	}
+	fp = fopen(fname, "r");
+	if (fp == NULL) {
+		perror(fname);
+		return -1;
+	}
+	/* Allocate outside assert() so the call survives -DNDEBUG; ask
+	 * for at least one byte because malloc(0) may return NULL.
+	 */
+	p = malloc(statbuf.st_size > 0 ? (size_t) statbuf.st_size : 1);
+	if (p == NULL) {
+		perror(fname);
+		fclose(fp);
+		return -1;
+	}
+	num_bytes = fread(p, 1, statbuf.st_size, fp);
+	if (ferror(fp)) {
+		perror(fname);
+		free(p);
+		fclose(fp);
+		return -1;
+	}
+	if (num_bytes != (size_t) statbuf.st_size) {
+		/* errno is not meaningful for a short read */
+		fprintf(stderr, "%s: short read\n", fname);
+		free(p);
+		fclose(fp);
+		return -1;
+	}
+	fclose(fp);
+	*buf = p;
+	*bufsize = num_bytes;
+	return 0;
+}
+
+/*
+ * Write bufsize bytes from buf to the file fname, creating or
+ * truncating it.  Returns 0 on success; on error prints a message and
+ * returns -1.  The stream is closed on every path.
+ */
+int write_output_file(char *fname, char *buf, size_t bufsize)
+{
+	FILE *fp;
+	size_t num_bytes;
+
+	fp = fopen(fname, "w");
+	if (fp == NULL) {
+		perror(fname);
+		return -1;
+	}
+	num_bytes = fwrite(buf, 1, bufsize, fp);
+	if (ferror(fp) || (num_bytes != bufsize)) {
+		perror(fname);
+		fclose(fp);
+		return -1;
+	}
+	/* fclose() flushes buffered data, so its failure is a write error */
+	if (fclose(fp)) {
+		perror(fname);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Z_SYNC_FLUSH as described in zlib.h.
+ * Appends a block header with the given final bit followed by a zero
+ * length field and its complement (0x0000, 0xFFFF).
+ *
+ * buf:   first byte after the data written so far
+ * tebc:  number of valid bits in the byte before buf; 0 if that byte
+ *        is full
+ * final: value for the BFINAL bit of the appended block
+ *
+ * Returns number of appended bytes
+ */
+int append_sync_flush(char *buf, int tebc, int final)
+{
+	uint64_t flush;
+	int shift = (tebc & 0x7);
+
+	if (tebc > 0) {
+		/* Last byte is partially full */
+		/* Step back onto it and keep only its tebc valid bits */
+		buf = buf - 1;
+		*buf = *buf & (unsigned char) ((1<<tebc)-1);
+	} else
+		*buf = 0;
+	/* BFINAL goes right above the valid bits; BTYPE stays 00 */
+	flush = ((0x1ULL & final) << shift) | *buf;
+	shift = shift + 3; /* BFINAL and BTYPE written */
+	/* The length fields start on the next byte boundary */
+	shift = (shift <= 8) ? 8 : 16;
+	flush |= (0xFFFF0000ULL) << shift; /* Zero length block */
+	shift = shift + 32;
+	/* Store the assembled bits least significant byte first */
+	while (shift > 0) {
+		*buf++ = (unsigned char) (flush & 0xffULL);
+		flush = flush >> 8;
+		shift = shift - 8;
+	}
+	/* The rewritten partial byte is not counted as appended */
+	return(((tebc > 5) || (tebc == 0)) ? 5 : 4);
+}
+
+/*
+ * Fault in pages prior to NX job submission. wr=1 may be required to
+ * touch writeable pages.  System zero pages do not fault-in the page as
+ * intended.  Typically set wr=1 for NX target pages and set wr=0 for NX
+ * source pages.
+ *
+ * buf/buf_len: region to touch
+ * page_len:    stride between touched bytes
+ * wr:          nonzero to write each touched byte back as well
+ *
+ * Returns 0 on success, -1 if buf is NULL or a length is not positive.
+ */
+static int nx_touch_pages(void *buf, long buf_len, long page_len, int wr)
+{
+	/* volatile: the loads and stores exist only for their page fault
+	 * side effect; without it the optimizer may delete them.
+	 */
+	volatile char *begin = buf;
+	volatile char *end;
+	char t;
+
+	assert(buf_len >= 0 && !!buf);
+
+	NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf,
+			(void *) ((char *) buf + buf_len), buf_len, wr));
+
+	/* page_len <= 0 would never advance past the end of the buffer */
+	if (buf_len <= 0 || buf == NULL || page_len <= 0)
+		return -1;
+
+	/* Computed only after validation so it never points before buf */
+	end = (volatile char *) buf + buf_len - 1;
+
+	do {
+		t = *begin;
+		if (wr)
+			*begin = t;
+		begin = begin + page_len;
+	} while (begin < end);
+
+	/* When buf_sz is small or buf tail is in another page */
+	t = *end;
+	if (wr)
+		*end = t;
+
+	return 0;
+}
+
+/*
+ * Sets or clears the BFINAL bit, the least significant bit of the
+ * first byte at buf.  Assumes the Deflate block starts on a byte
+ * boundary.
+ */
+static void set_bfinal(void *buf, int bfinal)
+{
+	char *first = buf;
+
+	*first = bfinal ? (*first | (unsigned char) 0x01)
+			: (*first & (unsigned char) 0xfe);
+}
+
+/*
+ * Compress the file named by argv[1] into "<argv[1]>.nx.gz".
+ *
+ * The input is read into memory and compressed in chunks of at most
+ * 4 MB into fixed Huffman blocks that are joined by sync flush blocks
+ * and wrapped in a gzip header and trailer.
+ *
+ * argc/argv: command line; exactly one file name is expected
+ * handle:    passed through to the job submission code
+ *
+ * Returns 0 on success; prints a message and exits on any error.
+ */
+int compress_file(int argc, char **argv, void *handle)
+{
+	char *inbuf, *outbuf, *srcbuf, *dstbuf;
+	char outname[FNAME_MAX];
+	uint32_t srclen, dstlen;
+	uint32_t flushlen, chunk;
+	size_t inlen, outlen, dsttotlen, srctotlen;
+	uint32_t crc = 0; /* crc32 of no data; used when the input is empty */
+	uint32_t isize;
+	uint32_t spbc, tpbc, tebc;
+	int lzcounts = 0;
+	int cc;
+	int num_hdr_bytes;
+	int namelen;
+	struct nx_gzip_crb_cpb_t *cmdp;
+	uint32_t pagelen = 65536;
+	int fault_tries = 50;
+
+	if (argc != 2) {
+		fprintf(stderr, "usage: %s <fname>\n", argv[0]);
+		exit(-1);
+	}
+
+	cmdp = aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
+			     sizeof(struct nx_gzip_crb_cpb_t));
+	if (cmdp == NULL) {
+		fprintf(stderr, "error: cannot allocate the command block\n");
+		exit(-1);
+	}
+
+	if (read_alloc_input_file(argv[1], &inbuf, &inlen))
+		exit(-1);
+	fprintf(stderr, "file %s read, %zu bytes\n", argv[1], inlen);
+
+	/* Generous output buffer for header/trailer */
+	outlen = 2 * inlen + 1024;
+
+	/* Allocate outside assert() so the call survives -DNDEBUG */
+	outbuf = malloc(outlen);
+	if (outbuf == NULL) {
+		fprintf(stderr, "error: cannot allocate the output buffer\n");
+		exit(-1);
+	}
+	nx_touch_pages(outbuf, outlen, pagelen, 1);
+
+	/* Compress piecemeal in smallish chunks */
+	chunk = 1<<22;
+
+	/* Write the gzip header to the stream */
+	num_hdr_bytes = gzip_header_blank(outbuf);
+	dstbuf    = outbuf + num_hdr_bytes;
+	outlen    = outlen - num_hdr_bytes;
+	dsttotlen = num_hdr_bytes;
+
+	srcbuf    = inbuf;
+	srctotlen = 0;
+
+	/* Init the CRB, the coprocessor request block */
+	memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+	/* Initial gzip crc32 */
+	put32(cmdp->cpb, in_crc, 0);
+
+	while (inlen > 0) {
+
+		/* Submit chunk size source data per job */
+		srclen = NX_MIN(chunk, inlen);
+		/* Supply large target in case data expands */
+		dstlen = NX_MIN(2*srclen, outlen);
+
+		/* Page faults are handled by the user code */
+
+		/* Fault-in pages; an improved code wouldn't touch so
+		 * many pages but would try to estimate the
+		 * compression ratio and adjust both the src and dst
+		 * touch amounts.
+		 */
+		nx_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
+				1);
+		nx_touch_pages(srcbuf, srclen, pagelen, 0);
+		nx_touch_pages(dstbuf, dstlen, pagelen, 1);
+
+		cc = compress_fht_sample(
+			srcbuf, srclen,
+			dstbuf, dstlen,
+			lzcounts, cmdp, handle);
+
+		if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
+		    cc != ERR_NX_TRANSLATION) {
+			fprintf(stderr, "nx error: cc= %d\n", cc);
+			exit(-1);
+		}
+
+		/* Page faults are handled by the user code */
+		if (cc == ERR_NX_TRANSLATION) {
+			NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
+			NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
+				  fault_tries,
+				  (unsigned long long) cmdp->crb.csb.fsaddr));
+
+			fault_tries--;
+			if (fault_tries > 0) {
+				/* Touch the pages again and resubmit */
+				continue;
+			} else {
+				fprintf(stderr, "error: cannot progress; ");
+				fprintf(stderr, "too many faults\n");
+				exit(-1);
+			}
+		}
+
+		fault_tries = 50; /* Reset for the next chunk */
+
+		inlen     = inlen - srclen;
+		srcbuf    = srcbuf + srclen;
+		srctotlen = srctotlen + srclen;
+
+		/* Two possible locations for spbc depending on the function
+		 * code.
+		 */
+		spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
+			get32(cmdp->cpb, out_spbc_comp_with_count);
+		assert(spbc == srclen);
+
+		/* Target byte count */
+		tpbc = get32(cmdp->crb.csb, tpbc);
+		/* Target ending bit count */
+		tebc = getnn(cmdp->cpb, out_tebc);
+		NXPRT(fprintf(stderr, "compressed chunk %d ", spbc));
+		NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
+
+		if (inlen > 0) { /* More chunks to go */
+			set_bfinal(dstbuf, 0);
+			dstbuf    = dstbuf + tpbc;
+			dsttotlen = dsttotlen + tpbc;
+			outlen    = outlen - tpbc;
+			/* Round up to the next byte with a flush
+			 * block; do not set the BFINAL bit.
+			 */
+			flushlen  = append_sync_flush(dstbuf, tebc, 0);
+			dsttotlen = dsttotlen + flushlen;
+			outlen    = outlen - flushlen;
+			dstbuf    = dstbuf + flushlen;
+			NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
+					flushlen));
+		} else {  /* Done */
+			/* Set the BFINAL bit of the last block per Deflate
+			 * specification.
+			 */
+			set_bfinal(dstbuf, 1);
+			dstbuf    = dstbuf + tpbc;
+			dsttotlen = dsttotlen + tpbc;
+			outlen    = outlen - tpbc;
+		}
+
+		/* Resuming crc32 for the next chunk */
+		crc = get32(cmdp->cpb, out_crc);
+		put32(cmdp->cpb, in_crc, crc);
+		crc = be32toh(crc);
+	}
+
+	if (srctotlen == 0) {
+		/* Empty input: no job was submitted, so emit one empty
+		 * final fixed Huffman block (BFINAL=1, BTYPE=01, end of
+		 * block) to keep the gzip member valid.
+		 */
+		dstbuf[0] = 0x03;
+		dstbuf[1] = 0x00;
+		dstbuf    = dstbuf + 2;
+		dsttotlen = dsttotlen + 2;
+		outlen    = outlen - 2;
+	}
+
+	/* Append crc32 and ISIZE to the end.  ISIZE is the input size
+	 * modulo 2^32 stored little endian (RFC 1952); copying the
+	 * first four bytes of a size_t is only right on little endian.
+	 */
+	memcpy(dstbuf, &crc, 4);
+	isize = htole32((uint32_t) srctotlen);
+	memcpy(dstbuf+4, &isize, 4);
+	dsttotlen = dsttotlen + 8;
+	outlen    = outlen - 8;
+
+	/* Bounded formatting instead of assert() + strcpy()/strcat() */
+	namelen = snprintf(outname, sizeof(outname), "%s%s", argv[1], FEXT);
+	if (namelen < 0 || (size_t) namelen >= sizeof(outname)) {
+		fprintf(stderr, "error: file name too long: %s\n", argv[1]);
+		exit(-1);
+	}
+	if (write_output_file(outname, outbuf, dsttotlen)) {
+		fprintf(stderr, "write error: %s\n", outname);
+		exit(-1);
+	}
+
+	fprintf(stderr, "compressed %zu to %zu bytes total, ", srctotlen,
+		dsttotlen);
+	fprintf(stderr, "crc32 checksum = %08x\n", crc);
+
+	free(inbuf);
+	free(outbuf);
+	free(cmdp);
+
+	return 0;
+}
+
+/*
+ * SIGSEGV handler installed by main().  Logs the signal and records
+ * the faulting address in nx_fault_storage_address.
+ *
+ * NOTE(review): fprintf() is not async-signal-safe; acceptable for a
+ * test program only - confirm.
+ */
+void sigsegv_handler(int sig, siginfo_t *info, void *ctx)
+{
+	fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(),
+		sig, info->si_code, info->si_addr);
+
+	/* Publish the address for code outside the handler to inspect */
+	nx_fault_storage_address = info->si_addr;
+}
+
+/*
+ * Installs the SIGSEGV handler, opens the NX gzip engine, compresses
+ * the file named on the command line and releases the engine.
+ * Returns the result of compress_file(); exits on setup failure.
+ */
+int main(int argc, char **argv)
+{
+	int rc;
+	struct sigaction act;
+	void *handle;
+
+	nx_dbg = 0;
+	nx_gzip_log = NULL;
+
+	/* Zero the whole structure instead of naming each field; this
+	 * also covers members that not every libc declares.
+	 */
+	memset(&act, 0, sizeof(act));
+	act.sa_sigaction = sigsegv_handler;
+	act.sa_flags = SA_SIGINFO;
+	sigemptyset(&act.sa_mask);
+	if (sigaction(SIGSEGV, &act, NULL)) {
+		perror("sigaction");
+		exit(-1);
+	}
+
+	handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+	if (!handle) {
+		fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+		exit(-1);
+	}
+
+	rc = compress_file(argc, argv, handle);
+
+	nx_function_end(handle);
+
+	return rc;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
new file mode 100644
index 000000000000..d28e1efb527b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nx-gzip.h"
+#include "nx.h"
+#include "copy-paste.h"
+#include "nxu.h"
+#include "nx_dbg.h"
+#include <sys/platform/ppc.h>
+
+#define barrier()
+#define hwsync()    ({ asm volatile("hwsync" ::: "memory"); })
+
+#ifndef NX_NO_CPU_PRI
+#define cpu_pri_default()  ({ asm volatile ("or 2, 2, 2"); })
+#define cpu_pri_low()      ({ asm volatile ("or 31, 31, 31"); })
+#else
+#define cpu_pri_default()
+#define cpu_pri_low()
+#endif
+
+void *nx_fault_storage_address;
+
+struct nx_handle {
+	int fd;
+	int function;
+	void *paste_addr;
+};
+
+/*
+ * Open the device devname, open a VAS send window on it and map the
+ * paste address.
+ *
+ * devname: path of the device node
+ * pri:     value stored in the vas_id field of the window attributes
+ * handle:  receives the open fd and the paste address on success
+ *
+ * Returns 0 on success with the fd left open for the handle, or a
+ * negative errno value with the fd closed.
+ */
+static int open_device_nodes(char *devname, int pri, struct nx_handle *handle)
+{
+	int rc, fd, err;
+	void *addr;
+	struct vas_gzip_setup_attr txattr;
+
+	fd = open(devname, O_RDWR);
+	if (fd < 0) {
+		err = errno; /* save it: fprintf() may change errno */
+		fprintf(stderr, " open device name %s\n", devname);
+		return -err;
+	}
+
+	memset(&txattr, 0, sizeof(txattr));
+	txattr.version = 1;
+	txattr.vas_id = pri;
+	rc = ioctl(fd, VAS_GZIP_TX_WIN_OPEN, (unsigned long)&txattr);
+	if (rc < 0) {
+		err = errno;
+		fprintf(stderr, "ioctl() n %d, error %d\n", rc, err);
+		rc = -err;
+		goto out;
+	}
+
+	addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0ULL);
+	if (addr == MAP_FAILED) {
+		err = errno;
+		fprintf(stderr, "mmap() failed, errno %d\n", err);
+		rc = -err;
+		goto out;
+	}
+	handle->fd = fd;
+	handle->paste_addr = (void *)((char *)addr + 0x400);
+
+	/* Success: the fd now belongs to the handle and must stay open
+	 * until nx_function_end(); only the error paths close it.
+	 */
+	return 0;
+out:
+	close(fd);
+	return rc;
+}
+
+/*
+ * Allocate a handle and open the NX gzip device for it.
+ *
+ * function: must be NX_FUNC_COMP_GZIP
+ * pri:      passed through to open_device_nodes()
+ *
+ * Returns the handle to pass to nxu_run_job() and nx_function_end(),
+ * or NULL with errno set on failure.
+ */
+void *nx_function_begin(int function, int pri)
+{
+	int rc;
+	char *devname = "/dev/crypto/nx-gzip";
+	struct nx_handle *nxhandle;
+
+	if (function != NX_FUNC_COMP_GZIP) {
+		fprintf(stderr, " NX_FUNC_COMP_GZIP not found\n");
+		/* Set last so that fprintf() cannot overwrite it */
+		errno = EINVAL;
+		return NULL;
+	}
+
+
+	nxhandle = malloc(sizeof(*nxhandle));
+	if (!nxhandle) {
+		fprintf(stderr, " No memory\n");
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	nxhandle->function = function;
+	rc = open_device_nodes(devname, pri, nxhandle);
+	if (rc < 0) {
+		fprintf(stderr, " open_device_nodes failed\n");
+		/* The handle never reaches the caller; release it here */
+		free(nxhandle);
+		errno = -rc;
+		return NULL;
+	}
+
+	return nxhandle;
+}
+
+/*
+ * Release a handle obtained from nx_function_begin(): unmap the
+ * paste region, close the device and free the handle.
+ *
+ * Returns the result of munmap(); the fd and the handle are released
+ * even when munmap() fails, so the handle is invalid afterwards.
+ */
+int nx_function_end(void *handle)
+{
+	int rc = 0;
+	struct nx_handle *nxhandle = handle;
+
+	/* Cast first: arithmetic on void * is not standard C */
+	rc = munmap((char *) nxhandle->paste_addr - 0x400, 4096);
+	if (rc < 0)
+		fprintf(stderr, "munmap() failed, errno %d\n", errno);
+
+	close(nxhandle->fd);
+	free(nxhandle);
+
+	return rc;
+}
+
+/*
+ * Poll the CSB valid bit of cmdp until it is set.
+ *
+ * Returns 0 once the CSB is valid, -EAGAIN when
+ * nx_fault_storage_address became nonzero while polling, and
+ * -ETIMEDOUT when the CSB is still not valid after more than
+ * CSB_MAX_POLL polls.
+ */
+static int nx_wait_for_csb(struct nx_gzip_crb_cpb_t *cmdp)
+{
+	long poll = 0;
+	uint64_t t;
+
+	/* Save power and let other threads use the h/w. top may show
+	 * 100% but only because OS doesn't know we slowed this
+	 * h/w thread while polling. We're letting other threads have
+	 * higher throughput on the core.
+	 */
+	cpu_pri_low();
+
+#define CSB_MAX_POLL 200000000UL
+#define USLEEP_TH     300000UL
+
+	/* Start time of the wait, in timebase ticks */
+	t = __ppc_get_timebase();
+
+	while (getnn(cmdp->crb.csb, csb_v) == 0) {
+		++poll;
+		hwsync();
+
+		cpu_pri_low();
+
+		/* usleep(0) takes around 29000 ticks ~60 us.
+		 * 300000 is spinning for about 600 us then
+		 * start sleeping.
+		 */
+		if ((__ppc_get_timebase() - t) > USLEEP_TH) {
+			cpu_pri_default();
+			usleep(1);
+		}
+
+		/* Give up; the check after the loop reports the timeout */
+		if (poll > CSB_MAX_POLL)
+			break;
+
+		/* Fault address from signal handler */
+		if (nx_fault_storage_address) {
+			cpu_pri_default();
+			return -EAGAIN;
+		}
+
+	}
+
+	cpu_pri_default();
+
+	/* hw has updated csb and output buffer */
+	hwsync();
+
+	/* Check CSB flags. */
+	if (getnn(cmdp->crb.csb, csb_v) == 0) {
+		fprintf(stderr, "CSB still not valid after %d polls.\n",
+			(int) poll);
+		prt_err("CSB still not valid after %d polls, giving up.\n",
+			(int) poll);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/*
+ * Submit the request block in cmdp to the accelerator through the
+ * paste address of handle and wait for its CSB to become valid.
+ *
+ * The copy/paste is retried up to 5000 times.  When the wait reports
+ * -EAGAIN, the address in nx_fault_storage_address is touched and the
+ * job is submitted again.
+ *
+ * Returns 0 when the CSB is valid; otherwise a negative errno value:
+ * the failure reported by nx_wait_for_csb(), or -EBUSY when the last
+ * paste attempt was not accepted.
+ */
+int nxu_run_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	int i, ret, retries;
+	/* Counts sleeping attempts; it must outlive one loop iteration
+	 * for the once-in-100 message below to be rate limited.
+	 */
+	unsigned int pr = 0;
+	struct nx_handle *nxhandle = handle;
+
+	assert(handle != NULL);
+	i = 0;
+	retries = 5000;
+	ret = -EBUSY;
+	while (i++ < retries) {
+		int cr;
+
+		hwsync();
+		vas_copy(&cmdp->crb, 0);
+		cr = vas_paste(nxhandle->paste_addr, 0);
+		hwsync();
+
+		NXPRT(fprintf(stderr, "Paste attempt %d/%d returns 0x%x\n",
+				i, retries, cr));
+
+		if ((cr == 2) || (cr == 3)) {
+
+			ret = nx_wait_for_csb(cmdp);
+			if (!ret) {
+				goto out;
+			} else if (ret == -EAGAIN) {
+				long x;
+
+				prt_err("Touching address %p, 0x%lx\n",
+					 nx_fault_storage_address,
+					 *(long *) nx_fault_storage_address);
+				x = *(long *) nx_fault_storage_address;
+				*(long *) nx_fault_storage_address = x;
+				nx_fault_storage_address = 0;
+				continue;
+			} else {
+				prt_err("wait_for_csb() returns %d\n", ret);
+				break;
+			}
+		} else {
+			/* Keep the paste value out of the return code: a
+			 * rejected paste must not look like success.
+			 */
+			ret = -EBUSY;
+			if (i < 10) {
+				/* spin for few ticks */
+#define SPIN_TH 500UL
+				uint64_t fail_spin;
+
+				fail_spin = __ppc_get_timebase();
+				while ((__ppc_get_timebase() - fail_spin) <
+					 SPIN_TH)
+					;
+			} else {
+				/* sleep */
+				if (pr++ % 100 == 0) {
+					prt_err("Paste attempt %d/", i);
+					prt_err("%d, failed pid= %d\n", retries,
+						getpid());
+				}
+				usleep(1);
+			}
+			continue;
+		}
+	}
+
+out:
+	cpu_pri_default();
+
+	return ret;
+}
-- 
2.21.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH V2 4/5] selftests/powerpc: Add NX-GZIP engine decompress testcase
  2020-03-27 18:16 [PATCH V2 0/5] selftests/powerpc: Add NX-GZIP engine testcase Raphael Moreira Zinsly
                   ` (2 preceding siblings ...)
  2020-03-27 18:16 ` [PATCH V2 3/5] selftests/powerpc: Add NX-GZIP engine compress testcase Raphael Moreira Zinsly
@ 2020-03-27 18:16 ` Raphael Moreira Zinsly
  2020-03-27 18:16 ` [PATCH V2 5/5] selftests/powerpc: Add README for GZIP engine tests Raphael Moreira Zinsly
  4 siblings, 0 replies; 9+ messages in thread
From: Raphael Moreira Zinsly @ 2020-03-27 18:16 UTC (permalink / raw)
  To: linuxppc-dev, linux-crypto
  Cc: herbert, mpe, haren, abali, dja, Raphael Moreira Zinsly

Include a decompression testcase for the powerpc NX-GZIP
engine.

Signed-off-by: Bulent Abali <abali@us.ibm.com>
Signed-off-by: Raphael Moreira Zinsly <rzinsly@linux.ibm.com>
---
 .../selftests/powerpc/nx-gzip/Makefile        |    7 +-
 .../selftests/powerpc/nx-gzip/gunz_test.c     | 1078 +++++++++++++++++
 2 files changed, 1082 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gunz_test.c

diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
index ab903f63bbbd..82abc19a49a0 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/Makefile
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -1,9 +1,9 @@
 CC = gcc
 CFLAGS = -O3
 INC = ./inc
-SRC = gzfht_test.c
+SRC = gzfht_test.c gunz_test.c
 OBJ = $(SRC:.c=.o)
-TESTS = gzfht_test
+TESTS = gzfht_test gunz_test
 EXTRA_SOURCES = gzip_vas.c
 
 all:	$(TESTS)
@@ -16,6 +16,7 @@ $(TESTS): $(OBJ)
 
 run_tests: $(TESTS)
 	./gzfht_test gzip_vas.c
+	./gunz_test gzip_vas.c.nx.gz
 
 clean:
-	rm -f $(TESTS) *.o *~ *.gz
+	rm -f $(TESTS) *.o *~ *.gz *.gunzip
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
new file mode 100644
index 000000000000..82eb268a8397
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
@@ -0,0 +1,1078 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gunzip sample code for demonstrating the P9 NX hardware
+ * interface.  Not intended for production use or for performance or
+ * compression ratio measurements.  Note also that /dev/crypto/nx-gzip,
+ * VAS and skiboot support are required.
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ * Definitions of acronyms used here.  See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce:       completion extension
+ * cpb:      coprocessor parameter block (metadata)
+ * crb:      coprocessor request block (command)
+ * csb:      coprocessor status block (status)
+ * dht:      dynamic huffman table
+ * dde:      data descriptor element (address, length)
+ * ddl:      list of ddes
+ * dh/fh:    dynamic and fixed huffman types
+ * fc:       coprocessor function code
+ * histlen:  history/dictionary length
+ * history:  sliding window of up to 32KB of data
+ * lzcount:  Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt:     source final block type; last block's type during decomp
+ * spbc:     source processed byte count
+ * subc:     source unprocessed bit count
+ * tebc:     target ending bit count; valid bits in the last byte
+ * tpbc:     target processed byte count
+ * vas:      virtual accelerator switch; the user mode interface
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+#include "crb.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+
+#define NX_MIN(X, Y) (((X) < (Y))?(X):(Y))
+#define NX_MAX(X, Y) (((X) > (Y))?(X):(Y))
+
+#define GETINPC(X) fgetc(X)
+#define FNAME_MAX 1024
+
+/* fifo queue management: used bytes start at cur in a ring of len bytes */
+#define fifo_used_bytes(used) (used)
+#define fifo_free_bytes(used, len) ((len)-(used))
+/* free bytes: "first" runs up to the buffer end, "last" ends at cur */
+#define fifo_free_first_bytes(cur, used, len)  ((((cur)+(used)) <= (len)) \
+						  ? (len)-((cur)+(used)) : 0)
+#define fifo_free_last_bytes(cur, used, len)   ((((cur)+(used)) <= (len)) \
+						  ? (cur) : (len)-(used))
+/* used bytes: "first" starts at cur, "last" is the part wrapped to offset 0 */
+#define fifo_used_first_bytes(cur, used, len)  ((((cur)+(used)) <= (len)) \
+						  ? (used) : (len)-(cur))
+#define fifo_used_last_bytes(cur, used, len)   ((((cur)+(used)) <= (len)) \
+						  ? 0 : ((used)+(cur))-(len))
+/* first and last free parts start here */
+#define fifo_free_first_offset(cur, used)      ((cur)+(used))
+#define fifo_free_last_offset(cur, used, len)  \
+					   fifo_used_last_bytes(cur, used, len)
+/* first and last used parts start here */
+#define fifo_used_first_offset(cur)            (cur)
+#define fifo_used_last_offset(cur)             (0)
+
+const int fifo_in_len = 1<<24;	/* 16 MiB of compressed input */
+const int fifo_out_len = 1<<24;	/* 16 MiB of decompressed output */
+const int page_sz = 1<<16;	/* 64 KiB stride used when touching pages */
+const int line_sz = 1<<7;	/* 128 bytes; fifo alignment and gap size */
+const int window_max = 1<<15;	/* 32 KiB cap on the resume history */
+const int retry_max = 50;	/* page fault retry budget per NX job */
+
+void *nx_fault_storage_address;	/* last si_addr seen by sigsegv_handler */
+
+/*
+ * Fault in pages prior to NX job submission by touching one byte per
+ * page_len stride plus the last byte of buf.  Set wr=1 to also write the
+ * byte back (use for NX target pages; a read alone may map only the system
+ * zero page), wr=0 for NX source pages.  Returns 0, or -1 on a bad argument.
+ */
+static int nx_touch_pages(void *buf, long buf_len, long page_len, int wr)
+{
+	char *begin = buf;
+	char *end = (char *) buf + buf_len - 1;
+	volatile char t;	/* volatile: -O3 must not elide the touches */
+
+	assert(buf_len >= 0 && !!buf);
+
+	NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf,
+			(void *)(begin + buf_len), buf_len, wr));
+
+	if (buf_len <= 0 || buf == NULL || page_len <= 0)
+		return -1;
+
+	do {
+		t = *begin;
+		if (wr)
+			*begin = t;
+		begin = begin + page_len;
+	} while (begin < end);
+
+	/* When buf_len is small or buf tail is in another page. */
+	t = *end;
+	if (wr)
+		*end = t;
+
+	return 0;
+}
+
+void sigsegv_handler(int sig, siginfo_t *info, void *ctx)
+{
+	fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(),
+	       sig, info->si_code, info->si_addr);
+	/* Publish the faulting address through the global */
+	nx_fault_storage_address = info->si_addr;
+}
+
+/*
+ * Adds an (address, len) pair to the list of ddes (ddl) and updates
+ * the base dde.  ddl[0] is the only dde in a direct dde which
+ * contains a single (addr,len) pair.  For more pairs, ddl[0] becomes
+ * the indirect (base) dde that points to a list of direct ddes.
+ * See Section 6.4 of the NX-gzip user manual for DDE description.
+ * Addr=NULL, len=0 clears the ddl[0].  Returns the total number of
+ * bytes in ddl.  Caller is responsible for allocating the array of
+ * nx_dde_t *ddl.  If N addresses are required in the scatter-gather
+ * list, the ddl array must have N+1 entries minimum.
+ */
+static inline uint32_t nx_append_dde(struct nx_dde_t *ddl, void *addr,
+					uint32_t len)
+{
+	uint32_t ddecnt;
+	uint32_t bytes;
+
+	if (addr == NULL && len == 0) {
+		clearp_dde(ddl);
+		return 0;
+	}
+
+	NXPRT(fprintf(stderr, "%d: %s addr %p len %x\n", __LINE__, __func__,
+			addr, len));
+
+	/* Number of ddes in the dde list ; == 0 when it is a direct dde */
+	ddecnt = getpnn(ddl, dde_count);
+	bytes = getp32(ddl, ddebc);
+
+	if (ddecnt == 0 && bytes == 0) {
+		/* First dde is unused; make it a direct dde */
+		bytes = len;
+		putp32(ddl, ddebc, bytes);
+		putp64(ddl, ddead, (uint64_t) addr);
+	} else if (ddecnt == 0) {
+		/* Converting direct to indirect dde
+		 * ddl[0] becomes head dde of ddl
+		 * copy direct to indirect first.
+		 */
+		ddl[1] = ddl[0];
+
+		/* Add the new dde next */
+		clear_dde(ddl[2]);
+		put32(ddl[2], ddebc, len);
+		put64(ddl[2], ddead, (uint64_t) addr);
+
+		/* Ddl head points to 2 direct ddes */
+		ddecnt = 2;
+		putpnn(ddl, dde_count, ddecnt);
+		bytes = bytes + len;
+		putp32(ddl, ddebc, bytes);
+		/* Pointer to the first direct dde */
+		putp64(ddl, ddead, (uint64_t) &ddl[1]);
+	} else {
+		/* Append a dde to an existing indirect ddl */
+		++ddecnt;
+		clear_dde(ddl[ddecnt]);
+		put64(ddl[ddecnt], ddead, (uint64_t) addr);
+		put32(ddl[ddecnt], ddebc, len);
+
+		putpnn(ddl, dde_count, ddecnt);
+		bytes = bytes + len;
+		putp32(ddl, ddebc, bytes); /* byte sum of all dde */
+	}
+	return bytes;
+}
+
+/*
+ * Touch the pages backing the first buf_sz bytes described by ddep, which
+ * may be a direct dde or an indirect list; pages past the buf_sz-th byte's
+ * page are not touched.  Set buf_sz = 0 to touch all pages described by
+ * the ddep.  Returns ERR_NX_OK, or ERR_NX_EXCESSIVE_DDE if the list holds
+ * more than MAX_DDE_COUNT entries.
+ */
+static int nx_touch_pages_dde(struct nx_dde_t *ddep, long buf_sz, long page_sz,
+				int wr)
+{
+	uint32_t indirect_count;
+	uint32_t buf_len;
+	long total;
+	uint64_t buf_addr;
+	struct nx_dde_t *dde_list;
+	uint32_t i;
+
+	assert(!!ddep);
+
+	indirect_count = getpnn(ddep, dde_count);
+
+	NXPRT(fprintf(stderr, "%s dde_count %d request len ", __func__,
+			indirect_count));
+	NXPRT(fprintf(stderr, "0x%lx\n", buf_sz));
+
+	if (indirect_count == 0) {
+		/* Direct dde */
+		buf_len = getp32(ddep, ddebc);
+		buf_addr = getp64(ddep, ddead);
+
+		NXPRT(fprintf(stderr, "touch direct ddebc 0x%x ddead %p\n",
+				buf_len, (void *)buf_addr));
+
+		if (buf_sz == 0)
+			nx_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+		else
+			nx_touch_pages((void *)buf_addr, NX_MIN(buf_len,
+					buf_sz), page_sz, wr);
+
+		return ERR_NX_OK;
+	}
+
+	/* Indirect dde */
+	if (indirect_count > MAX_DDE_COUNT)
+		return ERR_NX_EXCESSIVE_DDE;
+
+	/* First address of the list */
+	dde_list = (struct nx_dde_t *) getp64(ddep, ddead);
+
+	if (buf_sz == 0)
+		buf_sz = getp32(ddep, ddebc);
+
+	total = 0;
+	for (i = 0; i < indirect_count; i++) {
+		buf_len = get32(dde_list[i], ddebc);
+		buf_addr = get64(dde_list[i], ddead);
+		total += buf_len;
+
+		NXPRT(fprintf(stderr, "touch loop len 0x%x ddead %p total ",
+				buf_len, (void *)buf_addr));
+		NXPRT(fprintf(stderr, "0x%lx\n", total));
+
+		/* This buffer crosses buf_sz: drop the bytes beyond it */
+		if (total > buf_sz) {
+			buf_len = buf_len - (total - buf_sz);
+			nx_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+			NXPRT(fprintf(stderr, "touch loop break len 0x%x ",
+				      buf_len));
+			NXPRT(fprintf(stderr, "ddead %p\n", (void *)buf_addr));
+			break;
+		}
+		nx_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+	}
+	return ERR_NX_OK;
+}
+
+/*
+ * Submit one job: src and dst are scatter gather lists, cmdp carries the
+ * NX function code and parameters.  Returns nxu_run_job()'s code or csb_cc.
+ */
+static int nx_submit_job(struct nx_dde_t *src, struct nx_dde_t *dst,
+			 struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	int cc;
+	uint64_t csbaddr;
+
+	memset((void *)&cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+	cmdp->crb.source_dde = *src;
+	cmdp->crb.target_dde = *dst;
+
+	/* Point the crb at its status block; tpbc is read back from it */
+	csbaddr = ((uint64_t) &cmdp->crb.csb) & csb_address_mask;
+	put64(cmdp->crb, csb_address, csbaddr);
+
+	/* NX reports input bytes in spbc; cleared */
+	cmdp->cpb.out_spbc_comp_wrap = 0;
+	cmdp->cpb.out_spbc_comp_with_count = 0;
+	cmdp->cpb.out_spbc_decomp = 0;
+
+	/* Reset the output checksums to their initial values */
+	put32(cmdp->cpb, out_crc, INIT_CRC);
+	put32(cmdp->cpb, out_adler, INIT_ADLER);
+
+	cc = nxu_run_job(cmdp, handle);
+	/* nxu_run_job() returned 0: report the completion code from the csb */
+	if (!cc)
+		cc = getnn(cmdp->crb.csb, csb_cc);	/* CC Table 6-8 */
+
+	return cc;
+}
+
+/*
+ * Gunzip argv[1] (or stdin) through NX into <fname>.nx.gunzip (or stdout).
+ * Returns 0 if the stored crc and isize match the computed ones, else -1.
+ */
+int decompress_file(int argc, char **argv, void *devhandle)
+{
+	FILE *inpf;
+	FILE *outf;
+
+	int c, expect, i, cc, rc = 0;
+	char gzfname[FNAME_MAX];
+
+	/* Queuing, file ops, byte counting */
+	char *fifo_in, *fifo_out;
+	int used_in, cur_in, used_out, cur_out, read_sz, n;
+	int first_free, last_free, first_used, last_used;
+	int first_offset, last_offset;
+	int write_sz, free_space, source_sz;
+	int source_sz_estimate, target_sz_estimate;
+	uint64_t last_comp_ratio; /* 1000 max */
+	uint64_t total_out;
+	int is_final, is_eof;
+
+	/* nx hardware */
+	int sfbt, subc, spbc, tpbc, nx_ce, fc, resuming = 0;
+	int history_len = 0;
+	struct nx_gzip_crb_cpb_t cmd, *cmdp;
+	struct nx_dde_t *ddl_in;
+	struct nx_dde_t dde_in[6] __aligned(128);
+	struct nx_dde_t *ddl_out;
+	struct nx_dde_t dde_out[6] __aligned(128);
+	int pgfault_retries;
+
+	/* when using mmap'ed files */
+	off_t input_file_offset = 0;
+
+	if (argc > 2) {
+		fprintf(stderr, "usage: %s <fname> or stdin\n", argv[0]);
+		fprintf(stderr, "    writes to stdout or <fname>.nx.gunzip\n");
+		return -1;
+	}
+
+	if (argc == 1) {
+		inpf = stdin;
+		outf = stdout;
+	} else if (argc == 2) {
+		char w[1024];
+		char *wp;
+
+		inpf = fopen(argv[1], "r");
+		if (inpf == NULL) {
+			perror(argv[1]);
+			return -1;
+		}
+
+		/* Make a new file name to write to.  Ignoring '.gz' */
+		wp = (NULL != (wp = strrchr(argv[1], '/'))) ? (wp+1) : argv[1];
+		snprintf(w, sizeof(w), "%.*s.nx.gunzip",
+			 (int)(sizeof(w) - sizeof(".nx.gunzip")), wp);
+
+		outf = fopen(w, "w");
+		if (outf == NULL) {
+			perror(w);
+			return -1;
+		}
+	}
+
+	/* Decode the gzip header */
+	c = GETINPC(inpf); expect = 0x1f; /* ID1 */
+	if (c != expect)
+		goto err1;
+
+	c = GETINPC(inpf); expect = 0x8b; /* ID2 */
+	if (c != expect)
+		goto err1;
+
+	c = GETINPC(inpf); expect = 0x08; /* CM */
+	if (c != expect)
+		goto err1;
+
+	int flg = GETINPC(inpf); /* FLG */
+
+	/* Reserved bits, FEXTRA (0x4) and FCOMMENT (0x10) are not handled */
+	if (flg == EOF || flg & 0xE0 || flg & 0x14)
+		goto err2;
+	fprintf(stderr, "gzHeader FLG %x\n", flg);
+
+	/* Read 6 bytes; ignoring the MTIME, XFL, OS fields in this
+	 * sample code.
+	 */
+	for (i = 0; i < 6; i++) {
+		/* Use int c so that EOF stays distinct from a 0xff byte */
+		c = GETINPC(inpf);
+		if (c == EOF)
+			goto err3;
+		fprintf(stderr, "%02x ", c);
+		if (i == 5)
+			fprintf(stderr, "\n");
+	}
+	fprintf(stderr, "gzHeader MTIME, XFL, OS ignored\n");
+
+	/* FNAME */
+	if (flg & 0x8) {
+		int k = 0;
+
+		do {
+			c = GETINPC(inpf);
+			if (c == EOF || k >= FNAME_MAX)
+				goto err3;
+			gzfname[k++] = c;
+		} while (c);
+		fprintf(stderr, "gzHeader FNAME: %s\n", gzfname);
+	}
+
+	/* FHCRC */
+	if (flg & 0x2) {
+		c = GETINPC(inpf);
+		if (c == EOF)
+			goto err3;
+		c = GETINPC(inpf);
+		if (c == EOF)
+			goto err3;
+		fprintf(stderr, "gzHeader FHCRC: ignored\n");
+	}
+
+	used_in = cur_in = used_out = cur_out = 0;
+	is_final = is_eof = 0;
+
+	/* One extra page prevents page faults due to NX overfetching.  The
+	 * (uintptr_t) cast avoids an int-to-pointer warning without -std=c11.
+	 */
+	fifo_in  = (char *)(uintptr_t)aligned_alloc(line_sz,
+				   fifo_in_len + page_sz);
+	fifo_out = (char *)(uintptr_t)aligned_alloc(line_sz,
+				   fifo_out_len + page_sz + line_sz);
+	if (fifo_in == NULL || fifo_out == NULL) {
+		fprintf(stderr, "error: cannot allocate fifo buffers\n");
+		rc = -1;
+		goto err5;
+	}
+	/* Leave unused space due to history rounding rules */
+	fifo_out = fifo_out + line_sz;
+	nx_touch_pages(fifo_out, fifo_out_len, page_sz, 1);
+
+	ddl_in  = &dde_in[0];
+	ddl_out = &dde_out[0];
+	cmdp = &cmd;
+	memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+read_state:
+
+	/* Read from .gz file */
+	NXPRT(fprintf(stderr, "read_state:\n"));
+
+	if (is_eof != 0)
+		goto write_state;
+
+	/* We read in to fifo_in in two steps: first: read in from
+	 * cur_in to the end of the buffer.  last: if free space wrapped
+	 * around, read from fifo_in offset 0 to offset cur_in.
+	 */
+
+	/* Reset fifo head to reduce unnecessary wrap arounds */
+	cur_in = (used_in == 0) ? 0 : cur_in;
+
+	/* Free space total is reduced by a gap */
+	free_space = NX_MAX(0, fifo_free_bytes(used_in, fifo_in_len)
+			    - line_sz);
+
+	/* Free space may wrap around as first and last */
+	first_free = fifo_free_first_bytes(cur_in, used_in, fifo_in_len);
+	last_free  = fifo_free_last_bytes(cur_in, used_in, fifo_in_len);
+
+	/* Start offsets of the free memory */
+	first_offset = fifo_free_first_offset(cur_in, used_in);
+	last_offset  = fifo_free_last_offset(cur_in, used_in, fifo_in_len);
+
+	/* Reduce read_sz because of the line_sz gap */
+	read_sz = NX_MIN(free_space, first_free);
+	n = 0;
+	if (read_sz > 0) {
+		/* Read in to offset cur_in + used_in */
+		n = fread(fifo_in + first_offset, 1, read_sz, inpf);
+		used_in = used_in + n;
+		free_space = free_space - n;
+		assert(n <= read_sz);
+		if (n != read_sz) {
+			/* Either EOF or error; exit the read loop */
+			is_eof = 1;
+			goto write_state;
+		}
+	}
+
+	/* If free space wrapped around */
+	if (last_free > 0) {
+		/* Reduce read_sz because of the line_sz gap */
+		read_sz = NX_MIN(free_space, last_free);
+		n = 0;
+		if (read_sz > 0) {
+			n = fread(fifo_in + last_offset, 1, read_sz, inpf);
+			used_in = used_in + n;       /* Increase used space */
+			free_space = free_space - n; /* Decrease free space */
+			assert(n <= read_sz);
+			if (n != read_sz) {
+				/* Either EOF or error; exit the read loop */
+				is_eof = 1;
+				goto write_state;
+			}
+		}
+	}
+
+	/* At this point we have used_in bytes in fifo_in with the
+	 * data head starting at cur_in and possibly wrapping around.
+	 */
+
+write_state:
+
+	/* Write decompressed data to output file */
+	NXPRT(fprintf(stderr, "write_state:\n"));
+
+	if (used_out == 0)
+		goto decomp_state;
+
+	/* If fifo_out has data waiting, write it out to the file to
+	 * make free target space for the accelerator used bytes in
+	 * the first and last parts of fifo_out.
+	 */
+
+	first_used = fifo_used_first_bytes(cur_out, used_out, fifo_out_len);
+	last_used  = fifo_used_last_bytes(cur_out, used_out, fifo_out_len);
+
+	write_sz = first_used;
+
+	n = 0;
+	if (write_sz > 0) {
+		n = fwrite(fifo_out + cur_out, 1, write_sz, outf);
+		used_out = used_out - n;
+		/* Move head of the fifo */
+		cur_out = (cur_out + n) % fifo_out_len;
+		assert(n <= write_sz);
+		if (n != write_sz) {
+			fprintf(stderr, "error: write\n");
+			rc = -1;
+			goto err5;
+		}
+	}
+
+	if (last_used > 0) { /* If more data available in the last part */
+		write_sz = last_used; /* Keep it here for later */
+		n = 0;
+		if (write_sz > 0) {
+			n = fwrite(fifo_out, 1, write_sz, outf);
+			used_out = used_out - n;
+			cur_out = (cur_out + n) % fifo_out_len;
+			assert(n <= write_sz);
+			if (n != write_sz) {
+				fprintf(stderr, "error: write\n");
+				rc = -1;
+				goto err5;
+			}
+		}
+	}
+
+decomp_state:
+
+	/* NX decompresses input data */
+	NXPRT(fprintf(stderr, "decomp_state:\n"));
+
+	if (is_final)
+		goto finish_state;
+
+	/* Address/len lists */
+	clearp_dde(ddl_in);
+	clearp_dde(ddl_out);
+
+	/* FC, CRC, HistLen, Table 6-6 */
+	if (resuming) {
+		/* Resuming a partially decompressed input.
+		 * The key to resume is supplying the 32KB
+		 * dictionary (history) to NX, which is basically
+		 * the last 32KB of output produced.
+		 */
+		fc = GZIP_FC_DECOMPRESS_RESUME;
+
+		cmdp->cpb.in_crc   = cmdp->cpb.out_crc;
+		cmdp->cpb.in_adler = cmdp->cpb.out_adler;
+
+		/* Round up the history size to quadword.  Section 2.10 */
+		history_len = (history_len + 15) / 16;
+		putnn(cmdp->cpb, in_histlen, history_len);
+		history_len = history_len * 16; /* bytes */
+
+		if (history_len > 0) {
+			/* Chain in the history buffer to the DDE list */
+			if (cur_out >= history_len) {
+				nx_append_dde(ddl_in, fifo_out
+					      + (cur_out - history_len),
+					      history_len);
+			} else {
+				nx_append_dde(ddl_in, fifo_out
+					      + ((fifo_out_len + cur_out)
+					      - history_len),
+					      history_len - cur_out);
+				/* Up to 32KB history wraps around fifo_out */
+				nx_append_dde(ddl_in, fifo_out, cur_out);
+			}
+		}
+	} else {
+		/* First decompress job */
+		fc = GZIP_FC_DECOMPRESS;
+
+		history_len = 0;
+		/* Writing 0 clears out subc as well */
+		cmdp->cpb.in_histlen = 0;
+		total_out = 0;
+
+		put32(cmdp->cpb, in_crc, INIT_CRC);
+		put32(cmdp->cpb, in_adler, INIT_ADLER);
+		put32(cmdp->cpb, out_crc, INIT_CRC);
+		put32(cmdp->cpb, out_adler, INIT_ADLER);
+
+		/* Assuming 10% compression ratio initially; use the
+		 * most recently measured compression ratio as a
+		 * heuristic to estimate the input and output
+		 * sizes.  If we give too much input, the target buffer
+		 * overflows and NX cycles are wasted, and then we
+		 * must retry with smaller input size.  1000 is 100%.
+		 */
+		last_comp_ratio = 100UL;
+	}
+	cmdp->crb.gzip_fc = 0;
+	putnn(cmdp->crb, gzip_fc, fc);
+
+	/*
+	 * NX source buffers
+	 */
+	first_used = fifo_used_first_bytes(cur_in, used_in, fifo_in_len);
+	last_used = fifo_used_last_bytes(cur_in, used_in, fifo_in_len);
+
+	if (first_used > 0)
+		nx_append_dde(ddl_in, fifo_in + cur_in, first_used);
+
+	if (last_used > 0)
+		nx_append_dde(ddl_in, fifo_in, last_used);
+
+	/*
+	 * NX target buffers
+	 */
+	first_free = fifo_free_first_bytes(cur_out, used_out, fifo_out_len);
+	last_free = fifo_free_last_bytes(cur_out, used_out, fifo_out_len);
+
+	/* Reduce output free space amount not to overwrite the history */
+	int target_max = NX_MAX(0, fifo_free_bytes(used_out, fifo_out_len)
+				- (1<<16));
+
+	NXPRT(fprintf(stderr, "target_max %d (0x%x)\n", target_max,
+		      target_max));
+
+	first_free = NX_MIN(target_max, first_free);
+	if (first_free > 0) {
+		first_offset = fifo_free_first_offset(cur_out, used_out);
+		nx_append_dde(ddl_out, fifo_out + first_offset, first_free);
+	}
+
+	if (last_free > 0) {
+		last_free = NX_MIN(target_max - first_free, last_free);
+		if (last_free > 0) {
+			last_offset = fifo_free_last_offset(cur_out, used_out,
+							    fifo_out_len);
+			nx_append_dde(ddl_out, fifo_out + last_offset,
+				      last_free);
+		}
+	}
+
+	/* Target buffer size is used to limit the source data size
+	 * based on previous measurements of compression ratio.
+	 */
+
+	/* source_sz includes history */
+	source_sz = getp32(ddl_in, ddebc);
+	assert(source_sz > history_len);
+	source_sz = source_sz - history_len;
+
+	/* Estimating how much source is needed to 3/4 fill a
+	 * target_max size target buffer.  If we overshoot, then NX
+	 * must repeat the job with smaller input and we waste
+	 * bandwidth.  If we undershoot then we use more NX calls than
+	 * necessary.
+	 */
+
+	source_sz_estimate = ((uint64_t)target_max * last_comp_ratio * 3UL)
+				/ 4000;
+
+	if (source_sz_estimate < source_sz) {
+		/* Target might be small, therefore limiting the
+		 * source data.
+		 */
+		source_sz = source_sz_estimate;
+		target_sz_estimate = target_max;
+	} else {
+		/* Source file might be small, therefore limiting target
+		 * touch pages to a smaller value to save processor cycles.
+		 */
+		target_sz_estimate = ((uint64_t)source_sz * 1000UL)
+					/ (last_comp_ratio + 1);
+		target_sz_estimate = NX_MIN(2 * target_sz_estimate,
+					    target_max);
+	}
+
+	source_sz = source_sz + history_len;
+
+	/* Some NX condition codes require submitting the NX job again.
+	 * Kernel doesn't handle NX page faults. Expects user code to
+	 * touch pages.
+	 */
+	pgfault_retries = retry_max;
+
+restart_nx:
+
+	putp32(ddl_in, ddebc, source_sz);
+
+	/* Fault in pages */
+	nx_touch_pages_dde(ddl_in, 0, page_sz, 0);
+	nx_touch_pages_dde(ddl_out, target_sz_estimate, page_sz, 1);
+
+	/* Send job to NX */
+	cc = nx_submit_job(ddl_in, ddl_out, cmdp, devhandle);
+
+	switch (cc) {
+
+	case ERR_NX_TRANSLATION:
+
+		/* We touched the pages ahead of time.  In the most common case
+		 * we shouldn't be here.  But maybe some pages were paged out.
+		 * Kernel should have placed the faulting address to fsaddr.
+		 */
+		NXPRT(fprintf(stderr, "ERR_NX_TRANSLATION %p\n",
+			      (void *)cmdp->crb.csb.fsaddr));
+
+		/* Touch 1 byte, read-only  */
+		nx_touch_pages((void *)cmdp->crb.csb.fsaddr, 1, page_sz, 0);
+
+		if (pgfault_retries == retry_max) {
+			/* Try once with exact number of pages */
+			--pgfault_retries;
+			goto restart_nx;
+		} else if (pgfault_retries > 0) {
+			/* If still faulting try fewer input pages
+			 * assuming memory outage
+			 */
+			if (source_sz > page_sz)
+				source_sz = NX_MAX(source_sz / 2, page_sz);
+			--pgfault_retries;
+			goto restart_nx;
+		} else {
+			fprintf(stderr, "cannot make progress; too many ");
+			fprintf(stderr, "page fault retries cc= %d\n", cc);
+			rc = -1;
+			goto err5;
+		}
+
+	case ERR_NX_DATA_LENGTH:
+
+		NXPRT(fprintf(stderr, "ERR_NX_DATA_LENGTH; "));
+		NXPRT(fprintf(stderr, "stream may have trailing data\n"));
+
+		/* Not an error in the most common case; it just says
+		 * there is trailing data that we must examine.
+		 *
+		 * CC=3 CE(1)=0 CE(0)=1 indicates partial completion
+		 * Fig.6-7 and Table 6-8.
+		 */
+		nx_ce = get_csb_ce_ms3b(cmdp->crb.csb);
+
+		if (!csb_ce_termination(nx_ce) &&
+		    csb_ce_partial_completion(nx_ce)) {
+			/* Check CPB for more information
+			 * spbc and tpbc are valid
+			 */
+			sfbt = getnn(cmdp->cpb, out_sfbt); /* Table 6-4 */
+			subc = getnn(cmdp->cpb, out_subc); /* Table 6-4 */
+			spbc = get32(cmdp->cpb, out_spbc_decomp);
+			tpbc = get32(cmdp->crb.csb, tpbc);
+			assert(target_max >= tpbc);
+
+			goto ok_cc3; /* not an error */
+		} else {
+			/* History length error when CE(1)=1 CE(0)=0. */
+			rc = -1;
+			fprintf(stderr, "history length error cc= %d\n", cc);
+			goto err5;
+		}
+
+	case ERR_NX_TARGET_SPACE:
+
+		/* Target buffer not large enough; retry smaller input
+		 * data; give at least 1 byte.  SPBC/TPBC are not valid.
+		 */
+		assert(source_sz > history_len);
+		source_sz = ((source_sz - history_len + 2) / 2) + history_len;
+		NXPRT(fprintf(stderr, "ERR_NX_TARGET_SPACE; retry with "));
+		NXPRT(fprintf(stderr, "smaller input data src %d hist %d\n",
+			      source_sz, history_len));
+		goto restart_nx;
+
+	case ERR_NX_OK:
+
+		/* This should not happen for gzip formatted data;
+		 * we need trailing crc and isize
+		 */
+		fprintf(stderr, "ERR_NX_OK\n");
+		spbc = get32(cmdp->cpb, out_spbc_decomp);
+		tpbc = get32(cmdp->crb.csb, tpbc);
+		assert(target_max >= tpbc);
+		assert(spbc >= history_len);
+		source_sz = spbc - history_len;
+		goto offsets_state;
+
+	default:
+		fprintf(stderr, "error: cc= %d\n", cc);
+		rc = -1;
+		goto err5;
+	}
+
+ok_cc3:
+
+	NXPRT(fprintf(stderr, "cc3: sfbt: %x\n", sfbt));
+
+	assert(spbc > history_len);
+	source_sz = spbc - history_len;
+
+	/* Table 6-4: Source Final Block Type (SFBT) describes the
+	 * last processed deflate block and clues the software how to
+	 * resume the next job.  SUBC indicates how many input bits NX
+	 * consumed but did not process.  SPBC indicates how many
+	 * bytes of source were given to the accelerator including
+	 * history bytes.
+	 */
+
+	switch (sfbt) {
+		int dhtlen;
+
+	case 0x0: /* Deflate final EOB received */
+
+		/* Calculating the checksum start position. */
+
+		source_sz = source_sz - subc / 8;
+		is_final = 1;
+		break;
+
+		/* Resume decompression cases are below. Basically
+		 * indicates where NX has suspended and how to resume
+		 * the input stream.
+		 */
+
+	case 0x8: /* Within a literal block; use rembytecount */
+	case 0x9: /* Within a literal block; use rembytecount; bfinal=1 */
+
+		/* Supply the partially processed source byte again */
+		source_sz = source_sz - ((subc + 7) / 8);
+
+		/* SUBC LS 3bits: number of bits in the first source byte need
+		 * to be processed.
+		 * 000 means all 8 bits;  Table 6-3
+		 * Clear subc, histlen, sfbt, rembytecnt, dhtlen
+		 */
+		cmdp->cpb.in_subc = 0;
+		cmdp->cpb.in_sfbt = 0;
+		putnn(cmdp->cpb, in_subc, subc % 8);
+		putnn(cmdp->cpb, in_sfbt, sfbt);
+		putnn(cmdp->cpb, in_rembytecnt, getnn(cmdp->cpb,
+						      out_rembytecnt));
+		break;
+
+	case 0xA: /* Within a FH block; */
+	case 0xB: /* Within a FH block; bfinal=1 */
+
+		source_sz = source_sz - ((subc + 7) / 8);
+
+		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+		cmdp->cpb.in_subc = 0;
+		cmdp->cpb.in_sfbt = 0;
+		putnn(cmdp->cpb, in_subc, subc % 8);
+		putnn(cmdp->cpb, in_sfbt, sfbt);
+		break;
+
+	case 0xC: /* Within a DH block; */
+	case 0xD: /* Within a DH block; bfinal=1 */
+
+		source_sz = source_sz - ((subc + 7) / 8);
+
+		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+		cmdp->cpb.in_subc = 0;
+		cmdp->cpb.in_sfbt = 0;
+		putnn(cmdp->cpb, in_subc, subc % 8);
+		putnn(cmdp->cpb, in_sfbt, sfbt);
+
+		dhtlen = getnn(cmdp->cpb, out_dhtlen);
+		putnn(cmdp->cpb, in_dhtlen, dhtlen);
+		assert(dhtlen >= 42);
+
+		/* Round up to a qword */
+		dhtlen = (dhtlen + 127) / 128;
+
+		while (dhtlen > 0) { /* Copy dht from cpb.out to cpb.in */
+			--dhtlen;
+			cmdp->cpb.in_dht[dhtlen] = cmdp->cpb.out_dht[dhtlen];
+		}
+		break;
+
+	case 0xE: /* Within a block header; bfinal=0; */
+		     /* Also given if source data exactly ends (SUBC=0) with
+		      * EOB code with BFINAL=0.  Means the next byte will
+		      * contain a block header.
+		      */
+	case 0xF: /* within a block header with BFINAL=1. */
+
+		source_sz = source_sz - ((subc + 7) / 8);
+
+		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+		cmdp->cpb.in_subc = 0;
+		cmdp->cpb.in_sfbt = 0;
+		putnn(cmdp->cpb, in_subc, subc % 8);
+		putnn(cmdp->cpb, in_sfbt, sfbt);
+
+		/* Engine did not process any data */
+		if (is_eof && (source_sz == 0))
+			is_final = 1;
+	}
+
+offsets_state:
+
+	/* Adjust the source and target buffer offsets and lengths  */
+	NXPRT(fprintf(stderr, "offsets_state:\n"));
+
+	/* Delete input data from fifo_in */
+	used_in = used_in - source_sz;
+	cur_in = (cur_in + source_sz) % fifo_in_len;
+	input_file_offset = input_file_offset + source_sz;
+
+	/* Add output data to fifo_out */
+	used_out = used_out + tpbc;
+
+	assert(used_out <= fifo_out_len);
+
+	total_out = total_out + tpbc;
+
+	/* Deflate history is 32KB max.  No need to supply more
+	 * than 32KB on a resume.
+	 */
+	history_len = (total_out > window_max) ? window_max : total_out;
+
+	/* To estimate expected expansion in the next NX job; 500 means 50%.
+	 * Deflate best case is around 1 to 1000.
+	 */
+	last_comp_ratio = (1000UL * ((uint64_t)source_sz + 1))
+			  / ((uint64_t)tpbc + 1);
+	last_comp_ratio = NX_MAX(NX_MIN(1000UL, last_comp_ratio), 1);
+	NXPRT(fprintf(stderr, "comp_ratio %ld source_sz %d spbc %d tpbc %d\n",
+		      last_comp_ratio, source_sz, spbc, tpbc));
+
+	resuming = 1;
+
+finish_state:
+
+	NXPRT(fprintf(stderr, "finish_state:\n"));
+
+	if (is_final) {
+		if (used_out)
+			goto write_state; /* More data to write out */
+		else if (used_in < 8) {
+			/* Need at least 8 more bytes containing gzip crc
+			 * and isize.
+			 */
+			rc = -1;
+			goto err4;
+		} else {
+			/* Compare checksums and exit */
+			unsigned char tail[8];
+			uint32_t cksum, isize;
+
+			for (i = 0; i < 8; i++)
+				tail[i] = fifo_in[(cur_in + i) % fifo_in_len];
+			fprintf(stderr, "computed checksum %08x isize %08x\n",
+				cmdp->cpb.out_crc, (uint32_t) (total_out
+				% (1ULL<<32)));
+			cksum = ((uint32_t) tail[0] | (uint32_t) tail[1]<<8
+				 | (uint32_t) tail[2]<<16
+				 | (uint32_t) tail[3]<<24);
+			isize = ((uint32_t) tail[4] | (uint32_t) tail[5]<<8
+				 | (uint32_t) tail[6]<<16
+				 | (uint32_t) tail[7]<<24);
+			fprintf(stderr, "stored   checksum %08x isize %08x\n",
+				cksum, isize);
+
+			if (cksum == cmdp->cpb.out_crc && isize == (uint32_t)
+			    (total_out % (1ULL<<32))) {
+				rc = 0;	goto ok1;
+			} else {
+				rc = -1; goto err4;
+			}
+		}
+	} else
+		goto read_state;
+
+	return -1;
+
+err1:
+	fprintf(stderr, "error: not a gzip file, expect %x, read %x\n",
+		expect, c);
+	return -1;
+
+err2:
+	fprintf(stderr, "error: the FLG byte is wrong or not being handled\n");
+	return -1;
+
+err3:
+	fprintf(stderr, "error: gzip header\n");
+	return -1;
+
+err4:
+	fprintf(stderr, "error: checksum missing or mismatch\n");
+
+err5:
+ok1:
+	fprintf(stderr, "decomp is complete: fclose\n");
+	fclose(outf);
+
+	return rc;
+}
+
+/* Install the SIGSEGV reporter, initialise NX and gunzip per the arguments */
+int main(int argc, char **argv)
+{
+	int rc;
+	struct sigaction act;
+	void *handle;
+
+	nx_dbg = 0;
+	nx_gzip_log = NULL;
+	/* Zero every field; sa_restorer is not meant for application use */
+	memset(&act, 0, sizeof(act));
+	act.sa_sigaction = sigsegv_handler;
+	act.sa_flags = SA_SIGINFO;
+	sigemptyset(&act.sa_mask);
+	sigaction(SIGSEGV, &act, NULL);
+
+	handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+	if (!handle) {
+		fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+		exit(-1);
+	}
+
+	rc = decompress_file(argc, argv, handle);
+
+	nx_function_end(handle);
+
+	return rc;
+}
-- 
2.21.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH V2 5/5] selftests/powerpc: Add README for GZIP engine tests
  2020-03-27 18:16 [PATCH V2 0/5] selftests/powerpc: Add NX-GZIP engine testcase Raphael Moreira Zinsly
                   ` (3 preceding siblings ...)
  2020-03-27 18:16 ` [PATCH V2 4/5] selftests/powerpc: Add NX-GZIP engine decompress testcase Raphael Moreira Zinsly
@ 2020-03-27 18:16 ` Raphael Moreira Zinsly
  4 siblings, 0 replies; 9+ messages in thread
From: Raphael Moreira Zinsly @ 2020-03-27 18:16 UTC (permalink / raw)
  To: linuxppc-dev, linux-crypto
  Cc: herbert, mpe, haren, abali, dja, Raphael Moreira Zinsly

Include a README file with the instructions to use the
testcases at selftests/powerpc/nx-gzip.

Signed-off-by: Bulent Abali <abali@us.ibm.com>
Signed-off-by: Raphael Moreira Zinsly <rzinsly@linux.ibm.com>
---
 .../powerpc/nx-gzip/99-nx-gzip.rules          |  1 +
 .../testing/selftests/powerpc/nx-gzip/README  | 44 +++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/README

diff --git a/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules b/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
new file mode 100644
index 000000000000..5a7118495cb3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
@@ -0,0 +1 @@
+SUBSYSTEM=="nxgzip", KERNEL=="nx-gzip", MODE="0666"
diff --git a/tools/testing/selftests/powerpc/nx-gzip/README b/tools/testing/selftests/powerpc/nx-gzip/README
new file mode 100644
index 000000000000..a80c289f1d2c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/README
@@ -0,0 +1,44 @@
+Test the nx-gzip function:
+=========================
+
+Verify that the following device exists:
+  /dev/crypto/nx-gzip
+If you get a permission error run as sudo or set the device permissions:
+   sudo chmod go+rw /dev/crypto/nx-gzip
+However, chmod may not survive across reboots. To keep the permissions, create
+a udev rules file (such as the 99-nx-gzip.rules provided here) as:
+   /etc/udev/rules.d/99-nx-gzip.rules
+
+
+Then make and run:
+$ make
+gcc -O3 -I./inc -o gzfht_test gzfht_test.c gzip_vas.c
+gcc -O3 -I./inc -o gunz_test gunz_test.c gzip_vas.c
+
+
+Compress any file using Fixed Huffman mode. Output will have a .nx.gz suffix:
+$ ./gzfht_test gzip_vas.c
+file gzip_vas.c read, 5218 bytes
+compressed 5218 to 2545 bytes total, crc32 checksum = 817543a3
+
+
+Uncompress the previous output. Output will have a .nx.gunzip suffix:
+$ ./gunz_test gzip_vas.c.nx.gz
+gzHeader FLG 0
+00 00 00 00 04 03
+gzHeader MTIME, XFL, OS ignored
+computed checksum 817543a3 isize 00001462
+stored   checksum 817543a3 isize 00001462
+decomp is complete: fclose
+
+
+Compare two files:
+$ sha1sum gzip_vas.c.nx.gz.nx.gunzip gzip_vas.c
+4e87536f3ee9e771ef30fb0fb27572032ca44ef8  gzip_vas.c.nx.gz.nx.gunzip
+4e87536f3ee9e771ef30fb0fb27572032ca44ef8  gzip_vas.c
+
+
+Note that the code here is intended for testing the nx-gzip hardware function.
+It is not intended for demonstrating performance or compression ratio.
+For more information and source code consider using:
+https://github.com/libnxz/power-gzip
-- 
2.21.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH V2 3/5] selftests/powerpc: Add NX-GZIP engine compress testcase
  2020-03-27 18:16 ` [PATCH V2 3/5] selftests/powerpc: Add NX-GZIP engine compress testcase Raphael Moreira Zinsly
@ 2020-04-02  6:17   ` Daniel Axtens
  2020-04-02 10:55     ` Michael Ellerman
  2020-04-08 13:21     ` Raphael Moreira Zinsly
  0 siblings, 2 replies; 9+ messages in thread
From: Daniel Axtens @ 2020-04-02  6:17 UTC (permalink / raw)
  To: Raphael Moreira Zinsly, linuxppc-dev, linux-crypto
  Cc: herbert, mpe, haren, abali, Raphael Moreira Zinsly

Raphael Moreira Zinsly <rzinsly@linux.ibm.com> writes:

> Add a compression testcase for the powerpc NX-GZIP engine.
>
> Signed-off-by: Bulent Abali <abali@us.ibm.com>
> Signed-off-by: Raphael Moreira Zinsly <rzinsly@linux.ibm.com>
> ---
>  .../selftests/powerpc/nx-gzip/Makefile        |  21 +
>  .../selftests/powerpc/nx-gzip/gzfht_test.c    | 489 ++++++++++++++++++
>  .../selftests/powerpc/nx-gzip/gzip_vas.c      | 259 ++++++++++
>  3 files changed, 769 insertions(+)
>  create mode 100644 tools/testing/selftests/powerpc/nx-gzip/Makefile
>  create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
>  create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
>
> diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
> new file mode 100644
> index 000000000000..ab903f63bbbd
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
> @@ -0,0 +1,21 @@
> +CC = gcc
> +CFLAGS = -O3
> +INC = ./inc
> +SRC = gzfht_test.c
> +OBJ = $(SRC:.c=.o)
> +TESTS = gzfht_test
> +EXTRA_SOURCES = gzip_vas.c
> +
> +all:	$(TESTS)
> +
> +$(OBJ): %.o: %.c
> +	$(CC) $(CFLAGS) -I$(INC) -c $<
> +
> +$(TESTS): $(OBJ)
> +	$(CC) $(CFLAGS) -I$(INC) -o $@ $@.o $(EXTRA_SOURCES)
> +
> +run_tests: $(TESTS)
> +	./gzfht_test gzip_vas.c
> +
> +clean:
> +	rm -f $(TESTS) *.o *~ *.gz
> diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
> new file mode 100644
> index 000000000000..7a21c25f5611
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
> @@ -0,0 +1,489 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +/* P9 gzip sample code for demonstrating the P9 NX hardware interface.
> + * Not intended for production use or for performance or compression
> + * ratio measurements.  For simplicity of demonstration, this sample
> + * code compresses into fixed Huffman blocks only (Deflate btype=1)
> + * and has very simple memory management.  Dynamic Huffman blocks
> + * (Deflate btype=2) are more involved as detailed in the user guide.
> + * Note also that /dev/crypto/nx-gzip, VAS and skiboot support are
> + * required.
> + *
> + * Copyright 2020 IBM Corp.
> + *
> + * https://github.com/libnxz/power-gzip for zlib api and other utils
> + *
> + * Author: Bulent Abali <abali@us.ibm.com>
> + *
> + * Definitions of acronyms used here. See
> + * P9 NX Gzip Accelerator User's Manual for details:
> + * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
> + *
> + * adler/crc: 32 bit checksums appended to stream tail
> + * ce:       completion extension
> + * cpb:      coprocessor parameter block (metadata)
> + * crb:      coprocessor request block (command)
> + * csb:      coprocessor status block (status)
> + * dht:      dynamic huffman table
> + * dde:      data descriptor element (address, length)
> + * ddl:      list of ddes
> + * dh/fh:    dynamic and fixed huffman types
> + * fc:       coprocessor function code
> + * histlen:  history/dictionary length
> + * history:  sliding window of up to 32KB of data
> + * lzcount:  Deflate LZ symbol counts
> + * rembytecnt: remaining byte count
> + * sfbt:     source final block type; last block's type during decomp
> + * spbc:     source processed byte count
> + * subc:     source unprocessed bit count
> + * tebc:     target ending bit count; valid bits in the last byte
> + * tpbc:     target processed byte count
> + * vas:      virtual accelerator switch; the user mode interface
> + */
> +
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <stdint.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +#include <sys/fcntl.h>
> +#include <sys/mman.h>
> +#include <endian.h>
> +#include <bits/endian.h>
> +#include <sys/ioctl.h>
> +#include <assert.h>
> +#include <errno.h>
> +#include <signal.h>
> +#include "nxu.h"
> +#include "nx.h"
> +
> +int nx_dbg;
> +FILE *nx_gzip_log;
> +void *nx_fault_storage_address;
> +
> +#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
> +#define FNAME_MAX 1024
> +#define FEXT ".nx.gz"
> +
> +/*
> + * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
> + */
> +static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
> +				uint32_t dstlen, int with_count,
> +				struct nx_gzip_crb_cpb_t *cmdp, void *handle)
> +{
> +	int cc;
> +	uint32_t fc;
> +
> +	assert(!!cmdp);
> +
> +	put32(cmdp->crb, gzip_fc, 0);  /* clear */
> +	fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
> +			    GZIP_FC_COMPRESS_RESUME_FHT;
> +	putnn(cmdp->crb, gzip_fc, fc);
> +	putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
> +	memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
> +
> +	/* Section 6.6 programming notes; spbc may be in two different
> +	 * places depending on FC.
> +	 */
> +	if (!with_count)
> +		put32(cmdp->cpb, out_spbc_comp, 0);
> +	else
> +		put32(cmdp->cpb, out_spbc_comp_with_count, 0);
> +
> +	/* Figure 6-3 6-4; CSB location */
> +	put64(cmdp->crb, csb_address, 0);
> +	put64(cmdp->crb, csb_address,
> +	      (uint64_t) &cmdp->crb.csb & csb_address_mask);
> +
> +	/* Source direct dde (scatter-gather list) */
> +	clear_dde(cmdp->crb.source_dde);
> +	putnn(cmdp->crb.source_dde, dde_count, 0);
> +	put32(cmdp->crb.source_dde, ddebc, srclen);
> +	put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
> +
> +	/* Target direct dde (scatter-gather list) */
> +	clear_dde(cmdp->crb.target_dde);
> +	putnn(cmdp->crb.target_dde, dde_count, 0);
> +	put32(cmdp->crb.target_dde, ddebc, dstlen);
> +	put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
> +
> +	/* Submit the crb, the job descriptor, to the accelerator */
> +	nxu_run_job(cmdp, handle);
> +
> +	/* Poll for the csb.v bit; you should also consider sleeping
> +	 * or interrupts.
> +	 */
> +	do { ; } while (getnn(cmdp->crb.csb, csb_v) == 0);
> +
> +	/* CC Table 6-8 */
> +	cc = getnn(cmdp->crb.csb, csb_cc);
> +
> +	return cc;
> +}
> +
> +/*
> + * Prepares a blank no filename no timestamp gzip header and returns
> + * the number of bytes written to buf.
> + * Gzip specification at https://tools.ietf.org/html/rfc1952
> + */
> +int gzip_header_blank(char *buf)
> +{
> +	int i = 0;
> +
> +	buf[i++] = 0x1f; /* ID1 */
> +	buf[i++] = 0x8b; /* ID2 */
> +	buf[i++] = 0x08; /* CM  */
> +	buf[i++] = 0x00; /* FLG */
> +	buf[i++] = 0x00; /* MTIME */
> +	buf[i++] = 0x00; /* MTIME */
> +	buf[i++] = 0x00; /* MTIME */
> +	buf[i++] = 0x00; /* MTIME */
> +	buf[i++] = 0x04; /* XFL 4=fastest */
> +	buf[i++] = 0x03; /* OS UNIX */
> +
> +	return i;
> +}
> +
> +/* Caller must free the allocated buffer. Returns nonzero on error. */
> +int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
> +{
> +	struct stat statbuf;
> +	FILE *fp;
> +	char *p;
> +	size_t num_bytes;
> +
> +	if (stat(fname, &statbuf)) {
> +		perror(fname);
> +		return(-1);
> +	}
> +	fp = fopen(fname, "r");
> +	if (fp == NULL) {
> +		perror(fname);
> +		return(-1);
> +	}
> +	assert(NULL != (p = (char *) malloc(statbuf.st_size)));
> +	num_bytes = fread(p, 1, statbuf.st_size, fp);
> +	if (ferror(fp) || (num_bytes != statbuf.st_size)) {
> +		perror(fname);
> +		return(-1);
> +	}
> +	*buf = p;
> +	*bufsize = num_bytes;
> +	return 0;
> +}
> +
> +/* Returns nonzero on error */
> +int write_output_file(char *fname, char *buf, size_t bufsize)
> +{
> +	FILE *fp;
> +	size_t num_bytes;
> +
> +	fp = fopen(fname, "w");
> +	if (fp == NULL) {
> +		perror(fname);
> +		return(-1);
> +	}
> +	num_bytes = fwrite(buf, 1, bufsize, fp);
> +	if (ferror(fp) || (num_bytes != bufsize)) {
> +		perror(fname);
> +		return(-1);
> +	}
> +	fclose(fp);
> +	return 0;
> +}
> +
> +/*
> + * Z_SYNC_FLUSH as described in zlib.h.
> + * Returns number of appended bytes
> + */
> +int append_sync_flush(char *buf, int tebc, int final)
> +{
> +	uint64_t flush;
> +	int shift = (tebc & 0x7);
> +
> +	if (tebc > 0) {
> +		/* Last byte is partially full */
> +		buf = buf - 1;
> +		*buf = *buf & (unsigned char) ((1<<tebc)-1);
> +	} else
> +		*buf = 0;
> +	flush = ((0x1ULL & final) << shift) | *buf;
> +	shift = shift + 3; /* BFINAL and BTYPE written */
> +	shift = (shift <= 8) ? 8 : 16;
> +	flush |= (0xFFFF0000ULL) << shift; /* Zero length block */
> +	shift = shift + 32;
> +	while (shift > 0) {
> +		*buf++ = (unsigned char) (flush & 0xffULL);
> +		flush = flush >> 8;
> +		shift = shift - 8;
> +	}
> +	return(((tebc > 5) || (tebc == 0)) ? 5 : 4);
> +}
> +
> +/*
> + * Fault in pages prior to NX job submission. wr=1 may be required to
> + * touch writeable pages.  System zero pages do not fault-in the page as
> + * intended.  Typically set wr=1 for NX target pages and set wr=0 for NX
> + * source pages.
> + */
> +static int nx_touch_pages(void *buf, long buf_len, long page_len, int wr)
> +{
> +	char *begin = buf;
> +	char *end = (char *) buf + buf_len - 1;
> +	char t;
> +
> +	assert(buf_len >= 0 && !!buf);
> +
> +	NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf,
> +			(buf + buf_len), buf_len, wr));
> +
> +	if (buf_len <= 0 || buf == NULL)
> +		return -1;
> +
> +	do {
> +		t = *begin;
> +		if (wr)
> +			*begin = t;
> +		begin = begin + page_len;
> +	} while (begin < end);
> +
> +	/* When buf_len is small or the buffer tail is in another page */
> +	t = *end;
> +	if (wr)
> +		*end = t;
> +
> +	return 0;
> +}
> +
> +/*
> + * Final deflate block bit.  This call assumes the block
> + * beginning is byte aligned.
> + */
> +static void set_bfinal(void *buf, int bfinal)
> +{
> +	char *b = buf;
> +
> +	if (bfinal)
> +		*b = *b | (unsigned char) 0x01;
> +	else
> +		*b = *b & (unsigned char) 0xfe;
> +}
> +
> +int compress_file(int argc, char **argv, void *handle)
> +{
> +	char *inbuf, *outbuf, *srcbuf, *dstbuf;
> +	char outname[FNAME_MAX];
> +	uint32_t srclen, dstlen;
> +	uint32_t flushlen, chunk;
> +	size_t inlen, outlen, dsttotlen, srctotlen;
> +	uint32_t crc, spbc, tpbc, tebc;
> +	int lzcounts = 0;
> +	int cc;
> +	int num_hdr_bytes;
> +	struct nx_gzip_crb_cpb_t *cmdp;
> +	uint32_t pagelen = 65536;
> +	int fault_tries = 50;
> +
> +	cmdp = (void *)(uintptr_t)
> +		aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
> +			      sizeof(struct nx_gzip_crb_cpb_t));
> +
> +	if (argc != 2) {
> +		fprintf(stderr, "usage: %s <fname>\n", argv[0]);
> +		exit(-1);
> +	}
> +	if (read_alloc_input_file(argv[1], &inbuf, &inlen))
> +		exit(-1);
> +	fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen);
> +
> +	/* Generous output buffer for header/trailer */
> +	outlen = 2 * inlen + 1024;
> +
> +	assert(NULL != (outbuf = (char *)malloc(outlen)));
> +	nx_touch_pages(outbuf, outlen, pagelen, 1);
> +
> +	/* Compress piecemeal in smallish chunks */
> +	chunk = 1<<22;
> +
> +	/* Write the gzip header to the stream */
> +	num_hdr_bytes = gzip_header_blank(outbuf);
> +	dstbuf    = outbuf + num_hdr_bytes;
> +	outlen    = outlen - num_hdr_bytes;
> +	dsttotlen = num_hdr_bytes;
> +
> +	srcbuf    = inbuf;
> +	srctotlen = 0;
> +
> +	/* Init the CRB, the coprocessor request block */
> +	memset(&cmdp->crb, 0, sizeof(cmdp->crb));
> +
> +	/* Initial gzip crc32 */
> +	put32(cmdp->cpb, in_crc, 0);
> +
> +	fault_tries = 50;
> +
> +	while (inlen > 0) {
> +
> +		/* Submit chunk size source data per job */
> +		srclen = NX_MIN(chunk, inlen);
> +		/* Supply large target in case data expands */
> +		dstlen = NX_MIN(2*srclen, outlen);
> +
> +		/* Page faults are handled by the user code */
> +
> +		/* Fault-in pages; an improved code wouldn't touch so
> +		 * many pages but would try to estimate the
> +		 * compression ratio and adjust both the src and dst
> +		 * touch amounts.
> +		 */
> +		nx_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
> +				1);
> +		nx_touch_pages(srcbuf, srclen, pagelen, 0);
> +		nx_touch_pages(dstbuf, dstlen, pagelen, 1);
> +
> +		cc = compress_fht_sample(
> +			srcbuf, srclen,
> +			dstbuf, dstlen,
> +			lzcounts, cmdp, handle);
> +
> +		if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
> +		    cc != ERR_NX_TRANSLATION) {
> +			fprintf(stderr, "nx error: cc= %d\n", cc);
> +			exit(-1);
> +		}
> +
> +		/* Page faults are handled by the user code */
> +		if (cc == ERR_NX_TRANSLATION) {
> +			NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
> +			NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
> +				  fault_tries,
> +				  (unsigned long long) cmdp->crb.csb.fsaddr));
> +
> +			fault_tries--;
> +			if (fault_tries > 0) {
> +				continue;
> +			} else {
> +				fprintf(stderr, "error: cannot progress; ");
> +				fprintf(stderr, "too many faults\n");
> +				exit(-1);
> +			};
> +		}
> +
> +		fault_tries = 50; /* Reset for the next chunk */
> +
> +		inlen     = inlen - srclen;
> +		srcbuf    = srcbuf + srclen;
> +		srctotlen = srctotlen + srclen;
> +
> +		/* Two possible locations for spbc depending on the function
> +		 * code.
> +		 */
> +		spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
> +			get32(cmdp->cpb, out_spbc_comp_with_count);
> +		assert(spbc == srclen);
> +
> +		/* Target byte count */
> +		tpbc = get32(cmdp->crb.csb, tpbc);
> +		/* Target ending bit count */
> +		tebc = getnn(cmdp->cpb, out_tebc);
> +		NXPRT(fprintf(stderr, "compressed chunk %d " spbc));
> +		NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
> +
> +		if (inlen > 0) { /* More chunks to go */
> +			set_bfinal(dstbuf, 0);
> +			dstbuf    = dstbuf + tpbc;
> +			dsttotlen = dsttotlen + tpbc;
> +			outlen    = outlen - tpbc;
> +			/* Round up to the next byte with a flush
> +			 * block; do not set the BFINAL bit.
> +			 */
> +			flushlen  = append_sync_flush(dstbuf, tebc, 0);
> +			dsttotlen = dsttotlen + flushlen;
> +			outlen    = outlen - flushlen;
> +			dstbuf    = dstbuf + flushlen;
> +			NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
> +					flushlen));
> +		} else {  /* Done */
> +			/* Set the BFINAL bit of the last block per Deflate
> +			 * specification.
> +			 */
> +			set_bfinal(dstbuf, 1);
> +			dstbuf    = dstbuf + tpbc;
> +			dsttotlen = dsttotlen + tpbc;
> +			outlen    = outlen - tpbc;
> +		}
> +
> +		/* Resuming crc32 for the next chunk */
> +		crc = get32(cmdp->cpb, out_crc);
> +		put32(cmdp->cpb, in_crc, crc);
> +		crc = be32toh(crc);
> +	}
> +
> +	/* Append crc32 and ISIZE to the end */
> +	memcpy(dstbuf, &crc, 4);
> +	memcpy(dstbuf+4, &srctotlen, 4);
> +	dsttotlen = dsttotlen + 8;
> +	outlen    = outlen - 8;
> +
> +	assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
> +	strcpy(outname, argv[1]);
> +	strcat(outname, FEXT);
> +	if (write_output_file(outname, outbuf, dsttotlen)) {
> +		fprintf(stderr, "write error: %s\n", outname);
> +		exit(-1);
> +	}
> +
> +	fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen,
> +		dsttotlen);
> +	fprintf(stderr, "crc32 checksum = %08x\n", crc);
> +
> +	if (inbuf != NULL)
> +		free(inbuf);
> +
> +	if (outbuf != NULL)
> +		free(outbuf);
> +
> +	return 0;
> +}
> +
> +void sigsegv_handler(int sig, siginfo_t *info, void *ctx)
> +{
> +	fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(),
> +		sig, info->si_code, info->si_addr);
> +
> +	nx_fault_storage_address = info->si_addr;
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	int rc;
> +	struct sigaction act;
> +	void *handle;
> +
> +	nx_dbg = 0;
> +	nx_gzip_log = NULL;
> +	act.sa_handler = 0;
> +	act.sa_sigaction = sigsegv_handler;
> +	act.sa_flags = SA_SIGINFO;
> +	act.sa_restorer = 0;
> +	sigemptyset(&act.sa_mask);
> +	sigaction(SIGSEGV, &act, NULL);
> +
> +	handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
> +	if (!handle) {
> +		fprintf(stderr, "Unable to init NX, errno %d\n", errno);
> +		exit(-1);
> +	}
> +
> +	rc = compress_file(argc, argv, handle);
> +
> +	nx_function_end(handle);
> +
> +	return rc;
> +}
> diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
> new file mode 100644
> index 000000000000..d28e1efb527b
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
> @@ -0,0 +1,259 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +/* Copyright 2020 IBM Corp.
> + *
> + * Author: Bulent Abali <abali@us.ibm.com>
> + *
> + */
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <stdint.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +#include <sys/fcntl.h>
> +#include <sys/mman.h>
> +#include <endian.h>
> +#include <bits/endian.h>
> +#include <sys/ioctl.h>
> +#include <assert.h>
> +#include <errno.h>
> +#include <signal.h>
> +#include "nx-gzip.h"
> +#include "nx.h"
> +#include "copy-paste.h"
> +#include "nxu.h"
> +#include "nx_dbg.h"
> +#include <sys/platform/ppc.h>
> +
> +#define barrier()
> +#define hwsync()    ({ asm volatile("hwsync" ::: "memory"); })

This doesn't compile on the clang version I tried as it doesn't
recognise 'hwsync'. Does
asm volatile("sync" ::: "memory");
do the same thing? That is recognised by clang, but I don't know if
dropping the hw prefix matters!

Regards,
Daniel

> +
> +#ifndef NX_NO_CPU_PRI
> +#define cpu_pri_default()  ({ asm volatile ("or 2, 2, 2"); })
> +#define cpu_pri_low()      ({ asm volatile ("or 31, 31, 31"); })
> +#else
> +#define cpu_pri_default()
> +#define cpu_pri_low()
> +#endif
> +
> +void *nx_fault_storage_address;
> +
> +struct nx_handle {
> +	int fd;
> +	int function;
> +	void *paste_addr;
> +};
> +
> +static int open_device_nodes(char *devname, int pri, struct nx_handle *handle)
> +{
> +	int rc, fd;
> +	void *addr;
> +	struct vas_gzip_setup_attr txattr;
> +
> +	fd = open(devname, O_RDWR);
> +	if (fd < 0) {
> +		fprintf(stderr, " open device name %s\n", devname);
> +		return -errno;
> +	}
> +
> +	memset(&txattr, 0, sizeof(txattr));
> +	txattr.version = 1;
> +	txattr.vas_id = pri;
> +	rc = ioctl(fd, VAS_GZIP_TX_WIN_OPEN, (unsigned long)&txattr);
> +	if (rc < 0) {
> +		fprintf(stderr, "ioctl() n %d, error %d\n", rc, errno);
> +		rc = -errno;
> +		goto out;
> +	}
> +
> +	addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0ULL);
> +	if (addr == MAP_FAILED) {
> +		fprintf(stderr, "mmap() failed, errno %d\n", errno);
> +		rc = -errno;
> +		goto out;
> +	}
> +	handle->fd = fd;
> +	handle->paste_addr = (void *)((char *)addr + 0x400);
> +
> +	rc = 0;
> +out:
> +	close(fd);
> +	return rc;
> +}
> +
> +void *nx_function_begin(int function, int pri)
> +{
> +	int rc;
> +	char *devname = "/dev/crypto/nx-gzip";
> +	struct nx_handle *nxhandle;
> +
> +	if (function != NX_FUNC_COMP_GZIP) {
> +		errno = EINVAL;
> +		fprintf(stderr, " NX_FUNC_COMP_GZIP not found\n");
> +		return NULL;
> +	}
> +
> +
> +	nxhandle = malloc(sizeof(*nxhandle));
> +	if (!nxhandle) {
> +		errno = ENOMEM;
> +		fprintf(stderr, " No memory\n");
> +		return NULL;
> +	}
> +
> +	nxhandle->function = function;
> +	rc = open_device_nodes(devname, pri, nxhandle);
> +	if (rc < 0) {
> +		errno = -rc;
> +		fprintf(stderr, " open_device_nodes failed\n");
> +		return NULL;
> +	}
> +
> +	return nxhandle;
> +}
> +
> +int nx_function_end(void *handle)
> +{
> +	int rc = 0;
> +	struct nx_handle *nxhandle = handle;
> +
> +	rc = munmap(nxhandle->paste_addr - 0x400, 4096);
> +	if (rc < 0) {
> +		fprintf(stderr, "munmap() failed, errno %d\n", errno);
> +		return rc;
> +	}
> +	close(nxhandle->fd);
> +	free(nxhandle);
> +
> +	return rc;
> +}
> +
> +static int nx_wait_for_csb(struct nx_gzip_crb_cpb_t *cmdp)
> +{
> +	long poll = 0;
> +	uint64_t t;
> +
> +	/* Save power and let other threads use the h/w. top may show
> +	 * 100% but only because the OS doesn't know we slowed this
> +	 * h/w thread while polling. We're letting other threads have
> +	 * higher throughput on the core.
> +	 */
> +	cpu_pri_low();
> +
> +#define CSB_MAX_POLL 200000000UL
> +#define USLEEP_TH     300000UL
> +
> +	t = __ppc_get_timebase();
> +
> +	while (getnn(cmdp->crb.csb, csb_v) == 0) {
> +		++poll;
> +		hwsync();
> +
> +		cpu_pri_low();
> +
> +		/* usleep(0) takes around 29000 ticks ~60 us.
> +		 * 300000 is spinning for about 600 us then
> +		 * start sleeping.
> +		 */
> +		if ((__ppc_get_timebase() - t) > USLEEP_TH) {
> +			cpu_pri_default();
> +			usleep(1);
> +		}
> +
> +		if (poll > CSB_MAX_POLL)
> +			break;
> +
> +		/* Fault address from signal handler */
> +		if (nx_fault_storage_address) {
> +			cpu_pri_default();
> +			return -EAGAIN;
> +		}
> +
> +	}
> +
> +	cpu_pri_default();
> +
> +	/* hw has updated csb and output buffer */
> +	hwsync();
> +
> +	/* Check CSB flags. */
> +	if (getnn(cmdp->crb.csb, csb_v) == 0) {
> +		fprintf(stderr, "CSB still not valid after %d polls.\n",
> +			(int) poll);
> +		prt_err("CSB still not valid after %d polls, giving up.\n",
> +			(int) poll);
> +		return -ETIMEDOUT;
> +	}
> +
> +	return 0;
> +}
> +
> +int nxu_run_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
> +{
> +	int i, ret, retries;
> +	struct nx_handle *nxhandle = handle;
> +
> +	assert(handle != NULL);
> +	i = 0;
> +	retries = 5000;
> +	while (i++ < retries) {
> +		hwsync();
> +		vas_copy(&cmdp->crb, 0);
> +		ret = vas_paste(nxhandle->paste_addr, 0);
> +		hwsync();
> +
> +		NXPRT(fprintf(stderr, "Paste attempt %d/%d returns 0x%x\n",
> +				i, retries, ret));
> +
> +		if ((ret == 2) || (ret == 3)) {
> +
> +			ret = nx_wait_for_csb(cmdp);
> +			if (!ret) {
> +				goto out;
> +			} else if (ret == -EAGAIN) {
> +				long x;
> +
> +				prt_err("Touching address %p, 0x%lx\n",
> +					 nx_fault_storage_address,
> +					 *(long *) nx_fault_storage_address);
> +				x = *(long *) nx_fault_storage_address;
> +				*(long *) nx_fault_storage_address = x;
> +				nx_fault_storage_address = 0;
> +				continue;
> +			} else {
> +				prt_err("wait_for_csb() returns %d\n", ret);
> +				break;
> +			}
> +		} else {
> +			if (i < 10) {
> +				/* spin for few ticks */
> +#define SPIN_TH 500UL
> +				uint64_t fail_spin;
> +
> +				fail_spin = __ppc_get_timebase();
> +				while ((__ppc_get_timebase() - fail_spin) <
> +					 SPIN_TH)
> +					;
> +			} else {
> +				/* sleep */
> +				unsigned int pr = 0;
> +
> +				if (pr++ % 100 == 0) {
> +					prt_err("Paste attempt %d/", i);
> +					prt_err("%d, failed pid= %d\n", retries,
> +						getpid());
> +				}
> +				usleep(1);
> +			}
> +			continue;
> +		}
> +	}
> +
> +out:
> +	cpu_pri_default();
> +
> +	return ret;
> +}
> -- 
> 2.21.0

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH V2 3/5] selftests/powerpc: Add NX-GZIP engine compress testcase
  2020-04-02  6:17   ` Daniel Axtens
@ 2020-04-02 10:55     ` Michael Ellerman
  2020-04-08 13:21     ` Raphael Moreira Zinsly
  1 sibling, 0 replies; 9+ messages in thread
From: Michael Ellerman @ 2020-04-02 10:55 UTC (permalink / raw)
  To: Daniel Axtens, Raphael Moreira Zinsly, linuxppc-dev, linux-crypto
  Cc: herbert, haren, abali, Raphael Moreira Zinsly

Daniel Axtens <dja@axtens.net> writes:
> Raphael Moreira Zinsly <rzinsly@linux.ibm.com> writes:
>
>> Add a compression testcase for the powerpc NX-GZIP engine.
>>
>> Signed-off-by: Bulent Abali <abali@us.ibm.com>
>> Signed-off-by: Raphael Moreira Zinsly <rzinsly@linux.ibm.com>
...
>> diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
>> new file mode 100644
>> index 000000000000..d28e1efb527b
>> --- /dev/null
>> +++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
>> @@ -0,0 +1,259 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> +
>> +/* Copyright 2020 IBM Corp.
>> + *
>> + * Author: Bulent Abali <abali@us.ibm.com>
>> + *
>> + */
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <stdint.h>
>> +#include <sys/types.h>
>> +#include <sys/stat.h>
>> +#include <sys/time.h>
>> +#include <sys/fcntl.h>
>> +#include <sys/mman.h>
>> +#include <endian.h>
>> +#include <bits/endian.h>
>> +#include <sys/ioctl.h>
>> +#include <assert.h>
>> +#include <errno.h>
>> +#include <signal.h>
>> +#include "nx-gzip.h"
>> +#include "nx.h"
>> +#include "copy-paste.h"
>> +#include "nxu.h"
>> +#include "nx_dbg.h"
>> +#include <sys/platform/ppc.h>
>> +
>> +#define barrier()
>> +#define hwsync()    ({ asm volatile("hwsync" ::: "memory"); })
>
> This doesn't compile on the clang version I tried as it doesn't
> recognise 'hwsync'.

What assembler are you using? I guess the LLVM one?

Can you try with -fno-integrated-as ?

> Does asm volatile("sync" ::: "memory");
> do the same thing? That is recognised by clang, but I don't know if
> dropping the hw prefix matters!

It shouldn't matter.

But you can just try it and look at the generated code to be sure, you
should get 0x7c0004ac.

cheers

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH V2 3/5] selftests/powerpc: Add NX-GZIP engine compress testcase
  2020-04-02  6:17   ` Daniel Axtens
  2020-04-02 10:55     ` Michael Ellerman
@ 2020-04-08 13:21     ` Raphael Moreira Zinsly
  1 sibling, 0 replies; 9+ messages in thread
From: Raphael Moreira Zinsly @ 2020-04-08 13:21 UTC (permalink / raw)
  To: linuxppc-dev, linux-crypto, dja
  Cc: herbert, mpe, haren, abali, Raphael Moreira Zinsly

Daniel Axtens <dja@axtens.net> writes:
> Raphael Moreira Zinsly <rzinsly@linux.ibm.com> writes:
...
>> +#define hwsync()    ({ asm volatile("hwsync" ::: "memory"); })
>
> This doesn't compile on the clang version I tried as it doesn't
> recognise 'hwsync'.  Does
> asm volatile("sync" ::: "memory");
> do the same thing?

Both hwsync and sync are extended mnemonics for 'sync 0'.
I just replaced hwsync with sync in this patch, but I'm
surprised that hwsync is not recognized by clang.

--- >8 ---
Add a compression testcase for the powerpc NX-GZIP engine.

Signed-off-by: Bulent Abali <abali@us.ibm.com>
Signed-off-by: Raphael Moreira Zinsly <rzinsly@linux.ibm.com>
---
 .../selftests/powerpc/nx-gzip/Makefile        |  21 +
 .../selftests/powerpc/nx-gzip/gzfht_test.c    | 489 ++++++++++++++++++
 .../selftests/powerpc/nx-gzip/gzip_vas.c      | 259 ++++++++++
 3 files changed, 769 insertions(+)
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/Makefile
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c

diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
new file mode 100644
index 000000000000..ab903f63bbbd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -0,0 +1,21 @@
+CC = gcc
+CFLAGS = -O3
+INC = ./inc
+SRC = gzfht_test.c
+OBJ = $(SRC:.c=.o)
+TESTS = gzfht_test
+EXTRA_SOURCES = gzip_vas.c
+
+all:	$(TESTS)
+
+$(OBJ): %.o: %.c
+	$(CC) $(CFLAGS) -I$(INC) -c $<
+
+$(TESTS): $(OBJ)
+	$(CC) $(CFLAGS) -I$(INC) -o $@ $@.o $(EXTRA_SOURCES)
+
+run_tests: $(TESTS)
+	./gzfht_test gzip_vas.c
+
+clean:
+	rm -f $(TESTS) *.o *~ *.gz
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
new file mode 100644
index 000000000000..7a21c25f5611
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gzip sample code for demonstrating the P9 NX hardware interface.
+ * Not intended for production use or for performance or compression
+ * ratio measurements.  For simplicity of demonstration, this sample
+ * code compresses into fixed Huffman blocks only (Deflate btype=1)
+ * and has very simple memory management.  Dynamic Huffman blocks
+ * (Deflate btype=2) are more involved as detailed in the user guide.
+ * Note also that /dev/crypto/nx-gzip, VAS and skiboot support are
+ * required.
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce:       completion extension
+ * cpb:      coprocessor parameter block (metadata)
+ * crb:      coprocessor request block (command)
+ * csb:      coprocessor status block (status)
+ * dht:      dynamic huffman table
+ * dde:      data descriptor element (address, length)
+ * ddl:      list of ddes
+ * dh/fh:    dynamic and fixed huffman types
+ * fc:       coprocessor function code
+ * histlen:  history/dictionary length
+ * history:  sliding window of up to 32KB of data
+ * lzcount:  Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt:     source final block type; last block's type during decomp
+ * spbc:     source processed byte count
+ * subc:     source unprocessed bit count
+ * tebc:     target ending bit count; valid bits in the last byte
+ * tpbc:     target processed byte count
+ * vas:      virtual accelerator switch; the user mode interface
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+/* Defined in gzip_vas.c, which is linked into the same binary.  Only
+ * declare it here: two definitions of the same object fail to link when
+ * the compiler defaults to -fno-common.
+ */
+extern void *nx_fault_storage_address;
+
+#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+#define FNAME_MAX 1024
+#define FEXT ".nx.gz"
+
+/*
+ * Build and submit one fixed-Huffman compression job and wait for it.
+ *
+ * src/srclen describe the input and dst/dstlen the output buffer; both
+ * are handed to the engine as direct DDEs.  A nonzero with_count selects
+ * GZIP_FC_COMPRESS_RESUME_FHT_COUNT instead of
+ * GZIP_FC_COMPRESS_RESUME_FHT.  cmdp must be non-NULL (asserted); its
+ * crb and cpb fields are rewritten for the job and its csb is cleared.
+ * handle is passed unchanged to nxu_run_job().
+ *
+ * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
+ *
+ * Returns the completion code read from the csb_cc field.
+ * NOTE(review): the value returned by nxu_run_job() is ignored and the
+ * csb_v poll below has no timeout - confirm that a failed submission
+ * cannot leave this function spinning.
+ */
+static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
+				uint32_t dstlen, int with_count,
+				struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	int cc;
+	uint32_t fc;
+
+	assert(!!cmdp);
+
+	put32(cmdp->crb, gzip_fc, 0);  /* clear */
+	fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
+			    GZIP_FC_COMPRESS_RESUME_FHT;
+	putnn(cmdp->crb, gzip_fc, fc);
+	putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
+	memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+	/* Section 6.6 programming notes; spbc may be in two different
+	 * places depending on FC.  Zero the one that matches the function
+	 * code chosen above.
+	 */
+	if (!with_count)
+		put32(cmdp->cpb, out_spbc_comp, 0);
+	else
+		put32(cmdp->cpb, out_spbc_comp_with_count, 0);
+
+	/* Figure 6-3 6-4; CSB location.  The CSB used is the one embedded
+	 * in cmdp->crb.
+	 */
+	put64(cmdp->crb, csb_address, 0);
+	put64(cmdp->crb, csb_address,
+	      (uint64_t) &cmdp->crb.csb & csb_address_mask);
+
+	/* Source direct dde (scatter-gather list) */
+	clear_dde(cmdp->crb.source_dde);
+	putnn(cmdp->crb.source_dde, dde_count, 0);
+	put32(cmdp->crb.source_dde, ddebc, srclen);
+	put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
+
+	/* Target direct dde (scatter-gather list) */
+	clear_dde(cmdp->crb.target_dde);
+	putnn(cmdp->crb.target_dde, dde_count, 0);
+	put32(cmdp->crb.target_dde, ddebc, dstlen);
+	put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
+
+	/* Submit the crb, the job descriptor, to the accelerator */
+	nxu_run_job(cmdp, handle);
+
+	/* Poll for the csb.v bit; you should also consider sleeping
+	 * or interrupts.
+	 */
+	do { ; } while (getnn(cmdp->crb.csb, csb_v) == 0);
+
+	/* CC Table 6-8 */
+	cc = getnn(cmdp->crb.csb, csb_cc);
+
+	return cc;
+}
+
+/*
+ * Writes a minimal gzip member header (no file name, zero timestamp)
+ * at the start of buf and returns how many bytes were written.
+ * Layout per the gzip specification: https://tools.ietf.org/html/rfc1952
+ */
+int gzip_header_blank(char *buf)
+{
+	static const unsigned char hdr[] = {
+		0x1f,			/* ID1 */
+		0x8b,			/* ID2 */
+		0x08,			/* CM  */
+		0x00,			/* FLG */
+		0x00, 0x00, 0x00, 0x00,	/* MTIME */
+		0x04,			/* XFL 4=fastest */
+		0x03,			/* OS UNIX */
+	};
+
+	memcpy(buf, hdr, sizeof(hdr));
+
+	return (int) sizeof(hdr);
+}
+
+/*
+ * Read the whole of file fname into a freshly allocated buffer.
+ *
+ * On success the buffer is stored in *buf, its length in *bufsize, and 0
+ * is returned; the caller must free() the buffer.  On any error a message
+ * is printed with perror(), nothing is stored or left allocated, and -1
+ * is returned.
+ */
+int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
+{
+	struct stat statbuf;
+	FILE *fp;
+	char *p;
+	size_t num_bytes;
+	size_t fsize;
+
+	if (stat(fname, &statbuf)) {
+		perror(fname);
+		return -1;
+	}
+	fsize = (size_t) statbuf.st_size;
+
+	fp = fopen(fname, "r");
+	if (fp == NULL) {
+		perror(fname);
+		return -1;
+	}
+
+	/* Allocate outside of assert() so the call survives -DNDEBUG.  Ask
+	 * for at least one byte because malloc(0) is allowed to return NULL
+	 * for an empty file.
+	 */
+	p = malloc(fsize ? fsize : 1);
+	if (p == NULL) {
+		perror(fname);
+		fclose(fp);
+		return -1;
+	}
+
+	num_bytes = fread(p, 1, fsize, fp);
+	if (ferror(fp) || (num_bytes != fsize)) {
+		perror(fname);
+		free(p);
+		fclose(fp);
+		return -1;
+	}
+	fclose(fp);
+
+	*buf = p;
+	*bufsize = num_bytes;
+	return 0;
+}
+
+/*
+ * Write bufsize bytes from buf to file fname, creating or truncating it.
+ * Returns 0 on success; on failure prints a message with perror() and
+ * returns -1.  The stream is closed on every path.
+ */
+int write_output_file(char *fname, char *buf, size_t bufsize)
+{
+	FILE *fp;
+	size_t num_bytes;
+
+	fp = fopen(fname, "w");
+	if (fp == NULL) {
+		perror(fname);
+		return -1;
+	}
+	num_bytes = fwrite(buf, 1, bufsize, fp);
+	if (ferror(fp) || (num_bytes != bufsize)) {
+		perror(fname);
+		fclose(fp);
+		return -1;
+	}
+	/* Buffered data is flushed by fclose(), so a failure here means the
+	 * file on disk is incomplete.
+	 */
+	if (fclose(fp)) {
+		perror(fname);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Z_SYNC_FLUSH as described in zlib.h.
+ *
+ * buf points just past the last byte of compressed output and tebc is
+ * the number of valid bits in that last byte (0 when it is full).  When
+ * tebc > 0 the marker is merged into that partially filled byte, keeping
+ * only its low tebc bits.  Bit 0 of final is stored at the first free bit
+ * position; the 0x0000/0xFFFF pair of a zero length block follows at the
+ * next byte boundary that leaves room for the three header bits.
+ *
+ * Returns number of appended bytes (4 or 5), counted from the buf
+ * position that was passed in.
+ */
+int append_sync_flush(char *buf, int tebc, int final)
+{
+	uint64_t flush;
+	int shift = (tebc & 0x7);
+
+	if (tebc > 0) {
+		/* Last byte is partially full */
+		buf = buf - 1;
+		*buf = *buf & (unsigned char) ((1<<tebc)-1);
+	} else
+		*buf = 0;
+	flush = ((0x1ULL & final) << shift) | *buf;
+	shift = shift + 3; /* BFINAL and BTYPE written */
+	shift = (shift <= 8) ? 8 : 16;
+	flush |= (0xFFFF0000ULL) << shift; /* Zero length block */
+	shift = shift + 32;
+	while (shift > 0) {
+		*buf++ = (unsigned char) (flush & 0xffULL);
+		flush = flush >> 8;
+		shift = shift - 8;
+	}
+	return(((tebc > 5) || (tebc == 0)) ? 5 : 4);
+}
+
+/*
+ * Fault in pages prior to NX job submission. wr=1 may be required to
+ * touch writeable pages.  System zero pages do not fault-in the page as
+ * intended.  Typically set wr=1 for NX target pages and set wr=0 for NX
+ * source pages.
+ *
+ * One byte is read every page_len bytes starting at buf, and finally
+ * the last byte of the buffer; with wr set each byte read is also
+ * stored back unchanged.
+ *
+ * Returns 0 after touching the buffer, or -1 when buf is NULL or buf_len
+ * is not positive.
+ * NOTE(review): the assert() placed before the runtime check aborts
+ * first for a NULL buf or a negative buf_len, so with assertions enabled
+ * -1 is only returned for buf_len == 0.
+ */
+static int nx_touch_pages(void *buf, long buf_len, long page_len, int wr)
+{
+	char *begin = buf;
+	char *end = (char *) buf + buf_len - 1;
+	char t;
+
+	assert(buf_len >= 0 && !!buf);
+
+	NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf,
+			(buf + buf_len), buf_len, wr));
+
+	if (buf_len <= 0 || buf == NULL)
+		return -1;
+
+	do {
+		t = *begin;
+		if (wr)
+			*begin = t;
+		begin = begin + page_len;
+	} while (begin < end);
+
+	/* When buf_len is small or buf tail is in another page */
+	t = *end;
+	if (wr)
+		*end = t;
+
+	return 0;
+}
+
+/*
+ * Set or clear the BFINAL flag, i.e. the least significant bit of the
+ * first byte at buf.  The deflate block is expected to begin on a byte
+ * boundary at buf.
+ */
+static void set_bfinal(void *buf, int bfinal)
+{
+	unsigned char *first = buf;
+	const unsigned char bfinal_bit = 0x01;
+
+	*first = (unsigned char) (bfinal ? (*first | bfinal_bit)
+					 : (*first & ~bfinal_bit));
+}
+
+/*
+ * Compress the file named by argv[1] with the NX gzip engine and write
+ * the result to a file of the same name with FEXT appended.
+ *
+ * argc/argv are the program's command line; exactly one file name is
+ * expected.  handle is the engine handle passed on to
+ * compress_fht_sample().  The input is compressed in chunks of up to
+ * 1<<22 bytes, one job per chunk; chunks are joined with sync-flush
+ * blocks and the stream is framed by a gzip header and a crc32/ISIZE
+ * trailer.
+ *
+ * Returns 0 on success.  Failures are reported on stderr and end the
+ * process with exit(-1); internal consistency checks use assert().
+ */
+int compress_file(int argc, char **argv, void *handle)
+{
+	char *inbuf, *outbuf, *srcbuf, *dstbuf;
+	char outname[FNAME_MAX];
+	uint32_t srclen, dstlen;
+	uint32_t flushlen, chunk;
+	size_t inlen, outlen, dsttotlen, srctotlen;
+	uint32_t crc = 0;	/* stays 0 when the input file is empty */
+	uint32_t spbc, tpbc, tebc;
+	uint32_t isize;
+	int lzcounts = 0;
+	int cc;
+	int num_hdr_bytes;
+	struct nx_gzip_crb_cpb_t *cmdp;
+	uint32_t pagelen = 65536;
+	int fault_tries = 50;
+
+	if (argc != 2) {
+		fprintf(stderr, "usage: %s <fname>\n", argv[0]);
+		exit(-1);
+	}
+
+	cmdp = aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
+			     sizeof(struct nx_gzip_crb_cpb_t));
+	if (cmdp == NULL) {
+		perror("aligned_alloc");
+		exit(-1);
+	}
+
+	if (read_alloc_input_file(argv[1], &inbuf, &inlen))
+		exit(-1);
+	fprintf(stderr, "file %s read, %zu bytes\n", argv[1], inlen);
+
+	/* Generous output buffer for header/trailer */
+	outlen = 2 * inlen + 1024;
+
+	/* Allocate outside of assert() so the call survives -DNDEBUG */
+	outbuf = malloc(outlen);
+	if (outbuf == NULL) {
+		perror("malloc");
+		exit(-1);
+	}
+	nx_touch_pages(outbuf, outlen, pagelen, 1);
+
+	/* Compress piecemeal in smallish chunks */
+	chunk = 1<<22;
+
+	/* Write the gzip header to the stream */
+	num_hdr_bytes = gzip_header_blank(outbuf);
+	dstbuf    = outbuf + num_hdr_bytes;
+	outlen    = outlen - num_hdr_bytes;
+	dsttotlen = num_hdr_bytes;
+
+	srcbuf    = inbuf;
+	srctotlen = 0;
+
+	/* Init the CRB, the coprocessor request block */
+	memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+	/* Initial gzip crc32 */
+	put32(cmdp->cpb, in_crc, 0);
+
+	while (inlen > 0) {
+
+		/* Submit chunk size source data per job */
+		srclen = NX_MIN(chunk, inlen);
+		/* Supply large target in case data expands */
+		dstlen = NX_MIN(2*srclen, outlen);
+
+		/* Page faults are handled by the user code */
+
+		/* Fault-in pages; an improved code wouldn't touch so
+		 * many pages but would try to estimate the
+		 * compression ratio and adjust both the src and dst
+		 * touch amounts.
+		 */
+		nx_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
+				1);
+		nx_touch_pages(srcbuf, srclen, pagelen, 0);
+		nx_touch_pages(dstbuf, dstlen, pagelen, 1);
+
+		cc = compress_fht_sample(
+			srcbuf, srclen,
+			dstbuf, dstlen,
+			lzcounts, cmdp, handle);
+
+		if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
+		    cc != ERR_NX_TRANSLATION) {
+			fprintf(stderr, "nx error: cc= %d\n", cc);
+			exit(-1);
+		}
+
+		/* Page faults are handled by the user code: retry the same
+		 * chunk, which touches its pages again at the loop top.
+		 */
+		if (cc == ERR_NX_TRANSLATION) {
+			NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
+			NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
+				  fault_tries,
+				  (unsigned long long) cmdp->crb.csb.fsaddr));
+
+			fault_tries--;
+			if (fault_tries > 0) {
+				continue;
+			} else {
+				fprintf(stderr, "error: cannot progress; ");
+				fprintf(stderr, "too many faults\n");
+				exit(-1);
+			}
+		}
+
+		fault_tries = 50; /* Reset for the next chunk */
+
+		inlen     = inlen - srclen;
+		srcbuf    = srcbuf + srclen;
+		srctotlen = srctotlen + srclen;
+
+		/* Two possible locations for spbc depending on the function
+		 * code.
+		 */
+		spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
+			get32(cmdp->cpb, out_spbc_comp_with_count);
+		assert(spbc == srclen);
+
+		/* Target byte count */
+		tpbc = get32(cmdp->crb.csb, tpbc);
+		/* Target ending bit count */
+		tebc = getnn(cmdp->cpb, out_tebc);
+		NXPRT(fprintf(stderr, "compressed chunk %d ", spbc));
+		NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
+
+		if (inlen > 0) { /* More chunks to go */
+			set_bfinal(dstbuf, 0);
+			dstbuf    = dstbuf + tpbc;
+			dsttotlen = dsttotlen + tpbc;
+			outlen    = outlen - tpbc;
+			/* Round up to the next byte with a flush
+			 * block; do not set the BFINAL bit.
+			 */
+			flushlen  = append_sync_flush(dstbuf, tebc, 0);
+			dsttotlen = dsttotlen + flushlen;
+			outlen    = outlen - flushlen;
+			dstbuf    = dstbuf + flushlen;
+			NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
+					flushlen));
+		} else {  /* Done */
+			/* Set the BFINAL bit of the last block per Deflate
+			 * specification.
+			 */
+			set_bfinal(dstbuf, 1);
+			dstbuf    = dstbuf + tpbc;
+			dsttotlen = dsttotlen + tpbc;
+			outlen    = outlen - tpbc;
+		}
+
+		/* Resuming crc32 for the next chunk */
+		crc = get32(cmdp->cpb, out_crc);
+		put32(cmdp->cpb, in_crc, crc);
+		crc = be32toh(crc);
+	}
+
+	/* Append crc32 and ISIZE to the end.  ISIZE is the input length
+	 * modulo 2^32 stored least significant byte first, so build it
+	 * explicitly instead of copying the first four bytes of a size_t,
+	 * which are the wrong ones on a big-endian host.
+	 */
+	isize = htole32((uint32_t) srctotlen);
+	memcpy(dstbuf, &crc, 4);
+	memcpy(dstbuf+4, &isize, 4);
+	dsttotlen = dsttotlen + 8;
+	outlen    = outlen - 8;
+
+	assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
+	strcpy(outname, argv[1]);
+	strcat(outname, FEXT);
+	if (write_output_file(outname, outbuf, dsttotlen)) {
+		fprintf(stderr, "write error: %s\n", outname);
+		exit(-1);
+	}
+
+	fprintf(stderr, "compressed %zu to %zu bytes total, ", srctotlen,
+		dsttotlen);
+	fprintf(stderr, "crc32 checksum = %08x\n", crc);
+
+	free(inbuf);
+	free(outbuf);
+	free(cmdp);
+
+	return 0;
+}
+
+void sigsegv_handler(int sig, siginfo_t *info, void *ctx)
+{
+	fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(),
+		sig, info->si_code, info->si_addr); /* NOTE(review): fprintf()
+						     * is not async-signal-safe;
+						     * confirm this is acceptable
+						     * in a test program.
+						     */
+
+	nx_fault_storage_address = info->si_addr; /* remember the faulting
+						   * address; ctx is unused
+						   */
+}
+
+/*
+ * Install the SIGSEGV handler, open the NX gzip engine and compress the
+ * file named on the command line.  Returns the result of
+ * compress_file(); exits with a failure status when the handler or the
+ * engine cannot be set up.
+ */
+int main(int argc, char **argv)
+{
+	int rc;
+	struct sigaction act;
+	void *handle;
+
+	nx_dbg = 0;
+	nx_gzip_log = NULL;
+
+	/* Zero the whole structure rather than naming individual members:
+	 * sa_handler and sa_sigaction may share storage, and sa_restorer is
+	 * not meant to be set by applications.
+	 */
+	memset(&act, 0, sizeof(act));
+	act.sa_sigaction = sigsegv_handler;
+	act.sa_flags = SA_SIGINFO;
+	sigemptyset(&act.sa_mask);
+	if (sigaction(SIGSEGV, &act, NULL)) {
+		perror("sigaction");
+		exit(-1);
+	}
+
+	handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+	if (!handle) {
+		fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+		exit(-1);
+	}
+
+	rc = compress_file(argc, argv, handle);
+
+	nx_function_end(handle);
+
+	return rc;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
new file mode 100644
index 000000000000..c8f943f3c029
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nx-gzip.h"
+#include "nx.h"
+#include "copy-paste.h"
+#include "nxu.h"
+#include "nx_dbg.h"
+#include <sys/platform/ppc.h>
+
+#define barrier()
+#define hwsync()    ({ asm volatile("sync" ::: "memory"); })
+
+#ifndef NX_NO_CPU_PRI
+#define cpu_pri_default()  ({ asm volatile ("or 2, 2, 2"); })
+#define cpu_pri_low()      ({ asm volatile ("or 31, 31, 31"); })
+#else
+#define cpu_pri_default()
+#define cpu_pri_low()
+#endif
+
+void *nx_fault_storage_address;
+
+struct nx_handle {
+	int fd;			/* device descriptor stored by open_device_nodes() */
+	int function;		/* function id given to nx_function_begin() */
+	void *paste_addr;	/* mmap()ed address plus 0x400 */
+};
+
+/*
+ * Open the NX device node devname, issue the VAS_GZIP_TX_WIN_OPEN ioctl
+ * on it and map 4096 bytes of the device.
+ *
+ * pri is passed to the driver as the vas_id attribute.  On success the
+ * open descriptor and the paste address (0x400 bytes into the mapping)
+ * are stored in handle and 0 is returned; the descriptor then belongs to
+ * the handle and stays open until nx_function_end().  On failure the
+ * descriptor is closed and a negative errno value is returned.
+ */
+static int open_device_nodes(char *devname, int pri, struct nx_handle *handle)
+{
+	int rc, fd;
+	void *addr;
+	struct vas_gzip_setup_attr txattr;
+
+	fd = open(devname, O_RDWR);
+	if (fd < 0) {
+		/* Save errno first: fprintf() is allowed to change it */
+		rc = -errno;
+		fprintf(stderr, " open device name %s\n", devname);
+		return rc;
+	}
+
+	memset(&txattr, 0, sizeof(txattr));
+	txattr.version = 1;
+	txattr.vas_id = pri;
+	rc = ioctl(fd, VAS_GZIP_TX_WIN_OPEN, (unsigned long)&txattr);
+	if (rc < 0) {
+		int err = errno;
+
+		fprintf(stderr, "ioctl() n %d, error %d\n", rc, err);
+		rc = -err;
+		goto out;
+	}
+
+	addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0ULL);
+	if (addr == MAP_FAILED) {
+		int err = errno;
+
+		fprintf(stderr, "mmap() failed, errno %d\n", err);
+		rc = -err;
+		goto out;
+	}
+	handle->fd = fd;
+	handle->paste_addr = (void *)((char *)addr + 0x400);
+
+	/* Success: do not fall through to the error path, which would close
+	 * the descriptor that was just stored in the handle.
+	 */
+	return 0;
+
+out:
+	close(fd);
+	return rc;
+}
+
+/*
+ * Allocate an engine handle and open the NX gzip device for it.
+ *
+ * function must be NX_FUNC_COMP_GZIP; pri is forwarded to
+ * open_device_nodes().  Returns the new handle, to be released with
+ * nx_function_end(), or NULL with errno set (EINVAL, ENOMEM or the error
+ * reported by open_device_nodes()).
+ */
+void *nx_function_begin(int function, int pri)
+{
+	int rc;
+	char *devname = "/dev/crypto/nx-gzip";
+	struct nx_handle *nxhandle;
+
+	/* errno is assigned after each fprintf() below because the caller
+	 * reads it and fprintf() is allowed to overwrite it.
+	 */
+	if (function != NX_FUNC_COMP_GZIP) {
+		fprintf(stderr, " NX_FUNC_COMP_GZIP not found\n");
+		errno = EINVAL;
+		return NULL;
+	}
+
+	nxhandle = malloc(sizeof(*nxhandle));
+	if (!nxhandle) {
+		fprintf(stderr, " No memory\n");
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	nxhandle->function = function;
+	rc = open_device_nodes(devname, pri, nxhandle);
+	if (rc < 0) {
+		fprintf(stderr, " open_device_nodes failed\n");
+		/* The handle is not returned, so release it here */
+		free(nxhandle);
+		errno = -rc;
+		return NULL;
+	}
+
+	return nxhandle;
+}
+
+/*
+ * Release a handle obtained from nx_function_begin(): unmap the 4096
+ * byte mapping, close the device descriptor and free the handle.
+ *
+ * Returns the result of munmap() (0 on success, negative on failure).
+ * The descriptor and the handle are released in both cases; handle must
+ * not be used afterwards.
+ */
+int nx_function_end(void *handle)
+{
+	int rc;
+	struct nx_handle *nxhandle = handle;
+
+	/* Cast before subtracting: arithmetic on void * is a GNU extension */
+	rc = munmap((char *) nxhandle->paste_addr - 0x400, 4096);
+	if (rc < 0)
+		fprintf(stderr, "munmap() failed, errno %d\n", errno);
+
+	/* Release the descriptor and the handle even when munmap() failed so
+	 * that an error does not leak them.
+	 */
+	close(nxhandle->fd);
+	free(nxhandle);
+
+	return rc;
+}
+
+static int nx_wait_for_csb(struct nx_gzip_crb_cpb_t *cmdp)
+{
+	long poll = 0;
+	uint64_t t;
+
+	/* Poll the csb_v bit of cmdp->crb.csb until it is set.  Returns 0
+	 * once it is set, -EAGAIN as soon as nx_fault_storage_address is
+	 * found non-NULL, and -ETIMEDOUT when the bit is still clear after
+	 * more than CSB_MAX_POLL polls.
+	 *
+	 * Save power and let other threads use the h/w. top may show
+	 * 100% but only because OS doesn't know we slowed this
+	 * h/w thread while polling. We're letting other threads have
+	 * higher throughput on the core.
+	 */
+	cpu_pri_low();
+
+#define CSB_MAX_POLL 200000000UL
+#define USLEEP_TH     300000UL
+
+	t = __ppc_get_timebase();
+
+	while (getnn(cmdp->crb.csb, csb_v) == 0) {
+		++poll;
+		hwsync();
+
+		cpu_pri_low();
+
+		/* usleep(0) takes around 29000 ticks ~60 us.
+		 * 300000 is spinning for about 600 us then
+		 * start sleeping.  t is not refreshed, so once the
+		 * threshold has passed every further iteration sleeps.
+		 */
+		if ((__ppc_get_timebase() - t) > USLEEP_TH) {
+			cpu_pri_default();
+			usleep(1);
+		}
+
+		if (poll > CSB_MAX_POLL)
+			break;
+
+		/* Fault address from signal handler */
+		if (nx_fault_storage_address) {
+			cpu_pri_default();
+			return -EAGAIN;
+		}
+
+	}
+
+	cpu_pri_default();
+
+	/* hw has updated csb and output buffer */
+	hwsync();
+
+	/* Check CSB flags; the bit is still clear only if the poll limit
+	 * ended the loop.
+	 */
+	if (getnn(cmdp->crb.csb, csb_v) == 0) {
+		fprintf(stderr, "CSB still not valid after %d polls.\n",
+			(int) poll);
+		prt_err("CSB still not valid after %d polls, giving up.\n",
+			(int) poll);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/*
+ * Submit the request block in cmdp to the engine through the paste
+ * address held in handle, then wait for its status block.
+ *
+ * Up to 5000 copy/paste attempts are made.  A paste result of 2 or 3 is
+ * followed by nx_wait_for_csb(); if that reports -EAGAIN the recorded
+ * fault address is touched and the job is submitted again.  Any other
+ * paste result is retried after a short spin (first attempts) or a
+ * usleep().
+ *
+ * Returns 0 when the status block became valid.  Otherwise returns the
+ * last value obtained: a negative errno from nx_wait_for_csb() or the
+ * result of the last paste attempt.
+ */
+int nxu_run_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	int i, ret, retries;
+	/* Counts failed pastes across iterations so that the message below
+	 * is printed only once per 100 failures.
+	 */
+	unsigned int pr = 0;
+	struct nx_handle *nxhandle = handle;
+
+	assert(handle != NULL);
+	i = 0;
+	retries = 5000;
+	while (i++ < retries) {
+		hwsync();
+		vas_copy(&cmdp->crb, 0);
+		ret = vas_paste(nxhandle->paste_addr, 0);
+		hwsync();
+
+		NXPRT(fprintf(stderr, "Paste attempt %d/%d returns 0x%x\n",
+				i, retries, ret));
+
+		if ((ret == 2) || (ret == 3)) {
+
+			ret = nx_wait_for_csb(cmdp);
+			if (!ret) {
+				goto out;
+			} else if (ret == -EAGAIN) {
+				long x;
+
+				prt_err("Touching address %p, 0x%lx\n",
+					 nx_fault_storage_address,
+					 *(long *) nx_fault_storage_address);
+				/* Read and write back to fault the page in */
+				x = *(long *) nx_fault_storage_address;
+				*(long *) nx_fault_storage_address = x;
+				nx_fault_storage_address = 0;
+				continue;
+			} else {
+				prt_err("wait_for_csb() returns %d\n", ret);
+				break;
+			}
+		} else {
+			if (i < 10) {
+				/* spin for few ticks */
+#define SPIN_TH 500UL
+				uint64_t fail_spin;
+
+				fail_spin = __ppc_get_timebase();
+				while ((__ppc_get_timebase() - fail_spin) <
+					 SPIN_TH)
+					;
+			} else {
+				/* sleep */
+				if (pr++ % 100 == 0) {
+					prt_err("Paste attempt %d/", i);
+					prt_err("%d, failed pid= %d\n", retries,
+						getpid());
+				}
+				usleep(1);
+			}
+			continue;
+		}
+	}
+
+out:
+	cpu_pri_default();
+
+	return ret;
+}
-- 
2.21.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-04-08 13:21 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-27 18:16 [PATCH V2 0/5] selftests/powerpc: Add NX-GZIP engine testcase Raphael Moreira Zinsly
2020-03-27 18:16 ` [PATCH V2 1/5] selftests/powerpc: Add header files for GZIP engine test Raphael Moreira Zinsly
2020-03-27 18:16 ` [PATCH V2 2/5] selftests/powerpc: Add header files for NX compresion/decompression Raphael Moreira Zinsly
2020-03-27 18:16 ` [PATCH V2 3/5] selftests/powerpc: Add NX-GZIP engine compress testcase Raphael Moreira Zinsly
2020-04-02  6:17   ` Daniel Axtens
2020-04-02 10:55     ` Michael Ellerman
2020-04-08 13:21     ` Raphael Moreira Zinsly
2020-03-27 18:16 ` [PATCH V2 4/5] selftests/powerpc: Add NX-GZIP engine decompress testcase Raphael Moreira Zinsly
2020-03-27 18:16 ` [PATCH V2 5/5] selftests/powerpc: Add README for GZIP engine tests Raphael Moreira Zinsly

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).