All of lore.kernel.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH i-g-t 0/2] Add crc32 calculation on dg2 gpu
@ 2022-06-03 13:05 Zbigniew Kempczyński
  2022-06-03 13:05 ` [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2 Zbigniew Kempczyński
                   ` (4 more replies)
  0 siblings, 5 replies; 16+ messages in thread
From: Zbigniew Kempczyński @ 2022-06-03 13:05 UTC (permalink / raw)
  To: igt-dev

Add crc32 calculation library on gpu (dg2).

v2: use predication instead of cond_bbe with memory access.
v3: migrate crc32 tables to separate file to avoid license clashes.

Zbigniew Kempczyński (2):
  lib/i915_crc: Introduce crc32 on gpu for DG2
  tests/api_intel_bb: Add crc32 checking test for DG2

 lib/i915/i915_crc.c         | 311 ++++++++++++++++++++++++++++++++++++
 lib/i915/i915_crc.h         |  17 ++
 lib/i915/i915_crc32_table.c | 105 ++++++++++++
 lib/intel_reg.h             |   7 +
 lib/meson.build             |   1 +
 tests/i915/api_intel_bb.c   |  67 ++++++++
 6 files changed, 508 insertions(+)
 create mode 100644 lib/i915/i915_crc.c
 create mode 100644 lib/i915/i915_crc.h
 create mode 100644 lib/i915/i915_crc32_table.c

-- 
2.32.0

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2
  2022-06-03 13:05 [igt-dev] [PATCH i-g-t 0/2] Add crc32 calculation on dg2 gpu Zbigniew Kempczyński
@ 2022-06-03 13:05 ` Zbigniew Kempczyński
  2022-06-03 13:11   ` Petri Latvala
  2022-06-03 13:05 ` [igt-dev] [PATCH i-g-t 2/2] tests/api_intel_bb: Add crc32 checking test " Zbigniew Kempczyński
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 16+ messages in thread
From: Zbigniew Kempczyński @ 2022-06-03 13:05 UTC (permalink / raw)
  To: igt-dev

Adding crc32 calculation on gpu gives us a new way to verify data
integrity without having to trust that the cpu mapping is correct.

Patch introduces calculating crc32 on DG2 only. On older gens ALU
(MI_MATH) doesn't support bit-shifting instructions as well as multiply
or divide. Emulating n-bit shifts costs hundreds of instructions with
predicated SRM (works on render engine only). Another limitation is lack
of indexed load / store. On DG2 we can use WPARID and CS_MI_ADDRESS_OFFSET
to achieve indexed operation on memory.

Due to performance reasons (cpu crc32 calculation even on WC memory is
still much faster than on gpu, also depends on calculated object memory
region) calculation will complete in a reasonable amount of time only for
a few MiB.

v2: - use registers relative to engine to allow run on all engines (Chris)
    - use predication instead of memory access to get better performance
      (Chris)
    - add location where crc32 implementation comes from (Petri)

v3: - extract crc32 table + cpu_crc32() to separate i915_crc32_table.c

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
---
 lib/i915/i915_crc.c         | 311 ++++++++++++++++++++++++++++++++++++
 lib/i915/i915_crc.h         |  17 ++
 lib/i915/i915_crc32_table.c | 105 ++++++++++++
 lib/intel_reg.h             |   7 +
 lib/meson.build             |   1 +
 5 files changed, 441 insertions(+)
 create mode 100644 lib/i915/i915_crc.c
 create mode 100644 lib/i915/i915_crc.h
 create mode 100644 lib/i915/i915_crc32_table.c

diff --git a/lib/i915/i915_crc.c b/lib/i915/i915_crc.c
new file mode 100644
index 0000000000..c26a8e05b9
--- /dev/null
+++ b/lib/i915/i915_crc.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include "drmtest.h"
+#include "gem_create.h"
+#include "gem_engine_topology.h"
+#include "gem_mman.h"
+#include "i830_reg.h"
+#include "i915_drm.h"
+#include "intel_reg.h"
+#include "intel_chipset.h"
+#include "ioctl_wrappers.h"
+#include "intel_allocator.h"
+#include "i915/i915_crc.h"
+
+/* Include crc32 table + cpu_crc32() */
+#include "i915_crc32_table.c"
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+/* LOAD0/LOAD1 take one operand; MI_MATH_INSTR() needs three arguments, so op2 is passed as 0 */
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1, 0x0)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1, 0x0)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* DG2+ */
+#define   MI_MATH_SHL                   MI_MATH_INSTR(0x105, 0x0, 0x0)
+#define   MI_MATH_SHR                   MI_MATH_INSTR(0x106, 0x0, 0x0)
+#define   MI_MATH_SAR                   MI_MATH_INSTR(0x107, 0x0, 0x0)
+
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_SET_PREDICATE                MI_INSTR(0x01, 0)
+#define MI_ARB_CHECK                    MI_INSTR(0x5, 0)
+#define MI_LOAD_REGISTER_REG            MI_INSTR(0x2A, 1)
+#define CS_GPR(x)                       (0x600 + 8 * (x))
+#define GPR(x)                          CS_GPR(x)
+#define R(x)                            (x)
+#define USERDATA(offset, idx)	        ((offset) + (0x100 + (idx)) * 4)
+#define OFFSET(obj_offset, current, start) \
+	((obj_offset) + (current - start) * 4)
+
+#define MI_PREDICATE_RESULT             0x3B8
+#define WPARID                          0x21C
+#define CS_MI_ADDRESS_OFFSET            0x3B4
+
+#define LOAD_REGISTER_REG(__reg_src, __reg_dst) do { \
+		*bb++ = MI_LOAD_REGISTER_REG | BIT(19) | BIT(18); \
+		*bb++ = (__reg_src); \
+		*bb++ = (__reg_dst); \
+	} while (0)
+
+#define LOAD_REGISTER_IMM32(__reg, __imm1) do { \
+		*bb++ = MI_LOAD_REGISTER_IMM | BIT(19); \
+		*bb++ = (__reg); \
+		*bb++ = (__imm1); \
+	} while (0)
+
+#define LOAD_REGISTER_IMM64(__reg, __imm1, __imm2) do { \
+		*bb++ = (MI_LOAD_REGISTER_IMM + 2) | BIT(19); \
+		*bb++ = (__reg); \
+		*bb++ = (__imm1); \
+		*bb++ = (__reg) + 4; \
+		*bb++ = (__imm2); \
+	} while (0)
+
+#define LOAD_REGISTER_MEM(__reg, __offset) do { \
+		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19); \
+		*bb++ = (__reg); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+#define LOAD_REGISTER_MEM_WPARID(__reg, __offset) do { \
+		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19) | BIT(16); \
+		*bb++ = (__reg); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+#define STORE_REGISTER_MEM(__reg, __offset) do { \
+		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19); \
+		*bb++ = (__reg); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+/*
+ * Same dwords as STORE_REGISTER_MEM() with BIT(21) additionally set in the
+ * command header.  NOTE(review): presumably BIT(21) is the predicate-enable
+ * bit of MI_STORE_REGISTER_MEM - confirm against the command reference.
+ * Expects a local "uint32_t *bb" write cursor, like the other emit macros.
+ */
+#define STORE_REGISTER_MEM_PREDICATED(__reg, __offset) do { \
+		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19) | BIT(21); \
+		*bb++ = (__reg); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+#define COND_BBE(__value, __offset, __condition) do { \
+		*bb++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (__condition) | 2; \
+		*bb++ = (__value); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+#define MATH_4_STORE(__r1, __r2, __op, __r3) do { \
+		*bb++ = MI_MATH(4); \
+		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(__r1)); \
+		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(__r2)); \
+		*bb++ = (__op); \
+		*bb++ = MI_MATH_STORE(MI_MATH_REG(__r3), MI_MATH_REG_ACCU); \
+	} while (0)
+
+#define BBSIZE 4096
+
+/* Aliasing for easier refactoring */
+#define GPR_SIZE	GPR(0)
+#define R_SIZE		R(0)
+
+#define GPR_CRC		GPR(1)
+#define R_CRC		R(1)
+
+#define GPR_INDATA_IDX  GPR(2)
+#define R_INDATA_IDX	R(2)
+
+#define GPR_TABLE_IDX   GPR(3)
+#define R_TABLE_IDX	R(3)
+
+#define GPR_CURR_DW	GPR(4)
+#define R_CURR_DW	R(4)
+
+#define GPR_CONST_2	GPR(5)
+#define R_CONST_2	R(5)
+
+#define GPR_CONST_4	GPR(6)
+#define R_CONST_4	R(6)
+
+#define GPR_CONST_8	GPR(7)
+#define R_CONST_8	R(7)
+
+#define GPR_CONST_ff	GPR(8)
+#define R_CONST_ff	R(8)
+
+#define GPR_ffffffff    GPR(9)
+#define R_ffffffff	R(9)
+
+#define GPR_TMP_1	GPR(10)
+#define R_TMP_1		R(10)
+
+#define GPR_TMP_2	GPR(11)
+#define R_TMP_2		R(11)
+/*
+ * fill_batch() - write the crc32 command stream into the batch object.
+ *
+ * Maps @bb_handle, zeroes BBSIZE bytes and emits a loop which consumes one
+ * dword of input per pass (four table lookups, one per byte).  After the
+ * loop the crc is xored with 0xffffffff and stored at
+ * USERDATA(@table_offset, 0), i.e. at dword 0x100 of the table object.
+ *
+ * @bb_offset, @table_offset and @data_offset are the addresses placed in the
+ * emitted commands; @data_size is in bytes and must be a multiple of 4.
+ */
+static void fill_batch(int i915, uint32_t bb_handle, uint64_t bb_offset,
+		       uint64_t table_offset, uint64_t data_offset, uint32_t data_size)
+{
+	uint32_t *bb, *batch, *jmp;
+	const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
+	const int use_64b = gen >= 8;
+	uint64_t offset;
+	uint64_t crc = USERDATA(table_offset, 0); /* where the result dword is stored */
+
+	igt_assert(data_size % 4 == 0);
+
+	batch = gem_mmap__device_coherent(i915, bb_handle, 0, BBSIZE,
+					  PROT_READ | PROT_WRITE);
+	memset(batch, 0, BBSIZE);
+
+	bb = batch; /* write cursor advanced by the emit macros */
+
+	LOAD_REGISTER_IMM64(GPR_SIZE, data_size, 0);
+	LOAD_REGISTER_IMM64(GPR_CRC, ~0U, 0);		/* crc start - 0xffffffff */
+	LOAD_REGISTER_IMM64(GPR_INDATA_IDX, 0, 0);	/* data_offset index (0) */
+	LOAD_REGISTER_IMM64(GPR_CONST_2, 2, 0);		/* const value 2 */
+	LOAD_REGISTER_IMM64(GPR_CONST_4, 4, 0);		/* const value 4 */
+	LOAD_REGISTER_IMM64(GPR_CONST_8, 8, 0);		/* const value 8 */
+	LOAD_REGISTER_IMM64(GPR_CONST_ff, 0xff, 0);	/* const value 0xff */
+	LOAD_REGISTER_IMM64(GPR_ffffffff, ~0U, 0);	/* const value 0xffffffff */
+
+	/* for indexed reads from memory */
+	LOAD_REGISTER_IMM32(WPARID, 1);
+
+	jmp = bb; /* loop entry: target of the MI_BATCH_BUFFER_START emitted below */
+
+	*bb++ = MI_SET_PREDICATE;
+	*bb++ = MI_ARB_CHECK;
+	/*
+	 * Copy the data index into CS_MI_ADDRESS_OFFSET and load the current
+	 * input dword.  NOTE(review): presumably the WPARID variant of the
+	 * load adds that offset to data_offset (the commit message describes
+	 * WPARID + CS_MI_ADDRESS_OFFSET as the indexed-access mechanism) -
+	 * confirm against the command reference.
+	 */
+	LOAD_REGISTER_REG(GPR_INDATA_IDX, CS_MI_ADDRESS_OFFSET);
+	LOAD_REGISTER_MEM_WPARID(GPR_CURR_DW, data_offset);
+
+	for (int byte = 0; byte < 4; byte++) {
+		if (byte != 0)
+			MATH_4_STORE(R_CURR_DW, R_CONST_8,
+				     MI_MATH_SHR, R_CURR_DW); /* dw >> 8 */
+
+		/* crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); */
+		MATH_4_STORE(R_CURR_DW, R_CONST_ff,
+			     MI_MATH_AND, R_TMP_1); /* dw & 0xff */
+		MATH_4_STORE(R_CRC, R_TMP_1,
+			     MI_MATH_XOR, R_TMP_1); /* crc ^ tmp */
+		MATH_4_STORE(R_TMP_1, R_CONST_ff,
+			     MI_MATH_AND, R_TMP_1); /* tmp & 0xff */
+		MATH_4_STORE(R_TMP_1, R_CONST_2,
+			     MI_MATH_SHL, R_TABLE_IDX); /* tmp << 2 (crc idx) */
+
+		/* same indexed-load sequence as above, this time into the crc table */
+		LOAD_REGISTER_REG(GPR_TABLE_IDX, CS_MI_ADDRESS_OFFSET);
+		LOAD_REGISTER_MEM_WPARID(GPR_TMP_1, table_offset);
+
+		MATH_4_STORE(R_CRC, R_CONST_8,
+			     MI_MATH_SHR, R_TMP_2); /* crc >> 8 (shift) */
+		MATH_4_STORE(R_TMP_2, R_TMP_1,
+			     MI_MATH_XOR, R_CRC); /* crc = tab[v] ^ shift */
+	}
+
+	/* increment data index */
+	MATH_4_STORE(R_INDATA_IDX, R_CONST_4, MI_MATH_ADD, R_INDATA_IDX);
+
+	/*
+	 * loop until R_SIZE == 0, R_SIZE = R_SIZE - R_CONST_4
+	 *
+	 * The zero flag of the subtraction is stored in R_TMP_2 and then
+	 * copied to MI_PREDICATE_RESULT.  NOTE(review): presumably this
+	 * predicates the jump below so it is skipped once the size reaches
+	 * zero - confirm against the MI_SET_PREDICATE documentation.
+	 */
+
+	*bb++ = MI_MATH(5);
+	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(R_SIZE));
+	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(R_CONST_4));
+	*bb++ = MI_MATH_SUB;
+	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_SIZE), MI_MATH_REG_ACCU);
+	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_TMP_2), MI_MATH_REG_ZF);
+	LOAD_REGISTER_REG(GPR_TMP_2, MI_PREDICATE_RESULT);
+
+	*bb++ = MI_BATCH_BUFFER_START | BIT(15) | BIT(8) | use_64b;
+	offset = OFFSET(bb_offset, jmp, batch); /* address of the loop entry inside the batch */
+	*bb++ = offset;
+	*bb++ = offset >> 32;
+
+	*bb++ = MI_SET_PREDICATE;
+
+	MATH_4_STORE(R_CRC, R_ffffffff, MI_MATH_XOR, R_TMP_1); /* crc ^ 0xffffffff */
+	STORE_REGISTER_MEM(GPR_TMP_1, crc); /* write the result to the userdata dword */
+
+	*bb++ = MI_BATCH_BUFFER_END;
+
+	gem_munmap(batch, BBSIZE);
+}
+/*
+ * i915_crc32() - calculate crc32 of a gem object on the gpu.
+ * @i915: drm fd
+ * @ahnd: allocator handle used to obtain offsets for all three objects
+ * @ctx: context to submit on (ctx->id is placed in execbuf.rsvd1)
+ * @e: engine to run on (e->flags is placed in execbuf.flags)
+ * @data_handle: handle of the object to checksum
+ * @data_size: number of bytes to checksum, must be a multiple of 4
+ *
+ * Creates a table object and a batch object in REGION_LMEM(0), uploads
+ * crc32_tab to the table object, executes the batch built by fill_batch(),
+ * waits for it and reads the result from dword 0x100 of the table object.
+ * Both temporary objects are closed before returning.
+ *
+ * NOTE(review): no call releasing the offsets obtained with get_offset() is
+ * visible in this function - check whether the allocator requires one.
+ *
+ * Returns: the dword written by the batch, i.e. the crc32 of the data.
+ */
+uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
+		    const struct intel_execution_engine2 *e,
+		    uint32_t data_handle, uint32_t data_size)
+{
+	struct drm_i915_gem_execbuffer2 execbuf = {};
+	struct drm_i915_gem_exec_object2 obj[3] = {};
+	uint64_t bb_offset, table_offset, data_offset;
+	uint32_t bb, table, crc, table_size = 4096;
+	uint32_t *ptr;
+
+	igt_assert(data_size % 4 == 0);
+
+	table = gem_create_in_memory_regions(i915, table_size, REGION_LMEM(0));
+	gem_write(i915, table, 0, crc32_tab, sizeof(crc32_tab));
+
+	table_offset = get_offset(ahnd, table, table_size, 0);
+	data_offset = get_offset(ahnd, data_handle, data_size, 0);
+
+	obj[0].offset = table_offset;
+	obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE; /* the batch stores the result here */
+	obj[0].handle = table;
+
+	obj[1].offset = data_offset;
+	obj[1].flags = EXEC_OBJECT_PINNED;
+	obj[1].handle = data_handle;
+
+	bb = gem_create_in_memory_regions(i915, BBSIZE, REGION_LMEM(0));
+	bb_offset = get_offset(ahnd, bb, BBSIZE, 0);
+	fill_batch(i915, bb, bb_offset, table_offset, data_offset, data_size);
+	obj[2].offset = bb_offset; /* batch object is the last entry of the array */
+	obj[2].flags = EXEC_OBJECT_PINNED;
+	obj[2].handle = bb;
+	execbuf.buffer_count = 3;
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.flags = e->flags;
+	execbuf.rsvd1 = ctx->id;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, table); /* wait before reading the result back */
+
+	ptr = gem_mmap__device_coherent(i915, table, 0, table_size, PROT_READ);
+	crc = ptr[0x100]; /* same dword as USERDATA(table_offset, 0) in fill_batch() */
+	gem_munmap(ptr, table_size);
+	gem_close(i915, table);
+	gem_close(i915, bb);
+
+	return crc;
+}
+
+/* Tell whether gpu crc32 can be used on @i915: true only for DG2 devices. */
+bool supports_gpu_crc32(int i915)
+{
+	const uint16_t pci_id = intel_get_drm_devid(i915);
+
+	return IS_DG2(pci_id);
+}
diff --git a/lib/i915/i915_crc.h b/lib/i915/i915_crc.h
new file mode 100644
index 0000000000..bb0195e2a8
--- /dev/null
+++ b/lib/i915/i915_crc.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _I915_CRC_H_
+#define _I915_CRC_H_
+
+#include <stdint.h>
+#include "intel_ctx.h"
+
+uint32_t cpu_crc32(const void *buf, size_t size);
+uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
+		    const struct intel_execution_engine2 *e,
+		    uint32_t data_handle, uint32_t data_size);
+bool supports_gpu_crc32(int i915);
+
+#endif /* _I915_CRC_H_ */
diff --git a/lib/i915/i915_crc32_table.c b/lib/i915/i915_crc32_table.c
new file mode 100644
index 0000000000..eca5e43218
--- /dev/null
+++ b/lib/i915/i915_crc32_table.c
@@ -0,0 +1,105 @@
+/*-
+ *  COPYRIGHT (C) 1986 Gary S. Brown.  You may use this program, or
+ *  code or tables extracted from it, as desired without restriction.
+ */
+
+/*
+ *  First, the polynomial itself and its table of feedback terms.  The
+ *  polynomial is
+ *  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
+ *
+ *  Note that we take it "backwards" and put the highest-order term in
+ *  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
+ *  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
+ *  the MSB being 1
+ *
+ *  Note that the usual hardware shift register implementation, which
+ *  is what we're using (we're merely optimizing it by doing eight-bit
+ *  chunks at a time) shifts bits into the lowest-order term.  In our
+ *  implementation, that means shifting towards the right.  Why do we
+ *  do it this way?  Because the calculated CRC must be transmitted in
+ *  order from highest-order term to lowest-order term.  UARTs transmit
+ *  characters in order from LSB to MSB.  By storing the CRC this way
+ *  we hand it to the UART in the order low-byte to high-byte; the UART
+ *  sends each low-bit to high-bit; and the result is transmission bit
+ *  by bit from highest- to lowest-order term without requiring any bit
+ *  shuffling on our part.  Reception works similarly
+ *
+ *  The feedback terms table consists of 256, 32-bit entries.  Notes
+ *
+ *      The table can be generated at runtime if desired; code to do so
+ *      is shown later.  It might not be obvious, but the feedback
+ *      terms simply represent the results of eight shift/xor opera-
+ *      tions for all combinations of data and CRC register values
+ *
+ *      The values must be right-shifted by eight bits by the "updcrc"
+ *      logic; the shift must be unsigned (bring in zeroes).  On some
+ *      hardware you could probably optimize the shift in assembler by
+ *      using byte-swap instructions
+ *      polynomial $edb88320
+ *
+ *
+ * CRC32 code derived from work by Gary S. Brown.
+ */
+
+#include <stdint.h>
+
+const uint32_t crc32_tab[] = {
+	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
+	0xe963a535, 0x9e6495a3,	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
+	0xf3b97148, 0x84be41de,	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,	0x14015c4f, 0x63066cd9,
+	0xfa0f3d63, 0x8d080df5,	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,	0x35b5a8fa, 0x42b2986c,
+	0xdbbbc9d6, 0xacbcf940,	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
+	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,	0x76dc4190, 0x01db7106,
+	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
+	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
+	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
+	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
+	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
+	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
+	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
+	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
+	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
+	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
+	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
+	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
+	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+/*
+ * Table-driven crc32 of @size bytes starting at @buf.  The running value
+ * starts at 0xffffffff, one input byte is folded in per step through
+ * crc32_tab, and the final value is xored with 0xffffffff.
+ */
+uint32_t cpu_crc32(const void *buf, size_t size)
+{
+	const uint8_t *bytes = buf;
+	uint32_t sum = ~0U;
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		const uint32_t slot = (sum ^ bytes[i]) & 0xFF;
+
+		sum = crc32_tab[slot] ^ (sum >> 8);
+	}
+
+	return sum ^ ~0U;
+}
diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index cb62728896..fff32e1816 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -2625,6 +2625,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
 #define MI_LOAD_REGISTER_MEM_GEN8	((0x29 << 23) | (4 - 2))
 #define   MI_MMIO_REMAP_ENABLE_GEN12	(1 << 17)
+#define MI_STORE_REGISTER_MEM_GEN8	((0x24 << 23) | (4 - 2))
 
 /* Flush */
 #define MI_FLUSH			(0x04<<23)
@@ -2657,6 +2658,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MI_BATCH_BUFFER_END	(0xA << 23)
 #define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
 #define MI_DO_COMPARE                   (1 << 21)
+#define MAD_GT_IDD			(0 << 12)
+#define MAD_GT_OR_EQ_IDD		(1 << 12)
+#define MAD_LT_IDD			(2 << 12)
+#define MAD_LT_OR_EQ_IDD		(3 << 12)
+#define MAD_EQ_IDD			(4 << 12)
+#define MAD_NEQ_IDD			(5 << 12)
 
 #define MI_BATCH_NON_SECURE		(1)
 #define MI_BATCH_NON_SECURE_I965	(1 << 8)
diff --git a/lib/meson.build b/lib/meson.build
index 0a173c1fc6..b05198ecc9 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -10,6 +10,7 @@ lib_sources = [
 	'i915/gem_ring.c',
 	'i915/gem_mman.c',
 	'i915/gem_vm.c',
+	'i915/i915_crc.c',
 	'i915/intel_memory_region.c',
 	'i915/intel_mocs.c',
 	'i915/i915_blt.c',
-- 
2.32.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [igt-dev] [PATCH i-g-t 2/2] tests/api_intel_bb: Add crc32 checking test for DG2
  2022-06-03 13:05 [igt-dev] [PATCH i-g-t 0/2] Add crc32 calculation on dg2 gpu Zbigniew Kempczyński
  2022-06-03 13:05 ` [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2 Zbigniew Kempczyński
@ 2022-06-03 13:05 ` Zbigniew Kempczyński
  2022-06-06 14:47   ` Kamil Konieczny
  2022-06-03 13:36 ` [igt-dev] ✗ GitLab.Pipeline: warning for Add crc32 calculation on dg2 gpu (rev3) Patchwork
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 16+ messages in thread
From: Zbigniew Kempczyński @ 2022-06-03 13:05 UTC (permalink / raw)
  To: igt-dev

Add simple test which compares crc32 sums and calculation times on cpu
and gpu.

v2: convert to dynamic
v3: add assert when cpu_crc != gpu_crc

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
---
 tests/i915/api_intel_bb.c | 67 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/tests/i915/api_intel_bb.c b/tests/i915/api_intel_bb.c
index 92f44cecf4..3213a1df75 100644
--- a/tests/i915/api_intel_bb.c
+++ b/tests/i915/api_intel_bb.c
@@ -38,6 +38,7 @@
 #include <zlib.h>
 #include "intel_bufops.h"
 #include "i915/gem_vm.h"
+#include "i915/i915_crc.h"
 
 #define PAGE_SIZE 4096
 
@@ -1395,6 +1396,57 @@ static void render_ccs(struct buf_ops *bops)
 	igt_assert_f(fails == 0, "render-ccs fails: %d\n", fails);
 }
 
+/* Time that passed between @start and @end, expressed in seconds. */
+static double elapsed(const struct timeval *start,
+		      const struct timeval *end)
+{
+	const double whole_us = 1e6 * (end->tv_sec - start->tv_sec);
+	const double frac_us = end->tv_usec - start->tv_usec;
+
+	return (whole_us + frac_us) / 1e6;
+}
+/*
+ * Compare cpu and gpu crc32 for one engine / memory region pair.
+ *
+ * For every power-of-two size from 4 KiB (1 << 12) to 1 MiB (1 << 20) an
+ * object is created in region @r, filled with an incrementing dword pattern
+ * and checksummed with cpu_crc32() and i915_crc32() on engine @e.  Both
+ * results and their wall-clock times are logged, then the test asserts the
+ * two checksums are equal.
+ */
+static void test_crc32(int i915, const intel_ctx_t *ctx,
+		       const struct intel_execution_engine2 *e,
+		       struct drm_i915_gem_memory_class_instance *r)
+{
+	uint64_t ahnd = get_reloc_ahnd(i915, ctx->id);
+	uint32_t data, *ptr;
+
+	uint32_t region = INTEL_MEMORY_REGION_ID(r->memory_class,
+						 r->memory_instance);
+
+	igt_info("[engine: %s, region: %s]\n", e->name,
+		 region == REGION_SMEM ? "smem" : "lmem");
+	for (int i = 12; i < 21; i++) {
+		struct timeval start, end;
+		uint64_t size = 1 << i;
+		uint32_t cpu_crc, gpu_crc;
+
+		double cpu_time, gpu_time;
+
+		data = gem_create_in_memory_regions(i915, size, region);
+		ptr = gem_mmap__device_coherent(i915, data, 0, size, PROT_WRITE);
+		for (int j = 0; j < size / sizeof(*ptr); j++)
+			ptr[j] = j; /* dword pattern: each dword holds its own index */
+
+		gettimeofday(&start, NULL);
+		cpu_crc = cpu_crc32(ptr, size); /* reads through the same mapping that was just written */
+		gettimeofday(&end, NULL);
+		cpu_time = elapsed(&start, &end);
+
+		gettimeofday(&start, NULL);
+		gpu_crc = i915_crc32(i915, ahnd, ctx, e, data, size);
+		gettimeofday(&end, NULL);
+		gpu_time = elapsed(&start, &end);
+		igt_info("size: %10lld, cpu crc: 0x%08x (time: %.3f), "
+				 "gpu crc: 0x%08x (time: %.3f) [ %s ]\n",
+			 (long long) size, cpu_crc, cpu_time, gpu_crc, gpu_time,
+			 cpu_crc == gpu_crc ? "EQUAL" : "DIFFERENT");
+		munmap(ptr, size);
+		gem_close(i915, data);
+		igt_assert(cpu_crc == gpu_crc); /* checked after the mapping and object are released */
+	}
+
+	put_ahnd(ahnd);
+}
+
 static int opt_handler(int opt, int opt_index, void *data)
 {
 	switch (opt) {
@@ -1552,6 +1604,21 @@ igt_main_args("dpib", NULL, help_str, opt_handler, NULL)
 	igt_subtest("render-ccs")
 		render_ccs(bops);
 
+	igt_subtest_with_dynamic_f("crc32") {
+		const intel_ctx_t *ctx;
+		const struct intel_execution_engine2 *e;
+
+		igt_require(supports_gpu_crc32(i915));
+
+		ctx = intel_ctx_create_all_physical(i915);
+		for_each_ctx_engine(i915, ctx, e) {
+			for_each_memory_region(r, i915) {
+				igt_dynamic_f("%s-%s", e->name, r->name)
+					test_crc32(i915, ctx, e, &r->ci);
+			}
+		}
+	}
+
 	igt_fixture {
 		buf_ops_destroy(bops);
 		close(i915);
-- 
2.32.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2
  2022-06-03 13:05 ` [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2 Zbigniew Kempczyński
@ 2022-06-03 13:11   ` Petri Latvala
  2022-06-06  6:33     ` Zbigniew Kempczyński
  0 siblings, 1 reply; 16+ messages in thread
From: Petri Latvala @ 2022-06-03 13:11 UTC (permalink / raw)
  To: Zbigniew Kempczyński; +Cc: igt-dev

On Fri, Jun 03, 2022 at 03:05:01PM +0200, Zbigniew Kempczyński wrote:
> Adding crc32 calculation on gpu gives us new possibility to verify data
> integrity without relying on trust cpu mapping is correct.
> 
> Patch introduces calculating crc32 on DG2 only. On older gens ALU
> (MI_MATH) doesn't support bit-shifting instructions as well as multiply
> or divide. Emulating n-bit shifts cost hundred of instructions with
> predicated SRM (works on render engine only). Another limitation is lack
> of indexed load / store. On DG2 we can use WPARID and CS_MI_ADDRESS_OFFSET
> to achieve indexed operation on memory.
> 
> Due to performance reasons (cpu crc32 calculation even on WC memory is
> still much faster than on gpu, also depends on calculated object memory
> region) calculation will complete in reasonable of time only for few MiB.
> 
> v2: - use registers relative to engine to allow run on all engines (Chris)
>     - use predication instead of memory access to get better performance
>       (Chris)
>     - add location where crc32 implementation comes from (Petri)
> 
> v3: - extract crc32 table + cpu_crc32() to separate i915_crc_table.c
> 
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
> ---
>  lib/i915/i915_crc.c         | 311 ++++++++++++++++++++++++++++++++++++
>  lib/i915/i915_crc.h         |  17 ++
>  lib/i915/i915_crc32_table.c | 105 ++++++++++++
>  lib/intel_reg.h             |   7 +
>  lib/meson.build             |   1 +
>  5 files changed, 441 insertions(+)
>  create mode 100644 lib/i915/i915_crc.c
>  create mode 100644 lib/i915/i915_crc.h
>  create mode 100644 lib/i915/i915_crc32_table.c
> 
> diff --git a/lib/i915/i915_crc.c b/lib/i915/i915_crc.c
> new file mode 100644
> index 0000000000..c26a8e05b9
> --- /dev/null
> +++ b/lib/i915/i915_crc.c
> @@ -0,0 +1,311 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2022 Intel Corporation
> + */
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +#include "drmtest.h"
> +#include "gem_create.h"
> +#include "gem_engine_topology.h"
> +#include "gem_mman.h"
> +#include "i830_reg.h"
> +#include "i915_drm.h"
> +#include "intel_reg.h"
> +#include "intel_chipset.h"
> +#include "ioctl_wrappers.h"
> +#include "intel_allocator.h"
> +#include "i915/i915_crc.h"
> +
> +/* Include crc32 table + cpu_crc32() */
> +#include "i915_crc32_table.c"

#including .c files is ugly. Can that be a header file with
statics/inlines instead?

That said, it also isn't i915-specific anymore but that's not a
blocker for merging the code at this time.


-- 
Petri Latvala


> +
> +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
> +
> +#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
> +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
> +/* Opcodes for MI_MATH_INSTR */
> +#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
> +#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
> +#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
> +#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
> +#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
> +#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
> +#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
> +#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
> +#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
> +#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
> +#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
> +#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
> +/* DG2+ */
> +#define   MI_MATH_SHL                   MI_MATH_INSTR(0x105, 0x0, 0x0)
> +#define   MI_MATH_SHR                   MI_MATH_INSTR(0x106, 0x0, 0x0)
> +#define   MI_MATH_SAR                   MI_MATH_INSTR(0x107, 0x0, 0x0)
> +
> +/* Registers used as operands in MI_MATH_INSTR */
> +#define   MI_MATH_REG(x)                (x)
> +#define   MI_MATH_REG_SRCA              0x20
> +#define   MI_MATH_REG_SRCB              0x21
> +#define   MI_MATH_REG_ACCU              0x31
> +#define   MI_MATH_REG_ZF                0x32
> +#define   MI_MATH_REG_CF                0x33
> +
> +#define MI_SET_PREDICATE                MI_INSTR(0x01, 0)
> +#define MI_ARB_CHECK                    MI_INSTR(0x5, 0)
> +#define MI_LOAD_REGISTER_REG            MI_INSTR(0x2A, 1)
> +#define CS_GPR(x)                       (0x600 + 8 * (x))
> +#define GPR(x)                          CS_GPR(x)
> +#define R(x)                            (x)
> +#define USERDATA(offset, idx)	        ((offset) + (0x100 + (idx)) * 4)
> +#define OFFSET(obj_offset, current, start) \
> +	((obj_offset) + (current - start) * 4)
> +
> +#define MI_PREDICATE_RESULT             0x3B8
> +#define WPARID                          0x21C
> +#define CS_MI_ADDRESS_OFFSET            0x3B4
> +
> +#define LOAD_REGISTER_REG(__reg_src, __reg_dst) do { \
> +		*bb++ = MI_LOAD_REGISTER_REG | BIT(19) | BIT(18); \
> +		*bb++ = (__reg_src); \
> +		*bb++ = (__reg_dst); \
> +	} while (0)
> +
> +#define LOAD_REGISTER_IMM32(__reg, __imm1) do { \
> +		*bb++ = MI_LOAD_REGISTER_IMM | BIT(19); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__imm1); \
> +	} while (0)
> +
> +#define LOAD_REGISTER_IMM64(__reg, __imm1, __imm2) do { \
> +		*bb++ = (MI_LOAD_REGISTER_IMM + 2) | BIT(19); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__imm1); \
> +		*bb++ = (__reg) + 4; \
> +		*bb++ = (__imm2); \
> +	} while (0)
> +
> +#define LOAD_REGISTER_MEM(__reg, __offset) do { \
> +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define LOAD_REGISTER_MEM_WPARID(__reg, __offset) do { \
> +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19) | BIT(16); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define STORE_REGISTER_MEM(__reg, __offset) do { \
> +		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define STORE_REGISTER_MEM_PREDICATED(__reg, __offset) do { \
> +		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19) | BIT(21); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define COND_BBE(__value, __offset, __condition) do { \
> +		*bb++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (__condition) | 2; \
> +		*bb++ = (__value); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define MATH_4_STORE(__r1, __r2, __op, __r3) do { \
> +		*bb++ = MI_MATH(4); \
> +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(__r1)); \
> +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(__r2)); \
> +		*bb++ = (__op); \
> +		*bb++ = MI_MATH_STORE(MI_MATH_REG(__r3), MI_MATH_REG_ACCU); \
> +	} while (0)
> +
> +#define BBSIZE 4096
> +
> +/* Aliasing for easier refactoring */
> +#define GPR_SIZE	GPR(0)
> +#define R_SIZE		R(0)
> +
> +#define GPR_CRC		GPR(1)
> +#define R_CRC		R(1)
> +
> +#define GPR_INDATA_IDX  GPR(2)
> +#define R_INDATA_IDX	R(2)
> +
> +#define GPR_TABLE_IDX   GPR(3)
> +#define R_TABLE_IDX	R(3)
> +
> +#define GPR_CURR_DW	GPR(4)
> +#define R_CURR_DW	R(4)
> +
> +#define GPR_CONST_2	GPR(5)
> +#define R_CONST_2	R(5)
> +
> +#define GPR_CONST_4	GPR(6)
> +#define R_CONST_4	R(6)
> +
> +#define GPR_CONST_8	GPR(7)
> +#define R_CONST_8	R(7)
> +
> +#define GPR_CONST_ff	GPR(8)
> +#define R_CONST_ff	R(8)
> +
> +#define GPR_ffffffff    GPR(9)
> +#define R_ffffffff	R(9)
> +
> +#define GPR_TMP_1	GPR(10)
> +#define R_TMP_1		R(10)
> +
> +#define GPR_TMP_2	GPR(11)
> +#define R_TMP_2		R(11)
> +
> +static void fill_batch(int i915, uint32_t bb_handle, uint64_t bb_offset,
> +		       uint64_t table_offset, uint64_t data_offset, uint32_t data_size)
> +{
> +	uint32_t *bb, *batch, *jmp;
> +	const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
> +	const int use_64b = gen >= 8;
> +	uint64_t offset;
> +	uint64_t crc = USERDATA(table_offset, 0);
> +
> +	igt_assert(data_size % 4 == 0);
> +
> +	batch = gem_mmap__device_coherent(i915, bb_handle, 0, BBSIZE,
> +					  PROT_READ | PROT_WRITE);
> +	memset(batch, 0, BBSIZE);
> +
> +	bb = batch;
> +
> +	LOAD_REGISTER_IMM64(GPR_SIZE, data_size, 0);
> +	LOAD_REGISTER_IMM64(GPR_CRC, ~0U, 0);		/* crc start - 0xffffffff */
> +	LOAD_REGISTER_IMM64(GPR_INDATA_IDX, 0, 0);	/* data_offset index (0) */
> +	LOAD_REGISTER_IMM64(GPR_CONST_2, 2, 0);		/* const value 2 */
> +	LOAD_REGISTER_IMM64(GPR_CONST_4, 4, 0);		/* const value 4 */
> +	LOAD_REGISTER_IMM64(GPR_CONST_8, 8, 0);		/* const value 8 */
> +	LOAD_REGISTER_IMM64(GPR_CONST_ff, 0xff, 0);	/* const value 0xff */
> +	LOAD_REGISTER_IMM64(GPR_ffffffff, ~0U, 0);	/* const value 0xffffffff */
> +
> +	/* for indexed reads from memory */
> +	LOAD_REGISTER_IMM32(WPARID, 1);
> +
> +	jmp = bb;
> +
> +	*bb++ = MI_SET_PREDICATE;
> +	*bb++ = MI_ARB_CHECK;
> +
> +	LOAD_REGISTER_REG(GPR_INDATA_IDX, CS_MI_ADDRESS_OFFSET);
> +	LOAD_REGISTER_MEM_WPARID(GPR_CURR_DW, data_offset);
> +
> +	for (int byte = 0; byte < 4; byte++) {
> +		if (byte != 0)
> +			MATH_4_STORE(R_CURR_DW, R_CONST_8,
> +				     MI_MATH_SHR, R_CURR_DW); /* dw >> 8 */
> +
> +		/* crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); */
> +		MATH_4_STORE(R_CURR_DW, R_CONST_ff,
> +			     MI_MATH_AND, R_TMP_1); /* dw & 0xff */
> +		MATH_4_STORE(R_CRC, R_TMP_1,
> +			     MI_MATH_XOR, R_TMP_1); /* crc ^ tmp */
> +		MATH_4_STORE(R_TMP_1, R_CONST_ff,
> +			     MI_MATH_AND, R_TMP_1); /* tmp & 0xff */
> +		MATH_4_STORE(R_TMP_1, R_CONST_2,
> +			     MI_MATH_SHL, R_TABLE_IDX); /* tmp << 2 (crc idx) */
> +
> +		LOAD_REGISTER_REG(GPR_TABLE_IDX, CS_MI_ADDRESS_OFFSET);
> +		LOAD_REGISTER_MEM_WPARID(GPR_TMP_1, table_offset);
> +
> +		MATH_4_STORE(R_CRC, R_CONST_8,
> +			     MI_MATH_SHR, R_TMP_2); /* crc >> 8 (shift) */
> +		MATH_4_STORE(R_TMP_2, R_TMP_1,
> +			     MI_MATH_XOR, R_CRC); /* crc = tab[v] ^ shift */
> +	}
> +
> +	/* increment data index */
> +	MATH_4_STORE(R_INDATA_IDX, R_CONST_4, MI_MATH_ADD, R_INDATA_IDX);
> +
> +	/* loop until R_SIZE == 0, R_SIZE = R_SIZE - R_CONST_4 */
> +
> +	*bb++ = MI_MATH(5);
> +	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(R_SIZE));
> +	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(R_CONST_4));
> +	*bb++ = MI_MATH_SUB;
> +	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_SIZE), MI_MATH_REG_ACCU);
> +	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_TMP_2), MI_MATH_REG_ZF);
> +	LOAD_REGISTER_REG(GPR_TMP_2, MI_PREDICATE_RESULT);
> +
> +	*bb++ = MI_BATCH_BUFFER_START | BIT(15) | BIT(8) | use_64b;
> +	offset = OFFSET(bb_offset, jmp, batch);
> +	*bb++ = offset;
> +	*bb++ = offset >> 32;
> +
> +	*bb++ = MI_SET_PREDICATE;
> +
> +	MATH_4_STORE(R_CRC, R_ffffffff, MI_MATH_XOR, R_TMP_1);
> +	STORE_REGISTER_MEM(GPR_TMP_1, crc);
> +
> +	*bb++ = MI_BATCH_BUFFER_END;
> +
> +	gem_munmap(batch, BBSIZE);
> +}
> +
> +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> +		    const struct intel_execution_engine2 *e,
> +		    uint32_t data_handle, uint32_t data_size)
> +{
> +	struct drm_i915_gem_execbuffer2 execbuf = {};
> +	struct drm_i915_gem_exec_object2 obj[3] = {};
> +	uint64_t bb_offset, table_offset, data_offset;
> +	uint32_t bb, table, crc, table_size = 4096;
> +	uint32_t *ptr;
> +
> +	igt_assert(data_size % 4 == 0);
> +
> +	table = gem_create_in_memory_regions(i915, table_size, REGION_LMEM(0));
> +	gem_write(i915, table, 0, crc32_tab, sizeof(crc32_tab));
> +
> +	table_offset = get_offset(ahnd, table, table_size, 0);
> +	data_offset = get_offset(ahnd, data_handle, data_size, 0);
> +
> +	obj[0].offset = table_offset;
> +	obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
> +	obj[0].handle = table;
> +
> +	obj[1].offset = data_offset;
> +	obj[1].flags = EXEC_OBJECT_PINNED;
> +	obj[1].handle = data_handle;
> +
> +	bb = gem_create_in_memory_regions(i915, BBSIZE, REGION_LMEM(0));
> +	bb_offset = get_offset(ahnd, bb, BBSIZE, 0);
> +	fill_batch(i915, bb, bb_offset, table_offset, data_offset, data_size);
> +	obj[2].offset = bb_offset;
> +	obj[2].flags = EXEC_OBJECT_PINNED;
> +	obj[2].handle = bb;
> +	execbuf.buffer_count = 3;
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.flags = e->flags;
> +	execbuf.rsvd1 = ctx->id;
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, table);
> +
> +	ptr = gem_mmap__device_coherent(i915, table, 0, table_size, PROT_READ);
> +	crc = ptr[0x100];
> +	gem_munmap(ptr, table_size);
> +	gem_close(i915, table);
> +	gem_close(i915, bb);
> +
> +	return crc;
> +}
> +
> +bool supports_gpu_crc32(int i915)
> +{
> +	uint16_t devid = intel_get_drm_devid(i915);
> +
> +	return IS_DG2(devid);
> +}
> diff --git a/lib/i915/i915_crc.h b/lib/i915/i915_crc.h
> new file mode 100644
> index 0000000000..bb0195e2a8
> --- /dev/null
> +++ b/lib/i915/i915_crc.h
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2022 Intel Corporation
> + */
> +#ifndef _I915_CRC_H_
> +#define _I915_CRC_H_
> +
> +#include <stdint.h>
> +#include "intel_ctx.h"
> +
> +uint32_t cpu_crc32(const void *buf, size_t size);
> +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> +		    const struct intel_execution_engine2 *e,
> +		    uint32_t data_handle, uint32_t data_size);
> +bool supports_gpu_crc32(int i915);
> +
> +#endif /* _I915_CRC_H_ */
> diff --git a/lib/i915/i915_crc32_table.c b/lib/i915/i915_crc32_table.c
> new file mode 100644
> index 0000000000..eca5e43218
> --- /dev/null
> +++ b/lib/i915/i915_crc32_table.c
> @@ -0,0 +1,105 @@
> +/*-
> + *  COPYRIGHT (C) 1986 Gary S. Brown.  You may use this program, or
> + *  code or tables extracted from it, as desired without restriction.
> + */
> +
> +/*
> + *  First, the polynomial itself and its table of feedback terms.  The
> + *  polynomial is
> + *  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
> + *
> + *  Note that we take it "backwards" and put the highest-order term in
> + *  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
> + *  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
> + *  the MSB being 1
> + *
> + *  Note that the usual hardware shift register implementation, which
> + *  is what we're using (we're merely optimizing it by doing eight-bit
> + *  chunks at a time) shifts bits into the lowest-order term.  In our
> + *  implementation, that means shifting towards the right.  Why do we
> + *  do it this way?  Because the calculated CRC must be transmitted in
> + *  order from highest-order term to lowest-order term.  UARTs transmit
> + *  characters in order from LSB to MSB.  By storing the CRC this way
> + *  we hand it to the UART in the order low-byte to high-byte; the UART
> + *  sends each low-bit to high-bit; and the result is transmission bit
> + *  by bit from highest- to lowest-order term without requiring any bit
> + *  shuffling on our part.  Reception works similarly
> + *
> + *  The feedback terms table consists of 256, 32-bit entries.  Notes
> + *
> + *      The table can be generated at runtime if desired; code to do so
> + *      is shown later.  It might not be obvious, but the feedback
> + *      terms simply represent the results of eight shift/xor
> + *      operations for all combinations of data and CRC register values
> + *
> + *      The values must be right-shifted by eight bits by the "updcrc"
> + *      logic; the shift must be unsigned (bring in zeroes).  On some
> + *      hardware you could probably optimize the shift in assembler by
> + *      using byte-swap instructions
> + *      polynomial $edb88320
> + *
> + *
> + * CRC32 code derived from work by Gary S. Brown.
> + */
> +
> +#include <stdint.h>
> +
> +const uint32_t crc32_tab[] = {
> +	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
> +	0xe963a535, 0x9e6495a3,	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
> +	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
> +	0xf3b97148, 0x84be41de,	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
> +	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,	0x14015c4f, 0x63066cd9,
> +	0xfa0f3d63, 0x8d080df5,	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
> +	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,	0x35b5a8fa, 0x42b2986c,
> +	0xdbbbc9d6, 0xacbcf940,	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
> +	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
> +	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
> +	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,	0x76dc4190, 0x01db7106,
> +	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
> +	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
> +	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
> +	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
> +	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
> +	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
> +	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
> +	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
> +	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
> +	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
> +	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
> +	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
> +	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
> +	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
> +	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
> +	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
> +	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
> +	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
> +	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
> +	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
> +	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
> +	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
> +	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
> +	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
> +	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
> +	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
> +	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
> +	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
> +	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
> +	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
> +	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
> +	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
> +};
> +
> +uint32_t cpu_crc32(const void *buf, size_t size)
> +{
> +
> +	const uint8_t *p = buf;
> +	uint32_t crc;
> +
> +	crc = ~0U;
> +
> +	while (size--)
> +		crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
> +
> +	return crc ^ ~0U;
> +}
> diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> index cb62728896..fff32e1816 100644
> --- a/lib/intel_reg.h
> +++ b/lib/intel_reg.h
> @@ -2625,6 +2625,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
>  #define MI_LOAD_REGISTER_MEM_GEN8	((0x29 << 23) | (4 - 2))
>  #define   MI_MMIO_REMAP_ENABLE_GEN12	(1 << 17)
> +#define MI_STORE_REGISTER_MEM_GEN8	((0x24 << 23) | (4 - 2))
>  
>  /* Flush */
>  #define MI_FLUSH			(0x04<<23)
> @@ -2657,6 +2658,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  #define MI_BATCH_BUFFER_END	(0xA << 23)
>  #define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
>  #define MI_DO_COMPARE                   (1 << 21)
> +#define MAD_GT_IDD			(0 << 12)
> +#define MAD_GT_OR_EQ_IDD		(1 << 12)
> +#define MAD_LT_IDD			(2 << 12)
> +#define MAD_LT_OR_EQ_IDD		(3 << 12)
> +#define MAD_EQ_IDD			(4 << 12)
> +#define MAD_NEQ_IDD			(5 << 12)
>  
>  #define MI_BATCH_NON_SECURE		(1)
>  #define MI_BATCH_NON_SECURE_I965	(1 << 8)
> diff --git a/lib/meson.build b/lib/meson.build
> index 0a173c1fc6..b05198ecc9 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -10,6 +10,7 @@ lib_sources = [
>  	'i915/gem_ring.c',
>  	'i915/gem_mman.c',
>  	'i915/gem_vm.c',
> +	'i915/i915_crc.c',
>  	'i915/intel_memory_region.c',
>  	'i915/intel_mocs.c',
>  	'i915/i915_blt.c',
> -- 
> 2.32.0
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [igt-dev] ✗ GitLab.Pipeline: warning for Add crc32 calculation on dg2 gpu (rev3)
  2022-06-03 13:05 [igt-dev] [PATCH i-g-t 0/2] Add crc32 calculation on dg2 gpu Zbigniew Kempczyński
  2022-06-03 13:05 ` [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2 Zbigniew Kempczyński
  2022-06-03 13:05 ` [igt-dev] [PATCH i-g-t 2/2] tests/api_intel_bb: Add crc32 checking test " Zbigniew Kempczyński
@ 2022-06-03 13:36 ` Patchwork
  2022-06-03 19:07 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
  2022-06-03 21:44 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
  4 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2022-06-03 13:36 UTC (permalink / raw)
  To: Zbigniew Kempczyński; +Cc: igt-dev

== Series Details ==

Series: Add crc32 calculation on dg2 gpu (rev3)
URL   : https://patchwork.freedesktop.org/series/104657/
State : warning

== Summary ==

Pipeline status: FAILED.

see https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/-/pipelines/603427 for the overview.

test:ninja-test-mips has failed (https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/-/jobs/23526853):
  Ok:                   22
  Expected Fail:         3
  Fail:                289
  Unexpected Pass:       0
  Skipped:               0
  Timeout:               0
  
  Full log written to /builds/gfx-ci/igt-ci-tags/build/meson-logs/testlog.txt
  section_end:1654263217:step_script
  section_start:1654263217:upload_artifacts_on_failure
  Uploading artifacts for failed job
  Uploading artifacts...
  build: found 1746 matching files and directories   
  Uploading artifacts as "archive" to coordinator... 201 Created  id=23526853 responseStatus=201 Created token=8WfQxLz1
  section_end:1654263227:upload_artifacts_on_failure
  section_start:1654263227:cleanup_file_variables
  Cleaning up project directory and file based variables
  section_end:1654263227:cleanup_file_variables
  ERROR: Job failed: exit code 1

== Logs ==

For more details see: https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/-/pipelines/603427

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for Add crc32 calculation on dg2 gpu (rev3)
  2022-06-03 13:05 [igt-dev] [PATCH i-g-t 0/2] Add crc32 calculation on dg2 gpu Zbigniew Kempczyński
                   ` (2 preceding siblings ...)
  2022-06-03 13:36 ` [igt-dev] ✗ GitLab.Pipeline: warning for Add crc32 calculation on dg2 gpu (rev3) Patchwork
@ 2022-06-03 19:07 ` Patchwork
  2022-06-03 21:44 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
  4 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2022-06-03 19:07 UTC (permalink / raw)
  To: Zbigniew Kempczyński; +Cc: igt-dev

[-- Attachment #1: Type: text/plain, Size: 9757 bytes --]

== Series Details ==

Series: Add crc32 calculation on dg2 gpu (rev3)
URL   : https://patchwork.freedesktop.org/series/104657/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11726 -> IGTPW_7228
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/index.html

Participating hosts (43 -> 46)
------------------------------

  Additional (4): bat-adln-1 fi-kbl-x1275 bat-dg2-9 bat-atsm-1 
  Missing    (1): bat-dg2-8 

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_7228:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@kms_frontbuffer_tracking@basic:
    - {bat-adln-1}:       NOTRUN -> [SKIP][1] +19 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/bat-adln-1/igt@kms_frontbuffer_tracking@basic.html

  
Known issues
------------

  Here are the changes found in IGTPW_7228 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_huc_copy@huc-copy:
    - fi-kbl-x1275:       NOTRUN -> [SKIP][2] ([fdo#109271] / [i915#2190])
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-kbl-x1275/igt@gem_huc_copy@huc-copy.html

  * igt@gem_lmem_swapping@verify-random:
    - fi-kbl-x1275:       NOTRUN -> [SKIP][3] ([fdo#109271] / [i915#4613]) +3 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-kbl-x1275/igt@gem_lmem_swapping@verify-random.html

  * igt@i915_selftest@live@gt_engines:
    - bat-dg1-5:          [PASS][4] -> [INCOMPLETE][5] ([i915#4418])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/bat-dg1-5/igt@i915_selftest@live@gt_engines.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/bat-dg1-5/igt@i915_selftest@live@gt_engines.html

  * igt@i915_selftest@live@gt_heartbeat:
    - fi-kbl-8809g:       [PASS][6] -> [DMESG-FAIL][7] ([i915#5334])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/fi-kbl-8809g/igt@i915_selftest@live@gt_heartbeat.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-kbl-8809g/igt@i915_selftest@live@gt_heartbeat.html

  * igt@i915_selftest@live@hangcheck:
    - fi-hsw-4770:        [PASS][8] -> [INCOMPLETE][9] ([i915#4785])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/fi-hsw-4770/igt@i915_selftest@live@hangcheck.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-hsw-4770/igt@i915_selftest@live@hangcheck.html
    - bat-dg1-6:          [PASS][10] -> [DMESG-FAIL][11] ([i915#4494] / [i915#4957])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/bat-dg1-6/igt@i915_selftest@live@hangcheck.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/bat-dg1-6/igt@i915_selftest@live@hangcheck.html

  * igt@i915_selftest@live@requests:
    - fi-blb-e6850:       [PASS][12] -> [DMESG-FAIL][13] ([i915#4528])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/fi-blb-e6850/igt@i915_selftest@live@requests.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-blb-e6850/igt@i915_selftest@live@requests.html

  * igt@kms_chamelium@common-hpd-after-suspend:
    - fi-rkl-11600:       NOTRUN -> [SKIP][14] ([fdo#111827])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-rkl-11600/igt@kms_chamelium@common-hpd-after-suspend.html
    - fi-pnv-d510:        NOTRUN -> [SKIP][15] ([fdo#109271])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-pnv-d510/igt@kms_chamelium@common-hpd-after-suspend.html

  * igt@kms_chamelium@dp-hpd-fast:
    - fi-kbl-x1275:       NOTRUN -> [SKIP][16] ([fdo#109271] / [fdo#111827]) +8 similar issues
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-kbl-x1275/igt@kms_chamelium@dp-hpd-fast.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
    - fi-kbl-x1275:       NOTRUN -> [SKIP][17] ([fdo#109271] / [i915#533])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-kbl-x1275/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html

  * igt@prime_vgem@basic-userptr:
    - fi-kbl-x1275:       NOTRUN -> [SKIP][18] ([fdo#109271]) +12 similar issues
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-kbl-x1275/igt@prime_vgem@basic-userptr.html

  * igt@runner@aborted:
    - fi-hsw-4770:        NOTRUN -> [FAIL][19] ([fdo#109271] / [i915#4312] / [i915#5594])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-hsw-4770/igt@runner@aborted.html

  
#### Possible fixes ####

  * igt@i915_selftest@live@requests:
    - fi-pnv-d510:        [DMESG-FAIL][20] ([i915#4528]) -> [PASS][21]
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/fi-pnv-d510/igt@i915_selftest@live@requests.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-pnv-d510/igt@i915_selftest@live@requests.html

  * igt@i915_suspend@basic-s3-without-i915:
    - fi-rkl-11600:       [INCOMPLETE][22] ([i915#5982]) -> [PASS][23]
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/fi-rkl-11600/igt@i915_suspend@basic-s3-without-i915.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/fi-rkl-11600/igt@i915_suspend@basic-s3-without-i915.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109295]: https://bugs.freedesktop.org/show_bug.cgi?id=109295
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#1155]: https://gitlab.freedesktop.org/drm/intel/issues/1155
  [i915#1836]: https://gitlab.freedesktop.org/drm/intel/issues/1836
  [i915#1849]: https://gitlab.freedesktop.org/drm/intel/issues/1849
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#3282]: https://gitlab.freedesktop.org/drm/intel/issues/3282
  [i915#3291]: https://gitlab.freedesktop.org/drm/intel/issues/3291
  [i915#3301]: https://gitlab.freedesktop.org/drm/intel/issues/3301
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3595]: https://gitlab.freedesktop.org/drm/intel/issues/3595
  [i915#3708]: https://gitlab.freedesktop.org/drm/intel/issues/3708
  [i915#4077]: https://gitlab.freedesktop.org/drm/intel/issues/4077
  [i915#4079]: https://gitlab.freedesktop.org/drm/intel/issues/4079
  [i915#4083]: https://gitlab.freedesktop.org/drm/intel/issues/4083
  [i915#4103]: https://gitlab.freedesktop.org/drm/intel/issues/4103
  [i915#4212]: https://gitlab.freedesktop.org/drm/intel/issues/4212
  [i915#4213]: https://gitlab.freedesktop.org/drm/intel/issues/4213
  [i915#4215]: https://gitlab.freedesktop.org/drm/intel/issues/4215
  [i915#4312]: https://gitlab.freedesktop.org/drm/intel/issues/4312
  [i915#4418]: https://gitlab.freedesktop.org/drm/intel/issues/4418
  [i915#4494]: https://gitlab.freedesktop.org/drm/intel/issues/4494
  [i915#4528]: https://gitlab.freedesktop.org/drm/intel/issues/4528
  [i915#4579]: https://gitlab.freedesktop.org/drm/intel/issues/4579
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4785]: https://gitlab.freedesktop.org/drm/intel/issues/4785
  [i915#4873]: https://gitlab.freedesktop.org/drm/intel/issues/4873
  [i915#4957]: https://gitlab.freedesktop.org/drm/intel/issues/4957
  [i915#5174]: https://gitlab.freedesktop.org/drm/intel/issues/5174
  [i915#5190]: https://gitlab.freedesktop.org/drm/intel/issues/5190
  [i915#5274]: https://gitlab.freedesktop.org/drm/intel/issues/5274
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [i915#5334]: https://gitlab.freedesktop.org/drm/intel/issues/5334
  [i915#5594]: https://gitlab.freedesktop.org/drm/intel/issues/5594
  [i915#5763]: https://gitlab.freedesktop.org/drm/intel/issues/5763
  [i915#5874]: https://gitlab.freedesktop.org/drm/intel/issues/5874
  [i915#5885]: https://gitlab.freedesktop.org/drm/intel/issues/5885
  [i915#5903]: https://gitlab.freedesktop.org/drm/intel/issues/5903
  [i915#5982]: https://gitlab.freedesktop.org/drm/intel/issues/5982
  [i915#6077]: https://gitlab.freedesktop.org/drm/intel/issues/6077
  [i915#6092]: https://gitlab.freedesktop.org/drm/intel/issues/6092
  [i915#6093]: https://gitlab.freedesktop.org/drm/intel/issues/6093
  [i915#6094]: https://gitlab.freedesktop.org/drm/intel/issues/6094
  [i915#6099]: https://gitlab.freedesktop.org/drm/intel/issues/6099
  [i915#6133]: https://gitlab.freedesktop.org/drm/intel/issues/6133
  [i915#6134]: https://gitlab.freedesktop.org/drm/intel/issues/6134
  [i915#6135]: https://gitlab.freedesktop.org/drm/intel/issues/6135
  [i915#6136]: https://gitlab.freedesktop.org/drm/intel/issues/6136
  [i915#6137]: https://gitlab.freedesktop.org/drm/intel/issues/6137
  [i915#6138]: https://gitlab.freedesktop.org/drm/intel/issues/6138
  [i915#6166]: https://gitlab.freedesktop.org/drm/intel/issues/6166


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_6505 -> IGTPW_7228

  CI-20190529: 20190529
  CI_DRM_11726: 74ea63ad8ffc71b8345436147517253dcabd4793 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_7228: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/index.html
  IGT_6505: edb1a467fb622b23b927e28ff603fa43851fea97 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git


Testlist changes
----------------

+igt@api_intel_bb@crc32

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/index.html

[-- Attachment #2: Type: text/html, Size: 8553 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [igt-dev] ✗ Fi.CI.IGT: failure for Add crc32 calculation on dg2 gpu (rev3)
  2022-06-03 13:05 [igt-dev] [PATCH i-g-t 0/2] Add crc32 calculation on dg2 gpu Zbigniew Kempczyński
                   ` (3 preceding siblings ...)
  2022-06-03 19:07 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
@ 2022-06-03 21:44 ` Patchwork
  4 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2022-06-03 21:44 UTC (permalink / raw)
  To: Zbigniew Kempczyński; +Cc: igt-dev

[-- Attachment #1: Type: text/plain, Size: 60051 bytes --]

== Series Details ==

Series: Add crc32 calculation on dg2 gpu (rev3)
URL   : https://patchwork.freedesktop.org/series/104657/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11726_full -> IGTPW_7228_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with IGTPW_7228_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_7228_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/index.html

Participating hosts (13 -> 10)
------------------------------

  Missing    (3): pig-skl-6260u pig-kbl-iris pig-glk-j5005 

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_7228_full:

### IGT changes ###

#### Possible regressions ####

  * {igt@api_intel_bb@crc32} (NEW):
    - shard-tglb:         NOTRUN -> [SKIP][1]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb3/igt@api_intel_bb@crc32.html
    - shard-iclb:         NOTRUN -> [SKIP][2]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb3/igt@api_intel_bb@crc32.html
    - {shard-rkl}:        NOTRUN -> [SKIP][3]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-4/igt@api_intel_bb@crc32.html
    - {shard-dg1}:        NOTRUN -> [SKIP][4]
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-dg1-17/igt@api_intel_bb@crc32.html
    - {shard-tglu}:       NOTRUN -> [SKIP][5]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglu-2/igt@api_intel_bb@crc32.html

  * igt@kms_hdr@bpc-switch@pipe-a-dp-1:
    - shard-kbl:          NOTRUN -> [FAIL][6]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl7/igt@kms_hdr@bpc-switch@pipe-a-dp-1.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_suspend@forcewake:
    - {shard-tglu}:       [PASS][7] -> [DMESG-WARN][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-tglu-5/igt@i915_suspend@forcewake.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglu-6/igt@i915_suspend@forcewake.html

  
New tests
---------

  New tests have been introduced between CI_DRM_11726_full and IGTPW_7228_full:

### New IGT tests (1) ###

  * igt@api_intel_bb@crc32:
    - Statuses : 9 skip(s)
    - Exec time: [0.0] s

  

Known issues
------------

  Here are the changes found in IGTPW_7228_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_persistence@legacy-engines-mixed-process:
    - shard-snb:          NOTRUN -> [SKIP][9] ([fdo#109271] / [i915#1099]) +1 similar issue
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-snb6/igt@gem_ctx_persistence@legacy-engines-mixed-process.html

  * igt@gem_eio@kms:
    - shard-tglb:         NOTRUN -> [FAIL][10] ([i915#5784])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb1/igt@gem_eio@kms.html

  * igt@gem_eio@unwedge-stress:
    - shard-iclb:         [PASS][11] -> [TIMEOUT][12] ([i915#3070])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb4/igt@gem_eio@unwedge-stress.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb6/igt@gem_eio@unwedge-stress.html

  * igt@gem_exec_balancer@parallel-bb-first:
    - shard-iclb:         [PASS][13] -> [SKIP][14] ([i915#4525])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb1/igt@gem_exec_balancer@parallel-bb-first.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb5/igt@gem_exec_balancer@parallel-bb-first.html

  * igt@gem_exec_fair@basic-deadline:
    - shard-kbl:          NOTRUN -> [FAIL][15] ([i915#6141])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl4/igt@gem_exec_fair@basic-deadline.html
    - shard-apl:          NOTRUN -> [FAIL][16] ([i915#6141])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl2/igt@gem_exec_fair@basic-deadline.html

  * igt@gem_exec_fair@basic-pace-share@rcs0:
    - shard-apl:          NOTRUN -> [FAIL][17] ([i915#2842])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl8/igt@gem_exec_fair@basic-pace-share@rcs0.html
    - shard-tglb:         [PASS][18] -> [FAIL][19] ([i915#2842])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-tglb7/igt@gem_exec_fair@basic-pace-share@rcs0.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb7/igt@gem_exec_fair@basic-pace-share@rcs0.html

  * igt@gem_exec_fair@basic-pace-solo@rcs0:
    - shard-iclb:         [PASS][20] -> [FAIL][21] ([i915#2842]) +1 similar issue
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb8/igt@gem_exec_fair@basic-pace-solo@rcs0.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb2/igt@gem_exec_fair@basic-pace-solo@rcs0.html

  * igt@gem_exec_fair@basic-pace@vcs1:
    - shard-iclb:         NOTRUN -> [FAIL][22] ([i915#2842])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb4/igt@gem_exec_fair@basic-pace@vcs1.html

  * igt@gem_exec_fair@basic-pace@vecs0:
    - shard-kbl:          [PASS][23] -> [FAIL][24] ([i915#2842]) +2 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-kbl1/igt@gem_exec_fair@basic-pace@vecs0.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl1/igt@gem_exec_fair@basic-pace@vecs0.html

  * igt@gem_exec_fair@basic-throttle@rcs0:
    - shard-glk:          NOTRUN -> [FAIL][25] ([i915#2842])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk4/igt@gem_exec_fair@basic-throttle@rcs0.html
    - shard-iclb:         NOTRUN -> [FAIL][26] ([i915#2849])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb7/igt@gem_exec_fair@basic-throttle@rcs0.html
    - shard-tglb:         NOTRUN -> [FAIL][27] ([i915#2842])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb5/igt@gem_exec_fair@basic-throttle@rcs0.html

  * igt@gem_exec_suspend@basic-s3@smem:
    - shard-apl:          NOTRUN -> [DMESG-WARN][28] ([i915#180]) +1 similar issue
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl3/igt@gem_exec_suspend@basic-s3@smem.html

  * igt@gem_exec_whisper@basic-fds-forked:
    - shard-glk:          [PASS][29] -> [DMESG-WARN][30] ([i915#118])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-glk1/igt@gem_exec_whisper@basic-fds-forked.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk1/igt@gem_exec_whisper@basic-fds-forked.html

  * igt@gem_lmem_swapping@parallel-random-verify-ccs:
    - shard-kbl:          NOTRUN -> [SKIP][31] ([fdo#109271] / [i915#4613])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl1/igt@gem_lmem_swapping@parallel-random-verify-ccs.html

  * igt@gem_lmem_swapping@random-engines:
    - shard-glk:          NOTRUN -> [SKIP][32] ([fdo#109271] / [i915#4613]) +1 similar issue
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk4/igt@gem_lmem_swapping@random-engines.html
    - shard-iclb:         NOTRUN -> [SKIP][33] ([i915#4613])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb7/igt@gem_lmem_swapping@random-engines.html
    - shard-apl:          NOTRUN -> [SKIP][34] ([fdo#109271] / [i915#4613])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl7/igt@gem_lmem_swapping@random-engines.html
    - shard-tglb:         NOTRUN -> [SKIP][35] ([i915#4613])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb5/igt@gem_lmem_swapping@random-engines.html

  * igt@gem_pread@exhaustion:
    - shard-kbl:          NOTRUN -> [WARN][36] ([i915#2658])
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl4/igt@gem_pread@exhaustion.html

  * igt@gem_pxp@verify-pxp-key-change-after-suspend-resume:
    - shard-tglb:         NOTRUN -> [SKIP][37] ([i915#4270]) +1 similar issue
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb3/igt@gem_pxp@verify-pxp-key-change-after-suspend-resume.html
    - shard-iclb:         NOTRUN -> [SKIP][38] ([i915#4270]) +1 similar issue
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb2/igt@gem_pxp@verify-pxp-key-change-after-suspend-resume.html

  * igt@gem_userptr_blits@input-checking:
    - shard-kbl:          NOTRUN -> [DMESG-WARN][39] ([i915#4991])
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl1/igt@gem_userptr_blits@input-checking.html
    - shard-apl:          NOTRUN -> [DMESG-WARN][40] ([i915#4991])
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl7/igt@gem_userptr_blits@input-checking.html

  * igt@gem_workarounds@reset-context:
    - shard-snb:          NOTRUN -> [TIMEOUT][41] ([i915#4995])
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-snb7/igt@gem_workarounds@reset-context.html

  * igt@gem_workarounds@suspend-resume:
    - shard-apl:          [PASS][42] -> [DMESG-WARN][43] ([i915#180]) +3 similar issues
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-apl1/igt@gem_workarounds@suspend-resume.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl6/igt@gem_workarounds@suspend-resume.html

  * igt@gen3_render_linear_blits:
    - shard-iclb:         NOTRUN -> [SKIP][44] ([fdo#109289])
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb6/igt@gen3_render_linear_blits.html
    - shard-tglb:         NOTRUN -> [SKIP][45] ([fdo#109289])
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb7/igt@gen3_render_linear_blits.html

  * igt@gen9_exec_parse@batch-without-end:
    - shard-iclb:         NOTRUN -> [SKIP][46] ([i915#2856])
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb7/igt@gen9_exec_parse@batch-without-end.html
    - shard-tglb:         NOTRUN -> [SKIP][47] ([i915#2527] / [i915#2856])
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb5/igt@gen9_exec_parse@batch-without-end.html

  * igt@i915_pm_dc@dc9-dpms:
    - shard-apl:          [PASS][48] -> [SKIP][49] ([fdo#109271])
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-apl7/igt@i915_pm_dc@dc9-dpms.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl6/igt@i915_pm_dc@dc9-dpms.html

  * igt@i915_pm_rpm@modeset-non-lpsp-stress-no-wait:
    - shard-iclb:         NOTRUN -> [SKIP][50] ([fdo#110892])
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb1/igt@i915_pm_rpm@modeset-non-lpsp-stress-no-wait.html
    - shard-tglb:         NOTRUN -> [SKIP][51] ([fdo#111644] / [i915#1397] / [i915#2411])
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb5/igt@i915_pm_rpm@modeset-non-lpsp-stress-no-wait.html

  * igt@i915_pm_rpm@modeset-pc8-residency-stress:
    - shard-tglb:         NOTRUN -> [SKIP][52] ([fdo#109506] / [i915#2411])
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb6/igt@i915_pm_rpm@modeset-pc8-residency-stress.html
    - shard-iclb:         NOTRUN -> [SKIP][53] ([fdo#109293] / [fdo#109506])
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb3/igt@i915_pm_rpm@modeset-pc8-residency-stress.html

  * igt@i915_query@test-query-geometry-subslices:
    - shard-iclb:         NOTRUN -> [SKIP][54] ([i915#5723])
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb2/igt@i915_query@test-query-geometry-subslices.html
    - shard-tglb:         NOTRUN -> [SKIP][55] ([i915#5723])
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb1/igt@i915_query@test-query-geometry-subslices.html

  * igt@i915_selftest@perf@engine_cs:
    - shard-snb:          [PASS][56] -> [SKIP][57] ([fdo#109271]) +2 similar issues
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-snb7/igt@i915_selftest@perf@engine_cs.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-snb5/igt@i915_selftest@perf@engine_cs.html

  * igt@kms_big_fb@4-tiled-addfb-size-overflow:
    - shard-iclb:         NOTRUN -> [SKIP][58] ([i915#5286])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb4/igt@kms_big_fb@4-tiled-addfb-size-overflow.html
    - shard-tglb:         NOTRUN -> [SKIP][59] ([i915#5286])
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb2/igt@kms_big_fb@4-tiled-addfb-size-overflow.html

  * igt@kms_big_fb@yf-tiled-8bpp-rotate-0:
    - shard-iclb:         NOTRUN -> [SKIP][60] ([fdo#110723])
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb7/igt@kms_big_fb@yf-tiled-8bpp-rotate-0.html

  * igt@kms_ccs@pipe-a-bad-pixel-format-y_tiled_gen12_rc_ccs_cc:
    - shard-apl:          NOTRUN -> [SKIP][61] ([fdo#109271] / [i915#3886]) +4 similar issues
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl3/igt@kms_ccs@pipe-a-bad-pixel-format-y_tiled_gen12_rc_ccs_cc.html

  * igt@kms_ccs@pipe-a-crc-sprite-planes-basic-4_tiled_dg2_rc_ccs_cc:
    - shard-tglb:         NOTRUN -> [SKIP][62] ([i915#6095])
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb6/igt@kms_ccs@pipe-a-crc-sprite-planes-basic-4_tiled_dg2_rc_ccs_cc.html

  * igt@kms_ccs@pipe-b-crc-primary-basic-yf_tiled_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][63] ([fdo#111615] / [i915#3689])
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb6/igt@kms_ccs@pipe-b-crc-primary-basic-yf_tiled_ccs.html

  * igt@kms_ccs@pipe-c-bad-aux-stride-y_tiled_gen12_rc_ccs_cc:
    - shard-kbl:          NOTRUN -> [SKIP][64] ([fdo#109271] / [i915#3886]) +10 similar issues
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl4/igt@kms_ccs@pipe-c-bad-aux-stride-y_tiled_gen12_rc_ccs_cc.html

  * igt@kms_ccs@pipe-c-crc-primary-rotation-180-4_tiled_dg2_rc_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][65] ([i915#3689]) +1 similar issue
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb1/igt@kms_ccs@pipe-c-crc-primary-rotation-180-4_tiled_dg2_rc_ccs.html

  * igt@kms_ccs@pipe-c-missing-ccs-buffer-y_tiled_gen12_mc_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][66] ([i915#3689] / [i915#3886])
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb1/igt@kms_ccs@pipe-c-missing-ccs-buffer-y_tiled_gen12_mc_ccs.html
    - shard-glk:          NOTRUN -> [SKIP][67] ([fdo#109271] / [i915#3886]) +2 similar issues
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk5/igt@kms_ccs@pipe-c-missing-ccs-buffer-y_tiled_gen12_mc_ccs.html
    - shard-iclb:         NOTRUN -> [SKIP][68] ([fdo#109278] / [i915#3886]) +1 similar issue
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb5/igt@kms_ccs@pipe-c-missing-ccs-buffer-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-d-bad-pixel-format-y_tiled_gen12_mc_ccs:
    - shard-apl:          NOTRUN -> [SKIP][69] ([fdo#109271]) +91 similar issues
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl1/igt@kms_ccs@pipe-d-bad-pixel-format-y_tiled_gen12_mc_ccs.html

  * igt@kms_chamelium@dp-hpd-enable-disable-mode:
    - shard-glk:          NOTRUN -> [SKIP][70] ([fdo#109271] / [fdo#111827]) +2 similar issues
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk9/igt@kms_chamelium@dp-hpd-enable-disable-mode.html

  * igt@kms_chamelium@hdmi-hpd-enable-disable-mode:
    - shard-tglb:         NOTRUN -> [SKIP][71] ([fdo#109284] / [fdo#111827])
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb1/igt@kms_chamelium@hdmi-hpd-enable-disable-mode.html
    - shard-iclb:         NOTRUN -> [SKIP][72] ([fdo#109284] / [fdo#111827])
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb2/igt@kms_chamelium@hdmi-hpd-enable-disable-mode.html

  * igt@kms_chamelium@vga-hpd-without-ddc:
    - shard-snb:          NOTRUN -> [SKIP][73] ([fdo#109271] / [fdo#111827]) +2 similar issues
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-snb2/igt@kms_chamelium@vga-hpd-without-ddc.html

  * igt@kms_color_chamelium@pipe-a-ctm-negative:
    - shard-apl:          NOTRUN -> [SKIP][74] ([fdo#109271] / [fdo#111827]) +3 similar issues
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl3/igt@kms_color_chamelium@pipe-a-ctm-negative.html

  * igt@kms_color_chamelium@pipe-c-ctm-blue-to-red:
    - shard-kbl:          NOTRUN -> [SKIP][75] ([fdo#109271] / [fdo#111827]) +10 similar issues
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl3/igt@kms_color_chamelium@pipe-c-ctm-blue-to-red.html

  * igt@kms_content_protection@srm:
    - shard-kbl:          NOTRUN -> [TIMEOUT][76] ([i915#1319])
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl7/igt@kms_content_protection@srm.html

  * igt@kms_cursor_crc@pipe-a-cursor-max-size-rapid-movement:
    - shard-iclb:         NOTRUN -> [SKIP][77] ([fdo#109278]) +12 similar issues
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb2/igt@kms_cursor_crc@pipe-a-cursor-max-size-rapid-movement.html

  * igt@kms_cursor_crc@pipe-c-cursor-max-size-onscreen:
    - shard-tglb:         NOTRUN -> [SKIP][78] ([i915#3359]) +1 similar issue
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb7/igt@kms_cursor_crc@pipe-c-cursor-max-size-onscreen.html

  * igt@kms_cursor_crc@pipe-c-cursor-suspend:
    - shard-kbl:          NOTRUN -> [DMESG-WARN][79] ([i915#180]) +1 similar issue
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl1/igt@kms_cursor_crc@pipe-c-cursor-suspend.html

  * igt@kms_cursor_crc@pipe-d-cursor-32x32-sliding:
    - shard-tglb:         NOTRUN -> [SKIP][80] ([i915#3319]) +1 similar issue
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb6/igt@kms_cursor_crc@pipe-d-cursor-32x32-sliding.html

  * igt@kms_cursor_crc@pipe-d-cursor-512x512-rapid-movement:
    - shard-tglb:         NOTRUN -> [SKIP][81] ([fdo#109279] / [i915#3359])
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb1/igt@kms_cursor_crc@pipe-d-cursor-512x512-rapid-movement.html

  * igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic:
    - shard-tglb:         NOTRUN -> [SKIP][82] ([fdo#109274] / [fdo#111825]) +1 similar issue
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb3/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic.html
    - shard-iclb:         NOTRUN -> [SKIP][83] ([fdo#109274] / [fdo#109278])
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb2/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic.html

  * igt@kms_cursor_legacy@cursor-vs-flip-varying-size:
    - shard-iclb:         [PASS][84] -> [FAIL][85] ([i915#5072])
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb7/igt@kms_cursor_legacy@cursor-vs-flip-varying-size.html
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb7/igt@kms_cursor_legacy@cursor-vs-flip-varying-size.html

  * igt@kms_draw_crc@draw-method-rgb565-render-4tiled:
    - shard-tglb:         NOTRUN -> [SKIP][86] ([i915#5287])
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb5/igt@kms_draw_crc@draw-method-rgb565-render-4tiled.html
    - shard-iclb:         NOTRUN -> [SKIP][87] ([i915#5287])
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb7/igt@kms_draw_crc@draw-method-rgb565-render-4tiled.html

  * igt@kms_fbcon_fbt@fbc-suspend:
    - shard-kbl:          NOTRUN -> [FAIL][88] ([i915#4767])
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl4/igt@kms_fbcon_fbt@fbc-suspend.html

  * igt@kms_flip@2x-flip-vs-expired-vblank@ac-hdmi-a1-hdmi-a2:
    - shard-glk:          [PASS][89] -> [FAIL][90] ([i915#79])
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-glk9/igt@kms_flip@2x-flip-vs-expired-vblank@ac-hdmi-a1-hdmi-a2.html
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk2/igt@kms_flip@2x-flip-vs-expired-vblank@ac-hdmi-a1-hdmi-a2.html

  * igt@kms_flip@2x-plain-flip:
    - shard-iclb:         NOTRUN -> [SKIP][91] ([fdo#109274])
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb4/igt@kms_flip@2x-plain-flip.html

  * igt@kms_flip@plain-flip-fb-recreate-interruptible@a-hdmi-a1:
    - shard-glk:          [PASS][92] -> [FAIL][93] ([i915#2122])
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-glk9/igt@kms_flip@plain-flip-fb-recreate-interruptible@a-hdmi-a1.html
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk4/igt@kms_flip@plain-flip-fb-recreate-interruptible@a-hdmi-a1.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs-downscaling:
    - shard-iclb:         NOTRUN -> [SKIP][94] ([i915#2587])
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb2/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs-downscaling.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-downscaling:
    - shard-iclb:         [PASS][95] -> [SKIP][96] ([i915#3701]) +1 similar issue
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb4/igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-downscaling.html
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb2/igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-downscaling.html

  * igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-indfb-pgflip-blt:
    - shard-iclb:         NOTRUN -> [SKIP][97] ([fdo#109280]) +7 similar issues
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb5/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-indfb-pgflip-blt.html

  * igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-spr-indfb-onoff:
    - shard-kbl:          NOTRUN -> [SKIP][98] ([fdo#109271]) +221 similar issues
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl4/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-spr-indfb-onoff.html

  * igt@kms_frontbuffer_tracking@fbcpsr-rgb101010-draw-blt:
    - shard-snb:          NOTRUN -> [SKIP][99] ([fdo#109271]) +174 similar issues
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-snb2/igt@kms_frontbuffer_tracking@fbcpsr-rgb101010-draw-blt.html

  * igt@kms_frontbuffer_tracking@psr-2p-primscrn-pri-shrfb-draw-blt:
    - shard-tglb:         NOTRUN -> [SKIP][100] ([fdo#109280] / [fdo#111825]) +7 similar issues
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb6/igt@kms_frontbuffer_tracking@psr-2p-primscrn-pri-shrfb-draw-blt.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-d-frame-sequence:
    - shard-kbl:          NOTRUN -> [SKIP][101] ([fdo#109271] / [i915#533])
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl1/igt@kms_pipe_crc_basic@read-crc-pipe-d-frame-sequence.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
    - shard-kbl:          [PASS][102] -> [DMESG-WARN][103] ([i915#180]) +4 similar issues
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-kbl3/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl1/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html

  * igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb:
    - shard-kbl:          NOTRUN -> [FAIL][104] ([fdo#108145] / [i915#265]) +1 similar issue
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl7/igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb.html
    - shard-apl:          NOTRUN -> [FAIL][105] ([fdo#108145] / [i915#265])
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl2/igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb.html

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-max:
    - shard-glk:          NOTRUN -> [FAIL][106] ([fdo#108145] / [i915#265]) +1 similar issue
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk3/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-max.html

  * igt@kms_plane_alpha_blend@pipe-c-alpha-transparent-fb:
    - shard-glk:          NOTRUN -> [FAIL][107] ([i915#265])
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk6/igt@kms_plane_alpha_blend@pipe-c-alpha-transparent-fb.html

  * igt@kms_plane_multiple@atomic-pipe-d-tiling-yf:
    - shard-tglb:         NOTRUN -> [SKIP][108] ([fdo#111615]) +2 similar issues
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb7/igt@kms_plane_multiple@atomic-pipe-d-tiling-yf.html

  * igt@kms_plane_scaling@plane-downscale-with-rotation-factor-0-5@pipe-b-edp-1:
    - shard-tglb:         NOTRUN -> [SKIP][109] ([i915#5176]) +3 similar issues
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb7/igt@kms_plane_scaling@plane-downscale-with-rotation-factor-0-5@pipe-b-edp-1.html
    - shard-iclb:         NOTRUN -> [SKIP][110] ([i915#5176]) +2 similar issues
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb6/igt@kms_plane_scaling@plane-downscale-with-rotation-factor-0-5@pipe-b-edp-1.html

  * igt@kms_plane_scaling@planes-upscale-factor-0-25-downscale-factor-0-5@pipe-c-hdmi-a-1:
    - shard-glk:          NOTRUN -> [SKIP][111] ([fdo#109271]) +88 similar issues
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk7/igt@kms_plane_scaling@planes-upscale-factor-0-25-downscale-factor-0-5@pipe-c-hdmi-a-1.html

  * igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area:
    - shard-apl:          NOTRUN -> [SKIP][112] ([fdo#109271] / [i915#658]) +1 similar issue
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl7/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area.html
    - shard-tglb:         NOTRUN -> [SKIP][113] ([i915#2920])
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb5/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area.html
    - shard-glk:          NOTRUN -> [SKIP][114] ([fdo#109271] / [i915#658])
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk4/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area.html
    - shard-iclb:         NOTRUN -> [SKIP][115] ([fdo#111068] / [i915#658])
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb7/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area.html

  * igt@kms_psr2_su@frontbuffer-xrgb8888:
    - shard-kbl:          NOTRUN -> [SKIP][116] ([fdo#109271] / [i915#658]) +2 similar issues
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl4/igt@kms_psr2_su@frontbuffer-xrgb8888.html

  * igt@kms_psr2_su@page_flip-xrgb8888:
    - shard-iclb:         [PASS][117] -> [SKIP][118] ([fdo#109642] / [fdo#111068] / [i915#658])
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb2/igt@kms_psr2_su@page_flip-xrgb8888.html
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb5/igt@kms_psr2_su@page_flip-xrgb8888.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [PASS][119] -> [SKIP][120] ([fdo#109441]) +2 similar issues
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb6/igt@kms_psr@psr2_sprite_plane_move.html

  * igt@kms_rotation_crc@primary-4-tiled-reflect-x-180:
    - shard-iclb:         NOTRUN -> [SKIP][121] ([i915#5289])
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb1/igt@kms_rotation_crc@primary-4-tiled-reflect-x-180.html
    - shard-tglb:         NOTRUN -> [SKIP][122] ([i915#5289])
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb5/igt@kms_rotation_crc@primary-4-tiled-reflect-x-180.html

  * igt@kms_rotation_crc@primary-yf-tiled-reflect-x-0:
    - shard-tglb:         NOTRUN -> [SKIP][123] ([fdo#111615] / [i915#5289])
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb1/igt@kms_rotation_crc@primary-yf-tiled-reflect-x-0.html

  * igt@kms_setmode@basic-clone-single-crtc:
    - shard-iclb:         NOTRUN -> [SKIP][124] ([i915#3555])
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb4/igt@kms_setmode@basic-clone-single-crtc.html
    - shard-tglb:         NOTRUN -> [SKIP][125] ([i915#3555])
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb7/igt@kms_setmode@basic-clone-single-crtc.html

  * igt@kms_writeback@writeback-fb-id:
    - shard-tglb:         NOTRUN -> [SKIP][126] ([i915#2437])
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb2/igt@kms_writeback@writeback-fb-id.html
    - shard-glk:          NOTRUN -> [SKIP][127] ([fdo#109271] / [i915#2437])
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk4/igt@kms_writeback@writeback-fb-id.html
    - shard-iclb:         NOTRUN -> [SKIP][128] ([i915#2437])
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb8/igt@kms_writeback@writeback-fb-id.html
    - shard-kbl:          NOTRUN -> [SKIP][129] ([fdo#109271] / [i915#2437])
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl7/igt@kms_writeback@writeback-fb-id.html
    - shard-apl:          NOTRUN -> [SKIP][130] ([fdo#109271] / [i915#2437])
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl6/igt@kms_writeback@writeback-fb-id.html

  * igt@nouveau_crc@pipe-a-source-outp-complete:
    - shard-tglb:         NOTRUN -> [SKIP][131] ([i915#2530]) +1 similar issue
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb2/igt@nouveau_crc@pipe-a-source-outp-complete.html
    - shard-iclb:         NOTRUN -> [SKIP][132] ([i915#2530])
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb5/igt@nouveau_crc@pipe-a-source-outp-complete.html

  * igt@nouveau_crc@pipe-d-source-rg:
    - shard-iclb:         NOTRUN -> [SKIP][133] ([fdo#109278] / [i915#2530])
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb1/igt@nouveau_crc@pipe-d-source-rg.html

  * igt@prime_nv_api@i915_nv_import_twice:
    - shard-tglb:         NOTRUN -> [SKIP][134] ([fdo#109291]) +2 similar issues
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb7/igt@prime_nv_api@i915_nv_import_twice.html
    - shard-iclb:         NOTRUN -> [SKIP][135] ([fdo#109291]) +2 similar issues
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb6/igt@prime_nv_api@i915_nv_import_twice.html

  * igt@sw_sync@sync_multi_timeline_wait:
    - shard-kbl:          NOTRUN -> [FAIL][136] ([i915#6140])
   [136]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl3/igt@sw_sync@sync_multi_timeline_wait.html
    - shard-snb:          NOTRUN -> [FAIL][137] ([i915#6140])
   [137]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-snb6/igt@sw_sync@sync_multi_timeline_wait.html

  * igt@sysfs_clients@sema-25:
    - shard-kbl:          NOTRUN -> [SKIP][138] ([fdo#109271] / [i915#2994]) +1 similar issue
   [138]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl4/igt@sysfs_clients@sema-25.html

  * igt@sysfs_clients@split-50:
    - shard-apl:          NOTRUN -> [SKIP][139] ([fdo#109271] / [i915#2994])
   [139]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl7/igt@sysfs_clients@split-50.html
    - shard-tglb:         NOTRUN -> [SKIP][140] ([i915#2994])
   [140]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb1/igt@sysfs_clients@split-50.html
    - shard-glk:          NOTRUN -> [SKIP][141] ([fdo#109271] / [i915#2994])
   [141]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk8/igt@sysfs_clients@split-50.html
    - shard-iclb:         NOTRUN -> [SKIP][142] ([i915#2994])
   [142]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb7/igt@sysfs_clients@split-50.html

  
#### Possible fixes ####

  * igt@device_reset@unbind-reset-rebind:
    - shard-iclb:         [DMESG-WARN][143] ([i915#2867]) -> [PASS][144]
   [143]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb6/igt@device_reset@unbind-reset-rebind.html
   [144]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb1/igt@device_reset@unbind-reset-rebind.html

  * igt@fbdev@info:
    - {shard-rkl}:        [SKIP][145] ([i915#2582]) -> [PASS][146] +2 similar issues
   [145]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-1/igt@fbdev@info.html
   [146]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-5/igt@fbdev@info.html

  * igt@gem_ctx_isolation@preservation-s3@vcs0:
    - shard-kbl:          [DMESG-WARN][147] ([i915#180]) -> [PASS][148] +9 similar issues
   [147]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-kbl4/igt@gem_ctx_isolation@preservation-s3@vcs0.html
   [148]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl7/igt@gem_ctx_isolation@preservation-s3@vcs0.html

  * igt@gem_eio@in-flight-1us:
    - shard-tglb:         [TIMEOUT][149] ([i915#3063]) -> [PASS][150]
   [149]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-tglb5/igt@gem_eio@in-flight-1us.html
   [150]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb3/igt@gem_eio@in-flight-1us.html

  * igt@gem_exec_balancer@parallel-keep-submit-fence:
    - shard-iclb:         [SKIP][151] ([i915#4525]) -> [PASS][152]
   [151]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb3/igt@gem_exec_balancer@parallel-keep-submit-fence.html
   [152]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb2/igt@gem_exec_balancer@parallel-keep-submit-fence.html

  * igt@gem_exec_flush@basic-uc-pro-default:
    - shard-snb:          [SKIP][153] ([fdo#109271]) -> [PASS][154] +3 similar issues
   [153]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-snb6/igt@gem_exec_flush@basic-uc-pro-default.html
   [154]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-snb2/igt@gem_exec_flush@basic-uc-pro-default.html

  * igt@gen9_exec_parse@allowed-all:
    - shard-glk:          [DMESG-WARN][155] ([i915#5566] / [i915#716]) -> [PASS][156]
   [155]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-glk9/igt@gen9_exec_parse@allowed-all.html
   [156]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk3/igt@gen9_exec_parse@allowed-all.html

  * igt@i915_pm_rpm@dpms-lpsp:
    - {shard-rkl}:        [SKIP][157] ([i915#1397]) -> [PASS][158] +1 similar issue
   [157]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-5/igt@i915_pm_rpm@dpms-lpsp.html
   [158]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-6/igt@i915_pm_rpm@dpms-lpsp.html

  * igt@kms_big_fb@y-tiled-64bpp-rotate-0:
    - {shard-rkl}:        [SKIP][159] ([i915#1845] / [i915#4098]) -> [PASS][160] +20 similar issues
   [159]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-5/igt@kms_big_fb@y-tiled-64bpp-rotate-0.html
   [160]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-6/igt@kms_big_fb@y-tiled-64bpp-rotate-0.html

  * igt@kms_color@pipe-b-ctm-0-5:
    - {shard-rkl}:        [SKIP][161] ([i915#1149] / [i915#1849] / [i915#4070] / [i915#4098]) -> [PASS][162] +1 similar issue
   [161]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-1/igt@kms_color@pipe-b-ctm-0-5.html
   [162]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-6/igt@kms_color@pipe-b-ctm-0-5.html

  * igt@kms_cursor_crc@pipe-a-cursor-256x256-offscreen:
    - {shard-dg1}:        [SKIP][163] ([i915#1836]) -> [PASS][164] +3 similar issues
   [163]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-dg1-17/igt@kms_cursor_crc@pipe-a-cursor-256x256-offscreen.html
   [164]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-dg1-13/igt@kms_cursor_crc@pipe-a-cursor-256x256-offscreen.html

  * igt@kms_cursor_crc@pipe-b-cursor-256x85-sliding:
    - {shard-rkl}:        [SKIP][165] ([fdo#112022] / [i915#4070]) -> [PASS][166] +5 similar issues
   [165]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-5/igt@kms_cursor_crc@pipe-b-cursor-256x85-sliding.html
   [166]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-6/igt@kms_cursor_crc@pipe-b-cursor-256x85-sliding.html

  * igt@kms_cursor_legacy@pipe-c-forked-bo:
    - {shard-rkl}:        [SKIP][167] ([i915#4070]) -> [PASS][168] +1 similar issue
   [167]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-6/igt@kms_cursor_legacy@pipe-c-forked-bo.html
   [168]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-4/igt@kms_cursor_legacy@pipe-c-forked-bo.html

  * igt@kms_cursor_legacy@short-flip-before-cursor-toggle:
    - {shard-rkl}:        [SKIP][169] ([fdo#111825] / [i915#4070]) -> [PASS][170]
   [169]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-2/igt@kms_cursor_legacy@short-flip-before-cursor-toggle.html
   [170]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-6/igt@kms_cursor_legacy@short-flip-before-cursor-toggle.html

  * igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled:
    - {shard-rkl}:        [SKIP][171] ([fdo#111314] / [i915#4098] / [i915#4369]) -> [PASS][172] +3 similar issues
   [171]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-5/igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled.html
   [172]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-6/igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled.html

  * igt@kms_fbcon_fbt@fbc-suspend:
    - shard-apl:          [INCOMPLETE][173] ([i915#180] / [i915#1982]) -> [PASS][174]
   [173]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-apl8/igt@kms_fbcon_fbt@fbc-suspend.html
   [174]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl3/igt@kms_fbcon_fbt@fbc-suspend.html

  * igt@kms_flip@flip-vs-suspend-interruptible@b-dp1:
    - shard-apl:          [DMESG-WARN][175] ([i915#180]) -> [PASS][176] +1 similar issue
   [175]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-apl3/igt@kms_flip@flip-vs-suspend-interruptible@b-dp1.html
   [176]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl6/igt@kms_flip@flip-vs-suspend-interruptible@b-dp1.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-upscaling:
    - shard-glk:          [FAIL][177] ([i915#4911]) -> [PASS][178]
   [177]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-glk8/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-upscaling.html
   [178]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk5/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-upscaling.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling:
    - {shard-rkl}:        [SKIP][179] ([i915#3701]) -> [PASS][180]
   [179]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-1/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling.html
   [180]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-6/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling.html

  * igt@kms_frontbuffer_tracking@fbc-1p-pri-indfb-multidraw:
    - {shard-dg1}:        [SKIP][181] ([i915#5721]) -> [PASS][182]
   [181]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-dg1-17/igt@kms_frontbuffer_tracking@fbc-1p-pri-indfb-multidraw.html
   [182]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-dg1-19/igt@kms_frontbuffer_tracking@fbc-1p-pri-indfb-multidraw.html

  * igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-draw-mmap-wc:
    - shard-glk:          [FAIL][183] ([i915#1888] / [i915#2546]) -> [PASS][184] +1 similar issue
   [183]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-glk1/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-draw-mmap-wc.html
   [184]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-glk4/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-draw-mmap-wc.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-rte:
    - {shard-rkl}:        [SKIP][185] ([i915#1849] / [i915#4098]) -> [PASS][186] +12 similar issues
   [185]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-1/igt@kms_frontbuffer_tracking@fbcpsr-1p-rte.html
   [186]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-6/igt@kms_frontbuffer_tracking@fbcpsr-1p-rte.html

  * igt@kms_hdmi_inject@inject-audio:
    - {shard-tglu}:       [SKIP][187] ([i915#433]) -> [PASS][188]
   [187]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-tglu-2/igt@kms_hdmi_inject@inject-audio.html
   [188]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglu-1/igt@kms_hdmi_inject@inject-audio.html
    - shard-tglb:         [SKIP][189] ([i915#433]) -> [PASS][190]
   [189]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-tglb3/igt@kms_hdmi_inject@inject-audio.html
   [190]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-tglb2/igt@kms_hdmi_inject@inject-audio.html

  * igt@kms_hdr@bpc-switch-suspend@pipe-a-dp-1:
    - shard-kbl:          [DMESG-FAIL][191] ([i915#180]) -> [PASS][192]
   [191]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-kbl6/igt@kms_hdr@bpc-switch-suspend@pipe-a-dp-1.html
   [192]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl4/igt@kms_hdr@bpc-switch-suspend@pipe-a-dp-1.html

  * igt@kms_plane@pixel-format-source-clamping@pipe-b-planes:
    - {shard-rkl}:        [SKIP][193] ([i915#1849] / [i915#3558]) -> [PASS][194] +1 similar issue
   [193]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-5/igt@kms_plane@pixel-format-source-clamping@pipe-b-planes.html
   [194]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-6/igt@kms_plane@pixel-format-source-clamping@pipe-b-planes.html

  * igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb:
    - {shard-rkl}:        [SKIP][195] ([i915#1849] / [i915#4070] / [i915#4098]) -> [PASS][196] +2 similar issues
   [195]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-1/igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb.html
   [196]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-6/igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb.html

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-y:
    - {shard-rkl}:        [SKIP][197] ([i915#1849] / [i915#3558] / [i915#4070]) -> [PASS][198] +1 similar issue
   [197]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-2/igt@kms_plane_multiple@atomic-pipe-a-tiling-y.html
   [198]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-6/igt@kms_plane_multiple@atomic-pipe-a-tiling-y.html

  * igt@kms_plane_scaling@planes-unity-scaling-downscale-factor-0-5@pipe-a-edp-1:
    - shard-iclb:         [SKIP][199] ([i915#5235]) -> [PASS][200] +2 similar issues
   [199]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb2/igt@kms_plane_scaling@planes-unity-scaling-downscale-factor-0-5@pipe-a-edp-1.html
   [200]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb6/igt@kms_plane_scaling@planes-unity-scaling-downscale-factor-0-5@pipe-a-edp-1.html

  * igt@kms_psr@psr2_cursor_plane_move:
    - shard-iclb:         [SKIP][201] ([fdo#109441]) -> [PASS][202]
   [201]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb8/igt@kms_psr@psr2_cursor_plane_move.html
   [202]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb2/igt@kms_psr@psr2_cursor_plane_move.html

  * igt@kms_psr@sprite_plane_onoff:
    - {shard-rkl}:        [SKIP][203] ([i915#1072]) -> [PASS][204]
   [203]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-rkl-5/igt@kms_psr@sprite_plane_onoff.html
   [204]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-rkl-6/igt@kms_psr@sprite_plane_onoff.html

  
#### Warnings ####

  * igt@gem_exec_balancer@parallel-ordering:
    - shard-iclb:         [SKIP][205] ([i915#4525]) -> [FAIL][206] ([i915#6117])
   [205]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb5/igt@gem_exec_balancer@parallel-ordering.html
   [206]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb1/igt@gem_exec_balancer@parallel-ordering.html

  * igt@gem_exec_fair@basic-none-rrul@rcs0:
    - shard-iclb:         [FAIL][207] ([i915#2852]) -> [FAIL][208] ([i915#2842])
   [207]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb2/igt@gem_exec_fair@basic-none-rrul@rcs0.html
   [208]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb6/igt@gem_exec_fair@basic-none-rrul@rcs0.html

  * igt@kms_flip@flip-vs-suspend@c-dp1:
    - shard-kbl:          [DMESG-WARN][209] ([i915#180]) -> [INCOMPLETE][210] ([i915#3614])
   [209]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-kbl7/igt@kms_flip@flip-vs-suspend@c-dp1.html
   [210]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-kbl3/igt@kms_flip@flip-vs-suspend@c-dp1.html

  * igt@kms_psr2_sf@overlay-plane-move-continuous-sf:
    - shard-iclb:         [SKIP][211] ([i915#2920]) -> [SKIP][212] ([i915#658])
   [211]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb2/igt@kms_psr2_sf@overlay-plane-move-continuous-sf.html
   [212]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb1/igt@kms_psr2_sf@overlay-plane-move-continuous-sf.html

  * igt@kms_psr2_sf@overlay-plane-update-continuous-sf:
    - shard-iclb:         [SKIP][213] ([fdo#111068] / [i915#658]) -> [SKIP][214] ([i915#2920])
   [213]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb8/igt@kms_psr2_sf@overlay-plane-update-continuous-sf.html
   [214]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb2/igt@kms_psr2_sf@overlay-plane-update-continuous-sf.html

  * igt@kms_psr2_sf@plane-move-sf-dmg-area:
    - shard-iclb:         [SKIP][215] ([i915#2920]) -> [SKIP][216] ([fdo#111068] / [i915#658])
   [215]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-iclb2/igt@kms_psr2_sf@plane-move-sf-dmg-area.html
   [216]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-iclb5/igt@kms_psr2_sf@plane-move-sf-dmg-area.html

  * igt@runner@aborted:
    - shard-apl:          ([FAIL][217], [FAIL][218], [FAIL][219], [FAIL][220], [FAIL][221]) ([i915#180] / [i915#3002] / [i915#4312] / [i915#5257]) -> ([FAIL][222], [FAIL][223], [FAIL][224], [FAIL][225], [FAIL][226], [FAIL][227], [FAIL][228], [FAIL][229]) ([fdo#109271] / [i915#180] / [i915#3002] / [i915#4312] / [i915#5257])
   [217]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-apl8/igt@runner@aborted.html
   [218]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-apl7/igt@runner@aborted.html
   [219]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-apl7/igt@runner@aborted.html
   [220]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-apl8/igt@runner@aborted.html
   [221]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11726/shard-apl3/igt@runner@aborted.html
   [222]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl6/igt@runner@aborted.html
   [223]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl6/igt@runner@aborted.html
   [224]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl6/igt@runner@aborted.html
   [225]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl7/igt@runner@aborted.html
   [226]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl3/igt@runner@aborted.html
   [227]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl1/igt@runner@aborted.html
   [228]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl3/igt@runner@aborted.html
   [229]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/shard-apl1/igt@runner@aborted.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109274]: https://bugs.freedesktop.org/show_bug.cgi?id=109274
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109279]: https://bugs.freedesktop.org/show_bug.cgi?id=109279
  [fdo#109280]: https://bugs.freedesktop.org/show_bug.cgi?id=109280
  [fdo#109283]: https://bugs.freedesktop.org/show_bug.cgi?id=109283
  [fdo#109284]: https://bugs.freedesktop.org/show_bug.cgi?id=109284
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109289]: https://bugs.freedesktop.org/show_bug.cgi?id=109289
  [fdo#109291]: https://bugs.freedesktop.org/show_bug.cgi?id=109291
  [fdo#109293]: https://bugs.freedesktop.org/show_bug.cgi?id=109293
  [fdo#109295]: https://bugs.freedesktop.org/show_bug.cgi?id=109295
  [fdo#109300]: https://bugs.freedesktop.org/show_bug.cgi?id=109300
  [fdo#109312]: https://bugs.freedesktop.org/show_bug.cgi?id=109312
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109506]: https://bugs.freedesktop.org/show_bug.cgi?id=109506
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#110189]: https://bugs.freedesktop.org/show_bug.cgi?id=110189
  [fdo#110254]: https://bugs.freedesktop.org/show_bug.cgi?id=110254
  [fdo#110723]: https://bugs.freedesktop.org/show_bug.cgi?id=110723
  [fdo#110892]: https://bugs.freedesktop.org/show_bug.cgi?id=110892
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [fdo#111314]: https://bugs.freedesktop.org/show_bug.cgi?id=111314
  [fdo#111614]: https://bugs.freedesktop.org/show_bug.cgi?id=111614
  [fdo#111615]: https://bugs.freedesktop.org/show_bug.cgi?id=111615
  [fdo#111644]: https://bugs.freedesktop.org/show_bug.cgi?id=111644
  [fdo#111656]: https://bugs.freedesktop.org/show_bug.cgi?id=111656
  [fdo#111825]: https://bugs.freedesktop.org/show_bug.cgi?id=111825
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [fdo#112022]: https://bugs.freedesktop.org/show_bug.cgi?id=112022
  [fdo#112054]: https://bugs.freedesktop.org/show_bug.cgi?id=112054
  [i915#1063]: https://gitlab.freedesktop.org/drm/intel/issues/1063
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#1099]: https://gitlab.freedesktop.org/drm/intel/issues/1099
  [i915#1149]: https://gitlab.freedesktop.org/drm/intel/issues/1149
  [i915#1155]: https://gitlab.freedesktop.org/drm/intel/issues/1155
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1319]: https://gitlab.freedesktop.org/drm/intel/issues/1319
  [i915#132]: https://gitlab.freedesktop.org/drm/intel/issues/132
  [i915#1397]: https://gitlab.freedesktop.org/drm/intel/issues/1397
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#1825]: https://gitlab.freedesktop.org/drm/intel/issues/1825
  [i915#1836]: https://gitlab.freedesktop.org/drm/intel/issues/1836
  [i915#1845]: https://gitlab.freedesktop.org/drm/intel/issues/1845
  [i915#1849]: https://gitlab.freedesktop.org/drm/intel/issues/1849
  [i915#1888]: https://gitlab.freedesktop.org/drm/intel/issues/1888
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2029]: https://gitlab.freedesktop.org/drm/intel/issues/2029
  [i915#2122]: https://gitlab.freedesktop.org/drm/intel/issues/2122
  [i915#2410]: https://gitlab.freedesktop.org/drm/intel/issues/2410
  [i915#2411]: https://gitlab.freedesktop.org/drm/intel/issues/2411
  [i915#2436]: https://gitlab.freedesktop.org/drm/intel/issues/2436
  [i915#2437]: https://gitlab.freedesktop.org/drm/intel/issues/2437
  [i915#2527]: https://gitlab.freedesktop.org/drm/intel/issues/2527
  [i915#2530]: https://gitlab.freedesktop.org/drm/intel/issues/2530
  [i915#2546]: https://gitlab.freedesktop.org/drm/intel/issues/2546
  [i915#2582]: https://gitlab.freedesktop.org/drm/intel/issues/2582
  [i915#2587]: https://gitlab.freedesktop.org/drm/intel/issues/2587
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#2658]: https://gitlab.freedesktop.org/drm/intel/issues/2658
  [i915#2672]: https://gitlab.freedesktop.org/drm/intel/issues/2672
  [i915#2681]: https://gitlab.freedesktop.org/drm/intel/issues/2681
  [i915#2705]: https://gitlab.freedesktop.org/drm/intel/issues/2705
  [i915#280]: https://gitlab.freedesktop.org/drm/intel/issues/280
  [i915#2842]: https://gitlab.freedesktop.org/drm/intel/issues/2842
  [i915#2849]: https://gitlab.freedesktop.org/drm/intel/issues/2849
  [i915#2852]: https://gitlab.freedesktop.org/drm/intel/issues/2852
  [i915#2856]: https://gitlab.freedesktop.org/drm/intel/issues/2856
  [i915#2867]: https://gitlab.freedesktop.org/drm/intel/issues/2867
  [i915#2920]: https://gitlab.freedesktop.org/drm/intel/issues/2920
  [i915#2994]: https://gitlab.freedesktop.org/drm/intel/issues/2994
  [i915#3002]: https://gitlab.freedesktop.org/drm/intel/issues/3002
  [i915#3012]: https://gitlab.freedesktop.org/drm/intel/issues/3012
  [i915#3063]: https://gitlab.freedesktop.org/drm/intel/issues/3063
  [i915#3070]: https://gitlab.freedesktop.org/drm/intel/issues/3070
  [i915#3116]: https://gitlab.freedesktop.org/drm/intel/issues/3116
  [i915#3281]: https://gitlab.freedesktop.org/drm/intel/issues/3281
  [i915#3282]: https://gitlab.freedesktop.org/drm/intel/issues/3282
  [i915#3297]: https://gitlab.freedesktop.org/drm/intel/issues/3297
  [i915#3299]: https://gitlab.freedesktop.org/drm/intel/issues/3299
  [i915#3301]: https://gitlab.freedesktop.org/drm/intel/issues/3301
  [i915#3318]: https://gitlab.freedesktop.org/drm/intel/issues/3318
  [i915#3319]: https://gitlab.freedesktop.org/drm/intel/issues/3319
  [i915#3359]: https://gitlab.freedesktop.org/drm/intel/issues/3359
  [i915#3458]: https://gitlab.freedesktop.org/drm/intel/issues/3458
  [i915#3464]: https://gitlab.freedesktop.org/drm/intel/issues/3464
  [i915#3528]: https://gitlab.freedesktop.org/drm/intel/issues/3528
  [i915#3539]: https://gitlab.freedesktop.org/drm/intel/issues/3539
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3558]: https://gitlab.freedesktop.org/drm/intel/issues/3558
  [i915#3614]: https://gitlab.freedesktop.org/drm/intel/issues/3614
  [i915#3637]: https://gitlab.freedesktop.org/drm/intel/issues/3637
  [i915#3638]: https://gitlab.freedesktop.org/drm/intel/issues/3638
  [i915#3689]: https://gitlab.freedesktop.org/drm/intel/issues/3689
  [i915#3701]: https://gitlab.freedesktop.org/drm/intel/issues/3701
  [i915#3708]: https://gitlab.freedesktop.org/drm/intel/issues/3708
  [i915#3734]: https://gitlab.freedesktop.org/drm/intel/issues/3734
  [i915#3742]: https://gitlab.freedesktop.org/drm/intel/issues/3742
  [i915#3810]: https://gitlab.freedesktop.org/drm/intel/issues/3810
  [i915#3828]: https://gitlab.freedesktop.org/drm/intel/issues/3828
  [i915#3886]: https://gitlab.freedesktop.org/drm/intel/issues/3886
  [i915#3952]: https://gitlab.freedesktop.org/drm/intel/issues/3952
  [i915#3955]: https://gitlab.freedesktop.org/drm/intel/issues/3955
  [i915#3987]: https://gitlab.freedesktop.org/drm/intel/issues/3987
  [i915#4016]: https://gitlab.freedesktop.org/drm/intel/issues/4016
  [i915#4070]: https://gitlab.freedesktop.org/drm/intel/issues/4070
  [i915#4077]: https://gitlab.freedesktop.org/drm/intel/issues/4077
  [i915#4078]: https://gitlab.freedesktop.org/drm/intel/issues/4078
  [i915#4079]: https://gitlab.freedesktop.org/drm/intel/issues/4079
  [i915#4083]: https://gitlab.freedesktop.org/drm/intel/issues/4083
  [i915#4098]: https://gitlab.freedesktop.org/drm/intel/issues/4098
  [i915#4212]: https://gitlab.freedesktop.org/drm/intel/issues/4212
  [i915#426]: https://gitlab.freedesktop.org/drm/intel/issues/426
  [i915#4270]: https://gitlab.freedesktop.org/drm/intel/issues/4270
  [i915#4278]: https://gitlab.freedesktop.org/drm/intel/issues/4278
  [i915#4312]: https://gitlab.freedesktop.org/drm/intel/issues/4312
  [i915#433]: https://gitlab.freedesktop.org/drm/intel/issues/433
  [i915#4349]: https://gitlab.freedesktop.org/drm/intel/issues/4349
  [i915#4369]: https://gitlab.freedesktop.org/drm/intel/issues/4369
  [i915#4391]: https://gitlab.freedesktop.org/drm/intel/issues/4391
  [i915#4525]: https://gitlab.freedesktop.org/drm/intel/issues/4525
  [i915#4538]: https://gitlab.freedesktop.org/drm/intel/issues/4538
  [i915#4565]: https://gitlab.freedesktop.org/drm/intel/issues/4565
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4767]: https://gitlab.freedesktop.org/drm/intel/issues/4767
  [i915#4771]: https://gitlab.freedesktop.org/drm/intel/issues/4771
  [i915#4807]: https://gitlab.freedesktop.org/drm/intel/issues/4807
  [i915#4812]: https://gitlab.freedesktop.org/drm/intel/issues/4812
  [i915#4833]: https://gitlab.freedesktop.org/drm/intel/issues/4833
  [i915#4842]: https://gitlab.freedesktop.org/drm/intel/issues/4842
  [i915#4852]: https://gitlab.freedesktop.org/drm/intel/issues/4852
  [i915#4853]: https://gitlab.freedesktop.org/drm/intel/issues/4853
  [i915#4859]: https://gitlab.freedesktop.org/drm/intel/issues/4859
  [i915#4860]: https://gitlab.freedesktop.org/drm/intel/issues/4860
  [i915#4873]: https://gitlab.freedesktop.org/drm/intel/issues/4873
  [i915#4877]: https://gitlab.freedesktop.org/drm/intel/issues/4877
  [i915#4880]: https://gitlab.freedesktop.org/drm/intel/issues/4880
  [i915#4885]: https://gitlab.freedesktop.org/drm/intel/issues/4885
  [i915#4893]: https://gitlab.freedesktop.org/drm/intel/issues/4893
  [i915#4911]: https://gitlab.freedesktop.org/drm/intel/issues/4911
  [i915#4991]: https://gitlab.freedesktop.org/drm/intel/issues/4991
  [i915#4995]: https://gitlab.freedesktop.org/drm/intel/issues/4995
  [i915#5072]: https://gitlab.freedesktop.org/drm/intel/issues/5072
  [i915#5176]: https://gitlab.freedesktop.org/drm/intel/issues/5176
  [i915#5235]: https://gitlab.freedesktop.org/drm/intel/issues/5235
  [i915#5257]: https://gitlab.freedesktop.org/drm/intel/issues/5257
  [i915#5286]: https://gitlab.freedesktop.org/drm/intel/issues/5286
  [i915#5287]: https://gitlab.freedesktop.org/drm/intel/issues/5287
  [i915#5288]: https://gitlab.freedesktop.org/drm/intel/issues/5288
  [i915#5289]: https://gitlab.freedesktop.org/drm/intel/issues/5289
  [i915#5325]: https://gitlab.freedesktop.org/drm/intel/issues/5325
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [i915#5439]: https://gitlab.freedesktop.org/drm/intel/issues/5439
  [i915#5461]: https://gitlab.freedesktop.org/drm/intel/issues/5461
  [i915#5563]: https://gitlab.freedesktop.org/drm/intel/issues/5563
  [i915#5566]: https://gitlab.freedesktop.org/drm/intel/issues/5566
  [i915#5721]: https://gitlab.freedesktop.org/drm/intel/issues/5721
  [i915#5723]: https://gitlab.freedesktop.org/drm/intel/issues/5723
  [i915#5784]: https://gitlab.freedesktop.org/drm/intel/issues/5784
  [i915#6095]: https://gitlab.freedesktop.org/drm/intel/issues/6095
  [i915#6117]: https://gitlab.freedesktop.org/drm/intel/issues/6117
  [i915#6140]: https://gitlab.freedesktop.org/drm/intel/issues/6140
  [i915#6141]: https://gitlab.freedesktop.org/drm/intel/issues/6141
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
  [i915#716]: https://gitlab.freedesktop.org/drm/intel/issues/716
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_6505 -> IGTPW_7228
  * Piglit: piglit_4509 -> None

  CI-20190529: 20190529
  CI_DRM_11726: 74ea63ad8ffc71b8345436147517253dcabd4793 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_7228: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/index.html
  IGT_6505: edb1a467fb622b23b927e28ff603fa43851fea97 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_7228/index.html

[-- Attachment #2: Type: text/html, Size: 69382 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2
  2022-06-03 13:11   ` Petri Latvala
@ 2022-06-06  6:33     ` Zbigniew Kempczyński
  2022-06-06  8:07       ` Petri Latvala
  0 siblings, 1 reply; 16+ messages in thread
From: Zbigniew Kempczyński @ 2022-06-06  6:33 UTC (permalink / raw)
  To: Petri Latvala; +Cc: igt-dev

On Fri, Jun 03, 2022 at 04:11:41PM +0300, Petri Latvala wrote:
> On Fri, Jun 03, 2022 at 03:05:01PM +0200, Zbigniew Kempczyński wrote:
> > Adding crc32 calculation on gpu gives us new possibility to verify data
> > integrity without relying on trust cpu mapping is correct.
> > 
> > Patch introduces calculating crc32 on DG2 only. On older gens ALU
> > (MI_MATH) doesn't support bit-shifting instructions as well as multiply
> > or divide. Emulating n-bit shifts cost hundred of instructions with
> > predicated SRM (works on render engine only). Another limitation is lack
> > of indexed load / store. On DG2 we can use WPARID and CS_MI_ADDRESS_OFFSET
> > to achieve indexed operation on memory.
> > 
> > Due to performance reasons (cpu crc32 calculation even on WC memory is
> > still much faster than on gpu, also depends on calculated object memory
> > region) calculation will complete in reasonable of time only for few MiB.
> > 
> > v2: - use registers relative to engine to allow run on all engines (Chris)
> >     - use predication instead of memory access to get better performance
> >       (Chris)
> >     - add location where crc32 implementation comes from (Petri)
> > 
> > v3: - extract crc32 table + cpu_crc32() to separate i915_crc_table.c
> > 
> > Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
> > ---
> >  lib/i915/i915_crc.c         | 311 ++++++++++++++++++++++++++++++++++++
> >  lib/i915/i915_crc.h         |  17 ++
> >  lib/i915/i915_crc32_table.c | 105 ++++++++++++
> >  lib/intel_reg.h             |   7 +
> >  lib/meson.build             |   1 +
> >  5 files changed, 441 insertions(+)
> >  create mode 100644 lib/i915/i915_crc.c
> >  create mode 100644 lib/i915/i915_crc.h
> >  create mode 100644 lib/i915/i915_crc32_table.c
> > 
> > diff --git a/lib/i915/i915_crc.c b/lib/i915/i915_crc.c
> > new file mode 100644
> > index 0000000000..c26a8e05b9
> > --- /dev/null
> > +++ b/lib/i915/i915_crc.c
> > @@ -0,0 +1,311 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2022 Intel Corporation
> > + */
> > +
> > +#include <stddef.h>
> > +#include <stdint.h>
> > +#include "drmtest.h"
> > +#include "gem_create.h"
> > +#include "gem_engine_topology.h"
> > +#include "gem_mman.h"
> > +#include "i830_reg.h"
> > +#include "i915_drm.h"
> > +#include "intel_reg.h"
> > +#include "intel_chipset.h"
> > +#include "ioctl_wrappers.h"
> > +#include "intel_allocator.h"
> > +#include "i915/i915_crc.h"
> > +
> > +/* Include crc32 table + cpu_crc32() */
> > +#include "i915_crc32_table.c"
> 
> #including .c files is ugly. Can that be a header file with
> statics/inlines instead?

To avoid aesthetic dilemmas I'm going to add a separate igt_crc.c
file, which will be more vendor agnostic, with the crc table(s)
exposed as externs. Assuming other vendors may add some gpu crc
calculation, we can share the crc32 (and maybe other) tables from
there. igt_cpu_crc32() would also be good to put in there.

> 
> That said, it also isn't i915-specific anymore but that's not a
> blocker for merging the code at this time.

That's fine, it's better to do a few iterations so the code looks
better / is more future-ready than to merge it just because it works.

Thanks for the review, expect a new version soon.

--
Zbigniew

> 
> 
> -- 
> Petri Latvala
> 
> 
> > +
> > +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
> > +
> > +#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
> > +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
> > +/* Opcodes for MI_MATH_INSTR */
> > +#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
> > +#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
> > +#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
> > +#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
> > +#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
> > +#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
> > +#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
> > +#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
> > +#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
> > +#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
> > +#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
> > +#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
> > +/* DG2+ */
> > +#define   MI_MATH_SHL                   MI_MATH_INSTR(0x105, 0x0, 0x0)
> > +#define   MI_MATH_SHR                   MI_MATH_INSTR(0x106, 0x0, 0x0)
> > +#define   MI_MATH_SAR                   MI_MATH_INSTR(0x107, 0x0, 0x0)
> > +
> > +/* Registers used as operands in MI_MATH_INSTR */
> > +#define   MI_MATH_REG(x)                (x)
> > +#define   MI_MATH_REG_SRCA              0x20
> > +#define   MI_MATH_REG_SRCB              0x21
> > +#define   MI_MATH_REG_ACCU              0x31
> > +#define   MI_MATH_REG_ZF                0x32
> > +#define   MI_MATH_REG_CF                0x33
> > +
> > +#define MI_SET_PREDICATE                MI_INSTR(0x01, 0)
> > +#define MI_ARB_CHECK                    MI_INSTR(0x5, 0)
> > +#define MI_LOAD_REGISTER_REG            MI_INSTR(0x2A, 1)
> > +#define CS_GPR(x)                       (0x600 + 8 * (x))
> > +#define GPR(x)                          CS_GPR(x)
> > +#define R(x)                            (x)
> > +#define USERDATA(offset, idx)	        ((offset) + (0x100 + (idx)) * 4)
> > +#define OFFSET(obj_offset, current, start) \
> > +	((obj_offset) + (current - start) * 4)
> > +
> > +#define MI_PREDICATE_RESULT             0x3B8
> > +#define WPARID                          0x21C
> > +#define CS_MI_ADDRESS_OFFSET            0x3B4
> > +
> > +#define LOAD_REGISTER_REG(__reg_src, __reg_dst) do { \
> > +		*bb++ = MI_LOAD_REGISTER_REG | BIT(19) | BIT(18); \
> > +		*bb++ = (__reg_src); \
> > +		*bb++ = (__reg_dst); \
> > +	} while (0)
> > +
> > +#define LOAD_REGISTER_IMM32(__reg, __imm1) do { \
> > +		*bb++ = MI_LOAD_REGISTER_IMM | BIT(19); \
> > +		*bb++ = (__reg); \
> > +		*bb++ = (__imm1); \
> > +	} while (0)
> > +
> > +#define LOAD_REGISTER_IMM64(__reg, __imm1, __imm2) do { \
> > +		*bb++ = (MI_LOAD_REGISTER_IMM + 2) | BIT(19); \
> > +		*bb++ = (__reg); \
> > +		*bb++ = (__imm1); \
> > +		*bb++ = (__reg) + 4; \
> > +		*bb++ = (__imm2); \
> > +	} while (0)
> > +
> > +#define LOAD_REGISTER_MEM(__reg, __offset) do { \
> > +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19); \
> > +		*bb++ = (__reg); \
> > +		*bb++ = (__offset); \
> > +		*bb++ = (__offset) >> 32; \
> > +	} while (0)
> > +
> > +#define LOAD_REGISTER_MEM_WPARID(__reg, __offset) do { \
> > +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19) | BIT(16); \
> > +		*bb++ = (__reg); \
> > +		*bb++ = (__offset); \
> > +		*bb++ = (__offset) >> 32; \
> > +	} while (0)
> > +
> > +#define STORE_REGISTER_MEM(__reg, __offset) do { \
> > +		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19); \
> > +		*bb++ = (__reg); \
> > +		*bb++ = (__offset); \
> > +		*bb++ = (__offset) >> 32; \
> > +	} while (0)
> > +
> > +#define STORE_REGISTER_MEM_PREDICATED(__reg, __offset) do { \
> > +		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19) | (BIT(21); \
> > +		*bb++ = (__reg); \
> > +		*bb++ = (__offset); \
> > +		*bb++ = (__offset) >> 32; \
> > +	} while (0)
> > +
> > +#define COND_BBE(__value, __offset, __condition) do { \
> > +		*bb++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (__condition) | 2; \
> > +		*bb++ = (__value); \
> > +		*bb++ = (__offset); \
> > +		*bb++ = (__offset) >> 32; \
> > +	} while (0)
> > +
> > +#define MATH_4_STORE(__r1, __r2, __op, __r3) do { \
> > +		*bb++ = MI_MATH(4); \
> > +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(__r1)); \
> > +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(__r2)); \
> > +		*bb++ = (__op); \
> > +		*bb++ = MI_MATH_STORE(MI_MATH_REG(__r3), MI_MATH_REG_ACCU); \
> > +	} while (0)
> > +
> > +#define BBSIZE 4096
> > +
> > +/* Aliasing for easier refactoring */
> > +#define GPR_SIZE	GPR(0)
> > +#define R_SIZE		R(0)
> > +
> > +#define GPR_CRC		GPR(1)
> > +#define R_CRC		R(1)
> > +
> > +#define GPR_INDATA_IDX  GPR(2)
> > +#define R_INDATA_IDX	R(2)
> > +
> > +#define GPR_TABLE_IDX   GPR(3)
> > +#define R_TABLE_IDX	R(3)
> > +
> > +#define GPR_CURR_DW	GPR(4)
> > +#define R_CURR_DW	R(4)
> > +
> > +#define GPR_CONST_2	GPR(5)
> > +#define R_CONST_2	R(5)
> > +
> > +#define GPR_CONST_4	GPR(6)
> > +#define R_CONST_4	R(6)
> > +
> > +#define GPR_CONST_8	GPR(7)
> > +#define R_CONST_8	R(7)
> > +
> > +#define GPR_CONST_ff	GPR(8)
> > +#define R_CONST_ff	R(8)
> > +
> > +#define GPR_ffffffff    GPR(9)
> > +#define R_ffffffff	R(9)
> > +
> > +#define GPR_TMP_1	GPR(10)
> > +#define R_TMP_1		R(10)
> > +
> > +#define GPR_TMP_2	GPR(11)
> > +#define R_TMP_2		R(11)
> > +
> > +static void fill_batch(int i915, uint32_t bb_handle, uint64_t bb_offset,
> > +		       uint64_t table_offset, uint64_t data_offset, uint32_t data_size)
> > +{
> > +	uint32_t *bb, *batch, *jmp;
> > +	const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
> > +	const int use_64b = gen >= 8;
> > +	uint64_t offset;
> > +	uint64_t crc = USERDATA(table_offset, 0);
> > +
> > +	igt_assert(data_size % 4 == 0);
> > +
> > +	batch = gem_mmap__device_coherent(i915, bb_handle, 0, BBSIZE,
> > +					  PROT_READ | PROT_WRITE);
> > +	memset(batch, 0, BBSIZE);
> > +
> > +	bb = batch;
> > +
> > +	LOAD_REGISTER_IMM64(GPR_SIZE, data_size, 0);
> > +	LOAD_REGISTER_IMM64(GPR_CRC, ~0U, 0);		/* crc start - 0xffffffff */
> > +	LOAD_REGISTER_IMM64(GPR_INDATA_IDX, 0, 0);	/* data_offset index (0) */
> > +	LOAD_REGISTER_IMM64(GPR_CONST_2, 2, 0);		/* const value 2 */
> > +	LOAD_REGISTER_IMM64(GPR_CONST_4, 4, 0);		/* const value 4 */
> > +	LOAD_REGISTER_IMM64(GPR_CONST_8, 8, 0);		/* const value 8 */
> > +	LOAD_REGISTER_IMM64(GPR_CONST_ff, 0xff, 0);	/* const value 0xff */
> > +	LOAD_REGISTER_IMM64(GPR_ffffffff, ~0U, 0);	/* const value 0xffffffff */
> > +
> > +	/* for indexed reads from memory */
> > +	LOAD_REGISTER_IMM32(WPARID, 1);
> > +
> > +	jmp = bb;
> > +
> > +	*bb++ = MI_SET_PREDICATE;
> > +	*bb++ = MI_ARB_CHECK;
> > +
> > +	LOAD_REGISTER_REG(GPR_INDATA_IDX, CS_MI_ADDRESS_OFFSET);
> > +	LOAD_REGISTER_MEM_WPARID(GPR_CURR_DW, data_offset);
> > +
> > +	for (int byte = 0; byte < 4; byte++) {
> > +		if (byte != 0)
> > +			MATH_4_STORE(R_CURR_DW, R_CONST_8,
> > +				     MI_MATH_SHR, R_CURR_DW); /* dw >> 8 */
> > +
> > +		/* crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); */
> > +		MATH_4_STORE(R_CURR_DW, R_CONST_ff,
> > +			     MI_MATH_AND, R_TMP_1); /* dw & 0xff */
> > +		MATH_4_STORE(R_CRC, R_TMP_1,
> > +			     MI_MATH_XOR, R_TMP_1); /* crc ^ tmp */
> > +		MATH_4_STORE(R_TMP_1, R_CONST_ff,
> > +			     MI_MATH_AND, R_TMP_1); /* tmp & 0xff */
> > +		MATH_4_STORE(R_TMP_1, R_CONST_2,
> > +			     MI_MATH_SHL, R_TABLE_IDX); /* tmp << 2 (crc idx) */
> > +
> > +		LOAD_REGISTER_REG(GPR_TABLE_IDX, CS_MI_ADDRESS_OFFSET);
> > +		LOAD_REGISTER_MEM_WPARID(GPR_TMP_1, table_offset);
> > +
> > +		MATH_4_STORE(R_CRC, R_CONST_8,
> > +			     MI_MATH_SHR, R_TMP_2); /* crc >> 8 (shift) */
> > +		MATH_4_STORE(R_TMP_2, R_TMP_1,
> > +			     MI_MATH_XOR, R_CRC); /* crc = tab[v] ^ shift */
> > +	}
> > +
> > +	/* increment data index */
> > +	MATH_4_STORE(R_INDATA_IDX, R_CONST_4, MI_MATH_ADD, R_INDATA_IDX);
> > +
> > +	/* loop until R_SIZE == 0, R_SIZE = R_SIZE - R_CONST_4 */
> > +
> > +	*bb++ = MI_MATH(5);
> > +	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(R_SIZE));
> > +	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(R_CONST_4));
> > +	*bb++ = MI_MATH_SUB;
> > +	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_SIZE), MI_MATH_REG_ACCU);
> > +	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_TMP_2), MI_MATH_REG_ZF);
> > +	LOAD_REGISTER_REG(GPR_TMP_2, MI_PREDICATE_RESULT);
> > +
> > +	*bb++ = MI_BATCH_BUFFER_START | BIT(15) | BIT(8) | use_64b;
> > +	offset = OFFSET(bb_offset, jmp, batch);
> > +	*bb++ = offset;
> > +	*bb++ = offset >> 32;
> > +
> > +	*bb++ = MI_SET_PREDICATE;
> > +
> > +	MATH_4_STORE(R_CRC, R_ffffffff, MI_MATH_XOR, R_TMP_1);
> > +	STORE_REGISTER_MEM(GPR_TMP_1, crc);
> > +
> > +	*bb++ = MI_BATCH_BUFFER_END;
> > +
> > +	gem_munmap(batch, BBSIZE);
> > +}
> > +
> > +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> > +		    const struct intel_execution_engine2 *e,
> > +		    uint32_t data_handle, uint32_t data_size)
> > +{
> > +	struct drm_i915_gem_execbuffer2 execbuf = {};
> > +	struct drm_i915_gem_exec_object2 obj[3] = {};
> > +	uint64_t bb_offset, table_offset, data_offset;
> > +	uint32_t bb, table, crc, table_size = 4096;
> > +	uint32_t *ptr;
> > +
> > +	igt_assert(data_size % 4 == 0);
> > +
> > +	table = gem_create_in_memory_regions(i915, table_size, REGION_LMEM(0));
> > +	gem_write(i915, table, 0, crc32_tab, sizeof(crc32_tab));
> > +
> > +	table_offset = get_offset(ahnd, table, table_size, 0);
> > +	data_offset = get_offset(ahnd, data_handle, data_size, 0);
> > +
> > +	obj[0].offset = table_offset;
> > +	obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
> > +	obj[0].handle = table;
> > +
> > +	obj[1].offset = data_offset;
> > +	obj[1].flags = EXEC_OBJECT_PINNED;
> > +	obj[1].handle = data_handle;
> > +
> > +	bb = gem_create_in_memory_regions(i915, BBSIZE, REGION_LMEM(0));
> > +	bb_offset = get_offset(ahnd, bb, BBSIZE, 0);
> > +	fill_batch(i915, bb, bb_offset, table_offset, data_offset, data_size);
> > +	obj[2].offset = bb_offset;
> > +	obj[2].flags = EXEC_OBJECT_PINNED;
> > +	obj[2].handle = bb;
> > +	execbuf.buffer_count = 3;
> > +	execbuf.buffers_ptr = to_user_pointer(obj);
> > +	execbuf.flags = e->flags;
> > +	execbuf.rsvd1 = ctx->id;
> > +	gem_execbuf(i915, &execbuf);
> > +	gem_sync(i915, table);
> > +
> > +	ptr = gem_mmap__device_coherent(i915, table, 0, table_size, PROT_READ);
> > +	crc = ptr[0x100];
> > +	gem_munmap(ptr, table_size);
> > +	gem_close(i915, table);
> > +	gem_close(i915, bb);
> > +
> > +	return crc;
> > +}
> > +
> > +bool supports_gpu_crc32(int i915)
> > +{
> > +	uint16_t devid = intel_get_drm_devid(i915);
> > +
> > +	return IS_DG2(devid);
> > +}
> > diff --git a/lib/i915/i915_crc.h b/lib/i915/i915_crc.h
> > new file mode 100644
> > index 0000000000..bb0195e2a8
> > --- /dev/null
> > +++ b/lib/i915/i915_crc.h
> > @@ -0,0 +1,17 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright © 2022 Intel Corporation
> > + */
> > +#ifndef _I915_CRC_H_
> > +#define _I915_CRC_H_
> > +
> > +#include <stdint.h>
> > +#include "intel_ctx.h"
> > +
> > +uint32_t cpu_crc32(const void *buf, size_t size);
> > +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> > +		    const struct intel_execution_engine2 *e,
> > +		    uint32_t data_handle, uint32_t data_size);
> > +bool supports_gpu_crc32(int i915);
> > +
> > +#endif /* _I915_CRC_ */
> > diff --git a/lib/i915/i915_crc32_table.c b/lib/i915/i915_crc32_table.c
> > new file mode 100644
> > index 0000000000..eca5e43218
> > --- /dev/null
> > +++ b/lib/i915/i915_crc32_table.c
> > @@ -0,0 +1,105 @@
> > +/*-
> > + *  COPYRIGHT (C) 1986 Gary S. Brown.  You may use this program, or
> > + *  code or tables extracted from it, as desired without restriction.
> > + */
> > +
> > +/*
> > + *  First, the polynomial itself and its table of feedback terms.  The
> > + *  polynomial is
> > + *  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
> > + *
> > + *  Note that we take it "backwards" and put the highest-order term in
> > + *  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
> > + *  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
> > + *  the MSB being 1
> > + *
> > + *  Note that the usual hardware shift register implementation, which
> > + *  is what we're using (we're merely optimizing it by doing eight-bit
> > + *  chunks at a time) shifts bits into the lowest-order term.  In our
> > + *  implementation, that means shifting towards the right.  Why do we
> > + *  do it this way?  Because the calculated CRC must be transmitted in
> > + *  order from highest-order term to lowest-order term.  UARTs transmit
> > + *  characters in order from LSB to MSB.  By storing the CRC this way
> > + *  we hand it to the UART in the order low-byte to high-byte; the UART
> > + *  sends each low-bit to hight-bit; and the result is transmission bit
> > + *  by bit from highest- to lowest-order term without requiring any bit
> > + *  shuffling on our part.  Reception works similarly
> > + *
> > + *  The feedback terms table consists of 256, 32-bit entries.  Notes
> > + *
> > + *      The table can be generated at runtime if desired; code to do so
> > + *      is shown later.  It might not be obvious, but the feedback
> > + *      terms simply represent the results of eight shift/xor opera
> > + *      tions for all combinations of data and CRC register values
> > + *
> > + *      The values must be right-shifted by eight bits by the "updcrc
> > + *      logic; the shift must be unsigned (bring in zeroes).  On some
> > + *      hardware you could probably optimize the shift in assembler by
> > + *      using byte-swap instructions
> > + *      polynomial $edb88320
> > + *
> > + *
> > + * CRC32 code derived from work by Gary S. Brown.
> > + */
> > +
> > +#include <stdint.h>
> > +
> > +const uint32_t crc32_tab[] = {
> > +	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
> > +	0xe963a535, 0x9e6495a3,	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
> > +	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
> > +	0xf3b97148, 0x84be41de,	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
> > +	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,	0x14015c4f, 0x63066cd9,
> > +	0xfa0f3d63, 0x8d080df5,	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
> > +	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,	0x35b5a8fa, 0x42b2986c,
> > +	0xdbbbc9d6, 0xacbcf940,	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
> > +	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
> > +	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
> > +	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,	0x76dc4190, 0x01db7106,
> > +	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
> > +	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
> > +	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
> > +	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
> > +	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
> > +	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
> > +	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
> > +	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
> > +	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
> > +	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
> > +	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
> > +	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
> > +	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
> > +	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
> > +	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
> > +	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
> > +	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
> > +	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
> > +	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
> > +	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
> > +	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
> > +	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
> > +	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
> > +	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
> > +	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
> > +	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
> > +	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
> > +	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
> > +	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
> > +	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
> > +	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
> > +	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
> > +};
> > +
> > +uint32_t cpu_crc32(const void *buf, size_t size)
> > +{
> > +
> > +	const uint8_t *p = buf;
> > +	uint32_t crc;
> > +
> > +	crc = ~0U;
> > +
> > +	while (size--)
> > +		crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
> > +
> > +	return crc ^ ~0U;
> > +}
> > diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> > index cb62728896..fff32e1816 100644
> > --- a/lib/intel_reg.h
> > +++ b/lib/intel_reg.h
> > @@ -2625,6 +2625,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> >  #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
> >  #define MI_LOAD_REGISTER_MEM_GEN8	((0x29 << 23) | (4 - 2))
> >  #define   MI_MMIO_REMAP_ENABLE_GEN12	(1 << 17)
> > +#define MI_STORE_REGISTER_MEM_GEN8	((0x24 << 23) | (4 - 2))
> >  
> >  /* Flush */
> >  #define MI_FLUSH			(0x04<<23)
> > @@ -2657,6 +2658,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> >  #define MI_BATCH_BUFFER_END	(0xA << 23)
> >  #define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
> >  #define MI_DO_COMPARE                   (1 << 21)
> > +#define MAD_GT_IDD			(0 << 12)
> > +#define MAD_GT_OR_EQ_IDD		(1 << 12)
> > +#define MAD_LT_IDD			(2 << 12)
> > +#define MAD_LT_OR_EQ_IDD		(3 << 12)
> > +#define MAD_EQ_IDD			(4 << 12)
> > +#define MAD_NEQ_IDD			(5 << 12)
> >  
> >  #define MI_BATCH_NON_SECURE		(1)
> >  #define MI_BATCH_NON_SECURE_I965	(1 << 8)
> > diff --git a/lib/meson.build b/lib/meson.build
> > index 0a173c1fc6..b05198ecc9 100644
> > --- a/lib/meson.build
> > +++ b/lib/meson.build
> > @@ -10,6 +10,7 @@ lib_sources = [
> >  	'i915/gem_ring.c',
> >  	'i915/gem_mman.c',
> >  	'i915/gem_vm.c',
> > +	'i915/i915_crc.c',
> >  	'i915/intel_memory_region.c',
> >  	'i915/intel_mocs.c',
> >  	'i915/i915_blt.c',
> > -- 
> > 2.32.0
> > 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2
  2022-06-06  6:33     ` Zbigniew Kempczyński
@ 2022-06-06  8:07       ` Petri Latvala
  2022-06-08  9:17         ` Zbigniew Kempczyński
  0 siblings, 1 reply; 16+ messages in thread
From: Petri Latvala @ 2022-06-06  8:07 UTC (permalink / raw)
  To: Zbigniew Kempczyński; +Cc: igt-dev

On Mon, Jun 06, 2022 at 08:33:05AM +0200, Zbigniew Kempczyński wrote:
> On Fri, Jun 03, 2022 at 04:11:41PM +0300, Petri Latvala wrote:
> > On Fri, Jun 03, 2022 at 03:05:01PM +0200, Zbigniew Kempczyński wrote:
> > > Adding crc32 calculation on gpu gives us new possibility to verify data
> > > integrity without relying on trust cpu mapping is correct.
> > > 
> > > Patch introduces calculating crc32 on DG2 only. On older gens ALU
> > > (MI_MATH) doesn't support bit-shifting instructions as well as multiply
> > > or divide. Emulating n-bit shifts cost hundred of instructions with
> > > predicated SRM (works on render engine only). Another limitation is lack
> > > of indexed load / store. On DG2 we can use WPARID and CS_MI_ADDRESS_OFFSET
> > > to achieve indexed operation on memory.
> > > 
> > > Due to performance reasons (cpu crc32 calculation even on WC memory is
> > > still much faster than on gpu, also depends on calculated object memory
> > > region) calculation will complete in reasonable of time only for few MiB.
> > > 
> > > v2: - use registers relative to engine to allow run on all engines (Chris)
> > >     - use predication instead of memory access to get better performance
> > >       (Chris)
> > >     - add location where crc32 implementation comes from (Petri)
> > > 
> > > v3: - extract crc32 table + cpu_crc32() to separate i915_crc_table.c
> > > 
> > > Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
> > > ---
> > >  lib/i915/i915_crc.c         | 311 ++++++++++++++++++++++++++++++++++++
> > >  lib/i915/i915_crc.h         |  17 ++
> > >  lib/i915/i915_crc32_table.c | 105 ++++++++++++
> > >  lib/intel_reg.h             |   7 +
> > >  lib/meson.build             |   1 +
> > >  5 files changed, 441 insertions(+)
> > >  create mode 100644 lib/i915/i915_crc.c
> > >  create mode 100644 lib/i915/i915_crc.h
> > >  create mode 100644 lib/i915/i915_crc32_table.c
> > > 
> > > diff --git a/lib/i915/i915_crc.c b/lib/i915/i915_crc.c
> > > new file mode 100644
> > > index 0000000000..c26a8e05b9
> > > --- /dev/null
> > > +++ b/lib/i915/i915_crc.c
> > > @@ -0,0 +1,311 @@
> > > +// SPDX-License-Identifier: MIT
> > > +/*
> > > + * Copyright © 2022 Intel Corporation
> > > + */
> > > +
> > > +#include <stddef.h>
> > > +#include <stdint.h>
> > > +#include "drmtest.h"
> > > +#include "gem_create.h"
> > > +#include "gem_engine_topology.h"
> > > +#include "gem_mman.h"
> > > +#include "i830_reg.h"
> > > +#include "i915_drm.h"
> > > +#include "intel_reg.h"
> > > +#include "intel_chipset.h"
> > > +#include "ioctl_wrappers.h"
> > > +#include "intel_allocator.h"
> > > +#include "i915/i915_crc.h"
> > > +
> > > +/* Include crc32 table + cpu_crc32() */
> > > +#include "i915_crc32_table.c"
> > 
> > #including .c files is ugly. Can that be a header file with
> > statics/inlines instead?
> 
> To avoid aesthetic dillemas I'm going to add separate igt_crc.c
> file, which will more vendor agnostic with extern of crc table(s).
> Assuming other vendors may add some gpu crc counting we may share
> crc32 (and maybe other) tables from there. And igt_cpu_crc32() would
> also be good to put in there.

Speaking of other tables, we have crc calculation code in the chamelium
code, that's........ crc16? Anyway, it's the function
chamelium_xrgb_hash16 in lib/igt_chamelium.c. It might make sense to
move that code to the new home of cpu crc calcs.

> 
> > 
> > That said, it also isn't i915-specific anymore but that's not a
> > blocker for merging the code at this time.
> 
> That's fine, better to do few iterations to look better / be more 
> future ready than merging because it just works.

Excellent, thanks!


-- 
Petri Latvala


> 
> Thanks for review, await new version soon.
> 
> --
> Zbigniew
> 
> > 
> > 
> > -- 
> > Petri Latvala
> > 
> > 
> > > +
> > > +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
> > > +
> > > +#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
> > > +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
> > > +/* Opcodes for MI_MATH_INSTR */
> > > +#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
> > > +#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
> > > +#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
> > > +#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
> > > +#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
> > > +#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
> > > +#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
> > > +#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
> > > +#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
> > > +#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
> > > +#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
> > > +#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
> > > +/* DG2+ */
> > > +#define   MI_MATH_SHL                   MI_MATH_INSTR(0x105, 0x0, 0x0)
> > > +#define   MI_MATH_SHR                   MI_MATH_INSTR(0x106, 0x0, 0x0)
> > > +#define   MI_MATH_SAR                   MI_MATH_INSTR(0x107, 0x0, 0x0)
> > > +
> > > +/* Registers used as operands in MI_MATH_INSTR */
> > > +#define   MI_MATH_REG(x)                (x)
> > > +#define   MI_MATH_REG_SRCA              0x20
> > > +#define   MI_MATH_REG_SRCB              0x21
> > > +#define   MI_MATH_REG_ACCU              0x31
> > > +#define   MI_MATH_REG_ZF                0x32
> > > +#define   MI_MATH_REG_CF                0x33
> > > +
> > > +#define MI_SET_PREDICATE                MI_INSTR(0x01, 0)
> > > +#define MI_ARB_CHECK                    MI_INSTR(0x5, 0)
> > > +#define MI_LOAD_REGISTER_REG            MI_INSTR(0x2A, 1)
> > > +#define CS_GPR(x)                       (0x600 + 8 * (x))
> > > +#define GPR(x)                          CS_GPR(x)
> > > +#define R(x)                            (x)
> > > +#define USERDATA(offset, idx)	        ((offset) + (0x100 + (idx)) * 4)
> > > +#define OFFSET(obj_offset, current, start) \
> > > +	((obj_offset) + (current - start) * 4)
> > > +
> > > +#define MI_PREDICATE_RESULT             0x3B8
> > > +#define WPARID                          0x21C
> > > +#define CS_MI_ADDRESS_OFFSET            0x3B4
> > > +
> > > +#define LOAD_REGISTER_REG(__reg_src, __reg_dst) do { \
> > > +		*bb++ = MI_LOAD_REGISTER_REG | BIT(19) | BIT(18); \
> > > +		*bb++ = (__reg_src); \
> > > +		*bb++ = (__reg_dst); \
> > > +	} while (0)
> > > +
> > > +#define LOAD_REGISTER_IMM32(__reg, __imm1) do { \
> > > +		*bb++ = MI_LOAD_REGISTER_IMM | BIT(19); \
> > > +		*bb++ = (__reg); \
> > > +		*bb++ = (__imm1); \
> > > +	} while (0)
> > > +
> > > +#define LOAD_REGISTER_IMM64(__reg, __imm1, __imm2) do { \
> > > +		*bb++ = (MI_LOAD_REGISTER_IMM + 2) | BIT(19); \
> > > +		*bb++ = (__reg); \
> > > +		*bb++ = (__imm1); \
> > > +		*bb++ = (__reg) + 4; \
> > > +		*bb++ = (__imm2); \
> > > +	} while (0)
> > > +
> > > +#define LOAD_REGISTER_MEM(__reg, __offset) do { \
> > > +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19); \
> > > +		*bb++ = (__reg); \
> > > +		*bb++ = (__offset); \
> > > +		*bb++ = (__offset) >> 32; \
> > > +	} while (0)
> > > +
> > > +#define LOAD_REGISTER_MEM_WPARID(__reg, __offset) do { \
> > > +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19) | BIT(16); \
> > > +		*bb++ = (__reg); \
> > > +		*bb++ = (__offset); \
> > > +		*bb++ = (__offset) >> 32; \
> > > +	} while (0)
> > > +
> > > +#define STORE_REGISTER_MEM(__reg, __offset) do { \
> > > +		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19); \
> > > +		*bb++ = (__reg); \
> > > +		*bb++ = (__offset); \
> > > +		*bb++ = (__offset) >> 32; \
> > > +	} while (0)
> > > +
> > > +#define STORE_REGISTER_MEM_PREDICATED(__reg, __offset) do { \
> > > +		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19) | (BIT(21); \
> > > +		*bb++ = (__reg); \
> > > +		*bb++ = (__offset); \
> > > +		*bb++ = (__offset) >> 32; \
> > > +	} while (0)
> > > +
> > > +#define COND_BBE(__value, __offset, __condition) do { \
> > > +		*bb++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (__condition) | 2; \
> > > +		*bb++ = (__value); \
> > > +		*bb++ = (__offset); \
> > > +		*bb++ = (__offset) >> 32; \
> > > +	} while (0)
> > > +
> > > +#define MATH_4_STORE(__r1, __r2, __op, __r3) do { \
> > > +		*bb++ = MI_MATH(4); \
> > > +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(__r1)); \
> > > +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(__r2)); \
> > > +		*bb++ = (__op); \
> > > +		*bb++ = MI_MATH_STORE(MI_MATH_REG(__r3), MI_MATH_REG_ACCU); \
> > > +	} while (0)
> > > +
> > > +#define BBSIZE 4096
> > > +
> > > +/* Aliasing for easier refactoring */
> > > +#define GPR_SIZE	GPR(0)
> > > +#define R_SIZE		R(0)
> > > +
> > > +#define GPR_CRC		GPR(1)
> > > +#define R_CRC		R(1)
> > > +
> > > +#define GPR_INDATA_IDX  GPR(2)
> > > +#define R_INDATA_IDX	R(2)
> > > +
> > > +#define GPR_TABLE_IDX   GPR(3)
> > > +#define R_TABLE_IDX	R(3)
> > > +
> > > +#define GPR_CURR_DW	GPR(4)
> > > +#define R_CURR_DW	R(4)
> > > +
> > > +#define GPR_CONST_2	GPR(5)
> > > +#define R_CONST_2	R(5)
> > > +
> > > +#define GPR_CONST_4	GPR(6)
> > > +#define R_CONST_4	R(6)
> > > +
> > > +#define GPR_CONST_8	GPR(7)
> > > +#define R_CONST_8	R(7)
> > > +
> > > +#define GPR_CONST_ff	GPR(8)
> > > +#define R_CONST_ff	R(8)
> > > +
> > > +#define GPR_ffffffff    GPR(9)
> > > +#define R_ffffffff	R(9)
> > > +
> > > +#define GPR_TMP_1	GPR(10)
> > > +#define R_TMP_1		R(10)
> > > +
> > > +#define GPR_TMP_2	GPR(11)
> > > +#define R_TMP_2		R(11)
> > > +
> > > +static void fill_batch(int i915, uint32_t bb_handle, uint64_t bb_offset,
> > > +		       uint64_t table_offset, uint64_t data_offset, uint32_t data_size)
> > > +{
> > > +	uint32_t *bb, *batch, *jmp;
> > > +	const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
> > > +	const int use_64b = gen >= 8;
> > > +	uint64_t offset;
> > > +	uint64_t crc = USERDATA(table_offset, 0);
> > > +
> > > +	igt_assert(data_size % 4 == 0);
> > > +
> > > +	batch = gem_mmap__device_coherent(i915, bb_handle, 0, BBSIZE,
> > > +					  PROT_READ | PROT_WRITE);
> > > +	memset(batch, 0, BBSIZE);
> > > +
> > > +	bb = batch;
> > > +
> > > +	LOAD_REGISTER_IMM64(GPR_SIZE, data_size, 0);
> > > +	LOAD_REGISTER_IMM64(GPR_CRC, ~0U, 0);		/* crc start - 0xffffffff */
> > > +	LOAD_REGISTER_IMM64(GPR_INDATA_IDX, 0, 0);	/* data_offset index (0) */
> > > +	LOAD_REGISTER_IMM64(GPR_CONST_2, 2, 0);		/* const value 2 */
> > > +	LOAD_REGISTER_IMM64(GPR_CONST_4, 4, 0);		/* const value 4 */
> > > +	LOAD_REGISTER_IMM64(GPR_CONST_8, 8, 0);		/* const value 8 */
> > > +	LOAD_REGISTER_IMM64(GPR_CONST_ff, 0xff, 0);	/* const value 0xff */
> > > +	LOAD_REGISTER_IMM64(GPR_ffffffff, ~0U, 0);	/* const value 0xffffffff */
> > > +
> > > +	/* for indexed reads from memory */
> > > +	LOAD_REGISTER_IMM32(WPARID, 1);
> > > +
> > > +	jmp = bb;
> > > +
> > > +	*bb++ = MI_SET_PREDICATE;
> > > +	*bb++ = MI_ARB_CHECK;
> > > +
> > > +	LOAD_REGISTER_REG(GPR_INDATA_IDX, CS_MI_ADDRESS_OFFSET);
> > > +	LOAD_REGISTER_MEM_WPARID(GPR_CURR_DW, data_offset);
> > > +
> > > +	for (int byte = 0; byte < 4; byte++) {
> > > +		if (byte != 0)
> > > +			MATH_4_STORE(R_CURR_DW, R_CONST_8,
> > > +				     MI_MATH_SHR, R_CURR_DW); /* dw >> 8 */
> > > +
> > > +		/* crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); */
> > > +		MATH_4_STORE(R_CURR_DW, R_CONST_ff,
> > > +			     MI_MATH_AND, R_TMP_1); /* dw & 0xff */
> > > +		MATH_4_STORE(R_CRC, R_TMP_1,
> > > +			     MI_MATH_XOR, R_TMP_1); /* crc ^ tmp */
> > > +		MATH_4_STORE(R_TMP_1, R_CONST_ff,
> > > +			     MI_MATH_AND, R_TMP_1); /* tmp & 0xff */
> > > +		MATH_4_STORE(R_TMP_1, R_CONST_2,
> > > +			     MI_MATH_SHL, R_TABLE_IDX); /* tmp << 2 (crc idx) */
> > > +
> > > +		LOAD_REGISTER_REG(GPR_TABLE_IDX, CS_MI_ADDRESS_OFFSET);
> > > +		LOAD_REGISTER_MEM_WPARID(GPR_TMP_1, table_offset);
> > > +
> > > +		MATH_4_STORE(R_CRC, R_CONST_8,
> > > +			     MI_MATH_SHR, R_TMP_2); /* crc >> 8 (shift) */
> > > +		MATH_4_STORE(R_TMP_2, R_TMP_1,
> > > +			     MI_MATH_XOR, R_CRC); /* crc = tab[v] ^ shift */
> > > +	}
> > > +
> > > +	/* increment data index */
> > > +	MATH_4_STORE(R_INDATA_IDX, R_CONST_4, MI_MATH_ADD, R_INDATA_IDX);
> > > +
> > > +	/* loop until R_SIZE == 0, R_SIZE = R_SIZE - R_CONST_4 */
> > > +
> > > +	*bb++ = MI_MATH(5);
> > > +	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(R_SIZE));
> > > +	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(R_CONST_4));
> > > +	*bb++ = MI_MATH_SUB;
> > > +	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_SIZE), MI_MATH_REG_ACCU);
> > > +	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_TMP_2), MI_MATH_REG_ZF);
> > > +	LOAD_REGISTER_REG(GPR_TMP_2, MI_PREDICATE_RESULT);
> > > +
> > > +	*bb++ = MI_BATCH_BUFFER_START | BIT(15) | BIT(8) | use_64b;
> > > +	offset = OFFSET(bb_offset, jmp, batch);
> > > +	*bb++ = offset;
> > > +	*bb++ = offset >> 32;
> > > +
> > > +	*bb++ = MI_SET_PREDICATE;
> > > +
> > > +	MATH_4_STORE(R_CRC, R_ffffffff, MI_MATH_XOR, R_TMP_1);
> > > +	STORE_REGISTER_MEM(GPR_TMP_1, crc);
> > > +
> > > +	*bb++ = MI_BATCH_BUFFER_END;
> > > +
> > > +	gem_munmap(batch, BBSIZE);
> > > +}
> > > +
> > > +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> > > +		    const struct intel_execution_engine2 *e,
> > > +		    uint32_t data_handle, uint32_t data_size)
> > > +{
> > > +	struct drm_i915_gem_execbuffer2 execbuf = {};
> > > +	struct drm_i915_gem_exec_object2 obj[3] = {};
> > > +	uint64_t bb_offset, table_offset, data_offset;
> > > +	uint32_t bb, table, crc, table_size = 4096;
> > > +	uint32_t *ptr;
> > > +
> > > +	igt_assert(data_size % 4 == 0);
> > > +
> > > +	table = gem_create_in_memory_regions(i915, table_size, REGION_LMEM(0));
> > > +	gem_write(i915, table, 0, crc32_tab, sizeof(crc32_tab));
> > > +
> > > +	table_offset = get_offset(ahnd, table, table_size, 0);
> > > +	data_offset = get_offset(ahnd, data_handle, data_size, 0);
> > > +
> > > +	obj[0].offset = table_offset;
> > > +	obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
> > > +	obj[0].handle = table;
> > > +
> > > +	obj[1].offset = data_offset;
> > > +	obj[1].flags = EXEC_OBJECT_PINNED;
> > > +	obj[1].handle = data_handle;
> > > +
> > > +	bb = gem_create_in_memory_regions(i915, BBSIZE, REGION_LMEM(0));
> > > +	bb_offset = get_offset(ahnd, bb, BBSIZE, 0);
> > > +	fill_batch(i915, bb, bb_offset, table_offset, data_offset, data_size);
> > > +	obj[2].offset = bb_offset;
> > > +	obj[2].flags = EXEC_OBJECT_PINNED;
> > > +	obj[2].handle = bb;
> > > +	execbuf.buffer_count = 3;
> > > +	execbuf.buffers_ptr = to_user_pointer(obj);
> > > +	execbuf.flags = e->flags;
> > > +	execbuf.rsvd1 = ctx->id;
> > > +	gem_execbuf(i915, &execbuf);
> > > +	gem_sync(i915, table);
> > > +
> > > +	ptr = gem_mmap__device_coherent(i915, table, 0, table_size, PROT_READ);
> > > +	crc = ptr[0x100];
> > > +	gem_munmap(ptr, table_size);
> > > +	gem_close(i915, table);
> > > +	gem_close(i915, bb);
> > > +
> > > +	return crc;
> > > +}
> > > +
> > > +bool supports_gpu_crc32(int i915)
> > > +{
> > > +	uint16_t devid = intel_get_drm_devid(i915);
> > > +
> > > +	return IS_DG2(devid);
> > > +}
> > > diff --git a/lib/i915/i915_crc.h b/lib/i915/i915_crc.h
> > > new file mode 100644
> > > index 0000000000..bb0195e2a8
> > > --- /dev/null
> > > +++ b/lib/i915/i915_crc.h
> > > @@ -0,0 +1,17 @@
> > > +/* SPDX-License-Identifier: MIT */
> > > +/*
> > > + * Copyright © 2022 Intel Corporation
> > > + */
> > > +#ifndef _I915_CRC_H_
> > > +#define _I915_CRC_H_
> > > +
> > > +#include <stdint.h>
> > > +#include "intel_ctx.h"
> > > +
> > > +uint32_t cpu_crc32(const void *buf, size_t size);
> > > +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> > > +		    const struct intel_execution_engine2 *e,
> > > +		    uint32_t data_handle, uint32_t data_size);
> > > +bool supports_gpu_crc32(int i915);
> > > +
> > > +#endif /* _I915_CRC_H_ */
> > > diff --git a/lib/i915/i915_crc32_table.c b/lib/i915/i915_crc32_table.c
> > > new file mode 100644
> > > index 0000000000..eca5e43218
> > > --- /dev/null
> > > +++ b/lib/i915/i915_crc32_table.c
> > > @@ -0,0 +1,105 @@
> > > +/*-
> > > + *  COPYRIGHT (C) 1986 Gary S. Brown.  You may use this program, or
> > > + *  code or tables extracted from it, as desired without restriction.
> > > + */
> > > +
> > > +/*
> > > + *  First, the polynomial itself and its table of feedback terms.  The
> > > + *  polynomial is
> > > + *  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
> > > + *
> > > + *  Note that we take it "backwards" and put the highest-order term in
> > > + *  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
> > > + *  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
> > > + *  the MSB being 1
> > > + *
> > > + *  Note that the usual hardware shift register implementation, which
> > > + *  is what we're using (we're merely optimizing it by doing eight-bit
> > > + *  chunks at a time) shifts bits into the lowest-order term.  In our
> > > + *  implementation, that means shifting towards the right.  Why do we
> > > + *  do it this way?  Because the calculated CRC must be transmitted in
> > > + *  order from highest-order term to lowest-order term.  UARTs transmit
> > > + *  characters in order from LSB to MSB.  By storing the CRC this way
> > > + *  we hand it to the UART in the order low-byte to high-byte; the UART
> > > + *  sends each low-bit to hight-bit; and the result is transmission bit
> > > + *  by bit from highest- to lowest-order term without requiring any bit
> > > + *  shuffling on our part.  Reception works similarly
> > > + *
> > > + *  The feedback terms table consists of 256, 32-bit entries.  Notes
> > > + *
> > > + *      The table can be generated at runtime if desired; code to do so
> > > + *      is shown later.  It might not be obvious, but the feedback
> > > + *      terms simply represent the results of eight shift/xor opera
> > > + *      tions for all combinations of data and CRC register values
> > > + *
> > > + *      The values must be right-shifted by eight bits by the "updcrc
> > > + *      logic; the shift must be unsigned (bring in zeroes).  On some
> > > + *      hardware you could probably optimize the shift in assembler by
> > > + *      using byte-swap instructions
> > > + *      polynomial $edb88320
> > > + *
> > > + *
> > > + * CRC32 code derived from work by Gary S. Brown.
> > > + */
> > > +
> > > +#include <stdint.h>
> > > +
> > > +const uint32_t crc32_tab[] = {
> > > +	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
> > > +	0xe963a535, 0x9e6495a3,	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
> > > +	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
> > > +	0xf3b97148, 0x84be41de,	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
> > > +	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,	0x14015c4f, 0x63066cd9,
> > > +	0xfa0f3d63, 0x8d080df5,	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
> > > +	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,	0x35b5a8fa, 0x42b2986c,
> > > +	0xdbbbc9d6, 0xacbcf940,	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
> > > +	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
> > > +	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
> > > +	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,	0x76dc4190, 0x01db7106,
> > > +	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
> > > +	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
> > > +	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
> > > +	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
> > > +	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
> > > +	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
> > > +	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
> > > +	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
> > > +	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
> > > +	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
> > > +	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
> > > +	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
> > > +	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
> > > +	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
> > > +	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
> > > +	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
> > > +	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
> > > +	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
> > > +	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
> > > +	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
> > > +	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
> > > +	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
> > > +	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
> > > +	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
> > > +	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
> > > +	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
> > > +	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
> > > +	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
> > > +	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
> > > +	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
> > > +	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
> > > +	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
> > > +};
> > > +
> > > +uint32_t cpu_crc32(const void *buf, size_t size)
> > > +{
> > > +
> > > +	const uint8_t *p = buf;
> > > +	uint32_t crc;
> > > +
> > > +	crc = ~0U;
> > > +
> > > +	while (size--)
> > > +		crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
> > > +
> > > +	return crc ^ ~0U;
> > > +}
> > > diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> > > index cb62728896..fff32e1816 100644
> > > --- a/lib/intel_reg.h
> > > +++ b/lib/intel_reg.h
> > > @@ -2625,6 +2625,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> > >  #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
> > >  #define MI_LOAD_REGISTER_MEM_GEN8	((0x29 << 23) | (4 - 2))
> > >  #define   MI_MMIO_REMAP_ENABLE_GEN12	(1 << 17)
> > > +#define MI_STORE_REGISTER_MEM_GEN8	((0x24 << 23) | (4 - 2))
> > >  
> > >  /* Flush */
> > >  #define MI_FLUSH			(0x04<<23)
> > > @@ -2657,6 +2658,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> > >  #define MI_BATCH_BUFFER_END	(0xA << 23)
> > >  #define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
> > >  #define MI_DO_COMPARE                   (1 << 21)
> > > +#define MAD_GT_IDD			(0 << 12)
> > > +#define MAD_GT_OR_EQ_IDD		(1 << 12)
> > > +#define MAD_LT_IDD			(2 << 12)
> > > +#define MAD_LT_OR_EQ_IDD		(3 << 12)
> > > +#define MAD_EQ_IDD			(4 << 12)
> > > +#define MAD_NEQ_IDD			(5 << 12)
> > >  
> > >  #define MI_BATCH_NON_SECURE		(1)
> > >  #define MI_BATCH_NON_SECURE_I965	(1 << 8)
> > > diff --git a/lib/meson.build b/lib/meson.build
> > > index 0a173c1fc6..b05198ecc9 100644
> > > --- a/lib/meson.build
> > > +++ b/lib/meson.build
> > > @@ -10,6 +10,7 @@ lib_sources = [
> > >  	'i915/gem_ring.c',
> > >  	'i915/gem_mman.c',
> > >  	'i915/gem_vm.c',
> > > +	'i915/i915_crc.c',
> > >  	'i915/intel_memory_region.c',
> > >  	'i915/intel_mocs.c',
> > >  	'i915/i915_blt.c',
> > > -- 
> > > 2.32.0
> > > 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 2/2] tests/api_intel_bb: Add crc32 checking test for DG2
  2022-06-03 13:05 ` [igt-dev] [PATCH i-g-t 2/2] tests/api_intel_bb: Add crc32 checking test " Zbigniew Kempczyński
@ 2022-06-06 14:47   ` Kamil Konieczny
  0 siblings, 0 replies; 16+ messages in thread
From: Kamil Konieczny @ 2022-06-06 14:47 UTC (permalink / raw)
  To: igt-dev

Hi Zbigniew,

On 2022-06-03 at 15:05:02 +0200, Zbigniew Kempczyński wrote:
> Add simple test which compares crc32 sums and calculation times on cpu
> and gpu.
> 
> v2: convert to dynamic
> v3: add assert when cpu_crc != gpu_crc
> 
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
> ---
>  tests/i915/api_intel_bb.c | 67 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 67 insertions(+)
> 
> diff --git a/tests/i915/api_intel_bb.c b/tests/i915/api_intel_bb.c
> index 92f44cecf4..3213a1df75 100644
> --- a/tests/i915/api_intel_bb.c
> +++ b/tests/i915/api_intel_bb.c
> @@ -38,6 +38,7 @@
>  #include <zlib.h>
>  #include "intel_bufops.h"
>  #include "i915/gem_vm.h"
> +#include "i915/i915_crc.h"
>  
>  #define PAGE_SIZE 4096
>  
> @@ -1395,6 +1396,57 @@ static void render_ccs(struct buf_ops *bops)
>  	igt_assert_f(fails == 0, "render-ccs fails: %d\n", fails);
>  }
>  
> +static double elapsed(const struct timeval *start,
> +		      const struct timeval *end)
> +{
> +	return (1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec)) / 1e6;

imho you can use igt_time_elapsed from igt_core.h instead.

> +}
> +
> +static void test_crc32(int i915, const intel_ctx_t *ctx,
> +		       const struct intel_execution_engine2 *e,
> +		       struct drm_i915_gem_memory_class_instance *r)
> +{
> +	uint64_t ahnd = get_reloc_ahnd(i915, ctx->id);
> +	uint32_t data, *ptr;
> +
> +	uint32_t region = INTEL_MEMORY_REGION_ID(r->memory_class,
> +						 r->memory_instance);
> +
> +	igt_info("[engine: %s, region: %s]\n", e->name,
> +		 region == REGION_SMEM ? "smem" : "lmem");

Please make it igt_debug.

> +	for (int i = 12; i < 21; i++) {
> +		struct timeval start, end;
> +		uint64_t size = 1 << i;
> +		uint32_t cpu_crc, gpu_crc;
> +
> +		double cpu_time, gpu_time;
> +
> +		data = gem_create_in_memory_regions(i915, size, region);
> +		ptr = gem_mmap__device_coherent(i915, data, 0, size, PROT_WRITE);
> +		for (int j = 0; j < size / sizeof(*ptr); j++)
> +			ptr[j] = j;
> +
> +		gettimeofday(&start, NULL);
> +		cpu_crc = cpu_crc32(ptr, size);
> +		gettimeofday(&end, NULL);
> +		cpu_time = elapsed(&start, &end);
> +
> +		gettimeofday(&start, NULL);
> +		gpu_crc = i915_crc32(i915, ahnd, ctx, e, data, size);
> +		gettimeofday(&end, NULL);
> +		gpu_time = elapsed(&start, &end);
> +		igt_info("size: %10lld, cpu crc: 0x%08x (time: %.3f), "

imho igt_debug would be better.

> +				 "gpu crc: 0x%08x (time: %.3f) [ %s ]\n",
> +			 (long long) size, cpu_crc, cpu_time, gpu_crc, gpu_time,
> +			 cpu_crc == gpu_crc ? "EQUAL" : "DIFFERENT");
> +		munmap(ptr, size);

imho munmap should be after cpu_time calculation ?

> +		gem_close(i915, data);
> +		igt_assert(cpu_crc == gpu_crc);
> +	}
> +
> +	put_ahnd(ahnd);
> +}
> +
>  static int opt_handler(int opt, int opt_index, void *data)
>  {
>  	switch (opt) {
> @@ -1552,6 +1604,21 @@ igt_main_args("dpib", NULL, help_str, opt_handler, NULL)
>  	igt_subtest("render-ccs")
>  		render_ccs(bops);
>  

Please add description to subtest.

With that fixed you can add my r-b tag.

--
Kamil

> +	igt_subtest_with_dynamic_f("crc32") {
> +		const intel_ctx_t *ctx;
> +		const struct intel_execution_engine2 *e;
> +
> +		igt_require(supports_gpu_crc32(i915));
> +
> +		ctx = intel_ctx_create_all_physical(i915);
> +		for_each_ctx_engine(i915, ctx, e) {
> +			for_each_memory_region(r, i915) {
> +				igt_dynamic_f("%s-%s", e->name, r->name)
> +					test_crc32(i915, ctx, e, &r->ci);
> +			}
> +		}
> +	}
> +
>  	igt_fixture {
>  		buf_ops_destroy(bops);
>  		close(i915);
> -- 
> 2.32.0
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2
  2022-06-06  8:07       ` Petri Latvala
@ 2022-06-08  9:17         ` Zbigniew Kempczyński
  2022-06-09  7:22           ` Petri Latvala
  0 siblings, 1 reply; 16+ messages in thread
From: Zbigniew Kempczyński @ 2022-06-08  9:17 UTC (permalink / raw)
  To: Petri Latvala; +Cc: igt-dev

On Mon, Jun 06, 2022 at 11:07:53AM +0300, Petri Latvala wrote:
> On Mon, Jun 06, 2022 at 08:33:05AM +0200, Zbigniew Kempczyński wrote:
> > On Fri, Jun 03, 2022 at 04:11:41PM +0300, Petri Latvala wrote:
> > > On Fri, Jun 03, 2022 at 03:05:01PM +0200, Zbigniew Kempczyński wrote:
> > > > Adding crc32 calculation on gpu gives us a new possibility to verify data
> > > > integrity without having to trust that the cpu mapping is correct.
> > > > 
> > > > Patch introduces calculating crc32 on DG2 only. On older gens ALU
> > > > (MI_MATH) doesn't support bit-shifting instructions as well as multiply
> > > > or divide. Emulating n-bit shifts costs hundreds of instructions with
> > > > predicated SRM (works on render engine only). Another limitation is the
> > > > lack of indexed load / store. On DG2 we can use WPARID and
> > > > CS_MI_ADDRESS_OFFSET to achieve indexed operations on memory.
> > > > 
> > > > Due to performance reasons (cpu crc32 calculation even on WC memory is
> > > > still much faster than on gpu, and it also depends on the memory region
> > > > of the object) calculation will complete in a reasonable amount of time
> > > > only for a few MiB.
> > > > 
> > > > v2: - use registers relative to engine to allow run on all engines (Chris)
> > > >     - use predication instead of memory access to get better performance
> > > >       (Chris)
> > > >     - add location where crc32 implementation comes from (Petri)
> > > > 
> > > > v3: - extract crc32 table + cpu_crc32() to separate i915_crc_table.c
> > > > 
> > > > Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
> > > > ---
> > > >  lib/i915/i915_crc.c         | 311 ++++++++++++++++++++++++++++++++++++
> > > >  lib/i915/i915_crc.h         |  17 ++
> > > >  lib/i915/i915_crc32_table.c | 105 ++++++++++++
> > > >  lib/intel_reg.h             |   7 +
> > > >  lib/meson.build             |   1 +
> > > >  5 files changed, 441 insertions(+)
> > > >  create mode 100644 lib/i915/i915_crc.c
> > > >  create mode 100644 lib/i915/i915_crc.h
> > > >  create mode 100644 lib/i915/i915_crc32_table.c
> > > > 
> > > > diff --git a/lib/i915/i915_crc.c b/lib/i915/i915_crc.c
> > > > new file mode 100644
> > > > index 0000000000..c26a8e05b9
> > > > --- /dev/null
> > > > +++ b/lib/i915/i915_crc.c
> > > > @@ -0,0 +1,311 @@
> > > > +// SPDX-License-Identifier: MIT
> > > > +/*
> > > > + * Copyright © 2022 Intel Corporation
> > > > + */
> > > > +
> > > > +#include <stddef.h>
> > > > +#include <stdint.h>
> > > > +#include "drmtest.h"
> > > > +#include "gem_create.h"
> > > > +#include "gem_engine_topology.h"
> > > > +#include "gem_mman.h"
> > > > +#include "i830_reg.h"
> > > > +#include "i915_drm.h"
> > > > +#include "intel_reg.h"
> > > > +#include "intel_chipset.h"
> > > > +#include "ioctl_wrappers.h"
> > > > +#include "intel_allocator.h"
> > > > +#include "i915/i915_crc.h"
> > > > +
> > > > +/* Include crc32 table + cpu_crc32() */
> > > > +#include "i915_crc32_table.c"
> > > 
> > > #including .c files is ugly. Can that be a header file with
> > > statics/inlines instead?
> > 
> > > To avoid aesthetic dilemmas I'm going to add a separate igt_crc.c
> > > file, which will be more vendor agnostic, with externs for the crc table(s).
> > > Assuming other vendors may add some gpu crc calculation, we may share
> > > crc32 (and maybe other) tables from there. And igt_cpu_crc32() would
> > > also be good to put in there.
> 
> Speaking of other tables, we have a crc calculation code in chamelium
> code, that's........ crc16? Anyway, the function chamelium_xrgb_hash16
> in lib/igt_chamelium.c. Might make sense to move that code to the new
> home of cpu crc calcs.

If I understand correctly, the kms part has two different algorithms for
computing crc - chamelium has its own 'hash', a much simpler one based on
bit-shifting, and VESA crc16. If update_crc16_dp() can be replaced by
a table version of crc16 we can try to replace it. But the XRGB8888
input buffer still requires treating R, G and B individually (not as
a contiguous buffer).

If the above can be deferred to a later time, that would be great.

--
Zbigniew

> 
> > 
> > > 
> > > That said, it also isn't i915-specific anymore but that's not a
> > > blocker for merging the code at this time.
> > 
> > That's fine, better to do few iterations to look better / be more 
> > future ready than merging because it just works.
> 
> Excellent, thanks!
> 
> 
> -- 
> Petri Latvala
> 
> 
> > 
> > Thanks for review, await new version soon.
> > 
> > --
> > Zbigniew
> > 
> > > 
> > > 
> > > -- 
> > > Petri Latvala
> > > 
> > > 
> > > > +
> > > > +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
> > > > +
> > > > +#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
> > > > +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
> > > > +/* Opcodes for MI_MATH_INSTR */
> > > > +#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
> > > > +#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
> > > > +#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
> > > > +#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
> > > > +#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
> > > > +#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
> > > > +#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
> > > > +#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
> > > > +#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
> > > > +#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
> > > > +#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
> > > > +#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
> > > > +/* DG2+ */
> > > > +#define   MI_MATH_SHL                   MI_MATH_INSTR(0x105, 0x0, 0x0)
> > > > +#define   MI_MATH_SHR                   MI_MATH_INSTR(0x106, 0x0, 0x0)
> > > > +#define   MI_MATH_SAR                   MI_MATH_INSTR(0x107, 0x0, 0x0)
> > > > +
> > > > +/* Registers used as operands in MI_MATH_INSTR */
> > > > +#define   MI_MATH_REG(x)                (x)
> > > > +#define   MI_MATH_REG_SRCA              0x20
> > > > +#define   MI_MATH_REG_SRCB              0x21
> > > > +#define   MI_MATH_REG_ACCU              0x31
> > > > +#define   MI_MATH_REG_ZF                0x32
> > > > +#define   MI_MATH_REG_CF                0x33
> > > > +
> > > > +#define MI_SET_PREDICATE                MI_INSTR(0x01, 0)
> > > > +#define MI_ARB_CHECK                    MI_INSTR(0x5, 0)
> > > > +#define MI_LOAD_REGISTER_REG            MI_INSTR(0x2A, 1)
> > > > +#define CS_GPR(x)                       (0x600 + 8 * (x))
> > > > +#define GPR(x)                          CS_GPR(x)
> > > > +#define R(x)                            (x)
> > > > +#define USERDATA(offset, idx)	        ((offset) + (0x100 + (idx)) * 4)
> > > > +#define OFFSET(obj_offset, current, start) \
> > > > +	((obj_offset) + (current - start) * 4)
> > > > +
> > > > +#define MI_PREDICATE_RESULT             0x3B8
> > > > +#define WPARID                          0x21C
> > > > +#define CS_MI_ADDRESS_OFFSET            0x3B4
> > > > +
> > > > +#define LOAD_REGISTER_REG(__reg_src, __reg_dst) do { \
> > > > +		*bb++ = MI_LOAD_REGISTER_REG | BIT(19) | BIT(18); \
> > > > +		*bb++ = (__reg_src); \
> > > > +		*bb++ = (__reg_dst); \
> > > > +	} while (0)
> > > > +
> > > > +#define LOAD_REGISTER_IMM32(__reg, __imm1) do { \
> > > > +		*bb++ = MI_LOAD_REGISTER_IMM | BIT(19); \
> > > > +		*bb++ = (__reg); \
> > > > +		*bb++ = (__imm1); \
> > > > +	} while (0)
> > > > +
> > > > +#define LOAD_REGISTER_IMM64(__reg, __imm1, __imm2) do { \
> > > > +		*bb++ = (MI_LOAD_REGISTER_IMM + 2) | BIT(19); \
> > > > +		*bb++ = (__reg); \
> > > > +		*bb++ = (__imm1); \
> > > > +		*bb++ = (__reg) + 4; \
> > > > +		*bb++ = (__imm2); \
> > > > +	} while (0)
> > > > +
> > > > +#define LOAD_REGISTER_MEM(__reg, __offset) do { \
> > > > +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19); \
> > > > +		*bb++ = (__reg); \
> > > > +		*bb++ = (__offset); \
> > > > +		*bb++ = (__offset) >> 32; \
> > > > +	} while (0)
> > > > +
> > > > +#define LOAD_REGISTER_MEM_WPARID(__reg, __offset) do { \
> > > > +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19) | BIT(16); \
> > > > +		*bb++ = (__reg); \
> > > > +		*bb++ = (__offset); \
> > > > +		*bb++ = (__offset) >> 32; \
> > > > +	} while (0)
> > > > +
> > > > +#define STORE_REGISTER_MEM(__reg, __offset) do { \
> > > > +		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19); \
> > > > +		*bb++ = (__reg); \
> > > > +		*bb++ = (__offset); \
> > > > +		*bb++ = (__offset) >> 32; \
> > > > +	} while (0)
> > > > +
> > > > +#define STORE_REGISTER_MEM_PREDICATED(__reg, __offset) do { \
> > > > +		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19) | (BIT(21); \
> > > > +		*bb++ = (__reg); \
> > > > +		*bb++ = (__offset); \
> > > > +		*bb++ = (__offset) >> 32; \
> > > > +	} while (0)
> > > > +
> > > > +#define COND_BBE(__value, __offset, __condition) do { \
> > > > +		*bb++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (__condition) | 2; \
> > > > +		*bb++ = (__value); \
> > > > +		*bb++ = (__offset); \
> > > > +		*bb++ = (__offset) >> 32; \
> > > > +	} while (0)
> > > > +
> > > > +#define MATH_4_STORE(__r1, __r2, __op, __r3) do { \
> > > > +		*bb++ = MI_MATH(4); \
> > > > +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(__r1)); \
> > > > +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(__r2)); \
> > > > +		*bb++ = (__op); \
> > > > +		*bb++ = MI_MATH_STORE(MI_MATH_REG(__r3), MI_MATH_REG_ACCU); \
> > > > +	} while (0)
> > > > +
> > > > +#define BBSIZE 4096
> > > > +
> > > > +/* Aliasing for easier refactoring */
> > > > +#define GPR_SIZE	GPR(0)
> > > > +#define R_SIZE		R(0)
> > > > +
> > > > +#define GPR_CRC		GPR(1)
> > > > +#define R_CRC		R(1)
> > > > +
> > > > +#define GPR_INDATA_IDX  GPR(2)
> > > > +#define R_INDATA_IDX	R(2)
> > > > +
> > > > +#define GPR_TABLE_IDX   GPR(3)
> > > > +#define R_TABLE_IDX	R(3)
> > > > +
> > > > +#define GPR_CURR_DW	GPR(4)
> > > > +#define R_CURR_DW	R(4)
> > > > +
> > > > +#define GPR_CONST_2	GPR(5)
> > > > +#define R_CONST_2	R(5)
> > > > +
> > > > +#define GPR_CONST_4	GPR(6)
> > > > +#define R_CONST_4	R(6)
> > > > +
> > > > +#define GPR_CONST_8	GPR(7)
> > > > +#define R_CONST_8	R(7)
> > > > +
> > > > +#define GPR_CONST_ff	GPR(8)
> > > > +#define R_CONST_ff	R(8)
> > > > +
> > > > +#define GPR_ffffffff    GPR(9)
> > > > +#define R_ffffffff	R(9)
> > > > +
> > > > +#define GPR_TMP_1	GPR(10)
> > > > +#define R_TMP_1		R(10)
> > > > +
> > > > +#define GPR_TMP_2	GPR(11)
> > > > +#define R_TMP_2		R(11)
> > > > +
> > > > +static void fill_batch(int i915, uint32_t bb_handle, uint64_t bb_offset,
> > > > +		       uint64_t table_offset, uint64_t data_offset, uint32_t data_size)
> > > > +{
> > > > +	uint32_t *bb, *batch, *jmp;
> > > > +	const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
> > > > +	const int use_64b = gen >= 8;
> > > > +	uint64_t offset;
> > > > +	uint64_t crc = USERDATA(table_offset, 0);
> > > > +
> > > > +	igt_assert(data_size % 4 == 0);
> > > > +
> > > > +	batch = gem_mmap__device_coherent(i915, bb_handle, 0, BBSIZE,
> > > > +					  PROT_READ | PROT_WRITE);
> > > > +	memset(batch, 0, BBSIZE);
> > > > +
> > > > +	bb = batch;
> > > > +
> > > > +	LOAD_REGISTER_IMM64(GPR_SIZE, data_size, 0);
> > > > +	LOAD_REGISTER_IMM64(GPR_CRC, ~0U, 0);		/* crc start - 0xffffffff */
> > > > +	LOAD_REGISTER_IMM64(GPR_INDATA_IDX, 0, 0);	/* data_offset index (0) */
> > > > +	LOAD_REGISTER_IMM64(GPR_CONST_2, 2, 0);		/* const value 2 */
> > > > +	LOAD_REGISTER_IMM64(GPR_CONST_4, 4, 0);		/* const value 4 */
> > > > +	LOAD_REGISTER_IMM64(GPR_CONST_8, 8, 0);		/* const value 8 */
> > > > +	LOAD_REGISTER_IMM64(GPR_CONST_ff, 0xff, 0);	/* const value 0xff */
> > > > +	LOAD_REGISTER_IMM64(GPR_ffffffff, ~0U, 0);	/* const value 0xffffffff */
> > > > +
> > > > +	/* for indexed reads from memory */
> > > > +	LOAD_REGISTER_IMM32(WPARID, 1);
> > > > +
> > > > +	jmp = bb;
> > > > +
> > > > +	*bb++ = MI_SET_PREDICATE;
> > > > +	*bb++ = MI_ARB_CHECK;
> > > > +
> > > > +	LOAD_REGISTER_REG(GPR_INDATA_IDX, CS_MI_ADDRESS_OFFSET);
> > > > +	LOAD_REGISTER_MEM_WPARID(GPR_CURR_DW, data_offset);
> > > > +
> > > > +	for (int byte = 0; byte < 4; byte++) {
> > > > +		if (byte != 0)
> > > > +			MATH_4_STORE(R_CURR_DW, R_CONST_8,
> > > > +				     MI_MATH_SHR, R_CURR_DW); /* dw >> 8 */
> > > > +
> > > > +		/* crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); */
> > > > +		MATH_4_STORE(R_CURR_DW, R_CONST_ff,
> > > > +			     MI_MATH_AND, R_TMP_1); /* dw & 0xff */
> > > > +		MATH_4_STORE(R_CRC, R_TMP_1,
> > > > +			     MI_MATH_XOR, R_TMP_1); /* crc ^ tmp */
> > > > +		MATH_4_STORE(R_TMP_1, R_CONST_ff,
> > > > +			     MI_MATH_AND, R_TMP_1); /* tmp & 0xff */
> > > > +		MATH_4_STORE(R_TMP_1, R_CONST_2,
> > > > +			     MI_MATH_SHL, R_TABLE_IDX); /* tmp << 2 (crc idx) */
> > > > +
> > > > +		LOAD_REGISTER_REG(GPR_TABLE_IDX, CS_MI_ADDRESS_OFFSET);
> > > > +		LOAD_REGISTER_MEM_WPARID(GPR_TMP_1, table_offset);
> > > > +
> > > > +		MATH_4_STORE(R_CRC, R_CONST_8,
> > > > +			     MI_MATH_SHR, R_TMP_2); /* crc >> 8 (shift) */
> > > > +		MATH_4_STORE(R_TMP_2, R_TMP_1,
> > > > +			     MI_MATH_XOR, R_CRC); /* crc = tab[v] ^ shift */
> > > > +	}
> > > > +
> > > > +	/* increment data index */
> > > > +	MATH_4_STORE(R_INDATA_IDX, R_CONST_4, MI_MATH_ADD, R_INDATA_IDX);
> > > > +
> > > > +	/* loop until R_SIZE == 0, R_SIZE = R_SIZE - R_CONST_4 */
> > > > +
> > > > +	*bb++ = MI_MATH(5);
> > > > +	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(R_SIZE));
> > > > +	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(R_CONST_4));
> > > > +	*bb++ = MI_MATH_SUB;
> > > > +	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_SIZE), MI_MATH_REG_ACCU);
> > > > +	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_TMP_2), MI_MATH_REG_ZF);
> > > > +	LOAD_REGISTER_REG(GPR_TMP_2, MI_PREDICATE_RESULT);
> > > > +
> > > > +	*bb++ = MI_BATCH_BUFFER_START | BIT(15) | BIT(8) | use_64b;
> > > > +	offset = OFFSET(bb_offset, jmp, batch);
> > > > +	*bb++ = offset;
> > > > +	*bb++ = offset >> 32;
> > > > +
> > > > +	*bb++ = MI_SET_PREDICATE;
> > > > +
> > > > +	MATH_4_STORE(R_CRC, R_ffffffff, MI_MATH_XOR, R_TMP_1);
> > > > +	STORE_REGISTER_MEM(GPR_TMP_1, crc);
> > > > +
> > > > +	*bb++ = MI_BATCH_BUFFER_END;
> > > > +
> > > > +	gem_munmap(batch, BBSIZE);
> > > > +}
> > > > +
> > > > +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> > > > +		    const struct intel_execution_engine2 *e,
> > > > +		    uint32_t data_handle, uint32_t data_size)
> > > > +{
> > > > +	struct drm_i915_gem_execbuffer2 execbuf = {};
> > > > +	struct drm_i915_gem_exec_object2 obj[3] = {};
> > > > +	uint64_t bb_offset, table_offset, data_offset;
> > > > +	uint32_t bb, table, crc, table_size = 4096;
> > > > +	uint32_t *ptr;
> > > > +
> > > > +	igt_assert(data_size % 4 == 0);
> > > > +
> > > > +	table = gem_create_in_memory_regions(i915, table_size, REGION_LMEM(0));
> > > > +	gem_write(i915, table, 0, crc32_tab, sizeof(crc32_tab));
> > > > +
> > > > +	table_offset = get_offset(ahnd, table, table_size, 0);
> > > > +	data_offset = get_offset(ahnd, data_handle, data_size, 0);
> > > > +
> > > > +	obj[0].offset = table_offset;
> > > > +	obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
> > > > +	obj[0].handle = table;
> > > > +
> > > > +	obj[1].offset = data_offset;
> > > > +	obj[1].flags = EXEC_OBJECT_PINNED;
> > > > +	obj[1].handle = data_handle;
> > > > +
> > > > +	bb = gem_create_in_memory_regions(i915, BBSIZE, REGION_LMEM(0));
> > > > +	bb_offset = get_offset(ahnd, bb, BBSIZE, 0);
> > > > +	fill_batch(i915, bb, bb_offset, table_offset, data_offset, data_size);
> > > > +	obj[2].offset = bb_offset;
> > > > +	obj[2].flags = EXEC_OBJECT_PINNED;
> > > > +	obj[2].handle = bb;
> > > > +	execbuf.buffer_count = 3;
> > > > +	execbuf.buffers_ptr = to_user_pointer(obj);
> > > > +	execbuf.flags = e->flags;
> > > > +	execbuf.rsvd1 = ctx->id;
> > > > +	gem_execbuf(i915, &execbuf);
> > > > +	gem_sync(i915, table);
> > > > +
> > > > +	ptr = gem_mmap__device_coherent(i915, table, 0, table_size, PROT_READ);
> > > > +	crc = ptr[0x100];
> > > > +	gem_munmap(ptr, table_size);
> > > > +	gem_close(i915, table);
> > > > +	gem_close(i915, bb);
> > > > +
> > > > +	return crc;
> > > > +}
> > > > +
> > > > +bool supports_gpu_crc32(int i915)
> > > > +{
> > > > +	uint16_t devid = intel_get_drm_devid(i915);
> > > > +
> > > > +	return IS_DG2(devid);
> > > > +}
> > > > diff --git a/lib/i915/i915_crc.h b/lib/i915/i915_crc.h
> > > > new file mode 100644
> > > > index 0000000000..bb0195e2a8
> > > > --- /dev/null
> > > > +++ b/lib/i915/i915_crc.h
> > > > @@ -0,0 +1,17 @@
> > > > +/* SPDX-License-Identifier: MIT */
> > > > +/*
> > > > + * Copyright © 2022 Intel Corporation
> > > > + */
> > > > +#ifndef _I915_CRC_H_
> > > > +#define _I915_CRC_H_
> > > > +
> > > > +#include <stdint.h>
> > > > +#include "intel_ctx.h"
> > > > +
> > > > +uint32_t cpu_crc32(const void *buf, size_t size);
> > > > +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> > > > +		    const struct intel_execution_engine2 *e,
> > > > +		    uint32_t data_handle, uint32_t data_size);
> > > > +bool supports_gpu_crc32(int i915);
> > > > +
> > > > +#endif /* _I915_CRC_ */
> > > > diff --git a/lib/i915/i915_crc32_table.c b/lib/i915/i915_crc32_table.c
> > > > new file mode 100644
> > > > index 0000000000..eca5e43218
> > > > --- /dev/null
> > > > +++ b/lib/i915/i915_crc32_table.c
> > > > @@ -0,0 +1,105 @@
> > > > +/*-
> > > > + *  COPYRIGHT (C) 1986 Gary S. Brown.  You may use this program, or
> > > > + *  code or tables extracted from it, as desired without restriction.
> > > > + */
> > > > +
> > > > +/*
> > > > + *  First, the polynomial itself and its table of feedback terms.  The
> > > > + *  polynomial is
> > > > + *  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
> > > > + *
> > > > + *  Note that we take it "backwards" and put the highest-order term in
> > > > + *  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
> > > > + *  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
> > > > + *  the MSB being 1
> > > > + *
> > > > + *  Note that the usual hardware shift register implementation, which
> > > > + *  is what we're using (we're merely optimizing it by doing eight-bit
> > > > + *  chunks at a time) shifts bits into the lowest-order term.  In our
> > > > + *  implementation, that means shifting towards the right.  Why do we
> > > > + *  do it this way?  Because the calculated CRC must be transmitted in
> > > > + *  order from highest-order term to lowest-order term.  UARTs transmit
> > > > + *  characters in order from LSB to MSB.  By storing the CRC this way
> > > > + *  we hand it to the UART in the order low-byte to high-byte; the UART
> > > > + *  sends each low-bit to high-bit; and the result is transmission bit
> > > > + *  by bit from highest- to lowest-order term without requiring any bit
> > > > + *  shuffling on our part.  Reception works similarly
> > > > + *
> > > > + *  The feedback terms table consists of 256, 32-bit entries.  Notes
> > > > + *
> > > > + *      The table can be generated at runtime if desired; code to do so
> > > > + *      is shown later.  It might not be obvious, but the feedback
> > > > + *      terms simply represent the results of eight shift/xor opera
> > > > + *      tions for all combinations of data and CRC register values
> > > > + *
> > > > + *      The values must be right-shifted by eight bits by the "updcrc"
> > > > + *      logic; the shift must be unsigned (bring in zeroes).  On some
> > > > + *      hardware you could probably optimize the shift in assembler by
> > > > + *      using byte-swap instructions
> > > > + *      polynomial $edb88320
> > > > + *
> > > > + *
> > > > + * CRC32 code derived from work by Gary S. Brown.
> > > > + */
> > > > +
> > > > +#include <stdint.h>
> > > > +
> > > > +const uint32_t crc32_tab[] = {
> > > > +	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
> > > > +	0xe963a535, 0x9e6495a3,	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
> > > > +	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
> > > > +	0xf3b97148, 0x84be41de,	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
> > > > +	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,	0x14015c4f, 0x63066cd9,
> > > > +	0xfa0f3d63, 0x8d080df5,	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
> > > > +	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,	0x35b5a8fa, 0x42b2986c,
> > > > +	0xdbbbc9d6, 0xacbcf940,	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
> > > > +	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
> > > > +	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
> > > > +	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,	0x76dc4190, 0x01db7106,
> > > > +	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
> > > > +	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
> > > > +	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
> > > > +	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
> > > > +	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
> > > > +	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
> > > > +	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
> > > > +	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
> > > > +	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
> > > > +	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
> > > > +	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
> > > > +	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
> > > > +	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
> > > > +	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
> > > > +	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
> > > > +	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
> > > > +	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
> > > > +	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
> > > > +	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
> > > > +	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
> > > > +	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
> > > > +	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
> > > > +	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
> > > > +	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
> > > > +	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
> > > > +	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
> > > > +	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
> > > > +	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
> > > > +	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
> > > > +	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
> > > > +	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
> > > > +	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
> > > > +};
> > > > +
> > > > +uint32_t cpu_crc32(const void *buf, size_t size)
> > > > +{
> > > > +
> > > > +	const uint8_t *p = buf;
> > > > +	uint32_t crc;
> > > > +
> > > > +	crc = ~0U;
> > > > +
> > > > +	while (size--)
> > > > +		crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
> > > > +
> > > > +	return crc ^ ~0U;
> > > > +}
> > > > diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> > > > index cb62728896..fff32e1816 100644
> > > > --- a/lib/intel_reg.h
> > > > +++ b/lib/intel_reg.h
> > > > @@ -2625,6 +2625,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> > > >  #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
> > > >  #define MI_LOAD_REGISTER_MEM_GEN8	((0x29 << 23) | (4 - 2))
> > > >  #define   MI_MMIO_REMAP_ENABLE_GEN12	(1 << 17)
> > > > +#define MI_STORE_REGISTER_MEM_GEN8	((0x24 << 23) | (4 - 2))
> > > >  
> > > >  /* Flush */
> > > >  #define MI_FLUSH			(0x04<<23)
> > > > @@ -2657,6 +2658,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> > > >  #define MI_BATCH_BUFFER_END	(0xA << 23)
> > > >  #define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
> > > >  #define MI_DO_COMPARE                   (1 << 21)
> > > > +#define MAD_GT_IDD			(0 << 12)
> > > > +#define MAD_GT_OR_EQ_IDD		(1 << 12)
> > > > +#define MAD_LT_IDD			(2 << 12)
> > > > +#define MAD_LT_OR_EQ_IDD		(3 << 12)
> > > > +#define MAD_EQ_IDD			(4 << 12)
> > > > +#define MAD_NEQ_IDD			(5 << 12)
> > > >  
> > > >  #define MI_BATCH_NON_SECURE		(1)
> > > >  #define MI_BATCH_NON_SECURE_I965	(1 << 8)
> > > > diff --git a/lib/meson.build b/lib/meson.build
> > > > index 0a173c1fc6..b05198ecc9 100644
> > > > --- a/lib/meson.build
> > > > +++ b/lib/meson.build
> > > > @@ -10,6 +10,7 @@ lib_sources = [
> > > >  	'i915/gem_ring.c',
> > > >  	'i915/gem_mman.c',
> > > >  	'i915/gem_vm.c',
> > > > +	'i915/i915_crc.c',
> > > >  	'i915/intel_memory_region.c',
> > > >  	'i915/intel_mocs.c',
> > > >  	'i915/i915_blt.c',
> > > > -- 
> > > > 2.32.0
> > > > 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2
  2022-06-08  9:17         ` Zbigniew Kempczyński
@ 2022-06-09  7:22           ` Petri Latvala
  0 siblings, 0 replies; 16+ messages in thread
From: Petri Latvala @ 2022-06-09  7:22 UTC (permalink / raw)
  To: Zbigniew Kempczyński; +Cc: igt-dev

On Wed, Jun 08, 2022 at 11:17:57AM +0200, Zbigniew Kempczyński wrote:
> On Mon, Jun 06, 2022 at 11:07:53AM +0300, Petri Latvala wrote:
> > On Mon, Jun 06, 2022 at 08:33:05AM +0200, Zbigniew Kempczyński wrote:
> > > On Fri, Jun 03, 2022 at 04:11:41PM +0300, Petri Latvala wrote:
> > > > On Fri, Jun 03, 2022 at 03:05:01PM +0200, Zbigniew Kempczyński wrote:
> > > > > > Adding crc32 calculation on gpu gives us a new possibility to verify data
> > > > > > integrity without relying on trust that the cpu mapping is correct.
> > > > > 
> > > > > Patch introduces calculating crc32 on DG2 only. On older gens ALU
> > > > > (MI_MATH) doesn't support bit-shifting instructions as well as multiply
> > > > > > or divide. Emulating n-bit shifts costs hundreds of instructions with
> > > > > predicated SRM (works on render engine only). Another limitation is lack
> > > > > of indexed load / store. On DG2 we can use WPARID and CS_MI_ADDRESS_OFFSET
> > > > > to achieve indexed operation on memory.
> > > > > 
> > > > > > Due to performance reasons (cpu crc32 calculation even on WC memory is
> > > > > > still much faster than on gpu, and it also depends on the memory region
> > > > > > of the object) the calculation will complete in a reasonable amount of
> > > > > > time only for a few MiB.
> > > > > 
> > > > > v2: - use registers relative to engine to allow run on all engines (Chris)
> > > > >     - use predication instead of memory access to get better performance
> > > > >       (Chris)
> > > > >     - add location where crc32 implementation comes from (Petri)
> > > > > 
> > > > > v3: - extract crc32 table + cpu_crc32() to separate i915_crc_table.c
> > > > > 
> > > > > Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
> > > > > ---
> > > > >  lib/i915/i915_crc.c         | 311 ++++++++++++++++++++++++++++++++++++
> > > > >  lib/i915/i915_crc.h         |  17 ++
> > > > >  lib/i915/i915_crc32_table.c | 105 ++++++++++++
> > > > >  lib/intel_reg.h             |   7 +
> > > > >  lib/meson.build             |   1 +
> > > > >  5 files changed, 441 insertions(+)
> > > > >  create mode 100644 lib/i915/i915_crc.c
> > > > >  create mode 100644 lib/i915/i915_crc.h
> > > > >  create mode 100644 lib/i915/i915_crc32_table.c
> > > > > 
> > > > > diff --git a/lib/i915/i915_crc.c b/lib/i915/i915_crc.c
> > > > > new file mode 100644
> > > > > index 0000000000..c26a8e05b9
> > > > > --- /dev/null
> > > > > +++ b/lib/i915/i915_crc.c
> > > > > @@ -0,0 +1,311 @@
> > > > > +// SPDX-License-Identifier: MIT
> > > > > +/*
> > > > > + * Copyright © 2022 Intel Corporation
> > > > > + */
> > > > > +
> > > > > +#include <stddef.h>
> > > > > +#include <stdint.h>
> > > > > +#include "drmtest.h"
> > > > > +#include "gem_create.h"
> > > > > +#include "gem_engine_topology.h"
> > > > > +#include "gem_mman.h"
> > > > > +#include "i830_reg.h"
> > > > > +#include "i915_drm.h"
> > > > > +#include "intel_reg.h"
> > > > > +#include "intel_chipset.h"
> > > > > +#include "ioctl_wrappers.h"
> > > > > +#include "intel_allocator.h"
> > > > > +#include "i915/i915_crc.h"
> > > > > +
> > > > > +/* Include crc32 table + cpu_crc32() */
> > > > > +#include "i915_crc32_table.c"
> > > > 
> > > > #including .c files is ugly. Can that be a header file with
> > > > statics/inlines instead?
> > > 
> > > To avoid aesthetic dilemmas I'm going to add a separate igt_crc.c
> > > file, which will be more vendor agnostic, with externs for the crc
> > > table(s). Assuming other vendors may add some gpu crc calculation, we
> > > may share crc32 (and maybe other) tables from there. igt_cpu_crc32()
> > > would also be good to put in there.
> > 
> > Speaking of other tables, we have a crc calculation code in chamelium
> > code, that's........ crc16? Anyway, the function chamelium_xrgb_hash16
> > in lib/igt_chamelium.c. Might make sense to move that code to the new
> > home of cpu crc calcs.
> 
> If I understand correctly, the kms part has two different algorithms for
> calculating crc - chamelium has its own 'hash', a much simpler one based on
> bit-shifting, and VESA crc16. If update_crc16_dp() can be replaced by a
> table version of crc16 we can try to replace it. But the XRGB8888 input
> buffer still requires treating R, G and B individually (not as a
> contiguous buffer).
> 
> If the above can be deferred to another time it would be great.

Yeah, that's fair.


-- 
Petri Latvala



> 
> --
> Zbigniew
> 
> > 
> > > 
> > > > 
> > > > That said, it also isn't i915-specific anymore but that's not a
> > > > blocker for merging the code at this time.
> > > 
> > > That's fine, it is better to do a few iterations so it looks better /
> > > is more future ready than to merge it just because it works.
> > 
> > Excellent, thanks!
> > 
> > 
> > -- 
> > Petri Latvala
> > 
> > 
> > > 
> > > Thanks for the review, expect a new version soon.
> > > 
> > > --
> > > Zbigniew
> > > 
> > > > 
> > > > 
> > > > -- 
> > > > Petri Latvala
> > > > 
> > > > 
> > > > > +
> > > > > +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
> > > > > +
> > > > > +#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
> > > > > +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
> > > > > +/* Opcodes for MI_MATH_INSTR */
> > > > > +#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
> > > > > +#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
> > > > > +#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
> > > > > +#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
> > > > > +#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
> > > > > +#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
> > > > > +#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
> > > > > +#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
> > > > > +#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
> > > > > +#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
> > > > > +#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
> > > > > +#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
> > > > > +/* DG2+ */
> > > > > +#define   MI_MATH_SHL                   MI_MATH_INSTR(0x105, 0x0, 0x0)
> > > > > +#define   MI_MATH_SHR                   MI_MATH_INSTR(0x106, 0x0, 0x0)
> > > > > +#define   MI_MATH_SAR                   MI_MATH_INSTR(0x107, 0x0, 0x0)
> > > > > +
> > > > > +/* Registers used as operands in MI_MATH_INSTR */
> > > > > +#define   MI_MATH_REG(x)                (x)
> > > > > +#define   MI_MATH_REG_SRCA              0x20
> > > > > +#define   MI_MATH_REG_SRCB              0x21
> > > > > +#define   MI_MATH_REG_ACCU              0x31
> > > > > +#define   MI_MATH_REG_ZF                0x32
> > > > > +#define   MI_MATH_REG_CF                0x33
> > > > > +
> > > > > +#define MI_SET_PREDICATE                MI_INSTR(0x01, 0)
> > > > > +#define MI_ARB_CHECK                    MI_INSTR(0x5, 0)
> > > > > +#define MI_LOAD_REGISTER_REG            MI_INSTR(0x2A, 1)
> > > > > +#define CS_GPR(x)                       (0x600 + 8 * (x))
> > > > > +#define GPR(x)                          CS_GPR(x)
> > > > > +#define R(x)                            (x)
> > > > > +#define USERDATA(offset, idx)	        ((offset) + (0x100 + (idx)) * 4)
> > > > > +#define OFFSET(obj_offset, current, start) \
> > > > > +	((obj_offset) + (current - start) * 4)
> > > > > +
> > > > > +#define MI_PREDICATE_RESULT             0x3B8
> > > > > +#define WPARID                          0x21C
> > > > > +#define CS_MI_ADDRESS_OFFSET            0x3B4
> > > > > +
> > > > > +#define LOAD_REGISTER_REG(__reg_src, __reg_dst) do { \
> > > > > +		*bb++ = MI_LOAD_REGISTER_REG | BIT(19) | BIT(18); \
> > > > > +		*bb++ = (__reg_src); \
> > > > > +		*bb++ = (__reg_dst); \
> > > > > +	} while (0)
> > > > > +
> > > > > +#define LOAD_REGISTER_IMM32(__reg, __imm1) do { \
> > > > > +		*bb++ = MI_LOAD_REGISTER_IMM | BIT(19); \
> > > > > +		*bb++ = (__reg); \
> > > > > +		*bb++ = (__imm1); \
> > > > > +	} while (0)
> > > > > +
> > > > > +#define LOAD_REGISTER_IMM64(__reg, __imm1, __imm2) do { \
> > > > > +		*bb++ = (MI_LOAD_REGISTER_IMM + 2) | BIT(19); \
> > > > > +		*bb++ = (__reg); \
> > > > > +		*bb++ = (__imm1); \
> > > > > +		*bb++ = (__reg) + 4; \
> > > > > +		*bb++ = (__imm2); \
> > > > > +	} while (0)
> > > > > +
> > > > > +#define LOAD_REGISTER_MEM(__reg, __offset) do { \
> > > > > +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19); \
> > > > > +		*bb++ = (__reg); \
> > > > > +		*bb++ = (__offset); \
> > > > > +		*bb++ = (__offset) >> 32; \
> > > > > +	} while (0)
> > > > > +
> > > > > +#define LOAD_REGISTER_MEM_WPARID(__reg, __offset) do { \
> > > > > +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19) | BIT(16); \
> > > > > +		*bb++ = (__reg); \
> > > > > +		*bb++ = (__offset); \
> > > > > +		*bb++ = (__offset) >> 32; \
> > > > > +	} while (0)
> > > > > +
> > > > > +#define STORE_REGISTER_MEM(__reg, __offset) do { \
> > > > > +		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19); \
> > > > > +		*bb++ = (__reg); \
> > > > > +		*bb++ = (__offset); \
> > > > > +		*bb++ = (__offset) >> 32; \
> > > > > +	} while (0)
> > > > > +
> > > > > +#define STORE_REGISTER_MEM_PREDICATED(__reg, __offset) do { \
> > > > > +		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19) | (BIT(21); \
> > > > > +		*bb++ = (__reg); \
> > > > > +		*bb++ = (__offset); \
> > > > > +		*bb++ = (__offset) >> 32; \
> > > > > +	} while (0)
> > > > > +
> > > > > +#define COND_BBE(__value, __offset, __condition) do { \
> > > > > +		*bb++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (__condition) | 2; \
> > > > > +		*bb++ = (__value); \
> > > > > +		*bb++ = (__offset); \
> > > > > +		*bb++ = (__offset) >> 32; \
> > > > > +	} while (0)
> > > > > +
> > > > > +#define MATH_4_STORE(__r1, __r2, __op, __r3) do { \
> > > > > +		*bb++ = MI_MATH(4); \
> > > > > +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(__r1)); \
> > > > > +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(__r2)); \
> > > > > +		*bb++ = (__op); \
> > > > > +		*bb++ = MI_MATH_STORE(MI_MATH_REG(__r3), MI_MATH_REG_ACCU); \
> > > > > +	} while (0)
> > > > > +
> > > > > +#define BBSIZE 4096
> > > > > +
> > > > > +/* Aliasing for easier refactoring */
> > > > > +#define GPR_SIZE	GPR(0)
> > > > > +#define R_SIZE		R(0)
> > > > > +
> > > > > +#define GPR_CRC		GPR(1)
> > > > > +#define R_CRC		R(1)
> > > > > +
> > > > > +#define GPR_INDATA_IDX  GPR(2)
> > > > > +#define R_INDATA_IDX	R(2)
> > > > > +
> > > > > +#define GPR_TABLE_IDX   GPR(3)
> > > > > +#define R_TABLE_IDX	R(3)
> > > > > +
> > > > > +#define GPR_CURR_DW	GPR(4)
> > > > > +#define R_CURR_DW	R(4)
> > > > > +
> > > > > +#define GPR_CONST_2	GPR(5)
> > > > > +#define R_CONST_2	R(5)
> > > > > +
> > > > > +#define GPR_CONST_4	GPR(6)
> > > > > +#define R_CONST_4	R(6)
> > > > > +
> > > > > +#define GPR_CONST_8	GPR(7)
> > > > > +#define R_CONST_8	R(7)
> > > > > +
> > > > > +#define GPR_CONST_ff	GPR(8)
> > > > > +#define R_CONST_ff	R(8)
> > > > > +
> > > > > +#define GPR_ffffffff    GPR(9)
> > > > > +#define R_ffffffff	R(9)
> > > > > +
> > > > > +#define GPR_TMP_1	GPR(10)
> > > > > +#define R_TMP_1		R(10)
> > > > > +
> > > > > +#define GPR_TMP_2	GPR(11)
> > > > > +#define R_TMP_2		R(11)
> > > > > +
> > > > > +static void fill_batch(int i915, uint32_t bb_handle, uint64_t bb_offset,
> > > > > +		       uint64_t table_offset, uint64_t data_offset, uint32_t data_size)
> > > > > +{
> > > > > +	uint32_t *bb, *batch, *jmp;
> > > > > +	const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
> > > > > +	const int use_64b = gen >= 8;
> > > > > +	uint64_t offset;
> > > > > +	uint64_t crc = USERDATA(table_offset, 0);
> > > > > +
> > > > > +	igt_assert(data_size % 4 == 0);
> > > > > +
> > > > > +	batch = gem_mmap__device_coherent(i915, bb_handle, 0, BBSIZE,
> > > > > +					  PROT_READ | PROT_WRITE);
> > > > > +	memset(batch, 0, BBSIZE);
> > > > > +
> > > > > +	bb = batch;
> > > > > +
> > > > > +	LOAD_REGISTER_IMM64(GPR_SIZE, data_size, 0);
> > > > > +	LOAD_REGISTER_IMM64(GPR_CRC, ~0U, 0);		/* crc start - 0xffffffff */
> > > > > +	LOAD_REGISTER_IMM64(GPR_INDATA_IDX, 0, 0);	/* data_offset index (0) */
> > > > > +	LOAD_REGISTER_IMM64(GPR_CONST_2, 2, 0);		/* const value 2 */
> > > > > +	LOAD_REGISTER_IMM64(GPR_CONST_4, 4, 0);		/* const value 4 */
> > > > > +	LOAD_REGISTER_IMM64(GPR_CONST_8, 8, 0);		/* const value 8 */
> > > > > +	LOAD_REGISTER_IMM64(GPR_CONST_ff, 0xff, 0);	/* const value 0xff */
> > > > > +	LOAD_REGISTER_IMM64(GPR_ffffffff, ~0U, 0);	/* const value 0xffffffff */
> > > > > +
> > > > > +	/* for indexed reads from memory */
> > > > > +	LOAD_REGISTER_IMM32(WPARID, 1);
> > > > > +
> > > > > +	jmp = bb;
> > > > > +
> > > > > +	*bb++ = MI_SET_PREDICATE;
> > > > > +	*bb++ = MI_ARB_CHECK;
> > > > > +
> > > > > +	LOAD_REGISTER_REG(GPR_INDATA_IDX, CS_MI_ADDRESS_OFFSET);
> > > > > +	LOAD_REGISTER_MEM_WPARID(GPR_CURR_DW, data_offset);
> > > > > +
> > > > > +	for (int byte = 0; byte < 4; byte++) {
> > > > > +		if (byte != 0)
> > > > > +			MATH_4_STORE(R_CURR_DW, R_CONST_8,
> > > > > +				     MI_MATH_SHR, R_CURR_DW); /* dw >> 8 */
> > > > > +
> > > > > +		/* crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); */
> > > > > +		MATH_4_STORE(R_CURR_DW, R_CONST_ff,
> > > > > +			     MI_MATH_AND, R_TMP_1); /* dw & 0xff */
> > > > > +		MATH_4_STORE(R_CRC, R_TMP_1,
> > > > > +			     MI_MATH_XOR, R_TMP_1); /* crc ^ tmp */
> > > > > +		MATH_4_STORE(R_TMP_1, R_CONST_ff,
> > > > > +			     MI_MATH_AND, R_TMP_1); /* tmp & 0xff */
> > > > > +		MATH_4_STORE(R_TMP_1, R_CONST_2,
> > > > > +			     MI_MATH_SHL, R_TABLE_IDX); /* tmp << 2 (crc idx) */
> > > > > +
> > > > > +		LOAD_REGISTER_REG(GPR_TABLE_IDX, CS_MI_ADDRESS_OFFSET);
> > > > > +		LOAD_REGISTER_MEM_WPARID(GPR_TMP_1, table_offset);
> > > > > +
> > > > > +		MATH_4_STORE(R_CRC, R_CONST_8,
> > > > > +			     MI_MATH_SHR, R_TMP_2); /* crc >> 8 (shift) */
> > > > > +		MATH_4_STORE(R_TMP_2, R_TMP_1,
> > > > > +			     MI_MATH_XOR, R_CRC); /* crc = tab[v] ^ shift */
> > > > > +	}
> > > > > +
> > > > > +	/* increment data index */
> > > > > +	MATH_4_STORE(R_INDATA_IDX, R_CONST_4, MI_MATH_ADD, R_INDATA_IDX);
> > > > > +
> > > > > +	/* loop until R_SIZE == 0, R_SIZE = R_SIZE - R_CONST_4 */
> > > > > +
> > > > > +	*bb++ = MI_MATH(5);
> > > > > +	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(R_SIZE));
> > > > > +	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(R_CONST_4));
> > > > > +	*bb++ = MI_MATH_SUB;
> > > > > +	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_SIZE), MI_MATH_REG_ACCU);
> > > > > +	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_TMP_2), MI_MATH_REG_ZF);
> > > > > +	LOAD_REGISTER_REG(GPR_TMP_2, MI_PREDICATE_RESULT);
> > > > > +
> > > > > +	*bb++ = MI_BATCH_BUFFER_START | BIT(15) | BIT(8) | use_64b;
> > > > > +	offset = OFFSET(bb_offset, jmp, batch);
> > > > > +	*bb++ = offset;
> > > > > +	*bb++ = offset >> 32;
> > > > > +
> > > > > +	*bb++ = MI_SET_PREDICATE;
> > > > > +
> > > > > +	MATH_4_STORE(R_CRC, R_ffffffff, MI_MATH_XOR, R_TMP_1);
> > > > > +	STORE_REGISTER_MEM(GPR_TMP_1, crc);
> > > > > +
> > > > > +	*bb++ = MI_BATCH_BUFFER_END;
> > > > > +
> > > > > +	gem_munmap(batch, BBSIZE);
> > > > > +}
> > > > > +
> > > > > +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> > > > > +		    const struct intel_execution_engine2 *e,
> > > > > +		    uint32_t data_handle, uint32_t data_size)
> > > > > +{
> > > > > +	struct drm_i915_gem_execbuffer2 execbuf = {};
> > > > > +	struct drm_i915_gem_exec_object2 obj[3] = {};
> > > > > +	uint64_t bb_offset, table_offset, data_offset;
> > > > > +	uint32_t bb, table, crc, table_size = 4096;
> > > > > +	uint32_t *ptr;
> > > > > +
> > > > > +	igt_assert(data_size % 4 == 0);
> > > > > +
> > > > > +	table = gem_create_in_memory_regions(i915, table_size, REGION_LMEM(0));
> > > > > +	gem_write(i915, table, 0, crc32_tab, sizeof(crc32_tab));
> > > > > +
> > > > > +	table_offset = get_offset(ahnd, table, table_size, 0);
> > > > > +	data_offset = get_offset(ahnd, data_handle, data_size, 0);
> > > > > +
> > > > > +	obj[0].offset = table_offset;
> > > > > +	obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
> > > > > +	obj[0].handle = table;
> > > > > +
> > > > > +	obj[1].offset = data_offset;
> > > > > +	obj[1].flags = EXEC_OBJECT_PINNED;
> > > > > +	obj[1].handle = data_handle;
> > > > > +
> > > > > +	bb = gem_create_in_memory_regions(i915, BBSIZE, REGION_LMEM(0));
> > > > > +	bb_offset = get_offset(ahnd, bb, BBSIZE, 0);
> > > > > +	fill_batch(i915, bb, bb_offset, table_offset, data_offset, data_size);
> > > > > +	obj[2].offset = bb_offset;
> > > > > +	obj[2].flags = EXEC_OBJECT_PINNED;
> > > > > +	obj[2].handle = bb;
> > > > > +	execbuf.buffer_count = 3;
> > > > > +	execbuf.buffers_ptr = to_user_pointer(obj);
> > > > > +	execbuf.flags = e->flags;
> > > > > +	execbuf.rsvd1 = ctx->id;
> > > > > +	gem_execbuf(i915, &execbuf);
> > > > > +	gem_sync(i915, table);
> > > > > +
> > > > > +	ptr = gem_mmap__device_coherent(i915, table, 0, table_size, PROT_READ);
> > > > > +	crc = ptr[0x100];
> > > > > +	gem_munmap(ptr, table_size);
> > > > > +	gem_close(i915, table);
> > > > > +	gem_close(i915, bb);
> > > > > +
> > > > > +	return crc;
> > > > > +}
> > > > > +
> > > > > +bool supports_gpu_crc32(int i915)
> > > > > +{
> > > > > +	uint16_t devid = intel_get_drm_devid(i915);
> > > > > +
> > > > > +	return IS_DG2(devid);
> > > > > +}
> > > > > diff --git a/lib/i915/i915_crc.h b/lib/i915/i915_crc.h
> > > > > new file mode 100644
> > > > > index 0000000000..bb0195e2a8
> > > > > --- /dev/null
> > > > > +++ b/lib/i915/i915_crc.h
> > > > > @@ -0,0 +1,17 @@
> > > > > +/* SPDX-License-Identifier: MIT */
> > > > > +/*
> > > > > + * Copyright © 2022 Intel Corporation
> > > > > + */
> > > > > +#ifndef _I915_CRC_H_
> > > > > +#define _I915_CRC_H_
> > > > > +
> > > > > +#include <stdint.h>
> > > > > +#include "intel_ctx.h"
> > > > > +
> > > > > +uint32_t cpu_crc32(const void *buf, size_t size);
> > > > > +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> > > > > +		    const struct intel_execution_engine2 *e,
> > > > > +		    uint32_t data_handle, uint32_t data_size);
> > > > > +bool supports_gpu_crc32(int i915);
> > > > > +
> > > > > +#endif /* _I915_CRC_ */
> > > > > diff --git a/lib/i915/i915_crc32_table.c b/lib/i915/i915_crc32_table.c
> > > > > new file mode 100644
> > > > > index 0000000000..eca5e43218
> > > > > --- /dev/null
> > > > > +++ b/lib/i915/i915_crc32_table.c
> > > > > @@ -0,0 +1,105 @@
> > > > > +/*-
> > > > > + *  COPYRIGHT (C) 1986 Gary S. Brown.  You may use this program, or
> > > > > + *  code or tables extracted from it, as desired without restriction.
> > > > > + */
> > > > > +
> > > > > +/*
> > > > > + *  First, the polynomial itself and its table of feedback terms.  The
> > > > > + *  polynomial is
> > > > > + *  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
> > > > > + *
> > > > > + *  Note that we take it "backwards" and put the highest-order term in
> > > > > + *  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
> > > > > + *  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
> > > > > + *  the MSB being 1
> > > > > + *
> > > > > + *  Note that the usual hardware shift register implementation, which
> > > > > + *  is what we're using (we're merely optimizing it by doing eight-bit
> > > > > + *  chunks at a time) shifts bits into the lowest-order term.  In our
> > > > > + *  implementation, that means shifting towards the right.  Why do we
> > > > > + *  do it this way?  Because the calculated CRC must be transmitted in
> > > > > + *  order from highest-order term to lowest-order term.  UARTs transmit
> > > > > + *  characters in order from LSB to MSB.  By storing the CRC this way
> > > > > + *  we hand it to the UART in the order low-byte to high-byte; the UART
> > > > > + *  sends each low-bit to high-bit; and the result is transmission bit
> > > > > + *  by bit from highest- to lowest-order term without requiring any bit
> > > > > + *  shuffling on our part.  Reception works similarly.
> > > > > + *
> > > > > + *  The feedback terms table consists of 256, 32-bit entries.  Notes:
> > > > > + *
> > > > > + *      The table can be generated at runtime if desired; code to do so
> > > > > + *      is shown later.  It might not be obvious, but the feedback
> > > > > + *      terms simply represent the results of eight shift/xor opera-
> > > > > + *      tions for all combinations of data and CRC register values.
> > > > > + *
> > > > > + *      The values must be right-shifted by eight bits by the "updcrc"
> > > > > + *      logic; the shift must be unsigned (bring in zeroes).  On some
> > > > > + *      hardware you could probably optimize the shift in assembler by
> > > > > + *      using byte-swap instructions.
> > > > > + *      polynomial $edb88320
> > > > > + *
> > > > > + *
> > > > > + * CRC32 code derived from work by Gary S. Brown.
> > > > > + */
> > > > > +
> > > > > +#include <stdint.h>
> > > > > +
> > > > > +const uint32_t crc32_tab[] = {
> > > > > +	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
> > > > > +	0xe963a535, 0x9e6495a3,	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
> > > > > +	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
> > > > > +	0xf3b97148, 0x84be41de,	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
> > > > > +	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,	0x14015c4f, 0x63066cd9,
> > > > > +	0xfa0f3d63, 0x8d080df5,	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
> > > > > +	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,	0x35b5a8fa, 0x42b2986c,
> > > > > +	0xdbbbc9d6, 0xacbcf940,	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
> > > > > +	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
> > > > > +	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
> > > > > +	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,	0x76dc4190, 0x01db7106,
> > > > > +	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
> > > > > +	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
> > > > > +	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
> > > > > +	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
> > > > > +	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
> > > > > +	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
> > > > > +	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
> > > > > +	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
> > > > > +	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
> > > > > +	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
> > > > > +	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
> > > > > +	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
> > > > > +	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
> > > > > +	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
> > > > > +	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
> > > > > +	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
> > > > > +	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
> > > > > +	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
> > > > > +	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
> > > > > +	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
> > > > > +	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
> > > > > +	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
> > > > > +	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
> > > > > +	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
> > > > > +	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
> > > > > +	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
> > > > > +	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
> > > > > +	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
> > > > > +	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
> > > > > +	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
> > > > > +	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
> > > > > +	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
> > > > > +};
> > > > > +
> > > > > +uint32_t cpu_crc32(const void *buf, size_t size)
> > > > > +{
> > > > > +
> > > > > +	const uint8_t *p = buf;
> > > > > +	uint32_t crc;
> > > > > +
> > > > > +	crc = ~0U;
> > > > > +
> > > > > +	while (size--)
> > > > > +		crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
> > > > > +
> > > > > +	return crc ^ ~0U;
> > > > > +}
> > > > > diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> > > > > index cb62728896..fff32e1816 100644
> > > > > --- a/lib/intel_reg.h
> > > > > +++ b/lib/intel_reg.h
> > > > > @@ -2625,6 +2625,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> > > > >  #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
> > > > >  #define MI_LOAD_REGISTER_MEM_GEN8	((0x29 << 23) | (4 - 2))
> > > > >  #define   MI_MMIO_REMAP_ENABLE_GEN12	(1 << 17)
> > > > > +#define MI_STORE_REGISTER_MEM_GEN8	((0x24 << 23) | (4 - 2))
> > > > >  
> > > > >  /* Flush */
> > > > >  #define MI_FLUSH			(0x04<<23)
> > > > > @@ -2657,6 +2658,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> > > > >  #define MI_BATCH_BUFFER_END	(0xA << 23)
> > > > >  #define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
> > > > >  #define MI_DO_COMPARE                   (1 << 21)
> > > > > +#define MAD_GT_IDD			(0 << 12)
> > > > > +#define MAD_GT_OR_EQ_IDD		(1 << 12)
> > > > > +#define MAD_LT_IDD			(2 << 12)
> > > > > +#define MAD_LT_OR_EQ_IDD		(3 << 12)
> > > > > +#define MAD_EQ_IDD			(4 << 12)
> > > > > +#define MAD_NEQ_IDD			(5 << 12)
> > > > >  
> > > > >  #define MI_BATCH_NON_SECURE		(1)
> > > > >  #define MI_BATCH_NON_SECURE_I965	(1 << 8)
> > > > > diff --git a/lib/meson.build b/lib/meson.build
> > > > > index 0a173c1fc6..b05198ecc9 100644
> > > > > --- a/lib/meson.build
> > > > > +++ b/lib/meson.build
> > > > > @@ -10,6 +10,7 @@ lib_sources = [
> > > > >  	'i915/gem_ring.c',
> > > > >  	'i915/gem_mman.c',
> > > > >  	'i915/gem_vm.c',
> > > > > +	'i915/i915_crc.c',
> > > > >  	'i915/intel_memory_region.c',
> > > > >  	'i915/intel_mocs.c',
> > > > >  	'i915/i915_blt.c',
> > > > > -- 
> > > > > 2.32.0
> > > > > 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2
  2022-06-03 11:08 ` [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2 Zbigniew Kempczyński
@ 2022-06-03 11:39   ` Petri Latvala
  0 siblings, 0 replies; 16+ messages in thread
From: Petri Latvala @ 2022-06-03 11:39 UTC (permalink / raw)
  To: Zbigniew Kempczyński; +Cc: igt-dev

On Fri, Jun 03, 2022 at 01:08:27PM +0200, Zbigniew Kempczyński wrote:
> Adding crc32 calculation on gpu gives us a new possibility to verify data
> integrity without relying on trust that the cpu mapping is correct.
> 
> This patch introduces crc32 calculation on DG2 only. On older gens the ALU
> (MI_MATH) supports neither bit-shifting instructions nor multiply or
> divide. Emulating n-bit shifts costs hundreds of instructions with
> predicated SRM (which works on the render engine only). Another limitation
> is the lack of indexed load / store. On DG2 we can use WPARID and
> CS_MI_ADDRESS_OFFSET to achieve indexed operations on memory.
> 
> For performance reasons (cpu crc32 calculation even on WC memory is still
> much faster than on gpu, and also depends on the object's memory region)
> calculation completes in a reasonable amount of time only for a few MiB.
> 
> v2: - use registers relative to engine to allow running on all engines (Chris)
>     - use predication instead of memory access to get better performance
>       (Chris)
>     - add location where crc32 implementation comes from (Petri)
> 
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
> ---
>  lib/i915/i915_crc.c | 373 ++++++++++++++++++++++++++++++++++++++++++++
>  lib/i915/i915_crc.h |  17 ++
>  lib/intel_reg.h     |   7 +
>  lib/meson.build     |   1 +
>  4 files changed, 398 insertions(+)
>  create mode 100644 lib/i915/i915_crc.c
>  create mode 100644 lib/i915/i915_crc.h
> 
> diff --git a/lib/i915/i915_crc.c b/lib/i915/i915_crc.c
> new file mode 100644
> index 0000000000..ec5c793ff2
> --- /dev/null
> +++ b/lib/i915/i915_crc.c
> @@ -0,0 +1,373 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2022 Intel Corporation
> + */
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +#include "drmtest.h"
> +#include "gem_create.h"
> +#include "gem_engine_topology.h"
> +#include "gem_mman.h"
> +#include "i830_reg.h"
> +#include "i915_drm.h"
> +#include "intel_reg.h"
> +#include "intel_chipset.h"
> +#include "ioctl_wrappers.h"
> +#include "intel_allocator.h"
> +#include "i915/i915_crc.h"
> +
> +/*
> + * Taken from: https://github.com/lattera/freebsd/blob/master/sys/libkern/crc32.c
> + * with small codestyle changes.
> + */

The license of the original looks like it allows this copy, and
sublicensing to MIT. Good.

Although there's a copyright statement on top that claims this is all
from Intel... Add 1986 Gary S. Brown there and this is

Reviewed-by: Petri Latvala <petri.latvala@intel.com>


> +const uint32_t crc32_tab[] = {
> +	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
> +	0xe963a535, 0x9e6495a3,	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
> +	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
> +	0xf3b97148, 0x84be41de,	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
> +	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,	0x14015c4f, 0x63066cd9,
> +	0xfa0f3d63, 0x8d080df5,	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
> +	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,	0x35b5a8fa, 0x42b2986c,
> +	0xdbbbc9d6, 0xacbcf940,	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
> +	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
> +	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
> +	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,	0x76dc4190, 0x01db7106,
> +	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
> +	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
> +	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
> +	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
> +	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
> +	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
> +	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
> +	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
> +	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
> +	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
> +	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
> +	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
> +	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
> +	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
> +	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
> +	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
> +	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
> +	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
> +	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
> +	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
> +	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
> +	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
> +	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
> +	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
> +	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
> +	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
> +	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
> +	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
> +	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
> +	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
> +	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
> +	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
> +};
> +
> +uint32_t cpu_crc32(const void *buf, size_t size)
> +{
> +
> +	const uint8_t *p = buf;
> +	uint32_t crc;
> +
> +	crc = ~0U;
> +
> +	while (size--)
> +		crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
> +
> +	return crc ^ ~0U;
> +}
> +
> +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
> +
> +#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
> +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
> +/* Opcodes for MI_MATH_INSTR */
> +#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
> +#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
> +#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
> +#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
> +#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
> +#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
> +#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
> +#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
> +#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
> +#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
> +#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
> +#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
> +/* DG2+ */
> +#define   MI_MATH_SHL                   MI_MATH_INSTR(0x105, 0x0, 0x0)
> +#define   MI_MATH_SHR                   MI_MATH_INSTR(0x106, 0x0, 0x0)
> +#define   MI_MATH_SAR                   MI_MATH_INSTR(0x107, 0x0, 0x0)
> +
> +/* Registers used as operands in MI_MATH_INSTR */
> +#define   MI_MATH_REG(x)                (x)
> +#define   MI_MATH_REG_SRCA              0x20
> +#define   MI_MATH_REG_SRCB              0x21
> +#define   MI_MATH_REG_ACCU              0x31
> +#define   MI_MATH_REG_ZF                0x32
> +#define   MI_MATH_REG_CF                0x33
> +
> +#define MI_SET_PREDICATE                MI_INSTR(0x01, 0)
> +#define MI_ARB_CHECK                    MI_INSTR(0x5, 0)
> +#define MI_LOAD_REGISTER_REG            MI_INSTR(0x2A, 1)
> +#define CS_GPR(x)                       (0x600 + 8 * (x))
> +#define GPR(x)                          CS_GPR(x)
> +#define R(x)                            (x)
> +#define USERDATA(offset, idx)	        ((offset) + (0x100 + (idx)) * 4)
> +#define OFFSET(obj_offset, current, start) \
> +	((obj_offset) + (current - start) * 4)
> +
> +#define MI_PREDICATE_RESULT             0x3B8
> +#define WPARID                          0x21C
> +#define CS_MI_ADDRESS_OFFSET            0x3B4
> +
> +#define LOAD_REGISTER_REG(__reg_src, __reg_dst) do { \
> +		*bb++ = MI_LOAD_REGISTER_REG | BIT(19) | BIT(18); \
> +		*bb++ = (__reg_src); \
> +		*bb++ = (__reg_dst); \
> +	} while (0)
> +
> +#define LOAD_REGISTER_IMM32(__reg, __imm1) do { \
> +		*bb++ = MI_LOAD_REGISTER_IMM | BIT(19); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__imm1); \
> +	} while (0)
> +
> +#define LOAD_REGISTER_IMM64(__reg, __imm1, __imm2) do { \
> +		*bb++ = (MI_LOAD_REGISTER_IMM + 2) | BIT(19); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__imm1); \
> +		*bb++ = (__reg) + 4; \
> +		*bb++ = (__imm2); \
> +	} while (0)
> +
> +#define LOAD_REGISTER_MEM(__reg, __offset) do { \
> +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define LOAD_REGISTER_MEM_WPARID(__reg, __offset) do { \
> +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19) | BIT(16); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define STORE_REGISTER_MEM(__reg, __offset) do { \
> +		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define STORE_REGISTER_MEM_PREDICATED(__reg, __offset) do { \
> +		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19) | (BIT(21); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define COND_BBE(__value, __offset, __condition) do { \
> +		*bb++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (__condition) | 2; \
> +		*bb++ = (__value); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define MATH_4_STORE(__r1, __r2, __op, __r3) do { \
> +		*bb++ = MI_MATH(4); \
> +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(__r1)); \
> +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(__r2)); \
> +		*bb++ = (__op); \
> +		*bb++ = MI_MATH_STORE(MI_MATH_REG(__r3), MI_MATH_REG_ACCU); \
> +	} while (0)
> +
> +#define BBSIZE 4096
> +
> +/* Aliasing for easier refactoring */
> +#define GPR_SIZE	GPR(0)
> +#define R_SIZE		R(0)
> +
> +#define GPR_CRC		GPR(1)
> +#define R_CRC		R(1)
> +
> +#define GPR_INDATA_IDX  GPR(2)
> +#define R_INDATA_IDX	R(2)
> +
> +#define GPR_TABLE_IDX   GPR(3)
> +#define R_TABLE_IDX	R(3)
> +
> +#define GPR_CURR_DW	GPR(4)
> +#define R_CURR_DW	R(4)
> +
> +#define GPR_CONST_2	GPR(5)
> +#define R_CONST_2	R(5)
> +
> +#define GPR_CONST_4	GPR(6)
> +#define R_CONST_4	R(6)
> +
> +#define GPR_CONST_8	GPR(7)
> +#define R_CONST_8	R(7)
> +
> +#define GPR_CONST_ff	GPR(8)
> +#define R_CONST_ff	R(8)
> +
> +#define GPR_ffffffff    GPR(9)
> +#define R_ffffffff	R(9)
> +
> +#define GPR_TMP_1	GPR(10)
> +#define R_TMP_1		R(10)
> +
> +#define GPR_TMP_2	GPR(11)
> +#define R_TMP_2		R(11)
> +
> +static void fill_batch(int i915, uint32_t bb_handle, uint64_t bb_offset,
> +		       uint64_t table_offset, uint64_t data_offset, uint32_t data_size)
> +{
> +	uint32_t *bb, *batch, *jmp;
> +	const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
> +	const int use_64b = gen >= 8;
> +	uint64_t offset;
> +	uint64_t crc = USERDATA(table_offset, 0);
> +
> +	igt_assert(data_size % 4 == 0);
> +
> +	batch = gem_mmap__device_coherent(i915, bb_handle, 0, BBSIZE,
> +					  PROT_READ | PROT_WRITE);
> +	memset(batch, 0, BBSIZE);
> +
> +	bb = batch;
> +
> +	LOAD_REGISTER_IMM64(GPR_SIZE, data_size, 0);
> +	LOAD_REGISTER_IMM64(GPR_CRC, ~0U, 0);		/* crc start - 0xffffffff */
> +	LOAD_REGISTER_IMM64(GPR_INDATA_IDX, 0, 0);	/* data_offset index (0) */
> +	LOAD_REGISTER_IMM64(GPR_CONST_2, 2, 0);		/* const value 2 */
> +	LOAD_REGISTER_IMM64(GPR_CONST_4, 4, 0);		/* const value 4 */
> +	LOAD_REGISTER_IMM64(GPR_CONST_8, 8, 0);		/* const value 8 */
> +	LOAD_REGISTER_IMM64(GPR_CONST_ff, 0xff, 0);	/* const value 0xff */
> +	LOAD_REGISTER_IMM64(GPR_ffffffff, ~0U, 0);	/* const value 0xffffffff */
> +
> +	/* for indexed reads from memory */
> +	LOAD_REGISTER_IMM32(WPARID, 1);
> +
> +	jmp = bb;
> +
> +	*bb++ = MI_SET_PREDICATE;
> +	*bb++ = MI_ARB_CHECK;
> +
> +	/* data -> GPR4 */
> +	LOAD_REGISTER_REG(GPR_INDATA_IDX, CS_MI_ADDRESS_OFFSET);
> +	LOAD_REGISTER_MEM_WPARID(GPR_CURR_DW, data_offset);
> +
> +	for (int byte = 0; byte < 4; byte++) {
> +		if (byte != 0)
> +			MATH_4_STORE(R_CURR_DW, R_CONST_8,
> +				     MI_MATH_SHR, R_CURR_DW); /* dw >> 8 */
> +
> +		/* crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); */
> +		MATH_4_STORE(R_CURR_DW, R_CONST_ff,
> +			     MI_MATH_AND, R_TMP_1); /* dw & 0xff */
> +		MATH_4_STORE(R_CRC, R_TMP_1,
> +			     MI_MATH_XOR, R_TMP_1); /* crc ^ tmp */
> +		MATH_4_STORE(R_TMP_1, R_CONST_ff,
> +			     MI_MATH_AND, R_TMP_1); /* tmp & 0xff */
> +		MATH_4_STORE(R_TMP_1, R_CONST_2,
> +			     MI_MATH_SHL, R_TABLE_IDX); /* tmp << 2 (crc idx) */
> +
> +		LOAD_REGISTER_REG(GPR_TABLE_IDX, CS_MI_ADDRESS_OFFSET);
> +		LOAD_REGISTER_MEM_WPARID(GPR_TMP_1, table_offset);
> +
> +		MATH_4_STORE(R_CRC, R_CONST_8,
> +			     MI_MATH_SHR, R_TMP_2); /* crc >> 8 (shift) */
> +		MATH_4_STORE(R_TMP_2, R_TMP_1,
> +			     MI_MATH_XOR, R_CRC); /* crc = tab[v] ^ shift */
> +	}
> +
> +	/* increment data index */
> +	MATH_4_STORE(R_INDATA_IDX, R_CONST_4, MI_MATH_ADD, R_INDATA_IDX);
> +
> +	/* loop until R_SIZE == 0, R_SIZE = R_SIZE - R_CONST_4 */
> +
> +	*bb++ = MI_MATH(5);
> +	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(R_SIZE));
> +	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(R_CONST_4));
> +	*bb++ = MI_MATH_SUB;
> +	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_SIZE), MI_MATH_REG_ACCU);
> +	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_TMP_2), MI_MATH_REG_ZF);
> +	LOAD_REGISTER_REG(GPR_TMP_2, MI_PREDICATE_RESULT);
> +
> +	*bb++ = MI_BATCH_BUFFER_START | BIT(15) | BIT(8) | use_64b;
> +	offset = OFFSET(bb_offset, jmp, batch);
> +	*bb++ = offset;
> +	*bb++ = offset >> 32;
> +
> +	*bb++ = MI_SET_PREDICATE;
> +
> +	MATH_4_STORE(R_CRC, R_ffffffff, MI_MATH_XOR, R_TMP_1);
> +	STORE_REGISTER_MEM(GPR_TMP_1, crc);
> +
> +	*bb++ = MI_BATCH_BUFFER_END;
> +
> +	gem_munmap(batch, BBSIZE);
> +}
> +
> +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> +		    const struct intel_execution_engine2 *e,
> +		    uint32_t data_handle, uint32_t data_size)
> +{
> +	struct drm_i915_gem_execbuffer2 execbuf = {};
> +	struct drm_i915_gem_exec_object2 obj[3] = {};
> +	uint64_t bb_offset, table_offset, data_offset;
> +	uint32_t bb, table, crc, table_size = 4096;
> +	uint32_t *ptr;
> +
> +	igt_assert(data_size % 4 == 0);
> +
> +	table = gem_create_in_memory_regions(i915, table_size, REGION_LMEM(0));
> +	gem_write(i915, table, 0, crc32_tab, sizeof(crc32_tab));
> +
> +	table_offset = get_offset(ahnd, table, table_size, 0);
> +	data_offset = get_offset(ahnd, data_handle, data_size, 0);
> +
> +	obj[0].offset = table_offset;
> +	obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
> +	obj[0].handle = table;
> +
> +	obj[1].offset = data_offset;
> +	obj[1].flags = EXEC_OBJECT_PINNED;
> +	obj[1].handle = data_handle;
> +
> +	bb = gem_create_in_memory_regions(i915, BBSIZE, REGION_LMEM(0));
> +	bb_offset = get_offset(ahnd, bb, BBSIZE, 0);
> +	fill_batch(i915, bb, bb_offset, table_offset, data_offset, data_size);
> +	obj[2].offset = bb_offset;
> +	obj[2].flags = EXEC_OBJECT_PINNED;
> +	obj[2].handle = bb;
> +	execbuf.buffer_count = 3;
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.flags = e->flags;
> +	execbuf.rsvd1 = ctx->id;
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, table);
> +
> +	ptr = gem_mmap__device_coherent(i915, table, 0, table_size, PROT_READ);
> +	crc = ptr[0x100];
> +	gem_munmap(ptr, table_size);
> +	gem_close(i915, table);
> +	gem_close(i915, bb);
> +
> +	return crc;
> +}
> +
> +bool supports_gpu_crc32(int i915)
> +{
> +	uint16_t devid = intel_get_drm_devid(i915);
> +
> +	return IS_DG2(devid);
> +}
> diff --git a/lib/i915/i915_crc.h b/lib/i915/i915_crc.h
> new file mode 100644
> index 0000000000..bb0195e2a8
> --- /dev/null
> +++ b/lib/i915/i915_crc.h
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2022 Intel Corporation
> + */
> +#ifndef _I915_CRC_H_
> +#define _I915_CRC_H_
> +
> +#include <stdint.h>
> +#include "intel_ctx.h"
> +
> +uint32_t cpu_crc32(const void *buf, size_t size);
> +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> +		    const struct intel_execution_engine2 *e,
> +		    uint32_t data_handle, uint32_t data_size);
> +bool supports_gpu_crc32(int i915);
> +
> +#endif /* _I915_CRC_H_ */
> diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> index cb62728896..fff32e1816 100644
> --- a/lib/intel_reg.h
> +++ b/lib/intel_reg.h
> @@ -2625,6 +2625,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
>  #define MI_LOAD_REGISTER_MEM_GEN8	((0x29 << 23) | (4 - 2))
>  #define   MI_MMIO_REMAP_ENABLE_GEN12	(1 << 17)
> +#define MI_STORE_REGISTER_MEM_GEN8	((0x24 << 23) | (4 - 2))
>  
>  /* Flush */
>  #define MI_FLUSH			(0x04<<23)
> @@ -2657,6 +2658,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  #define MI_BATCH_BUFFER_END	(0xA << 23)
>  #define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
>  #define MI_DO_COMPARE                   (1 << 21)
> +#define MAD_GT_IDD			(0 << 12)
> +#define MAD_GT_OR_EQ_IDD		(1 << 12)
> +#define MAD_LT_IDD			(2 << 12)
> +#define MAD_LT_OR_EQ_IDD		(3 << 12)
> +#define MAD_EQ_IDD			(4 << 12)
> +#define MAD_NEQ_IDD			(5 << 12)
>  
>  #define MI_BATCH_NON_SECURE		(1)
>  #define MI_BATCH_NON_SECURE_I965	(1 << 8)
> diff --git a/lib/meson.build b/lib/meson.build
> index 0a173c1fc6..b05198ecc9 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -10,6 +10,7 @@ lib_sources = [
>  	'i915/gem_ring.c',
>  	'i915/gem_mman.c',
>  	'i915/gem_vm.c',
> +	'i915/i915_crc.c',
>  	'i915/intel_memory_region.c',
>  	'i915/intel_mocs.c',
>  	'i915/i915_blt.c',
> -- 
> 2.32.0
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2
  2022-06-03 11:08 [igt-dev] [PATCH i-g-t 0/2] Add crc32 calculation on dg2 gpu Zbigniew Kempczyński
@ 2022-06-03 11:08 ` Zbigniew Kempczyński
  2022-06-03 11:39   ` Petri Latvala
  0 siblings, 1 reply; 16+ messages in thread
From: Zbigniew Kempczyński @ 2022-06-03 11:08 UTC (permalink / raw)
  To: igt-dev

Adding crc32 calculation on gpu gives us a new possibility to verify data
integrity without relying on trust that the cpu mapping is correct.

This patch introduces crc32 calculation on DG2 only. On older gens the ALU
(MI_MATH) supports neither bit-shifting instructions nor multiply or
divide. Emulating n-bit shifts costs hundreds of instructions with
predicated SRM (which works on the render engine only). Another limitation
is the lack of indexed load / store. On DG2 we can use WPARID and
CS_MI_ADDRESS_OFFSET to achieve indexed operations on memory.

For performance reasons (cpu crc32 calculation even on WC memory is still
much faster than on gpu, and also depends on the object's memory region)
calculation completes in a reasonable amount of time only for a few MiB.

v2: - use registers relative to engine to allow running on all engines (Chris)
    - use predication instead of memory access to get better performance
      (Chris)
    - add location where crc32 implementation comes from (Petri)

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
---
 lib/i915/i915_crc.c | 373 ++++++++++++++++++++++++++++++++++++++++++++
 lib/i915/i915_crc.h |  17 ++
 lib/intel_reg.h     |   7 +
 lib/meson.build     |   1 +
 4 files changed, 398 insertions(+)
 create mode 100644 lib/i915/i915_crc.c
 create mode 100644 lib/i915/i915_crc.h

diff --git a/lib/i915/i915_crc.c b/lib/i915/i915_crc.c
new file mode 100644
index 0000000000..ec5c793ff2
--- /dev/null
+++ b/lib/i915/i915_crc.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include "drmtest.h"
+#include "gem_create.h"
+#include "gem_engine_topology.h"
+#include "gem_mman.h"
+#include "i830_reg.h"
+#include "i915_drm.h"
+#include "intel_reg.h"
+#include "intel_chipset.h"
+#include "ioctl_wrappers.h"
+#include "intel_allocator.h"
+#include "i915/i915_crc.h"
+
+/*
+ * Taken from: https://github.com/lattera/freebsd/blob/master/sys/libkern/crc32.c
+ * with small codestyle changes.
+ */
+const uint32_t crc32_tab[] = {
+	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
+	0xe963a535, 0x9e6495a3,	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
+	0xf3b97148, 0x84be41de,	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,	0x14015c4f, 0x63066cd9,
+	0xfa0f3d63, 0x8d080df5,	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,	0x35b5a8fa, 0x42b2986c,
+	0xdbbbc9d6, 0xacbcf940,	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
+	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,	0x76dc4190, 0x01db7106,
+	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
+	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
+	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
+	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
+	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
+	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
+	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
+	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
+	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
+	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
+	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
+	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
+	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+/*
+ * cpu_crc32 - table-driven crc32 over @size bytes starting at @buf, computed
+ * on the cpu. The running value starts as all ones and is xored with all
+ * ones again before it is returned.
+ */
+uint32_t cpu_crc32(const void *buf, size_t size)
+{
+	const uint8_t *bytes = buf;
+	uint32_t sum = ~0U;
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		/* low byte of (sum ^ input byte) selects the table entry */
+		const uint8_t idx = (sum ^ bytes[i]) & 0xFF;
+
+		sum = crc32_tab[idx] ^ (sum >> 8);
+	}
+
+	return sum ^ ~0U;
+}
+
+/* Command header dword: @opcode shifted to bit 23, ORed with @flags. */
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+/*
+ * MI_MATH header. Users in this file pass the number of ALU instruction
+ * dwords that follow the header as @x.
+ */
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+/* One ALU instruction dword: opcode at bit 20, first operand at bit 10. */
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+/*
+ * LOAD0/LOAD1 take a single operand; the second operand field is passed
+ * as 0x0 so that the three-parameter MI_MATH_INSTR() expands correctly.
+ */
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1, 0x0)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1, 0x0)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* DG2+ */
+#define   MI_MATH_SHL                   MI_MATH_INSTR(0x105, 0x0, 0x0)
+#define   MI_MATH_SHR                   MI_MATH_INSTR(0x106, 0x0, 0x0)
+#define   MI_MATH_SAR                   MI_MATH_INSTR(0x107, 0x0, 0x0)
+
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_SET_PREDICATE                MI_INSTR(0x01, 0)
+#define MI_ARB_CHECK                    MI_INSTR(0x5, 0)
+#define MI_LOAD_REGISTER_REG            MI_INSTR(0x2A, 1)
+#define CS_GPR(x)                       (0x600 + 8 * (x))
+#define GPR(x)                          CS_GPR(x)
+#define R(x)                            (x)
+#define USERDATA(offset, idx)	        ((offset) + (0x100 + (idx)) * 4)
+#define OFFSET(obj_offset, current, start) \
+	((obj_offset) + (current - start) * 4)
+
+#define MI_PREDICATE_RESULT             0x3B8
+#define WPARID                          0x21C
+#define CS_MI_ADDRESS_OFFSET            0x3B4
+
+#define LOAD_REGISTER_REG(__reg_src, __reg_dst) do { \
+		*bb++ = MI_LOAD_REGISTER_REG | BIT(19) | BIT(18); \
+		*bb++ = (__reg_src); \
+		*bb++ = (__reg_dst); \
+	} while (0)
+
+#define LOAD_REGISTER_IMM32(__reg, __imm1) do { \
+		*bb++ = MI_LOAD_REGISTER_IMM | BIT(19); \
+		*bb++ = (__reg); \
+		*bb++ = (__imm1); \
+	} while (0)
+
+#define LOAD_REGISTER_IMM64(__reg, __imm1, __imm2) do { \
+		*bb++ = (MI_LOAD_REGISTER_IMM + 2) | BIT(19); \
+		*bb++ = (__reg); \
+		*bb++ = (__imm1); \
+		*bb++ = (__reg) + 4; \
+		*bb++ = (__imm2); \
+	} while (0)
+
+#define LOAD_REGISTER_MEM(__reg, __offset) do { \
+		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19); \
+		*bb++ = (__reg); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+#define LOAD_REGISTER_MEM_WPARID(__reg, __offset) do { \
+		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | BIT(19) | BIT(16); \
+		*bb++ = (__reg); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+#define STORE_REGISTER_MEM(__reg, __offset) do { \
+		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19); \
+		*bb++ = (__reg); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+/*
+ * Emits the same four dwords as STORE_REGISTER_MEM with BIT(21) additionally
+ * set in the header. NOTE(review): BIT(21) is presumably the predicate-enable
+ * bit of MI_STORE_REGISTER_MEM - confirm against the command reference.
+ */
+#define STORE_REGISTER_MEM_PREDICATED(__reg, __offset) do { \
+		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | BIT(19) | BIT(21); \
+		*bb++ = (__reg); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+#define COND_BBE(__value, __offset, __condition) do { \
+		*bb++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (__condition) | 2; \
+		*bb++ = (__value); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+#define MATH_4_STORE(__r1, __r2, __op, __r3) do { \
+		*bb++ = MI_MATH(4); \
+		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(__r1)); \
+		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(__r2)); \
+		*bb++ = (__op); \
+		*bb++ = MI_MATH_STORE(MI_MATH_REG(__r3), MI_MATH_REG_ACCU); \
+	} while (0)
+
+#define BBSIZE 4096
+
+/* Aliasing for easier refactoring */
+#define GPR_SIZE	GPR(0)
+#define R_SIZE		R(0)
+
+#define GPR_CRC		GPR(1)
+#define R_CRC		R(1)
+
+#define GPR_INDATA_IDX  GPR(2)
+#define R_INDATA_IDX	R(2)
+
+#define GPR_TABLE_IDX   GPR(3)
+#define R_TABLE_IDX	R(3)
+
+#define GPR_CURR_DW	GPR(4)
+#define R_CURR_DW	R(4)
+
+#define GPR_CONST_2	GPR(5)
+#define R_CONST_2	R(5)
+
+#define GPR_CONST_4	GPR(6)
+#define R_CONST_4	R(6)
+
+#define GPR_CONST_8	GPR(7)
+#define R_CONST_8	R(7)
+
+#define GPR_CONST_ff	GPR(8)
+#define R_CONST_ff	R(8)
+
+#define GPR_ffffffff    GPR(9)
+#define R_ffffffff	R(9)
+
+#define GPR_TMP_1	GPR(10)
+#define R_TMP_1		R(10)
+
+#define GPR_TMP_2	GPR(11)
+#define R_TMP_2		R(11)
+/*
+ * fill_batch - write the crc32 command stream into the batch object.
+ *
+ * Maps @bb_handle (BBSIZE bytes), clears it and emits commands which consume
+ * @data_size bytes found at @data_offset, one dword per loop iteration, and
+ * look every byte up in the crc table found at @table_offset. After the loop
+ * the crc register is xored with 0xffffffff and stored to
+ * USERDATA(table_offset, 0), that is dword 0x100 of the table object.
+ * @bb_offset is only used to compute the target of the loop jump (the
+ * position remembered in jmp). @data_size has to be a multiple of 4.
+ */
+static void fill_batch(int i915, uint32_t bb_handle, uint64_t bb_offset,
+		       uint64_t table_offset, uint64_t data_offset, uint32_t data_size)
+{
+	uint32_t *bb, *batch, *jmp;
+	const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
+	const int use_64b = gen >= 8;
+	uint64_t offset;
+	uint64_t crc = USERDATA(table_offset, 0);
+
+	igt_assert(data_size % 4 == 0);
+
+	batch = gem_mmap__device_coherent(i915, bb_handle, 0, BBSIZE,
+					  PROT_READ | PROT_WRITE);
+	memset(batch, 0, BBSIZE);
+
+	bb = batch;
+
+	LOAD_REGISTER_IMM64(GPR_SIZE, data_size, 0);
+	LOAD_REGISTER_IMM64(GPR_CRC, ~0U, 0);		/* crc start - 0xffffffff */
+	LOAD_REGISTER_IMM64(GPR_INDATA_IDX, 0, 0);	/* data_offset index (0) */
+	LOAD_REGISTER_IMM64(GPR_CONST_2, 2, 0);		/* const value 2 */
+	LOAD_REGISTER_IMM64(GPR_CONST_4, 4, 0);		/* const value 4 */
+	LOAD_REGISTER_IMM64(GPR_CONST_8, 8, 0);		/* const value 8 */
+	LOAD_REGISTER_IMM64(GPR_CONST_ff, 0xff, 0);	/* const value 0xff */
+	LOAD_REGISTER_IMM64(GPR_ffffffff, ~0U, 0);	/* const value 0xffffffff */
+
+	/*
+	 * for indexed reads from memory
+	 * NOTE(review): the commit message says WPARID together with
+	 * CS_MI_ADDRESS_OFFSET provides the indexed access on DG2; the exact
+	 * meaning of the value 1 is not visible here - confirm in the docs.
+	 */
+	LOAD_REGISTER_IMM32(WPARID, 1);
+
+	jmp = bb;
+
+	*bb++ = MI_SET_PREDICATE;
+	*bb++ = MI_ARB_CHECK;
+
+	/*
+	 * data -> GPR4 (GPR_CURR_DW): the data index register goes to
+	 * CS_MI_ADDRESS_OFFSET first, then the dword is loaded relative to
+	 * data_offset
+	 */
+	LOAD_REGISTER_REG(GPR_INDATA_IDX, CS_MI_ADDRESS_OFFSET);
+	LOAD_REGISTER_MEM_WPARID(GPR_CURR_DW, data_offset);
+
+	for (int byte = 0; byte < 4; byte++) {
+		if (byte != 0)
+			MATH_4_STORE(R_CURR_DW, R_CONST_8,
+				     MI_MATH_SHR, R_CURR_DW); /* dw >> 8 */
+
+		/*
+		 * crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
+		 * the dword is consumed lowest byte first, the table index is
+		 * multiplied by 4 (<< 2) to become a byte offset into the table
+		 */
+		MATH_4_STORE(R_CURR_DW, R_CONST_ff,
+			     MI_MATH_AND, R_TMP_1); /* dw & 0xff */
+		MATH_4_STORE(R_CRC, R_TMP_1,
+			     MI_MATH_XOR, R_TMP_1); /* crc ^ tmp */
+		MATH_4_STORE(R_TMP_1, R_CONST_ff,
+			     MI_MATH_AND, R_TMP_1); /* tmp & 0xff */
+		MATH_4_STORE(R_TMP_1, R_CONST_2,
+			     MI_MATH_SHL, R_TABLE_IDX); /* tmp << 2 (crc idx) */
+
+		LOAD_REGISTER_REG(GPR_TABLE_IDX, CS_MI_ADDRESS_OFFSET);
+		LOAD_REGISTER_MEM_WPARID(GPR_TMP_1, table_offset);
+
+		MATH_4_STORE(R_CRC, R_CONST_8,
+			     MI_MATH_SHR, R_TMP_2); /* crc >> 8 (shift) */
+		MATH_4_STORE(R_TMP_2, R_TMP_1,
+			     MI_MATH_XOR, R_CRC); /* crc = tab[v] ^ shift */
+	}
+
+	/* increment data index */
+	MATH_4_STORE(R_INDATA_IDX, R_CONST_4, MI_MATH_ADD, R_INDATA_IDX);
+
+	/*
+	 * loop until R_SIZE == 0, R_SIZE = R_SIZE - R_CONST_4
+	 * The zero flag of the subtraction is stored to R_TMP_2 and that
+	 * register is then loaded into MI_PREDICATE_RESULT.
+	 * NOTE(review): presumably the predicate gates the jump back to jmp
+	 * below, so the loop is left once R_SIZE reaches zero - confirm the
+	 * meaning of BIT(15) in MI_BATCH_BUFFER_START.
+	 */
+
+	*bb++ = MI_MATH(5);
+	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(R_SIZE));
+	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(R_CONST_4));
+	*bb++ = MI_MATH_SUB;
+	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_SIZE), MI_MATH_REG_ACCU);
+	*bb++ = MI_MATH_STORE(MI_MATH_REG(R_TMP_2), MI_MATH_REG_ZF);
+	LOAD_REGISTER_REG(GPR_TMP_2, MI_PREDICATE_RESULT);
+
+	*bb++ = MI_BATCH_BUFFER_START | BIT(15) | BIT(8) | use_64b;
+	offset = OFFSET(bb_offset, jmp, batch);
+	*bb++ = offset;
+	*bb++ = offset >> 32;
+
+	*bb++ = MI_SET_PREDICATE;
+
+	MATH_4_STORE(R_CRC, R_ffffffff, MI_MATH_XOR, R_TMP_1);
+	STORE_REGISTER_MEM(GPR_TMP_1, crc);
+
+	*bb++ = MI_BATCH_BUFFER_END;
+
+	gem_munmap(batch, BBSIZE);
+}
+
+/*
+ * i915_crc32 - calculate crc32 of a gem object on the gpu.
+ * @i915: drm fd
+ * @ahnd: allocator handle the object offsets are taken from
+ * @ctx: context whose id is passed to execbuf
+ * @e: engine whose flags are passed to execbuf
+ * @data_handle: gem handle of the object to checksum
+ * @data_size: number of bytes to checksum, must be a multiple of 4
+ *
+ * Creates a temporary table object (crc32_tab plus room for the result) and
+ * a temporary batch, submits the batch, calls gem_sync() on the table and
+ * reads the result from dword 0x100 of the table. Both temporary objects
+ * are closed and their offsets are handed back to the allocator, so a later
+ * object reusing one of the handle ids does not inherit a stale offset.
+ * The offset of @data_handle is left allocated; the object belongs to the
+ * caller.
+ *
+ * Returns: crc32 as read back from the table object.
+ */
+uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
+		    const struct intel_execution_engine2 *e,
+		    uint32_t data_handle, uint32_t data_size)
+{
+	struct drm_i915_gem_execbuffer2 execbuf = {};
+	struct drm_i915_gem_exec_object2 obj[3] = {};
+	uint64_t bb_offset, table_offset, data_offset;
+	uint32_t bb, table, crc, table_size = 4096;
+	uint32_t *ptr;
+
+	igt_assert(data_size % 4 == 0);
+
+	table = gem_create_in_memory_regions(i915, table_size, REGION_LMEM(0));
+	gem_write(i915, table, 0, crc32_tab, sizeof(crc32_tab));
+
+	table_offset = get_offset(ahnd, table, table_size, 0);
+	data_offset = get_offset(ahnd, data_handle, data_size, 0);
+
+	/* the batch stores the result into the table object, hence WRITE */
+	obj[0].offset = table_offset;
+	obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+	obj[0].handle = table;
+
+	obj[1].offset = data_offset;
+	obj[1].flags = EXEC_OBJECT_PINNED;
+	obj[1].handle = data_handle;
+
+	bb = gem_create_in_memory_regions(i915, BBSIZE, REGION_LMEM(0));
+	bb_offset = get_offset(ahnd, bb, BBSIZE, 0);
+	fill_batch(i915, bb, bb_offset, table_offset, data_offset, data_size);
+	obj[2].offset = bb_offset;
+	obj[2].flags = EXEC_OBJECT_PINNED;
+	obj[2].handle = bb;
+	execbuf.buffer_count = 3;
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.flags = e->flags;
+	execbuf.rsvd1 = ctx->id;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, table);
+
+	/* result sits in USERDATA slot 0, i.e. dword 0x100 of the table */
+	ptr = gem_mmap__device_coherent(i915, table, 0, table_size, PROT_READ);
+	crc = ptr[0x100];
+	gem_munmap(ptr, table_size);
+
+	/* release the offsets of the temporary objects before closing them */
+	put_offset(ahnd, table);
+	put_offset(ahnd, bb);
+	gem_close(i915, table);
+	gem_close(i915, bb);
+
+	return crc;
+}
+
+/*
+ * supports_gpu_crc32 - tell whether the device behind @i915 can run the gpu
+ * crc32 batch; true only when IS_DG2() matches its device id.
+ */
+bool supports_gpu_crc32(int i915)
+{
+	const uint16_t id = intel_get_drm_devid(i915);
+
+	return IS_DG2(id);
+}
diff --git a/lib/i915/i915_crc.h b/lib/i915/i915_crc.h
new file mode 100644
index 0000000000..bb0195e2a8
--- /dev/null
+++ b/lib/i915/i915_crc.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _I915_CRC_H_
+#define _I915_CRC_H_
+
+/* bool and size_t are used by the prototypes below */
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "intel_ctx.h"
+
+/* crc32 of @size bytes at @buf, computed on the cpu */
+uint32_t cpu_crc32(const void *buf, size_t size);
+/*
+ * crc32 of @data_size bytes of @data_handle, computed on the gpu on engine
+ * @e of @ctx; @data_size must be a multiple of 4
+ */
+uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
+		    const struct intel_execution_engine2 *e,
+		    uint32_t data_handle, uint32_t data_size);
+/* true when the device supports i915_crc32() */
+bool supports_gpu_crc32(int i915);
+
+#endif /* _I915_CRC_H_ */
diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index cb62728896..fff32e1816 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -2625,6 +2625,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
 #define MI_LOAD_REGISTER_MEM_GEN8	((0x29 << 23) | (4 - 2))
 #define   MI_MMIO_REMAP_ENABLE_GEN12	(1 << 17)
+#define MI_STORE_REGISTER_MEM_GEN8	((0x24 << 23) | (4 - 2))
 
 /* Flush */
 #define MI_FLUSH			(0x04<<23)
@@ -2657,6 +2658,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MI_BATCH_BUFFER_END	(0xA << 23)
 #define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
 #define MI_DO_COMPARE                   (1 << 21)
+#define MAD_GT_IDD			(0 << 12)
+#define MAD_GT_OR_EQ_IDD		(1 << 12)
+#define MAD_LT_IDD			(2 << 12)
+#define MAD_LT_OR_EQ_IDD		(3 << 12)
+#define MAD_EQ_IDD			(4 << 12)
+#define MAD_NEQ_IDD			(5 << 12)
 
 #define MI_BATCH_NON_SECURE		(1)
 #define MI_BATCH_NON_SECURE_I965	(1 << 8)
diff --git a/lib/meson.build b/lib/meson.build
index 0a173c1fc6..b05198ecc9 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -10,6 +10,7 @@ lib_sources = [
 	'i915/gem_ring.c',
 	'i915/gem_mman.c',
 	'i915/gem_vm.c',
+	'i915/i915_crc.c',
 	'i915/intel_memory_region.c',
 	'i915/intel_mocs.c',
 	'i915/i915_blt.c',
-- 
2.32.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2
  2022-06-02 13:17 ` [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2 Zbigniew Kempczyński
@ 2022-06-02 13:30   ` Petri Latvala
  0 siblings, 0 replies; 16+ messages in thread
From: Petri Latvala @ 2022-06-02 13:30 UTC (permalink / raw)
  To: Zbigniew Kempczyński; +Cc: igt-dev

On Thu, Jun 02, 2022 at 03:17:57PM +0200, Zbigniew Kempczyński wrote:
> Adding crc32 calculation on gpu gives us new possibility to verify data
> integrity without relying on trust cpu mapping is correct.
> 
> Patch introduces calculating crc32 on DG2 only. On older gens ALU
> (MI_MATH) doesn't support bit-shifting instructions as well as multiply
> or divide. Emulating n-bit shifts cost hundred of instructions with
> predicated SRM (works on render engine only). Another limitation is lack
> of indexed load / store. On DG2 we can use WPARID and CS_MI_ADDRESS_OFFSET
> to achieve indexed operation on memory.
> 
> Due to performance reasons (cpu crc32 calculation even on WC memory is
> still much faster than on gpu, also depends on calculated object memory
> region) calculation will complete in reasonable of time only for few MiB.
> 
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
> ---
>  lib/i915/i915_crc.c | 369 ++++++++++++++++++++++++++++++++++++++++++++
>  lib/i915/i915_crc.h |  16 ++
>  lib/intel_reg.h     |   7 +
>  lib/meson.build     |   1 +
>  4 files changed, 393 insertions(+)
>  create mode 100644 lib/i915/i915_crc.c
>  create mode 100644 lib/i915/i915_crc.h
> 
> diff --git a/lib/i915/i915_crc.c b/lib/i915/i915_crc.c
> new file mode 100644
> index 0000000000..a1851d8073
> --- /dev/null
> +++ b/lib/i915/i915_crc.c
> @@ -0,0 +1,369 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2022 Intel Corporation
> + */
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +#include "drmtest.h"
> +#include "gem_create.h"
> +#include "gem_engine_topology.h"
> +#include "gem_mman.h"
> +#include "i830_reg.h"
> +#include "i915_drm.h"
> +#include "intel_reg.h"
> +#include "intel_chipset.h"
> +#include "ioctl_wrappers.h"
> +#include "intel_allocator.h"
> +#include "i915/i915_crc.h"
> +
> +const uint32_t crc32_tab[] = {
> +	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
> +	0xe963a535, 0x9e6495a3,	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
> +	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
> +	0xf3b97148, 0x84be41de,	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
> +	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,	0x14015c4f, 0x63066cd9,
> +	0xfa0f3d63, 0x8d080df5,	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
> +	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,	0x35b5a8fa, 0x42b2986c,
> +	0xdbbbc9d6, 0xacbcf940,	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
> +	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
> +	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
> +	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,	0x76dc4190, 0x01db7106,
> +	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
> +	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
> +	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
> +	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
> +	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
> +	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
> +	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
> +	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
> +	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
> +	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
> +	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
> +	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
> +	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
> +	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
> +	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
> +	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
> +	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
> +	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
> +	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
> +	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
> +	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
> +	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
> +	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
> +	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
> +	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
> +	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
> +	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
> +	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
> +	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
> +	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
> +	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
> +	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
> +};
> +
> +uint32_t cpu_crc32(const void *buf, size_t size)
> +{
> +
> +	const uint8_t *p = buf;
> +	uint32_t crc;
> +
> +	crc = ~0U;
> +
> +	while (size--)
> +		crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
> +
> +	return crc ^ ~0U;
> +}

Maybe add a reference to where this is copied from in a comment, the
same code can be found in various places in this exact form.

-- 
Petri Latvala



> +
> +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
> +
> +#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
> +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
> +/* Opcodes for MI_MATH_INSTR */
> +#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
> +#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
> +#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
> +#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
> +#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
> +#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
> +#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
> +#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
> +#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
> +#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
> +#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
> +#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
> +/* DG2+ */
> +#define   MI_MATH_SHL                   MI_MATH_INSTR(0x105, 0x0, 0x0)
> +#define   MI_MATH_SHR                   MI_MATH_INSTR(0x106, 0x0, 0x0)
> +#define   MI_MATH_SAR                   MI_MATH_INSTR(0x107, 0x0, 0x0)
> +
> +/* Registers used as operands in MI_MATH_INSTR */
> +#define   MI_MATH_REG(x)                (x)
> +#define   MI_MATH_REG_SRCA              0x20
> +#define   MI_MATH_REG_SRCB              0x21
> +#define   MI_MATH_REG_ACCU              0x31
> +#define   MI_MATH_REG_ZF                0x32
> +#define   MI_MATH_REG_CF                0x33
> +
> +#define MI_LOAD_REGISTER_REG            MI_INSTR(0x2A, 1)
> +#define CS_GPR(base, x)                 (base + 0x600 + 8 * (x))
> +#define GPR(x)                          CS_GPR(base, (x))
> +#define R(x)                            (x)
> +#define USERDATA(offset, idx)	        ((offset) + (0x100 + (idx)) * 4)
> +#define OFFSET(obj_offset, current, start) \
> +	((obj_offset) + (current - start) * 4)
> +
> +#define WPARID                          0x221C
> +#define CS_MI_ADDRESS_OFFSET            0x23B4
> +
> +#define LOAD_REGISTER_REG(__reg_src, __reg_dst) do { \
> +		*bb++ = MI_LOAD_REGISTER_REG; \
> +		*bb++ = (__reg_src); \
> +		*bb++ = (__reg_dst); \
> +	} while (0)
> +
> +#define LOAD_REGISTER_IMM32(__reg, __imm1) do { \
> +		*bb++ = MI_LOAD_REGISTER_IMM; \
> +		*bb++ = (__reg); \
> +		*bb++ = (__imm1); \
> +	} while (0)
> +
> +#define LOAD_REGISTER_IMM64(__reg, __imm1, __imm2) do { \
> +		*bb++ = MI_LOAD_REGISTER_IMM; \
> +		*bb++ = (__reg); \
> +		*bb++ = (__imm1); \
> +		*bb++ = MI_LOAD_REGISTER_IMM; \
> +		*bb++ = (__reg) + 4; \
> +		*bb++ = (__imm2); \
> +	} while (0)
> +
> +#define LOAD_REGISTER_MEM(__reg, __offset) do { \
> +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8; \
> +		*bb++ = (__reg); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define LOAD_REGISTER_MEM_WPARID(__reg, __offset) do { \
> +		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | (1 << 16); \
> +		*bb++ = (__reg); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define STORE_REGISTER_MEM(__gpr, __offset) do { \
> +		*bb++ = MI_STORE_REGISTER_MEM_GEN8; \
> +		*bb++ = (__gpr); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define COND_BBE(__value, __offset, __condition) do { \
> +		*bb++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (__condition) | 2; \
> +		*bb++ = (__value); \
> +		*bb++ = (__offset); \
> +		*bb++ = (__offset) >> 32; \
> +	} while (0)
> +
> +#define MATH_4_STORE(__r1, __r2, __op, __r3) do { \
> +		*bb++ = MI_MATH(4); \
> +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(__r1)); \
> +		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(__r2)); \
> +		*bb++ = (__op); \
> +		*bb++ = MI_MATH_STORE(MI_MATH_REG(__r3), MI_MATH_REG_ACCU); \
> +	} while (0)
> +
> +#define UNROLL 16
> +#define CODESIZE 2048
> +#define BBSIZE (UNROLL * CODESIZE)
> +
> +/* Aliasing for easier refactoring */
> +#define GPR_SIZE	GPR(0)
> +#define R_SIZE		R(0)
> +
> +#define GPR_CRC		GPR(1)
> +#define R_CRC		R(1)
> +
> +#define GPR_INDATA_IDX  GPR(2)
> +#define R_INDATA_IDX	R(2)
> +
> +#define GPR_TABLE_IDX   GPR(3)
> +#define R_TABLE_IDX	R(3)
> +
> +#define GPR_CURR_DW	GPR(4)
> +#define R_CURR_DW	R(4)
> +
> +#define GPR_CONST_2	GPR(5)
> +#define R_CONST_2	R(5)
> +
> +#define GPR_CONST_4	GPR(6)
> +#define R_CONST_4	R(6)
> +
> +#define GPR_CONST_8	GPR(7)
> +#define R_CONST_8	R(7)
> +
> +#define GPR_CONST_ff	GPR(8)
> +#define R_CONST_ff	R(8)
> +
> +#define GPR_ffffffff    GPR(9)
> +#define R_ffffffff	R(9)
> +
> +#define GPR_TMP_1	GPR(10)
> +#define R_TMP_1		R(10)
> +
> +#define GPR_TMP_2	GPR(11)
> +#define R_TMP_2		R(11)
> +
> +static void fill_batch(int i915, uint32_t bb_handle, uint64_t bb_offset,
> +		       uint64_t table_offset, uint64_t data_offset, uint32_t data_size)
> +{
> +	uint32_t *bb, *batch, *jmp;
> +	const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
> +	const int use_64b = gen >= 8;
> +	uint32_t base = gem_engine_mmio_base(i915, "rcs0");
> +	uint64_t offset;
> +	uint64_t loop_status = USERDATA(table_offset, 0);
> +	uint64_t crc = USERDATA(table_offset, 1);
> +
> +	igt_assert(data_size % 4 == 0);
> +
> +	batch = gem_mmap__device_coherent(i915, bb_handle, 0, BBSIZE,
> +					  PROT_READ | PROT_WRITE);
> +	memset(batch, 0, BBSIZE);
> +
> +	bb = batch;
> +
> +	LOAD_REGISTER_IMM64(GPR_SIZE, data_size, 0);
> +	STORE_REGISTER_MEM(GPR_SIZE, loop_status); /* save to mem for cond-bbe compare */
> +
> +	LOAD_REGISTER_IMM64(GPR_CRC, ~0U, 0);		/* crc start - 0xffffffff */
> +	LOAD_REGISTER_IMM64(GPR_INDATA_IDX, 0, 0);	/* data_offset index (0) */
> +	LOAD_REGISTER_IMM64(GPR_CONST_2, 2, 0);		/* const value 2 */
> +	LOAD_REGISTER_IMM64(GPR_CONST_4, 4, 0);		/* const value 4 */
> +	LOAD_REGISTER_IMM64(GPR_CONST_8, 8, 0);		/* const value 8 */
> +	LOAD_REGISTER_IMM64(GPR_CONST_ff, 0xff, 0);	/* const value 0xff */
> +	LOAD_REGISTER_IMM64(GPR_ffffffff, ~0U, 0);	/* const value 0xffffffff */
> +
> +	/* for indexed reads from memory */
> +	LOAD_REGISTER_IMM32(WPARID, 1);
> +
> +	jmp = bb;
> +
> +	for (int unroll = 0; unroll < UNROLL; unroll++) {
> +
> +		/* data -> GPR4 */
> +		LOAD_REGISTER_REG(GPR_INDATA_IDX, CS_MI_ADDRESS_OFFSET);
> +		LOAD_REGISTER_MEM_WPARID(GPR_CURR_DW, data_offset);
> +
> +		for (int byte = 0; byte < 4; byte++) {
> +			if (byte != 0)
> +				MATH_4_STORE(R_CURR_DW, R_CONST_8,
> +					     MI_MATH_SHR, R_CURR_DW); /* dw >> 8 */
> +
> +			/* crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); */
> +			MATH_4_STORE(R_CURR_DW, R_CONST_ff,
> +				     MI_MATH_AND, R_TMP_1); /* dw & 0xff */
> +			MATH_4_STORE(R_CRC, R_TMP_1,
> +				     MI_MATH_XOR, R_TMP_1); /* crc ^ tmp */
> +			MATH_4_STORE(R_TMP_1, R_CONST_ff,
> +				     MI_MATH_AND, R_TMP_1); /* tmp & 0xff */
> +			MATH_4_STORE(R_TMP_1, R_CONST_2,
> +				     MI_MATH_SHL, R_TABLE_IDX); /* tmp << 2 (crc idx) */
> +
> +			LOAD_REGISTER_REG(GPR_TABLE_IDX, CS_MI_ADDRESS_OFFSET);
> +			LOAD_REGISTER_MEM_WPARID(GPR_TMP_1, table_offset);
> +
> +			MATH_4_STORE(R_CRC, R_CONST_8,
> +				     MI_MATH_SHR, R_TMP_2); /* crc >> 8 (shift) */
> +			MATH_4_STORE(R_TMP_2, R_TMP_1,
> +				     MI_MATH_XOR, R_CRC); /* crc = tab[v] ^ shift */
> +		}
> +
> +		MATH_4_STORE(R_CRC, R_ffffffff, MI_MATH_XOR, R_TMP_1);
> +		STORE_REGISTER_MEM(GPR_TMP_1, crc); /* write, we don't know when loop ends */
> +
> +		/* increment data index */
> +		MATH_4_STORE(R_INDATA_IDX, R_CONST_4, MI_MATH_ADD, R_INDATA_IDX);
> +
> +		/* loop until R_SIZE == 0, R_SIZE = R_SIZE - R_CONST_4 */
> +		MATH_4_STORE(R_SIZE, R_CONST_4, MI_MATH_SUB, R_SIZE);
> +		STORE_REGISTER_MEM(GPR_SIZE, loop_status);
> +
> +		/* loop when loop_status != 0 */
> +		COND_BBE(0, loop_status, MAD_NEQ_IDD);
> +	}
> +
> +	*bb++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
> +	offset = OFFSET(bb_offset, jmp, batch);
> +	*bb++ = offset;
> +	*bb++ = offset >> 32;
> +
> +	gem_munmap(batch, BBSIZE);
> +}
> +
> +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> +		    uint32_t data_handle, uint32_t data_size)
> +{
> +	const struct intel_execution_engine2 *e;
> +	struct drm_i915_gem_execbuffer2 execbuf = {};
> +	struct drm_i915_gem_exec_object2 obj[3] = {};
> +	uint64_t bb_offset, table_offset, data_offset;
> +	uint32_t bb, table, crc, table_size = 4096;
> +	uint32_t *ptr;
> +	uint64_t flags;
> +	bool found = false;
> +
> +	igt_assert(data_size % 4 == 0);
> +
> +	for_each_ctx_engine(i915, ctx, e) {
> +		if (e->class == I915_ENGINE_CLASS_RENDER) {
> +			flags = e->flags;
> +			found = true;
> +		}
> +	}
> +	igt_require_f(found, "Context %u doesn't have render engine available\n", ctx->id);
> +
> +	table = gem_create_in_memory_regions(i915, table_size, REGION_LMEM(0));
> +	gem_write(i915, table, 0, crc32_tab, sizeof(crc32_tab));
> +
> +	table_offset = get_offset(ahnd, table, table_size, 0);
> +	data_offset = get_offset(ahnd, data_handle, data_size, 0);
> +
> +	obj[0].offset = table_offset;
> +	obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
> +	obj[0].handle = table;
> +
> +	obj[1].offset = data_offset;
> +	obj[1].flags = EXEC_OBJECT_PINNED;
> +	obj[1].handle = data_handle;
> +
> +	bb = gem_create_in_memory_regions(i915, BBSIZE, REGION_LMEM(0));
> +	bb_offset = get_offset(ahnd, bb, BBSIZE, 0);
> +	fill_batch(i915, bb, bb_offset, table_offset, data_offset, data_size);
> +	obj[2].offset = bb_offset;
> +	obj[2].flags = EXEC_OBJECT_PINNED;
> +	obj[2].handle = bb;
> +	execbuf.buffer_count = 3;
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.flags = flags;
> +	execbuf.rsvd1 = 0;
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, table);
> +
> +	ptr = gem_mmap__device_coherent(i915, table, 0, table_size, PROT_READ);
> +	crc = ptr[0x101];
> +	gem_munmap(ptr, table_size);
> +	gem_close(i915, table);
> +	gem_close(i915, bb);
> +
> +	return crc;
> +}
> +
> +bool supports_gpu_crc32(int i915)
> +{
> +	uint16_t devid = intel_get_drm_devid(i915);
> +
> +	return IS_DG2(devid);
> +}
> diff --git a/lib/i915/i915_crc.h b/lib/i915/i915_crc.h
> new file mode 100644
> index 0000000000..51feeceda9
> --- /dev/null
> +++ b/lib/i915/i915_crc.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2022 Intel Corporation
> + */
> +#ifndef _I915_CRC_H_
> +#define _I915_CRC_H_
> +
> +#include <stdint.h>
> +#include "intel_ctx.h"
> +
> +uint32_t cpu_crc32(const void *buf, size_t size);
> +uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
> +		    uint32_t obj, uint32_t obj_size);
> +bool supports_gpu_crc32(int i915);
> +
> +#endif /* _I915_CRC_ */
> diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> index cb62728896..fff32e1816 100644
> --- a/lib/intel_reg.h
> +++ b/lib/intel_reg.h
> @@ -2625,6 +2625,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
>  #define MI_LOAD_REGISTER_MEM_GEN8	((0x29 << 23) | (4 - 2))
>  #define   MI_MMIO_REMAP_ENABLE_GEN12	(1 << 17)
> +#define MI_STORE_REGISTER_MEM_GEN8	((0x24 << 23) | (4 - 2))
>  
>  /* Flush */
>  #define MI_FLUSH			(0x04<<23)
> @@ -2657,6 +2658,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  #define MI_BATCH_BUFFER_END	(0xA << 23)
>  #define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
>  #define MI_DO_COMPARE                   (1 << 21)
> +#define MAD_GT_IDD			(0 << 12)
> +#define MAD_GT_OR_EQ_IDD		(1 << 12)
> +#define MAD_LT_IDD			(2 << 12)
> +#define MAD_LT_OR_EQ_IDD		(3 << 12)
> +#define MAD_EQ_IDD			(4 << 12)
> +#define MAD_NEQ_IDD			(5 << 12)
>  
>  #define MI_BATCH_NON_SECURE		(1)
>  #define MI_BATCH_NON_SECURE_I965	(1 << 8)
> diff --git a/lib/meson.build b/lib/meson.build
> index 0a173c1fc6..b05198ecc9 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -10,6 +10,7 @@ lib_sources = [
>  	'i915/gem_ring.c',
>  	'i915/gem_mman.c',
>  	'i915/gem_vm.c',
> +	'i915/i915_crc.c',
>  	'i915/intel_memory_region.c',
>  	'i915/intel_mocs.c',
>  	'i915/i915_blt.c',
> -- 
> 2.32.0
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2
  2022-06-02 13:17 [igt-dev] [PATCH i-g-t 0/2] Add crc32 calculation on dg2 gpu Zbigniew Kempczyński
@ 2022-06-02 13:17 ` Zbigniew Kempczyński
  2022-06-02 13:30   ` Petri Latvala
  0 siblings, 1 reply; 16+ messages in thread
From: Zbigniew Kempczyński @ 2022-06-02 13:17 UTC (permalink / raw)
  To: igt-dev

Adding crc32 calculation on the gpu gives us a new way to verify data
integrity without having to trust that the cpu mapping is correct.

This patch introduces crc32 calculation on DG2 only. On older gens the ALU
(MI_MATH) supports neither bit-shifting instructions nor multiply or
divide. Emulating n-bit shifts costs hundreds of instructions with
predicated SRM (which works on the render engine only). Another limitation
is the lack of indexed load / store. On DG2 we can use WPARID and
CS_MI_ADDRESS_OFFSET to achieve indexed operations on memory.

For performance reasons (cpu crc32 calculation, even on WC memory, is
still much faster than on the gpu, and the gpu speed also depends on the
memory region of the object being checksummed) the calculation will
complete in a reasonable amount of time only for a few MiB.

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
---
 lib/i915/i915_crc.c | 369 ++++++++++++++++++++++++++++++++++++++++++++
 lib/i915/i915_crc.h |  16 ++
 lib/intel_reg.h     |   7 +
 lib/meson.build     |   1 +
 4 files changed, 393 insertions(+)
 create mode 100644 lib/i915/i915_crc.c
 create mode 100644 lib/i915/i915_crc.h

diff --git a/lib/i915/i915_crc.c b/lib/i915/i915_crc.c
new file mode 100644
index 0000000000..a1851d8073
--- /dev/null
+++ b/lib/i915/i915_crc.c
@@ -0,0 +1,369 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include "drmtest.h"
+#include "gem_create.h"
+#include "gem_engine_topology.h"
+#include "gem_mman.h"
+#include "i830_reg.h"
+#include "i915_drm.h"
+#include "intel_reg.h"
+#include "intel_chipset.h"
+#include "ioctl_wrappers.h"
+#include "intel_allocator.h"
+#include "i915/i915_crc.h"
+
+const uint32_t crc32_tab[] = {
+	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
+	0xe963a535, 0x9e6495a3,	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
+	0xf3b97148, 0x84be41de,	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,	0x14015c4f, 0x63066cd9,
+	0xfa0f3d63, 0x8d080df5,	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,	0x35b5a8fa, 0x42b2986c,
+	0xdbbbc9d6, 0xacbcf940,	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
+	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,	0x76dc4190, 0x01db7106,
+	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
+	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
+	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
+	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
+	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
+	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
+	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
+	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
+	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
+	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
+	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
+	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
+	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+uint32_t cpu_crc32(const void *buf, size_t size)
+{
+
+	const uint8_t *p = buf;
+	uint32_t crc;
+
+	crc = ~0U;
+
+	while (size--)
+		crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8);
+
+	return crc ^ ~0U;
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* DG2+ */
+#define   MI_MATH_SHL                   MI_MATH_INSTR(0x105, 0x0, 0x0)
+#define   MI_MATH_SHR                   MI_MATH_INSTR(0x106, 0x0, 0x0)
+#define   MI_MATH_SAR                   MI_MATH_INSTR(0x107, 0x0, 0x0)
+
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG            MI_INSTR(0x2A, 1)
+#define CS_GPR(base, x)                 (base + 0x600 + 8 * (x))
+#define GPR(x)                          CS_GPR(base, (x))
+#define R(x)                            (x)
+#define USERDATA(offset, idx)	        ((offset) + (0x100 + (idx)) * 4)
+#define OFFSET(obj_offset, current, start) \
+	((obj_offset) + (current - start) * 4)
+
+#define WPARID                          0x221C
+#define CS_MI_ADDRESS_OFFSET            0x23B4
+
+#define LOAD_REGISTER_REG(__reg_src, __reg_dst) do { \
+		*bb++ = MI_LOAD_REGISTER_REG; \
+		*bb++ = (__reg_src); \
+		*bb++ = (__reg_dst); \
+	} while (0)
+
+#define LOAD_REGISTER_IMM32(__reg, __imm1) do { \
+		*bb++ = MI_LOAD_REGISTER_IMM; \
+		*bb++ = (__reg); \
+		*bb++ = (__imm1); \
+	} while (0)
+
+#define LOAD_REGISTER_IMM64(__reg, __imm1, __imm2) do { \
+		*bb++ = MI_LOAD_REGISTER_IMM; \
+		*bb++ = (__reg); \
+		*bb++ = (__imm1); \
+		*bb++ = MI_LOAD_REGISTER_IMM; \
+		*bb++ = (__reg) + 4; \
+		*bb++ = (__imm2); \
+	} while (0)
+
+#define LOAD_REGISTER_MEM(__reg, __offset) do { \
+		*bb++ = MI_LOAD_REGISTER_MEM_GEN8; \
+		*bb++ = (__reg); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+#define LOAD_REGISTER_MEM_WPARID(__reg, __offset) do { \
+		*bb++ = MI_LOAD_REGISTER_MEM_GEN8 | (1 << 16); \
+		*bb++ = (__reg); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+#define STORE_REGISTER_MEM(__gpr, __offset) do { \
+		*bb++ = MI_STORE_REGISTER_MEM_GEN8; \
+		*bb++ = (__gpr); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+#define COND_BBE(__value, __offset, __condition) do { \
+		*bb++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (__condition) | 2; \
+		*bb++ = (__value); \
+		*bb++ = (__offset); \
+		*bb++ = (__offset) >> 32; \
+	} while (0)
+
+#define MATH_4_STORE(__r1, __r2, __op, __r3) do { \
+		*bb++ = MI_MATH(4); \
+		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(__r1)); \
+		*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(__r2)); \
+		*bb++ = (__op); \
+		*bb++ = MI_MATH_STORE(MI_MATH_REG(__r3), MI_MATH_REG_ACCU); \
+	} while (0)
+
+#define UNROLL 16
+#define CODESIZE 2048
+#define BBSIZE (UNROLL * CODESIZE)
+
+/* Aliasing for easier refactoring */
+#define GPR_SIZE	GPR(0)
+#define R_SIZE		R(0)
+
+#define GPR_CRC		GPR(1)
+#define R_CRC		R(1)
+
+#define GPR_INDATA_IDX  GPR(2)
+#define R_INDATA_IDX	R(2)
+
+#define GPR_TABLE_IDX   GPR(3)
+#define R_TABLE_IDX	R(3)
+
+#define GPR_CURR_DW	GPR(4)
+#define R_CURR_DW	R(4)
+
+#define GPR_CONST_2	GPR(5)
+#define R_CONST_2	R(5)
+
+#define GPR_CONST_4	GPR(6)
+#define R_CONST_4	R(6)
+
+#define GPR_CONST_8	GPR(7)
+#define R_CONST_8	R(7)
+
+#define GPR_CONST_ff	GPR(8)
+#define R_CONST_ff	R(8)
+
+#define GPR_ffffffff    GPR(9)
+#define R_ffffffff	R(9)
+
+#define GPR_TMP_1	GPR(10)
+#define R_TMP_1		R(10)
+
+#define GPR_TMP_2	GPR(11)
+#define R_TMP_2		R(11)
+
+static void fill_batch(int i915, uint32_t bb_handle, uint64_t bb_offset,
+		       uint64_t table_offset, uint64_t data_offset, uint32_t data_size)
+{
+	uint32_t *bb, *batch, *jmp;
+	const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
+	const int use_64b = gen >= 8;
+	uint32_t base = gem_engine_mmio_base(i915, "rcs0");
+	uint64_t offset;
+	uint64_t loop_status = USERDATA(table_offset, 0);
+	uint64_t crc = USERDATA(table_offset, 1);
+
+	igt_assert(data_size % 4 == 0);
+
+	batch = gem_mmap__device_coherent(i915, bb_handle, 0, BBSIZE,
+					  PROT_READ | PROT_WRITE);
+	memset(batch, 0, BBSIZE);
+
+	bb = batch;
+
+	LOAD_REGISTER_IMM64(GPR_SIZE, data_size, 0);
+	STORE_REGISTER_MEM(GPR_SIZE, loop_status); /* save to mem for cond-bbe compare */
+
+	LOAD_REGISTER_IMM64(GPR_CRC, ~0U, 0);		/* crc start - 0xffffffff */
+	LOAD_REGISTER_IMM64(GPR_INDATA_IDX, 0, 0);	/* data_offset index (0) */
+	LOAD_REGISTER_IMM64(GPR_CONST_2, 2, 0);		/* const value 2 */
+	LOAD_REGISTER_IMM64(GPR_CONST_4, 4, 0);		/* const value 4 */
+	LOAD_REGISTER_IMM64(GPR_CONST_8, 8, 0);		/* const value 8 */
+	LOAD_REGISTER_IMM64(GPR_CONST_ff, 0xff, 0);	/* const value 0xff */
+	LOAD_REGISTER_IMM64(GPR_ffffffff, ~0U, 0);	/* const value 0xffffffff */
+
+	/* for indexed reads from memory */
+	LOAD_REGISTER_IMM32(WPARID, 1);
+
+	jmp = bb;
+
+	for (int unroll = 0; unroll < UNROLL; unroll++) {
+
+		/* data -> GPR4 */
+		LOAD_REGISTER_REG(GPR_INDATA_IDX, CS_MI_ADDRESS_OFFSET);
+		LOAD_REGISTER_MEM_WPARID(GPR_CURR_DW, data_offset);
+
+		for (int byte = 0; byte < 4; byte++) {
+			if (byte != 0)
+				MATH_4_STORE(R_CURR_DW, R_CONST_8,
+					     MI_MATH_SHR, R_CURR_DW); /* dw >> 8 */
+
+			/* crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); */
+			MATH_4_STORE(R_CURR_DW, R_CONST_ff,
+				     MI_MATH_AND, R_TMP_1); /* dw & 0xff */
+			MATH_4_STORE(R_CRC, R_TMP_1,
+				     MI_MATH_XOR, R_TMP_1); /* crc ^ tmp */
+			MATH_4_STORE(R_TMP_1, R_CONST_ff,
+				     MI_MATH_AND, R_TMP_1); /* tmp & 0xff */
+			MATH_4_STORE(R_TMP_1, R_CONST_2,
+				     MI_MATH_SHL, R_TABLE_IDX); /* tmp << 2 (crc idx) */
+
+			LOAD_REGISTER_REG(GPR_TABLE_IDX, CS_MI_ADDRESS_OFFSET);
+			LOAD_REGISTER_MEM_WPARID(GPR_TMP_1, table_offset);
+
+			MATH_4_STORE(R_CRC, R_CONST_8,
+				     MI_MATH_SHR, R_TMP_2); /* crc >> 8 (shift) */
+			MATH_4_STORE(R_TMP_2, R_TMP_1,
+				     MI_MATH_XOR, R_CRC); /* crc = tab[v] ^ shift */
+		}
+
+		MATH_4_STORE(R_CRC, R_ffffffff, MI_MATH_XOR, R_TMP_1);
+		STORE_REGISTER_MEM(GPR_TMP_1, crc); /* write, we don't know when loop ends */
+
+		/* increment data index */
+		MATH_4_STORE(R_INDATA_IDX, R_CONST_4, MI_MATH_ADD, R_INDATA_IDX);
+
+		/* loop until R_SIZE == 0, R_SIZE = R_SIZE - R_CONST_4 */
+		MATH_4_STORE(R_SIZE, R_CONST_4, MI_MATH_SUB, R_SIZE);
+		STORE_REGISTER_MEM(GPR_SIZE, loop_status);
+
+		/* loop when loop_status != 0 */
+		COND_BBE(0, loop_status, MAD_NEQ_IDD);
+	}
+
+	*bb++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	offset = OFFSET(bb_offset, jmp, batch);
+	*bb++ = offset;
+	*bb++ = offset >> 32;
+
+	gem_munmap(batch, BBSIZE);
+}
+
+uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
+		    uint32_t data_handle, uint32_t data_size)
+{
+	const struct intel_execution_engine2 *e;
+	struct drm_i915_gem_execbuffer2 execbuf = {};
+	struct drm_i915_gem_exec_object2 obj[3] = {};
+	uint64_t bb_offset, table_offset, data_offset;
+	uint32_t bb, table, crc, table_size = 4096;
+	uint32_t *ptr;
+	uint64_t flags;
+	bool found = false;
+
+	igt_assert(data_size % 4 == 0);
+
+	for_each_ctx_engine(i915, ctx, e) {
+		if (e->class == I915_ENGINE_CLASS_RENDER) {
+			flags = e->flags;
+			found = true;
+		}
+	}
+	igt_require_f(found, "Context %u doesn't have render engine available\n", ctx->id);
+
+	table = gem_create_in_memory_regions(i915, table_size, REGION_LMEM(0));
+	gem_write(i915, table, 0, crc32_tab, sizeof(crc32_tab));
+
+	table_offset = get_offset(ahnd, table, table_size, 0);
+	data_offset = get_offset(ahnd, data_handle, data_size, 0);
+
+	obj[0].offset = table_offset;
+	obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+	obj[0].handle = table;
+
+	obj[1].offset = data_offset;
+	obj[1].flags = EXEC_OBJECT_PINNED;
+	obj[1].handle = data_handle;
+
+	bb = gem_create_in_memory_regions(i915, BBSIZE, REGION_LMEM(0));
+	bb_offset = get_offset(ahnd, bb, BBSIZE, 0);
+	fill_batch(i915, bb, bb_offset, table_offset, data_offset, data_size);
+	obj[2].offset = bb_offset;
+	obj[2].flags = EXEC_OBJECT_PINNED;
+	obj[2].handle = bb;
+	execbuf.buffer_count = 3;
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.flags = flags;
+	execbuf.rsvd1 = 0;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, table);
+
+	ptr = gem_mmap__device_coherent(i915, table, 0, table_size, PROT_READ);
+	crc = ptr[0x101];
+	gem_munmap(ptr, table_size);
+	gem_close(i915, table);
+	gem_close(i915, bb);
+
+	return crc;
+}
+
+bool supports_gpu_crc32(int i915)
+{
+	uint16_t devid = intel_get_drm_devid(i915);
+
+	return IS_DG2(devid);
+}
diff --git a/lib/i915/i915_crc.h b/lib/i915/i915_crc.h
new file mode 100644
index 0000000000..51feeceda9
--- /dev/null
+++ b/lib/i915/i915_crc.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+#ifndef _I915_CRC_H_
+#define _I915_CRC_H_
+
+#include <stdint.h>
+#include "intel_ctx.h"
+
+uint32_t cpu_crc32(const void *buf, size_t size);
+uint32_t i915_crc32(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
+		    uint32_t obj, uint32_t obj_size);
+bool supports_gpu_crc32(int i915);
+
+#endif /* _I915_CRC_ */
diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index cb62728896..fff32e1816 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -2625,6 +2625,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
 #define MI_LOAD_REGISTER_MEM_GEN8	((0x29 << 23) | (4 - 2))
 #define   MI_MMIO_REMAP_ENABLE_GEN12	(1 << 17)
+#define MI_STORE_REGISTER_MEM_GEN8	((0x24 << 23) | (4 - 2))
 
 /* Flush */
 #define MI_FLUSH			(0x04<<23)
@@ -2657,6 +2658,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MI_BATCH_BUFFER_END	(0xA << 23)
 #define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
 #define MI_DO_COMPARE                   (1 << 21)
+#define MAD_GT_IDD			(0 << 12)
+#define MAD_GT_OR_EQ_IDD		(1 << 12)
+#define MAD_LT_IDD			(2 << 12)
+#define MAD_LT_OR_EQ_IDD		(3 << 12)
+#define MAD_EQ_IDD			(4 << 12)
+#define MAD_NEQ_IDD			(5 << 12)
 
 #define MI_BATCH_NON_SECURE		(1)
 #define MI_BATCH_NON_SECURE_I965	(1 << 8)
diff --git a/lib/meson.build b/lib/meson.build
index 0a173c1fc6..b05198ecc9 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -10,6 +10,7 @@ lib_sources = [
 	'i915/gem_ring.c',
 	'i915/gem_mman.c',
 	'i915/gem_vm.c',
+	'i915/i915_crc.c',
 	'i915/intel_memory_region.c',
 	'i915/intel_mocs.c',
 	'i915/i915_blt.c',
-- 
2.32.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2022-06-09  7:24 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-03 13:05 [igt-dev] [PATCH i-g-t 0/2] Add crc32 calculation on dg2 gpu Zbigniew Kempczyński
2022-06-03 13:05 ` [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2 Zbigniew Kempczyński
2022-06-03 13:11   ` Petri Latvala
2022-06-06  6:33     ` Zbigniew Kempczyński
2022-06-06  8:07       ` Petri Latvala
2022-06-08  9:17         ` Zbigniew Kempczyński
2022-06-09  7:22           ` Petri Latvala
2022-06-03 13:05 ` [igt-dev] [PATCH i-g-t 2/2] tests/api_intel_bb: Add crc32 checking test " Zbigniew Kempczyński
2022-06-06 14:47   ` Kamil Konieczny
2022-06-03 13:36 ` [igt-dev] ✗ GitLab.Pipeline: warning for Add crc32 calculation on dg2 gpu (rev3) Patchwork
2022-06-03 19:07 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
2022-06-03 21:44 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2022-06-03 11:08 [igt-dev] [PATCH i-g-t 0/2] Add crc32 calculation on dg2 gpu Zbigniew Kempczyński
2022-06-03 11:08 ` [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2 Zbigniew Kempczyński
2022-06-03 11:39   ` Petri Latvala
2022-06-02 13:17 [igt-dev] [PATCH i-g-t 0/2] Add crc32 calculation on dg2 gpu Zbigniew Kempczyński
2022-06-02 13:17 ` [igt-dev] [PATCH i-g-t 1/2] lib/i915_crc: Introduce crc32 on gpu for DG2 Zbigniew Kempczyński
2022-06-02 13:30   ` Petri Latvala

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.