* [igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support
@ 2019-11-01 20:13 Imre Deak
  2019-11-01 20:13 ` [igt-dev] [PATCH i-g-t 2/3] tests/gem_render_copy: Adjust the tgl+ compressed buf alignments Imre Deak
                   ` (5 more replies)
  0 siblings, 6 replies; 9+ messages in thread
From: Imre Deak @ 2019-11-01 20:13 UTC (permalink / raw)
  To: igt-dev; +Cc: Brian Welty

On GEN12+ the AUX CCS surfaces required by the render and media
compression must be specified by a 3 level page table directory, which
translates the main surface graphics address to the AUX CCS surface
graphics address. For this purpose add support for creating a GEM buffer
to translate the linear surface address range to the linear AUX surface
address range.

The buffers containing the main surface must be pinned down, since the
directory table entry indices depend on the surface address, and they
must be 64kB aligned. The page table can be relocated OTOH, so allow
that and emit the required relocation entries.
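
For reference, the address decomposition the 3 level directory implements can
be sketched standalone as below. The shift/bit counts mirror aux_pgtable_desc[]
in the patch; the helper name and the example address are illustrative only,
not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* One L1 entry covers 64kB of main surface: 4 * 64B CCS block * 8 / 2 * 64B CL. */
#define SURFACE_BLOCK_SIZE	(4 * 64 * 8 / 2 * 64)

static unsigned int aux_index(uint64_t addr, int shift, int bits)
{
	return (addr >> shift) & ((1u << bits) - 1);
}

int main(void)
{
	/* A 64kB aligned main surface address, as the pinning above requires. */
	uint64_t addr = 3 * SURFACE_BLOCK_SIZE;

	printf("L3 index %u, L2 index %u, L1 index %u\n",
	       aux_index(addr, 36, 12),	/* bits 47:36 */
	       aux_index(addr, 24, 12),	/* bits 35:24 */
	       aux_index(addr, 16, 8));	/* bits 23:16 */

	return 0;
}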

Cc: Mika Kahola <mika.kahola@intel.com>
Cc: Brian Welty <brian.welty@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 lib/igt_aux_pgtable.c | 381 ++++++++++++++++++++++++++++++++++++++++++
 lib/igt_aux_pgtable.h |  21 +++
 lib/intel_reg.h       |   3 +
 lib/meson.build       |   1 +
 lib/rendercopy_gen9.c | 121 +++++++++++++-
 5 files changed, 521 insertions(+), 6 deletions(-)
 create mode 100644 lib/igt_aux_pgtable.c
 create mode 100644 lib/igt_aux_pgtable.h

diff --git a/lib/igt_aux_pgtable.c b/lib/igt_aux_pgtable.c
new file mode 100644
index 00000000..aaa24cfd
--- /dev/null
+++ b/lib/igt_aux_pgtable.c
@@ -0,0 +1,381 @@
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "drmtest.h"
+#include "igt_aux_pgtable.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "ioctl_wrappers.h"
+
+#include "i915/gem_mman.h"
+
+#define BITS_PER_LONG		(sizeof(long) * 8)
+#define BITMASK(e, s)		((~0UL << (s)) & \
+				 (~0UL >> (BITS_PER_LONG - 1 - (e))))
+
+#define ALIGN_DOWN(x, a)	ALIGN((x) - ((a) - 1), (a))
+
+#define CL_SIZE			64
+/*
+ * The size of a block on the CCS surface that is covered/pointed to by one
+ * L1 AUX pagetable entry. This size corresponds to the 1<<8 alignment of the
+ * pointers in the L1 entry.
+ */
+#define CCS_BLOCK_SIZE		(4 * CL_SIZE)
+/*
+ * 256 bytes per CCS block size *
+ * 8   bits per byte /
+ * 2   bits per surface CL *
+ * 64  bytes per surface CL
+ */
+#define SURFACE_BLOCK_SIZE	(CCS_BLOCK_SIZE * 8 / 2 * CL_SIZE)
+#define AUX_PGTABLE_VALID	1
+#define AUX_PGTABLE_LEVELS	3
+
+#define ADDRESS_BITS		48
+
+#define max(a, b)		((a) > (b) ? (a) : (b))
+
+struct pgtable_level_desc {
+	int idx_shift;
+	int idx_bits;
+	int entry_ptr_shift;
+	int table_size;
+};
+
+struct pgtable_level_info {
+	const struct pgtable_level_desc *desc;
+	int table_count;
+	int alloc_base;
+	int alloc_ptr;
+};
+
+struct pgtable {
+	int levels;
+	struct pgtable_level_info *level_info;
+	int size;
+	int max_align;
+	drm_intel_bo *bo;
+};
+
+static const struct pgtable_level_desc aux_pgtable_desc[AUX_PGTABLE_LEVELS] = {
+	{
+		.idx_shift = 16,
+		.idx_bits = 8,
+		.entry_ptr_shift = 8,
+		.table_size = 8 * 1024,
+	},
+	{
+		.idx_shift = 24,
+		.idx_bits = 12,
+		.entry_ptr_shift = 13,
+		.table_size = 32 * 1024,
+	},
+	{
+		.idx_shift = 36,
+		.idx_bits = 12,
+		.entry_ptr_shift = 15,
+		.table_size = 32 * 1024,
+	},
+};
+
+static int
+pgt_table_count(int address_bits,
+		const struct igt_aux_pgtable_range *ranges, int range_count)
+{
+	uint64_t end;
+	int count;
+	int i;
+
+	count = 0;
+	end = 0;
+	for (i = 0; i < range_count; i++) {
+		const struct igt_aux_pgtable_range *r = &ranges[i];
+		uint64_t start;
+
+		/* We require ranges to be sorted. */
+		igt_assert(i == 0 ||
+			   r->surface_base >= ranges[i - 1].surface_base +
+					      ranges[i - 1].surface_size);
+
+		start = ALIGN_DOWN(r->surface_base, 1UL << address_bits);
+		/* Avoid double counting for overlapping aligned ranges. */
+		start = max(start, end);
+
+		end = ALIGN(r->surface_base + r->surface_size,
+			    1UL << address_bits);
+		igt_assert(end >= start);
+
+		count += (end - start) >> address_bits;
+	}
+
+	return count;
+}
+
+static void
+pgt_calc_size(struct pgtable *pgt,
+	      const struct igt_aux_pgtable_range *ranges, int range_count)
+{
+	int level;
+
+	pgt->size = 0;
+
+	for (level = pgt->levels; level > 0; level--) {
+		struct pgtable_level_info *li = &pgt->level_info[level - 1];
+
+		li->alloc_base = ALIGN(pgt->size, li->desc->table_size);
+		li->alloc_ptr = li->alloc_base;
+
+		li->table_count = pgt_table_count(li->desc->idx_shift +
+						  li->desc->idx_bits,
+						  ranges, range_count);
+
+		pgt->size = li->alloc_base +
+			    li->table_count * li->desc->table_size;
+	}
+}
+
+static uint64_t pgt_alloc_table(struct pgtable *pgt, int level)
+{
+	struct pgtable_level_info *li = &pgt->level_info[level - 1];
+	uint64_t table;
+
+	table = li->alloc_ptr;
+	li->alloc_ptr += li->desc->table_size;
+
+	igt_assert(li->alloc_ptr <=
+		   li->alloc_base + li->table_count * li->desc->table_size);
+
+	return table;
+}
+
+static int pgt_address_index(struct pgtable *pgt, int level, uint64_t address)
+{
+	const struct pgtable_level_desc *ld = pgt->level_info[level - 1].desc;
+	uint64_t mask = BITMASK(ld->idx_shift + ld->idx_bits - 1,
+				ld->idx_shift);
+
+	return (address & mask) >> ld->idx_shift;
+}
+
+static uint64_t ptr_mask(struct pgtable *pgt, int level)
+{
+	const struct pgtable_level_desc *ld = pgt->level_info[level - 1].desc;
+
+	return BITMASK(ADDRESS_BITS - 1, ld->entry_ptr_shift);
+}
+
+static uint64_t pgt_entry_ptr(struct pgtable *pgt, int level, uint64_t entry)
+{
+	return entry & ptr_mask(pgt, level);
+}
+
+static uint64_t pgt_mkentry(struct pgtable *pgt, int level, uint64_t ptr,
+			    uint64_t flags)
+{
+	igt_assert(!(ptr & ~ptr_mask(pgt, level)));
+
+	return ptr | flags;
+}
+
+static uint64_t
+pgt_get_table(struct pgtable *pgt, uint64_t parent_table,
+	      int level, uint64_t address, uint64_t flags)
+{
+	uint64_t *table_ptr = pgt->bo->virtual + parent_table;
+	int entry_idx = pgt_address_index(pgt, level, address);
+	uint64_t *entry_ptr;
+
+	entry_ptr = &table_ptr[entry_idx];
+	if (!*entry_ptr) {
+		uint64_t child_table = pgt_alloc_table(pgt, level - 1);
+
+		*entry_ptr = pgt_mkentry(pgt, level, child_table, flags);
+
+		drm_intel_bo_emit_reloc(pgt->bo,
+					parent_table + entry_idx * sizeof(uint64_t),
+					pgt->bo, *entry_ptr,
+					I915_GEM_DOMAIN_INSTRUCTION, 0);
+	}
+
+	return pgt_entry_ptr(pgt, level, *entry_ptr);
+}
+
+static void
+pgt_set_l1_entry(struct pgtable *pgt, uint64_t l1_table,
+		 uint64_t address, uint64_t ptr, uint64_t flags)
+{
+	uint64_t *l1_table_ptr;
+	uint64_t *l1_entry_ptr;
+
+	l1_table_ptr = pgt->bo->virtual + l1_table;
+	l1_entry_ptr = &l1_table_ptr[pgt_address_index(pgt, 1, address)];
+	*l1_entry_ptr = pgt_mkentry(pgt, 1, ptr, flags);
+}
+
+static uint64_t pgt_get_l1_flags(const struct igt_aux_pgtable_range *range)
+{
+	/*
+	 * The offset of .tile_mode isn't specified by bspec, it's what Mesa
+	 * uses.
+	 */
+	union {
+		struct {
+			uint64_t	valid:1;
+			uint64_t	compression_mod:2;
+			uint64_t	lossy_compression:1;
+			uint64_t	pad:4;
+			uint64_t	addr:40;
+			uint64_t	pad2:4;
+			uint64_t	tile_mode:2;
+			uint64_t	depth:3;
+			uint64_t	ycr:1;
+			uint64_t	format:6;
+		} e;
+		uint64_t l;
+	} entry = {
+		.e = {
+			.valid = 1,
+			.tile_mode = range->tiling == I915_TILING_Y ? 1 : 0,
+			.depth = 5,		/* 32bpp */
+			.format = 0xA,		/* B8G8R8A8_UNORM */
+		}
+	};
+
+	/*
+	 * TODO: Clarify if Yf is supported and if we need to differentiate
+	 *       Ys and Yf.
+	 *       Add support for more formats.
+	 */
+	igt_assert(range->tiling == I915_TILING_Y ||
+		   range->tiling == I915_TILING_Yf ||
+		   range->tiling == I915_TILING_Ys);
+
+	igt_assert(range->bpp == 32);
+
+	return entry.l;
+}
+
+static uint64_t pgt_get_lx_flags(void)
+{
+	union {
+		struct {
+			uint64_t        valid:1;
+			uint64_t        addr:47;
+			uint64_t        pad:16;
+		} e;
+		uint64_t l;
+	} entry = {
+		.e = {
+			.valid = 1,
+		}
+	};
+
+	return entry.l;
+}
+
+static void
+pgt_populate_entries_for_range(struct pgtable *pgt,
+			       const struct igt_aux_pgtable_range *range,
+			       drm_intel_bo *bo,
+			       uint64_t top_table)
+{
+	uint64_t surface_addr = range->surface_base;
+	uint64_t surface_end = surface_addr + range->surface_size;
+	uint64_t aux_addr = range->aux_base;
+	uint64_t l1_flags = pgt_get_l1_flags(range);
+	uint64_t lx_flags = pgt_get_lx_flags();
+
+	pgt->bo = bo;
+
+	for (; surface_addr < surface_end;
+	     surface_addr += SURFACE_BLOCK_SIZE, aux_addr += CCS_BLOCK_SIZE) {
+		uint64_t table = top_table;
+		int level;
+
+		for (level = pgt->levels; level > 1; level--)
+			table = pgt_get_table(pgt, table, level,
+					      surface_addr, lx_flags);
+
+		pgt_set_l1_entry(pgt, table, surface_addr, aux_addr, l1_flags);
+	}
+}
+
+static void pgt_populate_entries(struct pgtable *pgt,
+				 const struct igt_aux_pgtable_range *ranges,
+				 int range_count,
+				 drm_intel_bo *gem_bo)
+{
+	uint64_t top_table;
+	int i;
+
+	igt_assert(gem_bo->size >= pgt->size);
+	memset(gem_bo->virtual, 0, pgt->size);
+
+	top_table = pgt_alloc_table(pgt, pgt->levels);
+	/* Top level table must be at offset 0. */
+	igt_assert(top_table == 0);
+
+	for (i = 0; i < range_count; i++)
+		pgt_populate_entries_for_range(pgt, &ranges[i], gem_bo,
+					       top_table);
+}
+
+static struct pgtable *
+pgt_create(const struct pgtable_level_desc *level_descs, int levels,
+	   const struct igt_aux_pgtable_range *ranges, int range_count)
+{
+	struct pgtable *pgt;
+	int level;
+
+	pgt = calloc(1, sizeof(*pgt));
+	igt_assert(pgt);
+
+	pgt->levels = levels;
+
+	pgt->level_info = calloc(levels, sizeof(*pgt->level_info));
+	igt_assert(pgt->level_info);
+
+	for (level = 0; level < pgt->levels; level++) {
+		struct pgtable_level_info *li = &pgt->level_info[level];
+
+		li->desc = &level_descs[level];
+		if (li->desc->table_size > pgt->max_align)
+			pgt->max_align = li->desc->table_size;
+	}
+
+	pgt_calc_size(pgt, ranges, range_count);
+
+	return pgt;
+}
+
+static void pgt_destroy(struct pgtable *pgt)
+{
+	free(pgt->level_info);
+	free(pgt);
+}
+
+drm_intel_bo *
+igt_aux_pgtable_create(drm_intel_bufmgr *bufmgr,
+		       const struct igt_aux_pgtable_range *ranges,
+		       int range_count)
+{
+	struct pgtable *pgt;
+	drm_intel_bo *gem_bo;
+
+	pgt = pgt_create(aux_pgtable_desc, AUX_PGTABLE_LEVELS,
+			 ranges, range_count);
+
+	gem_bo = drm_intel_bo_alloc_for_render(bufmgr,
+					       "aux pgt",
+					       pgt->size, pgt->max_align);
+	igt_assert(gem_bo);
+
+	igt_assert(drm_intel_bo_map(gem_bo, true) == 0);
+	pgt_populate_entries(pgt, ranges, range_count, gem_bo);
+	igt_assert(drm_intel_bo_unmap(gem_bo) == 0);
+
+	pgt_destroy(pgt);
+
+	return gem_bo;
+}
diff --git a/lib/igt_aux_pgtable.h b/lib/igt_aux_pgtable.h
new file mode 100644
index 00000000..64c6b21f
--- /dev/null
+++ b/lib/igt_aux_pgtable.h
@@ -0,0 +1,21 @@
+#ifndef _IGT_AUX_PGTABLE_H_
+#define _IGT_AUX_PGTABLE_H_
+
+#include "intel_bufmgr.h"
+
+struct igt_aux_pgtable;
+
+struct igt_aux_pgtable_range {
+	uint64_t surface_base;
+	uint64_t surface_size;
+	uint64_t aux_base;
+	uint32_t tiling;
+	int bpp;
+};
+
+drm_intel_bo *
+igt_aux_pgtable_create(drm_intel_bufmgr *bufmgr,
+		       const struct igt_aux_pgtable_range *ranges,
+		       int range_count);
+
+#endif
diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index 069440cb..e7263ce1 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -673,6 +673,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define RING_VALID          0x00000001
 #define RING_INVALID        0x00000000
 
+#define GEN12_GFX_AUX_TABLE_BASE_ADDR	0x4200
+
 
 
 /* BitBlt Instructions
@@ -2570,6 +2572,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define MI_LOAD_SCAN_LINES_INCL		(0x12<<23)
 #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
+#define MI_LOAD_REGISTER_MEM		((0x29 << 23) | (4 - 2))
 
 /* Flush */
 #define MI_FLUSH			(0x04<<23)
diff --git a/lib/meson.build b/lib/meson.build
index 221ae28c..2135ddf3 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -46,6 +46,7 @@ lib_sources = [
 	'sw_sync.c',
 	'intel_reg_map.c',
 	'intel_iosf.c',
+	'igt_aux_pgtable.c',
 	'igt_kms.c',
 	'igt_fb.c',
 	'igt_core.c',
diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
index 694eb3cf..31e38c2b 100644
--- a/lib/rendercopy_gen9.c
+++ b/lib/rendercopy_gen9.c
@@ -15,6 +15,7 @@
 #include <i915_drm.h>
 
 #include "drmtest.h"
+#include "igt_aux_pgtable.h"
 #include "intel_bufmgr.h"
 #include "intel_batchbuffer.h"
 #include "intel_io.h"
@@ -972,19 +973,113 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset
 
 #define BATCH_STATE_SPLIT 2048
 
+static void
+gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch, uint32_t state)
+{
+	if (!state)
+		return;
+
+	OUT_BATCH(MI_LOAD_REGISTER_MEM);
+	OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, state);
+	OUT_BATCH(MI_NOOP);
+
+	OUT_BATCH(MI_LOAD_REGISTER_MEM);
+	OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR + 4);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, state + 4);
+	OUT_BATCH(MI_NOOP);
+}
+
+static int add_aux_pgtable_range(const struct igt_buf *buf,
+				 struct igt_aux_pgtable_range *range,
+				 uint64_t *pin_offset)
+{
+	if (!buf->aux.stride)
+		return 0;
+
+	drm_intel_bo_set_softpin_offset(buf->bo, *pin_offset);
+	igt_assert(buf->bo->offset64 == *pin_offset);
+
+	range->surface_base = *pin_offset;
+	range->surface_size = buf->size;
+	range->aux_base = *pin_offset + buf->aux.offset;
+	range->tiling = buf->tiling;
+	range->bpp = buf->bpp;
+
+	/* The GEN12+ compressed main surface must be 64kB aligned.  */
+	*pin_offset = ALIGN(*pin_offset + buf->bo->size, 0x10000);
+
+	return 1;
+}
+
+static drm_intel_bo *
+gen12_create_aux_pgtable_bo(drm_intel_bufmgr *bufmgr,
+			    const struct igt_buf *dst_buf,
+			    const struct igt_buf *src_buf)
+{
+	struct igt_aux_pgtable_range ranges[2];
+	int range_count;
+	uint64_t pin_offset;
+	drm_intel_bo *gem_bo;
+
+	range_count = 0;
+	pin_offset = 0;
+
+	range_count += add_aux_pgtable_range(dst_buf,
+					     &ranges[range_count], &pin_offset);
+	range_count += add_aux_pgtable_range(src_buf,
+					     &ranges[range_count], &pin_offset);
+
+	if (!range_count)
+		return NULL;
+
+	gem_bo = igt_aux_pgtable_create(bufmgr, ranges, range_count);
+	igt_assert(gem_bo);
+
+	return gem_bo;
+}
+
+static uint32_t
+gen12_create_aux_pgtable_state(struct intel_batchbuffer *batch,
+			       drm_intel_bo *aux_pgtable_bo)
+{
+	uint64_t *pgtable_ptr;
+	uint32_t pgtable_ptr_offset;
+	int ret;
+
+	if (!aux_pgtable_bo)
+		return 0;
+
+	pgtable_ptr = intel_batchbuffer_subdata_alloc(batch,
+						      sizeof(*pgtable_ptr),
+						      sizeof(*pgtable_ptr));
+	pgtable_ptr_offset = intel_batchbuffer_subdata_offset(batch,
+							      pgtable_ptr);
+
+	ret = drm_intel_bo_emit_reloc(batch->bo, pgtable_ptr_offset,
+				      aux_pgtable_bo, 0,
+				      I915_GEM_DOMAIN_RENDER, 0);
+	assert(ret == 0);
+
+	return pgtable_ptr_offset;
+}
+
 static
 void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
 			  drm_intel_context *context,
 			  const struct igt_buf *src, unsigned src_x,
 			  unsigned src_y, unsigned width, unsigned height,
 			  const struct igt_buf *dst, unsigned dst_x,
-			  unsigned dst_y, const uint32_t ps_kernel[][4],
+			  unsigned dst_y,
+			  drm_intel_bo *aux_pgtable_bo,
+			  const uint32_t ps_kernel[][4],
 			  uint32_t ps_kernel_size)
 {
 	uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
 	uint32_t scissor_state;
 	uint32_t vertex_buffer;
 	uint32_t batch_end;
+	uint32_t aux_pgtable_state;
 
 	igt_assert(src->bpp == dst->bpp);
 	intel_batchbuffer_flush_with_context(batch, context);
@@ -1007,6 +1102,10 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
 	viewport.cc_state = gen6_create_cc_viewport(batch);
 	viewport.sf_clip_state = gen7_create_sf_clip_viewport(batch);
 	scissor_state = gen6_create_scissor_rect(batch);
+
+	aux_pgtable_state = gen12_create_aux_pgtable_state(batch,
+							   aux_pgtable_bo);
+
 	/* TODO: theree is other state which isn't setup */
 
 	assert(batch->ptr < &batch->buffer[4095]);
@@ -1018,6 +1117,8 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
 	OUT_BATCH(G4X_PIPELINE_SELECT | PIPELINE_SELECT_3D |
 				GEN9_PIPELINE_SELECTION_MASK);
 
+	gen12_emit_aux_pgtable_state(batch, aux_pgtable_state);
+
 	gen8_emit_sip(batch);
 
 	gen7_emit_push_constants(batch);
@@ -1092,8 +1193,8 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch,
 
 {
 	_gen9_render_copyfunc(batch, context, src, src_x, src_y,
-			  width, height, dst, dst_x, dst_y, ps_kernel_gen9,
-			  sizeof(ps_kernel_gen9));
+			  width, height, dst, dst_x, dst_y, NULL,
+			  ps_kernel_gen9, sizeof(ps_kernel_gen9));
 }
 
 void gen11_render_copyfunc(struct intel_batchbuffer *batch,
@@ -1104,8 +1205,8 @@ void gen11_render_copyfunc(struct intel_batchbuffer *batch,
 
 {
 	_gen9_render_copyfunc(batch, context, src, src_x, src_y,
-			  width, height, dst, dst_x, dst_y, ps_kernel_gen11,
-			  sizeof(ps_kernel_gen11));
+			  width, height, dst, dst_x, dst_y, NULL,
+			  ps_kernel_gen11, sizeof(ps_kernel_gen11));
 }
 
 void gen12_render_copyfunc(struct intel_batchbuffer *batch,
@@ -1115,7 +1216,15 @@ void gen12_render_copyfunc(struct intel_batchbuffer *batch,
 			   const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
 
 {
+	drm_intel_bo *aux_pgtable_bo;
+
+	aux_pgtable_bo = gen12_create_aux_pgtable_bo(batch->bufmgr, dst, src);
+
 	_gen9_render_copyfunc(batch, context, src, src_x, src_y,
-			  width, height, dst, dst_x, dst_y, gen12_render_copy,
+			  width, height, dst, dst_x, dst_y,
+			  aux_pgtable_bo,
+			  gen12_render_copy,
 			  sizeof(gen12_render_copy));
+
+	drm_intel_bo_unreference(aux_pgtable_bo);
 }
-- 
2.17.1


* [igt-dev] [PATCH i-g-t 2/3] tests/gem_render_copy: Adjust the tgl+ compressed buf alignments
  2019-11-01 20:13 [igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support Imre Deak
@ 2019-11-01 20:13 ` Imre Deak
  2019-11-01 20:13 ` [igt-dev] [PATCH i-g-t 3/3] tests/gem_render_copy: Add compressed src to compressed dst subtests Imre Deak
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Imre Deak @ 2019-11-01 20:13 UTC (permalink / raw)
  To: igt-dev; +Cc: Brian Welty

On GEN12+ the render and media compressed surfaces have different alignment
requirements than on ICL, so adjust the buffer alignments accordingly.
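
As a quick standalone illustration of the difference (the arithmetic mirrors
scratch_buf_aux_width()/scratch_buf_aux_height() from the patch below; the
512x512 size is only an example):

#include <stdio.h>

#define DIV_ROUND_UP(x, y)	(((x) + (y) - 1) / (y))

int main(void)
{
	int width = 512, height = 512;

	/* gen9-11: 128 CCS bytes per 1024 pixels of width, 32 CCS rows per
	 * 512 pixel rows.
	 */
	printf("gen9-11 CCS: %d bytes x %d rows\n",
	       DIV_ROUND_UP(width, 1024) * 128,
	       DIV_ROUND_UP(height, 512) * 32);

	/* gen12+: 64 CCS bytes per 128 pixels of width, 1 CCS row per
	 * 32 pixel rows.
	 */
	printf("gen12+  CCS: %d bytes x %d rows\n",
	       DIV_ROUND_UP(width, 128) * 64,
	       DIV_ROUND_UP(height, 32));

	return 0;
}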

Cc: Mika Kahola <mika.kahola@intel.com>
Cc: Brian Welty <brian.welty@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 tests/i915/gem_render_copy.c | 49 ++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 11 deletions(-)

diff --git a/tests/i915/gem_render_copy.c b/tests/i915/gem_render_copy.c
index 261833d2..33a8404b 100644
--- a/tests/i915/gem_render_copy.c
+++ b/tests/i915/gem_render_copy.c
@@ -218,21 +218,35 @@ static void scratch_buf_write_to_png(data_t *data, struct igt_buf *buf,
 	free(linear);
 }
 
-static int scratch_buf_aux_width(const struct igt_buf *buf)
+static int scratch_buf_aux_width(uint32_t devid, const struct igt_buf *buf)
 {
+	/*
+	 * GEN12+: The AUX CCS tile size is 64 bytes x 1 tile row corresponding
+	 * to 4 main surface tiles->4*32=128 pixels.
+	 */
+	if (intel_gen(devid) >= 12)
+		return DIV_ROUND_UP(igt_buf_width(buf), 128) * 64;
+
 	return DIV_ROUND_UP(igt_buf_width(buf), 1024) * 128;
 }
 
-static int scratch_buf_aux_height(const struct igt_buf *buf)
+static int scratch_buf_aux_height(uint32_t devid, const struct igt_buf *buf)
 {
+	/*
+	 * GEN12+: The AUX CCS tile size is 64 bytes x 1 tile row corresponding
+	 * to 1 main surface tile row->32 pixel rows.
+	 */
+	if (intel_gen(devid) >= 12)
+		return DIV_ROUND_UP(igt_buf_height(buf), 32);
+
 	return DIV_ROUND_UP(igt_buf_height(buf), 512) * 32;
 }
 
 static void *linear_copy_aux(data_t *data, struct igt_buf *buf)
 {
 	void *map, *linear;
-	int aux_size = scratch_buf_aux_width(buf) *
-		scratch_buf_aux_height(buf);
+	int aux_size = scratch_buf_aux_width(data->devid, buf) *
+		scratch_buf_aux_height(data->devid, buf);
 
 	igt_assert_eq(posix_memalign(&linear, 16, aux_size), 0);
 
@@ -261,8 +275,8 @@ static void scratch_buf_aux_write_to_png(data_t *data,
 
 	surface = cairo_image_surface_create_for_data(linear,
 						      CAIRO_FORMAT_A8,
-						      scratch_buf_aux_width(buf),
-						      scratch_buf_aux_height(buf),
+						      scratch_buf_aux_width(data->devid, buf),
+						      scratch_buf_aux_height(data->devid, buf),
 						      buf->aux.stride);
 	ret = cairo_surface_write_to_png(surface, make_filename(filename));
 	igt_assert(ret == CAIRO_STATUS_SUCCESS);
@@ -413,13 +427,26 @@ static void scratch_buf_init(data_t *data, struct igt_buf *buf,
 		igt_assert(tiling == I915_TILING_Y ||
 			   tiling == I915_TILING_Yf);
 
-		buf->stride = ALIGN(width * (bpp / 8), 128);
+		/*
+		 * On GEN12+ we align the main surface to 4 * 4 main surface
+		 * tiles, which is 64kB. This corresponds to 4 * 64 bytes of
+		 * AUX CCS data, which in turn is covered/pointed to by one L1
+		 * AUX page table entry.
+		 */
+		if (intel_gen(data->devid) >= 12)
+			buf->stride = ALIGN(width * (bpp / 8), 128 * 4);
+		else
+			buf->stride = ALIGN(width * (bpp / 8), 128);
+
+		if (intel_gen(data->devid) >= 12)
+			height = ALIGN(height, 4 * 32);
+
 		buf->size = buf->stride * height;
 		buf->tiling = tiling;
 		buf->bpp = bpp;
 
-		aux_width = scratch_buf_aux_width(buf);
-		aux_height = scratch_buf_aux_height(buf);
+		aux_width = scratch_buf_aux_width(data->devid, buf);
+		aux_height = scratch_buf_aux_height(data->devid, buf);
 
 		buf->aux.offset = buf->stride * ALIGN(height, 32);
 		buf->aux.stride = aux_width;
@@ -525,8 +552,8 @@ scratch_buf_check_all(data_t *data,
 static void scratch_buf_aux_check(data_t *data,
 				  struct igt_buf *buf)
 {
-	int aux_size = scratch_buf_aux_width(buf) *
-		scratch_buf_aux_height(buf);
+	int aux_size = scratch_buf_aux_width(data->devid, buf) *
+		scratch_buf_aux_height(data->devid, buf);
 	uint8_t *linear;
 	int i;
 
-- 
2.17.1


* [igt-dev] [PATCH i-g-t 3/3] tests/gem_render_copy: Add compressed src to compressed dst subtests
  2019-11-01 20:13 [igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support Imre Deak
  2019-11-01 20:13 ` [igt-dev] [PATCH i-g-t 2/3] tests/gem_render_copy: Adjust the tgl+ compressed buf alignments Imre Deak
@ 2019-11-01 20:13 ` Imre Deak
  2019-11-01 21:19 ` [igt-dev] ✗ GitLab.Pipeline: warning for series starting with [i-g-t,1/3] lib/rendercopy: Add AUX page table support Patchwork
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Imre Deak @ 2019-11-01 20:13 UTC (permalink / raw)
  To: igt-dev; +Cc: Brian Welty

Add new subtests that blit from a compressed source to a compressed
destination buffer.

Cc: Mika Kahola <mika.kahola@intel.com>
Cc: Brian Welty <brian.welty@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 tests/i915/gem_render_copy.c | 126 ++++++++++++++++++++++++++---------
 1 file changed, 93 insertions(+), 33 deletions(-)

diff --git a/tests/i915/gem_render_copy.c b/tests/i915/gem_render_copy.c
index 33a8404b..b4671c30 100644
--- a/tests/i915/gem_render_copy.c
+++ b/tests/i915/gem_render_copy.c
@@ -570,9 +570,13 @@ static void scratch_buf_aux_check(data_t *data,
 		     "Aux surface indicates that nothing was compressed\n");
 }
 
-static void test(data_t *data, uint32_t tiling, uint64_t ccs_modifier)
+#define DST_COMPRESSED	1
+#define SRC_COMPRESSED	2
+
+static void test(data_t *data, uint32_t dst_tiling, uint32_t src_tiling,
+		 int flags)
 {
-	struct igt_buf dst, ccs, ref;
+	struct igt_buf dst, src_ccs, dst_ccs, ref;
 	struct {
 		struct igt_buf buf;
 		const char *filename;
@@ -600,22 +604,34 @@ static void test(data_t *data, uint32_t tiling, uint64_t ccs_modifier)
 			.x = 1, .y = 1,
 		},
 	};
-
 	int opt_dump_aub = igt_aub_dump_enabled();
 	int num_src = ARRAY_SIZE(src);
+	bool src_compressed = flags & SRC_COMPRESSED;
+	bool dst_compressed = flags & DST_COMPRESSED;
+
+	/*
+	 * The tiling for uncompressed source buffers is determined by the
+	 * tiling of the src[] buffers above.
+	 */
+	igt_assert(!src_tiling || src_compressed);
 
 	/* no Yf before gen9 */
 	if (intel_gen(data->devid) < 9)
 		num_src--;
 
-	if (tiling == I915_TILING_Yf || ccs_modifier)
+	if (dst_tiling == I915_TILING_Yf || src_tiling == I915_TILING_Yf ||
+	    src_compressed || dst_compressed)
 		igt_require(intel_gen(data->devid) >= 9);
 
 	for (int i = 0; i < num_src; i++)
 		scratch_buf_init(data, &src[i].buf, WIDTH, HEIGHT, src[i].tiling, false);
-	scratch_buf_init(data, &dst, WIDTH, HEIGHT, tiling, false);
-	if (ccs_modifier)
-		scratch_buf_init(data, &ccs, WIDTH, HEIGHT, ccs_modifier, true);
+	scratch_buf_init(data, &dst, WIDTH, HEIGHT, dst_tiling, false);
+	if (src_compressed)
+		scratch_buf_init(data, &src_ccs, WIDTH, HEIGHT,
+				 src_tiling, true);
+	if (dst_compressed)
+		scratch_buf_init(data, &dst_ccs, WIDTH, HEIGHT,
+				 dst_tiling, true);
 	scratch_buf_init(data, &ref, WIDTH, HEIGHT, I915_TILING_NONE, false);
 
 	for (int i = 0; i < num_src; i++)
@@ -655,26 +671,45 @@ static void test(data_t *data, uint32_t tiling, uint64_t ccs_modifier)
 	 *	 |dst|src|
 	 *	  -------
 	 */
-	if (ccs_modifier)
+	if (src_compressed)
 		data->render_copy(data->batch, NULL,
 				  &dst, 0, 0, WIDTH, HEIGHT,
-				  &ccs, 0, 0);
+				  &src_ccs, 0, 0);
 
 	for (int i = 0; i < num_src; i++)
 		data->render_copy(data->batch, NULL,
-				  &src[i].buf, WIDTH/4, HEIGHT/4, WIDTH/2-2, HEIGHT/2-2,
-				  ccs_modifier ? &ccs : &dst, src[i].x, src[i].y);
+				  &src[i].buf,
+				  WIDTH/4, HEIGHT/4, WIDTH/2-2, HEIGHT/2-2,
+				  src_compressed ? &src_ccs : &dst,
+				  src[i].x, src[i].y);
+
+	if (src_compressed || dst_compressed)
+		data->render_copy(data->batch, NULL,
+				  src_compressed ? &src_ccs : &dst,
+				  0, 0, WIDTH, HEIGHT,
+				  dst_compressed ? &dst_ccs : &dst,
+				  0, 0);
 
-	if (ccs_modifier)
+	if (dst_compressed)
 		data->render_copy(data->batch, NULL,
-				  &ccs, 0, 0, WIDTH, HEIGHT,
-				  &dst, 0, 0);
+				  &dst_ccs,
+				  0, 0, WIDTH, HEIGHT,
+				  &dst,
+				  0, 0);
 
 	if (opt_dump_png){
 		scratch_buf_write_to_png(data, &dst, "result.png");
-		if (ccs_modifier) {
-			scratch_buf_write_to_png(data, &ccs, "compressed.png");
-			scratch_buf_aux_write_to_png(data, &ccs, "compressed-aux.png");
+		if (src_compressed) {
+			scratch_buf_write_to_png(data, &src_ccs,
+						 "compressed-src.png");
+			scratch_buf_aux_write_to_png(data, &src_ccs,
+						     "compressed-src-aux.png");
+		}
+		if (dst_compressed) {
+			scratch_buf_write_to_png(data, &src_ccs,
+						 "compressed-dst.png");
+			scratch_buf_aux_write_to_png(data, &src_ccs,
+						     "compressed-dst-aux.png");
 		}
 	}
 
@@ -692,12 +727,16 @@ static void test(data_t *data, uint32_t tiling, uint64_t ccs_modifier)
 		scratch_buf_check(data, &dst, &ref, WIDTH - 10, HEIGHT - 10);
 	}
 
-	if (ccs_modifier)
-		scratch_buf_aux_check(data, &ccs);
+	if (src_compressed)
+		scratch_buf_aux_check(data, &src_ccs);
+	if (dst_compressed)
+		scratch_buf_aux_check(data, &dst_ccs);
 
 	scratch_buf_fini(&ref);
-	if (ccs_modifier)
-		scratch_buf_fini(&ccs);
+	if (dst_compressed)
+		scratch_buf_fini(&dst_ccs);
+	if (src_compressed)
+		scratch_buf_fini(&src_ccs);
 	scratch_buf_fini(&dst);
 	for (int i = 0; i < num_src; i++)
 		scratch_buf_fini(&src[i].buf);
@@ -747,31 +786,52 @@ igt_main_args("da", NULL, help_str, opt_handler, NULL)
 	}
 
 	igt_subtest("linear")
-		test(&data, I915_TILING_NONE, 0);
+		test(&data, I915_TILING_NONE, 0, 0);
 	igt_subtest("x-tiled")
-		test(&data, I915_TILING_X, 0);
+		test(&data, I915_TILING_X, 0, 0);
 	igt_subtest("y-tiled")
-		test(&data, I915_TILING_Y, 0);
+		test(&data, I915_TILING_Y, 0, 0);
 	igt_subtest("yf-tiled")
-		test(&data, I915_TILING_Yf, 0);
+		test(&data, I915_TILING_Yf, 0, 0);
 
 	igt_subtest("y-tiled-ccs-to-linear")
-		test(&data, I915_TILING_NONE, I915_TILING_Y);
+		test(&data, I915_TILING_NONE, I915_TILING_Y,
+		     SRC_COMPRESSED);
 	igt_subtest("y-tiled-ccs-to-x-tiled")
-		test(&data, I915_TILING_X, I915_TILING_Y);
+		test(&data, I915_TILING_X, I915_TILING_Y,
+		     SRC_COMPRESSED);
 	igt_subtest("y-tiled-ccs-to-y-tiled")
-		test(&data, I915_TILING_Y, I915_TILING_Y);
+		test(&data, I915_TILING_Y, I915_TILING_Y,
+		     SRC_COMPRESSED);
 	igt_subtest("y-tiled-ccs-to-yf-tiled")
-		test(&data, I915_TILING_Yf, I915_TILING_Y);
+		test(&data, I915_TILING_Yf, I915_TILING_Y,
+		     SRC_COMPRESSED);
 
 	igt_subtest("yf-tiled-ccs-to-linear")
-		test(&data, I915_TILING_NONE, I915_TILING_Yf);
+		test(&data, I915_TILING_NONE, I915_TILING_Yf,
+		     SRC_COMPRESSED);
 	igt_subtest("yf-tiled-ccs-to-x-tiled")
-		test(&data, I915_TILING_X, I915_TILING_Yf);
+		test(&data, I915_TILING_X, I915_TILING_Yf,
+		     SRC_COMPRESSED);
 	igt_subtest("yf-tiled-ccs-to-y-tiled")
-		test(&data, I915_TILING_Y, I915_TILING_Yf);
+		test(&data, I915_TILING_Y, I915_TILING_Yf,
+		     SRC_COMPRESSED);
 	igt_subtest("yf-tiled-ccs-to-yf-tiled")
-		test(&data, I915_TILING_Yf, I915_TILING_Yf);
+		test(&data, I915_TILING_Yf, I915_TILING_Yf,
+		     SRC_COMPRESSED);
+
+	igt_subtest("y-tiled-ccs-to-y-tiled-ccs")
+		test(&data, I915_TILING_Y, I915_TILING_Y,
+		     SRC_COMPRESSED | DST_COMPRESSED);
+	igt_subtest("yf-tiled-ccs-to-yf-tiled-ccs")
+		test(&data, I915_TILING_Yf, I915_TILING_Yf,
+		     SRC_COMPRESSED | DST_COMPRESSED);
+	igt_subtest("y-tiled-ccs-to-yf-tiled-ccs")
+		test(&data, I915_TILING_Yf, I915_TILING_Y,
+		     SRC_COMPRESSED | DST_COMPRESSED);
+	igt_subtest("yf-tiled-ccs-to-y-tiled-ccs")
+		test(&data, I915_TILING_Y, I915_TILING_Yf,
+		     SRC_COMPRESSED | DST_COMPRESSED);
 
 	igt_fixture {
 		igt_stop_hang_detector();
-- 
2.17.1


* [igt-dev] ✗ GitLab.Pipeline: warning for series starting with [i-g-t,1/3] lib/rendercopy: Add AUX page table support
  2019-11-01 20:13 [igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support Imre Deak
  2019-11-01 20:13 ` [igt-dev] [PATCH i-g-t 2/3] tests/gem_render_copy: Adjust the tgl+ compressed buf alignments Imre Deak
  2019-11-01 20:13 ` [igt-dev] [PATCH i-g-t 3/3] tests/gem_render_copy: Add compressed src to compressed dst subtests Imre Deak
@ 2019-11-01 21:19 ` Patchwork
  2019-11-01 21:41 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2019-11-01 21:19 UTC (permalink / raw)
  To: Imre Deak; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,1/3] lib/rendercopy: Add AUX page table support
URL   : https://patchwork.freedesktop.org/series/68890/
State : warning

== Summary ==

Did not get list of undocumented tests for this run, something is wrong!

Other than that, pipeline status: FAILED.

see https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/pipelines/75408 for more details

== Logs ==

For more details see: https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/pipelines/75408

* [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/3] lib/rendercopy: Add AUX page table support
  2019-11-01 20:13 [igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support Imre Deak
                   ` (2 preceding siblings ...)
  2019-11-01 21:19 ` [igt-dev] ✗ GitLab.Pipeline: warning for series starting with [i-g-t,1/3] lib/rendercopy: Add AUX page table support Patchwork
@ 2019-11-01 21:41 ` Patchwork
  2019-11-04 11:28 ` [igt-dev] [PATCH i-g-t 1/3] " Chris Wilson
  2019-11-04 11:30 ` Chris Wilson
  5 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2019-11-01 21:41 UTC (permalink / raw)
  To: Imre Deak; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,1/3] lib/rendercopy: Add AUX page table support
URL   : https://patchwork.freedesktop.org/series/68890/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_7243 -> IGTPW_3644
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3644/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_3644:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * {igt@i915_selftest@live_gt_pm}:
    - {fi-icl-guc}:       NOTRUN -> [DMESG-FAIL][1] +2 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3644/fi-icl-guc/igt@i915_selftest@live_gt_pm.html

  * igt@i915_selftest@live_hugepages:
    - {fi-icl-guc}:       NOTRUN -> [DMESG-WARN][2] +16 similar issues
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3644/fi-icl-guc/igt@i915_selftest@live_hugepages.html

  
Known issues
------------

  Here are the changes found in IGTPW_3644 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_cpu_reloc@basic:
    - fi-icl-u3:          [PASS][3] -> [DMESG-WARN][4] ([fdo#107724]) +1 similar issue
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7243/fi-icl-u3/igt@gem_cpu_reloc@basic.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3644/fi-icl-u3/igt@gem_cpu_reloc@basic.html

  
#### Possible fixes ####

  * igt@gem_ctx_create@basic-files:
    - {fi-icl-u4}:        [INCOMPLETE][5] ([fdo#107713] / [fdo#109100]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7243/fi-icl-u4/igt@gem_ctx_create@basic-files.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3644/fi-icl-u4/igt@gem_ctx_create@basic-files.html
    - {fi-icl-guc}:       [INCOMPLETE][7] ([fdo#107713] / [fdo#109100]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7243/fi-icl-guc/igt@gem_ctx_create@basic-files.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3644/fi-icl-guc/igt@gem_ctx_create@basic-files.html

  * igt@gem_ctx_switch@legacy-render:
    - fi-apl-guc:         [INCOMPLETE][9] ([fdo#103927]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7243/fi-apl-guc/igt@gem_ctx_switch@legacy-render.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3644/fi-apl-guc/igt@gem_ctx_switch@legacy-render.html

  * igt@gem_exec_create@basic:
    - {fi-icl-y}:         [INCOMPLETE][11] ([fdo#107713]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7243/fi-icl-y/igt@gem_exec_create@basic.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3644/fi-icl-y/igt@gem_exec_create@basic.html

  * igt@gem_mmap@basic-small-bo:
    - fi-icl-u3:          [DMESG-WARN][13] ([fdo#107724]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7243/fi-icl-u3/igt@gem_mmap@basic-small-bo.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3644/fi-icl-u3/igt@gem_mmap@basic-small-bo.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109100]: https://bugs.freedesktop.org/show_bug.cgi?id=109100


Participating hosts (51 -> 42)
------------------------------

  Additional (1): fi-kbl-7500u 
  Missing    (10): fi-hsw-4200u fi-byt-squawks fi-icl-u2 fi-bwr-2160 fi-bsw-cyan fi-ctg-p8600 fi-gdg-551 fi-tgl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5258 -> IGTPW_3644

  CI-20190529: 20190529
  CI_DRM_7243: 1b175df1a7543eff5cd4dadc1589145064db5b12 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_3644: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3644/index.html
  IGT_5258: c8a88b614ac057b01809a17b9e87a197195b44ad @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools



== Testlist changes ==

+igt@gem_render_copy@yf-tiled-ccs-to-yf-tiled-ccs
+igt@gem_render_copy@yf-tiled-ccs-to-y-tiled-ccs
+igt@gem_render_copy@y-tiled-ccs-to-yf-tiled-ccs
+igt@gem_render_copy@y-tiled-ccs-to-y-tiled-ccs

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3644/index.html

* Re: [igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support
  2019-11-01 20:13 [igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support Imre Deak
                   ` (3 preceding siblings ...)
  2019-11-01 21:41 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
@ 2019-11-04 11:28 ` Chris Wilson
  2019-11-04 14:07   ` Imre Deak
  2019-11-04 11:30 ` Chris Wilson
  5 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2019-11-04 11:28 UTC (permalink / raw)
  To: Imre Deak, igt-dev; +Cc: Brian Welty

Quoting Imre Deak (2019-11-01 20:13:09)
> On GEN12+ the AUX CCS surfaces required by the render and media
> compression must be specified by a 3 level page table directory, which
> translates the main surface graphics address to the AUX CCS surface
> graphics address. For this purpose add support for creating a GEM buffer
> to translate the linear surface address range to the linear AUX surface
> address range.
> 
> The buffers containing the main surface must be pinned down, since the
> directory table entry indices depend on the surface address, and they
> must be 64kB aligned. The page table can be relocated OTOH, so allow
> that and emit the required relocation entries.
> 
> Cc: Mika Kahola <mika.kahola@intel.com>
> Cc: Brian Welty <brian.welty@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Imre Deak <imre.deak@intel.com>
> ---
>  lib/igt_aux_pgtable.c | 381 ++++++++++++++++++++++++++++++++++++++++++
>  lib/igt_aux_pgtable.h |  21 +++

This is not igt, but a libdrm_intel wrapper.

Missing the autotools support.

>  lib/intel_reg.h       |   3 +
>  lib/meson.build       |   1 +
>  lib/rendercopy_gen9.c | 121 +++++++++++++-
>  5 files changed, 521 insertions(+), 6 deletions(-)
>  create mode 100644 lib/igt_aux_pgtable.c
>  create mode 100644 lib/igt_aux_pgtable.h
> 
> diff --git a/lib/igt_aux_pgtable.c b/lib/igt_aux_pgtable.c
> new file mode 100644
> index 00000000..aaa24cfd
> --- /dev/null
> +++ b/lib/igt_aux_pgtable.c
> @@ -0,0 +1,381 @@
> +#include <stdint.h>
> +#include <stdbool.h>
> +
> +#include "drmtest.h"
> +#include "igt_aux_pgtable.h"
> +#include "intel_bufmgr.h"
> +#include "intel_batchbuffer.h"
> +#include "ioctl_wrappers.h"
> +
> +#include "i915/gem_mman.h"
> +
> +#define BITS_PER_LONG          (sizeof(long) * 8)
> +#define BITMASK(e, s)          ((~0UL << (s)) & \
> +                                (~0UL >> (BITS_PER_LONG - 1 - (e))))
> +
> +#define ALIGN_DOWN(x, a)       ALIGN((x) - ((a) - 1), (a))
> +
> +#define CL_SIZE                        64
> +/*
> + * The size of a block on the CCS surface that is covered/pointed to by one
> + * L1 AUX pagetable entry. This size corresponds to the 1<<8 alignment of the
> + * pointers in the L1 entry.
> + */
> +#define CCS_BLOCK_SIZE         (4 * CL_SIZE)
> +/*
> + * 256 bytes per CCS block size *
> + * 8   bits per byte /
> + * 2   bits per surface CL *
> + * 64  bytes per surface CL
> + */
> +#define SURFACE_BLOCK_SIZE     (CCS_BLOCK_SIZE * 8 / 2 * CL_SIZE)
> +#define AUX_PGTABLE_VALID      1
> +#define AUX_PGTABLE_LEVELS     3
> +
> +#define ADDRESS_BITS           48
> +
> +#define max(a, b)              ((a) > (b) ? (a) : (b))
> +
> +struct pgtable_level_desc {
> +       int idx_shift;
> +       int idx_bits;
> +       int entry_ptr_shift;
> +       int table_size;
> +};
> +
> +struct pgtable_level_info {
> +       const struct pgtable_level_desc *desc;
> +       int table_count;
> +       int alloc_base;
> +       int alloc_ptr;
> +};
> +
> +struct pgtable {
> +       int levels;
> +       struct pgtable_level_info *level_info;
> +       int size;
> +       int max_align;
> +       drm_intel_bo *bo;
> +};
> +
> +static const struct pgtable_level_desc aux_pgtable_desc[AUX_PGTABLE_LEVELS] = {
> +       {
> +               .idx_shift = 16,
> +               .idx_bits = 8,
> +               .entry_ptr_shift = 8,
> +               .table_size = 8 * 1024,
> +       },
> +       {
> +               .idx_shift = 24,
> +               .idx_bits = 12,
> +               .entry_ptr_shift = 13,
> +               .table_size = 32 * 1024,
> +       },
> +       {
> +               .idx_shift = 36,
> +               .idx_bits = 12,
> +               .entry_ptr_shift = 15,
> +               .table_size = 32 * 1024,
> +       },
> +};
> +
> +static int
> +pgt_table_count(int address_bits,
> +               const struct igt_aux_pgtable_range *ranges, int range_count)
> +{
> +       uint64_t end;
> +       int count;
> +       int i;
> +
> +       count = 0;
> +       end = 0;
> +       for (i = 0; i < range_count; i++) {
> +               const struct igt_aux_pgtable_range *r = &ranges[i];
> +               uint64_t start;
> +
> +               /* We require ranges to be sorted. */
> +               igt_assert(i == 0 ||
> +                          r->surface_base >= ranges[i - 1].surface_base +
> +                                             ranges[i - 1].surface_size);
> +
> +               start = ALIGN_DOWN(r->surface_base, 1UL << address_bits);
> +               /* Avoid double counting for overlapping aligned ranges. */
> +               start = max(start, end);
> +
> +               end = ALIGN(r->surface_base + r->surface_size,
> +                           1UL << address_bits);
> +               igt_assert(end >= start);
> +
> +               count += (end - start) >> address_bits;
> +       }
> +
> +       return count;
> +}
> +
> +static void
> +pgt_calc_size(struct pgtable *pgt,
> +             const struct igt_aux_pgtable_range *ranges, int range_count)
> +{
> +       int level;
> +
> +       pgt->size = 0;
> +
> +       for (level = pgt->levels; level > 0; level--) {
> +               struct pgtable_level_info *li = &pgt->level_info[level - 1];
> +
> +               li->alloc_base = ALIGN(pgt->size, li->desc->table_size);
> +               li->alloc_ptr = li->alloc_base;
> +
> +               li->table_count = pgt_table_count(li->desc->idx_shift +
> +                                                 li->desc->idx_bits,
> +                                                 ranges, range_count);
> +
> +               pgt->size = li->alloc_base +
> +                           li->table_count * li->desc->table_size;
> +       }
> +}
> +
> +static uint64_t pgt_alloc_table(struct pgtable *pgt, int level)
> +{
> +       struct pgtable_level_info *li = &pgt->level_info[level - 1];
> +       uint64_t table;
> +
> +       table = li->alloc_ptr;
> +       li->alloc_ptr += li->desc->table_size;
> +
> +       igt_assert(li->alloc_ptr <=
> +                  li->alloc_base + li->table_count * li->desc->table_size);
> +
> +       return table;
> +}
> +
> +static int pgt_address_index(struct pgtable *pgt, int level, uint64_t address)
> +{
> +       const struct pgtable_level_desc *ld = pgt->level_info[level - 1].desc;
> +       uint64_t mask = BITMASK(ld->idx_shift + ld->idx_bits - 1,
> +                               ld->idx_shift);
> +
> +       return (address & mask) >> ld->idx_shift;
> +}
> +
> +static uint64_t ptr_mask(struct pgtable *pgt, int level)
> +{
> +       const struct pgtable_level_desc *ld = pgt->level_info[level - 1].desc;
> +
> +       return BITMASK(ADDRESS_BITS - 1, ld->entry_ptr_shift);
> +}
> +
> +static uint64_t pgt_entry_ptr(struct pgtable *pgt, int level, uint64_t entry)
> +{
> +       return entry & ptr_mask(pgt, level);
> +}
> +
> +static uint64_t pgt_mkentry(struct pgtable *pgt, int level, uint64_t ptr,
> +                           uint64_t flags)
> +{
> +       igt_assert(!(ptr & ~ptr_mask(pgt, level)));
> +
> +       return ptr | flags;
> +}
> +
> +static uint64_t
> +pgt_get_table(struct pgtable *pgt, uint64_t parent_table,
> +             int level, uint64_t address, uint64_t flags)
> +{
> +       uint64_t *table_ptr = pgt->bo->virtual + parent_table;
> +       int entry_idx = pgt_address_index(pgt, level, address);
> +       uint64_t *entry_ptr;
> +
> +       entry_ptr = &table_ptr[entry_idx];
> +       if (!*entry_ptr) {
> +               uint64_t child_table = pgt_alloc_table(pgt, level - 1);
> +
> +               *entry_ptr = pgt_mkentry(pgt, level, child_table, flags);
> +
> +               drm_intel_bo_emit_reloc(pgt->bo,
> +                                       parent_table + entry_idx * sizeof(uint64_t),
> +                                       pgt->bo, *entry_ptr,
> +                                       I915_GEM_DOMAIN_INSTRUCTION, 0);

This is missing setting the correct value into the table, and so may be
skipped by the relocation pass inside execbuf.
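
I.e. drm_intel_bo_emit_reloc() only records the relocation entry; the caller
still has to store the presumed address at the relocation site itself, since
execbuf skips rewriting it when the presumed offset already matches. One
possible shape of that, as an untested sketch (whether this is the intended
fix is an assumption):

	*entry_ptr = pgt_mkentry(pgt, level, child_table, flags);

	drm_intel_bo_emit_reloc(pgt->bo,
				parent_table + entry_idx * sizeof(uint64_t),
				pgt->bo, *entry_ptr,
				I915_GEM_DOMAIN_INSTRUCTION, 0);

	/* Keep the presumed GPU address in the entry so a skipped reloc
	 * still yields a valid pointer.
	 */
	*entry_ptr = pgt_mkentry(pgt, level,
				 pgt->bo->offset64 + child_table, flags);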

> +       }
> +
> +       return pgt_entry_ptr(pgt, level, *entry_ptr);
> +}
> +
> +static void
> +pgt_set_l1_entry(struct pgtable *pgt, uint64_t l1_table,
> +                uint64_t address, uint64_t ptr, uint64_t flags)
> +{
> +       uint64_t *l1_table_ptr;
> +       uint64_t *l1_entry_ptr;
> +
> +       l1_table_ptr = pgt->bo->virtual + l1_table;
> +       l1_entry_ptr = &l1_table_ptr[pgt_address_index(pgt, 1, address)];
> +       *l1_entry_ptr = pgt_mkentry(pgt, 1, ptr, flags);
> +}
> +
> +static uint64_t pgt_get_l1_flags(const struct igt_aux_pgtable_range *range)
> +{
> +       /*
> +        * The offset of .tile_mode isn't specified by bspec, it's what Mesa
> +        * uses.
> +        */
> +       union {
> +               struct {
> +                       uint64_t        valid:1;
> +                       uint64_t        compression_mod:2;
> +                       uint64_t        lossy_compression:1;
> +                       uint64_t        pad:4;
> +                       uint64_t        addr:40;
> +                       uint64_t        pad2:4;
> +                       uint64_t        tile_mode:2;
> +                       uint64_t        depth:3;
> +                       uint64_t        ycr:1;
> +                       uint64_t        format:6;
> +               } e;
> +               uint64_t l;
> +       } entry = {
> +               .e = {
> +                       .valid = 1,
> +                       .tile_mode = range->tiling == I915_TILING_Y ? 1 : 0,
> +                       .depth = 5,             /* 32bpp */
> +                       .format = 0xA,          /* B8G8R8A8_UNORM */
> +               }
> +       };
> +
> +       /*
> +        * TODO: Clarify if Yf is supported and if we need to differentiate
> +        *       Ys and Yf.
> +        *       Add support for more formats.
> +        */
> +       igt_assert(range->tiling == I915_TILING_Y ||
> +                  range->tiling == I915_TILING_Yf ||
> +                  range->tiling == I915_TILING_Ys);
> +
> +       igt_assert(range->bpp == 32);
> +
> +       return entry.l;
> +}
> +
> +static uint64_t pgt_get_lx_flags(void)
> +{
> +       union {
> +               struct {
> +                       uint64_t        valid:1;
> +                       uint64_t        addr:47;
> +                       uint64_t        pad:16;
> +               } e;
> +               uint64_t l;
> +       } entry = {
> +               .e = {
> +                       .valid = 1,
> +               }
> +       };
> +
> +       return entry.l;
> +}
> +
> +static void
> +pgt_populate_entries_for_range(struct pgtable *pgt,
> +                              const struct igt_aux_pgtable_range *range,
> +                              drm_intel_bo *bo,
> +                              uint64_t top_table)
> +{
> +       uint64_t surface_addr = range->surface_base;
> +       uint64_t surface_end = surface_addr + range->surface_size;
> +       uint64_t aux_addr = range->aux_base;
> +       uint64_t l1_flags = pgt_get_l1_flags(range);
> +       uint64_t lx_flags = pgt_get_lx_flags();
> +
> +       pgt->bo = bo;
> +
> +       for (; surface_addr < surface_end;
> +            surface_addr += SURFACE_BLOCK_SIZE, aux_addr += CCS_BLOCK_SIZE) {
> +               uint64_t table = top_table;
> +               int level;
> +
> +               for (level = pgt->levels; level > 1; level--)
> +                       table = pgt_get_table(pgt, table, level,
> +                                             surface_addr, lx_flags);
> +
> +               pgt_set_l1_entry(pgt, table, surface_addr, aux_addr, l1_flags);
> +       }
> +}
> +
> +static void pgt_populate_entries(struct pgtable *pgt,
> +                                const struct igt_aux_pgtable_range *ranges,
> +                                int range_count,
> +                                drm_intel_bo *gem_bo)
> +{
> +       uint64_t top_table;
> +       int i;
> +
> +       igt_assert(gem_bo->size >= pgt->size);
> +       memset(gem_bo->virtual, 0, pgt->size);
> +
> +       top_table = pgt_alloc_table(pgt, pgt->levels);
> +       /* Top level table must be at offset 0. */
> +       igt_assert(top_table == 0);
> +
> +       for (i = 0; i < range_count; i++)
> +               pgt_populate_entries_for_range(pgt, &ranges[i], gem_bo,
> +                                              top_table);
> +}
> +
> +static struct pgtable *
> +pgt_create(const struct pgtable_level_desc *level_descs, int levels,
> +          const struct igt_aux_pgtable_range *ranges, int range_count)
> +{
> +       struct pgtable *pgt;
> +       int level;
> +
> +       pgt = calloc(1, sizeof(*pgt));
> +       igt_assert(pgt);
> +
> +       pgt->levels = levels;
> +
> +       pgt->level_info = calloc(levels, sizeof(*pgt->level_info));
> +       igt_assert(pgt->level_info);
> +
> +       for (level = 0; level < pgt->levels; level++) {
> +               struct pgtable_level_info *li = &pgt->level_info[level];
> +
> +               li->desc = &level_descs[level];
> +               if (li->desc->table_size > pgt->max_align)
> +                       pgt->max_align = li->desc->table_size;
> +       }
> +
> +       pgt_calc_size(pgt, ranges, range_count);
> +
> +       return pgt;
> +}
> +
> +static void pgt_destroy(struct pgtable *pgt)
> +{
> +       free(pgt->level_info);
> +       free(pgt);
> +}
> +
> +drm_intel_bo *
> +igt_aux_pgtable_create(drm_intel_bufmgr *bufmgr,
> +                      const struct igt_aux_pgtable_range *ranges,
> +                      int range_count)
> +{
> +       struct pgtable *pgt;
> +       drm_intel_bo *gem_bo;
> +
> +       pgt = pgt_create(aux_pgtable_desc, AUX_PGTABLE_LEVELS,
> +                        ranges, range_count);
> +
> +       gem_bo = drm_intel_bo_alloc_for_render(bufmgr,
> +                                              "aux pgt",
> +                                              pgt->size, pgt->max_align);
> +       igt_assert(gem_bo);
> +
> +       igt_assert(drm_intel_bo_map(gem_bo, true) == 0);
> +       pgt_populate_entries(pgt, ranges, range_count, gem_bo);
> +       igt_assert(drm_intel_bo_unmap(gem_bo) == 0);
> +
> +       pgt_destroy(pgt);
> +
> +       return gem_bo;
> +}
> diff --git a/lib/igt_aux_pgtable.h b/lib/igt_aux_pgtable.h
> new file mode 100644
> index 00000000..64c6b21f
> --- /dev/null
> +++ b/lib/igt_aux_pgtable.h
> @@ -0,0 +1,21 @@
> +#ifndef _IGT_AUX_PGTABLE_H_
> +#define _IGT_AUX_PGTABLE_H_
> +
> +#include "intel_bufmgr.h"
> +
> +struct igt_aux_pgtable;
> +
> +struct igt_aux_pgtable_range {
> +       uint64_t surface_base;
> +       uint64_t surface_size;
> +       uint64_t aux_base;
> +       uint32_t tiling;
> +       int bpp;
> +};
> +
> +drm_intel_bo *
> +igt_aux_pgtable_create(drm_intel_bufmgr *bufmgr,
> +                      const struct igt_aux_pgtable_range *ranges,
> +                      int range_count);
> +
> +#endif
> diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> index 069440cb..e7263ce1 100644
> --- a/lib/intel_reg.h
> +++ b/lib/intel_reg.h
> @@ -673,6 +673,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  #define RING_VALID          0x00000001
>  #define RING_INVALID        0x00000000
>  
> +#define GEN12_GFX_AUX_TABLE_BASE_ADDR  0x4200
> +
>  
>  
>  /* BitBlt Instructions
> @@ -2570,6 +2572,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  
>  #define MI_LOAD_SCAN_LINES_INCL                (0x12<<23)
>  #define MI_LOAD_REGISTER_IMM           ((0x22 << 23) | 1)
> +#define MI_LOAD_REGISTER_MEM           ((0x29 << 23) | (4 - 2))
>  
>  /* Flush */
>  #define MI_FLUSH                       (0x04<<23)
> diff --git a/lib/meson.build b/lib/meson.build
> index 221ae28c..2135ddf3 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -46,6 +46,7 @@ lib_sources = [
>         'sw_sync.c',
>         'intel_reg_map.c',
>         'intel_iosf.c',
> +       'igt_aux_pgtable.c',
>         'igt_kms.c',
>         'igt_fb.c',
>         'igt_core.c',
> diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
> index 694eb3cf..31e38c2b 100644
> --- a/lib/rendercopy_gen9.c
> +++ b/lib/rendercopy_gen9.c
> @@ -15,6 +15,7 @@
>  #include <i915_drm.h>
>  
>  #include "drmtest.h"
> +#include "igt_aux_pgtable.h"
>  #include "intel_bufmgr.h"
>  #include "intel_batchbuffer.h"
>  #include "intel_io.h"
> @@ -972,19 +973,113 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset
>  
>  #define BATCH_STATE_SPLIT 2048
>  
> +static void
> +gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch, uint32_t state)
> +{
> +       if (!state)
> +               return;
> +
> +       OUT_BATCH(MI_LOAD_REGISTER_MEM);
> +       OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR);
> +       OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, state);
> +       OUT_BATCH(MI_NOOP);
> +
> +       OUT_BATCH(MI_LOAD_REGISTER_MEM);
> +       OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR + 4);
> +       OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, state + 4);
> +       OUT_BATCH(MI_NOOP);

Extra MI_NOOP.

> +}
> +
> +static int add_aux_pgtable_range(const struct igt_buf *buf,
> +                                struct igt_aux_pgtable_range *range,
> +                                uint64_t *pin_offset)
> +{
> +       if (!buf->aux.stride)
> +               return 0;
> +
> +       drm_intel_bo_set_softpin_offset(buf->bo, *pin_offset);
> +       igt_assert(buf->bo->offset64 == *pin_offset);
> +
> +       range->surface_base = *pin_offset;
> +       range->surface_size = buf->size;
> +       range->aux_base = *pin_offset + buf->aux.offset;
> +       range->tiling = buf->tiling;
> +       range->bpp = buf->bpp;
> +
> +       /* The GEN12+ compressed main surface must be 64kB aligned.  */
> +       *pin_offset = ALIGN(*pin_offset + buf->bo->size, 0x10000);
> +
> +       return 1;
> +}
> +
> +static drm_intel_bo *
> +gen12_create_aux_pgtable_bo(drm_intel_bufmgr *bufmgr,
> +                           const struct igt_buf *dst_buf,
> +                           const struct igt_buf *src_buf)
> +{
> +       struct igt_aux_pgtable_range ranges[2];
> +       int range_count;
> +       uint64_t pin_offset;
> +       drm_intel_bo *gem_bo;
> +
> +       range_count = 0;
> +       pin_offset = 0;
> +
> +       range_count += add_aux_pgtable_range(dst_buf,
> +                                            &ranges[range_count], &pin_offset);
> +       range_count += add_aux_pgtable_range(src_buf,
> +                                            &ranges[range_count], &pin_offset);
> +
> +       if (!range_count)
> +               return NULL;
> +
> +       gem_bo = igt_aux_pgtable_create(bufmgr, ranges, range_count);
> +       igt_assert(gem_bo);
> +
> +       return gem_bo;
> +}
> +
> +static uint32_t
> +gen12_create_aux_pgtable_state(struct intel_batchbuffer *batch,
> +                              drm_intel_bo *aux_pgtable_bo)
> +{
> +       uint64_t *pgtable_ptr;
> +       uint32_t pgtable_ptr_offset;
> +       int ret;
> +
> +       if (!aux_pgtable_bo)
> +               return 0;
> +
> +       pgtable_ptr = intel_batchbuffer_subdata_alloc(batch,
> +                                                     sizeof(*pgtable_ptr),
> +                                                     sizeof(*pgtable_ptr));
> +       pgtable_ptr_offset = intel_batchbuffer_subdata_offset(batch,
> +                                                             pgtable_ptr);
> +
> +       ret = drm_intel_bo_emit_reloc(batch->bo, pgtable_ptr_offset,
> +                                     aux_pgtable_bo, 0,
> +                                     I915_GEM_DOMAIN_RENDER, 0);
> +       assert(ret == 0);
> +
> +       return pgtable_ptr_offset;
> +}
> +
>  static
>  void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
>                           drm_intel_context *context,
>                           const struct igt_buf *src, unsigned src_x,
>                           unsigned src_y, unsigned width, unsigned height,
>                           const struct igt_buf *dst, unsigned dst_x,
> -                         unsigned dst_y, const uint32_t ps_kernel[][4],
> +                         unsigned dst_y,
> +                         drm_intel_bo *aux_pgtable_bo,
> +                         const uint32_t ps_kernel[][4],
>                           uint32_t ps_kernel_size)
>  {
>         uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
>         uint32_t scissor_state;
>         uint32_t vertex_buffer;
>         uint32_t batch_end;
> +       uint32_t aux_pgtable_state;
>  
>         igt_assert(src->bpp == dst->bpp);
>         intel_batchbuffer_flush_with_context(batch, context);
> @@ -1007,6 +1102,10 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
>         viewport.cc_state = gen6_create_cc_viewport(batch);
>         viewport.sf_clip_state = gen7_create_sf_clip_viewport(batch);
>         scissor_state = gen6_create_scissor_rect(batch);
> +
> +       aux_pgtable_state = gen12_create_aux_pgtable_state(batch,
> +                                                          aux_pgtable_bo);
> +
>         /* TODO: there is other state which isn't set up */
>  
>         assert(batch->ptr < &batch->buffer[4095]);
> @@ -1018,6 +1117,8 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
>         OUT_BATCH(G4X_PIPELINE_SELECT | PIPELINE_SELECT_3D |
>                                 GEN9_PIPELINE_SELECTION_MASK);
>  
> +       gen12_emit_aux_pgtable_state(batch, aux_pgtable_state);
> +
>         gen8_emit_sip(batch);
>  
>         gen7_emit_push_constants(batch);
> @@ -1092,8 +1193,8 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch,
>  
>  {
>         _gen9_render_copyfunc(batch, context, src, src_x, src_y,
> -                         width, height, dst, dst_x, dst_y, ps_kernel_gen9,
> -                         sizeof(ps_kernel_gen9));
> +                         width, height, dst, dst_x, dst_y, NULL,
> +                         ps_kernel_gen9, sizeof(ps_kernel_gen9));
>  }
>  
>  void gen11_render_copyfunc(struct intel_batchbuffer *batch,
> @@ -1104,8 +1205,8 @@ void gen11_render_copyfunc(struct intel_batchbuffer *batch,
>  
>  {
>         _gen9_render_copyfunc(batch, context, src, src_x, src_y,
> -                         width, height, dst, dst_x, dst_y, ps_kernel_gen11,
> -                         sizeof(ps_kernel_gen11));
> +                         width, height, dst, dst_x, dst_y, NULL,
> +                         ps_kernel_gen11, sizeof(ps_kernel_gen11));
>  }
>  
>  void gen12_render_copyfunc(struct intel_batchbuffer *batch,
> @@ -1115,7 +1216,15 @@ void gen12_render_copyfunc(struct intel_batchbuffer *batch,
>                            const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
>  
>  {
> +       drm_intel_bo *aux_pgtable_bo;
> +
> +       aux_pgtable_bo = gen12_create_aux_pgtable_bo(batch->bufmgr, dst, src);
> +
>         _gen9_render_copyfunc(batch, context, src, src_x, src_y,
> -                         width, height, dst, dst_x, dst_y, gen12_render_copy,
> +                         width, height, dst, dst_x, dst_y,
> +                         aux_pgtable_bo,
> +                         gen12_render_copy,
>                           sizeof(gen12_render_copy));
> +
> +       drm_intel_bo_unreference(aux_pgtable_bo);
>  }
> -- 
> 2.17.1
> 

* Re: [igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support
  2019-11-01 20:13 [igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support Imre Deak
                   ` (4 preceding siblings ...)
  2019-11-04 11:28 ` [igt-dev] [PATCH i-g-t 1/3] " Chris Wilson
@ 2019-11-04 11:30 ` Chris Wilson
  2019-11-04 14:47   ` Imre Deak
  5 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2019-11-04 11:30 UTC (permalink / raw)
  To: Imre Deak, igt-dev; +Cc: Brian Welty

Quoting Imre Deak (2019-11-01 20:13:09)
> +static drm_intel_bo *
> +gen12_create_aux_pgtable_bo(drm_intel_bufmgr *bufmgr,
> +                           const struct igt_buf *dst_buf,
> +                           const struct igt_buf *src_buf)
> +{
> +       struct igt_aux_pgtable_range ranges[2];
> +       int range_count;
> +       uint64_t pin_offset;
> +       drm_intel_bo *gem_bo;
> +
> +       range_count = 0;
> +       pin_offset = 0;
> +
> +       range_count += add_aux_pgtable_range(dst_buf,
> +                                            &ranges[range_count], &pin_offset);
> +       range_count += add_aux_pgtable_range(src_buf,
> +                                            &ranges[range_count], &pin_offset);
> +
> +       if (!range_count)
> +               return NULL;

So every batch uses the same locations for its pair of surfaces, causing a
complete eviction stall between each batch. That's a nasty side-effect that
will hide flushing bugs between batches.
-Chris

* Re: [igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support
  2019-11-04 11:28 ` [igt-dev] [PATCH i-g-t 1/3] " Chris Wilson
@ 2019-11-04 14:07   ` Imre Deak
  0 siblings, 0 replies; 9+ messages in thread
From: Imre Deak @ 2019-11-04 14:07 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev, Brian Welty

On Mon, Nov 04, 2019 at 11:28:11AM +0000, Chris Wilson wrote:
> Quoting Imre Deak (2019-11-01 20:13:09)
> > On GEN12+ the AUX CCS surfaces required by the render and media
> > compression must be specified by a 3 level page table directory, which
> > translates the main surface graphics address to the AUX CCS surface
> > graphics address. For this purpose add support for creating a GEM buffer
> > to translate the linear surface address range to the linear AUX surface
> > address range.
> > 
> > The buffers containing the main surface must be pinned down, since the
> > directory table entry indices depend on the surface address, and they
> > must be 64kB aligned. The page table can be relocated OTOH, so allow
> > that and emit the required relocation entries.
> > 
> > Cc: Mika Kahola <mika.kahola@intel.com>
> > Cc: Brian Welty <brian.welty@intel.com>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > ---
> >  lib/igt_aux_pgtable.c | 381 ++++++++++++++++++++++++++++++++++++++++++
> >  lib/igt_aux_pgtable.h |  21 +++
> 
> This is not igt, but a libdrm_intel wrapper.

Right, I can rename it to lib/intel_aux_pgtable.

> Missing the autotools support.

Yep missed it, will add.

> >  lib/intel_reg.h       |   3 +
> >  lib/meson.build       |   1 +
> >  lib/rendercopy_gen9.c | 121 +++++++++++++-
> >  5 files changed, 521 insertions(+), 6 deletions(-)
> >  create mode 100644 lib/igt_aux_pgtable.c
> >  create mode 100644 lib/igt_aux_pgtable.h
> > 
> > diff --git a/lib/igt_aux_pgtable.c b/lib/igt_aux_pgtable.c
> > new file mode 100644
> > index 00000000..aaa24cfd
> > --- /dev/null
> > +++ b/lib/igt_aux_pgtable.c
> > @@ -0,0 +1,381 @@
> > +#include <stdint.h>
> > +#include <stdbool.h>
> > +
> > +#include "drmtest.h"
> > +#include "igt_aux_pgtable.h"
> > +#include "intel_bufmgr.h"
> > +#include "intel_batchbuffer.h"
> > +#include "ioctl_wrappers.h"
> > +
> > +#include "i915/gem_mman.h"
> > +
> > +#define BITS_PER_LONG          (sizeof(long) * 8)
> > +#define BITMASK(e, s)          ((~0UL << (s)) & \
> > +                                (~0UL >> (BITS_PER_LONG - 1 - (e))))
> > +
> > +#define ALIGN_DOWN(x, a)       ALIGN((x) - ((a) - 1), (a))
> > +
> > +#define CL_SIZE                        64
> > +/*
> > + * The size of a block on the CCS surface that is covered/pointed to by one
> > + * L1 AUX pagetable entry. This size corresponds to the 1<<8 alignment of the
> > + * pointers in the L1 entry.
> > + */
> > +#define CCS_BLOCK_SIZE         (4 * CL_SIZE)
> > +/*
> > + * 256 bytes per CCS block size *
> > + * 8   bits per byte /
> > + * 2   bits per surface CL *
> > + * 64  bytes per surface CL
> > + */
> > +#define SURFACE_BLOCK_SIZE     (CCS_BLOCK_SIZE * 8 / 2 * CL_SIZE)
> > +#define AUX_PGTABLE_VALID      1
> > +#define AUX_PGTABLE_LEVELS     3
> > +
> > +#define ADDRESS_BITS           48
> > +
> > +#define max(a, b)              ((a) > (b) ? (a) : (b))
> > +
> > +struct pgtable_level_desc {
> > +       int idx_shift;
> > +       int idx_bits;
> > +       int entry_ptr_shift;
> > +       int table_size;
> > +};
> > +
> > +struct pgtable_level_info {
> > +       const struct pgtable_level_desc *desc;
> > +       int table_count;
> > +       int alloc_base;
> > +       int alloc_ptr;
> > +};
> > +
> > +struct pgtable {
> > +       int levels;
> > +       struct pgtable_level_info *level_info;
> > +       int size;
> > +       int max_align;
> > +       drm_intel_bo *bo;
> > +};
> > +
> > +static const struct pgtable_level_desc aux_pgtable_desc[AUX_PGTABLE_LEVELS] = {
> > +       {
> > +               .idx_shift = 16,
> > +               .idx_bits = 8,
> > +               .entry_ptr_shift = 8,
> > +               .table_size = 8 * 1024,
> > +       },
> > +       {
> > +               .idx_shift = 24,
> > +               .idx_bits = 12,
> > +               .entry_ptr_shift = 13,
> > +               .table_size = 32 * 1024,
> > +       },
> > +       {
> > +               .idx_shift = 36,
> > +               .idx_bits = 12,
> > +               .entry_ptr_shift = 15,
> > +               .table_size = 32 * 1024,
> > +       },
> > +};
> > +
> > +static int
> > +pgt_table_count(int address_bits,
> > +               const struct igt_aux_pgtable_range *ranges, int range_count)
> > +{
> > +       uint64_t end;
> > +       int count;
> > +       int i;
> > +
> > +       count = 0;
> > +       end = 0;
> > +       for (i = 0; i < range_count; i++) {
> > +               const struct igt_aux_pgtable_range *r = &ranges[i];
> > +               uint64_t start;
> > +
> > +               /* We require ranges to be sorted. */
> > +               igt_assert(i == 0 ||
> > +                          r->surface_base >= ranges[i - 1].surface_base +
> > +                                             ranges[i - 1].surface_size);
> > +
> > +               start = ALIGN_DOWN(r->surface_base, 1UL << address_bits);
> > +               /* Avoid double counting for overlapping aligned ranges. */
> > +               start = max(start, end);
> > +
> > +               end = ALIGN(r->surface_base + r->surface_size,
> > +                           1UL << address_bits);
> > +               igt_assert(end >= start);
> > +
> > +               count += (end - start) >> address_bits;
> > +       }
> > +
> > +       return count;
> > +}
> > +
> > +static void
> > +pgt_calc_size(struct pgtable *pgt,
> > +             const struct igt_aux_pgtable_range *ranges, int range_count)
> > +{
> > +       int level;
> > +
> > +       pgt->size = 0;
> > +
> > +       for (level = pgt->levels; level > 0; level--) {
> > +               struct pgtable_level_info *li = &pgt->level_info[level - 1];
> > +
> > +               li->alloc_base = ALIGN(pgt->size, li->desc->table_size);
> > +               li->alloc_ptr = li->alloc_base;
> > +
> > +               li->table_count = pgt_table_count(li->desc->idx_shift +
> > +                                                 li->desc->idx_bits,
> > +                                                 ranges, range_count);
> > +
> > +               pgt->size = li->alloc_base +
> > +                           li->table_count * li->desc->table_size;
> > +       }
> > +}
> > +
> > +static uint64_t pgt_alloc_table(struct pgtable *pgt, int level)
> > +{
> > +       struct pgtable_level_info *li = &pgt->level_info[level - 1];
> > +       uint64_t table;
> > +
> > +       table = li->alloc_ptr;
> > +       li->alloc_ptr += li->desc->table_size;
> > +
> > +       igt_assert(li->alloc_ptr <=
> > +                  li->alloc_base + li->table_count * li->desc->table_size);
> > +
> > +       return table;
> > +}
> > +
> > +static int pgt_address_index(struct pgtable *pgt, int level, uint64_t address)
> > +{
> > +       const struct pgtable_level_desc *ld = pgt->level_info[level - 1].desc;
> > +       uint64_t mask = BITMASK(ld->idx_shift + ld->idx_bits - 1,
> > +                               ld->idx_shift);
> > +
> > +       return (address & mask) >> ld->idx_shift;
> > +}
> > +
> > +static uint64_t ptr_mask(struct pgtable *pgt, int level)
> > +{
> > +       const struct pgtable_level_desc *ld = pgt->level_info[level - 1].desc;
> > +
> > +       return BITMASK(ADDRESS_BITS - 1, ld->entry_ptr_shift);
> > +}
> > +
> > +static uint64_t pgt_entry_ptr(struct pgtable *pgt, int level, uint64_t entry)
> > +{
> > +       return entry & ptr_mask(pgt, level);
> > +}
> > +
> > +static uint64_t pgt_mkentry(struct pgtable *pgt, int level, uint64_t ptr,
> > +                           uint64_t flags)
> > +{
> > +       igt_assert(!(ptr & ~ptr_mask(pgt, level)));
> > +
> > +       return ptr | flags;
> > +}
> > +
> > +static uint64_t
> > +pgt_get_table(struct pgtable *pgt, uint64_t parent_table,
> > +             int level, uint64_t address, uint64_t flags)
> > +{
> > +       uint64_t *table_ptr = pgt->bo->virtual + parent_table;
> > +       int entry_idx = pgt_address_index(pgt, level, address);
> > +       uint64_t *entry_ptr;
> > +
> > +       entry_ptr = &table_ptr[entry_idx];
> > +       if (!*entry_ptr) {
> > +               uint64_t child_table = pgt_alloc_table(pgt, level - 1);
> > +
> > +               *entry_ptr = pgt_mkentry(pgt, level, child_table, flags);
> > +
> > +               drm_intel_bo_emit_reloc(pgt->bo,
> > +                                       parent_table + entry_idx * sizeof(uint64_t),
> > +                                       pgt->bo, *entry_ptr,
> > +                                       I915_GEM_DOMAIN_INSTRUCTION, 0);
> 
> This is missing setting the correct value into the table, and so may be
> skipped by the relocation pass inside execbuf.

Argh, right, I missed that pgt->bo->offset64 can be non-zero for an already
bound object, and that the kernel's reloc optimization then expects the final
value to have been written by userspace. Will fix it, thanks.
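
Maybe something along these lines for pgt_get_table() (an untested sketch; it
writes the presumed address into the entry, keeps the returned table offset
bo-relative, and relies on the 32kB alignment of pgt->bo so the bo offset
survives the entry pointer masking):

	uint64_t *table_ptr = pgt->bo->virtual + parent_table;
	int entry_idx = pgt_address_index(pgt, level, address);
	uint64_t *entry_ptr = &table_ptr[entry_idx];

	if (!*entry_ptr) {
		uint64_t child_table = pgt_alloc_table(pgt, level - 1);
		uint64_t entry = pgt_mkentry(pgt, level, child_table, flags);

		/*
		 * Store the entry with the presumed GPU address of the
		 * child table included, so the table contents are already
		 * correct in case the kernel keeps pgt->bo at its current
		 * offset and skips the relocation.
		 */
		*entry_ptr = pgt->bo->offset64 + entry;

		/*
		 * The reloc delta is the flagged, bo-relative entry, so the
		 * address the kernel would write (target offset + delta)
		 * matches the value stored above.
		 */
		drm_intel_bo_emit_reloc(pgt->bo,
					parent_table + entry_idx * sizeof(uint64_t),
					pgt->bo, entry,
					I915_GEM_DOMAIN_INSTRUCTION, 0);
	}

	/* Return the bo-relative offset of the child table. */
	return pgt_entry_ptr(pgt, level, *entry_ptr) - pgt->bo->offset64;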

> 
> > +       }
> > +
> > +       return pgt_entry_ptr(pgt, level, *entry_ptr);
> > +}
> > +
> > +static void
> > +pgt_set_l1_entry(struct pgtable *pgt, uint64_t l1_table,
> > +                uint64_t address, uint64_t ptr, uint64_t flags)
> > +{
> > +       uint64_t *l1_table_ptr;
> > +       uint64_t *l1_entry_ptr;
> > +
> > +       l1_table_ptr = pgt->bo->virtual + l1_table;
> > +       l1_entry_ptr = &l1_table_ptr[pgt_address_index(pgt, 1, address)];
> > +       *l1_entry_ptr = pgt_mkentry(pgt, 1, ptr, flags);
> > +}
> > +
> > +static uint64_t pgt_get_l1_flags(const struct igt_aux_pgtable_range *range)
> > +{
> > +       /*
> > +        * The offset of .tile_mode isn't specified by bspec, it's what Mesa
> > +        * uses.
> > +        */
> > +       union {
> > +               struct {
> > +                       uint64_t        valid:1;
> > +                       uint64_t        compression_mod:2;
> > +                       uint64_t        lossy_compression:1;
> > +                       uint64_t        pad:4;
> > +                       uint64_t        addr:40;
> > +                       uint64_t        pad2:4;
> > +                       uint64_t        tile_mode:2;
> > +                       uint64_t        depth:3;
> > +                       uint64_t        ycr:1;
> > +                       uint64_t        format:6;
> > +               } e;
> > +               uint64_t l;
> > +       } entry = {
> > +               .e = {
> > +                       .valid = 1,
> > +                       .tile_mode = range->tiling == I915_TILING_Y ? 1 : 0,
> > +                       .depth = 5,             /* 32bpp */
> > +                       .format = 0xA,          /* B8G8R8A8_UNORM */
> > +               }
> > +       };
> > +
> > +       /*
> > +        * TODO: Clarify if Yf is supported and if we need to differentiate
> > +        *       Ys and Yf.
> > +        *       Add support for more formats.
> > +        */
> > +       igt_assert(range->tiling == I915_TILING_Y ||
> > +                  range->tiling == I915_TILING_Yf ||
> > +                  range->tiling == I915_TILING_Ys);
> > +
> > +       igt_assert(range->bpp == 32);
> > +
> > +       return entry.l;
> > +}
> > +
> > +static uint64_t pgt_get_lx_flags(void)
> > +{
> > +       union {
> > +               struct {
> > +                       uint64_t        valid:1;
> > +                       uint64_t        addr:47;
> > +                       uint64_t        pad:16;
> > +               } e;
> > +               uint64_t l;
> > +       } entry = {
> > +               .e = {
> > +                       .valid = 1,
> > +               }
> > +       };
> > +
> > +       return entry.l;
> > +}
> > +
> > +static void
> > +pgt_populate_entries_for_range(struct pgtable *pgt,
> > +                              const struct igt_aux_pgtable_range *range,
> > +                              drm_intel_bo *bo,
> > +                              uint64_t top_table)
> > +{
> > +       uint64_t surface_addr = range->surface_base;
> > +       uint64_t surface_end = surface_addr + range->surface_size;
> > +       uint64_t aux_addr = range->aux_base;
> > +       uint64_t l1_flags = pgt_get_l1_flags(range);
> > +       uint64_t lx_flags = pgt_get_lx_flags();
> > +
> > +       pgt->bo = bo;
> > +
> > +       for (; surface_addr < surface_end;
> > +            surface_addr += SURFACE_BLOCK_SIZE, aux_addr += CCS_BLOCK_SIZE) {
> > +               uint64_t table = top_table;
> > +               int level;
> > +
> > +               for (level = pgt->levels; level > 1; level--)
> > +                       table = pgt_get_table(pgt, table, level,
> > +                                             surface_addr, lx_flags);
> > +
> > +               pgt_set_l1_entry(pgt, table, surface_addr, aux_addr, l1_flags);
> > +       }
> > +}
> > +
> > +static void pgt_populate_entries(struct pgtable *pgt,
> > +                                const struct igt_aux_pgtable_range *ranges,
> > +                                int range_count,
> > +                                drm_intel_bo *gem_bo)
> > +{
> > +       uint64_t top_table;
> > +       int i;
> > +
> > +       igt_assert(gem_bo->size >= pgt->size);
> > +       memset(gem_bo->virtual, 0, pgt->size);
> > +
> > +       top_table = pgt_alloc_table(pgt, pgt->levels);
> > +       /* Top level table must be at offset 0. */
> > +       igt_assert(top_table == 0);
> > +
> > +       for (i = 0; i < range_count; i++)
> > +               pgt_populate_entries_for_range(pgt, &ranges[i], gem_bo,
> > +                                              top_table);
> > +}
> > +
> > +static struct pgtable *
> > +pgt_create(const struct pgtable_level_desc *level_descs, int levels,
> > +          const struct igt_aux_pgtable_range *ranges, int range_count)
> > +{
> > +       struct pgtable *pgt;
> > +       int level;
> > +
> > +       pgt = calloc(1, sizeof(*pgt));
> > +       igt_assert(pgt);
> > +
> > +       pgt->levels = levels;
> > +
> > +       pgt->level_info = calloc(levels, sizeof(*pgt->level_info));
> > +       igt_assert(pgt->level_info);
> > +
> > +       for (level = 0; level < pgt->levels; level++) {
> > +               struct pgtable_level_info *li = &pgt->level_info[level];
> > +
> > +               li->desc = &level_descs[level];
> > +               if (li->desc->table_size > pgt->max_align)
> > +                       pgt->max_align = li->desc->table_size;
> > +       }
> > +
> > +       pgt_calc_size(pgt, ranges, range_count);
> > +
> > +       return pgt;
> > +}
> > +
> > +static void pgt_destroy(struct pgtable *pgt)
> > +{
> > +       free(pgt->level_info);
> > +       free(pgt);
> > +}
> > +
> > +drm_intel_bo *
> > +igt_aux_pgtable_create(drm_intel_bufmgr *bufmgr,
> > +                      const struct igt_aux_pgtable_range *ranges,
> > +                      int range_count)
> > +{
> > +       struct pgtable *pgt;
> > +       drm_intel_bo *gem_bo;
> > +
> > +       pgt = pgt_create(aux_pgtable_desc, AUX_PGTABLE_LEVELS,
> > +                        ranges, range_count);
> > +
> > +       gem_bo = drm_intel_bo_alloc_for_render(bufmgr,
> > +                                              "aux pgt",
> > +                                              pgt->size, pgt->max_align);
> > +       igt_assert(gem_bo);
> > +
> > +       igt_assert(drm_intel_bo_map(gem_bo, true) == 0);
> > +       pgt_populate_entries(pgt, ranges, range_count, gem_bo);
> > +       igt_assert(drm_intel_bo_unmap(gem_bo) == 0);
> > +
> > +       pgt_destroy(pgt);
> > +
> > +       return gem_bo;
> > +}
> > diff --git a/lib/igt_aux_pgtable.h b/lib/igt_aux_pgtable.h
> > new file mode 100644
> > index 00000000..64c6b21f
> > --- /dev/null
> > +++ b/lib/igt_aux_pgtable.h
> > @@ -0,0 +1,21 @@
> > +#ifndef _IGT_AUX_PGTABLE_H_
> > +#define _IGT_AUX_PGTABLE_H_
> > +
> > +#include "intel_bufmgr.h"
> > +
> > +struct igt_aux_pgtable;
> > +
> > +struct igt_aux_pgtable_range {
> > +       uint64_t surface_base;
> > +       uint64_t surface_size;
> > +       uint64_t aux_base;
> > +       uint32_t tiling;
> > +       int bpp;
> > +};
> > +
> > +drm_intel_bo *
> > +igt_aux_pgtable_create(drm_intel_bufmgr *bufmgr,
> > +                      const struct igt_aux_pgtable_range *ranges,
> > +                      int range_count);
> > +
> > +#endif
> > diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> > index 069440cb..e7263ce1 100644
> > --- a/lib/intel_reg.h
> > +++ b/lib/intel_reg.h
> > @@ -673,6 +673,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> >  #define RING_VALID          0x00000001
> >  #define RING_INVALID        0x00000000
> >  
> > +#define GEN12_GFX_AUX_TABLE_BASE_ADDR  0x4200
> > +
> >  
> >  
> >  /* BitBlt Instructions
> > @@ -2570,6 +2572,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> >  
> >  #define MI_LOAD_SCAN_LINES_INCL                (0x12<<23)
> >  #define MI_LOAD_REGISTER_IMM           ((0x22 << 23) | 1)
> > +#define MI_LOAD_REGISTER_MEM           ((0x29 << 23) | (4 - 2))
> >  
> >  /* Flush */
> >  #define MI_FLUSH                       (0x04<<23)
> > diff --git a/lib/meson.build b/lib/meson.build
> > index 221ae28c..2135ddf3 100644
> > --- a/lib/meson.build
> > +++ b/lib/meson.build
> > @@ -46,6 +46,7 @@ lib_sources = [
> >         'sw_sync.c',
> >         'intel_reg_map.c',
> >         'intel_iosf.c',
> > +       'igt_aux_pgtable.c',
> >         'igt_kms.c',
> >         'igt_fb.c',
> >         'igt_core.c',
> > diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
> > index 694eb3cf..31e38c2b 100644
> > --- a/lib/rendercopy_gen9.c
> > +++ b/lib/rendercopy_gen9.c
> > @@ -15,6 +15,7 @@
> >  #include <i915_drm.h>
> >  
> >  #include "drmtest.h"
> > +#include "igt_aux_pgtable.h"
> >  #include "intel_bufmgr.h"
> >  #include "intel_batchbuffer.h"
> >  #include "intel_io.h"
> > @@ -972,19 +973,113 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset
> >  
> >  #define BATCH_STATE_SPLIT 2048
> >  
> > +static void
> > +gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch, uint32_t state)
> > +{
> > +       if (!state)
> > +               return;
> > +
> > +       OUT_BATCH(MI_LOAD_REGISTER_MEM);
> > +       OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR);
> > +       OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, state);
> > +       OUT_BATCH(MI_NOOP);
> > +
> > +       OUT_BATCH(MI_LOAD_REGISTER_MEM);
> > +       OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR + 4);
> > +       OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, state + 4);
> > +       OUT_BATCH(MI_NOOP);
> 
> Extra MI_NOOP.

Ok, I'll remove both; they aren't required by the spec. They are a remnant
from when I tried to use MI_LOAD_REGISTER_IMM here, but that one requires the
noop _before_ the instruction instead.
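
I.e. the function would simply become (same emission as above, just without
the padding NOOPs):

static void
gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch, uint32_t state)
{
	if (!state)
		return;

	/*
	 * MI_LOAD_REGISTER_MEM is programmed as a 4 dword command and the
	 * relocation emits the full 64 bit address on gen8+, so no padding
	 * NOOP is needed after it.
	 */
	OUT_BATCH(MI_LOAD_REGISTER_MEM);
	OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR);
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, state);

	OUT_BATCH(MI_LOAD_REGISTER_MEM);
	OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR + 4);
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, state + 4);
}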

> 
> > +}
> > +
> > +static int add_aux_pgtable_range(const struct igt_buf *buf,
> > +                                struct igt_aux_pgtable_range *range,
> > +                                uint64_t *pin_offset)
> > +{
> > +       if (!buf->aux.stride)
> > +               return 0;
> > +
> > +       drm_intel_bo_set_softpin_offset(buf->bo, *pin_offset);
> > +       igt_assert(buf->bo->offset64 == *pin_offset);
> > +
> > +       range->surface_base = *pin_offset;
> > +       range->surface_size = buf->size;
> > +       range->aux_base = *pin_offset + buf->aux.offset;
> > +       range->tiling = buf->tiling;
> > +       range->bpp = buf->bpp;
> > +
> > +       /* The GEN12+ compressed main surface must be 64kB aligned.  */
> > +       *pin_offset = ALIGN(*pin_offset + buf->bo->size, 0x10000);
> > +
> > +       return 1;
> > +}
> > +
> > +static drm_intel_bo *
> > +gen12_create_aux_pgtable_bo(drm_intel_bufmgr *bufmgr,
> > +                           const struct igt_buf *dst_buf,
> > +                           const struct igt_buf *src_buf)
> > +{
> > +       struct igt_aux_pgtable_range ranges[2];
> > +       int range_count;
> > +       uint64_t pin_offset;
> > +       drm_intel_bo *gem_bo;
> > +
> > +       range_count = 0;
> > +       pin_offset = 0;
> > +
> > +       range_count += add_aux_pgtable_range(dst_buf,
> > +                                            &ranges[range_count], &pin_offset);
> > +       range_count += add_aux_pgtable_range(src_buf,
> > +                                            &ranges[range_count], &pin_offset);
> > +
> > +       if (!range_count)
> > +               return NULL;
> > +
> > +       gem_bo = igt_aux_pgtable_create(bufmgr, ranges, range_count);
> > +       igt_assert(gem_bo);
> > +
> > +       return gem_bo;
> > +}
> > +
> > +static uint32_t
> > +gen12_create_aux_pgtable_state(struct intel_batchbuffer *batch,
> > +                              drm_intel_bo *aux_pgtable_bo)
> > +{
> > +       uint64_t *pgtable_ptr;
> > +       uint32_t pgtable_ptr_offset;
> > +       int ret;
> > +
> > +       if (!aux_pgtable_bo)
> > +               return 0;
> > +
> > +       pgtable_ptr = intel_batchbuffer_subdata_alloc(batch,
> > +                                                     sizeof(*pgtable_ptr),
> > +                                                     sizeof(*pgtable_ptr));
> > +       pgtable_ptr_offset = intel_batchbuffer_subdata_offset(batch,
> > +                                                             pgtable_ptr);
> > +
> > +       ret = drm_intel_bo_emit_reloc(batch->bo, pgtable_ptr_offset,
> > +                                     aux_pgtable_bo, 0,
> > +                                     I915_GEM_DOMAIN_RENDER, 0);
> > +       assert(ret == 0);
> > +
> > +       return pgtable_ptr_offset;
> > +}
> > +
> >  static
> >  void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
> >                           drm_intel_context *context,
> >                           const struct igt_buf *src, unsigned src_x,
> >                           unsigned src_y, unsigned width, unsigned height,
> >                           const struct igt_buf *dst, unsigned dst_x,
> > -                         unsigned dst_y, const uint32_t ps_kernel[][4],
> > +                         unsigned dst_y,
> > +                         drm_intel_bo *aux_pgtable_bo,
> > +                         const uint32_t ps_kernel[][4],
> >                           uint32_t ps_kernel_size)
> >  {
> >         uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
> >         uint32_t scissor_state;
> >         uint32_t vertex_buffer;
> >         uint32_t batch_end;
> > +       uint32_t aux_pgtable_state;
> >  
> >         igt_assert(src->bpp == dst->bpp);
> >         intel_batchbuffer_flush_with_context(batch, context);
> > @@ -1007,6 +1102,10 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
> >         viewport.cc_state = gen6_create_cc_viewport(batch);
> >         viewport.sf_clip_state = gen7_create_sf_clip_viewport(batch);
> >         scissor_state = gen6_create_scissor_rect(batch);
> > +
> > +       aux_pgtable_state = gen12_create_aux_pgtable_state(batch,
> > +                                                          aux_pgtable_bo);
> > +
> >         /* TODO: there is other state which isn't set up */
> >  
> >         assert(batch->ptr < &batch->buffer[4095]);
> > @@ -1018,6 +1117,8 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
> >         OUT_BATCH(G4X_PIPELINE_SELECT | PIPELINE_SELECT_3D |
> >                                 GEN9_PIPELINE_SELECTION_MASK);
> >  
> > +       gen12_emit_aux_pgtable_state(batch, aux_pgtable_state);
> > +
> >         gen8_emit_sip(batch);
> >  
> >         gen7_emit_push_constants(batch);
> > @@ -1092,8 +1193,8 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch,
> >  
> >  {
> >         _gen9_render_copyfunc(batch, context, src, src_x, src_y,
> > -                         width, height, dst, dst_x, dst_y, ps_kernel_gen9,
> > -                         sizeof(ps_kernel_gen9));
> > +                         width, height, dst, dst_x, dst_y, NULL,
> > +                         ps_kernel_gen9, sizeof(ps_kernel_gen9));
> >  }
> >  
> >  void gen11_render_copyfunc(struct intel_batchbuffer *batch,
> > @@ -1104,8 +1205,8 @@ void gen11_render_copyfunc(struct intel_batchbuffer *batch,
> >  
> >  {
> >         _gen9_render_copyfunc(batch, context, src, src_x, src_y,
> > -                         width, height, dst, dst_x, dst_y, ps_kernel_gen11,
> > -                         sizeof(ps_kernel_gen11));
> > +                         width, height, dst, dst_x, dst_y, NULL,
> > +                         ps_kernel_gen11, sizeof(ps_kernel_gen11));
> >  }
> >  
> >  void gen12_render_copyfunc(struct intel_batchbuffer *batch,
> > @@ -1115,7 +1216,15 @@ void gen12_render_copyfunc(struct intel_batchbuffer *batch,
> >                            const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
> >  
> >  {
> > +       drm_intel_bo *aux_pgtable_bo;
> > +
> > +       aux_pgtable_bo = gen12_create_aux_pgtable_bo(batch->bufmgr, dst, src);
> > +
> >         _gen9_render_copyfunc(batch, context, src, src_x, src_y,
> > -                         width, height, dst, dst_x, dst_y, gen12_render_copy,
> > +                         width, height, dst, dst_x, dst_y,
> > +                         aux_pgtable_bo,
> > +                         gen12_render_copy,
> >                           sizeof(gen12_render_copy));
> > +
> > +       drm_intel_bo_unreference(aux_pgtable_bo);
> >  }
> > -- 
> > 2.17.1
> > 

* Re: [igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support
  2019-11-04 11:30 ` Chris Wilson
@ 2019-11-04 14:47   ` Imre Deak
  0 siblings, 0 replies; 9+ messages in thread
From: Imre Deak @ 2019-11-04 14:47 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev, Brian Welty

On Mon, Nov 04, 2019 at 11:30:43AM +0000, Chris Wilson wrote:
> Quoting Imre Deak (2019-11-01 20:13:09)
> > +static drm_intel_bo *
> > +gen12_create_aux_pgtable_bo(drm_intel_bufmgr *bufmgr,
> > +                           const struct igt_buf *dst_buf,
> > +                           const struct igt_buf *src_buf)
> > +{
> > +       struct igt_aux_pgtable_range ranges[2];
> > +       int range_count;
> > +       uint64_t pin_offset;
> > +       drm_intel_bo *gem_bo;
> > +
> > +       range_count = 0;
> > +       pin_offset = 0;
> > +
> > +       range_count += add_aux_pgtable_range(dst_buf,
> > +                                            &ranges[range_count], &pin_offset);
> > +       range_count += add_aux_pgtable_range(src_buf,
> > +                                            &ranges[range_count], &pin_offset);
> > +
> > +       if (!range_count)
> > +               return NULL;
> 
> So every batch uses the same locations for its pair of surfaces, causing a
> complete eviction stall between each batch. That's a nasty side-effect that
> will hide flushing bugs between batches.

Ok. As discussed on IRC, I'll randomize the offset for any object with a zero
bo->offset64.
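
Roughly something like this (untested sketch; it uses plain stdlib rand() just
for illustration, drops the pin_offset bookkeeping from
gen12_create_aux_pgtable_bo(), and still leaves sorting the ranges and
handling the unlikely collision of two random offsets to the caller):

static int add_aux_pgtable_range(const struct igt_buf *buf,
				 struct igt_aux_pgtable_range *range)
{
	if (!buf->aux.stride)
		return 0;

	if (!buf->bo->offset64) {
		/*
		 * Not bound yet: softpin the object at a randomized, 64kB
		 * aligned offset in the first 2GB, so consecutive batches
		 * don't keep reusing the same addresses and serialize on
		 * eviction.
		 */
		uint64_t offset = (((uint64_t)rand() & 0x7fff) + 1) << 16;

		drm_intel_bo_set_softpin_offset(buf->bo, offset);
		igt_assert(buf->bo->offset64 == offset);
	}

	range->surface_base = buf->bo->offset64;
	range->surface_size = buf->size;
	range->aux_base = buf->bo->offset64 + buf->aux.offset;
	range->tiling = buf->tiling;
	range->bpp = buf->bpp;

	return 1;
}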

> -Chris

Thread overview: 9+ messages
2019-11-01 20:13 [igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support Imre Deak
2019-11-01 20:13 ` [igt-dev] [PATCH i-g-t 2/3] tests/gem_render_copy: Adjust the tgl+ compressed buf alignments Imre Deak
2019-11-01 20:13 ` [igt-dev] [PATCH i-g-t 3/3] tests/gem_render_copy: Add compressed src to compressed dst subtests Imre Deak
2019-11-01 21:19 ` [igt-dev] ✗ GitLab.Pipeline: warning for series starting with [i-g-t,1/3] lib/rendercopy: Add AUX page table support Patchwork
2019-11-01 21:41 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
2019-11-04 11:28 ` [igt-dev] [PATCH i-g-t 1/3] " Chris Wilson
2019-11-04 14:07   ` Imre Deak
2019-11-04 11:30 ` Chris Wilson
2019-11-04 14:47   ` Imre Deak
