All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH libdrm] intel: Add EXEC_OBJECT_SUPPORTS_48BADDRESS flag.
@ 2015-06-23 12:21 Michel Thierry
  2015-06-23 12:21 ` [PATCH mesa] i965/gen8+: bo in state base address must be in 32-bit address range Michel Thierry
                   ` (4 more replies)
  0 siblings, 5 replies; 21+ messages in thread
From: Michel Thierry @ 2015-06-23 12:21 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel

Gen8+ supports 48-bit virtual addresses, but some objects must always be
allocated inside the 32-bit address range.

In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Intruction State Heap (ISH) must be in a
32-bit range, because the General State Offset and Instruction State Offset
are limited to 32-bits.

Provide a flag to set when the 4GB limit is not necessary in a given bo.
48-bit range will only be used when explicitly requested.

Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 include/drm/i915_drm.h    |  3 ++-
 intel/intel_bufmgr.c      | 12 ++++++++++++
 intel/intel_bufmgr.h      |  2 ++
 intel/intel_bufmgr_gem.c  | 48 +++++++++++++++++++++++++++++++++++++++++++----
 intel/intel_bufmgr_priv.h | 11 +++++++++++
 5 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index ded43b1..d6fb99d 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_SUPPORTS_48BADDRESS (1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48BADDRESS<<1)
 	__u64 flags;
 
 	__u64 rsvd1;
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
index 14ea9f9..c4ddecb 100644
--- a/intel/intel_bufmgr.c
+++ b/intel/intel_bufmgr.c
@@ -188,6 +188,18 @@ drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count)
 	return bo_array[0]->bufmgr->check_aperture_space(bo_array, count);
 }
 
+void drm_intel_bo_set_supports_48baddress(drm_intel_bo *bo)
+{
+	if (bo->bufmgr->bo_set_supports_48baddress)
+		bo->bufmgr->bo_set_supports_48baddress(bo);
+}
+
+void drm_intel_bo_clear_supports_48baddress(drm_intel_bo *bo)
+{
+	if (bo->bufmgr->bo_clear_supports_48baddress)
+		bo->bufmgr->bo_clear_supports_48baddress(bo);
+}
+
 int
 drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name)
 {
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 285919e..b51aff8 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -137,6 +137,8 @@ void drm_intel_bo_wait_rendering(drm_intel_bo *bo);
 
 void drm_intel_bufmgr_set_debug(drm_intel_bufmgr *bufmgr, int enable_debug);
 void drm_intel_bufmgr_destroy(drm_intel_bufmgr *bufmgr);
+void drm_intel_bo_set_supports_48baddress(drm_intel_bo *bo);
+void drm_intel_bo_clear_supports_48baddress(drm_intel_bo *bo);
 int drm_intel_bo_exec(drm_intel_bo *bo, int used,
 		      struct drm_clip_rect *cliprects, int num_cliprects, int DR4);
 int drm_intel_bo_mrb_exec(drm_intel_bo *bo, int used,
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 60c06fc..54e839e 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -143,6 +143,7 @@ typedef struct _drm_intel_bufmgr_gem {
 } drm_intel_bufmgr_gem;
 
 #define DRM_INTEL_RELOC_FENCE (1<<0)
+#define DRM_INTEL_RELOC_SUPPORTS_48BADDRESS (2<<0)
 
 typedef struct _drm_intel_reloc_target_info {
 	drm_intel_bo *bo;
@@ -240,6 +241,14 @@ struct _drm_intel_bo_gem {
 	bool is_userptr;
 
 	/**
+	 * Boolean of whether this buffer can be in the whole 48-bit address.
+	 *
+	 * By default, buffers will be keep in a 32-bit range, unless this
+	 * flag is explicitly set.
+	 */
+	bool supports_48baddress;
+
+	/**
 	 * Size in bytes of this buffer and its relocation descendents.
 	 *
 	 * Used to avoid costly tree walking in
@@ -471,13 +480,18 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo)
 }
 
 static void
-drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
+drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence,
+			       int supports_48baddress)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
 	int index;
 
 	if (bo_gem->validate_index != -1) {
+		if (supports_48baddress) {
+			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
+				EXEC_OBJECT_SUPPORTS_48BADDRESS;
+		}
 		if (need_fence)
 			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
 				EXEC_OBJECT_NEEDS_FENCE;
@@ -516,6 +530,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
 		bufmgr_gem->exec2_objects[index].flags |=
 			EXEC_OBJECT_NEEDS_FENCE;
 	}
+	if (supports_48baddress) {
+		bufmgr_gem->exec2_objects[index].flags |=
+			EXEC_OBJECT_SUPPORTS_48BADDRESS;
+	}
 	bufmgr_gem->exec_count++;
 }
 
@@ -1931,11 +1949,27 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 	else
 		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
 
+	if (target_bo_gem->supports_48baddress)
+		bo_gem->reloc_target_info[bo_gem->reloc_count].flags |=
+			DRM_INTEL_RELOC_SUPPORTS_48BADDRESS;
+
 	bo_gem->reloc_count++;
 
 	return 0;
 }
 
+static void drm_intel_gem_bo_set_supports_48baddress(drm_intel_bo *bo)
+{
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	bo_gem->supports_48baddress = 1;
+}
+
+static void drm_intel_gem_bo_clear_supports_48baddress(drm_intel_bo *bo)
+{
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	bo_gem->supports_48baddress = 0;
+}
+
 static int
 drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 			    drm_intel_bo *target_bo, uint32_t target_offset,
@@ -2049,7 +2083,7 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
 
 	for (i = 0; i < bo_gem->reloc_count; i++) {
 		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
-		int need_fence;
+		int need_fence, supports_48baddr;
 
 		if (target_bo == bo)
 			continue;
@@ -2062,8 +2096,12 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
 		need_fence = (bo_gem->reloc_target_info[i].flags &
 			      DRM_INTEL_RELOC_FENCE);
 
+		supports_48baddr = (bo_gem->reloc_target_info[i].flags &
+				    DRM_INTEL_RELOC_SUPPORTS_48BADDRESS);
+
 		/* Add the target to the validate list */
-		drm_intel_add_validate_buffer2(target_bo, need_fence);
+		drm_intel_add_validate_buffer2(target_bo, need_fence,
+					       supports_48baddr);
 	}
 }
 
@@ -2508,7 +2546,7 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
 	/* Add the batch buffer to the validation list.  There are no relocations
 	 * pointing to it.
 	 */
-	drm_intel_add_validate_buffer2(bo, 0);
+	drm_intel_add_validate_buffer2(bo, 0, 0);
 
 	memclear(execbuf);
 	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
@@ -3657,6 +3695,8 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
 	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
 	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
 	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
+	bufmgr_gem->bufmgr.bo_set_supports_48baddress = drm_intel_gem_bo_set_supports_48baddress;
+	bufmgr_gem->bufmgr.bo_clear_supports_48baddress = drm_intel_gem_bo_clear_supports_48baddress;
 	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
 	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
 	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
index 59ebd18..02fe4c8 100644
--- a/intel/intel_bufmgr_priv.h
+++ b/intel/intel_bufmgr_priv.h
@@ -152,6 +152,17 @@ struct _drm_intel_bufmgr {
 	void (*destroy) (drm_intel_bufmgr *bufmgr);
 
 	/**
+	 * Set/Clear 48-bit address support flag in a given bo.
+	 *
+	 * Any resource used with flat/heapless (0x00000000-0xfffff000)
+	 * General State Heap (GSH) or Intructions State Heap (ISH) must
+	 * be in a 32-bit range. 48-bit range will only be used when explicitly
+	 * requested.
+	 */
+	void (*bo_set_supports_48baddress) (drm_intel_bo *bo);
+	void (*bo_clear_supports_48baddress) (drm_intel_bo *bo);
+
+	/**
 	 * Add relocation entry in reloc_buf, which will be updated with the
 	 * target buffer's real offset on on command submission.
 	 *
-- 
2.4.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH mesa] i965/gen8+: bo in state base address must be in 32-bit address range
  2015-06-23 12:21 [PATCH libdrm] intel: Add EXEC_OBJECT_SUPPORTS_48BADDRESS flag Michel Thierry
@ 2015-06-23 12:21 ` Michel Thierry
  2015-06-24  3:51   ` Ben Widawsky
  2015-06-23 12:21 ` [PATCH igt 1/2] lib: Update intel_require_memory to handle +4GB cases Michel Thierry
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 21+ messages in thread
From: Michel Thierry @ 2015-06-23 12:21 UTC (permalink / raw)
  To: intel-gfx; +Cc: mesa-dev

Gen8+ supports 48-bit virtual addresses, but some objects must always be
allocated inside the 32-bit address range.

In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Intruction State Heap (ISH) must be in a
32-bit range, because the General State Offset and Instruction State Offset
are limited to 32-bits.

Set provided bo flag when the 4GB limit is not necessary, to be able to use
the full address space.

Cc: mesa-dev@lists.freedesktop.org
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 src/mesa/drivers/dri/i965/gen8_misc_state.c   | 6 +++---
 src/mesa/drivers/dri/i965/intel_batchbuffer.h | 7 +++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen8_misc_state.c b/src/mesa/drivers/dri/i965/gen8_misc_state.c
index b20038e..26531d0 100644
--- a/src/mesa/drivers/dri/i965/gen8_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_misc_state.c
@@ -41,17 +41,17 @@ void gen8_upload_state_base_address(struct brw_context *brw)
    OUT_BATCH(0);
    OUT_BATCH(mocs_wb << 16);
    /* Surface state base address: */
-   OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+   OUT_RELOC64_32BWA(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                mocs_wb << 4 | 1);
    /* Dynamic state base address: */
-   OUT_RELOC64(brw->batch.bo,
+   OUT_RELOC64_32BWA(brw->batch.bo,
                I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
                mocs_wb << 4 | 1);
    /* Indirect object base address: MEDIA_OBJECT data */
    OUT_BATCH(mocs_wb << 4 | 1);
    OUT_BATCH(0);
    /* Instruction base address: shader kernels (incl. SIP) */
-   OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+   OUT_RELOC64_32BWA(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                mocs_wb << 4 | 1);
 
    /* General state buffer size */
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index 7bdd836..5aa741e 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -177,6 +177,13 @@ intel_batchbuffer_advance(struct brw_context *brw)
 
 /* Handle 48-bit address relocations for Gen8+ */
 #define OUT_RELOC64(buf, read_domains, write_domain, delta) do { \
+   drm_intel_bo_set_supports_48baddress(buf); \
+   intel_batchbuffer_emit_reloc64(brw, buf, read_domains, write_domain, delta);	\
+} while (0)
+
+/* Handle 48-bit address relocations for Gen8+, ask for 32-bit address */
+#define OUT_RELOC64_32BWA(buf, read_domains, write_domain, delta) do { \
+   drm_intel_bo_clear_supports_48baddress(buf); \
    intel_batchbuffer_emit_reloc64(brw, buf, read_domains, write_domain, delta);	\
 } while (0)
 
-- 
2.4.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH igt 1/2] lib: Update intel_require_memory to handle +4GB cases
  2015-06-23 12:21 [PATCH libdrm] intel: Add EXEC_OBJECT_SUPPORTS_48BADDRESS flag Michel Thierry
  2015-06-23 12:21 ` [PATCH mesa] i965/gen8+: bo in state base address must be in 32-bit address range Michel Thierry
@ 2015-06-23 12:21 ` Michel Thierry
  2015-06-24 10:20   ` Chris Wilson
  2015-06-24 13:40   ` [PATCH igt v2] " Michel Thierry
  2015-06-23 12:21 ` [PATCH igt 2/2] tests/gem_ppgtt: Check Wa32bitOffsets workarounds Michel Thierry
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 21+ messages in thread
From: Michel Thierry @ 2015-06-23 12:21 UTC (permalink / raw)
  To: intel-gfx

Changed size from u32 to u64 to support +4GB.
48-bit PPGTT test cases may need extra memory available.

Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 lib/igt_aux.h  | 2 +-
 lib/intel_os.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/igt_aux.h b/lib/igt_aux.h
index b2dc267..922ae5b 100644
--- a/lib/igt_aux.h
+++ b/lib/igt_aux.h
@@ -86,7 +86,7 @@ uint64_t intel_get_avail_ram_mb(void);
 uint64_t intel_get_total_ram_mb(void);
 uint64_t intel_get_total_swap_mb(void);
 
-void intel_require_memory(uint32_t count, uint32_t size, unsigned mode);
+void intel_require_memory(uint32_t count, uint64_t size, unsigned mode);
 #define CHECK_RAM 0x1
 #define CHECK_SWAP 0x2
 
diff --git a/lib/intel_os.c b/lib/intel_os.c
index 3321a8d..2650788 100644
--- a/lib/intel_os.c
+++ b/lib/intel_os.c
@@ -215,7 +215,7 @@ intel_get_total_swap_mb(void)
  * assumption that any test that needs to check for memory requirements is a
  * thrashing test unsuitable for slow simulated systems.
  */
-void intel_require_memory(uint32_t count, uint32_t size, unsigned mode)
+void intel_require_memory(uint32_t count, uint64_t size, unsigned mode)
 {
 /* rough estimate of how many bytes the kernel requires to track each object */
 #define KERNEL_BO_OVERHEAD 512
@@ -225,8 +225,8 @@ void intel_require_memory(uint32_t count, uint32_t size, unsigned mode)
 	required *= size + KERNEL_BO_OVERHEAD;
 	required = ALIGN(required, 4096);
 
-	igt_debug("Checking %u surfaces of size %u bytes (total %llu) against %s%s\n",
-		  count, size, (long long)required,
+	igt_debug("Checking %u surfaces of size %llu bytes (total %llu) against %s%s\n",
+		  count, (long long)size, (long long)required,
 		  mode & (CHECK_RAM | CHECK_SWAP) ? "RAM" : "",
 		  mode & CHECK_SWAP ? " + swap": "");
 
-- 
2.4.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH igt 2/2] tests/gem_ppgtt: Check Wa32bitOffsets workarounds
  2015-06-23 12:21 [PATCH libdrm] intel: Add EXEC_OBJECT_SUPPORTS_48BADDRESS flag Michel Thierry
  2015-06-23 12:21 ` [PATCH mesa] i965/gen8+: bo in state base address must be in 32-bit address range Michel Thierry
  2015-06-23 12:21 ` [PATCH igt 1/2] lib: Update intel_require_memory to handle +4GB cases Michel Thierry
@ 2015-06-23 12:21 ` Michel Thierry
  2015-06-23 13:10   ` Chris Wilson
  2015-06-26 15:00 ` [PATCH libdrm] intel: Add EXEC_OBJECT_SUPPORTS_48BADDRESS flag Dave Gordon
  2015-07-01 15:28 ` [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag Michel Thierry
  4 siblings, 1 reply; 21+ messages in thread
From: Michel Thierry @ 2015-06-23 12:21 UTC (permalink / raw)
  To: intel-gfx

Test EXEC_OBJECT_SUPPORTS_48BADDRESS flag to use +32-bit segment.
Driver will try to use lower PDPs of each PPGTT for the objects
requiring Wa32bitGeneralStateOffset or Wa32bitInstructionBaseOffset.

v2: Add flink cases, (suggested by Daniel Vetter).
v3: 48-bit support flab moved to exec_object.

Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 tests/gem_ppgtt.c | 153 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 147 insertions(+), 6 deletions(-)

diff --git a/tests/gem_ppgtt.c b/tests/gem_ppgtt.c
index d1e484a..5b4dd63 100644
--- a/tests/gem_ppgtt.c
+++ b/tests/gem_ppgtt.c
@@ -47,8 +47,17 @@
 #define STRIDE (WIDTH*4)
 #define HEIGHT 512
 #define SIZE (HEIGHT*STRIDE)
-
-static bool uses_full_ppgtt(int fd)
+#define K (1024l)
+#define M (1024l * K)
+#define G (1024l * M)
+#define LOCAL_EXEC_OBJECT_SUPPORTS_48BADDRESS (1<<3)
+/*
+ * 0 - No PPGTT
+ * 1 - Aliasing PPGTT
+ * 2 - Full PPGTT (32b)
+ * 3 - Full PPGTT (48b)
+ */
+static bool __uses_full_ppgtt(int fd, int min)
 {
 	struct drm_i915_getparam gp;
 	int val = 0;
@@ -61,7 +70,17 @@ static bool uses_full_ppgtt(int fd)
 		return 0;
 
 	errno = 0;
-	return val > 1;
+	return val >= min;
+}
+
+static bool uses_full_ppgtt(int fd)
+{
+	return __uses_full_ppgtt(fd, 2);
+}
+
+static bool uses_48b_full_ppgtt(int fd)
+{
+	return __uses_full_ppgtt(fd, 3);
 }
 
 static drm_intel_bo *create_bo(drm_intel_bufmgr *bufmgr,
@@ -216,7 +235,7 @@ static void surfaces_check(dri_bo **bo, int count, uint32_t expected)
 	}
 }
 
-static uint64_t exec_and_get_offset(int fd, uint32_t batch)
+static uint64_t exec_and_get_offset(int fd, uint32_t batch, bool needs_32b_addr)
 {
 	struct drm_i915_gem_execbuffer2 execbuf;
 	struct drm_i915_gem_exec_object2 exec[1];
@@ -226,6 +245,7 @@ static uint64_t exec_and_get_offset(int fd, uint32_t batch)
 
 	memset(exec, 0, sizeof(exec));
 	exec[0].handle = batch;
+	exec[0].flags = (needs_32b_addr) ? 0 : LOCAL_EXEC_OBJECT_SUPPORTS_48BADDRESS;
 
 	memset(&execbuf, 0, sizeof(execbuf));
 	execbuf.buffers_ptr = (uintptr_t)exec;
@@ -252,7 +272,7 @@ static void flink_and_close(void)
 	fd2 = drm_open_any();
 
 	flinked_bo = gem_open(fd2, name);
-	offset = exec_and_get_offset(fd2, flinked_bo);
+	offset = exec_and_get_offset(fd2, flinked_bo, 0);
 	gem_sync(fd2, flinked_bo);
 	gem_close(fd2, flinked_bo);
 
@@ -260,7 +280,7 @@ static void flink_and_close(void)
 	 * same size should get the same offset
 	 */
 	new_bo = gem_create(fd2, 4096);
-	offset_new = exec_and_get_offset(fd2, new_bo);
+	offset_new = exec_and_get_offset(fd2, new_bo, 0);
 	gem_close(fd2, new_bo);
 
 	igt_assert_eq(offset, offset_new);
@@ -270,6 +290,124 @@ static void flink_and_close(void)
 	close(fd2);
 }
 
+static bool is_32b(uint64_t offset)
+{
+	return (offset < (1ULL << 32));
+}
+
+static void reusebo_and_compare_offsets(uint32_t fd,
+					uint64_t buf_size)
+{
+	uint32_t bo;
+	uint64_t offset, offset2;
+
+	bo = gem_create(fd, buf_size);
+	/* support 48b addresses */
+	offset = exec_and_get_offset(fd, bo, 0);
+	gem_sync(fd, bo);
+
+	/* require 32b address */
+	offset2 = exec_and_get_offset(fd, bo, 1);
+	gem_sync(fd, bo);
+
+	igt_assert(is_32b(offset2));
+	igt_assert_neq(offset, offset2);
+	gem_close(fd, bo);
+}
+
+static void flinkbo_and_compare_offsets(uint32_t fd, uint32_t fd2,
+					uint64_t buf_size)
+{
+	uint32_t bo, flinked_bo, name;
+	uint64_t offset, offset2;
+
+	bo = gem_create(fd, buf_size);
+	name = gem_flink(fd, bo);
+
+	/* support 48b addresses */
+	offset = exec_and_get_offset(fd, bo, 0);
+	gem_sync(fd, bo);
+
+	/* require 32b address */
+	flinked_bo = gem_open(fd2, name);
+	offset2 = exec_and_get_offset(fd2, flinked_bo, 1);
+	gem_sync(fd2, flinked_bo);
+
+	igt_assert(is_32b(offset2));
+	igt_assert_neq(offset, offset2);
+	gem_close(fd2, flinked_bo);
+	gem_close(fd, bo);
+}
+
+static void createbo_and_compare_offsets(uint32_t fd, uint32_t fd2,
+					 uint64_t buf_size,
+					 bool needs_32b, bool needs_32b2)
+{
+	uint32_t bo, bo2;
+	uint64_t offset, offset2;
+
+	bo = gem_create(fd, buf_size);
+	offset = exec_and_get_offset(fd, bo, needs_32b);
+	gem_sync(fd, bo);
+
+	bo2 = gem_create(fd2, buf_size);
+	offset2 = exec_and_get_offset(fd2, bo2, needs_32b2);
+	gem_sync(fd2, bo2);
+
+	if (needs_32b == needs_32b2)
+		igt_assert_eq(offset, offset2);
+	else
+		igt_assert_neq(offset, offset2);
+
+
+	/* lower PDPs of each PPGTT are reserved for the objects
+	 * requiring this workaround
+	 */
+	if (needs_32b)
+		igt_assert(is_32b(offset));
+
+	if (needs_32b2)
+		igt_assert(is_32b(offset2));
+
+	gem_close(fd, bo);
+	gem_close(fd2, bo2);
+}
+
+static void wa_32b_offset_test(void)
+{
+	uint32_t fd, fd2, gb;
+
+	fd = drm_open_any();
+	igt_require(uses_48b_full_ppgtt(fd));
+
+	intel_require_memory(1, 4*G, CHECK_RAM);
+
+	fd2 = drm_open_any();
+
+	for (gb = 1; gb < 4; gb++) {
+		/* same bo test */
+		reusebo_and_compare_offsets(fd, gb*G);
+
+
+		/* allow full addr range */
+		createbo_and_compare_offsets(fd, fd2, gb*G, 0, 0);
+
+		/* limit 32b addr range */
+		createbo_and_compare_offsets(fd, fd2, gb*G, 1, 1);
+
+		/* mixed */
+		createbo_and_compare_offsets(fd, fd2, gb*G, 0, 1);
+		createbo_and_compare_offsets(fd, fd2, gb*G, 1, 0);
+
+		/* flink obj */
+		flinkbo_and_compare_offsets(fd, fd2, gb*G);
+	}
+
+	close(fd);
+	close(fd2);
+}
+
+
 #define N_CHILD 8
 int main(int argc, char **argv)
 {
@@ -302,5 +440,8 @@ int main(int argc, char **argv)
 	igt_subtest("flink-and-close-vma-leak")
 		flink_and_close();
 
+	igt_subtest("wa-32b-offset-test")
+		wa_32b_offset_test();
+
 	igt_exit();
 }
-- 
2.4.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH igt 2/2] tests/gem_ppgtt: Check Wa32bitOffsets workarounds
  2015-06-23 12:21 ` [PATCH igt 2/2] tests/gem_ppgtt: Check Wa32bitOffsets workarounds Michel Thierry
@ 2015-06-23 13:10   ` Chris Wilson
  0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2015-06-23 13:10 UTC (permalink / raw)
  To: Michel Thierry; +Cc: intel-gfx

On Tue, Jun 23, 2015 at 01:21:29PM +0100, Michel Thierry wrote:
> +static void reusebo_and_compare_offsets(uint32_t fd,
> +					uint64_t buf_size)
> +{
> +	uint32_t bo;
> +	uint64_t offset, offset2;
> +
> +	bo = gem_create(fd, buf_size);
> +	/* support 48b addresses */
> +	offset = exec_and_get_offset(fd, bo, 0);
> +	gem_sync(fd, bo);

What are the sync for?

> +	/* require 32b address */
> +	offset2 = exec_and_get_offset(fd, bo, 1);
> +	gem_sync(fd, bo);
> +
> +	igt_assert(is_32b(offset2));
> +	igt_assert_neq(offset, offset2);

Nothing was preventing the first allocation from being a is_32b()
afaict. At least you should have asserted that the top-down allocation
placed it outside of the 32b zone to begin with.

Also there is a requirement that 32b allocations are wholly inside the
32b zone.

> +	gem_close(fd, bo);
> +}
> +
> +static void flinkbo_and_compare_offsets(uint32_t fd, uint32_t fd2,
> +					uint64_t buf_size)
> +
> +static void createbo_and_compare_offsets(uint32_t fd, uint32_t fd2,
> +					 uint64_t buf_size,
> +					 bool needs_32b, bool needs_32b2)
> +{
> +	uint32_t bo, bo2;
> +	uint64_t offset, offset2;
> +
> +	bo = gem_create(fd, buf_size);
> +	offset = exec_and_get_offset(fd, bo, needs_32b);
> +	gem_sync(fd, bo);
> +
> +	bo2 = gem_create(fd2, buf_size);
> +	offset2 = exec_and_get_offset(fd2, bo2, needs_32b2);
> +	gem_sync(fd2, bo2);
> +
> +	if (needs_32b == needs_32b2)
> +		igt_assert_eq(offset, offset2);
> +	else
> +		igt_assert_neq(offset, offset2);

Again there is a large number of assumptions about the internal
behaviour that I think can (a) be made explicity, or (b) be eliminated
entirely.

But first I think these are poor tests, since the two ctx are supposed to
be independent, making assertions between them is wrong.

What you need to focus on testing is that an object executed without the
full-48b flag is moved into the 32b zone no matter it's starting
position - or an error is generated (e.g. a 4G+ object, or 4G+ alignment).
We don't want that flag to imply anything else about the internal
implementation.

(Note this can be speed up on !llc if we have pad-to-size.)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH mesa] i965/gen8+: bo in state base address must be in 32-bit address range
  2015-06-23 12:21 ` [PATCH mesa] i965/gen8+: bo in state base address must be in 32-bit address range Michel Thierry
@ 2015-06-24  3:51   ` Ben Widawsky
  2015-06-25 13:10     ` [Mesa-dev] " Michel Thierry
  0 siblings, 1 reply; 21+ messages in thread
From: Ben Widawsky @ 2015-06-24  3:51 UTC (permalink / raw)
  To: Michel Thierry; +Cc: mesa-dev, intel-gfx

Hi. Feel free to Cc me on patches of this nature. I am far behind on mesa-dev,
and no longer read intel-gfx. I'm probably one of the sensible people to look at
this...

On Tue, Jun 23, 2015 at 01:21:27PM +0100, Michel Thierry wrote:
> Gen8+ supports 48-bit virtual addresses, but some objects must always be
> allocated inside the 32-bit address range.
> 
> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
> General State Heap (GSH) or Intruction State Heap (ISH) must be in a
> 32-bit range, because the General State Offset and Instruction State Offset
> are limited to 32-bits.

I don't think GSH, or ISH are well known terms that have every appeared
anywhere. I'd just keep the bit after the final comma (...because ...)

> 
> Set provided bo flag when the 4GB limit is not necessary, to be able to use
> the full address space.

I'm glad you got around to this. We'd been putting it off for a long time.

> 
> Cc: mesa-dev@lists.freedesktop.org
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>  src/mesa/drivers/dri/i965/gen8_misc_state.c   | 6 +++---
>  src/mesa/drivers/dri/i965/intel_batchbuffer.h | 7 +++++++
>  2 files changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/gen8_misc_state.c b/src/mesa/drivers/dri/i965/gen8_misc_state.c
> index b20038e..26531d0 100644
> --- a/src/mesa/drivers/dri/i965/gen8_misc_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_misc_state.c
> @@ -41,17 +41,17 @@ void gen8_upload_state_base_address(struct brw_context *brw)
>     OUT_BATCH(0);
>     OUT_BATCH(mocs_wb << 16);
>     /* Surface state base address: */
> -   OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
> +   OUT_RELOC64_32BWA(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
>                 mocs_wb << 4 | 1);
>     /* Dynamic state base address: */
> -   OUT_RELOC64(brw->batch.bo,
> +   OUT_RELOC64_32BWA(brw->batch.bo,
>                 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
>                 mocs_wb << 4 | 1);
>     /* Indirect object base address: MEDIA_OBJECT data */
>     OUT_BATCH(mocs_wb << 4 | 1);
>     OUT_BATCH(0);
>     /* Instruction base address: shader kernels (incl. SIP) */
> -   OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
> +   OUT_RELOC64_32BWA(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>                 mocs_wb << 4 | 1);
>  
>     /* General state buffer size */
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
> index 7bdd836..5aa741e 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
> @@ -177,6 +177,13 @@ intel_batchbuffer_advance(struct brw_context *brw)
>  
>  /* Handle 48-bit address relocations for Gen8+ */
>  #define OUT_RELOC64(buf, read_domains, write_domain, delta) do { \
> +   drm_intel_bo_set_supports_48baddress(buf); \
> +   intel_batchbuffer_emit_reloc64(brw, buf, read_domains, write_domain, delta);	\
> +} while (0)
> +
> +/* Handle 48-bit address relocations for Gen8+, ask for 32-bit address */
> +#define OUT_RELOC64_32BWA(buf, read_domains, write_domain, delta) do { \
> +   drm_intel_bo_clear_supports_48baddress(buf); \
>     intel_batchbuffer_emit_reloc64(brw, buf, read_domains, write_domain, delta);	\
>  } while (0)
>  

First and least bikesheddy, you need to bump the required libdrm in the
configure.ac to support this new libdrm function (maybe you did, but I don't see
it on mesa-dev).

More bikesheddy, and forgive me here because I haven't looked at any of the
kernel interfaces or libdrm patches (you can Cc those to mesa-dev if they're
relevant fwiw).

Presumably at the end of the day it's drm_intel_bo_emit_reloc which needs to
know about these limitations. Unfortunately we don't have a flags field there.
The implementation here seems like a somewhat cumbersome workaround for that (it
looks like the context execbuf which is pretty crappy - yes, I know who the
author was). Have you already discussed adding a new emit_reloc? I suppose if
people are opposed to a new emit reloc, the only I'd like to see different is
have the functions which need the workaround just call OUT_RELOC, instead of
OUT_RELOC64 (put a comment in the call sites), and make OUT_RELOC call the
drm_intel_bo_clear_supports_48baddress() (which is obviously a nop on pre-gen8
platforms). The OUT_RELOC64 case should be left alone - we shouldn't need to
tell libdrm that I want a 64bit relocation, and it can actually be 64b...

I suspect not many other mesa devs will have an opinion here, but I'm flexible
if they disagree.
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH igt 1/2] lib: Update intel_require_memory to handle +4GB cases
  2015-06-23 12:21 ` [PATCH igt 1/2] lib: Update intel_require_memory to handle +4GB cases Michel Thierry
@ 2015-06-24 10:20   ` Chris Wilson
  2015-06-24 13:40   ` [PATCH igt v2] " Michel Thierry
  1 sibling, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2015-06-24 10:20 UTC (permalink / raw)
  To: Michel Thierry; +Cc: intel-gfx

On Tue, Jun 23, 2015 at 01:21:28PM +0100, Michel Thierry wrote:
> Changed size from u32 to u64 to support +4GB.
> 48-bit PPGTT test cases may need extra memory available.
> 
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>  lib/igt_aux.h  | 2 +-
>  lib/intel_os.c | 6 +++---
>  2 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/igt_aux.h b/lib/igt_aux.h
> index b2dc267..922ae5b 100644
> --- a/lib/igt_aux.h
> +++ b/lib/igt_aux.h
> @@ -86,7 +86,7 @@ uint64_t intel_get_avail_ram_mb(void);
>  uint64_t intel_get_total_ram_mb(void);
>  uint64_t intel_get_total_swap_mb(void);
>  
> -void intel_require_memory(uint32_t count, uint32_t size, unsigned mode);
> +void intel_require_memory(uint32_t count, uint64_t size, unsigned mode);
>  #define CHECK_RAM 0x1
>  #define CHECK_SWAP 0x2
>  
> diff --git a/lib/intel_os.c b/lib/intel_os.c
> index 3321a8d..2650788 100644
> --- a/lib/intel_os.c
> +++ b/lib/intel_os.c
> @@ -215,7 +215,7 @@ intel_get_total_swap_mb(void)
>   * assumption that any test that needs to check for memory requirements is a
>   * thrashing test unsuitable for slow simulated systems.
>   */
> -void intel_require_memory(uint32_t count, uint32_t size, unsigned mode)
> +void intel_require_memory(uint32_t count, uint64_t size, unsigned mode)
>  {
>  /* rough estimate of how many bytes the kernel requires to track each object */
>  #define KERNEL_BO_OVERHEAD 512
> @@ -225,8 +225,8 @@ void intel_require_memory(uint32_t count, uint32_t size, unsigned mode)
>  	required *= size + KERNEL_BO_OVERHEAD;
>  	required = ALIGN(required, 4096);
>  
> -	igt_debug("Checking %u surfaces of size %u bytes (total %llu) against %s%s\n",
> -		  count, size, (long long)required,
> +	igt_debug("Checking %u surfaces of size %llu bytes (total %llu) against %s%s\n",

I think this is probably a good time to start putting thousand
separators into the output "%'llu" (hoping that we have gcc everywhere).

This would also need

diff --git a/lib/igt_core.c b/lib/igt_core.c
index 1367863..8ac1f33 100644
--- a/lib/igt_core.c
+++ b/lib/igt_core.c
@@ -55,6 +55,7 @@
 #include <time.h>
 #include <ctype.h>
 #include <limits.h>
+#include <locale.h>
 
 #include "drmtest.h"
 #include "intel_chipset.h"
@@ -523,6 +524,9 @@ static int common_init(int *argc, char **argv,
        int ret = 0;
        char *env = getenv("IGT_LOG_LEVEL");
 
+       if (isatty(STDOUT_FILENO))
+               setlocale(LC_ALL, "");
+
        if (env) {
                if (strcmp(env, "debug") == 0)
                        igt_log_level = IGT_LOG_DEBUG;
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH igt v2] lib: Update intel_require_memory to handle +4GB cases
  2015-06-23 12:21 ` [PATCH igt 1/2] lib: Update intel_require_memory to handle +4GB cases Michel Thierry
  2015-06-24 10:20   ` Chris Wilson
@ 2015-06-24 13:40   ` Michel Thierry
  2015-06-24 15:38     ` Chris Wilson
  2015-06-26 16:46     ` [PATCH igt v3] " Michel Thierry
  1 sibling, 2 replies; 21+ messages in thread
From: Michel Thierry @ 2015-06-24 13:40 UTC (permalink / raw)
  To: intel-gfx

Changed size from u32 to u64 to support +4GB.
48-bit PPGTT test cases may need extra memory available.

v2: Use thousands separator (Chris)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 lib/igt_aux.h  | 2 +-
 lib/intel_os.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/igt_aux.h b/lib/igt_aux.h
index 9ea50de..139e5da 100644
--- a/lib/igt_aux.h
+++ b/lib/igt_aux.h
@@ -86,7 +86,7 @@ uint64_t intel_get_avail_ram_mb(void);
 uint64_t intel_get_total_ram_mb(void);
 uint64_t intel_get_total_swap_mb(void);
 
-void intel_require_memory(uint32_t count, uint32_t size, unsigned mode);
+void intel_require_memory(uint32_t count, uint64_t size, unsigned mode);
 #define CHECK_RAM 0x1
 #define CHECK_SWAP 0x2
 
diff --git a/lib/intel_os.c b/lib/intel_os.c
index f82847f..984bb6a 100644
--- a/lib/intel_os.c
+++ b/lib/intel_os.c
@@ -215,7 +215,7 @@ intel_get_total_swap_mb(void)
  * assumption that any test that needs to check for memory requirements is a
  * thrashing test unsuitable for slow simulated systems.
  */
-void intel_require_memory(uint32_t count, uint32_t size, unsigned mode)
+void intel_require_memory(uint32_t count, uint64_t size, unsigned mode)
 {
 /* rough estimate of how many bytes the kernel requires to track each object */
 #define KERNEL_BO_OVERHEAD 512
@@ -225,8 +225,8 @@ void intel_require_memory(uint32_t count, uint32_t size, unsigned mode)
 	required *= size + KERNEL_BO_OVERHEAD;
 	required = ALIGN(required, 4096);
 
-	igt_debug("Checking %u surfaces of size %u bytes (total %'llu) against %s%s\n",
-		  count, size, (long long)required,
+	igt_debug("Checking %u surfaces of size %'llu bytes (total %'llu) against %s%s\n",
+		  count, (long long)size, (long long)required,
 		  mode & (CHECK_RAM | CHECK_SWAP) ? "RAM" : "",
 		  mode & CHECK_SWAP ? " + swap": "");
 
-- 
2.4.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH igt v2] lib: Update intel_require_memory to handle +4GB cases
  2015-06-24 13:40   ` [PATCH igt v2] " Michel Thierry
@ 2015-06-24 15:38     ` Chris Wilson
  2015-06-25  7:46       ` Daniel Vetter
  2015-06-26 16:46     ` [PATCH igt v3] " Michel Thierry
  1 sibling, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2015-06-24 15:38 UTC (permalink / raw)
  To: Michel Thierry; +Cc: intel-gfx

On Wed, Jun 24, 2015 at 02:40:31PM +0100, Michel Thierry wrote:
> Changed size from u32 to u64 to support +4GB.
> 48-bit PPGTT test cases may need extra memory available.
> 
> v2: Use thousands separator (Chris)
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

One last thing, whilst you're in this function, mind moving the
igt_skip_on_simulation() to the start?

intel_require_memory()
{
  /* We use intel_require_memory() to detect tests that are designed to
   * with large working sets to stress boundaries such as aperture, and/or
   * memory exhaustion. Functional tests that also require large working
   * sets are split into two, a small subtest to verify the operation with the
   * absolute minimum working set, and the full subtest to verify the
   * interesting corner cases. The former test doesn't require the
   * memory check and so judicious use of intel_require_memory() helps
   * segregate such functional tests from the broader tests, useful for
   * slow verification systems such as the simulator.
   *
   * To recap, lay out behaviour tests like:
   *    igt_subtest("small") {
   *       run_test({.num_surfaces = 2 });
   *    }
   *    igt_subtest("full") {
   *      intel_require_memory(NUM_SURFACES, SURFACE_SIZE, CHECK_RAM);
   *      run_test({.num_surfaces = NUM_SURFACES});
   *    }
   * so that we have a simple check that is run anywhere and everywhere,
   * useful to prove the test itself works as expected, and the full
   * slow check that needs to be run on real hardware.
   */
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH igt v2] lib: Update intel_require_memory to handle +4GB cases
  2015-06-24 15:38     ` Chris Wilson
@ 2015-06-25  7:46       ` Daniel Vetter
  0 siblings, 0 replies; 21+ messages in thread
From: Daniel Vetter @ 2015-06-25  7:46 UTC (permalink / raw)
  To: Chris Wilson, Michel Thierry, intel-gfx

On Wed, Jun 24, 2015 at 04:38:55PM +0100, Chris Wilson wrote:
> On Wed, Jun 24, 2015 at 02:40:31PM +0100, Michel Thierry wrote:
> > Changed size from u32 to u64 to support +4GB.
> > 48-bit PPGTT test cases may need extra memory available.
> > 
> > v2: Use thousands separator (Chris)
> > 
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> One last thing, whilst you're in this function, mind moving the
> igt_skip_on_simulation() to the start?
> 
> intel_require_memory()
> {
>   /* We use intel_require_memory() to detect tests that are designed to
>    * with large working sets to stress boundaries such as aperture, and/or
>    * memory exhaustion. Functional tests that also require large working
>    * sets are split into two, a small subtest to verify the operation with the
>    * absolute minimum working set, and the full subtest to verify the
>    * interesting corner cases. The former test doesn't require the
>    * memory check and so judicious use of intel_require_memory() helps
>    * segregate such functional tests from the broader tests, useful for
>    * slow verification systems such as the simulator.
>    *
>    * To recap, lay out behaviour tests like:
>    *    igt_subtest("small") {
>    *       run_test({.num_surfaces = 2 });
>    *    }
>    *    igt_subtest("full") {
>    *      intel_require_memory(NUM_SURFACES, SURFACE_SIZE, CHECK_RAM);
>    *      run_test({.num_surfaces = NUM_SURFACES});
>    *    }
>    * so that we have a simple check that is run anywhere and everywhere,
>    * useful to prove the test itself works as expected, and the full
>    * slow check that needs to be run on real hardware.
>    */

This is an excellent usage hint, care to add it to the gtkdoc section? Not
sure whether the code quote needs any special treatment. But probably good
to insert a suitable # heading above it, so that this example usage
discussion stands out from the more general function description.

Thanks, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Mesa-dev] [PATCH mesa] i965/gen8+: bo in state base address must be in 32-bit address range
  2015-06-24  3:51   ` Ben Widawsky
@ 2015-06-25 13:10     ` Michel Thierry
  0 siblings, 0 replies; 21+ messages in thread
From: Michel Thierry @ 2015-06-25 13:10 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: mesa-dev, intel-gfx

On 6/24/2015 4:51 AM, Ben Widawsky wrote:
> Hi. Feel free to Cc me on patches of this nature. I am far behind on mesa-dev,
> and no longer read intel-gfx. I'm probably one of the sensible people to look at
> this...
>
> On Tue, Jun 23, 2015 at 01:21:27PM +0100, Michel Thierry wrote:
>> Gen8+ supports 48-bit virtual addresses, but some objects must always be
>> allocated inside the 32-bit address range.
>>
>> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
>> General State Heap (GSH) or Intruction State Heap (ISH) must be in a
>> 32-bit range, because the General State Offset and Instruction State Offset
>> are limited to 32-bits.
>
> I don't think GSH, or ISH are well known terms that have every appeared
> anywhere. I'd just keep the bit after the final comma (...because ...)
>
>>
>> Set provided bo flag when the 4GB limit is not necessary, to be able to use
>> the full address space.
>
> I'm glad you got around to this. We'd been putting it off for a long time.
>
>>
>> Cc: mesa-dev@lists.freedesktop.org
>> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
>> ---
>>   src/mesa/drivers/dri/i965/gen8_misc_state.c   | 6 +++---
>>   src/mesa/drivers/dri/i965/intel_batchbuffer.h | 7 +++++++
>>   2 files changed, 10 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/gen8_misc_state.c b/src/mesa/drivers/dri/i965/gen8_misc_state.c
>> index b20038e..26531d0 100644
>> --- a/src/mesa/drivers/dri/i965/gen8_misc_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen8_misc_state.c
>> @@ -41,17 +41,17 @@ void gen8_upload_state_base_address(struct brw_context *brw)
>>      OUT_BATCH(0);
>>      OUT_BATCH(mocs_wb << 16);
>>      /* Surface state base address: */
>> -   OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
>> +   OUT_RELOC64_32BWA(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
>>                  mocs_wb << 4 | 1);
>>      /* Dynamic state base address: */
>> -   OUT_RELOC64(brw->batch.bo,
>> +   OUT_RELOC64_32BWA(brw->batch.bo,
>>                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
>>                  mocs_wb << 4 | 1);
>>      /* Indirect object base address: MEDIA_OBJECT data */
>>      OUT_BATCH(mocs_wb << 4 | 1);
>>      OUT_BATCH(0);
>>      /* Instruction base address: shader kernels (incl. SIP) */
>> -   OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> +   OUT_RELOC64_32BWA(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>>                  mocs_wb << 4 | 1);
>>
>>      /* General state buffer size */
>> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
>> index 7bdd836..5aa741e 100644
>> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
>> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
>> @@ -177,6 +177,13 @@ intel_batchbuffer_advance(struct brw_context *brw)
>>
>>   /* Handle 48-bit address relocations for Gen8+ */
>>   #define OUT_RELOC64(buf, read_domains, write_domain, delta) do { \
>> +   drm_intel_bo_set_supports_48baddress(buf); \
>> +   intel_batchbuffer_emit_reloc64(brw, buf, read_domains, write_domain, delta);	\
>> +} while (0)
>> +
>> +/* Handle 48-bit address relocations for Gen8+, ask for 32-bit address */
>> +#define OUT_RELOC64_32BWA(buf, read_domains, write_domain, delta) do { \
>> +   drm_intel_bo_clear_supports_48baddress(buf); \
>>      intel_batchbuffer_emit_reloc64(brw, buf, read_domains, write_domain, delta);	\
>>   } while (0)
>>
>
> First and least bikesheddy, you need to bump the required libdrm in the
> configure.ac to support this new libdrm function (maybe you did, but I don't see
> it on mesa-dev).

I didn't bump the libdrm version either. I'll make sure to include this 
in the next version.

>
> More bikesheddy, and forgive me here because I haven't looked at any of the
> kernel interfaces or libdrm patches (you can Cc those to mesa-dev if they're
> relevant fwiw).
>
> Presumably at the end of the day it's drm_intel_bo_emit_reloc which needs to
> know about these limitations. Unfortunately we don't have a flags field there.
> The implementation here seems like a somewhat cumbersome workaround for that (it
> looks like the context execbuf which is pretty crappy - yes, I know who the
> author was). Have you already discussed adding a new emit_reloc? I suppose if
> people are opposed to a new emit reloc, the only I'd like to see different is
> have the functions which need the workaround just call OUT_RELOC, instead of
> OUT_RELOC64 (put a comment in the call sites), and make OUT_RELOC call the
> drm_intel_bo_clear_supports_48baddress() (which is obviously a nop on pre-gen8
> platforms). The OUT_RELOC64 case should be left alone - we shouldn't need to
> tell libdrm that I want a 64bit relocation, and it can actually be 64b...

Right, at the end I was only looking to flag the exec_objects that can 
be safely outside the first 4GBs (it was decided to be an opt-in to not 
break any other user). I can change it like you suggest, OUT_RELOC will 
mean that we need the wa, while OUT_RELOC64 means we don't.

If you want to take a look, these are the libdrm and kernel changes:

libdrm: 
http://news.gmane.org/find-root.php?message_id=1435062089-19877-1-git-send-email-michel.thierry@intel.com

i915: 
http://news.gmane.org/find-root.php?message_id=1435062065-19717-1-git-send-email-michel.thierry@intel.com


>
> I suspect not many other mesa devs will have an opinion here, but I'm flexible
> if they disagree.
>

I'll cc you when I send the updated patches.

Thanks,

-Michel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH libdrm] intel: Add EXEC_OBJECT_SUPPORTS_48BADDRESS flag.
  2015-06-23 12:21 [PATCH libdrm] intel: Add EXEC_OBJECT_SUPPORTS_48BADDRESS flag Michel Thierry
                   ` (2 preceding siblings ...)
  2015-06-23 12:21 ` [PATCH igt 2/2] tests/gem_ppgtt: Check Wa32bitOffsets workarounds Michel Thierry
@ 2015-06-26 15:00 ` Dave Gordon
  2015-07-01 15:28 ` [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag Michel Thierry
  4 siblings, 0 replies; 21+ messages in thread
From: Dave Gordon @ 2015-06-26 15:00 UTC (permalink / raw)
  To: intel-gfx, Thierry, Michel

On 23/06/15 13:21, Michel Thierry wrote:
> Gen8+ supports 48-bit virtual addresses, but some objects must always be
> allocated inside the 32-bit address range.
> 
> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
> General State Heap (GSH) or Intruction State Heap (ISH) must be in a
> 32-bit range, because the General State Offset and Instruction State Offset
> are limited to 32-bits.
> 
> Provide a flag to set when the 4GB limit is not necessary in a given bo.
> 48-bit range will only be used when explicitly requested.
> 
> Cc: dri-devel@lists.freedesktop.org
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>  include/drm/i915_drm.h    |  3 ++-
>  intel/intel_bufmgr.c      | 12 ++++++++++++
>  intel/intel_bufmgr.h      |  2 ++
>  intel/intel_bufmgr_gem.c  | 48 +++++++++++++++++++++++++++++++++++++++++++----
>  intel/intel_bufmgr_priv.h | 11 +++++++++++
>  5 files changed, 71 insertions(+), 5 deletions(-)
> 
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index ded43b1..d6fb99d 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
>  #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>  #define EXEC_OBJECT_WRITE	(1<<2)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
> +#define EXEC_OBJECT_SUPPORTS_48BADDRESS (1<<3)

Please change the name, so you don't concatenate the "B" of "48B" with
the "A" of "ADDRESS", making something which appears to relate to a "BAD
DRESS". .._48B_ADDRESS.. and corresponding lowercase variants would be
fine :)

.Dave.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH igt v3] lib: Update intel_require_memory to handle +4GB cases
  2015-06-24 13:40   ` [PATCH igt v2] " Michel Thierry
  2015-06-24 15:38     ` Chris Wilson
@ 2015-06-26 16:46     ` Michel Thierry
  1 sibling, 0 replies; 21+ messages in thread
From: Michel Thierry @ 2015-06-26 16:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

Changed size from u32 to u64 to support +4GB.
48-bit PPGTT test cases may need extra memory available.

v2: Use thousands separator (Chris)
v3: Moved igt_skip_on_simulation() to the start (Chris)
    Include Chris' usage hint in the gtk-doc section (Daniel)

Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 lib/igt_aux.h  |  2 +-
 lib/intel_os.c | 34 +++++++++++++++++++++++++++++-----
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/lib/igt_aux.h b/lib/igt_aux.h
index 9ea50de..139e5da 100644
--- a/lib/igt_aux.h
+++ b/lib/igt_aux.h
@@ -86,7 +86,7 @@ uint64_t intel_get_avail_ram_mb(void);
 uint64_t intel_get_total_ram_mb(void);
 uint64_t intel_get_total_swap_mb(void);
 
-void intel_require_memory(uint32_t count, uint32_t size, unsigned mode);
+void intel_require_memory(uint32_t count, uint64_t size, unsigned mode);
 #define CHECK_RAM 0x1
 #define CHECK_SWAP 0x2
 
diff --git a/lib/intel_os.c b/lib/intel_os.c
index f82847f..3e3b9ec 100644
--- a/lib/intel_os.c
+++ b/lib/intel_os.c
@@ -214,19 +214,45 @@ intel_get_total_swap_mb(void)
  * also causes the test to be skipped automatically on simulation under the
  * assumption that any test that needs to check for memory requirements is a
  * thrashing test unsuitable for slow simulated systems.
+ *
+ * We use intel_require_memory() to detect tests that are designed to run with
+ * large working sets to stress boundaries such as aperture, and/or memory
+ * exhaustion. Functional tests that also require large working sets should be
+ * split into two, a small subtest to verify the operation with the absolute
+ * minimum working set, and the full subtest to verify the interesting corner
+ * cases. The former test doesn't require the memory check and so judicious
+ * use of intel_require_memory() helps segregate such functional tests from
+ * the broader tests, useful for slow verification systems such as the
+ * simulator.
+ *
+ * To recap, lay out behaviour tests like:
+ * |[<!-- language="C" -->
+ *    igt_subtest("small") {
+ *       run_test({.num_surfaces = 2 });
+ *    }
+ *    igt_subtest("full") {
+ *      intel_require_memory(NUM_SURFACES, SURFACE_SIZE, CHECK_RAM);
+ *      run_test({.num_surfaces = NUM_SURFACES});
+ *    }
+ * ]|
+ * so that we have a simple check that is run anywhere and everywhere,
+ * useful to prove the test itself works as expected, and the full
+ * slow check that needs to be run on real hardware.
  */
-void intel_require_memory(uint32_t count, uint32_t size, unsigned mode)
+void intel_require_memory(uint32_t count, uint64_t size, unsigned mode)
 {
 /* rough estimate of how many bytes the kernel requires to track each object */
 #define KERNEL_BO_OVERHEAD 512
 	uint64_t required, total;
 
+	igt_skip_on_simulation();
+
 	required = count;
 	required *= size + KERNEL_BO_OVERHEAD;
 	required = ALIGN(required, 4096);
 
-	igt_debug("Checking %u surfaces of size %u bytes (total %'llu) against %s%s\n",
-		  count, size, (long long)required,
+	igt_debug("Checking %u surfaces of size %'llu bytes (total %'llu) against %s%s\n",
+		  count, (long long)size, (long long)required,
 		  mode & (CHECK_RAM | CHECK_SWAP) ? "RAM" : "",
 		  mode & CHECK_SWAP ? " + swap": "");
 
@@ -242,8 +268,6 @@ void intel_require_memory(uint32_t count, uint32_t size, unsigned mode)
 		      (long long)required, (long long)total,
 		      mode & (CHECK_RAM | CHECK_SWAP) ? "RAM" : "",
 		      mode & CHECK_SWAP ? " + swap": "");
-
-	igt_skip_on_simulation();
 }
 
 void
-- 
2.4.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag.
  2015-06-23 12:21 [PATCH libdrm] intel: Add EXEC_OBJECT_SUPPORTS_48BADDRESS flag Michel Thierry
                   ` (3 preceding siblings ...)
  2015-06-26 15:00 ` [PATCH libdrm] intel: Add EXEC_OBJECT_SUPPORTS_48BADDRESS flag Dave Gordon
@ 2015-07-01 15:28 ` Michel Thierry
  2015-07-01 15:28   ` [PATCH libdrm v2 2/2] configure.ac: bump version to 2.4.63 Michel Thierry
                     ` (2 more replies)
  4 siblings, 3 replies; 21+ messages in thread
From: Michel Thierry @ 2015-07-01 15:28 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky, dri-devel

Gen8+ supports 48-bit virtual addresses, but some objects must always be
allocated inside the 32-bit address range.

In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Intruction State Heap (ISH) must be in a
32-bit range, because the General State Offset and Instruction State Offset
are limited to 32-bits.

Provide a flag to set when the 4GB limit is not necessary in a given bo.
48-bit range will only be used when explicitly requested.

Calls to the new drm_intel_bo_emit_reloc_48bit function will have this flag
set automatically, while calls to drm_intel_bo_emit_reloc will clear it.

v2: Make set/clear functions nops on pre-gen8 platforms, and use them
    internally in emit_reloc functions (Ben)
    s/48BADDRESS/48B_ADDRESS/ (Dave)

Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Dave Gordon <david.s.gordon@intel.com>
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 include/drm/i915_drm.h    |  3 ++-
 intel/intel_bufmgr.c      | 24 +++++++++++++++++++++
 intel/intel_bufmgr.h      |  8 ++++++-
 intel/intel_bufmgr_gem.c  | 54 +++++++++++++++++++++++++++++++++++++++++++----
 intel/intel_bufmgr_priv.h | 11 ++++++++++
 5 files changed, 94 insertions(+), 6 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index ded43b1..426b25c 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
 	__u64 flags;
 
 	__u64 rsvd1;
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
index 14ea9f9..590a855 100644
--- a/intel/intel_bufmgr.c
+++ b/intel/intel_bufmgr.c
@@ -188,6 +188,18 @@ drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count)
 	return bo_array[0]->bufmgr->check_aperture_space(bo_array, count);
 }
 
+void drm_intel_bo_set_supports_48b_address(drm_intel_bo *bo)
+{
+	if (bo->bufmgr->bo_set_supports_48b_address)
+		bo->bufmgr->bo_set_supports_48b_address(bo);
+}
+
+void drm_intel_bo_clear_supports_48b_address(drm_intel_bo *bo)
+{
+	if (bo->bufmgr->bo_clear_supports_48b_address)
+		bo->bufmgr->bo_clear_supports_48b_address(bo);
+}
+
 int
 drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name)
 {
@@ -202,6 +214,18 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 			drm_intel_bo *target_bo, uint32_t target_offset,
 			uint32_t read_domains, uint32_t write_domain)
 {
+	drm_intel_bo_clear_supports_48b_address(target_bo);
+	return bo->bufmgr->bo_emit_reloc(bo, offset,
+					 target_bo, target_offset,
+					 read_domains, write_domain);
+}
+
+int
+drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
+			drm_intel_bo *target_bo, uint32_t target_offset,
+			uint32_t read_domains, uint32_t write_domain)
+{
+	drm_intel_bo_set_supports_48b_address(target_bo);
 	return bo->bufmgr->bo_emit_reloc(bo, offset,
 					 target_bo, target_offset,
 					 read_domains, write_domain);
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 285919e..62480cb 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -87,7 +87,8 @@ struct _drm_intel_bo {
 	/**
 	 * Last seen card virtual address (offset from the beginning of the
 	 * aperture) for the object.  This should be used to fill relocation
-	 * entries when calling drm_intel_bo_emit_reloc()
+	 * entries when calling drm_intel_bo_emit_reloc() or
+	 * drm_intel_bo_emit_reloc_48bit()
 	 */
 	uint64_t offset64;
 };
@@ -137,6 +138,8 @@ void drm_intel_bo_wait_rendering(drm_intel_bo *bo);
 
 void drm_intel_bufmgr_set_debug(drm_intel_bufmgr *bufmgr, int enable_debug);
 void drm_intel_bufmgr_destroy(drm_intel_bufmgr *bufmgr);
+void drm_intel_bo_set_supports_48b_address(drm_intel_bo *bo);
+void drm_intel_bo_clear_supports_48b_address(drm_intel_bo *bo);
 int drm_intel_bo_exec(drm_intel_bo *bo, int used,
 		      struct drm_clip_rect *cliprects, int num_cliprects, int DR4);
 int drm_intel_bo_mrb_exec(drm_intel_bo *bo, int used,
@@ -147,6 +150,9 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count);
 int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 			    drm_intel_bo *target_bo, uint32_t target_offset,
 			    uint32_t read_domains, uint32_t write_domain);
+int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
+				  drm_intel_bo *target_bo, uint32_t target_offset,
+				  uint32_t read_domains, uint32_t write_domain);
 int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
 				  drm_intel_bo *target_bo,
 				  uint32_t target_offset,
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 60c06fc..e3e2b0e 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -143,6 +143,7 @@ typedef struct _drm_intel_bufmgr_gem {
 } drm_intel_bufmgr_gem;
 
 #define DRM_INTEL_RELOC_FENCE (1<<0)
+#define DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS (2<<0)
 
 typedef struct _drm_intel_reloc_target_info {
 	drm_intel_bo *bo;
@@ -240,6 +241,14 @@ struct _drm_intel_bo_gem {
 	bool is_userptr;
 
 	/**
+	 * Boolean of whether this buffer can be in the whole 48-bit address.
+	 *
+	 * By default, buffers will be keep in a 32-bit range, unless this
+	 * flag is explicitly set.
+	 */
+	bool supports_48b_address;
+
+	/**
 	 * Size in bytes of this buffer and its relocation descendents.
 	 *
 	 * Used to avoid costly tree walking in
@@ -471,13 +480,18 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo)
 }
 
 static void
-drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
+drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence,
+			       int supports_48b_address)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
 	int index;
 
 	if (bo_gem->validate_index != -1) {
+		if (supports_48b_address) {
+			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
+				EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		}
 		if (need_fence)
 			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
 				EXEC_OBJECT_NEEDS_FENCE;
@@ -516,6 +530,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
 		bufmgr_gem->exec2_objects[index].flags |=
 			EXEC_OBJECT_NEEDS_FENCE;
 	}
+	if (supports_48b_address) {
+		bufmgr_gem->exec2_objects[index].flags |=
+			EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+	}
 	bufmgr_gem->exec_count++;
 }
 
@@ -1931,11 +1949,33 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 	else
 		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
 
+	if (target_bo_gem->supports_48b_address)
+		bo_gem->reloc_target_info[bo_gem->reloc_count].flags |=
+			DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS;
+
 	bo_gem->reloc_count++;
 
 	return 0;
 }
 
+static void drm_intel_gem_bo_set_supports_48b_address(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+
+	if (bufmgr_gem->gen >= 8)
+		bo_gem->supports_48b_address = 1;
+}
+
+static void drm_intel_gem_bo_clear_supports_48b_address(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+
+	if (bufmgr_gem->gen >= 8)
+		bo_gem->supports_48b_address = 0;
+}
+
 static int
 drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 			    drm_intel_bo *target_bo, uint32_t target_offset,
@@ -2049,7 +2089,7 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
 
 	for (i = 0; i < bo_gem->reloc_count; i++) {
 		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
-		int need_fence;
+		int need_fence, supports_48b_addr;
 
 		if (target_bo == bo)
 			continue;
@@ -2062,8 +2102,12 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
 		need_fence = (bo_gem->reloc_target_info[i].flags &
 			      DRM_INTEL_RELOC_FENCE);
 
+		supports_48b_addr = (bo_gem->reloc_target_info[i].flags &
+				    DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS);
+
 		/* Add the target to the validate list */
-		drm_intel_add_validate_buffer2(target_bo, need_fence);
+		drm_intel_add_validate_buffer2(target_bo, need_fence,
+					       supports_48b_addr);
 	}
 }
 
@@ -2508,7 +2552,7 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
 	/* Add the batch buffer to the validation list.  There are no relocations
 	 * pointing to it.
 	 */
-	drm_intel_add_validate_buffer2(bo, 0);
+	drm_intel_add_validate_buffer2(bo, 0, 0);
 
 	memclear(execbuf);
 	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
@@ -3657,6 +3701,8 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
 	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
 	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
 	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
+	bufmgr_gem->bufmgr.bo_set_supports_48b_address = drm_intel_gem_bo_set_supports_48b_address;
+	bufmgr_gem->bufmgr.bo_clear_supports_48b_address = drm_intel_gem_bo_clear_supports_48b_address;
 	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
 	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
 	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
index 59ebd18..774fa02 100644
--- a/intel/intel_bufmgr_priv.h
+++ b/intel/intel_bufmgr_priv.h
@@ -152,6 +152,17 @@ struct _drm_intel_bufmgr {
 	void (*destroy) (drm_intel_bufmgr *bufmgr);
 
 	/**
+	 * Set/Clear 48-bit address support flag in a given bo.
+	 *
+	 * Any resource used with flat/heapless (0x00000000-0xfffff000)
+	 * General State Heap (GSH) or Intructions State Heap (ISH) must
+	 * be in a 32-bit range. 48-bit range will only be used when explicitly
+	 * requested.
+	 */
+	void (*bo_set_supports_48b_address) (drm_intel_bo *bo);
+	void (*bo_clear_supports_48b_address) (drm_intel_bo *bo);
+
+	/**
 	 * Add relocation entry in reloc_buf, which will be updated with the
 	 * target buffer's real offset on on command submission.
 	 *
-- 
2.4.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH libdrm v2 2/2] configure.ac: bump version to 2.4.63
  2015-07-01 15:28 ` [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag Michel Thierry
@ 2015-07-01 15:28   ` Michel Thierry
  2015-07-01 15:28   ` [PATCH mesa v2] i965/gen8+: bo in state base address must be in 32-bit address range Michel Thierry
  2015-07-01 17:06   ` [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag Emil Velikov
  2 siblings, 0 replies; 21+ messages in thread
From: Michel Thierry @ 2015-07-01 15:28 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel

Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 configure.ac | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 001fd3d..12b8465 100644
--- a/configure.ac
+++ b/configure.ac
@@ -20,7 +20,7 @@
 
 AC_PREREQ([2.63])
 AC_INIT([libdrm],
-        [2.4.62],
+        [2.4.63],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=DRI],
         [libdrm])
 
-- 
2.4.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH mesa v2] i965/gen8+: bo in state base address must be in 32-bit address range
  2015-07-01 15:28 ` [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag Michel Thierry
  2015-07-01 15:28   ` [PATCH libdrm v2 2/2] configure.ac: bump version to 2.4.63 Michel Thierry
@ 2015-07-01 15:28   ` Michel Thierry
  2015-07-02  7:21     ` Chris Wilson
  2015-07-01 17:06   ` [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag Emil Velikov
  2 siblings, 1 reply; 21+ messages in thread
From: Michel Thierry @ 2015-07-01 15:28 UTC (permalink / raw)
  To: intel-gfx; +Cc: mesa-dev, Ben Widawsky

Gen8+ supports 48-bit virtual addresses, but some objects must always be
allocated inside the 32-bit address range.

In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap or Intruction State Heap must be in a 32-bit range
(GSH / ISH), because the General State Offset and Instruction State Offset
are limited to 32-bits.

Use drm_intel_bo_emit_reloc_48bit when the 4GB limit is not necessary, and
the bo can be in the full address space.

This commit introduces a dependency of libdrm 2.4.63, which introduces the
drm_intel_bo_emit_reloc_48bit function.

v2: s/48baddress/48b_address/,
    Only use in OUT_RELOC64 cases, OUT_RELOC implies a 32-bit address offset
    is needed (Ben)

Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: mesa-dev@lists.freedesktop.org
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 configure.ac                                  |  2 +-
 src/mesa/drivers/dri/i965/gen8_misc_state.c   | 23 +++++++++++++++--------
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  6 +++---
 3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/configure.ac b/configure.ac
index af61aa2..c92ca44 100644
--- a/configure.ac
+++ b/configure.ac
@@ -68,7 +68,7 @@ AC_SUBST([OSMESA_VERSION])
 dnl Versions for external dependencies
 LIBDRM_REQUIRED=2.4.38
 LIBDRM_RADEON_REQUIRED=2.4.56
-LIBDRM_INTEL_REQUIRED=2.4.60
+LIBDRM_INTEL_REQUIRED=2.4.63
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
 LIBDRM_FREEDRENO_REQUIRED=2.4.57
diff --git a/src/mesa/drivers/dri/i965/gen8_misc_state.c b/src/mesa/drivers/dri/i965/gen8_misc_state.c
index b20038e..5c8924d 100644
--- a/src/mesa/drivers/dri/i965/gen8_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_misc_state.c
@@ -28,6 +28,11 @@
 
 /**
  * Define the base addresses which some state is referenced from.
+ *
+ * Use OUT_RELOC instead of OUT_RELOC64, because the General State
+ * Offset and Instruction State Offset are limited to 32-bits by
+ * hardware [and add OUT_BATCH(0) after each OUT_RELOC to complete
+ * the number of dwords needed for STATE_BASE_ADDRESS].
  */
 void gen8_upload_state_base_address(struct brw_context *brw)
 {
@@ -41,19 +46,21 @@ void gen8_upload_state_base_address(struct brw_context *brw)
    OUT_BATCH(0);
    OUT_BATCH(mocs_wb << 16);
    /* Surface state base address: */
-   OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
-               mocs_wb << 4 | 1);
+   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+             mocs_wb << 4 | 1);
+   OUT_BATCH(0);
    /* Dynamic state base address: */
-   OUT_RELOC64(brw->batch.bo,
-               I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
-               mocs_wb << 4 | 1);
+   OUT_RELOC(brw->batch.bo,
+             I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
+             mocs_wb << 4 | 1);
+   OUT_BATCH(0);
    /* Indirect object base address: MEDIA_OBJECT data */
    OUT_BATCH(mocs_wb << 4 | 1);
    OUT_BATCH(0);
    /* Instruction base address: shader kernels (incl. SIP) */
-   OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-               mocs_wb << 4 | 1);
-
+   OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+             mocs_wb << 4 | 1);
+   OUT_BATCH(0);
    /* General state buffer size */
    OUT_BATCH(0xfffff001);
    /* Dynamic state buffer size */
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 54081a1..220a35b 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -411,9 +411,9 @@ intel_batchbuffer_emit_reloc64(struct brw_context *brw,
                                uint32_t read_domains, uint32_t write_domain,
 			       uint32_t delta)
 {
-   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
-                                     buffer, delta,
-                                     read_domains, write_domain);
+   int ret = drm_intel_bo_emit_reloc_48bit(brw->batch.bo, 4*brw->batch.used,
+                                           buffer, delta,
+                                           read_domains, write_domain);
    assert(ret == 0);
    (void) ret;
 
-- 
2.4.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag.
  2015-07-01 15:28 ` [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag Michel Thierry
  2015-07-01 15:28   ` [PATCH libdrm v2 2/2] configure.ac: bump version to 2.4.63 Michel Thierry
  2015-07-01 15:28   ` [PATCH mesa v2] i965/gen8+: bo in state base address must be in 32-bit address range Michel Thierry
@ 2015-07-01 17:06   ` Emil Velikov
  2015-07-02 11:04     ` Michel Thierry
  2 siblings, 1 reply; 21+ messages in thread
From: Emil Velikov @ 2015-07-01 17:06 UTC (permalink / raw)
  To: Michel Thierry; +Cc: intel-gfx, Ben Widawsky, ML dri-devel

Hi Michel,

Although I cannot comment on the exact implementation I can give you
general some tips which you might find useful.

On 1 July 2015 at 16:28, Michel Thierry <michel.thierry@intel.com> wrote:
> Gen8+ supports 48-bit virtual addresses, but some objects must always be
> allocated inside the 32-bit address range.
>
> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
> General State Heap (GSH) or Intruction State Heap (ISH) must be in a
> 32-bit range, because the General State Offset and Instruction State Offset
> are limited to 32-bits.
>
> Provide a flag to set when the 4GB limit is not necessary in a given bo.
> 48-bit range will only be used when explicitly requested.
>
> Calls to the new drm_intel_bo_emit_reloc_48bit function will have this flag
> set automatically, while calls to drm_intel_bo_emit_reloc will clear it.
>
> v2: Make set/clear functions nops on pre-gen8 platforms, and use them
>     internally in emit_reloc functions (Ben)
>     s/48BADDRESS/48B_ADDRESS/ (Dave)
>
> Cc: Ben Widawsky <ben@bwidawsk.net>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Cc: dri-devel@lists.freedesktop.org
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>  include/drm/i915_drm.h    |  3 ++-
>  intel/intel_bufmgr.c      | 24 +++++++++++++++++++++
>  intel/intel_bufmgr.h      |  8 ++++++-
>  intel/intel_bufmgr_gem.c  | 54 +++++++++++++++++++++++++++++++++++++++++++----
>  intel/intel_bufmgr_priv.h | 11 ++++++++++
>  5 files changed, 94 insertions(+), 6 deletions(-)
>
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index ded43b1..426b25c 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
>  #define EXEC_OBJECT_NEEDS_GTT  (1<<1)
>  #define EXEC_OBJECT_WRITE      (1<<2)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
> +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
Perhaps you already know this but changes like these go in _after_ the
updated kernel header is part of linux-next or a released kernel
version.

>         __u64 flags;
>
>         __u64 rsvd1;
> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
> index 14ea9f9..590a855 100644
> --- a/intel/intel_bufmgr.c
> +++ b/intel/intel_bufmgr.c
> @@ -188,6 +188,18 @@ drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count)
>         return bo_array[0]->bufmgr->check_aperture_space(bo_array, count);
>  }
>
> +void drm_intel_bo_set_supports_48b_address(drm_intel_bo *bo)
> +{
> +       if (bo->bufmgr->bo_set_supports_48b_address)
> +               bo->bufmgr->bo_set_supports_48b_address(bo);
> +}
> +
> +void drm_intel_bo_clear_supports_48b_address(drm_intel_bo *bo)
> +{
> +       if (bo->bufmgr->bo_clear_supports_48b_address)
> +               bo->bufmgr->bo_clear_supports_48b_address(bo);
> +}
> +
>  int
>  drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name)
>  {
> @@ -202,6 +214,18 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>                         drm_intel_bo *target_bo, uint32_t target_offset,
>                         uint32_t read_domains, uint32_t write_domain)
>  {
> +       drm_intel_bo_clear_supports_48b_address(target_bo);
> +       return bo->bufmgr->bo_emit_reloc(bo, offset,
> +                                        target_bo, target_offset,
> +                                        read_domains, write_domain);
> +}
> +
> +int
> +drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
> +                       drm_intel_bo *target_bo, uint32_t target_offset,
> +                       uint32_t read_domains, uint32_t write_domain)
> +{
> +       drm_intel_bo_set_supports_48b_address(target_bo);
>         return bo->bufmgr->bo_emit_reloc(bo, offset,
>                                          target_bo, target_offset,
>                                          read_domains, write_domain);
> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> index 285919e..62480cb 100644
> --- a/intel/intel_bufmgr.h
> +++ b/intel/intel_bufmgr.h
> @@ -87,7 +87,8 @@ struct _drm_intel_bo {
>         /**
>          * Last seen card virtual address (offset from the beginning of the
>          * aperture) for the object.  This should be used to fill relocation
> -        * entries when calling drm_intel_bo_emit_reloc()
> +        * entries when calling drm_intel_bo_emit_reloc() or
> +        * drm_intel_bo_emit_reloc_48bit()
>          */
>         uint64_t offset64;
>  };
> @@ -137,6 +138,8 @@ void drm_intel_bo_wait_rendering(drm_intel_bo *bo);
>
>  void drm_intel_bufmgr_set_debug(drm_intel_bufmgr *bufmgr, int enable_debug);
>  void drm_intel_bufmgr_destroy(drm_intel_bufmgr *bufmgr);
> +void drm_intel_bo_set_supports_48b_address(drm_intel_bo *bo);
> +void drm_intel_bo_clear_supports_48b_address(drm_intel_bo *bo);
Are these two are internal/implementation specific functions ? If so
please don't include them in this public header and annotate both
declaration and definition with drm_private.

>  int drm_intel_bo_exec(drm_intel_bo *bo, int used,
>                       struct drm_clip_rect *cliprects, int num_cliprects, int DR4);
>  int drm_intel_bo_mrb_exec(drm_intel_bo *bo, int used,
> @@ -147,6 +150,9 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count);
>  int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>                             drm_intel_bo *target_bo, uint32_t target_offset,
>                             uint32_t read_domains, uint32_t write_domain);
> +int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
> +                                 drm_intel_bo *target_bo, uint32_t target_offset,
> +                                 uint32_t read_domains, uint32_t write_domain);
Please add new (public) symbols to the test (intel-symbol-check). If
in doubt make check will give you a lovely message.

Patches that bump the version number are not normally posted to the
ML, but are committed as part of the release process (see RELEASING
for more info).

Cheers,
Emil
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH mesa v2] i965/gen8+: bo in state base address must be in 32-bit address range
  2015-07-01 15:28   ` [PATCH mesa v2] i965/gen8+: bo in state base address must be in 32-bit address range Michel Thierry
@ 2015-07-02  7:21     ` Chris Wilson
  2015-07-02 13:53       ` Michel Thierry
  0 siblings, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2015-07-02  7:21 UTC (permalink / raw)
  To: Michel Thierry; +Cc: mesa-dev, intel-gfx, Ben Widawsky

On Wed, Jul 01, 2015 at 04:28:10PM +0100, Michel Thierry wrote:
> Gen8+ supports 48-bit virtual addresses, but some objects must always be
> allocated inside the 32-bit address range.
> 
> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
> General State Heap or Intruction State Heap must be in a 32-bit range
> (GSH / ISH), because the General State Offset and Instruction State Offset
> are limited to 32-bits.
> 
> Use drm_intel_bo_emit_reloc_48bit when the 4GB limit is not necessary, and
> the bo can be in the full address space.
> 
> This commit introduces a dependency of libdrm 2.4.63, which introduces the
> drm_intel_bo_emit_reloc_48bit function.
> 
> v2: s/48baddress/48b_address/,
>     Only use in OUT_RELOC64 cases, OUT_RELOC implies a 32-bit address offset
>     is needed (Ben)
> 
> Cc: Ben Widawsky <ben@bwidawsk.net>
> Cc: mesa-dev@lists.freedesktop.org
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>  configure.ac                                  |  2 +-
>  src/mesa/drivers/dri/i965/gen8_misc_state.c   | 23 +++++++++++++++--------
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c |  6 +++---
>  3 files changed, 19 insertions(+), 12 deletions(-)
> 
> diff --git a/configure.ac b/configure.ac
> index af61aa2..c92ca44 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -68,7 +68,7 @@ AC_SUBST([OSMESA_VERSION])
>  dnl Versions for external dependencies
>  LIBDRM_REQUIRED=2.4.38
>  LIBDRM_RADEON_REQUIRED=2.4.56
> -LIBDRM_INTEL_REQUIRED=2.4.60
> +LIBDRM_INTEL_REQUIRED=2.4.63
>  LIBDRM_NVVIEUX_REQUIRED=2.4.33
>  LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
>  LIBDRM_FREEDRENO_REQUIRED=2.4.57
> diff --git a/src/mesa/drivers/dri/i965/gen8_misc_state.c b/src/mesa/drivers/dri/i965/gen8_misc_state.c
> index b20038e..5c8924d 100644
> --- a/src/mesa/drivers/dri/i965/gen8_misc_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_misc_state.c
> @@ -28,6 +28,11 @@
>  
>  /**
>   * Define the base addresses which some state is referenced from.
> + *
> + * Use OUT_RELOC instead of OUT_RELOC64, because the General State
> + * Offset and Instruction State Offset are limited to 32-bits by
> + * hardware [and add OUT_BATCH(0) after each OUT_RELOC to complete
> + * the number of dwords needed for STATE_BASE_ADDRESS].
>   */
>  void gen8_upload_state_base_address(struct brw_context *brw)
>  {
> @@ -41,19 +46,21 @@ void gen8_upload_state_base_address(struct brw_context *brw)
>     OUT_BATCH(0);
>     OUT_BATCH(mocs_wb << 16);
>     /* Surface state base address: */
> -   OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
> -               mocs_wb << 4 | 1);
> +   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
> +             mocs_wb << 4 | 1);
> +   OUT_BATCH(0);
>     /* Dynamic state base address: */
> -   OUT_RELOC64(brw->batch.bo,
> -               I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
> -               mocs_wb << 4 | 1);
> +   OUT_RELOC(brw->batch.bo,
> +             I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
> +             mocs_wb << 4 | 1);
> +   OUT_BATCH(0);

Note this is in general dangerous since it lies to the kernel about the
value written into the batch corresponding to the 64bit relocation.
(Aliasing a high object here isn't much of an issue since the relocation
will be forced by having to rebind the buffer low.)

Personally I would have stuck with the OUT_RELOC64 so that any future
cut'n'paste didn't have a subtle bug and that the wa was clearly
indicated by clearing the execobject flag for brw->batch.bo and
brw->cache.bo.

>     /* Indirect object base address: MEDIA_OBJECT data */
>     OUT_BATCH(mocs_wb << 4 | 1);
>     OUT_BATCH(0);
>     /* Instruction base address: shader kernels (incl. SIP) */
> -   OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
> -               mocs_wb << 4 | 1);
> -
> +   OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
> +             mocs_wb << 4 | 1);
> +   OUT_BATCH(0);
>     /* General state buffer size */
>     OUT_BATCH(0xfffff001);
>     /* Dynamic state buffer size */
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 54081a1..220a35b 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -411,9 +411,9 @@ intel_batchbuffer_emit_reloc64(struct brw_context *brw,
>                                 uint32_t read_domains, uint32_t write_domain,
>  			       uint32_t delta)
>  {
> -   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
> -                                     buffer, delta,
> -                                     read_domains, write_domain);
> +   int ret = drm_intel_bo_emit_reloc_48bit(brw->batch.bo, 4*brw->batch.used,
> +                                           buffer, delta,
> +                                           read_domains, write_domain);

I would have just exposed setting the flag on the execobject.  That way you
still have existing userspace safe by default, can set a
bufmgr-level flag to enable 48bit support by default and then
individually turn off 48bit support for the couple of buffers that
require it.

Just my 2c because I have seen what trouble lying to the kernel about
relocation values can cause and would rather not have that in the
interface by design.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag.
  2015-07-01 17:06   ` [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag Emil Velikov
@ 2015-07-02 11:04     ` Michel Thierry
  0 siblings, 0 replies; 21+ messages in thread
From: Michel Thierry @ 2015-07-02 11:04 UTC (permalink / raw)
  To: Emil Velikov; +Cc: intel-gfx, Ben Widawsky, ML dri-devel

On 7/1/2015 6:06 PM, Emil Velikov wrote:
> Hi Michel,
>
> Although I cannot comment on the exact implementation I can give you
> general some tips which you might find useful.
>
Hi Emil,

> On 1 July 2015 at 16:28, Michel Thierry <michel.thierry@intel.com> wrote:
>> Gen8+ supports 48-bit virtual addresses, but some objects must always be
>> allocated inside the 32-bit address range.
>>
>> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
>> index ded43b1..426b25c 100644
>> --- a/include/drm/i915_drm.h
>> +++ b/include/drm/i915_drm.h
>> @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
>>   #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
>>   #define EXEC_OBJECT_NEEDS_GTT  (1<<1)
>>   #define EXEC_OBJECT_WRITE      (1<<2)
>> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
>> +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
>> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
> Perhaps you already know this but changes like these go in _after_ the
> updated kernel header is part of linux-next or a released kernel
> version.
>

The kernel changes have just been reviewed in drm-intel. I included the 
userland patches (there's a mesa patch too) so more people could see the 
whole picture.

>>          __u64 flags;
>>
>>          __u64 rsvd1;
>> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
>> index 14ea9f9..590a855 100644
>> --- a/intel/intel_bufmgr.c
>> +++ b/intel/intel_bufmgr.c
>> @@ -188,6 +188,18 @@ drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count)
>>          return bo_array[0]->bufmgr->check_aperture_space(bo_array, count);
>>   }
>>
>> +void drm_intel_bo_set_supports_48b_address(drm_intel_bo *bo)
>> +{
>> +       if (bo->bufmgr->bo_set_supports_48b_address)
>> +               bo->bufmgr->bo_set_supports_48b_address(bo);
>> +}
>> +
>> +void drm_intel_bo_clear_supports_48b_address(drm_intel_bo *bo)
>> +{
>> +       if (bo->bufmgr->bo_clear_supports_48b_address)
>> +               bo->bufmgr->bo_clear_supports_48b_address(bo);
>> +}
>> +
>>   int
>>   drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name)
>>   {
>> @@ -202,6 +214,18 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>>                          drm_intel_bo *target_bo, uint32_t target_offset,
>>                          uint32_t read_domains, uint32_t write_domain)
>>   {
>> +       drm_intel_bo_clear_supports_48b_address(target_bo);
>> +       return bo->bufmgr->bo_emit_reloc(bo, offset,
>> +                                        target_bo, target_offset,
>> +                                        read_domains, write_domain);
>> +}
>> +
>> +int
>> +drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
>> +                       drm_intel_bo *target_bo, uint32_t target_offset,
>> +                       uint32_t read_domains, uint32_t write_domain)
>> +{
>> +       drm_intel_bo_set_supports_48b_address(target_bo);
>>          return bo->bufmgr->bo_emit_reloc(bo, offset,
>>                                           target_bo, target_offset,
>>                                           read_domains, write_domain);
>> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
>> index 285919e..62480cb 100644
>> --- a/intel/intel_bufmgr.h
>> +++ b/intel/intel_bufmgr.h
>> @@ -87,7 +87,8 @@ struct _drm_intel_bo {
>>          /**
>>           * Last seen card virtual address (offset from the beginning of the
>>           * aperture) for the object.  This should be used to fill relocation
>> -        * entries when calling drm_intel_bo_emit_reloc()
>> +        * entries when calling drm_intel_bo_emit_reloc() or
>> +        * drm_intel_bo_emit_reloc_48bit()
>>           */
>>          uint64_t offset64;
>>   };
>> @@ -137,6 +138,8 @@ void drm_intel_bo_wait_rendering(drm_intel_bo *bo);
>>
>>   void drm_intel_bufmgr_set_debug(drm_intel_bufmgr *bufmgr, int enable_debug);
>>   void drm_intel_bufmgr_destroy(drm_intel_bufmgr *bufmgr);
>> +void drm_intel_bo_set_supports_48b_address(drm_intel_bo *bo);
>> +void drm_intel_bo_clear_supports_48b_address(drm_intel_bo *bo);
> Are these two are internal/implementation specific functions ? If so
> please don't include them in this public header and annotate both
> declaration and definition with drm_private.
>
>>   int drm_intel_bo_exec(drm_intel_bo *bo, int used,
>>                        struct drm_clip_rect *cliprects, int num_cliprects, int DR4);
>>   int drm_intel_bo_mrb_exec(drm_intel_bo *bo, int used,
>> @@ -147,6 +150,9 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count);
>>   int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>>                              drm_intel_bo *target_bo, uint32_t target_offset,
>>                              uint32_t read_domains, uint32_t write_domain);
>> +int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
>> +                                 drm_intel_bo *target_bo, uint32_t target_offset,
>> +                                 uint32_t read_domains, uint32_t write_domain);
> Please add new (public) symbols to the test (intel-symbol-check). If
> in doubt make check will give you a lovely message.
>
> Patches that bump the version number are not normally posted to the
> ML, but are committed as part of the release process (see RELEASING
> for more info).

I'll update the intel-symbol-check and follow the release process after 
the kernel patch is merged.

Just to confirm, anyone can push to git://anongit.freedesktop.org/mesa/drm ?

Thanks for the tips,

-Michel
>
> Cheers,
> Emil
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH mesa v2] i965/gen8+: bo in state base address must be in 32-bit address range
  2015-07-02  7:21     ` Chris Wilson
@ 2015-07-02 13:53       ` Michel Thierry
  2015-07-02 14:05         ` Chris Wilson
  0 siblings, 1 reply; 21+ messages in thread
From: Michel Thierry @ 2015-07-02 13:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, mesa-dev, Ben Widawsky

On 7/2/2015 8:21 AM, Chris Wilson wrote:
> On Wed, Jul 01, 2015 at 04:28:10PM +0100, Michel Thierry wrote:
>> Gen8+ supports 48-bit virtual addresses, but some objects must always be
>> allocated inside the 32-bit address range.
>>
>>      OUT_BATCH(0);
>>      OUT_BATCH(mocs_wb << 16);
>>      /* Surface state base address: */
>> -   OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
>> -               mocs_wb << 4 | 1);
>> +   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
>> +             mocs_wb << 4 | 1);
>> +   OUT_BATCH(0);
>>      /* Dynamic state base address: */
>> -   OUT_RELOC64(brw->batch.bo,
>> -               I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
>> -               mocs_wb << 4 | 1);
>> +   OUT_RELOC(brw->batch.bo,
>> +             I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
>> +             mocs_wb << 4 | 1);
>> +   OUT_BATCH(0);
>
> Note this is in general dangerous since it lies to the kernel about the
> value written into the batch corresponding to the 64bit relocation.
> (Aliasing a high object here isn't much of an issue since the relocation
> will be forced by having to rebind the buffer low.)
>
> Personally I would have stuck with the OUT_RELOC64 so that any future
> cut'n'paste didn't have a subtle bug and that the wa was clearly
> indicated by clearing the execobject flag for brw->batch.bo and
> brw->cache.bo.
>
>>      /* Indirect object base address: MEDIA_OBJECT data */
>>      OUT_BATCH(mocs_wb << 4 | 1);
>>      OUT_BATCH(0);
>>      /* Instruction base address: shader kernels (incl. SIP) */
>> -   OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> -               mocs_wb << 4 | 1);
>> -
>> +   OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> +             mocs_wb << 4 | 1);
>> +   OUT_BATCH(0);
>>      /* General state buffer size */
>>      OUT_BATCH(0xfffff001);
>>      /* Dynamic state buffer size */
>> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>> index 54081a1..220a35b 100644
>> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>> @@ -411,9 +411,9 @@ intel_batchbuffer_emit_reloc64(struct brw_context *brw,
>>                                  uint32_t read_domains, uint32_t write_domain,
>>   			       uint32_t delta)
>>   {
>> -   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
>> -                                     buffer, delta,
>> -                                     read_domains, write_domain);
>> +   int ret = drm_intel_bo_emit_reloc_48bit(brw->batch.bo, 4*brw->batch.used,
>> +                                           buffer, delta,
>> +                                           read_domains, write_domain);
>
> I would have just exposed setting the flag on the execobject.  That way you
> still have existing userspace safe by default, can set a
> bufmgr-level flag to enable 48bit support by default and then
> individually turn off 48bit support for the couple of buffers that
> require it.
>
So, something more like v1? 
http://mid.gmane.org/1435062089-19877-2-git-send-email-michel.thierry@intel.com

(apart of the hacky macro name)

> Just my 2c because I have seen what trouble lying to the kernel about
> relocation values can cause and would rather not have that in the
> interface by design.
> -Chris
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH mesa v2] i965/gen8+: bo in state base address must be in 32-bit address range
  2015-07-02 13:53       ` Michel Thierry
@ 2015-07-02 14:05         ` Chris Wilson
  0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2015-07-02 14:05 UTC (permalink / raw)
  To: Michel Thierry; +Cc: mesa-dev, intel-gfx, Ben Widawsky

On Thu, Jul 02, 2015 at 02:53:45PM +0100, Michel Thierry wrote:
> >I would have just exposed setting the flag on the execobject.  That way you
> >still have existing userspace safe by default, can set a
> >bufmgr-level flag to enable 48bit support by default and then
> >individually turn off 48bit support for the couple of buffers that
> >require it.
> >
> So, something more like v1? http://mid.gmane.org/1435062089-19877-2-git-send-email-michel.thierry@intel.com
> 
> (apart of the hacky macro name)

Yes. I value having those 2-dwords marked as being the relocation
address even if the top dword must be zero.

You can adopt the libdrm interface Ben suggested underneath the
macros, but having it marked as being both a 64-bit relocation and a w/a
in the source is pricless imo.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2015-07-02 14:05 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-23 12:21 [PATCH libdrm] intel: Add EXEC_OBJECT_SUPPORTS_48BADDRESS flag Michel Thierry
2015-06-23 12:21 ` [PATCH mesa] i965/gen8+: bo in state base address must be in 32-bit address range Michel Thierry
2015-06-24  3:51   ` Ben Widawsky
2015-06-25 13:10     ` [Mesa-dev] " Michel Thierry
2015-06-23 12:21 ` [PATCH igt 1/2] lib: Update intel_require_memory to handle +4GB cases Michel Thierry
2015-06-24 10:20   ` Chris Wilson
2015-06-24 13:40   ` [PATCH igt v2] " Michel Thierry
2015-06-24 15:38     ` Chris Wilson
2015-06-25  7:46       ` Daniel Vetter
2015-06-26 16:46     ` [PATCH igt v3] " Michel Thierry
2015-06-23 12:21 ` [PATCH igt 2/2] tests/gem_ppgtt: Check Wa32bitOffsets workarounds Michel Thierry
2015-06-23 13:10   ` Chris Wilson
2015-06-26 15:00 ` [PATCH libdrm] intel: Add EXEC_OBJECT_SUPPORTS_48BADDRESS flag Dave Gordon
2015-07-01 15:28 ` [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag Michel Thierry
2015-07-01 15:28   ` [PATCH libdrm v2 2/2] configure.ac: bump version to 2.4.63 Michel Thierry
2015-07-01 15:28   ` [PATCH mesa v2] i965/gen8+: bo in state base address must be in 32-bit address range Michel Thierry
2015-07-02  7:21     ` Chris Wilson
2015-07-02 13:53       ` Michel Thierry
2015-07-02 14:05         ` Chris Wilson
2015-07-01 17:06   ` [PATCH libdrm v2 1/2] intel: Add EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag Emil Velikov
2015-07-02 11:04     ` Michel Thierry

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.