AMD-GFX Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/4] Invalidate and flush caches at the beginning of every gfx IB
@ 2020-03-25 14:29 Andrey Grodzovsky
  2020-03-25 14:29 ` [PATCH 1/4] drm/amdgpu: Add new ring callback to insert memory sync Andrey Grodzovsky
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Andrey Grodzovsky @ 2020-03-25 14:29 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ken.Qiao, Marek.Olsak, Andrey Grodzovsky

This patchset introduces AQUIRE_MEM packet submission at the begining of each gfx IB
if requested by user mode client. This is helpful in solving issues with cache coherency
during amdgpu_test and Vulkan CTS tests.


Andrey Grodzovsky (4):
  drm/amdgpu: Add new ring callback to insert memory sync
  drm/amdgpu: Add AQUIRE_MEM PACKET3 fields defintion
  drm/amdgpu: Add mem_sync implementation for all the ASICs.
  drm/amdgpu: Add a UAPI flag for user to call mem_sync

 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 27 +++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c    | 16 ++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c    | 16 ++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    | 17 ++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 22 ++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/nvd.h         | 48 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/soc15d.h      | 25 ++++++++++++++++-
 include/uapi/drm/amdgpu_drm.h            |  2 ++
 12 files changed, 178 insertions(+), 6 deletions(-)

-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/4] drm/amdgpu: Add new ring callback to insert memory sync
  2020-03-25 14:29 [PATCH 0/4] Invalidate and flush caches at the beginning of every gfx IB Andrey Grodzovsky
@ 2020-03-25 14:29 ` Andrey Grodzovsky
  2020-03-26  9:14   ` Christian König
  2020-03-25 14:29 ` [PATCH 2/4] drm/amdgpu: Add AQUIRE_MEM PACKET3 fields defintion Andrey Grodzovsky
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 10+ messages in thread
From: Andrey Grodzovsky @ 2020-03-25 14:29 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ken.Qiao, Marek.Olsak, Andrey Grodzovsky

Used to flush and invalidate various caches.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 448c76c..ef9c444 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -171,6 +171,7 @@ struct amdgpu_ring_funcs {
 	/* Try to soft recover the ring to make the fence signal */
 	void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
 	int (*preempt_ib)(struct amdgpu_ring *ring);
+	void (*mem_sync)(struct amdgpu_ring *ring);
 };
 
 struct amdgpu_ring {
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 2/4] drm/amdgpu: Add AQUIRE_MEM PACKET3 fields defintion
  2020-03-25 14:29 [PATCH 0/4] Invalidate and flush caches at the beginning of every gfx IB Andrey Grodzovsky
  2020-03-25 14:29 ` [PATCH 1/4] drm/amdgpu: Add new ring callback to insert memory sync Andrey Grodzovsky
@ 2020-03-25 14:29 ` Andrey Grodzovsky
  2020-03-25 14:54   ` Luben Tuikov
  2020-03-25 14:29 ` [PATCH 3/4] drm/amdgpu: Add mem_sync implementation for all the ASICs Andrey Grodzovsky
  2020-03-25 14:29 ` [PATCH 4/4] drm/amdgpu: Add a UAPI flag for user to call mem_sync Andrey Grodzovsky
  3 siblings, 1 reply; 10+ messages in thread
From: Andrey Grodzovsky @ 2020-03-25 14:29 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ken.Qiao, Marek.Olsak, Andrey Grodzovsky

Add this for gfx10 and gfx9.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/nvd.h    | 48 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/soc15d.h | 25 ++++++++++++++++++-
 2 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
index f3d8771..7785ea5 100644
--- a/drivers/gpu/drm/amd/amdgpu/nvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
@@ -256,6 +256,54 @@
 #define	PACKET3_BLK_CNTX_UPDATE				0x53
 #define	PACKET3_INCR_UPDT_STATE				0x55
 #define	PACKET3_ACQUIRE_MEM				0x58
+/* 1.  HEADER
+ * 2.  COHER_CNTL [30:0]
+ * 2.1 ENGINE_SEL [31:31]
+ * 2.  COHER_SIZE [31:0]
+ * 3.  COHER_SIZE_HI [7:0]
+ * 4.  COHER_BASE_LO [31:0]
+ * 5.  COHER_BASE_HI [23:0]
+ * 7.  POLL_INTERVAL [15:0]
+ * 8.  GCR_CNTL [18:0]
+ */
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) ((x) << 0)
+		/*
+		 * 0:NOP
+		 * 1:ALL
+		 * 2:RANGE
+		 * 3:FIRST_LAST
+		 */
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_RANGE(x) ((x) << 2)
+		/*
+		 * 0:ALL
+		 * 1:reserved
+		 * 2:RANGE
+		 * 3:FIRST_LAST
+		 */
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(x) ((x) << 4)
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(x) ((x) << 5)
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_WB(x) ((x) << 6)
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(x) ((x) << 7)
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(x) ((x) << 8)
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(x) ((x) << 9)
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_US(x) ((x) << 10)
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_RANGE(x) ((x) << 11)
+		/*
+		 * 0:ALL
+		 * 1:VOL
+		 * 2:RANGE
+		 * 3:FIRST_LAST
+		 */
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_DISCARD(x)  ((x) << 13)
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(x) ((x) << 14)
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(x) ((x) << 15)
+#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_SEQ(x) ((x) << 16)
+		/*
+		 * 0: PARALLEL
+		 * 1: FORWARD
+		 * 2: REVERSE
+		 */
+#              define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA  (1 << 18)
 #define	PACKET3_REWIND					0x59
 #define	PACKET3_INTERRUPT				0x5A
 #define	PACKET3_GEN_PDEPTE				0x5B
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 295d68c..8983871 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -253,7 +253,30 @@
 #              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
 #              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
 #              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
-#define	PACKET3_AQUIRE_MEM				0x58
+#define	PACKET3_ACQUIRE_MEM				0x58
+/* 1.  HEADER
+ * 2.  COHER_CNTL [30:0]
+ * 2.1 ENGINE_SEL [31:31]
+ * 3.  COHER_SIZE [31:0]
+ * 4.  COHER_SIZE_HI [7:0]
+ * 5.  COHER_BASE_LO [31:0]
+ * 6.  COHER_BASE_HI [23:0]
+ * 7.  POLL_INTERVAL [15:0]
+ */
+/* COHER_CNTL fields for CP_COHER_CNTL */
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_NC_ACTION_ENA(x) ((x) << 3)
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WC_ACTION_ENA(x) ((x) << 4)
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_INV_METADATA_ACTION_ENA(x) ((x) << 5)
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_VOL_ACTION_ENA(x) ((x) << 15)
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(x) ((x) << 18)
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(x) ((x) << 22)
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(x) ((x) << 23)
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_CB_ACTION_ENA(x) ((x) << 25)
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_DB_ACTION_ENA(x) ((x) << 26)
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(x) ((x) << 27)
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_VOL_ACTION_ENA(x) ((x) << 28)
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29)
+#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30)
 #define	PACKET3_REWIND					0x59
 #define	PACKET3_LOAD_UCONFIG_REG			0x5E
 #define	PACKET3_LOAD_SH_REG				0x5F
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 3/4] drm/amdgpu: Add mem_sync implementation for all the ASICs.
  2020-03-25 14:29 [PATCH 0/4] Invalidate and flush caches at the beginning of every gfx IB Andrey Grodzovsky
  2020-03-25 14:29 ` [PATCH 1/4] drm/amdgpu: Add new ring callback to insert memory sync Andrey Grodzovsky
  2020-03-25 14:29 ` [PATCH 2/4] drm/amdgpu: Add AQUIRE_MEM PACKET3 fields defintion Andrey Grodzovsky
@ 2020-03-25 14:29 ` Andrey Grodzovsky
  2020-03-26  6:08   ` 回复: " Qiao, Ken
  2020-03-25 14:29 ` [PATCH 4/4] drm/amdgpu: Add a UAPI flag for user to call mem_sync Andrey Grodzovsky
  3 siblings, 1 reply; 10+ messages in thread
From: Andrey Grodzovsky @ 2020-03-25 14:29 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ken.Qiao, Marek.Olsak, Andrey Grodzovsky

Implement the .mem_sync hook defined earlier.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 27 ++++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c  | 16 +++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  | 16 +++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 17 ++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 22 +++++++++++++++++++++-
 5 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 7f9ac1a1..d7f3177 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -5236,6 +5236,29 @@ static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void gfx_v10_0_mem_sync(struct amdgpu_ring *ring)
+{
+	unsigned gcr_cntl = PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+			    /* TODO is this eqvivalent to V_586_GLI_ALL ? */
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+
+	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
+	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
+	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+}
+
 static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
 	.name = "gfx_v10_0",
 	.early_init = gfx_v10_0_early_init,
@@ -5283,7 +5306,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 		3 + /* CNTX_CTRL */
 		5 + /* HDP_INVL */
 		8 + 8 + /* FENCE x2 */
-		2, /* SWITCH_BUFFER */
+		2 + /* SWITCH_BUFFER */
+		8, /* gfx_v10_0_mem_sync */
 	.emit_ib_size =	4, /* gfx_v10_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v10_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -5304,6 +5328,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+	.mem_sync = gfx_v10_0_mem_sync,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 31f44d0..ced6459 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -3466,6 +3466,18 @@ static int gfx_v6_0_set_powergating_state(void *handle,
 	return 0;
 }
 
+static void gfx_v6_0_mem_sync(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+			  PACKET3_TC_ACTION_ENA |
+			  PACKET3_SH_KCACHE_ACTION_ENA |
+			  PACKET3_SH_ICACHE_ACTION_ENA);  /* CP_COHER_CNTL */
+	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
+	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
 static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
 	.name = "gfx_v6_0",
 	.early_init = gfx_v6_0_early_init,
@@ -3496,7 +3508,8 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
 		14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
 		7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
 		SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
-		3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */
+		3 + 2 + /* gfx_v6_ring_emit_cntxcntl including vgt flush */
+		5, /* SURFACE_SYNC */
 	.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
 	.emit_ib = gfx_v6_0_ring_emit_ib,
 	.emit_fence = gfx_v6_0_ring_emit_fence,
@@ -3507,6 +3520,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
 	.insert_nop = amdgpu_ring_insert_nop,
 	.emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
 	.emit_wreg = gfx_v6_0_ring_emit_wreg,
+	.mem_sync = gfx_v6_0_mem_sync,
 };
 
 static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 733d398..88c54c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -5001,6 +5001,18 @@ static int gfx_v7_0_set_powergating_state(void *handle,
 	return 0;
 }
 
+static void gfx_v7_0_mem_sync(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+			  PACKET3_TC_ACTION_ENA |
+			  PACKET3_SH_KCACHE_ACTION_ENA |
+			  PACKET3_SH_ICACHE_ACTION_ENA);  /* CP_COHER_CNTL */
+	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
+	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
 static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
 	.name = "gfx_v7_0",
 	.early_init = gfx_v7_0_early_init,
@@ -5033,7 +5045,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 		12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
 		7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
 		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
-		3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+		3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+		5, /* SURFACE_SYNC */
 	.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
@@ -5048,6 +5061,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
 	.emit_wreg = gfx_v7_0_ring_emit_wreg,
 	.soft_recovery = gfx_v7_0_ring_soft_recovery,
+	.mem_sync = gfx_v7_0_mem_sync,
 };
 
 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index fc32586..0b1d3a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6815,6 +6815,19 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void gfx_v8_0_mem_sync(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+			  PACKET3_TC_ACTION_ENA |
+			  PACKET3_SH_KCACHE_ACTION_ENA |
+			  PACKET3_SH_ICACHE_ACTION_ENA |
+			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
+	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
+	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
 	.name = "gfx_v8_0",
 	.early_init = gfx_v8_0_early_init,
@@ -6861,7 +6874,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 		3 + /* CNTX_CTRL */
 		5 + /* HDP_INVL */
 		12 + 12 + /* FENCE x2 */
-		2, /* SWITCH_BUFFER */
+		2 + /* SWITCH_BUFFER */
+		5, /* SURFACE_SYNC */
 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
@@ -6879,6 +6893,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
 	.soft_recovery = gfx_v8_0_ring_soft_recovery,
+	.mem_sync = gfx_v8_0_mem_sync,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index fb567cf..f851e80 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -6613,6 +6613,24 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void gfx_v9_0_mem_sync(struct amdgpu_ring *ring)
+{
+	unsigned cp_coher_cntl = PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
+				 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
+				 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
+				 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
+				 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
+
+	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
+	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
+	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
+	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+}
+
 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
 	.name = "gfx_v9_0",
 	.early_init = gfx_v9_0_early_init,
@@ -6659,7 +6677,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 		3 + /* CNTX_CTRL */
 		5 + /* HDP_INVL */
 		8 + 8 + /* FENCE x2 */
-		2, /* SWITCH_BUFFER */
+		2 + /* SWITCH_BUFFER */
+		7, /* gfx_v9_0_mem_sync */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v9_0_ring_emit_fence,
@@ -6680,6 +6699,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
+	.mem_sync = gfx_v9_0_mem_sync,
 };
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 4/4] drm/amdgpu: Add a UAPI flag for user to call mem_sync
  2020-03-25 14:29 [PATCH 0/4] Invalidate and flush caches at the beginning of every gfx IB Andrey Grodzovsky
                   ` (2 preceding siblings ...)
  2020-03-25 14:29 ` [PATCH 3/4] drm/amdgpu: Add mem_sync implementation for all the ASICs Andrey Grodzovsky
@ 2020-03-25 14:29 ` Andrey Grodzovsky
  2020-03-25 15:15   ` Alex Deucher
  3 siblings, 1 reply; 10+ messages in thread
From: Andrey Grodzovsky @ 2020-03-25 14:29 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ken.Qiao, Marek.Olsak, Andrey Grodzovsky

This flag used to avoid calling mem_sync without need.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  | 4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 +++
 include/uapi/drm/amdgpu_drm.h           | 2 ++
 4 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8304d0c..d9ad841 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -238,6 +238,9 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
 
 	if (p->uf_entry.tv.bo)
 		p->job->uf_addr = uf_offset;
+
+	p->job->sync_mem = cs->in.sync_mem;
+
 	kfree(chunk_array);
 
 	/* Use this opportunity to fill in task info for the vm */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index bece01f..9168150 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -182,6 +182,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		dma_fence_put(tmp);
 	}

+	if (job && job->sync_mem && ring->funcs->mem_sync)
+		ring->funcs->mem_sync(ring);
+
 	if (ring->funcs->insert_start)
 		ring->funcs->insert_start(ring);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 2e2110d..7b08a04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -61,6 +61,9 @@ struct amdgpu_job {
 	/* user fence handling */
 	uint64_t		uf_addr;
 	uint64_t		uf_sequence;
+
+	/** UMD flag to flush and invalidate caches */
+	bool			sync_mem;
 };
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index cfbec27..f04998d 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -566,6 +566,8 @@ struct drm_amdgpu_cs_in {
 	__u32		flags;
 	/** this points to __u64 * which point to cs chunks */
 	__u64		chunks;
+	/** Tell KMD to flush and invalidate caches */
+	bool		sync_mem;
 };
 
 struct drm_amdgpu_cs_out {
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu: Add AQUIRE_MEM PACKET3 fields defintion
  2020-03-25 14:29 ` [PATCH 2/4] drm/amdgpu: Add AQUIRE_MEM PACKET3 fields defintion Andrey Grodzovsky
@ 2020-03-25 14:54   ` Luben Tuikov
  0 siblings, 0 replies; 10+ messages in thread
From: Luben Tuikov @ 2020-03-25 14:54 UTC (permalink / raw)
  To: Andrey Grodzovsky, amd-gfx; +Cc: Ken.Qiao, Marek.Olsak

On 2020-03-25 10:29, Andrey Grodzovsky wrote:
> Add this for gfx10 and gfx9.
> 
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/nvd.h    | 48 +++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/soc15d.h | 25 ++++++++++++++++++-
>  2 files changed, 72 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
> index f3d8771..7785ea5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nvd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
> @@ -256,6 +256,54 @@
>  #define	PACKET3_BLK_CNTX_UPDATE				0x53
>  #define	PACKET3_INCR_UPDT_STATE				0x55
>  #define	PACKET3_ACQUIRE_MEM				0x58
> +/* 1.  HEADER
> + * 2.  COHER_CNTL [30:0]
> + * 2.1 ENGINE_SEL [31:31]
> + * 2.  COHER_SIZE [31:0]
> + * 3.  COHER_SIZE_HI [7:0]
> + * 4.  COHER_BASE_LO [31:0]
> + * 5.  COHER_BASE_HI [23:0]
> + * 7.  POLL_INTERVAL [15:0]
> + * 8.  GCR_CNTL [18:0]
> + */
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) ((x) << 0)

I think it's visually better to not break up the "#" and "define" and
to not bunch up "define" and the macro name, to intead look like this:

#define 	PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x)        ((x) << 0)

Which creates visual cadence, for the eyes to follow. It also creates
a vertical visual separation (reading down) since then the left column
is not just white space reading down, but breaks at the definition of
each register field. (once changed for all, you'll see it)

> +		/*
> +		 * 0:NOP
> +		 * 1:ALL
> +		 * 2:RANGE
> +		 * 3:FIRST_LAST
> +		 */
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_RANGE(x) ((x) << 2)
> +		/*
> +		 * 0:ALL
> +		 * 1:reserved
> +		 * 2:RANGE
> +		 * 3:FIRST_LAST
> +		 */
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(x) ((x) << 4)
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(x) ((x) << 5)
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_WB(x) ((x) << 6)
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(x) ((x) << 7)
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(x) ((x) << 8)
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(x) ((x) << 9)
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_US(x) ((x) << 10)
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_RANGE(x) ((x) << 11)
> +		/*
> +		 * 0:ALL
> +		 * 1:VOL
> +		 * 2:RANGE
> +		 * 3:FIRST_LAST
> +		 */
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_DISCARD(x)  ((x) << 13)
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(x) ((x) << 14)
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(x) ((x) << 15)
> +#              define PACKET3_ACQUIRE_MEM_GCR_CNTL_SEQ(x) ((x) << 16)
> +		/*
> +		 * 0: PARALLEL
> +		 * 1: FORWARD
> +		 * 2: REVERSE
> +		 */
> +#              define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA  (1 << 18)
>  #define	PACKET3_REWIND					0x59
>  #define	PACKET3_INTERRUPT				0x5A
>  #define	PACKET3_GEN_PDEPTE				0x5B
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
> index 295d68c..8983871 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
> @@ -253,7 +253,30 @@
>  #              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
>  #              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
>  #              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
> -#define	PACKET3_AQUIRE_MEM				0x58
> +#define	PACKET3_ACQUIRE_MEM				0x58
> +/* 1.  HEADER
> + * 2.  COHER_CNTL [30:0]
> + * 2.1 ENGINE_SEL [31:31]
> + * 3.  COHER_SIZE [31:0]
> + * 4.  COHER_SIZE_HI [7:0]
> + * 5.  COHER_BASE_LO [31:0]
> + * 6.  COHER_BASE_HI [23:0]
> + * 7.  POLL_INTERVAL [15:0]
> + */
> +/* COHER_CNTL fields for CP_COHER_CNTL */
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_NC_ACTION_ENA(x) ((x) << 3)
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WC_ACTION_ENA(x) ((x) << 4)
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_INV_METADATA_ACTION_ENA(x) ((x) << 5)
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_VOL_ACTION_ENA(x) ((x) << 15)
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(x) ((x) << 18)
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(x) ((x) << 22)
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(x) ((x) << 23)
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_CB_ACTION_ENA(x) ((x) << 25)
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_DB_ACTION_ENA(x) ((x) << 26)
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(x) ((x) << 27)
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_VOL_ACTION_ENA(x) ((x) << 28)
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29)
> +#              define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30)

The same sentiment here. There is no reason to break the "#" and the "define",
which is usually seen as "#define" by the language. Instead keeping
the "#" and the "define" together and then white-space to the macro name would
make for a better and move familiar visual cadence. Like,

#define		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_NC_ACTION_ENA(x)           ((x) << 3)
#define		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WC_ACTION_ENA(x)           ((x) << 4)
#define		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_INV_METADATA_ACTION_ENA(x) ((x) << 5)
#define		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_VOL_ACTION_ENA(x)        ((x) << 15)
...


>  #define	PACKET3_REWIND					0x59
>  #define	PACKET3_LOAD_UCONFIG_REG			0x5E
>  #define	PACKET3_LOAD_SH_REG				0x5F

Like it is done here ^.

Regards,
Luben

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] drm/amdgpu: Add a UAPI flag for user to call mem_sync
  2020-03-25 14:29 ` [PATCH 4/4] drm/amdgpu: Add a UAPI flag for user to call mem_sync Andrey Grodzovsky
@ 2020-03-25 15:15   ` Alex Deucher
  0 siblings, 0 replies; 10+ messages in thread
From: Alex Deucher @ 2020-03-25 15:15 UTC (permalink / raw)
  To: Andrey Grodzovsky; +Cc: Ken.Qiao, Marek Olšák, amd-gfx list

On Wed, Mar 25, 2020 at 10:30 AM Andrey Grodzovsky
<andrey.grodzovsky@amd.com> wrote:
>
> This flag used to avoid calling mem_sync without need.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 3 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  | 4 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 +++
>  include/uapi/drm/amdgpu_drm.h           | 2 ++
>  4 files changed, 12 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 8304d0c..d9ad841 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -238,6 +238,9 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
>
>         if (p->uf_entry.tv.bo)
>                 p->job->uf_addr = uf_offset;
> +
> +       p->job->sync_mem = cs->in.sync_mem;
> +
>         kfree(chunk_array);
>
>         /* Use this opportunity to fill in task info for the vm */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index bece01f..9168150 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -182,6 +182,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>                 dma_fence_put(tmp);
>         }
>
> +       if (job && job->sync_mem && ring->funcs->mem_sync)
> +               ring->funcs->mem_sync(ring);
> +
>         if (ring->funcs->insert_start)
>                 ring->funcs->insert_start(ring);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> index 2e2110d..7b08a04 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> @@ -61,6 +61,9 @@ struct amdgpu_job {
>         /* user fence handling */
>         uint64_t                uf_addr;
>         uint64_t                uf_sequence;
> +
> +       /** UMD flag to flush and invalidate caches */
> +       bool                    sync_mem;
>  };
>
>  int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index cfbec27..f04998d 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -566,6 +566,8 @@ struct drm_amdgpu_cs_in {
>         __u32           flags;
>         /** this points to __u64 * which point to cs chunks */
>         __u64           chunks;
> +       /** Tell KMD to flush and invalidate caches */
> +       bool            sync_mem;
>  };

I think it would be better to add this to drm_amdgpu_cs_chunk_ib.flags
or drm_amdgpu_cs_in.flags.

Alex

>
>  struct drm_amdgpu_cs_out {
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* 回复: [PATCH 3/4] drm/amdgpu: Add mem_sync implementation for all the ASICs.
  2020-03-25 14:29 ` [PATCH 3/4] drm/amdgpu: Add mem_sync implementation for all the ASICs Andrey Grodzovsky
@ 2020-03-26  6:08   ` " Qiao, Ken
  2020-03-26  6:42     ` Olsak, Marek
  0 siblings, 1 reply; 10+ messages in thread
From: Qiao, Ken @ 2020-03-26  6:08 UTC (permalink / raw)
  To: Grodzovsky, Andrey, amd-gfx
  Cc: Grodzovsky, Andrey, Olsak, Marek, Zhou, David(ChunMing)

Hi Andrey,

Why not set the default value of sync_mem flag to true in KMD? So the legacy UMD driver can be compatible with it without any change. And if UMD doesn’t need ACQUIRE_MEM then can explicitly disable it by setting the flag to false.

Thanks,
Ken

-----邮件原件-----
发件人: Andrey Grodzovsky <andrey.grodzovsky@amd.com> 
发送时间: 2020年3月25日 22:30
收件人: amd-gfx@lists.freedesktop.org
抄送: Qiao, Ken <Ken.Qiao@amd.com>; Olsak, Marek <Marek.Olsak@amd.com>; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
主题: [PATCH 3/4] drm/amdgpu: Add mem_sync implementation for all the ASICs.

Implement the .mem_sync hook defined earlier.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 27 ++++++++++++++++++++++++++-  drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c  | 16 +++++++++++++++-  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  | 16 +++++++++++++++-  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 17 ++++++++++++++++-  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 22 +++++++++++++++++++++-
 5 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 7f9ac1a1..d7f3177 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -5236,6 +5236,29 @@ static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void gfx_v10_0_mem_sync(struct amdgpu_ring *ring) {
+	unsigned gcr_cntl = PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+			    /* TODO is this eqvivalent to V_586_GLI_ALL ? */
+			    PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+
+	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
+	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
+	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ }
+
 static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
 	.name = "gfx_v10_0",
 	.early_init = gfx_v10_0_early_init,
@@ -5283,7 +5306,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 		3 + /* CNTX_CTRL */
 		5 + /* HDP_INVL */
 		8 + 8 + /* FENCE x2 */
-		2, /* SWITCH_BUFFER */
+		2 + /* SWITCH_BUFFER */
+		8, /* gfx_v10_0_mem_sync */
 	.emit_ib_size =	4, /* gfx_v10_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v10_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v10_0_ring_emit_fence, @@ -5304,6 +5328,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+	.mem_sync = gfx_v10_0_mem_sync,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 31f44d0..ced6459 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -3466,6 +3466,18 @@ static int gfx_v6_0_set_powergating_state(void *handle,
 	return 0;
 }
 
+static void gfx_v6_0_mem_sync(struct amdgpu_ring *ring) {
+	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+			  PACKET3_TC_ACTION_ENA |
+			  PACKET3_SH_KCACHE_ACTION_ENA |
+			  PACKET3_SH_ICACHE_ACTION_ENA);  /* CP_COHER_CNTL */
+	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
+	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ }
+
 static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
 	.name = "gfx_v6_0",
 	.early_init = gfx_v6_0_early_init,
@@ -3496,7 +3508,8 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
 		14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
 		7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
 		SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
-		3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */
+		3 + 2 + /* gfx_v6_ring_emit_cntxcntl including vgt flush */
+		5, /* SURFACE_SYNC */
 	.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
 	.emit_ib = gfx_v6_0_ring_emit_ib,
 	.emit_fence = gfx_v6_0_ring_emit_fence, @@ -3507,6 +3520,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
 	.insert_nop = amdgpu_ring_insert_nop,
 	.emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
 	.emit_wreg = gfx_v6_0_ring_emit_wreg,
+	.mem_sync = gfx_v6_0_mem_sync,
 };
 
 static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 733d398..88c54c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -5001,6 +5001,18 @@ static int gfx_v7_0_set_powergating_state(void *handle,
 	return 0;
 }
 
+static void gfx_v7_0_mem_sync(struct amdgpu_ring *ring) {
+	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+			  PACKET3_TC_ACTION_ENA |
+			  PACKET3_SH_KCACHE_ACTION_ENA |
+			  PACKET3_SH_ICACHE_ACTION_ENA);  /* CP_COHER_CNTL */
+	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
+	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ }
+
 static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
 	.name = "gfx_v7_0",
 	.early_init = gfx_v7_0_early_init,
@@ -5033,7 +5045,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 		12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
 		7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
 		CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
-		3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+		3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+		5, /* SURFACE_SYNC */
 	.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v7_0_ring_emit_fence_gfx, @@ -5048,6 +5061,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
 	.emit_wreg = gfx_v7_0_ring_emit_wreg,
 	.soft_recovery = gfx_v7_0_ring_soft_recovery,
+	.mem_sync = gfx_v7_0_mem_sync,
 };
 
 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index fc32586..0b1d3a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6815,6 +6815,19 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void gfx_v8_0_mem_sync(struct amdgpu_ring *ring) {
+	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+			  PACKET3_TC_ACTION_ENA |
+			  PACKET3_SH_KCACHE_ACTION_ENA |
+			  PACKET3_SH_ICACHE_ACTION_ENA |
+			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
+	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
+	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ }
+
 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
 	.name = "gfx_v8_0",
 	.early_init = gfx_v8_0_early_init,
@@ -6861,7 +6874,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 		3 + /* CNTX_CTRL */
 		5 + /* HDP_INVL */
 		12 + 12 + /* FENCE x2 */
-		2, /* SWITCH_BUFFER */
+		2 + /* SWITCH_BUFFER */
+		5, /* SURFACE_SYNC */
 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx, @@ -6879,6 +6893,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
 	.soft_recovery = gfx_v8_0_ring_soft_recovery,
+	.mem_sync = gfx_v8_0_mem_sync,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index fb567cf..f851e80 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -6613,6 +6613,24 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void gfx_v9_0_mem_sync(struct amdgpu_ring *ring) {
+	unsigned cp_coher_cntl = PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
+				 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
+				 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
+				 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
+				 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
+
+	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
+	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
+	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
+	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ }
+
 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
 	.name = "gfx_v9_0",
 	.early_init = gfx_v9_0_early_init,
@@ -6659,7 +6677,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 		3 + /* CNTX_CTRL */
 		5 + /* HDP_INVL */
 		8 + 8 + /* FENCE x2 */
-		2, /* SWITCH_BUFFER */
+		2 + /* SWITCH_BUFFER */
+		7, /* gfx_v9_0_mem_sync */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v9_0_ring_emit_fence, @@ -6680,6 +6699,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
+	.mem_sync = gfx_v9_0_mem_sync,
 };
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
--
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/4] drm/amdgpu: Add mem_sync implementation for all the ASICs.
  2020-03-26  6:08   ` 回复: " Qiao, Ken
@ 2020-03-26  6:42     ` Olsak, Marek
  0 siblings, 0 replies; 10+ messages in thread
From: Olsak, Marek @ 2020-03-26  6:42 UTC (permalink / raw)
  To: Qiao, Ken, Grodzovsky, Andrey, amd-gfx; +Cc: Zhou, David(ChunMing)

[-- Attachment #1.1: Type: text/plain, Size: 12911 bytes --]

[AMD Official Use Only - Internal Distribution Only]

Hi Ken,

The KMD has never used ACQUIRE_MEM before. It has to be requested explicitly, because it's a change in KMD behavior.

Marek
________________________________
From: Qiao, Ken <Ken.Qiao@amd.com>
Sent: March 26, 2020 02:08
To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Olsak, Marek <Marek.Olsak@amd.com>; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Zhou, David(ChunMing) <David1.Zhou@amd.com>
Subject: 回复: [PATCH 3/4] drm/amdgpu: Add mem_sync implementation for all the ASICs.

Hi Andrey,

Why not set the default value of sync_mem flag to true in KMD? So the legacy UMD driver can be compatible with it without any change. And if UMD doesn’t need ACQUIRE_MEM then can explicitly disable it by setting the flag to false.

Thanks,
Ken

-----邮件原件-----
发件人: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
发送时间: 2020年3月25日 22:30
收件人: amd-gfx@lists.freedesktop.org
抄送: Qiao, Ken <Ken.Qiao@amd.com>; Olsak, Marek <Marek.Olsak@amd.com>; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
主题: [PATCH 3/4] drm/amdgpu: Add mem_sync implementation for all the ASICs.

Implement the .mem_sync hook defined earlier.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 27 ++++++++++++++++++++++++++-  drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c  | 16 +++++++++++++++-  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  | 16 +++++++++++++++-  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 17 ++++++++++++++++-  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 22 +++++++++++++++++++++-
 5 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 7f9ac1a1..d7f3177 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -5236,6 +5236,29 @@ static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
         return 0;
 }

+static void gfx_v10_0_mem_sync(struct amdgpu_ring *ring) {
+       unsigned gcr_cntl = PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+                           /* TODO is this eqvivalent to V_586_GLI_ALL ? */
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+
+       /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+       amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+       amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+       amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+       amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
+       amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+       amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
+       amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+       amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ }
+
 static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
         .name = "gfx_v10_0",
         .early_init = gfx_v10_0_early_init,
@@ -5283,7 +5306,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
                 3 + /* CNTX_CTRL */
                 5 + /* HDP_INVL */
                 8 + 8 + /* FENCE x2 */
-               2, /* SWITCH_BUFFER */
+               2 + /* SWITCH_BUFFER */
+               8, /* gfx_v10_0_mem_sync */
         .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
         .emit_ib = gfx_v10_0_ring_emit_ib_gfx,
         .emit_fence = gfx_v10_0_ring_emit_fence, @@ -5304,6 +5328,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
         .emit_wreg = gfx_v10_0_ring_emit_wreg,
         .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
         .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+       .mem_sync = gfx_v10_0_mem_sync,
 };

 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 31f44d0..ced6459 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -3466,6 +3466,18 @@ static int gfx_v6_0_set_powergating_state(void *handle,
         return 0;
 }

+static void gfx_v6_0_mem_sync(struct amdgpu_ring *ring) {
+       amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+       amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+                         PACKET3_TC_ACTION_ENA |
+                         PACKET3_SH_KCACHE_ACTION_ENA |
+                         PACKET3_SH_ICACHE_ACTION_ENA);  /* CP_COHER_CNTL */
+       amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+       amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
+       amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ }
+
 static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
         .name = "gfx_v6_0",
         .early_init = gfx_v6_0_early_init,
@@ -3496,7 +3508,8 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
                 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
                 7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
                 SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
-               3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */
+               3 + 2 + /* gfx_v6_ring_emit_cntxcntl including vgt flush */
+               5, /* SURFACE_SYNC */
         .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
         .emit_ib = gfx_v6_0_ring_emit_ib,
         .emit_fence = gfx_v6_0_ring_emit_fence, @@ -3507,6 +3520,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
         .insert_nop = amdgpu_ring_insert_nop,
         .emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
         .emit_wreg = gfx_v6_0_ring_emit_wreg,
+       .mem_sync = gfx_v6_0_mem_sync,
 };

 static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 733d398..88c54c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -5001,6 +5001,18 @@ static int gfx_v7_0_set_powergating_state(void *handle,
         return 0;
 }

+static void gfx_v7_0_mem_sync(struct amdgpu_ring *ring) {
+       amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+       amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+                         PACKET3_TC_ACTION_ENA |
+                         PACKET3_SH_KCACHE_ACTION_ENA |
+                         PACKET3_SH_ICACHE_ACTION_ENA);  /* CP_COHER_CNTL */
+       amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+       amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
+       amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ }
+
 static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
         .name = "gfx_v7_0",
         .early_init = gfx_v7_0_early_init,
@@ -5033,7 +5045,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
                 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
                 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
                 CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
-               3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+               3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+               5, /* SURFACE_SYNC */
         .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
         .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
         .emit_fence = gfx_v7_0_ring_emit_fence_gfx, @@ -5048,6 +5061,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
         .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
         .emit_wreg = gfx_v7_0_ring_emit_wreg,
         .soft_recovery = gfx_v7_0_ring_soft_recovery,
+       .mem_sync = gfx_v7_0_mem_sync,
 };

 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index fc32586..0b1d3a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6815,6 +6815,19 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
         return 0;
 }

+static void gfx_v8_0_mem_sync(struct amdgpu_ring *ring) {
+       amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+       amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+                         PACKET3_TC_ACTION_ENA |
+                         PACKET3_SH_KCACHE_ACTION_ENA |
+                         PACKET3_SH_ICACHE_ACTION_ENA |
+                         PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
+       amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+       amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
+       amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ }
+
 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
         .name = "gfx_v8_0",
         .early_init = gfx_v8_0_early_init,
@@ -6861,7 +6874,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
                 3 + /* CNTX_CTRL */
                 5 + /* HDP_INVL */
                 12 + 12 + /* FENCE x2 */
-               2, /* SWITCH_BUFFER */
+               2 + /* SWITCH_BUFFER */
+               5, /* SURFACE_SYNC */
         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
         .emit_fence = gfx_v8_0_ring_emit_fence_gfx, @@ -6879,6 +6893,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
         .emit_wreg = gfx_v8_0_ring_emit_wreg,
         .soft_recovery = gfx_v8_0_ring_soft_recovery,
+       .mem_sync = gfx_v8_0_mem_sync,
 };

 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index fb567cf..f851e80 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -6613,6 +6613,24 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
         return 0;
 }

+static void gfx_v9_0_mem_sync(struct amdgpu_ring *ring) {
+       unsigned cp_coher_cntl = PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
+                                PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
+                                PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
+                                PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
+                                PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
+
+       /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+       amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+       amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
+       amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
+       amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
+       amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+       amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
+       amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ }
+
 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
         .name = "gfx_v9_0",
         .early_init = gfx_v9_0_early_init,
@@ -6659,7 +6677,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
                 3 + /* CNTX_CTRL */
                 5 + /* HDP_INVL */
                 8 + 8 + /* FENCE x2 */
-               2, /* SWITCH_BUFFER */
+               2 + /* SWITCH_BUFFER */
+               7, /* gfx_v9_0_mem_sync */
         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
         .emit_fence = gfx_v9_0_ring_emit_fence, @@ -6680,6 +6699,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
         .soft_recovery = gfx_v9_0_ring_soft_recovery,
+       .mem_sync = gfx_v9_0_mem_sync,
 };

 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
--
2.7.4


[-- Attachment #1.2: Type: text/html, Size: 24456 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: Add new ring callback to insert memory sync
  2020-03-25 14:29 ` [PATCH 1/4] drm/amdgpu: Add new ring callback to insert memory sync Andrey Grodzovsky
@ 2020-03-26  9:14   ` Christian König
  0 siblings, 0 replies; 10+ messages in thread
From: Christian König @ 2020-03-26  9:14 UTC (permalink / raw)
  To: Andrey Grodzovsky, amd-gfx; +Cc: Ken.Qiao, Marek.Olsak

Am 25.03.20 um 15:29 schrieb Andrey Grodzovsky:
> Used to flush and invalidate various caches.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
>   1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 448c76c..ef9c444 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -171,6 +171,7 @@ struct amdgpu_ring_funcs {
>   	/* Try to soft recover the ring to make the fence signal */
>   	void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
>   	int (*preempt_ib)(struct amdgpu_ring *ring);
> +	void (*mem_sync)(struct amdgpu_ring *ring);

Maybe stick with the naming and call this "emit_mem_sync", apart from 
that looks like it could work to me.

Christian.

>   };
>   
>   struct amdgpu_ring {

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, back to index

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-25 14:29 [PATCH 0/4] Invalidate and flush caches at the beginning of every gfx IB Andrey Grodzovsky
2020-03-25 14:29 ` [PATCH 1/4] drm/amdgpu: Add new ring callback to insert memory sync Andrey Grodzovsky
2020-03-26  9:14   ` Christian König
2020-03-25 14:29 ` [PATCH 2/4] drm/amdgpu: Add AQUIRE_MEM PACKET3 fields defintion Andrey Grodzovsky
2020-03-25 14:54   ` Luben Tuikov
2020-03-25 14:29 ` [PATCH 3/4] drm/amdgpu: Add mem_sync implementation for all the ASICs Andrey Grodzovsky
2020-03-26  6:08   ` 回复: " Qiao, Ken
2020-03-26  6:42     ` Olsak, Marek
2020-03-25 14:29 ` [PATCH 4/4] drm/amdgpu: Add a UAPI flag for user to call mem_sync Andrey Grodzovsky
2020-03-25 15:15   ` Alex Deucher

AMD-GFX Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/amd-gfx/0 amd-gfx/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 amd-gfx amd-gfx/ https://lore.kernel.org/amd-gfx \
		amd-gfx@lists.freedesktop.org
	public-inbox-index amd-gfx

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.freedesktop.lists.amd-gfx


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git