* [PATCH 01/13] drm/amdgpu: fix vcn_v1_0_dec_ring_emit_wreg
@ 2018-01-26 20:13 Christian König
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

The decode ring's emit_wreg callback got mixed up with the encode ring function.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 44c041a1fe68..24ebc3e296a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -880,6 +880,22 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
 }
 
+static void vcn_v1_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
+					uint32_t reg, uint32_t val)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+	amdgpu_ring_write(ring, val);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+	amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
+}
+
 /**
  * vcn_v1_0_enc_ring_get_rptr - get enc read pointer
  *
@@ -1097,7 +1113,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.begin_use = amdgpu_vcn_ring_begin_use,
 	.end_use = amdgpu_vcn_ring_end_use,
-	.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
+	.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
 };
 
 static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
@@ -1124,6 +1140,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.begin_use = amdgpu_vcn_ring_begin_use,
 	.end_use = amdgpu_vcn_ring_end_use,
+	.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
 };
 
 static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* [PATCH 02/13] drm/amdgpu: drop root shadow sync
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-26 20:13   ` Christian König
       [not found]     ` <20180126201326.8829-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-01-26 20:13   ` [PATCH 03/13] drm/amdgpu: add new emit_reg_wait callback Christian König
                     ` (11 subsequent siblings)
  12 siblings, 1 reply; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Completely pointless, it is the same reservation object as the root PD
anyway.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a3b9c3976eb3..5e53b7a2d4d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -956,11 +956,6 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 		amdgpu_ring_pad_ib(ring, params.ib);
 		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
 				 AMDGPU_FENCE_OWNER_VM, false);
-		if (root->shadow)
-			amdgpu_sync_resv(adev, &job->sync,
-					 root->shadow->tbo.resv,
-					 AMDGPU_FENCE_OWNER_VM, false);
-
 		WARN_ON(params.ib->length_dw > ndw);
 		r = amdgpu_job_submit(job, ring, &vm->entity,
 				      AMDGPU_FENCE_OWNER_VM, &fence);
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* [PATCH 03/13] drm/amdgpu: add new emit_reg_wait callback
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-01-26 20:13   ` [PATCH 02/13] drm/amdgpu: drop root shadow sync Christian König
@ 2018-01-26 20:13   ` Christian König
  2018-01-26 20:13   ` [PATCH 04/13] drm/amdgpu: add gfx_v9_0_ring_emit_reg_wait implementation Christian König
                     ` (10 subsequent siblings)
  12 siblings, 0 replies; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Allows us to wait for a register value/mask on a ring.
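
As a rough sketch of how the callback is meant to be wired up and used (the
engine-side implementation and the common-code caller below simply mirror what
later patches in this series add for gfx v9 and the GMC):

  /* engine side: translate the wait into the ring's packet format */
  static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                          uint32_t val, uint32_t mask)
  {
          gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
  }

  /* common side: wait for a VM invalidate ack through the new macro */
  amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
                            1 << vmid, 1 << vmid);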

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d7930f3ead33..787f79c80b6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1796,6 +1796,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
+#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
 #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 70d05ec7bc07..867f53332305 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -150,6 +150,8 @@ struct amdgpu_ring_funcs {
 	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+	void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
+			      uint32_t val, uint32_t mask);
 	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
 	/* priority functions */
 	void (*set_priority) (struct amdgpu_ring *ring,
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* [PATCH 04/13] drm/amdgpu: add gfx_v9_0_ring_emit_reg_wait implementation
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-01-26 20:13   ` [PATCH 02/13] drm/amdgpu: drop root shadow sync Christian König
  2018-01-26 20:13   ` [PATCH 03/13] drm/amdgpu: add new emit_reg_wait callback Christian König
@ 2018-01-26 20:13   ` Christian König
  2018-01-26 20:13   ` [PATCH 05/13] drm/amdgpu: implement sdma_v4_0_ring_emit_reg_wait Christian König
                     ` (9 subsequent siblings)
  12 siblings, 0 replies; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Implement emit_reg_wait for gfx v9.
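
For orientation, the same call with the parameters of the local
gfx_v9_0_wait_reg_mem() helper spelled out; the comments are a rough reading of
the WAIT_REG_MEM packet fields, not a full description:

  gfx_v9_0_wait_reg_mem(ring,
                        0,        /* eng_sel: micro engine */
                        0,        /* mem_space: poll a register, not memory */
                        0,        /* opt: plain wait operation */
                        reg, 0,   /* addr0/addr1: register offset, addr1 unused */
                        val,      /* reference value */
                        mask,     /* mask applied before the compare */
                        0x20);    /* poll interval */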

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e5d5341c459a..801d4a1dd7db 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4003,6 +4003,12 @@ static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
 	amdgpu_ring_write(ring, val);
 }
 
+static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+					uint32_t val, uint32_t mask)
+{
+	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
@@ -4321,6 +4327,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
+	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -4352,6 +4359,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.set_priority = gfx_v9_0_ring_set_priority_compute,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
+	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -4379,6 +4387,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
+	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 };
 
 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* [PATCH 05/13] drm/amdgpu: implement sdma_v4_0_ring_emit_reg_wait
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2018-01-26 20:13   ` [PATCH 04/13] drm/amdgpu: add gfx_v9_0_ring_emit_reg_wait implementation Christian König
@ 2018-01-26 20:13   ` Christian König
  2018-01-26 20:13   ` [PATCH 06/13] drm/amdgpu: implement uvd_v7_0_(enc_|)ring_emit_reg_wait Christian König
                     ` (8 subsequent siblings)
  12 siblings, 0 replies; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Add emit_reg_wait implementation for SDMA v4.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 8505458d7041..e1ae39f86adf 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1152,6 +1152,20 @@ static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, val);
 }
 
+static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+					 uint32_t val, uint32_t mask)
+{
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, val); /* reference */
+	amdgpu_ring_write(ring, mask); /* mask */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
 static int sdma_v4_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1588,6 +1602,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
 	.insert_nop = sdma_v4_0_ring_insert_nop,
 	.pad_ib = sdma_v4_0_ring_pad_ib,
 	.emit_wreg = sdma_v4_0_ring_emit_wreg,
+	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
 };
 
 static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* [PATCH 06/13] drm/amdgpu: implement uvd_v7_0_(enc_|)ring_emit_reg_wait
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (3 preceding siblings ...)
  2018-01-26 20:13   ` [PATCH 05/13] drm/amdgpu: implement sdma_v4_0_ring_emit_reg_wait Christian König
@ 2018-01-26 20:13   ` Christian König
       [not found]     ` <20180126201326.8829-6-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-01-26 20:13   ` [PATCH 07/13] drm/amdgpu: implement vce_v4_0_emit_reg_wait Christian König
                     ` (7 subsequent siblings)
  12 siblings, 1 reply; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Add emit_reg_wait implementation for UVD v7.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 40 +++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index d317c764cc91..b8fbc7dc626f 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1241,17 +1241,17 @@ static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, 8);
 }
 
-static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring,
-				uint32_t data0, uint32_t data1, uint32_t mask)
+static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+					uint32_t val, uint32_t mask)
 {
 	struct amdgpu_device *adev = ring->adev;
 
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
-	amdgpu_ring_write(ring, data0);
+	amdgpu_ring_write(ring, reg << 2);
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
-	amdgpu_ring_write(ring, data1);
+	amdgpu_ring_write(ring, val);
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
 	amdgpu_ring_write(ring, mask);
@@ -1271,16 +1271,16 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
 
 	/* wait for reg writes */
-	data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
+	data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
 	data1 = lower_32_bits(pd_addr);
 	mask = 0xffffffff;
-	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
+	uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask);
 
 	/* wait for flush */
-	data0 = (hub->vm_inv_eng0_ack + eng) << 2;
+	data0 = hub->vm_inv_eng0_ack + eng;
 	data1 = 1 << vmid;
 	mask =  1 << vmid;
-	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
+	uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask);
 }
 
 static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -1308,16 +1308,12 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
 
 	/* wait for reg writes */
-	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
-	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
-	amdgpu_ring_write(ring, 0xffffffff);
-	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+	amdgpu_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
+				  lower_32_bits(pd_addr), 0xffffffff);
 
 	/* wait for flush */
-	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
-	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-	amdgpu_ring_write(ring, 1 << vmid);
-	amdgpu_ring_write(ring, 1 << vmid);
+	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
+				  1 << vmid, 1 << vmid);
 }
 
 static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
@@ -1328,6 +1324,16 @@ static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, val);
 }
 
+static void uvd_v7_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
+					    uint32_t reg, uint32_t val,
+					    uint32_t mask)
+{
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring,	reg << 2);
+	amdgpu_ring_write(ring, mask);
+	amdgpu_ring_write(ring, val);
+}
+
 #if 0
 static bool uvd_v7_0_is_idle(void *handle)
 {
@@ -1676,6 +1682,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 	.begin_use = amdgpu_uvd_ring_begin_use,
 	.end_use = amdgpu_uvd_ring_end_use,
 	.emit_wreg = uvd_v7_0_ring_emit_wreg,
+	.emit_reg_wait = uvd_v7_0_ring_emit_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
@@ -1704,6 +1711,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
 	.begin_use = amdgpu_uvd_ring_begin_use,
 	.end_use = amdgpu_uvd_ring_end_use,
 	.emit_wreg = uvd_v7_0_enc_ring_emit_wreg,
+	.emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait,
 };
 
 static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* [PATCH 07/13] drm/amdgpu: implement vce_v4_0_emit_reg_wait
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (4 preceding siblings ...)
  2018-01-26 20:13   ` [PATCH 06/13] drm/amdgpu: implement uvd_v7_0_(enc_|)ring_emit_reg_wait Christian König
@ 2018-01-26 20:13   ` Christian König
       [not found]     ` <20180126201326.8829-7-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-01-26 20:13   ` [PATCH 08/13] drm/amdgpu: implement vcn_v1_0_(dec|enc)_ring_emit_reg_wait Christian König
                     ` (6 subsequent siblings)
  12 siblings, 1 reply; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Add emit_reg_wait implementation for VCE v4.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index e62a24b90aaf..263dd920be92 100755
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -975,16 +975,12 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
 
 	/* wait for reg writes */
-	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
-	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
-	amdgpu_ring_write(ring, 0xffffffff);
-	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+	amdgpu_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
+				  lower_32_bits(pd_addr), 0xffffffff);
 
 	/* wait for flush */
-	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
-	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-	amdgpu_ring_write(ring, 1 << vmid);
-	amdgpu_ring_write(ring, 1 << vmid);
+	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
+				  1 << vmid, 1 << vmid);
 }
 
 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
@@ -995,6 +991,15 @@ static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, val);
 }
 
+static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+				   uint32_t val, uint32_t mask)
+{
+	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
+	amdgpu_ring_write(ring,	reg << 2);
+	amdgpu_ring_write(ring, mask);
+	amdgpu_ring_write(ring, val);
+}
+
 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
 					struct amdgpu_irq_src *source,
 					unsigned type,
@@ -1079,6 +1084,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
 	.begin_use = amdgpu_vce_ring_begin_use,
 	.end_use = amdgpu_vce_ring_end_use,
 	.emit_wreg = vce_v4_0_emit_wreg,
+	.emit_reg_wait = vce_v4_0_emit_reg_wait,
 };
 
 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* [PATCH 08/13] drm/amdgpu: implement vcn_v1_0_(dec|enc)_ring_emit_reg_wait
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (5 preceding siblings ...)
  2018-01-26 20:13   ` [PATCH 07/13] drm/amdgpu: implement vce_v4_0_emit_reg_wait Christian König
@ 2018-01-26 20:13   ` Christian König
  2018-01-26 20:13   ` [PATCH 09/13] drm/amdgpu: move waiting for VM flush into gmc_v9_0_emit_flush_gpu_tlb Christian König
                     ` (5 subsequent siblings)
  12 siblings, 0 replies; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Add emit_reg_wait implementation for VCN v1.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 24ebc3e296a6..da405cb08f99 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -838,17 +838,18 @@ static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, ib->length_dw);
 }
 
-static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring,
-				uint32_t data0, uint32_t data1, uint32_t mask)
+static void vcn_v1_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring,
+					    uint32_t reg, uint32_t val,
+					    uint32_t mask)
 {
 	struct amdgpu_device *adev = ring->adev;
 
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
-	amdgpu_ring_write(ring, data0);
+	amdgpu_ring_write(ring, reg << 2);
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
-	amdgpu_ring_write(ring, data1);
+	amdgpu_ring_write(ring, val);
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
 	amdgpu_ring_write(ring, mask);
@@ -868,16 +869,16 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
 
 	/* wait for register write */
-	data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
+	data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
 	data1 = lower_32_bits(pd_addr);
 	mask = 0xffffffff;
-	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
+	vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
 
 	/* wait for flush */
-	data0 = (hub->vm_inv_eng0_ack + eng) << 2;
+	data0 = hub->vm_inv_eng0_ack + eng;
 	data1 = 1 << vmid;
 	mask =  1 << vmid;
-	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
+	vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
 }
 
 static void vcn_v1_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
@@ -1023,6 +1024,16 @@ static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, val);
 }
 
+static void vcn_v1_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
+					    uint32_t reg, uint32_t val,
+					    uint32_t mask)
+{
+	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring, mask);
+	amdgpu_ring_write(ring, val);
+}
+
 static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
 					struct amdgpu_irq_src *source,
 					unsigned type,
@@ -1114,6 +1125,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
 	.begin_use = amdgpu_vcn_ring_begin_use,
 	.end_use = amdgpu_vcn_ring_end_use,
 	.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
+	.emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
@@ -1141,6 +1153,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
 	.begin_use = amdgpu_vcn_ring_begin_use,
 	.end_use = amdgpu_vcn_ring_end_use,
 	.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
+	.emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
 };
 
 static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* [PATCH 09/13] drm/amdgpu: move waiting for VM flush into gmc_v9_0_emit_flush_gpu_tlb
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (6 preceding siblings ...)
  2018-01-26 20:13   ` [PATCH 08/13] drm/amdgpu: implement vcn_v1_0_(dec|enc)_ring_emit_reg_wait Christian König
@ 2018-01-26 20:13   ` Christian König
  2018-01-26 20:13   ` [PATCH 10/13] drm/amdgpu: enable VMID PASID mapping in the ATC Christian König
                     ` (4 subsequent siblings)
  12 siblings, 0 replies; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Keep that in a common place instead of spreading it over all engines.
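
With the wait folded into the GMC code, an engine's emit_vm_flush shrinks to
little more than the common call; e.g. the SDMA v4 variant from the hunk below
ends up as:

  static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                           unsigned vmid, unsigned pasid,
                                           uint64_t pd_addr)
  {
          /* the invalidate request and the wait for its ack are now both
           * emitted by gmc_v9_0_emit_flush_gpu_tlb() through the generic
           * emit_wreg/emit_reg_wait callbacks */
          amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
  }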

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 19 +++++++++----------
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  4 ++++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 18 +++---------------
 drivers/gpu/drm/amd/amdgpu/soc15.h     |  3 ++-
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c  | 20 ++++++--------------
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c  |  9 +++------
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c  | 22 ++++++----------------
 7 files changed, 33 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 801d4a1dd7db..f7363f821cff 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3679,15 +3679,8 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vmid, unsigned pasid,
 					uint64_t pd_addr)
 {
-	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	unsigned eng = ring->vm_inv_eng;
-
 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
 
-	/* wait for the invalidate to complete */
-	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + eng,
-			      0, 1 << vmid, 1 << vmid, 0x20);
-
 	/* compute doesn't have PFP */
 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
@@ -4295,7 +4288,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
 		5 +  /* COND_EXEC */
 		7 +  /* PIPELINE_SYNC */
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+		2 + /* VM_FLUSH */
 		8 +  /* FENCE for VM_FLUSH */
 		20 + /* GDS switch */
 		4 + /* double SWITCH_BUFFER,
@@ -4344,7 +4339,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* gfx_v9_0_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+		2 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
@@ -4376,7 +4373,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* gfx_v9_0_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+		2 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2b251df94684..2c60981d2eec 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -395,6 +395,10 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 
 	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
 
+	/* wait for the invalidate to complete */
+	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
+				  1 << vmid, 1 << vmid);
+
 	return pd_addr;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index e1ae39f86adf..ce599fd24412 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1126,21 +1126,7 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					 unsigned vmid, unsigned pasid,
 					 uint64_t pd_addr)
 {
-	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	unsigned eng = ring->vm_inv_eng;
-
 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
-
-	/* wait for flush */
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
-	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, 1 << vmid); /* reference */
-	amdgpu_ring_write(ring, 1 << vmid); /* mask */
-	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
-			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
 }
 
 static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
@@ -1589,7 +1575,9 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
 		6 + /* sdma_v4_0_ring_emit_hdp_flush */
 		3 + /* hdp invalidate */
 		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v4_0_ring_emit_vm_flush */
+		/* sdma_v4_0_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
 		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
 	.emit_ib = sdma_v4_0_ring_emit_ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index 0f63ed674d0b..f70da8a29f86 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -27,7 +27,8 @@
 #include "nbio_v6_1.h"
 #include "nbio_v7_0.h"
 
-#define SOC15_FLUSH_GPU_TLB_NUM_WREG	4
+#define SOC15_FLUSH_GPU_TLB_NUM_WREG		4
+#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT	1
 
 extern const struct amd_ip_funcs soc15_common_ip_funcs;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index b8fbc7dc626f..ae5fb6d73794 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1265,7 +1265,6 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	unsigned eng = ring->vm_inv_eng;
 	uint32_t data0, data1, mask;
 
 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
@@ -1275,12 +1274,6 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	data1 = lower_32_bits(pd_addr);
 	mask = 0xffffffff;
 	uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask);
-
-	/* wait for flush */
-	data0 = hub->vm_inv_eng0_ack + eng;
-	data1 = 1 << vmid;
-	mask =  1 << vmid;
-	uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask);
 }
 
 static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -1303,17 +1296,12 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					    uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
 
 	/* wait for reg writes */
 	amdgpu_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
 				  lower_32_bits(pd_addr), 0xffffffff);
-
-	/* wait for flush */
-	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-				  1 << vmid, 1 << vmid);
 }
 
 static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
@@ -1669,7 +1657,9 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 	.set_wptr = uvd_v7_0_ring_set_wptr,
 	.emit_frame_size =
 		6 + 6 + /* hdp flush / invalidate */
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + 16 + /* uvd_v7_0_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+		8 + /* uvd_v7_0_ring_emit_vm_flush */
 		14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */
 	.emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */
 	.emit_ib = uvd_v7_0_ring_emit_ib,
@@ -1696,7 +1686,9 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
 	.set_wptr = uvd_v7_0_enc_ring_set_wptr,
 	.emit_frame_size =
 		3 + 3 + /* hdp flush / invalidate */
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 8 + /* uvd_v7_0_enc_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+		4 + /* uvd_v7_0_enc_ring_emit_vm_flush */
 		5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */
 		1, /* uvd_v7_0_enc_ring_insert_end */
 	.emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 263dd920be92..3ca8129d7620 100755
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -970,17 +970,12 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 				   uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
 
 	/* wait for reg writes */
 	amdgpu_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
 				  lower_32_bits(pd_addr), 0xffffffff);
-
-	/* wait for flush */
-	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-				  1 << vmid, 1 << vmid);
 }
 
 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
@@ -1069,7 +1064,9 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
 	.set_wptr = vce_v4_0_ring_set_wptr,
 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
 	.emit_frame_size =
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 8 + /* vce_v4_0_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+		4 + /* vce_v4_0_emit_vm_flush */
 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
 		1, /* vce_v4_0_ring_insert_end */
 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index da405cb08f99..db33d71def05 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -863,7 +863,6 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					    uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	unsigned eng = ring->vm_inv_eng;
 	uint32_t data0, data1, mask;
 
 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
@@ -873,12 +872,6 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	data1 = lower_32_bits(pd_addr);
 	mask = 0xffffffff;
 	vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
-
-	/* wait for flush */
-	data0 = hub->vm_inv_eng0_ack + eng;
-	data1 = 1 << vmid;
-	mask =  1 << vmid;
-	vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
 }
 
 static void vcn_v1_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
@@ -998,7 +991,6 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					    uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
 
@@ -1008,12 +1000,6 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 			  (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
 	amdgpu_ring_write(ring, 0xffffffff);
 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-	/* wait for flush */
-	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
-	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-	amdgpu_ring_write(ring, 1 << vmid);
-	amdgpu_ring_write(ring, 1 << vmid);
 }
 
 static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
@@ -1109,7 +1095,9 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
 	.set_wptr = vcn_v1_0_dec_ring_set_wptr,
 	.emit_frame_size =
 		6 + 6 + /* hdp invalidate / flush */
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + 16 + /* vcn_v1_0_dec_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+		8 + /* vcn_v1_0_dec_ring_emit_vm_flush */
 		14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */
 		6,
 	.emit_ib_size = 8, /* vcn_v1_0_dec_ring_emit_ib */
@@ -1138,7 +1126,9 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
 	.get_wptr = vcn_v1_0_enc_ring_get_wptr,
 	.set_wptr = vcn_v1_0_enc_ring_set_wptr,
 	.emit_frame_size =
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 8 + /* vcn_v1_0_enc_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+		4 + /* vcn_v1_0_enc_ring_emit_vm_flush */
 		5 + 5 + /* vcn_v1_0_enc_ring_emit_fence x2 vm fence */
 		1, /* vcn_v1_0_enc_ring_insert_end */
 	.emit_ib_size = 5, /* vcn_v1_0_enc_ring_emit_ib */
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* [PATCH 10/13] drm/amdgpu: enable VMID PASID mapping in the ATC
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (7 preceding siblings ...)
  2018-01-26 20:13   ` [PATCH 09/13] drm/amdgpu: move waiting for VM flush into gmc_v9_0_emit_flush_gpu_tlb Christian König
@ 2018-01-26 20:13   ` Christian König
       [not found]     ` <20180126201326.8829-10-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-01-26 20:13   ` [PATCH 11/13] drm/amdgpu: add DRM_AMDGPU_ATC config option Christian König
                     ` (3 subsequent siblings)
  12 siblings, 1 reply; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Update the PASID in the ATC as well and wait for the update to finish.
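
The new write lands before the invalidate request and the new wait before the
invalidate ack, so per hub the tail of the flush sequence becomes roughly the
following (values as in the hunk below, which is also why the
SOC15_FLUSH_GPU_TLB_NUM_* constants go from 4/1 to 5/2):

  /* map the VMID to the PASID in the ATC */
  amdgpu_ring_emit_wreg(ring, reg, val);  /* ATC_VMIDn_PASID_MAPPING: PASID | VALID */

  /* kick off the TLB invalidate */
  amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);

  /* wait for the ATC update, then for the invalidate ack */
  amdgpu_ring_emit_reg_wait(ring,
          SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
          val, val);  /* 1 << vmid for the GFXHUB, 0x10000 << vmid for the MMHUB */
  amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
                            1 << vmid, 1 << vmid);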

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 21 ++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/soc15.h    |  4 ++--
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2c60981d2eec..0077db0a451f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -33,6 +33,7 @@
 #include "vega10_enum.h"
 #include "mmhub/mmhub_1_0_offset.h"
 #include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
 #include "oss/osssys_4_0_offset.h"
 
 #include "soc15.h"
@@ -375,7 +376,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
 	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
-	uint32_t reg;
+	uint32_t reg, val;
 
 	amdgpu_gmc_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
 	pd_addr |= flags;
@@ -393,8 +394,26 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 
 	amdgpu_ring_emit_wreg(ring, reg, pasid);
 
+	if (ring->funcs->vmhub == AMDGPU_GFXHUB)
+		reg = SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid;
+	else
+		reg = SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid;
+
+	val = 0;
+	val = REG_SET_FIELD(val, ATC_VMID0_PASID_MAPPING, PASID, pasid);
+	val = REG_SET_FIELD(val, ATC_VMID0_PASID_MAPPING, VALID, 1);
+	amdgpu_ring_emit_wreg(ring, reg, val);
+
 	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
 
+	/* wait for the ATC to complete */
+	reg = SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID_PASID_MAPPING_UPDATE_STATUS);
+	if (ring->funcs->vmhub == AMDGPU_GFXHUB)
+		val = 0x1 << vmid;
+	else
+		val = 0x10000 << vmid;
+	amdgpu_ring_emit_reg_wait(ring, reg, val, val);
+
 	/* wait for the invalidate to complete */
 	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
 				  1 << vmid, 1 << vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index f70da8a29f86..1b8833503f4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -27,8 +27,8 @@
 #include "nbio_v6_1.h"
 #include "nbio_v7_0.h"
 
-#define SOC15_FLUSH_GPU_TLB_NUM_WREG		4
-#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT	1
+#define SOC15_FLUSH_GPU_TLB_NUM_WREG		5
+#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT	2
 
 extern const struct amd_ip_funcs soc15_common_ip_funcs;
 
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* [PATCH 11/13] drm/amdgpu: add DRM_AMDGPU_ATC config option
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (8 preceding siblings ...)
  2018-01-26 20:13   ` [PATCH 10/13] drm/amdgpu: enable VMID PASID mapping in the ATC Christian König
@ 2018-01-26 20:13   ` Christian König
       [not found]     ` <20180126201326.8829-11-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-01-26 20:13   ` [PATCH 12/13] drm/amdgpu: add amdgpu_pasid_bind/unbind Christian König
                     ` (2 subsequent siblings)
  12 siblings, 1 reply; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Move the amd_iommu_v2 initialization into amdgpu when the new DRM_AMDGPU_ATC option is enabled.

This is WIP and really ugly since amdgpu should not depend directly on
amd_iommu_v2.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Kconfig         |  8 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 11 ++++++++++-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index e8af1f5e8a79..7d3fdcbf0acb 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -40,5 +40,13 @@ config DRM_AMDGPU_GART_DEBUGFS
 	  Selecting this option creates a debugfs file to inspect the mapped
 	  pages. Uses more memory for housekeeping, enable only for debugging.
 
+config DRM_AMDGPU_ATC
+	bool "Enable the ATC to provide SVM support"
+	depends on DRM_AMDGPU
+	depends on AMD_IOMMU_V2
+	default y
+	help
+		This enables support for the ATC to provide a shared virtual memory implementation.
+
 source "drivers/gpu/drm/amd/acp/Kconfig"
 source "drivers/gpu/drm/amd/display/Kconfig"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 84281ee34a25..04205236cc5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -35,6 +35,7 @@
 #include <linux/vgaarb.h>
 #include <linux/vga_switcheroo.h>
 #include <linux/efi.h>
+#include <linux/amd-iommu.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_i2c.h"
@@ -1355,6 +1356,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		adev->ip_blocks[i].status.hw = true;
 	}
 
+#ifdef CONFIG_DRM_AMDGPU_ATC
+	r = amd_iommu_init_device(adev->pdev, 0x10000);
+	if (r)
+		DRM_ERROR("amd_iommu_init_device error %d\n", r);
+#endif
+
 	amdgpu_amdkfd_device_init(adev);
 
 	if (amdgpu_sriov_vf(adev))
@@ -1428,6 +1435,11 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 	int i, r;
 
 	amdgpu_amdkfd_device_fini(adev);
+
+#ifdef CONFIG_DRM_AMDGPU_ATC
+	amd_iommu_free_device(adev->pdev);
+#endif
+
 	/* need to disable SMC first */
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.hw)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index a8fa33a08de3..7ed090f7c5f0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -373,7 +373,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
 
 	amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
 	amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
+
+#ifndef CONFIG_DRM_AMDGPU_ATC
 	amd_iommu_free_device(kfd->pdev);
+#endif
 }
 
 int kgd2kfd_resume(struct kfd_dev *kfd)
@@ -388,11 +391,15 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
 static int kfd_resume(struct kfd_dev *kfd)
 {
 	int err = 0;
+
+#ifndef CONFIG_DRM_AMDGPU_ATC
 	unsigned int pasid_limit = kfd_get_pasid_limit();
 
 	err = amd_iommu_init_device(kfd->pdev, pasid_limit);
 	if (err)
 		return -ENXIO;
+#endif
+
 	amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
 					iommu_pasid_shutdown_callback);
 	amd_iommu_set_invalid_ppr_cb(kfd->pdev,
@@ -414,8 +421,10 @@ static int kfd_resume(struct kfd_dev *kfd)
 
 dqm_start_error:
 processes_bind_error:
-	amd_iommu_free_device(kfd->pdev);
 
+#ifndef CONFIG_DRM_AMDGPU_ATC
+	amd_iommu_free_device(kfd->pdev);
+#endif
 	return err;
 }
 
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* [PATCH 12/13] drm/amdgpu: add amdgpu_pasid_bind/unbind
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (9 preceding siblings ...)
  2018-01-26 20:13   ` [PATCH 11/13] drm/amdgpu: add DRM_AMDGPU_ATC config option Christian König
@ 2018-01-26 20:13   ` Christian König
  2018-01-26 20:13   ` [PATCH 13/13] drm/amdgpu: add AMDGPU_VM_OP_ENABLE_SVM IOCTL Christian König
  2018-01-29 23:01   ` [PATCH 01/13] drm/amdgpu: fix vcn_v1_0_dec_ring_emit_wreg Felix Kuehling
  12 siblings, 0 replies; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Allow us to bind a PASID to the current process and unbind it when the
VM isn't used any more.
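
The intended pairing, as wired up by the following patch, is roughly (sketch
only, error handling trimmed):

  /* bind the VM's PASID to the current process, e.g. when enabling SVM */
  r = amdgpu_pasid_bind(adev->pdev, vm->pasid);
  if (r)
          return r;

  /* on teardown the unbind is deferred until all fences on the root PD
   * have signaled */
  amdgpu_pasid_free_delayed(pd->tbo.resv, adev->pdev, pasid);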

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 28 ++++++++++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  2 +-
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index c13cf7e79b2e..8cfdb07a4439 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -24,6 +24,7 @@
 
 #include <linux/idr.h>
 #include <linux/dma-fence-array.h>
+#include <linux/amd-iommu.h>
 #include <drm/drmP.h>
 
 #include "amdgpu.h"
@@ -43,6 +44,7 @@ static DEFINE_IDA(amdgpu_pasid_ida);
 /* Helper to free pasid from a fence callback */
 struct amdgpu_pasid_cb {
 	struct dma_fence_cb cb;
+	struct pci_dev *pdev;
 	unsigned int pasid;
 };
 
@@ -85,12 +87,30 @@ void amdgpu_pasid_free(unsigned int pasid)
 	ida_simple_remove(&amdgpu_pasid_ida, pasid);
 }
 
+int amdgpu_pasid_bind(struct pci_dev *pdev, unsigned int pasid)
+{
+#ifdef CONFIG_DRM_AMDGPU_ATC
+	return amd_iommu_bind_pasid(pdev, pasid, current);
+#else
+	return -ENODEV;
+#endif
+}
+
+void amdgpu_pasid_unbind(struct pci_dev *pdev, unsigned int pasid)
+{
+#ifdef CONFIG_DRM_AMDGPU_ATC
+	if (pdev)
+		amd_iommu_unbind_pasid(pdev, pasid);
+#endif
+}
+
 static void amdgpu_pasid_free_cb(struct dma_fence *fence,
 				 struct dma_fence_cb *_cb)
 {
 	struct amdgpu_pasid_cb *cb =
 		container_of(_cb, struct amdgpu_pasid_cb, cb);
 
+	amdgpu_pasid_unbind(cb->pdev, cb->pasid);
 	amdgpu_pasid_free(cb->pasid);
 	dma_fence_put(fence);
 	kfree(cb);
@@ -100,11 +120,13 @@ static void amdgpu_pasid_free_cb(struct dma_fence *fence,
  * amdgpu_pasid_free_delayed - free pasid when fences signal
  *
  * @resv: reservation object with the fences to wait for
+ * @pdev: optional pci device to unbind the PASID from
  * @pasid: pasid to free
  *
  * Free the pasid only after all the fences in resv are signaled.
  */
 void amdgpu_pasid_free_delayed(struct reservation_object *resv,
+			       struct pci_dev *pdev,
 			       unsigned int pasid)
 {
 	struct dma_fence *fence, **fences;
@@ -117,6 +139,7 @@ void amdgpu_pasid_free_delayed(struct reservation_object *resv,
 		goto fallback;
 
 	if (count == 0) {
+		amdgpu_pasid_unbind(pdev, pasid);
 		amdgpu_pasid_free(pasid);
 		return;
 	}
@@ -140,10 +163,10 @@ void amdgpu_pasid_free_delayed(struct reservation_object *resv,
 	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
 	if (!cb) {
 		/* Last resort when we are OOM */
-		dma_fence_wait(fence, false);
 		dma_fence_put(fence);
-		amdgpu_pasid_free(pasid);
+		goto fallback;
 	} else {
+		cb->pdev = pdev;
 		cb->pasid = pasid;
 		if (dma_fence_add_callback(fence, &cb->cb,
 					   amdgpu_pasid_free_cb))
@@ -158,6 +181,7 @@ void amdgpu_pasid_free_delayed(struct reservation_object *resv,
 	 */
 	reservation_object_wait_timeout_rcu(resv, true, false,
 					    MAX_SCHEDULE_TIMEOUT);
+	amdgpu_pasid_unbind(pdev, pasid);
 	amdgpu_pasid_free(pasid);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 38f37c16fc5e..ef88fc4f21fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -69,7 +69,10 @@ struct amdgpu_vmid_mgr {
 
 int amdgpu_pasid_alloc(unsigned int bits);
 void amdgpu_pasid_free(unsigned int pasid);
+int amdgpu_pasid_bind(struct pci_dev *pdev, unsigned int pasid);
+void amdgpu_pasid_unbind(struct pci_dev *pdev, unsigned int pasid);
 void amdgpu_pasid_free_delayed(struct reservation_object *resv,
+			       struct pci_dev *pdev,
 			       unsigned int pasid);
 
 bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 431038885778..b18920007624 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -927,7 +927,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 
 	amdgpu_vm_fini(adev, &fpriv->vm);
 	if (pasid)
-		amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
+		amdgpu_pasid_free_delayed(pd->tbo.resv, NULL, pasid);
 	amdgpu_bo_unref(&pd);
 
 	idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* [PATCH 13/13] drm/amdgpu: add AMDGPU_VM_OP_ENABLE_SVM IOCTL
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (10 preceding siblings ...)
  2018-01-26 20:13   ` [PATCH 12/13] drm/amdgpu: add amdgpu_pasid_bind/unbind Christian König
@ 2018-01-26 20:13   ` Christian König
       [not found]     ` <20180126201326.8829-13-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-01-29 23:01   ` [PATCH 01/13] drm/amdgpu: fix vcn_v1_0_dec_ring_emit_wreg Felix Kuehling
  12 siblings, 1 reply; 29+ messages in thread
From: Christian König @ 2018-01-26 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Add an IOCTL to enable SVM for the current process.

One step further towards HMM support.
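
From userspace the new op would be exercised roughly as below; this is only a
sketch and assumes the libdrm headers and drmCommandWriteRead() together with
the existing DRM_AMDGPU_VM command that already carries the VMID
reserve/unreserve ops:

  #include <amdgpu_drm.h>
  #include <xf86drm.h>

  static int enable_svm(int fd)
  {
          union drm_amdgpu_vm args = {};

          args.in.op = AMDGPU_VM_OP_ENABLE_SVM;
          /* on success the kernel binds the process address space to the
           * VM's PASID via amd_iommu_v2 */
          return drmCommandWriteRead(fd, DRM_AMDGPU_VM, &args, sizeof(args));
  }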

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 94 +++++++++++++++++++++++++++++++--
 include/uapi/drm/amdgpu_drm.h           |  1 +
 3 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b18920007624..2f424f8248a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -897,6 +897,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_bo_list *list;
 	struct amdgpu_bo *pd;
+	struct pci_dev *pdev;
 	unsigned int pasid;
 	int handle;
 
@@ -923,11 +924,12 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	}
 
 	pasid = fpriv->vm.pasid;
+	pdev = fpriv->vm.pte_support_ats ? adev->pdev : NULL;
 	pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
 
 	amdgpu_vm_fini(adev, &fpriv->vm);
 	if (pasid)
-		amdgpu_pasid_free_delayed(pd->tbo.resv, NULL, pasid);
+		amdgpu_pasid_free_delayed(pd->tbo.resv, pdev, pasid);
 	amdgpu_bo_unref(&pd);
 
 	idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5e53b7a2d4d5..84f41385677c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -257,6 +257,24 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 	return ready;
 }
 
+/**
+ * amdgpu_vm_root_ats_entries - number of ATS entries in the root PD
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns number of entries in the root PD which should be initialized for ATS
+ * use.
+ */
+static unsigned amdgpu_vm_root_ats_entries(struct amdgpu_device *adev)
+{
+	unsigned level = adev->vm_manager.root_level;
+	unsigned shift;
+
+	shift = amdgpu_vm_level_shift(adev, level);
+	shift += AMDGPU_GPU_PAGE_SHIFT;
+	return AMDGPU_VA_HOLE_START >> shift;
+}
+
 /**
  * amdgpu_vm_clear_bo - initially clear the PDs/PTs
  *
@@ -283,9 +301,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 
 	if (pte_support_ats) {
 		if (level == adev->vm_manager.root_level) {
-			ats_entries = amdgpu_vm_level_shift(adev, level);
-			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
-			ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
+			ats_entries = amdgpu_vm_root_ats_entries(adev);
 			ats_entries = min(ats_entries, entries);
 			entries -= ats_entries;
 		} else {
@@ -329,6 +345,9 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 
+	amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
+			 AMDGPU_FENCE_OWNER_VM, false);
+
 	WARN_ON(job->ibs[0].length_dw > 64);
 	r = amdgpu_job_submit(job, ring, &vm->entity,
 			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
@@ -2557,6 +2576,71 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
 	return true;
 }
 
+/**
+ * amdgpu_vm_enable_svm - enable SVM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to enable SVM
+ *
+ * Initialize SVM.
+ */
+int amdgpu_vm_enable_svm(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+	int r;
+
+	if (!vm->pasid)
+		return -ENODEV;
+
+	r = amdgpu_bo_reserve(vm->root.base.bo, false);
+	if (r)
+		return r;
+
+	if (vm->pte_support_ats) {
+		r = -EALREADY;
+		goto error_unlock;
+	}
+
+	if (vm->root.entries) {
+		unsigned i, entries;
+
+		entries = amdgpu_vm_root_ats_entries(adev);
+		for (i = 0; i < entries; ++i) {
+			if (vm->root.entries[i].base.bo) {
+				r = -EEXIST;
+				goto error_unlock;
+			}
+		}
+
+		entries = amdgpu_bo_size(vm->root.base.bo) / 8;
+		spin_lock(&vm->status_lock);
+		for (; i < entries; ++i) {
+			struct amdgpu_vm_pt *pt = &vm->root.entries[i];
+
+			if (pt->base.bo)
+				list_move(&pt->base.vm_status, &vm->moved);
+		}
+		spin_unlock(&vm->status_lock);
+	}
+
+	r = amdgpu_pasid_bind(adev->pdev, vm->pasid);
+	if (r)
+		goto error_unlock;
+
+	r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+			       adev->vm_manager.root_level,
+			       true);
+	if (r) {
+		amdgpu_pasid_unbind(adev->pdev, vm->pasid);
+		goto error_unlock;
+	}
+
+	vm->pte_support_ats = true;
+
+error_unlock:
+	amdgpu_bo_unreserve(vm->root.base.bo);
+	return r;
+}
+
 /**
  * amdgpu_vm_manager_init - init the VM manager
  *
@@ -2616,9 +2700,9 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 
 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
-	union drm_amdgpu_vm *args = data;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	union drm_amdgpu_vm *args = data;
 	int r;
 
 	switch (args->in.op) {
@@ -2631,6 +2715,8 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	case AMDGPU_VM_OP_UNRESERVE_VMID:
 		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
 		break;
+	case AMDGPU_VM_OP_ENABLE_SVM:
+		return amdgpu_vm_enable_svm(adev, &fpriv->vm);
 	default:
 		return -EINVAL;
 	}
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index fe17b6785441..c5b13ebe8dfc 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -223,6 +223,7 @@ union drm_amdgpu_ctx {
 /* vm ioctl */
 #define AMDGPU_VM_OP_RESERVE_VMID	1
 #define AMDGPU_VM_OP_UNRESERVE_VMID	2
+#define AMDGPU_VM_OP_ENABLE_SVM		3
 
 struct drm_amdgpu_vm_in {
 	/** AMDGPU_VM_OP_* */
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* RE: [PATCH 02/13] drm/amdgpu: drop root shadow sync
       [not found]     ` <20180126201326.8829-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-29  9:14       ` He, Roger
  0 siblings, 0 replies; 29+ messages in thread
From: He, Roger @ 2018-01-29  9:14 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


Reviewed-by: Roger He <Hongbo.He@amd.com>

Thanks
Roger(Hongbo.He)
-----Original Message-----
From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of Christian König
Sent: Saturday, January 27, 2018 4:13 AM
To: amd-gfx@lists.freedesktop.org
Subject: [PATCH 02/13] drm/amdgpu: drop root shadow sync

Completely pointless, it is the same reservation object as the root PD anyway.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a3b9c3976eb3..5e53b7a2d4d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -956,11 +956,6 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 		amdgpu_ring_pad_ib(ring, params.ib);
 		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
 				 AMDGPU_FENCE_OWNER_VM, false);
-		if (root->shadow)
-			amdgpu_sync_resv(adev, &job->sync,
-					 root->shadow->tbo.resv,
-					 AMDGPU_FENCE_OWNER_VM, false);
-
 		WARN_ON(params.ib->length_dw > ndw);
 		r = amdgpu_job_submit(job, ring, &vm->entity,
 				      AMDGPU_FENCE_OWNER_VM, &fence);
--
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 06/13] drm/amdgpu: implement uvd_v7_0_(enc_|)ring_emit_reg_wait
       [not found]     ` <20180126201326.8829-6-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-29 21:28       ` Felix Kuehling
  0 siblings, 0 replies; 29+ messages in thread
From: Felix Kuehling @ 2018-01-29 21:28 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Suggestion inline ...


On 2018-01-26 03:13 PM, Christian König wrote:
> Add emit_reg_wait implementation for UVD v7.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 40 +++++++++++++++++++++--------------
>  1 file changed, 24 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index d317c764cc91..b8fbc7dc626f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -1241,17 +1241,17 @@ static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
>  	amdgpu_ring_write(ring, 8);
>  }
>  
> -static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring,
> -				uint32_t data0, uint32_t data1, uint32_t mask)
> +static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
> +					uint32_t val, uint32_t mask)
>  {
>  	struct amdgpu_device *adev = ring->adev;
>  
>  	amdgpu_ring_write(ring,
>  		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
> -	amdgpu_ring_write(ring, data0);
> +	amdgpu_ring_write(ring, reg << 2);
>  	amdgpu_ring_write(ring,
>  		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
> -	amdgpu_ring_write(ring, data1);
> +	amdgpu_ring_write(ring, val);
>  	amdgpu_ring_write(ring,
>  		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
>  	amdgpu_ring_write(ring, mask);
> @@ -1271,16 +1271,16 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>  	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
>  
>  	/* wait for reg writes */
> -	data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
> +	data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
>  	data1 = lower_32_bits(pd_addr);
>  	mask = 0xffffffff;
> -	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
> +	uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask);
>  
>  	/* wait for flush */
> -	data0 = (hub->vm_inv_eng0_ack + eng) << 2;
> +	data0 = hub->vm_inv_eng0_ack + eng;
>  	data1 = 1 << vmid;
>  	mask =  1 << vmid;
> -	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
> +	uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask);
>  }
>  
>  static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> @@ -1308,16 +1308,12 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
>  	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
>  
>  	/* wait for reg writes */
> -	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
> -	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
> -	amdgpu_ring_write(ring, 0xffffffff);
> -	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> +	amdgpu_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
> +				  lower_32_bits(pd_addr), 0xffffffff);

You could call uvd_v7_0_ring_emit_reg_wait directly here and save
yourself an indirect call. It would also allow the compiler to inline it
if appropriate.
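
E.g. something like this, untested (since this is the enc ring, the direct
call would presumably be the enc variant added further down in this patch,
so it would need to be moved up or forward-declared):

	/* wait for reg writes */
	uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
					lower_32_bits(pd_addr), 0xffffffff);

	/* wait for flush */
	uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
					1 << vmid, 1 << vmid);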

>  
>  	/* wait for flush */
> -	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
> -	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> -	amdgpu_ring_write(ring, 1 << vmid);
> -	amdgpu_ring_write(ring, 1 << vmid);
> +	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> +				  1 << vmid, 1 << vmid);

Same as above.

Regards,
  Felix

>  }
>  
>  static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
> @@ -1328,6 +1324,16 @@ static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
>  	amdgpu_ring_write(ring, val);
>  }
>  
> +static void uvd_v7_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
> +					    uint32_t reg, uint32_t val,
> +					    uint32_t mask)
> +{
> +	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
> +	amdgpu_ring_write(ring,	reg << 2);
> +	amdgpu_ring_write(ring, mask);
> +	amdgpu_ring_write(ring, val);
> +}
> +
>  #if 0
>  static bool uvd_v7_0_is_idle(void *handle)
>  {
> @@ -1676,6 +1682,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
>  	.begin_use = amdgpu_uvd_ring_begin_use,
>  	.end_use = amdgpu_uvd_ring_end_use,
>  	.emit_wreg = uvd_v7_0_ring_emit_wreg,
> +	.emit_reg_wait = uvd_v7_0_ring_emit_reg_wait,
>  };
>  
>  static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
> @@ -1704,6 +1711,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
>  	.begin_use = amdgpu_uvd_ring_begin_use,
>  	.end_use = amdgpu_uvd_ring_end_use,
>  	.emit_wreg = uvd_v7_0_enc_ring_emit_wreg,
> +	.emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait,
>  };
>  
>  static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 07/13] drm/amdgpu: implement vce_v4_0_emit_reg_wait
       [not found]     ` <20180126201326.8829-7-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-29 21:30       ` Felix Kuehling
  0 siblings, 0 replies; 29+ messages in thread
From: Felix Kuehling @ 2018-01-29 21:30 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Suggestion inline ...


On 2018-01-26 03:13 PM, Christian König wrote:
> Add emit_reg_wait implementation for VCE v4.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 22 ++++++++++++++--------
>  1 file changed, 14 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> index e62a24b90aaf..263dd920be92 100755
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> @@ -975,16 +975,12 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
>  	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);
>  
>  	/* wait for reg writes */
> -	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
> -	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
> -	amdgpu_ring_write(ring, 0xffffffff);
> -	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> +	amdgpu_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
> +				  lower_32_bits(pd_addr), 0xffffffff);

Call vce_v4_0_emit_reg_wait directly.
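
E.g. (sketch only; vce_v4_0_emit_reg_wait is added below this function in
the patch, so it would need to be moved up or forward-declared):

	/* wait for reg writes */
	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
			       lower_32_bits(pd_addr), 0xffffffff);

	/* wait for flush */
	vce_v4_0_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
			       1 << vmid, 1 << vmid);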

>  
>  	/* wait for flush */
> -	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
> -	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> -	amdgpu_ring_write(ring, 1 << vmid);
> -	amdgpu_ring_write(ring, 1 << vmid);
> +	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> +				  1 << vmid, 1 << vmid);

Same as above.

Regards,
  Felix

>  }
>  
>  static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
> @@ -995,6 +991,15 @@ static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
>  	amdgpu_ring_write(ring, val);
>  }
>  
> +static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
> +				   uint32_t val, uint32_t mask)
> +{
> +	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
> +	amdgpu_ring_write(ring,	reg << 2);
> +	amdgpu_ring_write(ring, mask);
> +	amdgpu_ring_write(ring, val);
> +}
> +
>  static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
>  					struct amdgpu_irq_src *source,
>  					unsigned type,
> @@ -1079,6 +1084,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
>  	.begin_use = amdgpu_vce_ring_begin_use,
>  	.end_use = amdgpu_vce_ring_end_use,
>  	.emit_wreg = vce_v4_0_emit_wreg,
> +	.emit_reg_wait = vce_v4_0_emit_reg_wait,
>  };
>  
>  static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 10/13] drm/amdgpu: enable VMID PASID mapping in the ATC
       [not found]     ` <20180126201326.8829-10-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-29 22:05       ` Felix Kuehling
       [not found]         ` <7313311d-90dc-e0ff-2333-184caf6eb9d2-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 29+ messages in thread
From: Felix Kuehling @ 2018-01-29 22:05 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Could we cache the previous VMID/PASID mapping somewhere, and only
update+wait if it changes?

If this is not the right place to check it (because multiple rings can
be using the same VMID concurrently), maybe add a flag to
emit_flush_gpu_tlb to update the PASID mapping conditionally, and make
the decision higher up in the VM manager.
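
If the emit function turns out to be the right place after all, a minimal
sketch could look like this (pasid_mapping would be a new, hypothetical
cache field in struct amdgpu_vmid, and the concurrency question above
still applies):

	struct amdgpu_vmid *id =
		&ring->adev->vm_manager.id_mgr[ring->funcs->vmhub].ids[vmid];

	/* only re-emit the ATC mapping if it actually changed */
	if (id->pasid_mapping != pasid) {
		amdgpu_ring_emit_wreg(ring, reg, val);
		/* ... plus the ATC update-status wait from the patch ... */
		id->pasid_mapping = pasid;
	}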

Regards,
  Felix


On 2018-01-26 03:13 PM, Christian König wrote:
> Update the PASID in the ATC as well and wait for the update to finish.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 21 ++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/soc15.h    |  4 ++--
>  2 files changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 2c60981d2eec..0077db0a451f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -33,6 +33,7 @@
>  #include "vega10_enum.h"
>  #include "mmhub/mmhub_1_0_offset.h"
>  #include "athub/athub_1_0_offset.h"
> +#include "athub/athub_1_0_sh_mask.h"
>  #include "oss/osssys_4_0_offset.h"
>  
>  #include "soc15.h"
> @@ -375,7 +376,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>  	uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
>  	uint64_t flags = AMDGPU_PTE_VALID;
>  	unsigned eng = ring->vm_inv_eng;
> -	uint32_t reg;
> +	uint32_t reg, val;
>  
>  	amdgpu_gmc_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
>  	pd_addr |= flags;
> @@ -393,8 +394,26 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>  
>  	amdgpu_ring_emit_wreg(ring, reg, pasid);
>  
> +	if (ring->funcs->vmhub == AMDGPU_GFXHUB)
> +		reg = SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid;
> +	else
> +		reg = SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid;
> +
> +	val = 0;
> +	val = REG_SET_FIELD(val, ATC_VMID0_PASID_MAPPING, PASID, pasid);
> +	val = REG_SET_FIELD(val, ATC_VMID0_PASID_MAPPING, VALID, 1);
> +	amdgpu_ring_emit_wreg(ring, reg, val);
> +
>  	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
>  
> +	/* wait for the ATC to complete */
> +	reg = SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID_PASID_MAPPING_UPDATE_STATUS);
> +	if (ring->funcs->vmhub == AMDGPU_GFXHUB)
> +		val = 0x1 << vmid;
> +	else
> +		val = 0x10000 << vmid;
> +	amdgpu_ring_emit_reg_wait(ring, reg, val, val);
> +
>  	/* wait for the invalidate to complete */
>  	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
>  				  1 << vmid, 1 << vmid);
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
> index f70da8a29f86..1b8833503f4c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.h
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
> @@ -27,8 +27,8 @@
>  #include "nbio_v6_1.h"
>  #include "nbio_v7_0.h"
>  
> -#define SOC15_FLUSH_GPU_TLB_NUM_WREG		4
> -#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT	1
> +#define SOC15_FLUSH_GPU_TLB_NUM_WREG		5
> +#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT	2
>  
>  extern const struct amd_ip_funcs soc15_common_ip_funcs;
>  

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 11/13] drm/amdgpu: add DRM_AMDGPU_ATC config option
       [not found]     ` <20180126201326.8829-11-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-29 22:08       ` Felix Kuehling
       [not found]         ` <6cf4bd37-fbf1-1fb4-496a-d62e403aea6f-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 29+ messages in thread
From: Felix Kuehling @ 2018-01-29 22:08 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2018-01-26 03:13 PM, Christian König wrote:
> Move amd_iommu_v2 initialization into amdgpu when it is enabled.
>
> This is WIP and really ugly since amdgpu should not depend directly on
> amd_iommu_v2.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/Kconfig         |  8 ++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 11 ++++++++++-
>  3 files changed, 30 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
> index e8af1f5e8a79..7d3fdcbf0acb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
> @@ -40,5 +40,13 @@ config DRM_AMDGPU_GART_DEBUGFS
>  	  Selecting this option creates a debugfs file to inspect the mapped
>  	  pages. Uses more memory for housekeeping, enable only for debugging.
>  
> +config DRM_AMDGPU_ATC
> +	bool "Enable the ATC to provide SVM support"
> +	depends on DRM_AMDGPU
> +	depends on AMD_IOMMU_V2
> +	default y
> +	help
> +		This enables support for the ATC to provide a shared virtual memory implementation.
> +
>  source "drivers/gpu/drm/amd/acp/Kconfig"
>  source "drivers/gpu/drm/amd/display/Kconfig"
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 84281ee34a25..04205236cc5e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -35,6 +35,7 @@
>  #include <linux/vgaarb.h>
>  #include <linux/vga_switcheroo.h>
>  #include <linux/efi.h>
> +#include <linux/amd-iommu.h>
>  #include "amdgpu.h"
>  #include "amdgpu_trace.h"
>  #include "amdgpu_i2c.h"
> @@ -1355,6 +1356,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>  		adev->ip_blocks[i].status.hw = true;
>  	}
>  
> +#ifdef CONFIG_DRM_AMDGPU_ATC
> +	r = amd_iommu_init_device(adev->pdev, 0x10000);

KFD queries how many PASIDs the IOMMU can support with
amd_iommu_device_info. KFD only assigns PASIDs within that range. It can
be much smaller than the 16 bits supported by the GPU.

For a VM that uses ATC, you need to make sure it gets a PASID in the
range supported by the IOMMU. The PASID manager already supports that
and keeps smaller PASIDs for users that really need them.
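
Roughly this kind of clamping (sketch only, not the actual KFD code; the
struct and flag come from linux/amd-iommu.h):

	struct amd_iommu_device_info info = {};
	unsigned int pasid_bits = 16;

	/* limit the PASID width to what the IOMMU actually supports */
	if (!amd_iommu_device_info(adev->pdev, &info) &&
	    (info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP))
		pasid_bits = min(pasid_bits,
				 (unsigned int)get_count_order(info.max_pasids));

	pasid = amdgpu_pasid_alloc(pasid_bits);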

Regards,
  Felix

> +	if (r)
> +		DRM_ERROR("amd_iommu_init_device error %d\n", r);
> +#endif
> +
>  	amdgpu_amdkfd_device_init(adev);
>  
>  	if (amdgpu_sriov_vf(adev))
> @@ -1428,6 +1435,11 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
>  	int i, r;
>  
>  	amdgpu_amdkfd_device_fini(adev);
> +
> +#ifdef CONFIG_DRM_AMDGPU_ATC
> +	amd_iommu_free_device(adev->pdev);
> +#endif
> +
>  	/* need to disable SMC first */
>  	for (i = 0; i < adev->num_ip_blocks; i++) {
>  		if (!adev->ip_blocks[i].status.hw)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index a8fa33a08de3..7ed090f7c5f0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -373,7 +373,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
>  
>  	amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
>  	amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
> +
> +#ifndef CONFIG_DRM_AMDGPU_ATC
>  	amd_iommu_free_device(kfd->pdev);
> +#endif
>  }
>  
>  int kgd2kfd_resume(struct kfd_dev *kfd)
> @@ -388,11 +391,15 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
>  static int kfd_resume(struct kfd_dev *kfd)
>  {
>  	int err = 0;
> +
> +#ifndef CONFIG_DRM_AMDGPU_ATC
>  	unsigned int pasid_limit = kfd_get_pasid_limit();
>  
>  	err = amd_iommu_init_device(kfd->pdev, pasid_limit);
>  	if (err)
>  		return -ENXIO;
> +#endif
> +
>  	amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
>  					iommu_pasid_shutdown_callback);
>  	amd_iommu_set_invalid_ppr_cb(kfd->pdev,
> @@ -414,8 +421,10 @@ static int kfd_resume(struct kfd_dev *kfd)
>  
>  dqm_start_error:
>  processes_bind_error:
> -	amd_iommu_free_device(kfd->pdev);
>  
> +#ifndef CONFIG_DRM_AMDGPU_ATC
> +	amd_iommu_free_device(kfd->pdev);
> +#endif
>  	return err;
>  }
>  

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 13/13] drm/amdgpu: add AMDGPU_VM_OP_ENABLE_SVM IOCTL
       [not found]     ` <20180126201326.8829-13-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-29 22:27       ` Felix Kuehling
       [not found]         ` <22437f1c-cecb-1f25-cf90-9f54663bad12-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 29+ messages in thread
From: Felix Kuehling @ 2018-01-29 22:27 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Enabling SVM after the VM has been created and the PASID allocated is
problematic because the IOMMU can support a smaller range of PASIDs than
the GPU. Ideally SVM would be a flag during VM creation, but I see that
doesn't work as it's done in amdgpu_driver_open_kms, not in an ioctl.

Could the PASID be changed on an existing VM if necessary?

One more comment inline ...

On 2018-01-26 03:13 PM, Christian König wrote:
> Add an IOCTL to enable SVM for the current process.
>
> One step further towards HMM support.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 94 +++++++++++++++++++++++++++++++--
>  include/uapi/drm/amdgpu_drm.h           |  1 +
>  3 files changed, 94 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index b18920007624..2f424f8248a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -897,6 +897,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>  	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
>  	struct amdgpu_bo_list *list;
>  	struct amdgpu_bo *pd;
> +	struct pci_dev *pdev;
>  	unsigned int pasid;
>  	int handle;
>  
> @@ -923,11 +924,12 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>  	}
>  
>  	pasid = fpriv->vm.pasid;
> +	pdev = fpriv->vm.pte_support_ats ? adev->pdev : NULL;
>  	pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
>  
>  	amdgpu_vm_fini(adev, &fpriv->vm);
>  	if (pasid)
> -		amdgpu_pasid_free_delayed(pd->tbo.resv, NULL, pasid);
> +		amdgpu_pasid_free_delayed(pd->tbo.resv, pdev, pasid);
>  	amdgpu_bo_unref(&pd);
>  
>  	idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 5e53b7a2d4d5..84f41385677c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -257,6 +257,24 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
>  	return ready;
>  }
>  
> +/**
> + * amdgpu_vm_root_ats_entries - number of ATS entries in the root PD
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Returns number of entries in the root PD which should be initialized for ATS
> + * use.
> + */
> +static unsigned amdgpu_vm_root_ats_entries(struct amdgpu_device *adev)
> +{
> +	unsigned level = adev->vm_manager.root_level;
> +	unsigned shift;
> +
> +	shift = amdgpu_vm_level_shift(adev, level);
> +	shift += AMDGPU_GPU_PAGE_SHIFT;
> +	return AMDGPU_VA_HOLE_START >> shift;
> +}
> +
>  /**
>   * amdgpu_vm_clear_bo - initially clear the PDs/PTs
>   *
> @@ -283,9 +301,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>  
>  	if (pte_support_ats) {
>  		if (level == adev->vm_manager.root_level) {
> -			ats_entries = amdgpu_vm_level_shift(adev, level);
> -			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
> -			ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
> +			ats_entries = amdgpu_vm_root_ats_entries(adev);
>  			ats_entries = min(ats_entries, entries);
>  			entries -= ats_entries;
>  		} else {
> @@ -329,6 +345,9 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>  
>  	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>  
> +	amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
> +			 AMDGPU_FENCE_OWNER_VM, false);
> +
>  	WARN_ON(job->ibs[0].length_dw > 64);
>  	r = amdgpu_job_submit(job, ring, &vm->entity,
>  			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
> @@ -2557,6 +2576,71 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
>  	return true;
>  }
>  
> +/**
> + * amdgpu_vm_enable_svm - enable SVM
> + *
> + * @adev: amdgpu_device pointer
> + * @vm: VM to enable SVM
> + *
> + * Initialize SVM.
> + */
> +int amdgpu_vm_enable_svm(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> +{
> +	int r;
> +
> +	if (!vm->pasid)
> +		return -ENODEV;
> +
> +	r = amdgpu_bo_reserve(vm->root.base.bo, false);
> +	if (r)
> +		return r;
> +
> +	if (vm->pte_support_ats) {
> +		r = -EALREADY;
> +		goto error_unlock;
> +	}
> +
> +	if (vm->root.entries) {
> +		unsigned i, entries;
> +
> +		entries = amdgpu_vm_root_ats_entries(adev);
> +		for (i = 0; i < entries; ++i) {
> +			if (vm->root.entries[i].base.bo) {
> +				r = -EEXIST;
> +				goto error_unlock;
> +			}
> +		}
> +
> +		entries = amdgpu_bo_size(vm->root.base.bo) / 8;
> +		spin_lock(&vm->status_lock);
> +		for (; i < entries; ++i) {
> +			struct amdgpu_vm_pt *pt = &vm->root.entries[i];
> +
> +			if (pt->base.bo)
> +				list_move(&pt->base.vm_status, &vm->moved);

I think this is only necessary because you clear the whole root PD BO
with amdgpu_vm_clear_bo. But could that function be more selective and
clear only the ATS entries? Maybe with an extra parameter?
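
Something like this, just to illustrate the idea (exact parameter list
aside, this is a hypothetical signature change):

	static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
				      struct amdgpu_vm *vm, struct amdgpu_bo *bo,
				      unsigned level, bool pte_support_ats,
				      bool ats_only);

	/* and in amdgpu_vm_enable_svm: */
	r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
			       adev->vm_manager.root_level,
			       true, true /* only touch the ATS entries */);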

Regards,
  Felix

> +		}
> +		spin_unlock(&vm->status_lock);
> +	}
> +
> +	r = amdgpu_pasid_bind(adev->pdev, vm->pasid);
> +	if (r)
> +		goto error_unlock;
> +
> +	r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
> +			       adev->vm_manager.root_level,
> +			       true);
> +	if (r) {
> +		amdgpu_pasid_unbind(adev->pdev, vm->pasid);
> +		goto error_unlock;
> +	}
> +
> +	vm->pte_support_ats = true;
> +
> +error_unlock:
> +	amdgpu_bo_unreserve(vm->root.base.bo);
> +	return r;
> +}
> +
>  /**
>   * amdgpu_vm_manager_init - init the VM manager
>   *
> @@ -2616,9 +2700,9 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>  
>  int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>  {
> -	union drm_amdgpu_vm *args = data;
>  	struct amdgpu_device *adev = dev->dev_private;
>  	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	union drm_amdgpu_vm *args = data;
>  	int r;
>  
>  	switch (args->in.op) {
> @@ -2631,6 +2715,8 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>  	case AMDGPU_VM_OP_UNRESERVE_VMID:
>  		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
>  		break;
> +	case AMDGPU_VM_OP_ENABLE_SVM:
> +		return amdgpu_vm_enable_svm(adev, &fpriv->vm);
>  	default:
>  		return -EINVAL;
>  	}
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index fe17b6785441..c5b13ebe8dfc 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -223,6 +223,7 @@ union drm_amdgpu_ctx {
>  /* vm ioctl */
>  #define AMDGPU_VM_OP_RESERVE_VMID	1
>  #define AMDGPU_VM_OP_UNRESERVE_VMID	2
> +#define AMDGPU_VM_OP_ENABLE_SVM		3
>  
>  struct drm_amdgpu_vm_in {
>  	/** AMDGPU_VM_OP_* */

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 01/13] drm/amdgpu: fix vcn_v1_0_dec_ring_emit_wreg
       [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (11 preceding siblings ...)
  2018-01-26 20:13   ` [PATCH 13/13] drm/amdgpu: add AMDGPU_VM_OP_ENABLE_SVM IOCTL Christian König
@ 2018-01-29 23:01   ` Felix Kuehling
       [not found]     ` <ccb040a2-27cd-6ff6-45eb-5c81a108c99d-5C7GfCeVMHo@public.gmane.org>
  12 siblings, 1 reply; 29+ messages in thread
From: Felix Kuehling @ 2018-01-29 23:01 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

This is a very cool patch series. I made some specific comments on some
of the patches. But overall this is great.

I guess your plan is to start testing the SVM programming model with
ATC, and then enable the same programming model without the IOMMU using
HMM. That means there could be working and validated tests to run on HMM.

I think 4.16 will get an IOMMUv2 driver that we worked on for Raven to
support PPR on newer IOMMU versions. Without that the GPU is not able to
make swapped or new pages resident or trigger a COW.

We're currently still working on one problem with Raven, related to the
way GFX9 retries memory accesses. Many PPR requests for the same virtual
address can be outstanding (in an IOMMU log buffer). After the first
request is handled, the GPU can continue, but the remaining requests are
still in the queue. This can result in the IOMMU driver trying to handle
a PPR for a page that's already freed by the application, which triggers
an invalid PPR callback.

An invalid PPR is like the GPU-equivalent of a segfault, and KFD
implements it like that. With the above behaviour we end up segfaulting
applications that didn't do anything wrong. I guess for your
implementation it's not a problem because you don't implement that
callback yet.

Regards,
  Felix


On 2018-01-26 03:13 PM, Christian König wrote:
> That got mixed up with the encode ring function.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 19 ++++++++++++++++++-
>  1 file changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> index 44c041a1fe68..24ebc3e296a6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> @@ -880,6 +880,22 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
>  	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
>  }
>  
> +static void vcn_v1_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
> +					uint32_t reg, uint32_t val)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +
> +	amdgpu_ring_write(ring,
> +		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
> +	amdgpu_ring_write(ring, reg << 2);
> +	amdgpu_ring_write(ring,
> +		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
> +	amdgpu_ring_write(ring, val);
> +	amdgpu_ring_write(ring,
> +		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
> +	amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
> +}
> +
>  /**
>   * vcn_v1_0_enc_ring_get_rptr - get enc read pointer
>   *
> @@ -1097,7 +1113,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
>  	.pad_ib = amdgpu_ring_generic_pad_ib,
>  	.begin_use = amdgpu_vcn_ring_begin_use,
>  	.end_use = amdgpu_vcn_ring_end_use,
> -	.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
> +	.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
>  };
>  
>  static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
> @@ -1124,6 +1140,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
>  	.pad_ib = amdgpu_ring_generic_pad_ib,
>  	.begin_use = amdgpu_vcn_ring_begin_use,
>  	.end_use = amdgpu_vcn_ring_end_use,
> +	.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
>  };
>  
>  static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 01/13] drm/amdgpu: fix vcn_v1_0_dec_ring_emit_wreg
       [not found]     ` <ccb040a2-27cd-6ff6-45eb-5c81a108c99d-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-30 13:51       ` Christian König
       [not found]         ` <4701d758-29fe-5403-f6a6-7618bbea03d7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 29+ messages in thread
From: Christian König @ 2018-01-30 13:51 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 30.01.2018 um 00:01 schrieb Felix Kuehling:
> This is a very cool patch series. I made some specific comments on some
> of the patches. But overall this is great.

Thanks, going to comment on some of those on the patches.

> I guess your plan is to start testing the SVM programming model with
> ATC, and then enable the same programming model without the IOMMU using
> HMM. That means there could be working and validated tests to run on HMM.

Yes, exactly. Well I think it would make implementing HMM much easier if 
we could rely on most of the mapping being done by the ATC instead of 
manually crafted GPUVM page tables.

> I think 4.16 will get an IOMMUv2 driver that we worked on for Raven to
> support PPR on newer IOMMU versions. Without that the GPU is not able to
> make swapped or new pages resident or trigger a COW.

Uff? So the ATC is actually not able to handle page faults?

> We're currently still working on one problem with Raven, related to the
> way GFX9 retries memory accesses. Many PPR requests for the same virtual
> address can be outstanding (in an IOMMU log buffer). After the first
> request is handled, the GPU can continue, but the remaining requests are
> still in the queue. This can result in the IOMMU driver trying to handle
> a PPR for a page that's already freed by the application, which triggers
> an invalid PPR callback.
>
> An invalid PPR is like the GPU-equivalent of a segfault, and KFD
> implements it like that. With the above behaviour we end up segfaulting
> applications that didn't do anything wrong. I guess for your
> implementation it's not a problem because you don't implement that
> callback yet.

Yeah, that is exactly the same problem I'm currently running into with HMM.

The interrupt handling is pipelined (even much much more than the ATC 
path), so what can happen is that applications free up some memory but 
we have stale page faults for that page in the pipeline.

The only valid workaround I can see is to make sure interrupts are 
processed before telling HMM that it can unmap pages, and that is a real 
show stopper for performance as far as I can see.

Regards,
Christian.

>
> Regards,
>    Felix
>
>
> On 2018-01-26 03:13 PM, Christian König wrote:
>> That got mixed up with the encode ring function.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 19 ++++++++++++++++++-
>>   1 file changed, 18 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>> index 44c041a1fe68..24ebc3e296a6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>> @@ -880,6 +880,22 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>   	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
>>   }
>>   
>> +static void vcn_v1_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
>> +					uint32_t reg, uint32_t val)
>> +{
>> +	struct amdgpu_device *adev = ring->adev;
>> +
>> +	amdgpu_ring_write(ring,
>> +		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
>> +	amdgpu_ring_write(ring, reg << 2);
>> +	amdgpu_ring_write(ring,
>> +		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
>> +	amdgpu_ring_write(ring, val);
>> +	amdgpu_ring_write(ring,
>> +		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
>> +	amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
>> +}
>> +
>>   /**
>>    * vcn_v1_0_enc_ring_get_rptr - get enc read pointer
>>    *
>> @@ -1097,7 +1113,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
>>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>>   	.begin_use = amdgpu_vcn_ring_begin_use,
>>   	.end_use = amdgpu_vcn_ring_end_use,
>> -	.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
>> +	.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
>>   };
>>   
>>   static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
>> @@ -1124,6 +1140,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
>>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>>   	.begin_use = amdgpu_vcn_ring_begin_use,
>>   	.end_use = amdgpu_vcn_ring_end_use,
>> +	.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
>>   };
>>   
>>   static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 10/13] drm/amdgpu: enable VMID PASID mapping in the ATC
       [not found]         ` <7313311d-90dc-e0ff-2333-184caf6eb9d2-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-30 13:53           ` Christian König
  0 siblings, 0 replies; 29+ messages in thread
From: Christian König @ 2018-01-30 13:53 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 29.01.2018 um 23:05 schrieb Felix Kuehling:
> Could we cache the previous VMID/PASID mapping somewhere, and only
> update+wait if it changes?

Good point. And yes that needs to be fixed, but this patch set was more 
of a proof of concept anyway.

Regards,
Christian.

>
> If this is not the right place to check it (because multiple rings can
> be using the same VMID concurrently), maybe add a flag to
> emit_flush_gpu_tlb to update the PASID mapping conditionally, and make
> the decision higher up in then VM manager.
>
> Regards,
>    Felix
>
>
> On 2018-01-26 03:13 PM, Christian König wrote:
>> Update the PASID in the ATC as well and wait for the update to finish.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 21 ++++++++++++++++++++-
>>   drivers/gpu/drm/amd/amdgpu/soc15.h    |  4 ++--
>>   2 files changed, 22 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 2c60981d2eec..0077db0a451f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -33,6 +33,7 @@
>>   #include "vega10_enum.h"
>>   #include "mmhub/mmhub_1_0_offset.h"
>>   #include "athub/athub_1_0_offset.h"
>> +#include "athub/athub_1_0_sh_mask.h"
>>   #include "oss/osssys_4_0_offset.h"
>>   
>>   #include "soc15.h"
>> @@ -375,7 +376,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>   	uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
>>   	uint64_t flags = AMDGPU_PTE_VALID;
>>   	unsigned eng = ring->vm_inv_eng;
>> -	uint32_t reg;
>> +	uint32_t reg, val;
>>   
>>   	amdgpu_gmc_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
>>   	pd_addr |= flags;
>> @@ -393,8 +394,26 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>   
>>   	amdgpu_ring_emit_wreg(ring, reg, pasid);
>>   
>> +	if (ring->funcs->vmhub == AMDGPU_GFXHUB)
>> +		reg = SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid;
>> +	else
>> +		reg = SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid;
>> +
>> +	val = 0;
>> +	val = REG_SET_FIELD(val, ATC_VMID0_PASID_MAPPING, PASID, pasid);
>> +	val = REG_SET_FIELD(val, ATC_VMID0_PASID_MAPPING, VALID, 1);
>> +	amdgpu_ring_emit_wreg(ring, reg, val);
>> +
>>   	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
>>   
>> +	/* wait for the ATC to complete */
>> +	reg = SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID_PASID_MAPPING_UPDATE_STATUS);
>> +	if (ring->funcs->vmhub == AMDGPU_GFXHUB)
>> +		val = 0x1 << vmid;
>> +	else
>> +		val = 0x10000 << vmid;
>> +	amdgpu_ring_emit_reg_wait(ring, reg, val, val);
>> +
>>   	/* wait for the invalidate to complete */
>>   	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
>>   				  1 << vmid, 1 << vmid);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
>> index f70da8a29f86..1b8833503f4c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
>> @@ -27,8 +27,8 @@
>>   #include "nbio_v6_1.h"
>>   #include "nbio_v7_0.h"
>>   
>> -#define SOC15_FLUSH_GPU_TLB_NUM_WREG		4
>> -#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT	1
>> +#define SOC15_FLUSH_GPU_TLB_NUM_WREG		5
>> +#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT	2
>>   
>>   extern const struct amd_ip_funcs soc15_common_ip_funcs;
>>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 11/13] drm/amdgpu: add DRM_AMDGPU_ATC config option
       [not found]         ` <6cf4bd37-fbf1-1fb4-496a-d62e403aea6f-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-30 13:59           ` Christian König
       [not found]             ` <7ebabb2b-1b18-8c7b-626f-d6874bdaac80-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 29+ messages in thread
From: Christian König @ 2018-01-30 13:59 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 29.01.2018 um 23:08 schrieb Felix Kuehling:
> On 2018-01-26 03:13 PM, Christian König wrote:
>> [SNIP]
>> +#ifdef CONFIG_DRM_AMDGPU_ATC
>> +	r = amd_iommu_init_device(adev->pdev, 0x10000);
> KFD queries how many PASIDs the IOMMU can support with
> amd_iommu_device_info. KFD only assigns PASIDs within that range. It can
> be much smaller than the 16-bits supported by the GPU.
>
> For a VM that uses ATC, you need to make sure it gets a PASID in the
> range supported by the IOMMU. The PASID manager already supports that
> and keeps smaller PASIDs for users that really need them.

Yeah, seen that and I'm not really keen on it.

Especially since we need multiple types of PASIDs here:
1. For GPUVM debugging and HMM faults, where we can use the full 16-bit 
range without worrying about what the IOMMU can do.
2. For the ATC use case, where we need to keep the IOMMU in the picture.

Are there any hardware limitations which block us from using a per-device 
PASID? That would simplify the whole handling quite a bit.

In addition to that, we don't really want this direct relationship between 
amdgpu/amdkfd and the amd_iommu_v2 driver.

So what do you think about moving the PASID handling into the IOMMU 
driver? And abstracting which driver is in use through the iommu_ops?

Regards,
Christian.

>
> Regards,
>    Felix
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 13/13] drm/amdgpu: add AMDGPU_VM_OP_ENABLE_SVM IOCTL
       [not found]         ` <22437f1c-cecb-1f25-cf90-9f54663bad12-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-30 14:02           ` Christian König
  0 siblings, 0 replies; 29+ messages in thread
From: Christian König @ 2018-01-30 14:02 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 29.01.2018 um 23:27 schrieb Felix Kuehling:
> Enabling SVM after the VM has been created and the PASID allocated is
> problematic because the IOMMU can support a smaller range of PASIDs than
> the GPU. Ideally SVM would be a flag during VM creation, but I see that
> doesn't work as it's done in amdgpu_driver_open_kms, not in an ioctl.
>
> Could the PASID be changed on an existing VM if necessary?

Yeah, that shouldn't be much of a problem.

Another issue is that the VM can potentially be created by the X server, 
but then used by the client with DRI3.

So we would always need a separate IOCTL to tell the kernel which process 
a VM should bind to.
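
Something along these lines, purely hypothetical, just to sketch the idea:

	/* uapi, hypothetical follow-up op (3 is ENABLE_SVM in this series) */
	#define AMDGPU_VM_OP_BIND_PROCESS	4

	/* amdgpu_vm_ioctl() */
	case AMDGPU_VM_OP_BIND_PROCESS:
		return amdgpu_vm_bind_process(adev, &fpriv->vm); /* hypothetical helper */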

Regards,
Christian.

>
> One more comment inline ...
>
> On 2018-01-26 03:13 PM, Christian König wrote:
>> Add an IOCTL to enable SVM for the current process.
>>
>> One step further towards HMM support.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  4 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 94 +++++++++++++++++++++++++++++++--
>>   include/uapi/drm/amdgpu_drm.h           |  1 +
>>   3 files changed, 94 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index b18920007624..2f424f8248a9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -897,6 +897,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>>   	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
>>   	struct amdgpu_bo_list *list;
>>   	struct amdgpu_bo *pd;
>> +	struct pci_dev *pdev;
>>   	unsigned int pasid;
>>   	int handle;
>>   
>> @@ -923,11 +924,12 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>>   	}
>>   
>>   	pasid = fpriv->vm.pasid;
>> +	pdev = fpriv->vm.pte_support_ats ? adev->pdev : NULL;
>>   	pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
>>   
>>   	amdgpu_vm_fini(adev, &fpriv->vm);
>>   	if (pasid)
>> -		amdgpu_pasid_free_delayed(pd->tbo.resv, NULL, pasid);
>> +		amdgpu_pasid_free_delayed(pd->tbo.resv, pdev, pasid);
>>   	amdgpu_bo_unref(&pd);
>>   
>>   	idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 5e53b7a2d4d5..84f41385677c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -257,6 +257,24 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
>>   	return ready;
>>   }
>>   
>> +/**
>> + * amdgpu_vm_root_ats_entries - number of ATS entries in the root PD
>> + *
>> + * @adev: amdgpu_device pointer
>> + *
>> + * Returns number of entries in the root PD which should be initialized for ATS
>> + * use.
>> + */
>> +static unsigned amdgpu_vm_root_ats_entries(struct amdgpu_device *adev)
>> +{
>> +	unsigned level = adev->vm_manager.root_level;
>> +	unsigned shift;
>> +
>> +	shift = amdgpu_vm_level_shift(adev, level);
>> +	shift += AMDGPU_GPU_PAGE_SHIFT;
>> +	return AMDGPU_VA_HOLE_START >> shift;
>> +}
>> +
>>   /**
>>    * amdgpu_vm_clear_bo - initially clear the PDs/PTs
>>    *
>> @@ -283,9 +301,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>>   
>>   	if (pte_support_ats) {
>>   		if (level == adev->vm_manager.root_level) {
>> -			ats_entries = amdgpu_vm_level_shift(adev, level);
>> -			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
>> -			ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
>> +			ats_entries = amdgpu_vm_root_ats_entries(adev);
>>   			ats_entries = min(ats_entries, entries);
>>   			entries -= ats_entries;
>>   		} else {
>> @@ -329,6 +345,9 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>>   
>>   	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>>   
>> +	amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
>> +			 AMDGPU_FENCE_OWNER_VM, false);
>> +
>>   	WARN_ON(job->ibs[0].length_dw > 64);
>>   	r = amdgpu_job_submit(job, ring, &vm->entity,
>>   			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
>> @@ -2557,6 +2576,71 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
>>   	return true;
>>   }
>>   
>> +/**
>> + * amdgpu_vm_enable_svm - enable SVM
>> + *
>> + * @adev: amdgpu_device pointer
>> + * @vm: VM to enable SVM
>> + *
>> + * Initialize SVM.
>> + */
>> +int amdgpu_vm_enable_svm(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>> +{
>> +	int r;
>> +
>> +	if (!vm->pasid)
>> +		return -ENODEV;
>> +
>> +	r = amdgpu_bo_reserve(vm->root.base.bo, false);
>> +	if (r)
>> +		return r;
>> +
>> +	if (vm->pte_support_ats) {
>> +		r = -EALREADY;
>> +		goto error_unlock;
>> +	}
>> +
>> +	if (vm->root.entries) {
>> +		unsigned i, entries;
>> +
>> +		entries = amdgpu_vm_root_ats_entries(adev);
>> +		for (i = 0; i < entries; ++i) {
>> +			if (vm->root.entries[i].base.bo) {
>> +				r = -EEXIST;
>> +				goto error_unlock;
>> +			}
>> +		}
>> +
>> +		entries = amdgpu_bo_size(vm->root.base.bo) / 8;
>> +		spin_lock(&vm->status_lock);
>> +		for (; i < entries; ++i) {
>> +			struct amdgpu_vm_pt *pt = &vm->root.entries[i];
>> +
>> +			if (pt->base.bo)
>> +				list_move(&pt->base.vm_status, &vm->moved);
> I think this is only necessary because you clear the whole root PD BO
> with amdgpu_vm_clear_bo. But could that function be more selective and
> update only the clear the ATS entries? Maybe with an extra parameter?
>
> Regards,
>    Felix
>
>> +		}
>> +		spin_unlock(&vm->status_lock);
>> +	}
>> +
>> +	r = amdgpu_pasid_bind(adev->pdev, vm->pasid);
>> +	if (r)
>> +		goto error_unlock;
>> +
>> +	r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
>> +			       adev->vm_manager.root_level,
>> +			       true);
>> +	if (r) {
>> +		amdgpu_pasid_unbind(adev->pdev, vm->pasid);
>> +		goto error_unlock;
>> +	}
>> +
>> +	vm->pte_support_ats = true;
>> +
>> +error_unlock:
>> +	amdgpu_bo_unreserve(vm->root.base.bo);
>> +	return r;
>> +}
>> +
>>   /**
>>    * amdgpu_vm_manager_init - init the VM manager
>>    *
>> @@ -2616,9 +2700,9 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>>   
>>   int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>>   {
>> -	union drm_amdgpu_vm *args = data;
>>   	struct amdgpu_device *adev = dev->dev_private;
>>   	struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +	union drm_amdgpu_vm *args = data;
>>   	int r;
>>   
>>   	switch (args->in.op) {
>> @@ -2631,6 +2715,8 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>>   	case AMDGPU_VM_OP_UNRESERVE_VMID:
>>   		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
>>   		break;
>> +	case AMDGPU_VM_OP_ENABLE_SVM:
>> +		return amdgpu_vm_enable_svm(adev, &fpriv->vm);
>>   	default:
>>   		return -EINVAL;
>>   	}
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index fe17b6785441..c5b13ebe8dfc 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -223,6 +223,7 @@ union drm_amdgpu_ctx {
>>   /* vm ioctl */
>>   #define AMDGPU_VM_OP_RESERVE_VMID	1
>>   #define AMDGPU_VM_OP_UNRESERVE_VMID	2
>> +#define AMDGPU_VM_OP_ENABLE_SVM		3
>>   
>>   struct drm_amdgpu_vm_in {
>>   	/** AMDGPU_VM_OP_* */

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 01/13] drm/amdgpu: fix vcn_v1_0_dec_ring_emit_wreg
       [not found]         ` <4701d758-29fe-5403-f6a6-7618bbea03d7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-01-30 16:44           ` Felix Kuehling
  0 siblings, 0 replies; 29+ messages in thread
From: Felix Kuehling @ 2018-01-30 16:44 UTC (permalink / raw)
  To: christian.koenig-5C7GfCeVMHo, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2018-01-30 08:51 AM, Christian König wrote:
> Am 30.01.2018 um 00:01 schrieb Felix Kuehling:
>> This is a very cool patch series. I made some specific comments on some
>> of the patches. But overall this is great.
>
> Thanks, going to comment on some of those on the patches.
>
>> I guess your plan is to start testing the SVM programming model with
>> ATC, and then enable the same programming model without the IOMMU using
>> HMM. That means there could be working and validated tests to run on
>> HMM.
>
> Yes, exactly. Well I think it would make implementing HMM much easier
> if we could rely on most of the mapping being done by the ATC instead
> of manually crafted GPUVM page tables.
>> I think 4.16 will get an IOMMUv2 driver that we worked on for Raven to
>> support PPR on newer IOMMU versions. Without that the GPU is not able to
>> make swapped or new pages resident or trigger a COW.
>
> Uff? So the ATC is actually not able to handle page faults?

It's supposed to. It worked on Carrizo. As I understand it, the new
IOMMUv2 on Raven needs the driver to enable that feature per-device.
That fix is coming in 4.16.

>
>> We're currently still working on one problem with Raven, related to the
>> way GFX9 retries memory accesses. Many PPR requests for the same virtual
>> address can be outstanding (in an IOMMU log buffer). After the first
>> request is handled, the GPU can continue, but the remaining requests are
>> still in the queue. This can result in the IOMMU driver trying to handle
>> a PPR for a page that's already freed by the application, which triggers
>> an invalid PPR callback.
>>
>> An invalid PPR is like the GPU-equivalent of a segfault, and KFD
>> implements it like that. With the above behaviour we end up segfaulting
>> applications that didn't do anything wrong. I guess for your
>> implementation it's not a problem because you don't implement that
>> callback yet.
>
> Yeah, that is exactly the same problem I'm currently running into with
> HMM.
>
> The interrupt handling is pipelined (even much much more than the ATC
> path), so what can happen is that applications free up some memory but
> we have stale page faults for that page in the pipeline.
>
> The only valid workaround I can see is to make sure interrupts are
> processed before returning to HMM that it can unmap pages, and that is
> a really show stopper for performance as far as I can see.

For ATC my idea is to use an invalidate_range_start MMU notifier to wait
for pending PPRs to get processed before letting the kernel unmap pages.
The underlying assumption is this: when an application frees memory, it
must be sure it's not using that memory any more. So we don't expect any
new faults for the address. We only need to wait for pending faults to
flush out of the pipe.

For HMM I think the prescreen interrupt handler stage I added should
help, so you only see one interrupt per faulting address. But you need
to figure out the right moment to clear the fault after updating the
page table.

Regards,
  Felix

>
> Regards,
> Christian.
>
>>
>> Regards,
>>    Felix
>>
>>
>> On 2018-01-26 03:13 PM, Christian König wrote:
>>> That got mixed up with the encode ring function.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 19 ++++++++++++++++++-
>>>   1 file changed, 18 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>>> index 44c041a1fe68..24ebc3e296a6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
>>> @@ -880,6 +880,22 @@ static void
>>> vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>>       vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
>>>   }
>>>   +static void vcn_v1_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
>>> +                    uint32_t reg, uint32_t val)
>>> +{
>>> +    struct amdgpu_device *adev = ring->adev;
>>> +
>>> +    amdgpu_ring_write(ring,
>>> +        PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
>>> +    amdgpu_ring_write(ring, reg << 2);
>>> +    amdgpu_ring_write(ring,
>>> +        PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
>>> +    amdgpu_ring_write(ring, val);
>>> +    amdgpu_ring_write(ring,
>>> +        PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
>>> +    amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
>>> +}
>>> +
>>>   /**
>>>    * vcn_v1_0_enc_ring_get_rptr - get enc read pointer
>>>    *
>>> @@ -1097,7 +1113,7 @@ static const struct amdgpu_ring_funcs
>>> vcn_v1_0_dec_ring_vm_funcs = {
>>>       .pad_ib = amdgpu_ring_generic_pad_ib,
>>>       .begin_use = amdgpu_vcn_ring_begin_use,
>>>       .end_use = amdgpu_vcn_ring_end_use,
>>> -    .emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
>>> +    .emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
>>>   };
>>>     static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs
>>> = {
>>> @@ -1124,6 +1140,7 @@ static const struct amdgpu_ring_funcs
>>> vcn_v1_0_enc_ring_vm_funcs = {
>>>       .pad_ib = amdgpu_ring_generic_pad_ib,
>>>       .begin_use = amdgpu_vcn_ring_begin_use,
>>>       .end_use = amdgpu_vcn_ring_end_use,
>>> +    .emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
>>>   };
>>>     static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 11/13] drm/amdgpu: add DRM_AMDGPU_ATC config option
       [not found]             ` <7ebabb2b-1b18-8c7b-626f-d6874bdaac80-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-01-30 16:53               ` Felix Kuehling
       [not found]                 ` <c39b444b-49a1-771a-65ba-ae37ba643673-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 29+ messages in thread
From: Felix Kuehling @ 2018-01-30 16:53 UTC (permalink / raw)
  To: christian.koenig-5C7GfCeVMHo, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


On 2018-01-30 08:59 AM, Christian König wrote:
> Am 29.01.2018 um 23:08 schrieb Felix Kuehling:
>> On 2018-01-26 03:13 PM, Christian König wrote:
>>> [SNIP]
>>> +#ifdef CONFIG_DRM_AMDGPU_ATC
>>> +    r = amd_iommu_init_device(adev->pdev, 0x10000);
>> KFD queries how many PASIDs the IOMMU can support with
>> amd_iommu_device_info. KFD only assigns PASIDs within that range. It can
>> be much smaller than the 16-bits supported by the GPU.
>>
>> For a VM that uses ATC, you need to make sure it gets a PASID in the
>> range supported by the IOMMU. The PASID manager already supports that
>> and keeps smaller PASIDs for users that really need them.
>
> Yeah, I've seen that and I'm not really keen on it.
>
> Especially since we need multiple types of PASIDs here:
> 1. For GPUVM debugging and HMM faults, where we can use the full 16-bit
> range without worrying about what the IOMMU can do.
> 2. For the ATC use case, where we need to keep the IOMMU in the picture.
>
> Are there any hardware limitations which block us from using a per-device
> PASID? That would simplify the whole handling quite a bit.

Conceptually, PASIDs are device-specific process IDs. KFD uses the same
PASID on all devices, but that's not strictly necessary.

Currently you allocate PASIDs per VM or per open device file, so you'll
end up with different PASIDs on each device. I think you can even end up
with multiple PASIDs per process, even on the same device, if you create
multiple VMs. That doesn't seem to be a problem for the IOMMU driver.
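
To make the range handling concrete, here is a toy sketch of that policy.
The IOMMU-visible limit would come from a query such as
amd_iommu_device_info() mentioned above; the names and data structures
below are made up for illustration and are not the real PASID manager:

#include <stdbool.h>

#define PASID_MAX (1u << 16)            /* 16-bit PASIDs on the GPU side */

static bool pasid_used[PASID_MAX];      /* toy allocator state */
static unsigned int iommu_max_pasids;   /* limit reported by the IOMMU at init */

/*
 * Allocate a PASID.  Users that go through the IOMMU/ATC must stay below
 * the IOMMU limit; everybody else starts above it, so the small PASIDs
 * are kept for the users that really need them.
 */
static int pasid_alloc(bool needs_iommu)
{
        unsigned int first = needs_iommu ? 1 : iommu_max_pasids;
        unsigned int last  = needs_iommu ? iommu_max_pasids : PASID_MAX;
        unsigned int pasid;

        if (first == 0)
                first = 1;              /* PASID 0 means "no PASID" */

        for (pasid = first; pasid < last; pasid++) {
                if (!pasid_used[pasid]) {
                        pasid_used[pasid] = true;
                        return (int)pasid;
                }
        }
        /* non-IOMMU users may still fall back to the low range */
        for (pasid = 1; !needs_iommu && pasid < iommu_max_pasids; pasid++) {
                if (!pasid_used[pasid]) {
                        pasid_used[pasid] = true;
                        return (int)pasid;
                }
        }
        return -1;                      /* no PASID available */
}

static void pasid_free(unsigned int pasid)
{
        if (pasid > 0 && pasid < PASID_MAX)
                pasid_used[pasid] = false;
}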

>
> In addition to that, we don't really want this direct relationship
> between amdgpu/amdkfd and the amd_iommu_v2 driver.

Not sure what you mean. Right now amdgpu and amdkfd don't coordinate
their PASIDs (other than using a common allocator to avoid using the
same PASID for different things).

>
> So what do you think about moving the PASID handling into the IOMMU
> driver? And abstracting which driver is in use through the iommu_ops?

What if both drivers want to use the IOMMU?

Regards,
  Felix

>
> Regards,
> Christian.
>
>>
>> Regards,
>>    Felix



* Re: [PATCH 11/13] drm/amdgpu: add DRM_AMDGPU_ATC config option
       [not found]                 ` <c39b444b-49a1-771a-65ba-ae37ba643673-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-31 12:43                   ` Oded Gabbay
       [not found]                     ` <CAFCwf12ho2gXrBUHtaKRV-mX2kRk45xeu2o=F0-=Q+GFkAs=RA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 29+ messages in thread
From: Oded Gabbay @ 2018-01-31 12:43 UTC (permalink / raw)
  To: Felix Kuehling; +Cc: Christian König, amd-gfx list

On Tue, Jan 30, 2018 at 6:53 PM, Felix Kuehling <felix.kuehling@amd.com> wrote:
>
> [SNIP]
>
> What if both drivers want to use the IOMMU?
>

There was some discussion last year about a generic PASID allocator in
the iommu subsystem:
https://lists.linuxfoundation.org/pipermail/iommu/2017-June/022159.html

Unfortunately, I don't think it got any further than that.

Oded



* Re: [PATCH 11/13] drm/amdgpu: add DRM_AMDGPU_ATC config option
       [not found]                     ` <CAFCwf12ho2gXrBUHtaKRV-mX2kRk45xeu2o=F0-=Q+GFkAs=RA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2018-01-31 13:04                       ` Christian König
       [not found]                         ` <6b19483a-79f3-e5e0-307f-ead73b4e2b7a-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 29+ messages in thread
From: Christian König @ 2018-01-31 13:04 UTC (permalink / raw)
  To: Oded Gabbay, Felix Kuehling, jean-philippe.brucker-5wv7dgnIgG8
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, amd-gfx list

Adding Jean and the IOMMU list as well.

Am 31.01.2018 um 13:43 schrieb Oded Gabbay:
> On Tue, Jan 30, 2018 at 6:53 PM, Felix Kuehling <felix.kuehling-5C7GfCeVMHo@public.gmane.org> wrote:
>> [SNIP]
> There was some discussion last year about a generic PASID allocator in
> the iommu subsystem:
> https://lists.linuxfoundation.org/pipermail/iommu/2017-June/022159.html
>
> Unfortunately, I don't think it got any further than that.

Yeah, that sounds exactly like what I had in mind as well.

Jean, any updates on that topic? We are rather interested in common PASID
handling as well.

Is anybody already working on this? It sounds mostly like moving code
around and creating a common interface.
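
A very rough sketch of the shape such a shared allocator could take (the
names below are hypothetical, invented only for illustration, not an
existing kernel interface):

#include <linux/gfp.h>
#include <linux/idr.h>

static DEFINE_IDA(iommu_pasid_ida);

/* Allocate a PASID in [min_pasid, max_pasid); PASID 0 stays reserved. */
int iommu_pasid_alloc(unsigned int min_pasid, unsigned int max_pasid)
{
        if (min_pasid == 0)
                min_pasid = 1;
        return ida_simple_get(&iommu_pasid_ida, min_pasid, max_pasid,
                              GFP_KERNEL);
}

void iommu_pasid_free(int pasid)
{
        ida_simple_remove(&iommu_pasid_ida, pasid);
}

amdgpu could then ask for the full 16-bit range, while users that have to
go through the IOMMU pass in the limit it reports, and everybody draws
from the same space.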

Thanks,
Christian.


* Re: [PATCH 11/13] drm/amdgpu: add DRM_AMDGPU_ATC config option
       [not found]                         ` <6b19483a-79f3-e5e0-307f-ead73b4e2b7a-5C7GfCeVMHo@public.gmane.org>
@ 2018-01-31 13:15                           ` Jean-Philippe Brucker
  0 siblings, 0 replies; 29+ messages in thread
From: Jean-Philippe Brucker @ 2018-01-31 13:15 UTC (permalink / raw)
  To: Christian König, Oded Gabbay, Felix Kuehling
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, amd-gfx list

Hi Christian,

On 31/01/18 13:04, Christian König wrote:
> Adding Jean and the IOMMU list as well.
> 
> Am 31.01.2018 um 13:43 schrieb Oded Gabbay:
>> On Tue, Jan 30, 2018 at 6:53 PM, Felix Kuehling <felix.kuehling@amd.com> wrote:
>>> [SNIP]
>> There was some discussion last year about a generic PASID allocator in
>> the iommu subsystem:
>> https://lists.linuxfoundation.org/pipermail/iommu/2017-June/022159.html
>>
>> Unfortunately, I don't think it got any further than that.

I also sent an RFCv2 in October:
https://www.spinics.net/lists/arm-kernel/msg609771.html

> Yeah, that sounds exactly like what I had in mind as well.
> 
> Jean any updates on that topic? We are rather interested in common PASID 
> handling as well.

Good to hear. I plan to send a new version of that series after the merge
window.

Thanks,
Jean


end of thread (newest: 2018-01-31 13:15 UTC)

Thread overview: 29+ messages
2018-01-26 20:13 [PATCH 01/13] drm/amdgpu: fix vcn_v1_0_dec_ring_emit_wreg Christian König
     [not found] ` <20180126201326.8829-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-01-26 20:13   ` [PATCH 02/13] drm/amdgpu: drop root shadow sync Christian König
     [not found]     ` <20180126201326.8829-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-01-29  9:14       ` He, Roger
2018-01-26 20:13   ` [PATCH 03/13] drm/amdgpu: add new emit_reg_wait callback Christian König
2018-01-26 20:13   ` [PATCH 04/13] drm/amdgpu: add gfx_v9_0_ring_emit_reg_wait implementation Christian König
2018-01-26 20:13   ` [PATCH 05/13] drm/amdgpu: implement sdma_v4_0_ring_emit_reg_wait Christian König
2018-01-26 20:13   ` [PATCH 06/13] drm/amdgpu: implement uvd_v7_0_(enc_|)ring_emit_reg_wait Christian König
     [not found]     ` <20180126201326.8829-6-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-01-29 21:28       ` Felix Kuehling
2018-01-26 20:13   ` [PATCH 07/13] drm/amdgpu: implement vce_v4_0_emit_reg_wait Christian König
     [not found]     ` <20180126201326.8829-7-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-01-29 21:30       ` Felix Kuehling
2018-01-26 20:13   ` [PATCH 08/13] drm/amdgpu: implement vcn_v1_0_(dec|enc)_ring_emit_reg_wait Christian König
2018-01-26 20:13   ` [PATCH 09/13] drm/amdgpu: move waiting for VM flush into gmc_v9_0_emit_flush_gpu_tlb Christian König
2018-01-26 20:13   ` [PATCH 10/13] drm/amdgpu: enable VMID PASID mapping in the ATC Christian König
     [not found]     ` <20180126201326.8829-10-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-01-29 22:05       ` Felix Kuehling
     [not found]         ` <7313311d-90dc-e0ff-2333-184caf6eb9d2-5C7GfCeVMHo@public.gmane.org>
2018-01-30 13:53           ` Christian König
2018-01-26 20:13   ` [PATCH 11/13] drm/amdgpu: add DRM_AMDGPU_ATC config option Christian König
     [not found]     ` <20180126201326.8829-11-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-01-29 22:08       ` Felix Kuehling
     [not found]         ` <6cf4bd37-fbf1-1fb4-496a-d62e403aea6f-5C7GfCeVMHo@public.gmane.org>
2018-01-30 13:59           ` Christian König
     [not found]             ` <7ebabb2b-1b18-8c7b-626f-d6874bdaac80-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-01-30 16:53               ` Felix Kuehling
     [not found]                 ` <c39b444b-49a1-771a-65ba-ae37ba643673-5C7GfCeVMHo@public.gmane.org>
2018-01-31 12:43                   ` Oded Gabbay
     [not found]                     ` <CAFCwf12ho2gXrBUHtaKRV-mX2kRk45xeu2o=F0-=Q+GFkAs=RA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2018-01-31 13:04                       ` Christian König
     [not found]                         ` <6b19483a-79f3-e5e0-307f-ead73b4e2b7a-5C7GfCeVMHo@public.gmane.org>
2018-01-31 13:15                           ` Jean-Philippe Brucker
2018-01-26 20:13   ` [PATCH 12/13] drm/amdgpu: add amdgpu_pasid_bind/undbing Christian König
2018-01-26 20:13   ` [PATCH 13/13] drm/amdgpu: add AMDGPU_VM_OP_ENABLE_SVM IOCTL Christian König
     [not found]     ` <20180126201326.8829-13-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-01-29 22:27       ` Felix Kuehling
     [not found]         ` <22437f1c-cecb-1f25-cf90-9f54663bad12-5C7GfCeVMHo@public.gmane.org>
2018-01-30 14:02           ` Christian König
2018-01-29 23:01   ` [PATCH 01/13] drm/amdgpu: fix vcn_v1_0_dec_ring_emit_wreg Felix Kuehling
     [not found]     ` <ccb040a2-27cd-6ff6-45eb-5c81a108c99d-5C7GfCeVMHo@public.gmane.org>
2018-01-30 13:51       ` Christian König
     [not found]         ` <4701d758-29fe-5403-f6a6-7618bbea03d7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-01-30 16:44           ` Felix Kuehling
