All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/amdgpu:fix cannot receive rcv/ack irq bug
@ 2017-05-03  3:48 Monk Liu
       [not found] ` <1493783292-2661-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Monk Liu @ 2017-05-03  3:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

Change-Id: Ie8672e0c9358d9542810ce05c822d9367249bbd7
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 1493301..712f36e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -296,11 +296,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
 {
 	int r;
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
 	if (r)
 		return r;
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
 	if (r) {
 		amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
 		return r;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox
       [not found] ` <1493783292-2661-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-03  3:48   ` Monk Liu
       [not found]     ` <1493783292-2661-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-05-03  3:48   ` [PATCH 3/6] drm/amdgpu:re-write sriov_reinit_early/late Monk Liu
                     ` (6 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Monk Liu @ 2017-05-03  3:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

If an SR-IOV GPU reset is invoked by a job timeout, it runs
in the global work-queue, which is very slow, so it is better not to
call msleep; otherwise it takes a long time to get the CPU back.

so make below changes:

1: Change msleep 1 to mdelay 5
2: Ignore an ack failure from the PF after the timeout,
   because a VF FLR clears the ack; sometimes the VF FLR completes
   before poll_ack begins, so the missing ack can be ignored

TODO:
Put job_timedout (and the following gpu reset) in a driver thread,
instead of the global work_struct.

Change-Id: I4608c67b55c67c88597e03eee35a126d7e850839
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 18 ++++++++++--------
 drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 10 +++++-----
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 712f36e..e967a7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
 						     mmBIF_BX_PF0_MAILBOX_CONTROL));
@@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 	r = xgpu_ai_mailbox_rcv_msg(adev, event);
 	while (r) {
 		if (timeout <= 0) {
-			pr_err("Doesn't get ack from pf.\n");
+			pr_err("Doesn't get msg:%d from pf.\n", event);
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		r = xgpu_ai_mailbox_rcv_msg(adev, event);
 	}
@@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 	/* start to poll ack */
 	r = xgpu_ai_poll_ack(adev);
 	if (r)
-		return r;
+		pr_err("Doesn't get ack from pf, continue\n");
 
 	xgpu_ai_mailbox_set_valid(adev, false);
 
@@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 		req == IDH_REQ_GPU_FINI_ACCESS ||
 		req == IDH_REQ_GPU_RESET_ACCESS) {
 		r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
-		if (r)
+		if (r) {
+			pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
 			return r;
+		}
 	}
 
 	return 0;
@@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
 					struct amdgpu_irq_src *source,
 					struct amdgpu_iv_entry *entry)
 {
-	DRM_DEBUG("get ack intr and do nothing.\n");
+	printk("get ack intr and do nothing.\n");
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 7bdc51b..f0d64f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev)
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
 	}
@@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		r = xgpu_vi_mailbox_rcv_msg(adev, event);
 	}
@@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev,
 		request == IDH_REQ_GPU_RESET_ACCESS) {
 		r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
 		if (r)
-			return r;
+			pr_err("Doesn't get ack from pf, continue\n");
 	}
 
 	return 0;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 3/6] drm/amdgpu:re-write sriov_reinit_early/late
       [not found] ` <1493783292-2661-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-05-03  3:48   ` [PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox Monk Liu
@ 2017-05-03  3:48   ` Monk Liu
       [not found]     ` <1493783292-2661-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-05-03  3:48   ` [PATCH 4/6] drm/amdgpu:cleanups KIQ ring_funcs emit_frame_size Monk Liu
                     ` (5 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Monk Liu @ 2017-05-03  3:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

1. This way we make those routines compatible with the sequence
   requirements of both Tonga and Vega10.
2. Skip PSP hw init when doing TDR: for an SR-IOV device
   the ucode is not lost after a VF FLR, so there is no need to
   invoke PSP to reload the ucode again.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 63 ++++++++++++++++++------------
 1 file changed, 39 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5161c20..5573792 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1718,19 +1718,27 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev)
 {
 	int i, r;
 
-	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_blocks[i].status.valid)
-			continue;
-
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
-			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+	static enum amd_ip_block_type ip_order[] = {
+		AMD_IP_BLOCK_TYPE_GMC,
+		AMD_IP_BLOCK_TYPE_COMMON,
+		AMD_IP_BLOCK_TYPE_GFXHUB,
+		AMD_IP_BLOCK_TYPE_MMHUB,
+		AMD_IP_BLOCK_TYPE_IH,
+	};
+
+	for (i = 0; i < sizeof(ip_order)/sizeof(ip_order[0]); i++) {
+		int j;
+		struct amdgpu_ip_block *block;
+
+		for (j = 0; j < adev->num_ip_blocks; j++) {
+			block = &adev->ip_blocks[j];
+
+			if (block->version->type != ip_order[i] ||
+				!block->status.valid)
+				continue;
 
-		if (r) {
-			DRM_ERROR("resume of IP block <%s> failed %d\n",
-				  adev->ip_blocks[i].version->funcs->name, r);
-			return r;
+			r = block->version->funcs->hw_init(adev);
+			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
 		}
 	}
 
@@ -1741,20 +1749,27 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev)
 {
 	int i, r;
 
-	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_blocks[i].status.valid)
-			continue;
+	static enum amd_ip_block_type ip_order[] = {
+		AMD_IP_BLOCK_TYPE_SMC,
+		AMD_IP_BLOCK_TYPE_DCE,
+		AMD_IP_BLOCK_TYPE_GFX,
+		AMD_IP_BLOCK_TYPE_SDMA,
+		AMD_IP_BLOCK_TYPE_VCE,
+	};
 
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
-			continue;
+	for (i = 0; i < sizeof(ip_order)/sizeof(ip_order[0]); i++) {
+		int j;
+		struct amdgpu_ip_block *block;
 
-		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
-		if (r) {
-			DRM_ERROR("resume of IP block <%s> failed %d\n",
-				  adev->ip_blocks[i].version->funcs->name, r);
-			return r;
+		for (j = 0; j < adev->num_ip_blocks; j++) {
+			block = &adev->ip_blocks[j];
+
+			if (block->version->type != ip_order[i] ||
+				!block->status.valid)
+				continue;
+
+			r = block->version->funcs->hw_init(adev);
+			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
 		}
 	}
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 4/6] drm/amdgpu:cleanups KIQ ring_funcs emit_frame_size
       [not found] ` <1493783292-2661-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-05-03  3:48   ` [PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox Monk Liu
  2017-05-03  3:48   ` [PATCH 3/6] drm/amdgpu:re-write sriov_reinit_early/late Monk Liu
@ 2017-05-03  3:48   ` Monk Liu
       [not found]     ` <1493783292-2661-4-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-05-03  3:48   ` [PATCH 5/6] drm/amdgpu:kiq reg access need timeout(v2) Monk Liu
                     ` (4 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Monk Liu @ 2017-05-03  3:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

since we don't need hdp flush/inval for KIQ anymore

Change-Id: I8518f479afebb73c68ef922880f92dae53b665b9
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 --
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 --
 2 files changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 9629f3a..022a319 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6988,8 +6988,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v8_0_ring_emit_gds_switch */
-		7 + /* gfx_v8_0_ring_emit_hdp_flush */
-		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
 		17 + /* gfx_v8_0_ring_emit_vm_flush */
 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 2b2a2c2..6d5b66c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3620,8 +3620,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
-		7 + /* gfx_v9_0_ring_emit_hdp_flush */
-		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
 		24 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 5/6] drm/amdgpu:kiq reg access need timeout(v2)
       [not found] ` <1493783292-2661-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-05-03  3:48   ` [PATCH 4/6] drm/amdgpu:cleanups KIQ ring_funcs emit_frame_size Monk Liu
@ 2017-05-03  3:48   ` Monk Liu
       [not found]     ` <1493783292-2661-5-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-05-03  3:48   ` [PATCH 6/6] drm/amdgpu:PTE flag should be 64 bit width Monk Liu
                     ` (3 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Monk Liu @ 2017-05-03  3:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

This prevents waiting on the fence forever if an FLR occurs
during a register access.

v2:
Use a #define instead of a hard-coded value for the timeout (in msec).

Change-Id: I32cc219a08f5a67654beb26c45d1b95d2b60cc96
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 966bf7f..757cbc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,6 +22,7 @@
  */
 
 #include "amdgpu.h"
+#define MAX_KIQ_REG_WAIT	100000
 
 int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
 {
@@ -128,10 +129,12 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 	amdgpu_ring_commit(ring);
 	mutex_unlock(&kiq->ring_mutex);
 
-	r = fence_wait(f, false);
-	if (r)
-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+	r = fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
 	fence_put(f);
+	if (r < 1) {
+		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+		return ~0;
+	}
 
 	val = adev->wb.wb[adev->virt.reg_val_offs];
 
@@ -154,8 +157,8 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 	amdgpu_ring_commit(ring);
 	mutex_unlock(&kiq->ring_mutex);
 
-	r = fence_wait(f, false);
-	if (r)
+	r = fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
+	if (r < 1)
 		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
 	fence_put(f);
 }
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 6/6] drm/amdgpu:PTE flag should be 64 bit width
       [not found] ` <1493783292-2661-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (3 preceding siblings ...)
  2017-05-03  3:48   ` [PATCH 5/6] drm/amdgpu:kiq reg access need timeout(v2) Monk Liu
@ 2017-05-03  3:48   ` Monk Liu
       [not found]     ` <1493783292-2661-6-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-05-03  6:24   ` [PATCH 1/6] drm/amdgpu:fix cannot receive rcv/ack irq bug Yu, Xiangliang
                     ` (2 subsequent siblings)
  7 siblings, 1 reply; 23+ messages in thread
From: Monk Liu @ 2017-05-03  3:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

Otherwise we lose the high 32 bits of the PTE flags, which leads
to an incorrect MTYPE on Vega10.

Change-Id: I1b0c7b8df14e340a36d4d2a72c6c03f469fdc29c
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 11c6c44..c3fb2f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -765,7 +765,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
 {
 	struct amdgpu_ttm_tt *gtt, *tmp;
 	struct ttm_mem_reg bo_mem;
-	uint32_t flags;
+	uint64_t flags;
 	int r;
 
 	bo_mem.mem_type = TTM_PL_TT;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* RE: [PATCH 1/6] drm/amdgpu:fix cannot receive rcv/ack irq bug
       [not found] ` <1493783292-2661-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (4 preceding siblings ...)
  2017-05-03  3:48   ` [PATCH 6/6] drm/amdgpu:PTE flag should be 64 bit width Monk Liu
@ 2017-05-03  6:24   ` Yu, Xiangliang
  2017-05-03  9:05   ` Christian König
  2017-05-03 15:19   ` Alex Deucher
  7 siblings, 0 replies; 23+ messages in thread
From: Yu, Xiangliang @ 2017-05-03  6:24 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

Reviewed-by: Xiangliang Yu <Xiangliang.Yu@amd.com>


Thanks!
Xiangliang Yu

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Wednesday, May 03, 2017 11:48 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk <Monk.Liu@amd.com>
> Subject: [PATCH 1/6] drm/amdgpu:fix cannot receive rcv/ack irq bug
> 
> Change-Id: Ie8672e0c9358d9542810ce05c822d9367249bbd7
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index 1493301..712f36e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -296,11 +296,11 @@ int xgpu_ai_mailbox_add_irq_id(struct
> amdgpu_device *adev)  {
>  	int r;
> 
> -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135,
> &adev->virt.rcv_irq);
> +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135,
> +&adev->virt.rcv_irq);
>  	if (r)
>  		return r;
> 
> -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138,
> &adev->virt.ack_irq);
> +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138,
> +&adev->virt.ack_irq);
>  	if (r) {
>  		amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
>  		return r;
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox
       [not found]     ` <1493783292-2661-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-03  6:27       ` Yu, Xiangliang
  2017-05-03  9:05       ` Christian König
  1 sibling, 0 replies; 23+ messages in thread
From: Yu, Xiangliang @ 2017-05-03  6:27 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

Reviewed-by: Xiangliang Yu <Xiangliang.Yu@amd.com>


Thanks!
Xiangliang Yu


> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Wednesday, May 03, 2017 11:48 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk <Monk.Liu@amd.com>
> Subject: [PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox
> 
> if sriov gpu reset is invoked by job timeout, it is run in a global work-queue
> which is very slow and better not call msleep ortherwise it takes long time to
> get back CPU.
> 
> so make below changes:
> 
> 1: Change msleep 1 to mdelay 5
> 2: Ignore the ack fail from pf after time out,
>    because VF FLR will clear ack, sometime VF FLR is done
>    prior to the beginning of poll_ack so we can ignore this ack
> 
> TODO:
> Put job_timedout (and the following gpu reset) in a driver thread, instead of
> the global work_struct.
> 
> Change-Id: I4608c67b55c67c88597e03eee35a126d7e850839
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 18 ++++++++++--------
> drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 10 +++++-----
>  2 files changed, 15 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index 712f36e..e967a7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device
> *adev)
>  			r = -ETIME;
>  			break;
>  		}
> -		msleep(1);
> -		timeout -= 1;
> +		mdelay(5);
> +		timeout -= 5;
> 
>  		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
> 
> mmBIF_BX_PF0_MAILBOX_CONTROL)); @@ -141,12 +141,12 @@ static int
> xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
>  	r = xgpu_ai_mailbox_rcv_msg(adev, event);
>  	while (r) {
>  		if (timeout <= 0) {
> -			pr_err("Doesn't get ack from pf.\n");
> +			pr_err("Doesn't get msg:%d from pf.\n", event);
>  			r = -ETIME;
>  			break;
>  		}
> -		msleep(1);
> -		timeout -= 1;
> +		mdelay(5);
> +		timeout -= 5;
> 
>  		r = xgpu_ai_mailbox_rcv_msg(adev, event);
>  	}
> @@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct
> amdgpu_device *adev,
>  	/* start to poll ack */
>  	r = xgpu_ai_poll_ack(adev);
>  	if (r)
> -		return r;
> +		pr_err("Doesn't get ack from pf, continue\n");
> 
>  	xgpu_ai_mailbox_set_valid(adev, false);
> 
> @@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct
> amdgpu_device *adev,
>  		req == IDH_REQ_GPU_FINI_ACCESS ||
>  		req == IDH_REQ_GPU_RESET_ACCESS) {
>  		r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
> -		if (r)
> +		if (r) {
> +			pr_err("Doesn't get READY_TO_ACCESS_GPU from
> pf, give up\n");
>  			return r;
> +		}
>  	}
> 
>  	return 0;
> @@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct
> amdgpu_device *adev,
>  					struct amdgpu_irq_src *source,
>  					struct amdgpu_iv_entry *entry)
>  {
> -	DRM_DEBUG("get ack intr and do nothing.\n");
> +	printk("get ack intr and do nothing.\n");
>  	return 0;
>  }
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> index 7bdc51b..f0d64f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> @@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device
> *adev)
>  			r = -ETIME;
>  			break;
>  		}
> -		msleep(1);
> -		timeout -= 1;
> +		mdelay(5);
> +		timeout -= 5;
> 
>  		reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
>  	}
> @@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device
> *adev, enum idh_event event)
>  			r = -ETIME;
>  			break;
>  		}
> -		msleep(1);
> -		timeout -= 1;
> +		mdelay(5);
> +		timeout -= 5;
> 
>  		r = xgpu_vi_mailbox_rcv_msg(adev, event);
>  	}
> @@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct
> amdgpu_device *adev,
>  		request == IDH_REQ_GPU_RESET_ACCESS) {
>  		r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
>  		if (r)
> -			return r;
> +			pr_err("Doesn't get ack from pf, continue\n");
>  	}
> 
>  	return 0;
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 4/6] drm/amdgpu:cleanups KIQ ring_funcs emit_frame_size
       [not found]     ` <1493783292-2661-4-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-03  6:33       ` Yu, Xiangliang
  2017-05-03  9:02       ` Christian König
  2017-05-03 15:23       ` Alex Deucher
  2 siblings, 0 replies; 23+ messages in thread
From: Yu, Xiangliang @ 2017-05-03  6:33 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

Reviewed-by: Xiangliang Yu <Xiangliang.Yu@amd.com>


Thanks!
Xiangliang Yu


> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Wednesday, May 03, 2017 11:48 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk <Monk.Liu@amd.com>
> Subject: [PATCH 4/6] drm/amdgpu:cleanups KIQ ring_funcs emit_frame_size
> 
> since we don't need hdp flush/inval for KIQ anymore
> 
> Change-Id: I8518f479afebb73c68ef922880f92dae53b665b9
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 --
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 --
>  2 files changed, 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 9629f3a..022a319 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6988,8 +6988,6 @@ static const struct amdgpu_ring_funcs
> gfx_v8_0_ring_funcs_kiq = {
>  	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
>  	.emit_frame_size =
>  		20 + /* gfx_v8_0_ring_emit_gds_switch */
> -		7 + /* gfx_v8_0_ring_emit_hdp_flush */
> -		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
>  		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
>  		17 + /* gfx_v8_0_ring_emit_vm_flush */
>  		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence,
> vm fence */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 2b2a2c2..6d5b66c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3620,8 +3620,6 @@ static const struct amdgpu_ring_funcs
> gfx_v9_0_ring_funcs_kiq = {
>  	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
>  	.emit_frame_size =
>  		20 + /* gfx_v9_0_ring_emit_gds_switch */
> -		7 + /* gfx_v9_0_ring_emit_hdp_flush */
> -		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
>  		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>  		24 + /* gfx_v9_0_ring_emit_vm_flush */
>  		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence,
> vm fence */
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 5/6] drm/amdgpu:kiq reg access need timeout(v2)
       [not found]     ` <1493783292-2661-5-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-03  6:35       ` Yu, Xiangliang
  0 siblings, 0 replies; 23+ messages in thread
From: Yu, Xiangliang @ 2017-05-03  6:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

Reviewed-by: Xiangliang Yu <Xiangliang.Yu@amd.com>


Thanks!
Xiangliang Yu


> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Wednesday, May 03, 2017 11:48 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk <Monk.Liu@amd.com>
> Subject: [PATCH 5/6] drm/amdgpu:kiq reg access need timeout(v2)
> 
> this is to prevent fence forever waiting if FLR occured during register
> accessing.
> 
> v2:
> use define instead of hardcode for the timeout msec
> 
> Change-Id: I32cc219a08f5a67654beb26c45d1b95d2b60cc96
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 13 ++++++++-----
>  1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 966bf7f..757cbc4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -22,6 +22,7 @@
>   */
> 
>  #include "amdgpu.h"
> +#define MAX_KIQ_REG_WAIT	100000
> 
>  int amdgpu_allocate_static_csa(struct amdgpu_device *adev)  { @@ -128,10
> +129,12 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev,
> uint32_t reg)
>  	amdgpu_ring_commit(ring);
>  	mutex_unlock(&kiq->ring_mutex);
> 
> -	r = fence_wait(f, false);
> -	if (r)
> -		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +	r = fence_wait_timeout(f, false,
> msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>  	fence_put(f);
> +	if (r < 1) {
> +		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> +		return ~0;
> +	}
> 
>  	val = adev->wb.wb[adev->virt.reg_val_offs];
> 
> @@ -154,8 +157,8 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device
> *adev, uint32_t reg, uint32_t v)
>  	amdgpu_ring_commit(ring);
>  	mutex_unlock(&kiq->ring_mutex);
> 
> -	r = fence_wait(f, false);
> -	if (r)
> +	r = fence_wait_timeout(f, false,
> msecs_to_jiffies(MAX_KIQ_REG_WAIT));
> +	if (r < 1)
>  		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>  	fence_put(f);
>  }
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 3/6] drm/amdgpu:re-write sriov_reinit_early/late
       [not found]     ` <1493783292-2661-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-03  6:43       ` Yu, Xiangliang
  2017-05-03  9:02       ` Christian König
  1 sibling, 0 replies; 23+ messages in thread
From: Yu, Xiangliang @ 2017-05-03  6:43 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

Reviewed-by: Xiangliang Yu <Xiangliang.Yu@amd.com>


Thanks!
Xiangliang Yu


> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Wednesday, May 03, 2017 11:48 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk <Monk.Liu@amd.com>
> Subject: [PATCH 3/6] drm/amdgpu:re-write sriov_reinit_early/late
> 
> 1,this way we make those routines compatible with the sequence
>   requirement for both Tonga and Vega10
> 2,ignore PSP hw init when doing TDR, because for SR-IOV device the ucode
> won't get lost after VF FLR, so no need to invoke PSP doing the ucode
> reloading again.
> 
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 63
> ++++++++++++++++++------------
>  1 file changed, 39 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 5161c20..5573792 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1718,19 +1718,27 @@ static int amdgpu_sriov_reinit_early(struct
> amdgpu_device *adev)  {
>  	int i, r;
> 
> -	for (i = 0; i < adev->num_ip_blocks; i++) {
> -		if (!adev->ip_blocks[i].status.valid)
> -			continue;
> -
> -		if (adev->ip_blocks[i].version->type ==
> AMD_IP_BLOCK_TYPE_COMMON ||
> -				adev->ip_blocks[i].version->type ==
> AMD_IP_BLOCK_TYPE_GMC ||
> -				adev->ip_blocks[i].version->type ==
> AMD_IP_BLOCK_TYPE_IH)
> -			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
> +	static enum amd_ip_block_type ip_order[] = {
> +		AMD_IP_BLOCK_TYPE_GMC,
> +		AMD_IP_BLOCK_TYPE_COMMON,
> +		AMD_IP_BLOCK_TYPE_GFXHUB,
> +		AMD_IP_BLOCK_TYPE_MMHUB,
> +		AMD_IP_BLOCK_TYPE_IH,
> +	};
> +
> +	for (i = 0; i < sizeof(ip_order)/sizeof(ip_order[0]); i++) {
> +		int j;
> +		struct amdgpu_ip_block *block;
> +
> +		for (j = 0; j < adev->num_ip_blocks; j++) {
> +			block = &adev->ip_blocks[j];
> +
> +			if (block->version->type != ip_order[i] ||
> +				!block->status.valid)
> +				continue;
> 
> -		if (r) {
> -			DRM_ERROR("resume of IP block <%s> failed %d\n",
> -				  adev->ip_blocks[i].version->funcs->name,
> r);
> -			return r;
> +			r = block->version->funcs->hw_init(adev);
> +			DRM_INFO("RE-INIT: %s %s\n", block->version-
> >funcs->name,
> +r?"failed":"successed");
>  		}
>  	}
> 
> @@ -1741,20 +1749,27 @@ static int amdgpu_sriov_reinit_late(struct
> amdgpu_device *adev)  {
>  	int i, r;
> 
> -	for (i = 0; i < adev->num_ip_blocks; i++) {
> -		if (!adev->ip_blocks[i].status.valid)
> -			continue;
> +	static enum amd_ip_block_type ip_order[] = {
> +		AMD_IP_BLOCK_TYPE_SMC,
> +		AMD_IP_BLOCK_TYPE_DCE,
> +		AMD_IP_BLOCK_TYPE_GFX,
> +		AMD_IP_BLOCK_TYPE_SDMA,
> +		AMD_IP_BLOCK_TYPE_VCE,
> +	};
> 
> -		if (adev->ip_blocks[i].version->type ==
> AMD_IP_BLOCK_TYPE_COMMON ||
> -				adev->ip_blocks[i].version->type ==
> AMD_IP_BLOCK_TYPE_GMC ||
> -				adev->ip_blocks[i].version->type ==
> AMD_IP_BLOCK_TYPE_IH )
> -			continue;
> +	for (i = 0; i < sizeof(ip_order)/sizeof(ip_order[0]); i++) {
> +		int j;
> +		struct amdgpu_ip_block *block;
> 
> -		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
> -		if (r) {
> -			DRM_ERROR("resume of IP block <%s> failed %d\n",
> -				  adev->ip_blocks[i].version->funcs->name,
> r);
> -			return r;
> +		for (j = 0; j < adev->num_ip_blocks; j++) {
> +			block = &adev->ip_blocks[j];
> +
> +			if (block->version->type != ip_order[i] ||
> +				!block->status.valid)
> +				continue;
> +
> +			r = block->version->funcs->hw_init(adev);
> +			DRM_INFO("RE-INIT: %s %s\n", block->version-
> >funcs->name,
> +r?"failed":"successed");
>  		}
>  	}
> 
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu:re-write sriov_reinit_early/late
       [not found]     ` <1493783292-2661-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-05-03  6:43       ` Yu, Xiangliang
@ 2017-05-03  9:02       ` Christian König
       [not found]         ` <fe18ce3e-dc63-412b-7dbb-aa5265dfad9f-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  1 sibling, 1 reply; 23+ messages in thread
From: Christian König @ 2017-05-03  9:02 UTC (permalink / raw)
  To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 03.05.2017 um 05:48 schrieb Monk Liu:
> 1,this way we make those routines compatible with the sequence
>    requirement for both Tonga and Vega10
> 2,ignore PSP hw init when doing TDR, because for SR-IOV device
> the ucode won't get lost after VF FLR, so no need to invoke PSP
> doing the ucode reloading again.
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 63 ++++++++++++++++++------------
>   1 file changed, 39 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 5161c20..5573792 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1718,19 +1718,27 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev)
>   {
>   	int i, r;
>   
> -	for (i = 0; i < adev->num_ip_blocks; i++) {
> -		if (!adev->ip_blocks[i].status.valid)
> -			continue;
> -
> -		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
> -				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
> -				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
> -			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
> +	static enum amd_ip_block_type ip_order[] = {
> +		AMD_IP_BLOCK_TYPE_GMC,
> +		AMD_IP_BLOCK_TYPE_COMMON,
> +		AMD_IP_BLOCK_TYPE_GFXHUB,
> +		AMD_IP_BLOCK_TYPE_MMHUB,
> +		AMD_IP_BLOCK_TYPE_IH,
> +	};
> +
> +	for (i = 0; i < sizeof(ip_order)/sizeof(ip_order[0]); i++) {

You should use ARRAY_SIZE here instead.

> +		int j;
> +		struct amdgpu_ip_block *block;
> +
> +		for (j = 0; j < adev->num_ip_blocks; j++) {
> +			block = &adev->ip_blocks[j];
> +
> +			if (block->version->type != ip_order[i] ||
> +				!block->status.valid)
> +				continue;
>   
> -		if (r) {
> -			DRM_ERROR("resume of IP block <%s> failed %d\n",
> -				  adev->ip_blocks[i].version->funcs->name, r);
> -			return r;
> +			r = block->version->funcs->hw_init(adev);
> +			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
>   		}
>   	}
>   
> @@ -1741,20 +1749,27 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev)
>   {
>   	int i, r;
>   
> -	for (i = 0; i < adev->num_ip_blocks; i++) {
> -		if (!adev->ip_blocks[i].status.valid)
> -			continue;
> +	static enum amd_ip_block_type ip_order[] = {
> +		AMD_IP_BLOCK_TYPE_SMC,
> +		AMD_IP_BLOCK_TYPE_DCE,
> +		AMD_IP_BLOCK_TYPE_GFX,
> +		AMD_IP_BLOCK_TYPE_SDMA,
> +		AMD_IP_BLOCK_TYPE_VCE,
> +	};
>   
> -		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
> -				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
> -				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
> -			continue;
> +	for (i = 0; i < sizeof(ip_order)/sizeof(ip_order[0]); i++) {

And here as well.

> +		int j;
> +		struct amdgpu_ip_block *block;
>   
> -		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
> -		if (r) {
> -			DRM_ERROR("resume of IP block <%s> failed %d\n",
> -				  adev->ip_blocks[i].version->funcs->name, r);
> -			return r;
> +		for (j = 0; j < adev->num_ip_blocks; j++) {
> +			block = &adev->ip_blocks[j];
> +
> +			if (block->version->type != ip_order[i] ||
> +				!block->status.valid)
> +				continue;
> +
> +			r = block->version->funcs->hw_init(adev);
> +			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");

This changes the order in which blocks are initialized which is probably 
not correct.

Alex needs to take a look at this, but we clearly need to improve the 
handling here.

Regards,
Christian.

>   		}
>   	}
>   


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/6] drm/amdgpu:cleanups KIQ ring_funcs emit_frame_size
       [not found]     ` <1493783292-2661-4-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-05-03  6:33       ` Yu, Xiangliang
@ 2017-05-03  9:02       ` Christian König
  2017-05-03 15:23       ` Alex Deucher
  2 siblings, 0 replies; 23+ messages in thread
From: Christian König @ 2017-05-03  9:02 UTC (permalink / raw)
  To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 03.05.2017 um 05:48 schrieb Monk Liu:
> since we don't need hdp flush/inval for KIQ anymore
>
> Change-Id: I8518f479afebb73c68ef922880f92dae53b665b9
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 --
>   2 files changed, 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 9629f3a..022a319 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6988,8 +6988,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
>   	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
>   	.emit_frame_size =
>   		20 + /* gfx_v8_0_ring_emit_gds_switch */
> -		7 + /* gfx_v8_0_ring_emit_hdp_flush */
> -		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
>   		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
>   		17 + /* gfx_v8_0_ring_emit_vm_flush */
>   		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 2b2a2c2..6d5b66c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3620,8 +3620,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
>   	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
>   	.emit_frame_size =
>   		20 + /* gfx_v9_0_ring_emit_gds_switch */
> -		7 + /* gfx_v9_0_ring_emit_hdp_flush */
> -		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
>   		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>   		24 + /* gfx_v9_0_ring_emit_vm_flush */
>   		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox
       [not found]     ` <1493783292-2661-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-05-03  6:27       ` Yu, Xiangliang
@ 2017-05-03  9:05       ` Christian König
       [not found]         ` <09b3c7b4-19f9-2ca2-301e-324b71af1479-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  1 sibling, 1 reply; 23+ messages in thread
From: Christian König @ 2017-05-03  9:05 UTC (permalink / raw)
  To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 03.05.2017 um 05:48 schrieb Monk Liu:
> if sriov gpu reset is invoked by job timeout, it is run
> in a global work-queue which is very slow and better not call
> msleep otherwise it takes long time to get back CPU.
>
> so make below changes:
>
> 1: Change msleep 1 to mdelay 5
> 2: Ignore the ack fail from pf after time out,
>     because VF FLR will clear ack, sometime VF FLR is done
>     prior to the beginning of poll_ack so we can ignore this ack
>
> TODO:
> Put job_timedout (and the following gpu reset) in a driver thread,
> instead of the global work_struct.
>
> Change-Id: I4608c67b55c67c88597e03eee35a126d7e850839
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 18 ++++++++++--------
>   drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 10 +++++-----
>   2 files changed, 15 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index 712f36e..e967a7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
>   			r = -ETIME;
>   			break;
>   		}
> -		msleep(1);
> -		timeout -= 1;
> +		mdelay(5);
> +		timeout -= 5;
>   
>   		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
>   						     mmBIF_BX_PF0_MAILBOX_CONTROL));
> @@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
>   	r = xgpu_ai_mailbox_rcv_msg(adev, event);
>   	while (r) {
>   		if (timeout <= 0) {
> -			pr_err("Doesn't get ack from pf.\n");
> +			pr_err("Doesn't get msg:%d from pf.\n", event);
>   			r = -ETIME;
>   			break;
>   		}
> -		msleep(1);
> -		timeout -= 1;
> +		mdelay(5);
> +		timeout -= 5;
>   
>   		r = xgpu_ai_mailbox_rcv_msg(adev, event);
>   	}
> @@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
>   	/* start to poll ack */
>   	r = xgpu_ai_poll_ack(adev);
>   	if (r)
> -		return r;
> +		pr_err("Doesn't get ack from pf, continue\n");
>   
>   	xgpu_ai_mailbox_set_valid(adev, false);
>   
> @@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
>   		req == IDH_REQ_GPU_FINI_ACCESS ||
>   		req == IDH_REQ_GPU_RESET_ACCESS) {
>   		r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
> -		if (r)
> +		if (r) {
> +			pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
>   			return r;
> +		}
>   	}
>   
>   	return 0;
> @@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
>   					struct amdgpu_irq_src *source,
>   					struct amdgpu_iv_entry *entry)
>   {
> -	DRM_DEBUG("get ack intr and do nothing.\n");
> +	printk("get ack intr and do nothing.\n");

Changing a DRM_DEBUG to a printk looks odd. How about using pr_warn or 
pr_info instead?

Apart from that the patch looks good to me, but I'm not deeply into that stuff.

So with the printk fixed feel free to add an Acked-by: Christian König 
<christian.koenig@amd.com> to it.

Regards,
Christian.

>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> index 7bdc51b..f0d64f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> @@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev)
>   			r = -ETIME;
>   			break;
>   		}
> -		msleep(1);
> -		timeout -= 1;
> +		mdelay(5);
> +		timeout -= 5;
>   
>   		reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
>   	}
> @@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
>   			r = -ETIME;
>   			break;
>   		}
> -		msleep(1);
> -		timeout -= 1;
> +		mdelay(5);
> +		timeout -= 5;
>   
>   		r = xgpu_vi_mailbox_rcv_msg(adev, event);
>   	}
> @@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev,
>   		request == IDH_REQ_GPU_RESET_ACCESS) {
>   		r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
>   		if (r)
> -			return r;
> +			pr_err("Doesn't get ack from pf, continue\n");
>   	}
>   
>   	return 0;


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/6] drm/amdgpu:fix cannot receive rcv/ack irq bug
       [not found] ` <1493783292-2661-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (5 preceding siblings ...)
  2017-05-03  6:24   ` [PATCH 1/6] drm/amdgpu:fix cannot receive rcv/ack irq bug Yu, Xiangliang
@ 2017-05-03  9:05   ` Christian König
  2017-05-03 15:19   ` Alex Deucher
  7 siblings, 0 replies; 23+ messages in thread
From: Christian König @ 2017-05-03  9:05 UTC (permalink / raw)
  To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 03.05.2017 um 05:48 schrieb Monk Liu:
> Change-Id: Ie8672e0c9358d9542810ce05c822d9367249bbd7
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Acked-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index 1493301..712f36e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -296,11 +296,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
>   {
>   	int r;
>   
> -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq);
> +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
>   	if (r)
>   		return r;
>   
> -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq);
> +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
>   	if (r) {
>   		amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
>   		return r;


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 3/6] drm/amdgpu:re-write sriov_reinit_early/late
       [not found]         ` <fe18ce3e-dc63-412b-7dbb-aa5265dfad9f-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-03  9:10           ` Liu, Monk
       [not found]             ` <DM5PR12MB1610B39D23F8DC3CBCD18A3784160-2J9CzHegvk++jCVTvoAFKAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Liu, Monk @ 2017-05-03  9:10 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

It's correct and has already been working on vega10/tonga for days.
In fact, the guilty context already works on my side.

BR Monk



-----Original Message-----
From: Christian König [mailto:deathsimple@vodafone.de] 
Sent: Wednesday, May 03, 2017 5:02 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/6] drm/amdgpu:re-write sriov_reinit_early/late

Am 03.05.2017 um 05:48 schrieb Monk Liu:
> 1,this way we make those routines compatible with the sequence
>    requirement for both Tonga and Vega10 2,ignore PSP hw init when 
> doing TDR, because for SR-IOV device the ucode won't get lost after VF 
> FLR, so no need to invoke PSP doing the ucode reloading again.
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 63 ++++++++++++++++++------------
>   1 file changed, 39 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 5161c20..5573792 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1718,19 +1718,27 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev)
>   {
>   	int i, r;
>   
> -	for (i = 0; i < adev->num_ip_blocks; i++) {
> -		if (!adev->ip_blocks[i].status.valid)
> -			continue;
> -
> -		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
> -				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
> -				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
> -			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
> +	static enum amd_ip_block_type ip_order[] = {
> +		AMD_IP_BLOCK_TYPE_GMC,
> +		AMD_IP_BLOCK_TYPE_COMMON,
> +		AMD_IP_BLOCK_TYPE_GFXHUB,
> +		AMD_IP_BLOCK_TYPE_MMHUB,
> +		AMD_IP_BLOCK_TYPE_IH,
> +	};
> +
> +	for (i = 0; i < sizeof(ip_order)/sizeof(ip_order[0]); i++) {

You should use ARRAY_SIZE here instead.

> +		int j;
> +		struct amdgpu_ip_block *block;
> +
> +		for (j = 0; j < adev->num_ip_blocks; j++) {
> +			block = &adev->ip_blocks[j];
> +
> +			if (block->version->type != ip_order[i] ||
> +				!block->status.valid)
> +				continue;
>   
> -		if (r) {
> -			DRM_ERROR("resume of IP block <%s> failed %d\n",
> -				  adev->ip_blocks[i].version->funcs->name, r);
> -			return r;
> +			r = block->version->funcs->hw_init(adev);
> +			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, 
> +r?"failed":"successed");
>   		}
>   	}
>   
> @@ -1741,20 +1749,27 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev)
>   {
>   	int i, r;
>   
> -	for (i = 0; i < adev->num_ip_blocks; i++) {
> -		if (!adev->ip_blocks[i].status.valid)
> -			continue;
> +	static enum amd_ip_block_type ip_order[] = {
> +		AMD_IP_BLOCK_TYPE_SMC,
> +		AMD_IP_BLOCK_TYPE_DCE,
> +		AMD_IP_BLOCK_TYPE_GFX,
> +		AMD_IP_BLOCK_TYPE_SDMA,
> +		AMD_IP_BLOCK_TYPE_VCE,
> +	};
>   
> -		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
> -				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
> -				adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
> -			continue;
> +	for (i = 0; i < sizeof(ip_order)/sizeof(ip_order[0]); i++) {

And here as well.

> +		int j;
> +		struct amdgpu_ip_block *block;
>   
> -		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
> -		if (r) {
> -			DRM_ERROR("resume of IP block <%s> failed %d\n",
> -				  adev->ip_blocks[i].version->funcs->name, r);
> -			return r;
> +		for (j = 0; j < adev->num_ip_blocks; j++) {
> +			block = &adev->ip_blocks[j];
> +
> +			if (block->version->type != ip_order[i] ||
> +				!block->status.valid)
> +				continue;
> +
> +			r = block->version->funcs->hw_init(adev);
> +			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, 
> +r?"failed":"successed");

This changes the order in which blocks are initialized which is probably not correct.

Alex needs to take a look at this, but we clearly need to improve the handling here.

Regards,
Christian.

>   		}
>   	}
>   


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 6/6] drm/amdgpu:PTE flag should be 64 bit width
       [not found]     ` <1493783292-2661-6-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-03 13:10       ` Alex Deucher
  0 siblings, 0 replies; 23+ messages in thread
From: Alex Deucher @ 2017-05-03 13:10 UTC (permalink / raw)
  To: Monk Liu; +Cc: amd-gfx list

On Tue, May 2, 2017 at 11:48 PM, Monk Liu <Monk.Liu@amd.com> wrote:
> otherwise we'll lose the high 32 bits of the PTE, which leads
> to an incorrect MTYPE for vega10.
>
> Change-Id: I1b0c7b8df14e340a36d4d2a72c6c03f469fdc29c
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Reviewed-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 11c6c44..c3fb2f9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -765,7 +765,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
>  {
>         struct amdgpu_ttm_tt *gtt, *tmp;
>         struct ttm_mem_reg bo_mem;
> -       uint32_t flags;
> +       uint64_t flags;
>         int r;
>
>         bo_mem.mem_type = TTM_PL_TT;
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/6] drm/amdgpu:fix cannot receive rcv/ack irq bug
       [not found] ` <1493783292-2661-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (6 preceding siblings ...)
  2017-05-03  9:05   ` Christian König
@ 2017-05-03 15:19   ` Alex Deucher
  7 siblings, 0 replies; 23+ messages in thread
From: Alex Deucher @ 2017-05-03 15:19 UTC (permalink / raw)
  To: Monk Liu; +Cc: amd-gfx list

On Tue, May 2, 2017 at 11:48 PM, Monk Liu <Monk.Liu@amd.com> wrote:
> Change-Id: Ie8672e0c9358d9542810ce05c822d9367249bbd7
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index 1493301..712f36e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -296,11 +296,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
>  {
>         int r;
>
> -       r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq);
> +       r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
>         if (r)
>                 return r;
>
> -       r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq);
> +       r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
>         if (r) {
>                 amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
>                 return r;
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox
       [not found]         ` <09b3c7b4-19f9-2ca2-301e-324b71af1479-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-05-03 15:20           ` Alex Deucher
       [not found]             ` <CADnq5_MHdrQzRKQNHMSR5ucorerVZ9JMA3kE4LOVVx1Lwz0=KA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Alex Deucher @ 2017-05-03 15:20 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx list, Monk Liu

On Wed, May 3, 2017 at 5:05 AM, Christian König <deathsimple@vodafone.de> wrote:
> Am 03.05.2017 um 05:48 schrieb Monk Liu:
>>
>> if sriov gpu reset is invoked by job timeout, it is run
>> in a global work-queue which is very slow and better not call
>> msleep otherwise it takes long time to get back CPU.
>>
>> so make below changes:
>>
>> 1: Change msleep 1 to mdelay 5
>> 2: Ignore the ack fail from pf after time out,
>>     because VF FLR will clear ack, sometime VF FLR is done
>>     prior to the beginning of poll_ack so we can ignore this ack
>>
>> TODO:
>> Put job_timedout (and the following gpu reset) in a driver thread,
>> instead of the global work_struct.
>>
>> Change-Id: I4608c67b55c67c88597e03eee35a126d7e850839
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 18 ++++++++++--------
>>   drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 10 +++++-----
>>   2 files changed, 15 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>> index 712f36e..e967a7b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>> @@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device
>> *adev)
>>                         r = -ETIME;
>>                         break;
>>                 }
>> -               msleep(1);
>> -               timeout -= 1;
>> +               mdelay(5);
>> +               timeout -= 5;
>>                 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
>>
>> mmBIF_BX_PF0_MAILBOX_CONTROL));
>> @@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device
>> *adev, enum idh_event event)
>>         r = xgpu_ai_mailbox_rcv_msg(adev, event);
>>         while (r) {
>>                 if (timeout <= 0) {
>> -                       pr_err("Doesn't get ack from pf.\n");
>> +                       pr_err("Doesn't get msg:%d from pf.\n", event);
>>                         r = -ETIME;
>>                         break;
>>                 }
>> -               msleep(1);
>> -               timeout -= 1;
>> +               mdelay(5);
>> +               timeout -= 5;
>>                 r = xgpu_ai_mailbox_rcv_msg(adev, event);
>>         }
>> @@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct
>> amdgpu_device *adev,
>>         /* start to poll ack */
>>         r = xgpu_ai_poll_ack(adev);
>>         if (r)
>> -               return r;
>> +               pr_err("Doesn't get ack from pf, continue\n");
>>         xgpu_ai_mailbox_set_valid(adev, false);
>>   @@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct
>> amdgpu_device *adev,
>>                 req == IDH_REQ_GPU_FINI_ACCESS ||
>>                 req == IDH_REQ_GPU_RESET_ACCESS) {
>>                 r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
>> -               if (r)
>> +               if (r) {
>> +                       pr_err("Doesn't get READY_TO_ACCESS_GPU from pf,
>> give up\n");
>>                         return r;
>> +               }
>>         }
>>         return 0;
>> @@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct
>> amdgpu_device *adev,
>>                                         struct amdgpu_irq_src *source,
>>                                         struct amdgpu_iv_entry *entry)
>>   {
>> -       DRM_DEBUG("get ack intr and do nothing.\n");
>> +       printk("get ack intr and do nothing.\n");
>
>
> Changing a DRM_DEBUG to a printk looks odd. How about using pr_warn or
> pr_info instead?
>
> Apart from that patch looks good to me, but I don't deeply into that stuff.
>
> So with the printk fixed feel free to add an Acked-by: Christian König
> <christian.koenig@amd.com> to it.

Same here.  with that fixed:
Acked-by: Alex Deucher <alexander.deucher@amd.com>

>
> Regards,
> Christian.
>
>
>>         return 0;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
>> b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
>> index 7bdc51b..f0d64f1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
>> @@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device
>> *adev)
>>                         r = -ETIME;
>>                         break;
>>                 }
>> -               msleep(1);
>> -               timeout -= 1;
>> +               mdelay(5);
>> +               timeout -= 5;
>>                 reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
>>         }
>> @@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device
>> *adev, enum idh_event event)
>>                         r = -ETIME;
>>                         break;
>>                 }
>> -               msleep(1);
>> -               timeout -= 1;
>> +               mdelay(5);
>> +               timeout -= 5;
>>                 r = xgpu_vi_mailbox_rcv_msg(adev, event);
>>         }
>> @@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct
>> amdgpu_device *adev,
>>                 request == IDH_REQ_GPU_RESET_ACCESS) {
>>                 r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
>>                 if (r)
>> -                       return r;
>> +                       pr_err("Doesn't get ack from pf, continue\n");
>>         }
>>         return 0;
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox
       [not found]             ` <CADnq5_MHdrQzRKQNHMSR5ucorerVZ9JMA3kE4LOVVx1Lwz0=KA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-05-03 15:22               ` Liu, Monk
  0 siblings, 0 replies; 23+ messages in thread
From: Liu, Monk @ 2017-05-03 15:22 UTC (permalink / raw)
  To: Alex Deucher, Christian König; +Cc: amd-gfx list

OK, will change later by another amend patch, already submitted 

-----Original Message-----
From: Alex Deucher [mailto:alexdeucher@gmail.com] 
Sent: Wednesday, May 3, 2017 11:20 PM
To: Christian König <deathsimple@vodafone.de>
Cc: Liu, Monk <Monk.Liu@amd.com>; amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox

On Wed, May 3, 2017 at 5:05 AM, Christian König <deathsimple@vodafone.de> wrote:
> Am 03.05.2017 um 05:48 schrieb Monk Liu:
>>
>> if sriov gpu reset is invoked by job timeout, it is run in a global 
>> work-queue which is very slow and better not call msleep otherwise 
>> it takes long time to get back CPU.
>>
>> so make below changes:
>>
>> 1: Change msleep 1 to mdelay 5
>> 2: Ignore the ack fail from pf after time out,
>>     because VF FLR will clear ack, sometime VF FLR is done
>>     prior to the beginning of poll_ack so we can ignore this ack
>>
>> TODO:
>> Put job_timedout (and the following gpu reset) in a driver thread, 
>> instead of the global work_struct.
>>
>> Change-Id: I4608c67b55c67c88597e03eee35a126d7e850839
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 18 ++++++++++--------
>>   drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 10 +++++-----
>>   2 files changed, 15 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>> index 712f36e..e967a7b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>> @@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device
>> *adev)
>>                         r = -ETIME;
>>                         break;
>>                 }
>> -               msleep(1);
>> -               timeout -= 1;
>> +               mdelay(5);
>> +               timeout -= 5;
>>                 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
>>
>> mmBIF_BX_PF0_MAILBOX_CONTROL));
>> @@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct 
>> amdgpu_device *adev, enum idh_event event)
>>         r = xgpu_ai_mailbox_rcv_msg(adev, event);
>>         while (r) {
>>                 if (timeout <= 0) {
>> -                       pr_err("Doesn't get ack from pf.\n");
>> +                       pr_err("Doesn't get msg:%d from pf.\n", 
>> + event);
>>                         r = -ETIME;
>>                         break;
>>                 }
>> -               msleep(1);
>> -               timeout -= 1;
>> +               mdelay(5);
>> +               timeout -= 5;
>>                 r = xgpu_ai_mailbox_rcv_msg(adev, event);
>>         }
>> @@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct
>> amdgpu_device *adev,
>>         /* start to poll ack */
>>         r = xgpu_ai_poll_ack(adev);
>>         if (r)
>> -               return r;
>> +               pr_err("Doesn't get ack from pf, continue\n");
>>         xgpu_ai_mailbox_set_valid(adev, false);
>>   @@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct
>> amdgpu_device *adev,
>>                 req == IDH_REQ_GPU_FINI_ACCESS ||
>>                 req == IDH_REQ_GPU_RESET_ACCESS) {
>>                 r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
>> -               if (r)
>> +               if (r) {
>> +                       pr_err("Doesn't get READY_TO_ACCESS_GPU from 
>> + pf,
>> give up\n");
>>                         return r;
>> +               }
>>         }
>>         return 0;
>> @@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct 
>> amdgpu_device *adev,
>>                                         struct amdgpu_irq_src *source,
>>                                         struct amdgpu_iv_entry *entry)
>>   {
>> -       DRM_DEBUG("get ack intr and do nothing.\n");
>> +       printk("get ack intr and do nothing.\n");
>
>
> Changing a DRM_DEBUG to a printk looks odd. How about using pr_warn or 
> pr_info instead?
>
> Apart from that, the patch looks good to me, but I'm not deeply into that stuff.
>
> So with the printk fixed feel free to add an Acked-by: Christian König 
> <christian.koenig@amd.com> to it.

Same here.  with that fixed:
Acked-by: Alex Deucher <alexander.deucher@amd.com>

>
> Regards,
> Christian.
>
>
>>         return 0;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
>> b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
>> index 7bdc51b..f0d64f1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
>> @@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device
>> *adev)
>>                         r = -ETIME;
>>                         break;
>>                 }
>> -               msleep(1);
>> -               timeout -= 1;
>> +               mdelay(5);
>> +               timeout -= 5;
>>                 reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
>>         }
>> @@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device 
>> *adev, enum idh_event event)
>>                         r = -ETIME;
>>                         break;
>>                 }
>> -               msleep(1);
>> -               timeout -= 1;
>> +               mdelay(5);
>> +               timeout -= 5;
>>                 r = xgpu_vi_mailbox_rcv_msg(adev, event);
>>         }
>> @@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct
>> amdgpu_device *adev,
>>                 request == IDH_REQ_GPU_RESET_ACCESS) {
>>                 r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
>>                 if (r)
>> -                       return r;
>> +                       pr_err("Doesn't get ack from pf, 
>> + continue\n");
>>         }
>>         return 0;
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu:re-write sriov_reinit_early/late
       [not found]             ` <DM5PR12MB1610B39D23F8DC3CBCD18A3784160-2J9CzHegvk++jCVTvoAFKAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-05-03 15:23               ` Alex Deucher
  0 siblings, 0 replies; 23+ messages in thread
From: Alex Deucher @ 2017-05-03 15:23 UTC (permalink / raw)
  To: Liu, Monk; +Cc: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Wed, May 3, 2017 at 5:10 AM, Liu, Monk <Monk.Liu@amd.com> wrote:
> It's correct and already working  on vega10/tonga for days,
> In fact the guilty context already works at my side

Need to use ARRAY_SIZE for the loops rather than open coding it.
Beyond that, if it works for sr-iov, it's fine.  Maybe we can look at
unifying things for sr-iov and bare metal in this case in the future.
With the ARRAY_SIZE change:

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

Alex

>
> BR Monk
>
>
>
> -----Original Message-----
> From: Christian König [mailto:deathsimple@vodafone.de]
> Sent: Wednesday, May 03, 2017 5:02 PM
> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 3/6] drm/amdgpu:re-write sriov_reinit_early/late
>
> Am 03.05.2017 um 05:48 schrieb Monk Liu:
>> 1,this way we make those routines compatible with the sequence
>>    requirement for both Tonga and Vega10 2,ignore PSP hw init when
>> doing TDR, because for SR-IOV device the ucode won't get lost after VF
>> FLR, so no need to invoke PSP doing the ucode reloading again.
>>
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 63 ++++++++++++++++++------------
>>   1 file changed, 39 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 5161c20..5573792 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1718,19 +1718,27 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev)
>>   {
>>       int i, r;
>>
>> -     for (i = 0; i < adev->num_ip_blocks; i++) {
>> -             if (!adev->ip_blocks[i].status.valid)
>> -                     continue;
>> -
>> -             if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
>> -                             adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
>> -                             adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
>> -                     r = adev->ip_blocks[i].version->funcs->hw_init(adev);
>> +     static enum amd_ip_block_type ip_order[] = {
>> +             AMD_IP_BLOCK_TYPE_GMC,
>> +             AMD_IP_BLOCK_TYPE_COMMON,
>> +             AMD_IP_BLOCK_TYPE_GFXHUB,
>> +             AMD_IP_BLOCK_TYPE_MMHUB,
>> +             AMD_IP_BLOCK_TYPE_IH,
>> +     };
>> +
>> +     for (i = 0; i < sizeof(ip_order)/sizeof(ip_order[0]); i++) {
>
> You should use ARRAY_SIZE here instead.
>
>> +             int j;
>> +             struct amdgpu_ip_block *block;
>> +
>> +             for (j = 0; j < adev->num_ip_blocks; j++) {
>> +                     block = &adev->ip_blocks[j];
>> +
>> +                     if (block->version->type != ip_order[i] ||
>> +                             !block->status.valid)
>> +                             continue;
>>
>> -             if (r) {
>> -                     DRM_ERROR("resume of IP block <%s> failed %d\n",
>> -                               adev->ip_blocks[i].version->funcs->name, r);
>> -                     return r;
>> +                     r = block->version->funcs->hw_init(adev);
>> +                     DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name,
>> +r?"failed":"successed");
>>               }
>>       }
>>
>> @@ -1741,20 +1749,27 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev)
>>   {
>>       int i, r;
>>
>> -     for (i = 0; i < adev->num_ip_blocks; i++) {
>> -             if (!adev->ip_blocks[i].status.valid)
>> -                     continue;
>> +     static enum amd_ip_block_type ip_order[] = {
>> +             AMD_IP_BLOCK_TYPE_SMC,
>> +             AMD_IP_BLOCK_TYPE_DCE,
>> +             AMD_IP_BLOCK_TYPE_GFX,
>> +             AMD_IP_BLOCK_TYPE_SDMA,
>> +             AMD_IP_BLOCK_TYPE_VCE,
>> +     };
>>
>> -             if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
>> -                             adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
>> -                             adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
>> -                     continue;
>> +     for (i = 0; i < sizeof(ip_order)/sizeof(ip_order[0]); i++) {
>
> And here as well.
>
>> +             int j;
>> +             struct amdgpu_ip_block *block;
>>
>> -             r = adev->ip_blocks[i].version->funcs->hw_init(adev);
>> -             if (r) {
>> -                     DRM_ERROR("resume of IP block <%s> failed %d\n",
>> -                               adev->ip_blocks[i].version->funcs->name, r);
>> -                     return r;
>> +             for (j = 0; j < adev->num_ip_blocks; j++) {
>> +                     block = &adev->ip_blocks[j];
>> +
>> +                     if (block->version->type != ip_order[i] ||
>> +                             !block->status.valid)
>> +                             continue;
>> +
>> +                     r = block->version->funcs->hw_init(adev);
>> +                     DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name,
>> +r?"failed":"successed");
>
> This changes the order in which blocks are initialized which is probably not correct.
>
> Alex needs to take a look at this, but we clearly need to improve the handling here.
>
> Regards,
> Christian.
>
>>               }
>>       }
>>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/6] drm/amdgpu:cleanups KIQ ring_funcs emit_frame_size
       [not found]     ` <1493783292-2661-4-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-05-03  6:33       ` Yu, Xiangliang
  2017-05-03  9:02       ` Christian König
@ 2017-05-03 15:23       ` Alex Deucher
  2 siblings, 0 replies; 23+ messages in thread
From: Alex Deucher @ 2017-05-03 15:23 UTC (permalink / raw)
  To: Monk Liu; +Cc: amd-gfx list

On Tue, May 2, 2017 at 11:48 PM, Monk Liu <Monk.Liu@amd.com> wrote:
> since we don't need hdp flush/inval for KIQ anymore
>
> Change-Id: I8518f479afebb73c68ef922880f92dae53b665b9
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 --
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 --
>  2 files changed, 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 9629f3a..022a319 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6988,8 +6988,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
>         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
>         .emit_frame_size =
>                 20 + /* gfx_v8_0_ring_emit_gds_switch */
> -               7 + /* gfx_v8_0_ring_emit_hdp_flush */
> -               5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
>                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
>                 17 + /* gfx_v8_0_ring_emit_vm_flush */
>                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 2b2a2c2..6d5b66c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3620,8 +3620,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
>         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
>         .emit_frame_size =
>                 20 + /* gfx_v9_0_ring_emit_gds_switch */
> -               7 + /* gfx_v9_0_ring_emit_hdp_flush */
> -               5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
>                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>                 24 + /* gfx_v9_0_ring_emit_vm_flush */
>                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox
       [not found] ` <1493620480-22002-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-05-01  6:34   ` Monk Liu
  0 siblings, 0 replies; 23+ messages in thread
From: Monk Liu @ 2017-05-01  6:34 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

if sriov gpu reset is invoked by job timeout, it is run
in a global work-queue which is very slow and better not call
msleep, otherwise it takes a long time to get back the CPU.

so make below changes:

1: Change msleep 1 to mdelay 5
2: Ignore the ack fail from pf after time out,
   because VF FLR will clear ack, sometime VF FLR is done
   prior to the beginning of poll_ack so we can ignore this ack

TODO:
Put job_timedout (and the following gpu reset) in a driver thread,
instead of the global work_struct.

Change-Id: I4608c67b55c67c88597e03eee35a126d7e850839
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 18 ++++++++++--------
 drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 10 +++++-----
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 712f36e..e967a7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
 						     mmBIF_BX_PF0_MAILBOX_CONTROL));
@@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 	r = xgpu_ai_mailbox_rcv_msg(adev, event);
 	while (r) {
 		if (timeout <= 0) {
-			pr_err("Doesn't get ack from pf.\n");
+			pr_err("Doesn't get msg:%d from pf.\n", event);
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		r = xgpu_ai_mailbox_rcv_msg(adev, event);
 	}
@@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 	/* start to poll ack */
 	r = xgpu_ai_poll_ack(adev);
 	if (r)
-		return r;
+		pr_err("Doesn't get ack from pf, continue\n");
 
 	xgpu_ai_mailbox_set_valid(adev, false);
 
@@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 		req == IDH_REQ_GPU_FINI_ACCESS ||
 		req == IDH_REQ_GPU_RESET_ACCESS) {
 		r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
-		if (r)
+		if (r) {
+			pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
 			return r;
+		}
 	}
 
 	return 0;
@@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
 					struct amdgpu_irq_src *source,
 					struct amdgpu_iv_entry *entry)
 {
-	DRM_DEBUG("get ack intr and do nothing.\n");
+	printk("get ack intr and do nothing.\n");
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 7bdc51b..f0d64f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev)
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
 	}
@@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 			r = -ETIME;
 			break;
 		}
-		msleep(1);
-		timeout -= 1;
+		mdelay(5);
+		timeout -= 5;
 
 		r = xgpu_vi_mailbox_rcv_msg(adev, event);
 	}
@@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev,
 		request == IDH_REQ_GPU_RESET_ACCESS) {
 		r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
 		if (r)
-			return r;
+			pr_err("Doesn't get ack from pf, continue\n");
 	}
 
 	return 0;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2017-05-03 15:23 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-03  3:48 [PATCH 1/6] drm/amdgpu:fix cannot receive rcv/ack irq bug Monk Liu
     [not found] ` <1493783292-2661-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-05-03  3:48   ` [PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox Monk Liu
     [not found]     ` <1493783292-2661-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-05-03  6:27       ` Yu, Xiangliang
2017-05-03  9:05       ` Christian König
     [not found]         ` <09b3c7b4-19f9-2ca2-301e-324b71af1479-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-03 15:20           ` Alex Deucher
     [not found]             ` <CADnq5_MHdrQzRKQNHMSR5ucorerVZ9JMA3kE4LOVVx1Lwz0=KA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-05-03 15:22               ` Liu, Monk
2017-05-03  3:48   ` [PATCH 3/6] drm/amdgpu:re-write sriov_reinit_early/late Monk Liu
     [not found]     ` <1493783292-2661-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-05-03  6:43       ` Yu, Xiangliang
2017-05-03  9:02       ` Christian König
     [not found]         ` <fe18ce3e-dc63-412b-7dbb-aa5265dfad9f-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-03  9:10           ` Liu, Monk
     [not found]             ` <DM5PR12MB1610B39D23F8DC3CBCD18A3784160-2J9CzHegvk++jCVTvoAFKAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-05-03 15:23               ` Alex Deucher
2017-05-03  3:48   ` [PATCH 4/6] drm/amdgpu:cleanups KIQ ring_funcs emit_frame_size Monk Liu
     [not found]     ` <1493783292-2661-4-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-05-03  6:33       ` Yu, Xiangliang
2017-05-03  9:02       ` Christian König
2017-05-03 15:23       ` Alex Deucher
2017-05-03  3:48   ` [PATCH 5/6] drm/amdgpu:kiq reg access need timeout(v2) Monk Liu
     [not found]     ` <1493783292-2661-5-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-05-03  6:35       ` Yu, Xiangliang
2017-05-03  3:48   ` [PATCH 6/6] drm/amdgpu:PTE flag should be 64 bit width Monk Liu
     [not found]     ` <1493783292-2661-6-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-05-03 13:10       ` Alex Deucher
2017-05-03  6:24   ` [PATCH 1/6] drm/amdgpu:fix cannot receive rcv/ack irq bug Yu, Xiangliang
2017-05-03  9:05   ` Christian König
2017-05-03 15:19   ` Alex Deucher
  -- strict thread matches above, loose matches on Subject: below --
2017-05-01  6:34 Monk Liu
     [not found] ` <1493620480-22002-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-05-01  6:34   ` [PATCH 2/6] drm/amdgpu:need som change on vega10 mailbox Monk Liu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.