All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
@ 2022-01-07 16:49 Mario Limonciello
  2022-01-07 16:55 ` Kazlauskas, Nicholas
  0 siblings, 1 reply; 13+ messages in thread
From: Mario Limonciello @ 2022-01-07 16:49 UTC (permalink / raw)
  To: amd-gfx
  Cc: Chris Hixon, Qingqing Zhuo, Scott Bruce, spasswolf,
	Kazlauskas Nicholas, Mario Limonciello

The WA from commit 2a50edbf10c8 ("drm/amd/display: Apply w/a for hard hang
on HPD") and commit 1bd3bc745e7f ("drm/amd/display: Extend w/a for hard
hang on HPD to dcn20") causes a regression in s0ix where the system will
fail to resume properly on many laptops.  Pull the workarounds out to
avoid that s0ix regression in the common case.  This HPD hang happens with
an external device and a new W/A will need to be developed for this in the
future.

Cc: Kazlauskas Nicholas <Nicholas.Kazlauskas@amd.com>
Cc: Qingqing Zhuo <qingqing.zhuo@amd.com>
Reported-by: Scott Bruce <smbruce@gmail.com>
Reported-by: Chris Hixon <linux-kernel-bugs@hixontech.com>
Reported-by: spasswolf@web.de
Link: https://bugzilla.kernel.org/show_bug.cgi?id=215436
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1821
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1852
Fixes: 2a50edbf10c8 ("drm/amd/display: Apply w/a for hard hang on HPD")
Fixes: 1bd3bc745e7f ("drm/amd/display: Extend w/a for hard hang on HPD to dcn20")
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 .../display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c  | 11 +-------
 .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 11 +-------
 .../display/dc/irq/dcn20/irq_service_dcn20.c  | 25 -------------------
 .../display/dc/irq/dcn20/irq_service_dcn20.h  |  2 --
 .../display/dc/irq/dcn21/irq_service_dcn21.c  | 25 -------------------
 .../display/dc/irq/dcn21/irq_service_dcn21.h  |  2 --
 .../gpu/drm/amd/display/dc/irq/irq_service.c  |  2 +-
 .../gpu/drm/amd/display/dc/irq/irq_service.h  |  4 ---
 8 files changed, 3 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
index 9f35f2e8f971..cac80ba69072 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
@@ -38,7 +38,6 @@
 #include "clk/clk_11_0_0_offset.h"
 #include "clk/clk_11_0_0_sh_mask.h"
 
-#include "irq/dcn20/irq_service_dcn20.h"
 
 #undef FN
 #define FN(reg_name, field_name) \
@@ -223,8 +222,6 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
 	bool force_reset = false;
 	bool p_state_change_support;
 	int total_plane_count;
-	int irq_src;
-	uint32_t hpd_state;
 
 	if (dc->work_arounds.skip_clock_update)
 		return;
@@ -242,13 +239,7 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base,
 	if (dc->res_pool->pp_smu)
 		pp_smu = &dc->res_pool->pp_smu->nv_funcs;
 
-	for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <= DC_IRQ_SOURCE_HPD6; irq_src++) {
-		hpd_state = dc_get_hpd_state_dcn20(dc->res_pool->irqs, irq_src);
-		if (hpd_state)
-			break;
-	}
-
-	if (display_count == 0 && !hpd_state)
+	if (display_count == 0)
 		enter_display_off = true;
 
 	if (enter_display_off == safe_to_lower) {
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index fbda42313bfe..f4dee0e48a67 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -42,7 +42,6 @@
 #include "clk/clk_10_0_2_sh_mask.h"
 #include "renoir_ip_offset.h"
 
-#include "irq/dcn21/irq_service_dcn21.h"
 
 /* Constants */
 
@@ -129,11 +128,9 @@ static void rn_update_clocks(struct clk_mgr *clk_mgr_base,
 	struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
 	struct dc *dc = clk_mgr_base->ctx->dc;
 	int display_count;
-	int irq_src;
 	bool update_dppclk = false;
 	bool update_dispclk = false;
 	bool dpp_clock_lowered = false;
-	uint32_t hpd_state;
 
 	struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
 
@@ -150,14 +147,8 @@ static void rn_update_clocks(struct clk_mgr *clk_mgr_base,
 
 			display_count = rn_get_active_display_cnt_wa(dc, context);
 
-			for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <= DC_IRQ_SOURCE_HPD5; irq_src++) {
-				hpd_state = dc_get_hpd_state_dcn21(dc->res_pool->irqs, irq_src);
-				if (hpd_state)
-					break;
-			}
-
 			/* if we can go lower, go lower */
-			if (display_count == 0 && !hpd_state) {
+			if (display_count == 0) {
 				rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER);
 				/* update power state */
 				clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
index 9ccafe007b23..c4b067d01895 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
@@ -132,31 +132,6 @@ enum dc_irq_source to_dal_irq_source_dcn20(
 	}
 }
 
-uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum dc_irq_source source)
-{
-	const struct irq_source_info *info;
-	uint32_t addr;
-	uint32_t value;
-	uint32_t current_status;
-
-	info = find_irq_source_info(irq_service, source);
-	if (!info)
-		return 0;
-
-	addr = info->status_reg;
-	if (!addr)
-		return 0;
-
-	value = dm_read_reg(irq_service->ctx, addr);
-	current_status =
-		get_reg_field_value(
-			value,
-			HPD0_DC_HPD_INT_STATUS,
-			DC_HPD_SENSE);
-
-	return current_status;
-}
-
 static bool hpd_ack(
 	struct irq_service *irq_service,
 	const struct irq_source_info *info)
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
index 4d69ab24ca25..aee4b37999f1 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
@@ -31,6 +31,4 @@
 struct irq_service *dal_irq_service_dcn20_create(
 	struct irq_service_init_data *init_data);
 
-uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum dc_irq_source source);
-
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
index 235294534c43..0f15bcada4e9 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
@@ -134,31 +134,6 @@ static enum dc_irq_source to_dal_irq_source_dcn21(struct irq_service *irq_servic
 	return DC_IRQ_SOURCE_INVALID;
 }
 
-uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum dc_irq_source source)
-{
-	const struct irq_source_info *info;
-	uint32_t addr;
-	uint32_t value;
-	uint32_t current_status;
-
-	info = find_irq_source_info(irq_service, source);
-	if (!info)
-		return 0;
-
-	addr = info->status_reg;
-	if (!addr)
-		return 0;
-
-	value = dm_read_reg(irq_service->ctx, addr);
-	current_status =
-		get_reg_field_value(
-			value,
-			HPD0_DC_HPD_INT_STATUS,
-			DC_HPD_SENSE);
-
-	return current_status;
-}
-
 static bool hpd_ack(
 	struct irq_service *irq_service,
 	const struct irq_source_info *info)
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
index 616470e32380..da2bd0e93d7a 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
@@ -31,6 +31,4 @@
 struct irq_service *dal_irq_service_dcn21_create(
 	struct irq_service_init_data *init_data);
 
-uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum dc_irq_source source);
-
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
index 4db1133e4466..a2a4fbeb83f8 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
@@ -79,7 +79,7 @@ void dal_irq_service_destroy(struct irq_service **irq_service)
 	*irq_service = NULL;
 }
 
-const struct irq_source_info *find_irq_source_info(
+static const struct irq_source_info *find_irq_source_info(
 	struct irq_service *irq_service,
 	enum dc_irq_source source)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
index e60b82480093..dbfcb096eedd 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
@@ -69,10 +69,6 @@ struct irq_service {
 	const struct irq_service_funcs *funcs;
 };
 
-const struct irq_source_info *find_irq_source_info(
-	struct irq_service *irq_service,
-	enum dc_irq_source source);
-
 void dal_irq_service_construct(
 	struct irq_service *irq_service,
 	struct irq_service_init_data *init_data);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* RE: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
  2022-01-07 16:49 [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21 Mario Limonciello
@ 2022-01-07 16:55 ` Kazlauskas, Nicholas
  2022-01-07 18:51   ` Limonciello, Mario
  0 siblings, 1 reply; 13+ messages in thread
From: Kazlauskas, Nicholas @ 2022-01-07 16:55 UTC (permalink / raw)
  To: Limonciello, Mario, amd-gfx
  Cc: Zhuo, Qingqing (Lillian), Scott Bruce, spasswolf, Chris Hixon

[AMD Official Use Only]

> -----Original Message-----
> From: Limonciello, Mario <Mario.Limonciello@amd.com>
> Sent: January 7, 2022 11:50 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Limonciello, Mario <Mario.Limonciello@amd.com>; Kazlauskas, Nicholas
> <Nicholas.Kazlauskas@amd.com>; Zhuo, Qingqing (Lillian)
> <Qingqing.Zhuo@amd.com>; Scott Bruce <smbruce@gmail.com>; Chris
> Hixon <linux-kernel-bugs@hixontech.com>; spasswolf@web.de
> Subject: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on
> DCN20/DCN21
> Importance: High
>
> The WA from commit 2a50edbf10c8 ("drm/amd/display: Apply w/a for hard
> hang
> on HPD") and commit 1bd3bc745e7f ("drm/amd/display: Extend w/a for hard
> hang on HPD to dcn20") causes a regression in s0ix where the system will
> fail to resume properly on many laptops.  Pull the workarounds out to
> avoid that s0ix regression in the common case.  This HPD hang happens with
> an external device and a new W/A will need to be developed for this in the
> future.
>
> Cc: Kazlauskas Nicholas <Nicholas.Kazlauskas@amd.com>
> Cc: Qingqing Zhuo <qingqing.zhuo@amd.com>
> Reported-by: Scott Bruce <smbruce@gmail.com>
> Reported-by: Chris Hixon <linux-kernel-bugs@hixontech.com>
> Reported-by: spasswolf@web.de
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=215436
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1821
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1852
> Fixes: 2a50edbf10c8 ("drm/amd/display: Apply w/a for hard hang on HPD")
> Fixes: 1bd3bc745e7f ("drm/amd/display: Extend w/a for hard hang on HPD to
> dcn20")
> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>

I think the revert is fine once we figure out where we're missing calls to:

        .optimize_pwr_state = dcn21_optimize_pwr_state,
        .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,

These are already part of dc_link_detect, so I suspect there's another interface in DC that should be using these.

I think the best way to debug this is to revert the patch locally and add a stack dump when DMCUB hangs or times out.

That way you can know where the PHY was trying to be accessed without the refclk being on.

We had a similar issue in DCN31 which didn't require a W/A like DCN21.

I'd like to hold off on merging this until that hang is verified as gone.

Regards,
Nicholas Kazlauskas

> ---
>  .../display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c  | 11 +-------
>  .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 11 +-------
>  .../display/dc/irq/dcn20/irq_service_dcn20.c  | 25 -------------------
>  .../display/dc/irq/dcn20/irq_service_dcn20.h  |  2 --
>  .../display/dc/irq/dcn21/irq_service_dcn21.c  | 25 -------------------
>  .../display/dc/irq/dcn21/irq_service_dcn21.h  |  2 --
>  .../gpu/drm/amd/display/dc/irq/irq_service.c  |  2 +-
>  .../gpu/drm/amd/display/dc/irq/irq_service.h  |  4 ---
>  8 files changed, 3 insertions(+), 79 deletions(-)
>
> diff --git
> a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
> b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
> index 9f35f2e8f971..cac80ba69072 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
> @@ -38,7 +38,6 @@
>  #include "clk/clk_11_0_0_offset.h"
>  #include "clk/clk_11_0_0_sh_mask.h"
>
> -#include "irq/dcn20/irq_service_dcn20.h"
>
>  #undef FN
>  #define FN(reg_name, field_name) \
> @@ -223,8 +222,6 @@ void dcn2_update_clocks(struct clk_mgr
> *clk_mgr_base,
>       bool force_reset = false;
>       bool p_state_change_support;
>       int total_plane_count;
> -     int irq_src;
> -     uint32_t hpd_state;
>
>       if (dc->work_arounds.skip_clock_update)
>               return;
> @@ -242,13 +239,7 @@ void dcn2_update_clocks(struct clk_mgr
> *clk_mgr_base,
>       if (dc->res_pool->pp_smu)
>               pp_smu = &dc->res_pool->pp_smu->nv_funcs;
>
> -     for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <=
> DC_IRQ_SOURCE_HPD6; irq_src++) {
> -             hpd_state = dc_get_hpd_state_dcn20(dc->res_pool->irqs,
> irq_src);
> -             if (hpd_state)
> -                     break;
> -     }
> -
> -     if (display_count == 0 && !hpd_state)
> +     if (display_count == 0)
>               enter_display_off = true;
>
>       if (enter_display_off == safe_to_lower) {
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
> b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
> index fbda42313bfe..f4dee0e48a67 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
> @@ -42,7 +42,6 @@
>  #include "clk/clk_10_0_2_sh_mask.h"
>  #include "renoir_ip_offset.h"
>
> -#include "irq/dcn21/irq_service_dcn21.h"
>
>  /* Constants */
>
> @@ -129,11 +128,9 @@ static void rn_update_clocks(struct clk_mgr
> *clk_mgr_base,
>       struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
>       struct dc *dc = clk_mgr_base->ctx->dc;
>       int display_count;
> -     int irq_src;
>       bool update_dppclk = false;
>       bool update_dispclk = false;
>       bool dpp_clock_lowered = false;
> -     uint32_t hpd_state;
>
>       struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
>
> @@ -150,14 +147,8 @@ static void rn_update_clocks(struct clk_mgr
> *clk_mgr_base,
>
>                       display_count = rn_get_active_display_cnt_wa(dc,
> context);
>
> -                     for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <=
> DC_IRQ_SOURCE_HPD5; irq_src++) {
> -                             hpd_state = dc_get_hpd_state_dcn21(dc-
> >res_pool->irqs, irq_src);
> -                             if (hpd_state)
> -                                     break;
> -                     }
> -
>                       /* if we can go lower, go lower */
> -                     if (display_count == 0 && !hpd_state) {
> +                     if (display_count == 0) {
>
>       rn_vbios_smu_set_dcn_low_power_state(clk_mgr,
> DCN_PWR_STATE_LOW_POWER);
>                               /* update power state */
>                               clk_mgr_base->clks.pwr_state =
> DCN_PWR_STATE_LOW_POWER;
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
> b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
> index 9ccafe007b23..c4b067d01895 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
> +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
> @@ -132,31 +132,6 @@ enum dc_irq_source to_dal_irq_source_dcn20(
>       }
>  }
>
> -uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum
> dc_irq_source source)
> -{
> -     const struct irq_source_info *info;
> -     uint32_t addr;
> -     uint32_t value;
> -     uint32_t current_status;
> -
> -     info = find_irq_source_info(irq_service, source);
> -     if (!info)
> -             return 0;
> -
> -     addr = info->status_reg;
> -     if (!addr)
> -             return 0;
> -
> -     value = dm_read_reg(irq_service->ctx, addr);
> -     current_status =
> -             get_reg_field_value(
> -                     value,
> -                     HPD0_DC_HPD_INT_STATUS,
> -                     DC_HPD_SENSE);
> -
> -     return current_status;
> -}
> -
>  static bool hpd_ack(
>       struct irq_service *irq_service,
>       const struct irq_source_info *info)
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
> b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
> index 4d69ab24ca25..aee4b37999f1 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
> +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
> @@ -31,6 +31,4 @@
>  struct irq_service *dal_irq_service_dcn20_create(
>       struct irq_service_init_data *init_data);
>
> -uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum
> dc_irq_source source);
> -
>  #endif
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
> b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
> index 235294534c43..0f15bcada4e9 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
> +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
> @@ -134,31 +134,6 @@ static enum dc_irq_source
> to_dal_irq_source_dcn21(struct irq_service *irq_servic
>       return DC_IRQ_SOURCE_INVALID;
>  }
>
> -uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum
> dc_irq_source source)
> -{
> -     const struct irq_source_info *info;
> -     uint32_t addr;
> -     uint32_t value;
> -     uint32_t current_status;
> -
> -     info = find_irq_source_info(irq_service, source);
> -     if (!info)
> -             return 0;
> -
> -     addr = info->status_reg;
> -     if (!addr)
> -             return 0;
> -
> -     value = dm_read_reg(irq_service->ctx, addr);
> -     current_status =
> -             get_reg_field_value(
> -                     value,
> -                     HPD0_DC_HPD_INT_STATUS,
> -                     DC_HPD_SENSE);
> -
> -     return current_status;
> -}
> -
>  static bool hpd_ack(
>       struct irq_service *irq_service,
>       const struct irq_source_info *info)
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
> b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
> index 616470e32380..da2bd0e93d7a 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
> +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
> @@ -31,6 +31,4 @@
>  struct irq_service *dal_irq_service_dcn21_create(
>       struct irq_service_init_data *init_data);
>
> -uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum
> dc_irq_source source);
> -
>  #endif
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> index 4db1133e4466..a2a4fbeb83f8 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> @@ -79,7 +79,7 @@ void dal_irq_service_destroy(struct irq_service
> **irq_service)
>       *irq_service = NULL;
>  }
>
> -const struct irq_source_info *find_irq_source_info(
> +static const struct irq_source_info *find_irq_source_info(
>       struct irq_service *irq_service,
>       enum dc_irq_source source)
>  {
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
> b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
> index e60b82480093..dbfcb096eedd 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
> +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
> @@ -69,10 +69,6 @@ struct irq_service {
>       const struct irq_service_funcs *funcs;
>  };
>
> -const struct irq_source_info *find_irq_source_info(
> -     struct irq_service *irq_service,
> -     enum dc_irq_source source);
> -
>  void dal_irq_service_construct(
>       struct irq_service *irq_service,
>       struct irq_service_init_data *init_data);
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
  2022-01-07 16:55 ` Kazlauskas, Nicholas
@ 2022-01-07 18:51   ` Limonciello, Mario
  2022-01-14 12:24     ` Chris Hixon
  0 siblings, 1 reply; 13+ messages in thread
From: Limonciello, Mario @ 2022-01-07 18:51 UTC (permalink / raw)
  To: Kazlauskas, Nicholas, amd-gfx
  Cc: Zhuo, Qingqing (Lillian), Scott Bruce, spasswolf, Chris Hixon

[AMD Official Use Only]


> I think the revert is fine once we figure out where we're missing calls to:
> 
>         .optimize_pwr_state = dcn21_optimize_pwr_state,
>         .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
> 
> These are already part of dc_link_detect, so I suspect there's another interface
> in DC that should be using these.
> 
> I think the best way to debug this is to revert the patch locally and add a stack
> dump when DMCUB hangs or times out.

OK so I did this on top of amd-staging-drm-next with my v5 patch (this revert in place)

diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index 9280f2abd973..0bd32f82f3db 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -789,8 +789,10 @@ enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub,
        // Execute command
        status = dmub_srv_cmd_execute(dmub);

-       if (status != DMUB_STATUS_OK)
+       if (status != DMUB_STATUS_OK) {
+               ASSERT(0);
                return status;
+       }

        // Wait for DMUB to process command
        status = dmub_srv_wait_for_idle(dmub, 100000);

> 
> That way you can know where the PHY was trying to be accessed without the
> refclk being on.
> 
> We had a similar issue in DCN31 which didn't require a W/A like DCN21.
> 
> I'd like to hold off on merging this until that hang is verified as gone.
> 

Then I took a RN laptop running DMUB 0x01010019 and disabled eDP, and confirmed
no CRTC was configured but plugged in an HDMI cable:

connector[78]: eDP-1
        crtc=(null)
        self_refresh_aware=0
connector[85]: HDMI-A-1
        crtc=crtc-1
        self_refresh_aware=0

I triggered 100 hotplugs like this:

#!/bin/bash
for i in {0..100..1}
do
    echo 1 | tee /sys/kernel/debug/dri/0/HDMI-A-1/trigger_hotplug
    sleep 3
done

Unfortunately, no hang or traceback to be seen (and HDMI continues to work).
I also manually pulled the plug a handful of times.  I don't know the specifics of how Lillian hit
the failure, though, so this might not be a good enough check.

I'll try to upgrade DMUB to 0x101001c (the latest version) and double check that as well.

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
  2022-01-07 18:51   ` Limonciello, Mario
@ 2022-01-14 12:24     ` Chris Hixon
  2022-01-14 15:38       ` Limonciello, Mario
  0 siblings, 1 reply; 13+ messages in thread
From: Chris Hixon @ 2022-01-14 12:24 UTC (permalink / raw)
  To: Limonciello, Mario, Kazlauskas, Nicholas, amd-gfx
  Cc: Zhuo, Qingqing (Lillian), Scott Bruce, spasswolf

On 1/7/22 11:51, Limonciello, Mario wrote:

> [AMD Official Use Only]
>
>
>> I think the revert is fine once we figure out where we're missing calls to:
>>
>>          .optimize_pwr_state = dcn21_optimize_pwr_state,
>>          .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
>>
>> These are already part of dc_link_detect, so I suspect there's another interface
>> in DC that should be using these.
>>
>> I think the best way to debug this is to revert the patch locally and add a stack
>> dump when DMCUB hangs or times out.
> OK so I did this on top of amd-staging-drm-next with my v5 patch (this revert in place)
>
> diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> index 9280f2abd973..0bd32f82f3db 100644
> --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> @@ -789,8 +789,10 @@ enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub,
>          // Execute command
>          status = dmub_srv_cmd_execute(dmub);
>
> -       if (status != DMUB_STATUS_OK)
> +       if (status != DMUB_STATUS_OK) {
> +               ASSERT(0);
>                  return status;
> +       }
>
>          // Wait for DMUB to process command
>          status = dmub_srv_wait_for_idle(dmub, 100000);
>
>> That way you can know where the PHY was trying to be accessed without the
>> refclk being on.
>>
>> We had a similar issue in DCN31 which didn't require a W/A like DCN21.
>>
>> I'd like to hold off on merging this until that hang is verified as gone.
>>
> Then I took a RN laptop running DMUB 0x01010019 and disabled eDP, and confirmed
> no CRTC was configured but plugged in an HDMI cable:
>
> connector[78]: eDP-1
>          crtc=(null)
>          self_refresh_aware=0
> connector[85]: HDMI-A-1
>          crtc=crtc-1
>          self_refresh_aware=0
>
> I triggered 100 hotplugs like this:
>
> #!/bin/bash
> for i in {0..100..1}
> do
>      echo 1 | tee /sys/kernel/debug/dri/0/HDMI-A-1/trigger_hotplug
>      sleep 3
> done
>
> Unfortunately, no hang or traceback to be seen (and HDMI continues to work).
> I also manually pulled the plug a handful of times I don't know the specifics that Lillian had the
> failure though, so this might not be a good enough check.
>
> I'll try to upgrade DMUB to 0x101001c (the latest version) and double check that as well.

I applied patch v5 and the above ASSERT patch, on top of both Linux 
5.16-rc8 and 5.16.

Result: no problems with suspend/resume, 16+ cycles.

As far as the hang goes:

I plugged in an HDMI cable connected to my TV, and configured Gnome to 
use the external display only.

connectors from /sys/kernel/debug/dri/0/state:

connector[78]: eDP-1
     crtc=(null)
     self_refresh_aware=0
connector[85]: HDMI-A-1
     crtc=crtc-1
     self_refresh_aware=0
connector[89]: DP-1
     crtc=(null)
     self_refresh_aware=0

I manually unplugged/plugged the HDMI cable 16+ times, and also ran:

$ sudo sh -c 'for ((i=0;i<100;i++)); do echo 1 | tee 
/sys/kernel/debug/dri/0/HDMI-A-1/trigger_hotplug; sleep 3; done'

The system did not hang, and I saw no kernel log output from the ASSERT.

I also tried a USB-C dock with an HDMI port, with the same results, 
though there are other issues with this (perhaps worthy of other bug 
reports).

Is there some reason to use amd-staging-drm-next for this test?

I don't use the HDMI connection much and I have never experienced a hang 
with HDMI in the first place. Can someone send a link to an 
issue/discussion where this hang is being discussed?

HW: HP ENVY x360 Convertible 15-ds1xxx, AMD Ryzen 7 4700U with Radeon 
Graphics
OS/Desktop: Arch Linux, Gnome 41.3 (Wayland)
FW: linux-firmware-git 20211229.57d6b95-1, DMUB version=0x0101001C






^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
  2022-01-14 12:24     ` Chris Hixon
@ 2022-01-14 15:38       ` Limonciello, Mario
  2022-01-14 16:48         ` Kazlauskas, Nicholas
  0 siblings, 1 reply; 13+ messages in thread
From: Limonciello, Mario @ 2022-01-14 15:38 UTC (permalink / raw)
  To: Chris Hixon, Kazlauskas, Nicholas, amd-gfx
  Cc: Zhuo, Qingqing (Lillian), Scott Bruce, spasswolf

[AMD Official Use Only]

> >
> >
> >> I think the revert is fine once we figure out where we're missing calls to:
> >>
> >>          .optimize_pwr_state = dcn21_optimize_pwr_state,
> >>          .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
> >>
> >> These are already part of dc_link_detect, so I suspect there's another
> interface
> >> in DC that should be using these.
> >>
> >> I think the best way to debug this is to revert the patch locally and add a stack
> >> dump when DMCUB hangs or times out.
> > OK so I did this on top of amd-staging-drm-next with my v5 patch (this revert in
> place)
> >
> > diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > index 9280f2abd973..0bd32f82f3db 100644
> > --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > @@ -789,8 +789,10 @@ enum dmub_status
> dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub,
> >          // Execute command
> >          status = dmub_srv_cmd_execute(dmub);
> >
> > -       if (status != DMUB_STATUS_OK)
> > +       if (status != DMUB_STATUS_OK) {
> > +               ASSERT(0);
> >                  return status;
> > +       }
> >
> >          // Wait for DMUB to process command
> >          status = dmub_srv_wait_for_idle(dmub, 100000);
> >
> >> That way you can know where the PHY was trying to be accessed without the
> >> refclk being on.
> >>
> >> We had a similar issue in DCN31 which didn't require a W/A like DCN21.
> >>
> >> I'd like to hold off on merging this until that hang is verified as gone.
> >>
> > Then I took a RN laptop running DMUB 0x01010019 and disabled eDP, and
> confirmed
> > no CRTC was configured but plugged in an HDMI cable:
> >
> > connector[78]: eDP-1
> >          crtc=(null)
> >          self_refresh_aware=0
> > connector[85]: HDMI-A-1
> >          crtc=crtc-1
> >          self_refresh_aware=0
> >
> > I triggered 100 hotplugs like this:
> >
> > #!/bin/bash
> > for i in {0..100..1}
> > do
> >      echo 1 | tee /sys/kernel/debug/dri/0/HDMI-A-1/trigger_hotplug
> >      sleep 3
> > done
> >
> > Unfortunately, no hang or traceback to be seen (and HDMI continues to work).
> > I also manually pulled the plug a handful of times I don't know the specifics
> that Lillian had the
> > failure though, so this might not be a good enough check.
> >
> > I'll try to upgrade DMUB to 0x101001c (the latest version) and double check
> that as well.
> 
> I applied patch v5 and the above ASSERT patch, on top of both Linux
> 5.16-rc8 and 5.16.
> 
> Result: no problems with suspend/resume, 16+ cycles.
> 
> As far as the hang goes:
> 
> I plugged in an HDMI cable connected to my TV, and configured Gnome to
> use the external display only.
> 
> connectors from /sys/kernel/debug/dri/0/state:
> 
> connector[78]: eDP-1
>      crtc=(null)
>      self_refresh_aware=0
> connector[85]: HDMI-A-1
>      crtc=crtc-1
>      self_refresh_aware=0
> connector[89]: DP-1
>      crtc=(null)
>      self_refresh_aware=0
> 
> I manually unplugged/plugged the HDMI cable 16+ times, and also ran:
> 
> $ sudo sh -c 'for ((i=0;i<100;i++)); do echo 1 | tee
> /sys/kernel/debug/dri/0/HDMI-A-1/trigger_hotplug; sleep 3; done'
> 
> The system did not hang, and I saw no kernel log output from the ASSERT.
> 
> I also tried a USB-C dock with an HDMI port, with the same results,
> though there are other issues with this (perhaps worthy of other bug
> reports).
> 
> Is there some reason to use amd-staging-drm-next for this test?
> 
> I don't use the HDMI connection much and I have never experienced a hang
> with HDMI in the first place. Can someone send a link to an
> issue/discussion where this hang is being discussed?
> 
> HW: HP ENVY x360 Convertible 15-ds1xxx, AMD Ryzen 7 4700U with Radeon
> Graphics
> OS/Desktop: Arch Linux, Gnome 41.3 (Wayland)
> FW: linux-firmware-git 20211229.57d6b95-1, DMUB version=0x0101001C
> 

Nicholas,

We've got a handful of people now (myself included) who have done a bunch of
physical and software triggered hotplugs on a variety of ports on top of both
amd-staging-drm-next and 5.16 and are not seeing any hangs.  Given this is lingering
on 5.16, are you amenable to merging it and letting Lillian dig further, after she
returns, into the specific case that she had problems with to see if we're missing anything else?

Thanks,

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
  2022-01-14 15:38       ` Limonciello, Mario
@ 2022-01-14 16:48         ` Kazlauskas, Nicholas
  2022-01-14 18:47           ` Limonciello, Mario
  0 siblings, 1 reply; 13+ messages in thread
From: Kazlauskas, Nicholas @ 2022-01-14 16:48 UTC (permalink / raw)
  To: Limonciello, Mario, Chris Hixon, amd-gfx
  Cc: Zhuo, Qingqing (Lillian), Scott Bruce, spasswolf

[Public]

> -----Original Message-----
> From: Limonciello, Mario <Mario.Limonciello@amd.com>
> Sent: January 14, 2022 10:38 AM
> To: Chris Hixon <linux-kernel-bugs@hixontech.com>; Kazlauskas, Nicholas
> <Nicholas.Kazlauskas@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Zhuo, Qingqing (Lillian) <Qingqing.Zhuo@amd.com>; Scott Bruce
> <smbruce@gmail.com>; spasswolf@web.de
> Subject: RE: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on
> DCN20/DCN21
> Importance: High
>
> [AMD Official Use Only]
>
> > >
> > >
> > >> I think the revert is fine once we figure out where we're missing calls to:
> > >>
> > >>          .optimize_pwr_state = dcn21_optimize_pwr_state,
> > >>          .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
> > >>
> > >> These are already part of dc_link_detect, so I suspect there's another
> > interface
> > >> in DC that should be using these.
> > >>
> > >> I think the best way to debug this is to revert the patch locally and add a
> stack
> > >> dump when DMCUB hangs or times out.
> > > OK so I did this on top of amd-staging-drm-next with my v5 patch (this
> revert in
> > place)
> > >
> > > diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > > index 9280f2abd973..0bd32f82f3db 100644
> > > --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > > +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > > @@ -789,8 +789,10 @@ enum dmub_status
> > dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub,
> > >          // Execute command
> > >          status = dmub_srv_cmd_execute(dmub);
> > >
> > > -       if (status != DMUB_STATUS_OK)
> > > +       if (status != DMUB_STATUS_OK) {
> > > +               ASSERT(0);
> > >                  return status;
> > > +       }
> > >
> > >          // Wait for DMUB to process command
> > >          status = dmub_srv_wait_for_idle(dmub, 100000);
> > >
> > >> That way you can know where the PHY was trying to be accessed
> without the
> > >> refclk being on.
> > >>
> > >> We had a similar issue in DCN31 which didn't require a W/A like DCN21.
> > >>
> > >> I'd like to hold off on merging this until that hang is verified as gone.
> > >>
> > > Then I took a RN laptop running DMUB 0x01010019 and disabled eDP, and
> > confirmed
> > > no CRTC was configured but plugged in an HDMI cable:
> > >
> > > connector[78]: eDP-1
> > >          crtc=(null)
> > >          self_refresh_aware=0
> > > connector[85]: HDMI-A-1
> > >          crtc=crtc-1
> > >          self_refresh_aware=0
> > >
> > > I triggered 100 hotplugs like this:
> > >
> > > #!/bin/bash
> > > for i in {0..100..1}
> > > do
> > >      echo 1 | tee /sys/kernel/debug/dri/0/HDMI-A-1/trigger_hotplug
> > >      sleep 3
> > > done
> > >
> > > Unfortunately, no hang or traceback to be seen (and HDMI continues to
> work).
> > > I also manually pulled the plug a handful of times I don't know the
> specifics
> > that Lillian had the
> > > failure though, so this might not be a good enough check.
> > >
> > > I'll try to upgrade DMUB to 0x101001c (the latest version) and double
> check
> > that as well.
> >
> > I applied patch v5 and the above ASSERT patch, on top of both Linux
> > 5.16-rc8 and 5.16.
> >
> > Result: no problems with suspend/resume, 16+ cycles.
> >
> > As far as the hang goes:
> >
> > I plugged in an HDMI cable connected to my TV, and configured Gnome to
> > use the external display only.
> >
> > connectors from /sys/kernel/debug/dri/0/state:
> >
> > connector[78]: eDP-1
> >      crtc=(null)
> >      self_refresh_aware=0
> > connector[85]: HDMI-A-1
> >      crtc=crtc-1
> >      self_refresh_aware=0
> > connector[89]: DP-1
> >      crtc=(null)
> >      self_refresh_aware=0
> >
> > I manually unplugged/plugged the HDMI cable 16+ times, and also ran:
> >
> > $ sudo sh -c 'for ((i=0;i<100;i++)); do echo 1 | tee
> > /sys/kernel/debug/dri/0/HDMI-A-1/trigger_hotplug; sleep 3; done'
> >
> > The system did not hang, and I saw no kernel log output from the ASSERT.
> >
> > I also tried a USB-C dock with an HDMI port, with the same results,
> > though there are other issues with this (perhaps worthy of other bug
> > reports).
> >
> > Is there some reason to use amd-staging-drm-next for this test?
> >
> > I don't use the HDMI connection much and I have never experienced a
> hang
> > with HDMI in the first place. Can someone send a link to an
> > issue/discussion where this hang is being discussed?
> >
> > HW: HP ENVY x360 Convertible 15-ds1xxx, AMD Ryzen 7 4700U with
> Radeon
> > Graphics
> > OS/Desktop: Arch Linux, Gnome 41.3 (Wayland)
> > FW: linux-firmware-git 20211229.57d6b95-1, DMUB version=0x0101001C
> >
>
> Nicholas,
>
> We've got a handful of people now (myself included) who have done a
> bunch of
> physical and software triggered hotplugs on a variety of ports on top of both
> amd-staging-drm-next and 5.16 and not seeing any hangs.  Given this is
> lingering
> on 5.16, are you amenable to it and letting Lillian dig further after she returns
> on
> the specific case that she had problems with to see if we're missing anything
> else?
>
> Thanks,

I think it was observed during HDMI compliance testing or frequent HDCP enter/exit on Chrome, but I don't remember the details off the top of my head. The system would completely lock up under those conditions.

I'm not familiar with the urgency of the request for your specific issue, but if you feel that the tradeoff is worth it then you can go ahead and revert for now.

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

Regards,
Nicholas Kazlauskas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
  2022-01-14 16:48         ` Kazlauskas, Nicholas
@ 2022-01-14 18:47           ` Limonciello, Mario
  2022-01-14 20:30             ` Alex Deucher
  0 siblings, 1 reply; 13+ messages in thread
From: Limonciello, Mario @ 2022-01-14 18:47 UTC (permalink / raw)
  To: Kazlauskas, Nicholas, Chris Hixon, amd-gfx, Deucher, Alexander
  Cc: Zhuo, Qingqing (Lillian), Scott Bruce, spasswolf

[Public]

> > > >
> > > >
> > > >> I think the revert is fine once we figure out where we're missing calls to:
> > > >>
> > > >>          .optimize_pwr_state = dcn21_optimize_pwr_state,
> > > >>          .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
> > > >>
> > > >> These are already part of dc_link_detect, so I suspect there's another
> > > interface
> > > >> in DC that should be using these.
> > > >>
> > > >> I think the best way to debug this is to revert the patch locally and add a
> > stack
> > > >> dump when DMCUB hangs or times out.
> > > > OK so I did this on top of amd-staging-drm-next with my v5 patch (this
> > revert in
> > > place)
> > > >
> > > > diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > > b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > > > index 9280f2abd973..0bd32f82f3db 100644
> > > > --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > > > +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > > > @@ -789,8 +789,10 @@ enum dmub_status
> > > dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub,
> > > >          // Execute command
> > > >          status = dmub_srv_cmd_execute(dmub);
> > > >
> > > > -       if (status != DMUB_STATUS_OK)
> > > > +       if (status != DMUB_STATUS_OK) {
> > > > +               ASSERT(0);
> > > >                  return status;
> > > > +       }
> > > >
> > > >          // Wait for DMUB to process command
> > > >          status = dmub_srv_wait_for_idle(dmub, 100000);
> > > >
> > > >> That way you can know where the PHY was trying to be accessed
> > without the
> > > >> refclk being on.
> > > >>
> > > >> We had a similar issue in DCN31 which didn't require a W/A like DCN21.
> > > >>
> > > >> I'd like to hold off on merging this until that hang is verified as gone.
> > > >>
> > > > Then I took a RN laptop running DMUB 0x01010019 and disabled eDP, and
> > > confirmed
> > > > no CRTC was configured but plugged in an HDMI cable:
> > > >
> > > > connector[78]: eDP-1
> > > >          crtc=(null)
> > > >          self_refresh_aware=0
> > > > connector[85]: HDMI-A-1
> > > >          crtc=crtc-1
> > > >          self_refresh_aware=0
> > > >
> > > > I triggered 100 hotplugs like this:
> > > >
> > > > #!/bin/bash
> > > > for i in {0..100..1}
> > > > do
> > > >      echo 1 | tee /sys/kernel/debug/dri/0/HDMI-A-1/trigger_hotplug
> > > >      sleep 3
> > > > done
> > > >
> > > > Unfortunately, no hang or traceback to be seen (and HDMI continues to
> > work).
> > > > I also manually pulled the plug a handful of times I don't know the
> > specifics
> > > that Lillian had the
> > > > failure though, so this might not be a good enough check.
> > > >
> > > > I'll try to upgrade DMUB to 0x101001c (the latest version) and double
> > check
> > > that as well.
> > >
> > > I applied patch v5 and the above ASSERT patch, on top of both Linux
> > > 5.16-rc8 and 5.16.
> > >
> > > Result: no problems with suspend/resume, 16+ cycles.
> > >
> > > As far as the hang goes:
> > >
> > > I plugged in an HDMI cable connected to my TV, and configured Gnome to
> > > use the external display only.
> > >
> > > connectors from /sys/kernel/debug/dri/0/state:
> > >
> > > connector[78]: eDP-1
> > >      crtc=(null)
> > >      self_refresh_aware=0
> > > connector[85]: HDMI-A-1
> > >      crtc=crtc-1
> > >      self_refresh_aware=0
> > > connector[89]: DP-1
> > >      crtc=(null)
> > >      self_refresh_aware=0
> > >
> > > I manually unplugged/plugged the HDMI cable 16+ times, and also ran:
> > >
> > > $ sudo sh -c 'for ((i=0;i<100;i++)); do echo 1 | tee
> > > /sys/kernel/debug/dri/0/HDMI-A-1/trigger_hotplug; sleep 3; done'
> > >
> > > The system did not hang, and I saw no kernel log output from the ASSERT.
> > >
> > > I also tried a USB-C dock with an HDMI port, with the same results,
> > > though there are other issues with this (perhaps worthy of other bug
> > > reports).
> > >
> > > Is there some reason to use amd-staging-drm-next for this test?
> > >
> > > I don't use the HDMI connection much and I have never experienced a
> > hang
> > > with HDMI in the first place. Can someone send a link to an
> > > issue/discussion where this hang is being discussed?
> > >
> > > HW: HP ENVY x360 Convertible 15-ds1xxx, AMD Ryzen 7 4700U with
> > Radeon
> > > Graphics
> > > OS/Desktop: Arch Linux, Gnome 41.3 (Wayland)
> > > FW: linux-firmware-git 20211229.57d6b95-1, DMUB version=0x0101001C
> > >
> >
> > Nicholas,
> >
> > We've got a handful of people now (myself included) who have done a
> > bunch of
> > physical and software triggered hotplugs on a variety of ports on top of both
> > amd-staging-drm-next and 5.16 and not seeing any hangs.  Given this is
> > lingering
> > on 5.16, are you amenable to it and letting Lillian dig further after she returns
> > on
> > the specific case that she had problems with to see if we're missing anything
> > else?
> >
> > Thanks,
> 
> I think it was observed during HDMI compliance testing or frequent HDCP
> enter/exit on Chrome, I don't remember the details off the top of my head. The
> system would completely lock up under those conditions.
> 
> I'm not familiar with the urgency of the request for your specific issue, but if you
> feel that the tradeoff is worth it then you can go ahead and revert for now.
> 
> Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
> 
> Regards,
> Nicholas Kazlauskas

Thanks.  Alex, when this gets pulled in, can you add a CC for stable so we get it in 5.16.1 too?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
  2022-01-14 18:47           ` Limonciello, Mario
@ 2022-01-14 20:30             ` Alex Deucher
  0 siblings, 0 replies; 13+ messages in thread
From: Alex Deucher @ 2022-01-14 20:30 UTC (permalink / raw)
  To: Limonciello, Mario
  Cc: Chris Hixon, Zhuo, Qingqing (Lillian),
	amd-gfx, Scott Bruce, spasswolf, Deucher, Alexander, Kazlauskas,
	Nicholas

On Fri, Jan 14, 2022 at 1:47 PM Limonciello, Mario
<Mario.Limonciello@amd.com> wrote:
>
> [Public]
>
> > > > >
> > > > >
> > > > >> I think the revert is fine once we figure out where we're missing calls to:
> > > > >>
> > > > >>          .optimize_pwr_state = dcn21_optimize_pwr_state,
> > > > >>          .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
> > > > >>
> > > > >> These are already part of dc_link_detect, so I suspect there's another
> > > > interface
> > > > >> in DC that should be using these.
> > > > >>
> > > > >> I think the best way to debug this is to revert the patch locally and add a
> > > stack
> > > > >> dump when DMCUB hangs or times out.
> > > > > OK so I did this on top of amd-staging-drm-next with my v5 patch (this
> > > revert in
> > > > place)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > > > b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > > > > index 9280f2abd973..0bd32f82f3db 100644
> > > > > --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > > > > +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
> > > > > @@ -789,8 +789,10 @@ enum dmub_status
> > > > dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub,
> > > > >          // Execute command
> > > > >          status = dmub_srv_cmd_execute(dmub);
> > > > >
> > > > > -       if (status != DMUB_STATUS_OK)
> > > > > +       if (status != DMUB_STATUS_OK) {
> > > > > +               ASSERT(0);
> > > > >                  return status;
> > > > > +       }
> > > > >
> > > > >          // Wait for DMUB to process command
> > > > >          status = dmub_srv_wait_for_idle(dmub, 100000);
> > > > >
> > > > >> That way you can know where the PHY was trying to be accessed
> > > without the
> > > > >> refclk being on.
> > > > >>
> > > > >> We had a similar issue in DCN31 which didn't require a W/A like DCN21.
> > > > >>
> > > > >> I'd like to hold off on merging this until that hang is verified as gone.
> > > > >>
> > > > > Then I took a RN laptop running DMUB 0x01010019 and disabled eDP, and
> > > > confirmed
> > > > > no CRTC was configured but plugged in an HDMI cable:
> > > > >
> > > > > connector[78]: eDP-1
> > > > >          crtc=(null)
> > > > >          self_refresh_aware=0
> > > > > connector[85]: HDMI-A-1
> > > > >          crtc=crtc-1
> > > > >          self_refresh_aware=0
> > > > >
> > > > > I triggered 100 hotplugs like this:
> > > > >
> > > > > #!/bin/bash
> > > > > for i in {0..100..1}
> > > > > do
> > > > >      echo 1 | tee /sys/kernel/debug/dri/0/HDMI-A-1/trigger_hotplug
> > > > >      sleep 3
> > > > > done
> > > > >
> > > > > Unfortunately, no hang or traceback to be seen (and HDMI continues to
> > > work).
> > > > > I also manually pulled the plug a handful of times I don't know the
> > > specifics
> > > > that Lillian had the
> > > > > failure though, so this might not be a good enough check.
> > > > >
> > > > > I'll try to upgrade DMUB to 0x101001c (the latest version) and double
> > > check
> > > > that as well.
> > > >
> > > > I applied patch v5 and the above ASSERT patch, on top of both Linux
> > > > 5.16-rc8 and 5.16.
> > > >
> > > > Result: no problems with suspend/resume, 16+ cycles.
> > > >
> > > > As far as the hang goes:
> > > >
> > > > I plugged in an HDMI cable connected to my TV, and configured Gnome to
> > > > use the external display only.
> > > >
> > > > connectors from /sys/kernel/debug/dri/0/state:
> > > >
> > > > connector[78]: eDP-1
> > > >      crtc=(null)
> > > >      self_refresh_aware=0
> > > > connector[85]: HDMI-A-1
> > > >      crtc=crtc-1
> > > >      self_refresh_aware=0
> > > > connector[89]: DP-1
> > > >      crtc=(null)
> > > >      self_refresh_aware=0
> > > >
> > > > I manually unplugged/plugged the HDMI cable 16+ times, and also ran:
> > > >
> > > > $ sudo sh -c 'for ((i=0;i<100;i++)); do echo 1 | tee
> > > > /sys/kernel/debug/dri/0/HDMI-A-1/trigger_hotplug; sleep 3; done'
> > > >
> > > > The system did not hang, and I saw no kernel log output from the ASSERT.
> > > >
> > > > I also tried a USB-C dock with an HDMI port, with the same results,
> > > > though there are other issues with this (perhaps worthy of other bug
> > > > reports).
> > > >
> > > > Is there some reason to use amd-staging-drm-next for this test?
> > > >
> > > > I don't use the HDMI connection much and I have never experienced a
> > > hang
> > > > with HDMI in the first place. Can someone send a link to an
> > > > issue/discussion where this hang is being discussed?
> > > >
> > > > HW: HP ENVY x360 Convertible 15-ds1xxx, AMD Ryzen 7 4700U with
> > > Radeon
> > > > Graphics
> > > > OS/Desktop: Arch Linux, Gnome 41.3 (Wayland)
> > > > FW: linux-firmware-git 20211229.57d6b95-1, DMUB version=0x0101001C
> > > >
> > >
> > > Nicholas,
> > >
> > > We've got a handful of people now (myself included) who have done a
> > > bunch of
> > > physical and software triggered hotplugs on a variety of ports on top of both
> > > amd-staging-drm-next and 5.16 and not seeing any hangs.  Given this is
> > > lingering
> > > on 5.16, are you amenable to it and letting Lillian dig further after she returns
> > > on
> > > the specific case that she had problems with to see if we're missing anything
> > > else?
> > >
> > > Thanks,
> >
> > I think it was observed during HDMI compliance testing or frequent HDCP
> > enter/exit on Chrome, I don't remember the details off the top of my head. The
> > system would completely lock up under those conditions.
> >
> > I'm not familiar with the urgency of the request for your specific issue, but if you
> > feel that the tradeoff is worth it then you can go ahead and revert for now.
> >
> > Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
> >
> > Regards,
> > Nicholas Kazlauskas
>
> Thanks.  Alex, when this pulls in can you add CC for stable so we get it in 5.16.1 too?

Yes, will do.

Alex

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
  2022-01-12  2:08 ` Limonciello, Mario
  2022-01-13  9:16   ` Bert Karwatzki
@ 2022-01-15  1:10   ` Bert Karwatzki
  1 sibling, 0 replies; 13+ messages in thread
From: Bert Karwatzki @ 2022-01-15  1:10 UTC (permalink / raw)
  To: amd-gfx
  Cc: Zhuo, Qingqing, ScottBruce, Limonciello, Mario, Chris Hixon,
	Kazlauskas, Nicholas

Am Mittwoch, dem 12.01.2022 um 02:08 +0000 schrieb Limonciello, Mario:
> [AMD Official Use Only]
>
> > -----Original Message-----
> > From: Bert Karwatzki <spasswolf@web.de>
> > Sent: Tuesday, January 11, 2022 19:12
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Limonciello, Mario <Mario.Limonciello@amd.com>; Kazlauskas,
> > Nicholas
> > <Nicholas.Kazlauskas@amd.com>; Zhuo, Qingqing (Lillian)
> > <Qingqing.Zhuo@amd.com>; Scott Bruce <smbruce@gmail.com>; Chris
> > Hixon
> > <linux-kernel-bugs@hixontech.com>
> > Subject: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on
> > DCN20/DCN21
> >
> > I just tested patch v5 applied to amd-staging-drm-next (with HEAD
> > 26c981e27e698c251ef3241f73ac846e66ad7fc3) and suspend and resume
> > work
> > fine. But as amd-staging-drm-next is still based on linux-5.13 I
> > had to
> > replace the mediatek wlan driver by the version from linux-5.16.
>
> FYI it should likely cherry pick to 5.16 too if it's easier to test
> there.
> I was able to cherry-pick to rc8 no problem.
>
> If you can please try any display hotplugging to make sure hotplugs
> don’t
> cause problems for you as well feel comfortable to add a Tested-by
> tag.
>
> Thanks,

Now I was able to get external HDMI displays to work (with gnome and
wayland) and display hotplugging did not cause problems.

Tested-by: Bert Karwatzki <spasswolf@web.de>


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
  2022-01-13  9:16   ` Bert Karwatzki
@ 2022-01-13 14:55     ` Limonciello, Mario
  0 siblings, 0 replies; 13+ messages in thread
From: Limonciello, Mario @ 2022-01-13 14:55 UTC (permalink / raw)
  To: Bert Karwatzki, amd-gfx
  Cc: Zhuo, Qingqing, ScottBruce, Kazlauskas, Nicholas, Chris Hixon

On 1/13/2022 03:16, Bert Karwatzki wrote:
> Am Mittwoch, dem 12.01.2022 um 02:08 +0000 schrieb Limonciello, Mario:
>> [AMD Official Use Only]
>>
>>> -----Original Message-----
>>> From: Bert Karwatzki <spasswolf@web.de>
>>> Sent: Tuesday, January 11, 2022 19:12
>>> To: amd-gfx@lists.freedesktop.org
>>> Cc: Limonciello, Mario <Mario.Limonciello@amd.com>; Kazlauskas,
>>> Nicholas
>>> <Nicholas.Kazlauskas@amd.com>; Zhuo, Qingqing (Lillian)
>>> <Qingqing.Zhuo@amd.com>; Scott Bruce <smbruce@gmail.com>; Chris
>>> Hixon
>>> <linux-kernel-bugs@hixontech.com>
>>> Subject: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on
>>> DCN20/DCN21
>>>
>>> I just tested patch v5 applied to amd-staging-drm-next (with HEAD
>>> 26c981e27e698c251ef3241f73ac846e66ad7fc3) and suspend and resume
>>> work
>>> fine. But as amd-staging-drm-next is still based on linux-5.13 I
>>> had to
>>> replace the mediatek wlan driver by the version from linux-5.16.
>>
>> FYI it should likely cherry pick to 5.16 too if it's easier to test
>> there.
>> I was able to cherry-pick to rc8 no problem.
>>
>> If you can please try any display hotplugging to make sure hotplugs
>> don’t
>> cause problems for you as well feel comfortable to add a Tested-by
>> tag.
>>
>> Thanks,
> 
> Unfortunately the external HDMI port on my Notebook (Alpha 15 B5EEK/MS-
> 158L) does not seem to work out-of-the-box. This is a two GPU machine
> 03:00.0 Display controller: Advanced Micro Devices, Inc. [AMD/ATI] Navi
> 23 [Radeon RX 6600/6600 XT/6600M] (rev c3)
> 08:00.0 VGA compatible controller: Advanced Micro Devices, Inc.
> [AMD/ATI] Cezanne (rev c5)
>   The Desktop Environment (gnome with Xorg) is running on the integrated
> Cezanne, but when I connect the external HDMI cable I get a dmesg
> resume message from the Navi GPU. So this either requires some
> additional configuration effort on my part or this is a story for
> another bug report.
> 

Unless this behavior changes without this revert patch in place, I would say
it is a story for another bug report.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
  2022-01-12  2:08 ` Limonciello, Mario
@ 2022-01-13  9:16   ` Bert Karwatzki
  2022-01-13 14:55     ` Limonciello, Mario
  2022-01-15  1:10   ` Bert Karwatzki
  1 sibling, 1 reply; 13+ messages in thread
From: Bert Karwatzki @ 2022-01-13  9:16 UTC (permalink / raw)
  To: amd-gfx
  Cc: Zhuo, Qingqing, ScottBruce, Limonciello, Mario, Chris Hixon,
	Kazlauskas, Nicholas

Am Mittwoch, dem 12.01.2022 um 02:08 +0000 schrieb Limonciello, Mario:
> [AMD Official Use Only]
>
> > -----Original Message-----
> > From: Bert Karwatzki <spasswolf@web.de>
> > Sent: Tuesday, January 11, 2022 19:12
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Limonciello, Mario <Mario.Limonciello@amd.com>; Kazlauskas,
> > Nicholas
> > <Nicholas.Kazlauskas@amd.com>; Zhuo, Qingqing (Lillian)
> > <Qingqing.Zhuo@amd.com>; Scott Bruce <smbruce@gmail.com>; Chris
> > Hixon
> > <linux-kernel-bugs@hixontech.com>
> > Subject: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on
> > DCN20/DCN21
> >
> > I just tested patch v5 applied to amd-staging-drm-next (with HEAD
> > 26c981e27e698c251ef3241f73ac846e66ad7fc3) and suspend and resume
> > work
> > fine. But as amd-staging-drm-next is still based on linux-5.13 I
> > had to
> > replace the mediatek wlan driver by the version from linux-5.16.
>
> FYI it should likely cherry pick to 5.16 too if it's easier to test
> there.
> I was able to cherry-pick to rc8 no problem.
>
> If you can please try any display hotplugging to make sure hotplugs
> don’t
> cause problems for you as well feel comfortable to add a Tested-by
> tag.
>
> Thanks,

Unfortunately the external HDMI port on my Notebook (Alpha 15 B5EEK/MS-
158L) does not seem to work out-of-the-box. This is a two GPU machine
03:00.0 Display controller: Advanced Micro Devices, Inc. [AMD/ATI] Navi
23 [Radeon RX 6600/6600 XT/6600M] (rev c3)
08:00.0 VGA compatible controller: Advanced Micro Devices, Inc.
[AMD/ATI] Cezanne (rev c5)
 The Desktop Environment (gnome with Xorg) is running on the integrated
Cezanne, but when I connect the external HDMI cable I get a dmesg
resume message from the Navi GPU. So this either requires some
additional configuration effort on my part or this is a story for
another bug report.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
  2022-01-12  1:11 Bert Karwatzki
@ 2022-01-12  2:08 ` Limonciello, Mario
  2022-01-13  9:16   ` Bert Karwatzki
  2022-01-15  1:10   ` Bert Karwatzki
  0 siblings, 2 replies; 13+ messages in thread
From: Limonciello, Mario @ 2022-01-12  2:08 UTC (permalink / raw)
  To: Bert Karwatzki, amd-gfx
  Cc: Zhuo, Qingqing (Lillian), Scott Bruce, Kazlauskas, Nicholas, Chris Hixon

[AMD Official Use Only]

> -----Original Message-----
> From: Bert Karwatzki <spasswolf@web.de>
> Sent: Tuesday, January 11, 2022 19:12
> To: amd-gfx@lists.freedesktop.org
> Cc: Limonciello, Mario <Mario.Limonciello@amd.com>; Kazlauskas, Nicholas
> <Nicholas.Kazlauskas@amd.com>; Zhuo, Qingqing (Lillian)
> <Qingqing.Zhuo@amd.com>; Scott Bruce <smbruce@gmail.com>; Chris Hixon
> <linux-kernel-bugs@hixontech.com>
> Subject: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on
> DCN20/DCN21
> 
> I just tested patch v5 applied to amd-staging-drm-next (with HEAD
> 26c981e27e698c251ef3241f73ac846e66ad7fc3) and suspend and resume work
> fine. But as amd-staging-drm-next is still based on linux-5.13 I had to
> replace the mediatek wlan driver by the version from linux-5.16.

FYI it should likely cherry pick to 5.16 too if it's easier to test there.
I was able to cherry-pick to rc8 no problem.

If you can, please try some display hotplugging to make sure hotplugs don’t
cause problems for you as well, and then feel free to add a Tested-by tag.

Thanks,

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21
@ 2022-01-12  1:11 Bert Karwatzki
  2022-01-12  2:08 ` Limonciello, Mario
  0 siblings, 1 reply; 13+ messages in thread
From: Bert Karwatzki @ 2022-01-12  1:11 UTC (permalink / raw)
  To: amd-gfx
  Cc: Qingqing Zhuo, Scott Bruce, Mario Limonciello, Chris Hixon,
	Kazlauskas Nicholas

I just tested patch v5 applied to amd-staging-drm-next (with HEAD
26c981e27e698c251ef3241f73ac846e66ad7fc3) and suspend and resume work
fine. But as amd-staging-drm-next is still based on linux-5.13 I had to
replace the mediatek wlan driver by the version from linux-5.16.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2022-01-15 15:00 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-07 16:49 [PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21 Mario Limonciello
2022-01-07 16:55 ` Kazlauskas, Nicholas
2022-01-07 18:51   ` Limonciello, Mario
2022-01-14 12:24     ` Chris Hixon
2022-01-14 15:38       ` Limonciello, Mario
2022-01-14 16:48         ` Kazlauskas, Nicholas
2022-01-14 18:47           ` Limonciello, Mario
2022-01-14 20:30             ` Alex Deucher
2022-01-12  1:11 Bert Karwatzki
2022-01-12  2:08 ` Limonciello, Mario
2022-01-13  9:16   ` Bert Karwatzki
2022-01-13 14:55     ` Limonciello, Mario
2022-01-15  1:10   ` Bert Karwatzki

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.